Merge branch 'nond-rep' into repackage

Branch: repack-nvid
Author: Salvatore Filippone
Commit: b5f1442ac8

@ -58,10 +58,11 @@ subroutine psb_cgather_vect(globx, locx, desc_a, info, iroot)
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: np, me, root, iiroot, icomm, myrank, rootrank
integer(psb_mpk_) :: np, me, root, iiroot, icomm, myrank, rootrank, loc_rows
integer(psb_ipk_) :: ierr(5), err_act, jlx, ilx, lda_locx, lda_globx, i
integer(psb_lpk_) :: m, n, k, ilocx, jlocx, idx, iglobx, jglobx
complex(psb_spk_), allocatable :: llocx(:)
integer(psb_mpk_), allocatable :: szs(:)
character(len=20) :: name, ch_err
name='psb_cgatherv'
@ -125,32 +126,34 @@ subroutine psb_cgather_vect(globx, locx, desc_a, info, iroot)
goto 9999
end if
call psb_realloc(m,globx,info)
if (info /= psb_success_) then
info=psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
globx(:) = czero
llocx = locx%get_vect()
do i=1,desc_a%get_local_rows()
call psb_loc_to_glob(i,idx,desc_a,info)
globx(idx) = llocx(i)
end do
llocx = locx%get_vect()
! adjust overlapped elements
do i=1, size(desc_a%ovrlap_elem,1)
if (me /= desc_a%ovrlap_elem(i,3)) then
idx = desc_a%ovrlap_elem(i,1)
call psb_loc_to_glob(idx,desc_a,info)
globx(idx) = czero
llocx(idx) = czero
end if
end do
call psb_sum(ctxt,globx(1:m),root=root)
allocate(szs(np))
loc_rows = desc_a%get_local_rows()
call psb_gather(ctxt,loc_rows,szs,root=root)
if ((me == root).or.(root == -1)) then
if (sum(szs) /= m) then
info=psb_err_internal_error_
call psb_errpush(info,name)
goto 9999
end if
call psb_realloc(m,globx,info)
if (info /= psb_success_) then
info=psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
end if
call psb_gatherv(ctxt,llocx(1:loc_rows),globx,szs,root=root)
call psb_erractionrestore(err_act)
return
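
  ! Editorial note: across the six type variants in this file the change is the
  ! same. The old code zero-filled a global-length buffer, scattered local
  ! entries via psb_loc_to_glob and summed it over the whole context with
  ! psb_sum; the new code exchanges the per-process row counts and calls
  ! psb_gatherv, so only local-length data travels. A condensed sketch of the
  ! new pattern (names as in the routine above, error checks omitted; czero is
  ! the zero of the vector's type, here complex(psb_spk_)):
  !
  !   loc_rows = desc_a%get_local_rows()
  !   llocx    = locx%get_vect()
  !   ! zero overlap entries owned by another process, so every global
  !   ! entry is contributed exactly once
  !   do i = 1, size(desc_a%ovrlap_elem,1)
  !     if (me /= desc_a%ovrlap_elem(i,3)) llocx(desc_a%ovrlap_elem(i,1)) = czero
  !   end do
  !   allocate(szs(np))
  !   call psb_gather(ctxt, loc_rows, szs, root=root)      ! per-process counts
  !   if ((me == root) .or. (root == -1)) call psb_realloc(m, globx, info)
  !   call psb_gatherv(ctxt, llocx(1:loc_rows), globx, szs, root=root)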

@ -58,10 +58,11 @@ subroutine psb_dgather_vect(globx, locx, desc_a, info, iroot)
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: np, me, root, iiroot, icomm, myrank, rootrank
integer(psb_mpk_) :: np, me, root, iiroot, icomm, myrank, rootrank, loc_rows
integer(psb_ipk_) :: ierr(5), err_act, jlx, ilx, lda_locx, lda_globx, i
integer(psb_lpk_) :: m, n, k, ilocx, jlocx, idx, iglobx, jglobx
real(psb_dpk_), allocatable :: llocx(:)
integer(psb_mpk_), allocatable :: szs(:)
character(len=20) :: name, ch_err
name='psb_dgatherv'
@ -125,32 +126,34 @@ subroutine psb_dgather_vect(globx, locx, desc_a, info, iroot)
goto 9999
end if
call psb_realloc(m,globx,info)
if (info /= psb_success_) then
info=psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
globx(:) = dzero
llocx = locx%get_vect()
do i=1,desc_a%get_local_rows()
call psb_loc_to_glob(i,idx,desc_a,info)
globx(idx) = llocx(i)
end do
llocx = locx%get_vect()
! adjust overlapped elements
do i=1, size(desc_a%ovrlap_elem,1)
if (me /= desc_a%ovrlap_elem(i,3)) then
idx = desc_a%ovrlap_elem(i,1)
call psb_loc_to_glob(idx,desc_a,info)
globx(idx) = dzero
llocx(idx) = dzero
end if
end do
call psb_sum(ctxt,globx(1:m),root=root)
allocate(szs(np))
loc_rows = desc_a%get_local_rows()
call psb_gather(ctxt,loc_rows,szs,root=root)
if ((me == root).or.(root == -1)) then
if (sum(szs) /= m) then
info=psb_err_internal_error_
call psb_errpush(info,name)
goto 9999
end if
call psb_realloc(m,globx,info)
if (info /= psb_success_) then
info=psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
end if
call psb_gatherv(ctxt,llocx(1:loc_rows),globx,szs,root=root)
call psb_erractionrestore(err_act)
return

@ -58,10 +58,11 @@ subroutine psb_igather_vect(globx, locx, desc_a, info, iroot)
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: np, me, root, iiroot, icomm, myrank, rootrank
integer(psb_mpk_) :: np, me, root, iiroot, icomm, myrank, rootrank, loc_rows
integer(psb_ipk_) :: ierr(5), err_act, jlx, ilx, lda_locx, lda_globx, i
integer(psb_lpk_) :: m, n, k, ilocx, jlocx, idx, iglobx, jglobx
integer(psb_ipk_), allocatable :: llocx(:)
integer(psb_mpk_), allocatable :: szs(:)
character(len=20) :: name, ch_err
name='psb_igatherv'
@ -125,32 +126,34 @@ subroutine psb_igather_vect(globx, locx, desc_a, info, iroot)
goto 9999
end if
call psb_realloc(m,globx,info)
if (info /= psb_success_) then
info=psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
globx(:) = izero
llocx = locx%get_vect()
do i=1,desc_a%get_local_rows()
call psb_loc_to_glob(i,idx,desc_a,info)
globx(idx) = llocx(i)
end do
llocx = locx%get_vect()
! adjust overlapped elements
do i=1, size(desc_a%ovrlap_elem,1)
if (me /= desc_a%ovrlap_elem(i,3)) then
idx = desc_a%ovrlap_elem(i,1)
call psb_loc_to_glob(idx,desc_a,info)
globx(idx) = izero
llocx(idx) = izero
end if
end do
call psb_sum(ctxt,globx(1:m),root=root)
allocate(szs(np))
loc_rows = desc_a%get_local_rows()
call psb_gather(ctxt,loc_rows,szs,root=root)
if ((me == root).or.(root == -1)) then
if (sum(szs) /= m) then
info=psb_err_internal_error_
call psb_errpush(info,name)
goto 9999
end if
call psb_realloc(m,globx,info)
if (info /= psb_success_) then
info=psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
end if
call psb_gatherv(ctxt,llocx(1:loc_rows),globx,szs,root=root)
call psb_erractionrestore(err_act)
return

@ -58,10 +58,11 @@ subroutine psb_lgather_vect(globx, locx, desc_a, info, iroot)
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: np, me, root, iiroot, icomm, myrank, rootrank
integer(psb_mpk_) :: np, me, root, iiroot, icomm, myrank, rootrank, loc_rows
integer(psb_ipk_) :: ierr(5), err_act, jlx, ilx, lda_locx, lda_globx, i
integer(psb_lpk_) :: m, n, k, ilocx, jlocx, idx, iglobx, jglobx
integer(psb_lpk_), allocatable :: llocx(:)
integer(psb_mpk_), allocatable :: szs(:)
character(len=20) :: name, ch_err
name='psb_lgatherv'
@ -125,32 +126,34 @@ subroutine psb_lgather_vect(globx, locx, desc_a, info, iroot)
goto 9999
end if
call psb_realloc(m,globx,info)
if (info /= psb_success_) then
info=psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
globx(:) = lzero
llocx = locx%get_vect()
do i=1,desc_a%get_local_rows()
call psb_loc_to_glob(i,idx,desc_a,info)
globx(idx) = llocx(i)
end do
llocx = locx%get_vect()
! adjust overlapped elements
do i=1, size(desc_a%ovrlap_elem,1)
if (me /= desc_a%ovrlap_elem(i,3)) then
idx = desc_a%ovrlap_elem(i,1)
call psb_loc_to_glob(idx,desc_a,info)
globx(idx) = lzero
llocx(idx) = lzero
end if
end do
call psb_sum(ctxt,globx(1:m),root=root)
allocate(szs(np))
loc_rows = desc_a%get_local_rows()
call psb_gather(ctxt,loc_rows,szs,root=root)
if ((me == root).or.(root == -1)) then
if (sum(szs) /= m) then
info=psb_err_internal_error_
call psb_errpush(info,name)
goto 9999
end if
call psb_realloc(m,globx,info)
if (info /= psb_success_) then
info=psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
end if
call psb_gatherv(ctxt,llocx(1:loc_rows),globx,szs,root=root)
call psb_erractionrestore(err_act)
return

@ -58,10 +58,11 @@ subroutine psb_sgather_vect(globx, locx, desc_a, info, iroot)
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: np, me, root, iiroot, icomm, myrank, rootrank
integer(psb_mpk_) :: np, me, root, iiroot, icomm, myrank, rootrank, loc_rows
integer(psb_ipk_) :: ierr(5), err_act, jlx, ilx, lda_locx, lda_globx, i
integer(psb_lpk_) :: m, n, k, ilocx, jlocx, idx, iglobx, jglobx
real(psb_spk_), allocatable :: llocx(:)
integer(psb_mpk_), allocatable :: szs(:)
character(len=20) :: name, ch_err
name='psb_sgatherv'
@ -125,32 +126,34 @@ subroutine psb_sgather_vect(globx, locx, desc_a, info, iroot)
goto 9999
end if
call psb_realloc(m,globx,info)
if (info /= psb_success_) then
info=psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
globx(:) = szero
llocx = locx%get_vect()
do i=1,desc_a%get_local_rows()
call psb_loc_to_glob(i,idx,desc_a,info)
globx(idx) = llocx(i)
end do
llocx = locx%get_vect()
! adjust overlapped elements
do i=1, size(desc_a%ovrlap_elem,1)
if (me /= desc_a%ovrlap_elem(i,3)) then
idx = desc_a%ovrlap_elem(i,1)
call psb_loc_to_glob(idx,desc_a,info)
globx(idx) = szero
llocx(idx) = szero
end if
end do
call psb_sum(ctxt,globx(1:m),root=root)
allocate(szs(np))
loc_rows = desc_a%get_local_rows()
call psb_gather(ctxt,loc_rows,szs,root=root)
if ((me == root).or.(root == -1)) then
if (sum(szs) /= m) then
info=psb_err_internal_error_
call psb_errpush(info,name)
goto 9999
end if
call psb_realloc(m,globx,info)
if (info /= psb_success_) then
info=psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
end if
call psb_gatherv(ctxt,llocx(1:loc_rows),globx,szs,root=root)
call psb_erractionrestore(err_act)
return

@ -58,10 +58,11 @@ subroutine psb_zgather_vect(globx, locx, desc_a, info, iroot)
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: np, me, root, iiroot, icomm, myrank, rootrank
integer(psb_mpk_) :: np, me, root, iiroot, icomm, myrank, rootrank, loc_rows
integer(psb_ipk_) :: ierr(5), err_act, jlx, ilx, lda_locx, lda_globx, i
integer(psb_lpk_) :: m, n, k, ilocx, jlocx, idx, iglobx, jglobx
complex(psb_dpk_), allocatable :: llocx(:)
integer(psb_mpk_), allocatable :: szs(:)
character(len=20) :: name, ch_err
name='psb_zgatherv'
@ -125,32 +126,34 @@ subroutine psb_zgather_vect(globx, locx, desc_a, info, iroot)
goto 9999
end if
call psb_realloc(m,globx,info)
if (info /= psb_success_) then
info=psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
globx(:) = zzero
llocx = locx%get_vect()
do i=1,desc_a%get_local_rows()
call psb_loc_to_glob(i,idx,desc_a,info)
globx(idx) = llocx(i)
end do
llocx = locx%get_vect()
! adjust overlapped elements
do i=1, size(desc_a%ovrlap_elem,1)
if (me /= desc_a%ovrlap_elem(i,3)) then
idx = desc_a%ovrlap_elem(i,1)
call psb_loc_to_glob(idx,desc_a,info)
globx(idx) = zzero
llocx(idx) = zzero
end if
end do
call psb_sum(ctxt,globx(1:m),root=root)
allocate(szs(np))
loc_rows = desc_a%get_local_rows()
call psb_gather(ctxt,loc_rows,szs,root=root)
if ((me == root).or.(root == -1)) then
if (sum(szs) /= m) then
info=psb_err_internal_error_
call psb_errpush(info,name)
goto 9999
end if
call psb_realloc(m,globx,info)
if (info /= psb_success_) then
info=psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
end if
call psb_gatherv(ctxt,llocx(1:loc_rows),globx,szs,root=root)
call psb_erractionrestore(err_act)
return

@ -39,6 +39,7 @@ SERIAL_MODS=serial/psb_s_serial_mod.o serial/psb_d_serial_mod.o \
auxil/psi_c_serial_mod.o auxil/psi_z_serial_mod.o \
psi_mod.o psi_i_mod.o psi_l_mod.o psi_s_mod.o psi_d_mod.o psi_c_mod.o psi_z_mod.o\
auxil/psb_ip_reord_mod.o\
auxil/psi_acx_mod.o auxil/psi_alcx_mod.o auxil/psi_lcx_mod.o \
auxil/psb_m_ip_reord_mod.o auxil/psb_e_ip_reord_mod.o \
auxil/psb_s_ip_reord_mod.o auxil/psb_d_ip_reord_mod.o \
auxil/psb_c_ip_reord_mod.o auxil/psb_z_ip_reord_mod.o \

@ -99,6 +99,33 @@ module psi_c_serial_mod
end subroutine psi_caxpbyv2
end interface psb_geaxpby
interface psi_abgdxyz
subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_spk_
implicit none
integer(psb_ipk_), intent(in) :: m
complex(psb_spk_), intent (in) :: x(:)
complex(psb_spk_), intent (inout) :: y(:)
complex(psb_spk_), intent (inout) :: z(:)
complex(psb_spk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_cabgdxyz
end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
import :: psb_ipk_, psb_spk_
implicit none
integer(psb_ipk_), intent(in) :: m
complex(psb_spk_), intent (in) :: x(:)
complex(psb_spk_), intent (inout) :: y(:)
complex(psb_spk_), intent (inout) :: z(:)
complex(psb_spk_), intent (inout) :: w(:)
complex(psb_spk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
end subroutine psi_cxyzw
end interface psi_xyzw
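
  ! Editorial note: the interface blocks added here only fix the signatures.
  ! Judging from the argument lists, psi_?abgdxyz looks like a fused pair of
  ! axpby updates (y from x, then z from the freshly updated y) and psi_?xyzw
  ! like the analogous three-step chain ending in w; this is an assumption, not
  ! something stated in this hunk. A hedged, self-contained reference loop for
  ! the two-step case (the library kernel may differ in unrolling, OpenMP use,
  ! and special-casing of unit/zero coefficients):
  !
  !   subroutine ref_abgdxyz(m, alpha, beta, gamma, delta, x, y, z, info)
  !     use iso_fortran_env, only : dp => real64
  !     implicit none
  !     integer,  intent(in)    :: m
  !     real(dp), intent(in)    :: alpha, beta, gamma, delta, x(:)
  !     real(dp), intent(inout) :: y(:), z(:)
  !     integer,  intent(out)   :: info
  !     integer :: i
  !     info = 0
  !     do i = 1, m
  !       y(i) = alpha*x(i) + beta*y(i)     ! first axpby
  !       z(i) = gamma*y(i) + delta*z(i)    ! second axpby reuses the new y
  !     end do
  !   end subroutine ref_abgdxyz
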
interface psi_gth
subroutine psi_cgthmv(n,k,idx,alpha,x,beta,y)
import :: psb_ipk_, psb_spk_

@ -99,6 +99,33 @@ module psi_d_serial_mod
end subroutine psi_daxpbyv2
end interface psb_geaxpby
interface psi_abgdxyz
subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_dpk_
implicit none
integer(psb_ipk_), intent(in) :: m
real(psb_dpk_), intent (in) :: x(:)
real(psb_dpk_), intent (inout) :: y(:)
real(psb_dpk_), intent (inout) :: z(:)
real(psb_dpk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_dabgdxyz
end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
import :: psb_ipk_, psb_dpk_
implicit none
integer(psb_ipk_), intent(in) :: m
real(psb_dpk_), intent (in) :: x(:)
real(psb_dpk_), intent (inout) :: y(:)
real(psb_dpk_), intent (inout) :: z(:)
real(psb_dpk_), intent (inout) :: w(:)
real(psb_dpk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
end subroutine psi_dxyzw
end interface psi_xyzw
interface psi_gth
subroutine psi_dgthmv(n,k,idx,alpha,x,beta,y)
import :: psb_ipk_, psb_dpk_

@ -99,6 +99,33 @@ module psi_e_serial_mod
end subroutine psi_eaxpbyv2
end interface psb_geaxpby
interface psi_abgdxyz
subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_epk_), intent (in) :: x(:)
integer(psb_epk_), intent (inout) :: y(:)
integer(psb_epk_), intent (inout) :: z(:)
integer(psb_epk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_eabgdxyz
end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_epk_), intent (in) :: x(:)
integer(psb_epk_), intent (inout) :: y(:)
integer(psb_epk_), intent (inout) :: z(:)
integer(psb_epk_), intent (inout) :: w(:)
integer(psb_epk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
end subroutine psi_exyzw
end interface psi_xyzw
interface psi_gth
subroutine psi_egthmv(n,k,idx,alpha,x,beta,y)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_

@ -99,6 +99,33 @@ module psi_i2_serial_mod
end subroutine psi_i2axpbyv2
end interface psb_geaxpby
interface psi_abgdxyz
subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_, psb_i2pk_
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_i2pk_), intent (in) :: x(:)
integer(psb_i2pk_), intent (inout) :: y(:)
integer(psb_i2pk_), intent (inout) :: z(:)
integer(psb_i2pk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_i2abgdxyz
end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
import :: psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_, psb_i2pk_
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_i2pk_), intent (in) :: x(:)
integer(psb_i2pk_), intent (inout) :: y(:)
integer(psb_i2pk_), intent (inout) :: z(:)
integer(psb_i2pk_), intent (inout) :: w(:)
integer(psb_i2pk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
end subroutine psi_i2xyzw
end interface psi_xyzw
interface psi_gth
subroutine psi_i2gthmv(n,k,idx,alpha,x,beta,y)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_

@ -99,6 +99,33 @@ module psi_m_serial_mod
end subroutine psi_maxpbyv2
end interface psb_geaxpby
interface psi_abgdxyz
subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_mpk_), intent (in) :: x(:)
integer(psb_mpk_), intent (inout) :: y(:)
integer(psb_mpk_), intent (inout) :: z(:)
integer(psb_mpk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_mabgdxyz
end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_mpk_), intent (in) :: x(:)
integer(psb_mpk_), intent (inout) :: y(:)
integer(psb_mpk_), intent (inout) :: z(:)
integer(psb_mpk_), intent (inout) :: w(:)
integer(psb_mpk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
end subroutine psi_mxyzw
end interface psi_xyzw
interface psi_gth
subroutine psi_mgthmv(n,k,idx,alpha,x,beta,y)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_

@ -99,6 +99,33 @@ module psi_s_serial_mod
end subroutine psi_saxpbyv2
end interface psb_geaxpby
interface psi_abgdxyz
subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_spk_
implicit none
integer(psb_ipk_), intent(in) :: m
real(psb_spk_), intent (in) :: x(:)
real(psb_spk_), intent (inout) :: y(:)
real(psb_spk_), intent (inout) :: z(:)
real(psb_spk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_sabgdxyz
end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
import :: psb_ipk_, psb_spk_
implicit none
integer(psb_ipk_), intent(in) :: m
real(psb_spk_), intent (in) :: x(:)
real(psb_spk_), intent (inout) :: y(:)
real(psb_spk_), intent (inout) :: z(:)
real(psb_spk_), intent (inout) :: w(:)
real(psb_spk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
end subroutine psi_sxyzw
end interface psi_xyzw
interface psi_gth
subroutine psi_sgthmv(n,k,idx,alpha,x,beta,y)
import :: psb_ipk_, psb_spk_

@ -99,6 +99,33 @@ module psi_z_serial_mod
end subroutine psi_zaxpbyv2
end interface psb_geaxpby
interface psi_abgdxyz
subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_dpk_
implicit none
integer(psb_ipk_), intent(in) :: m
complex(psb_dpk_), intent (in) :: x(:)
complex(psb_dpk_), intent (inout) :: y(:)
complex(psb_dpk_), intent (inout) :: z(:)
complex(psb_dpk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_zabgdxyz
end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
import :: psb_ipk_, psb_dpk_
implicit none
integer(psb_ipk_), intent(in) :: m
complex(psb_dpk_), intent (in) :: x(:)
complex(psb_dpk_), intent (inout) :: y(:)
complex(psb_dpk_), intent (inout) :: z(:)
complex(psb_dpk_), intent (inout) :: w(:)
complex(psb_dpk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
end subroutine psi_zxyzw
end interface psi_xyzw
interface psi_gth
subroutine psi_zgthmv(n,k,idx,alpha,x,beta,y)
import :: psb_ipk_, psb_dpk_

@ -34,6 +34,14 @@ module psi_c_collective_mod
use psb_desc_const_mod
interface psb_gather
module procedure psb_cgather_s, psb_cgather_v
end interface psb_gather
interface psb_gatherv
module procedure psb_cgatherv_v
end interface
interface psb_sum
module procedure psb_csums, psb_csumv, psb_csumm
end interface
@ -76,6 +84,250 @@ contains
!
! gather
!
subroutine psb_cgather_s(ctxt,dat,resv,root,mode,request)
#ifdef MPI_MOD
use mpi
#endif
implicit none
#ifdef MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
complex(psb_spk_), intent(inout) :: dat, resv(:)
integer(psb_mpk_), intent(in), optional :: root
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
integer(psb_mpk_) :: root_
integer(psb_mpk_) :: iam, np, info
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
#if defined(SERIAL_MPI)
resv(0) = dat
#else
call psb_info(ctxt,iam,np)
if (present(root)) then
root_ = root
else
root_ = -1
endif
icomm = psb_get_mpi_comm(ctxt)
if (present(mode)) then
collective_sync = .false.
collective_start = iand(mode,psb_collective_start_) /= 0
collective_end = iand(mode,psb_collective_end_) /= 0
if (.not.present(request)) then
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
else
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allgather(dat,1,psb_mpi_c_spk_,&
& resv,1,psb_mpi_c_spk_,icomm,info)
else
call mpi_gather(dat,1,psb_mpi_c_spk_,&
& resv,1,psb_mpi_c_spk_,root_,icomm,info)
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallgather(dat,1,psb_mpi_c_spk_,&
& resv,1,psb_mpi_c_spk_,icomm,request,info)
else
call mpi_igather(dat,1,psb_mpi_c_spk_,&
& resv,1,psb_mpi_c_spk_,root_,icomm,request,info)
endif
else if (collective_end) then
call mpi_wait(request,status,info)
end if
end if
#endif
end subroutine psb_cgather_s
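
  ! Editorial note: all collectives in this module share the mode/request
  ! protocol visible above. With no mode argument the call is a plain blocking
  ! collective; otherwise the bits psb_collective_start_ and psb_collective_end_
  ! select posting the non-blocking MPI call and waiting on the returned
  ! request, and the request argument is required for the split form (the code
  ! silently falls back to blocking when it is missing). A hedged usage sketch
  ! with the generic psb_gather name, dat and resv(np) set up as above:
  !
  !   integer(psb_mpk_) :: request
  !   call psb_gather(ctxt, dat, resv, root=root, &
  !        & mode=psb_collective_start_, request=request)
  !   ! ... independent local work can overlap the communication here ...
  !   call psb_gather(ctxt, dat, resv, root=root, &
  !        & mode=psb_collective_end_, request=request)
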
subroutine psb_cgather_v(ctxt,dat,resv,root,mode,request)
#ifdef MPI_MOD
use mpi
#endif
implicit none
#ifdef MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
complex(psb_spk_), intent(inout) :: dat(:), resv(:)
integer(psb_mpk_), intent(in), optional :: root
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
integer(psb_mpk_) :: root_
integer(psb_mpk_) :: iam, np, info
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
#if defined(SERIAL_MPI)
resv(0) = dat
#else
call psb_info(ctxt,iam,np)
if (present(root)) then
root_ = root
else
root_ = -1
endif
icomm = psb_get_mpi_comm(ctxt)
if (present(mode)) then
collective_sync = .false.
collective_start = iand(mode,psb_collective_start_) /= 0
collective_end = iand(mode,psb_collective_end_) /= 0
if (.not.present(request)) then
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
else
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allgather(dat,size(dat),psb_mpi_c_spk_,&
& resv,size(dat),psb_mpi_c_spk_,icomm,info)
else
call mpi_gather(dat,size(dat),psb_mpi_c_spk_,&
& resv,size(dat),psb_mpi_c_spk_,root_,icomm,info)
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallgather(dat,size(dat),psb_mpi_c_spk_,&
& resv,size(dat),psb_mpi_c_spk_,icomm,request,info)
else
call mpi_igather(dat,size(dat),psb_mpi_c_spk_,&
& resv,size(dat),psb_mpi_c_spk_,root_,icomm,request,info)
endif
else if (collective_end) then
call mpi_wait(request,status,info)
end if
end if
#endif
end subroutine psb_cgather_v
subroutine psb_cgatherv_v(ctxt,dat,resv,szs,root,mode,request)
#ifdef MPI_MOD
use mpi
#endif
implicit none
#ifdef MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
complex(psb_spk_), intent(inout) :: dat(:), resv(:)
integer(psb_mpk_), intent(in), optional :: root
integer(psb_mpk_), intent(in), optional :: szs(:)
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
integer(psb_mpk_) :: root_
integer(psb_mpk_) :: iam, np, info,i
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
integer(psb_mpk_), allocatable :: displs(:)
logical :: collective_start, collective_end, collective_sync
#if defined(SERIAL_MPI)
resv(0) = dat
#else
call psb_info(ctxt,iam,np)
if (present(root)) then
root_ = root
else
root_ = -1
endif
icomm = psb_get_mpi_comm(ctxt)
if (present(mode)) then
collective_sync = .false.
collective_start = iand(mode,psb_collective_start_) /= 0
collective_end = iand(mode,psb_collective_end_) /= 0
if (.not.present(request)) then
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
else
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
if (collective_sync) then
if (root_ == -1) then
if (size(szs) < np) write(0,*) 'Error: bad input sizes'
allocate(displs(np))
displs(1) = 0
do i=2, np
displs(i) = displs(i-1) + szs(i-1)
end do
call mpi_allgatherv(dat,size(dat),psb_mpi_c_spk_,&
& resv,szs,displs,psb_mpi_c_spk_,icomm,info)
else
if (iam == root_) then
if (size(szs) < np) write(0,*) 'Error: bad input sizes'
allocate(displs(np))
displs(1) = 0
do i=2, np
displs(i) = displs(i-1) + szs(i-1)
end do
else
allocate(displs(0))
end if
call mpi_gatherv(dat,size(dat),psb_mpi_c_spk_,&
& resv,szs,displs,psb_mpi_c_spk_,root_,icomm,info)
endif
else
if (collective_start) then
if (root_ == -1) then
if (size(szs) < np) write(0,*) 'Error: bad input sizes'
allocate(displs(np))
displs(1) = 0
do i=2, np
displs(i) = displs(i-1) + szs(i-1)
end do
call mpi_iallgatherv(dat,size(dat),psb_mpi_c_spk_,&
& resv,szs,displs,psb_mpi_c_spk_,icomm,request,info)
else
if (iam == root_) then
if (size(szs) < np) write(0,*) 'Error: bad input sizes'
allocate(displs(np))
displs(1) = 0
do i=2, np
displs(i) = displs(i-1) + szs(i-1)
end do
else
allocate(displs(0))
end if
call mpi_igatherv(dat,size(dat),psb_mpi_c_spk_,&
& resv,szs,displs,psb_mpi_c_spk_,root_,icomm,request,info)
endif
else if (collective_end) then
call mpi_wait(request,status,info)
end if
end if
#endif
end subroutine psb_cgatherv_v
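
  ! Editorial note: the gatherv wrapper derives the receive displacements as an
  ! exclusive prefix sum of the per-process counts, and only the root (or every
  ! rank in the allgatherv case) needs them. A small worked example:
  !
  !   ! with np = 3 and szs = (/ 3, 2, 4 /) the loop above produces
  !   !   displs = (/ 0, 3, 5 /)
  !   ! so rank p's szs(p+1) entries land in
  !   !   resv(displs(p+1)+1 : displs(p+1)+szs(p+1))
  !   ! and the assembled vector has sum(szs) = 9 entries in rank order.
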
!
! SUM
!
@ -124,20 +376,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,&
call mpi_allreduce(mpi_in_place,dat,1,&
& psb_mpi_c_spk_,mpi_sum,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_c_spk_,mpi_sum,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,&
& psb_mpi_c_spk_,mpi_sum,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,&
& psb_mpi_c_spk_,mpi_sum,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_c_spk_,mpi_sum,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_c_spk_,mpi_sum,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_c_spk_,mpi_sum,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_c_spk_,mpi_sum,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
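
  ! Editorial note: the reduction hunks from here on all apply the same
  ! correction. For rooted reductions (mpi_reduce / mpi_ireduce) MPI_IN_PLACE
  ! is only valid as the send buffer on the root process, so non-root ranks
  ! must pass their data as the send argument. The guard is shown once here
  ! for reference, since it recurs in every reduction below:
  !
  !   if (iam == root_) then
  !     call mpi_reduce(mpi_in_place, dat, 1, psb_mpi_c_spk_, mpi_sum, &
  !          & root_, icomm, info)
  !   else
  !     call mpi_reduce(dat, dat, 1, psb_mpi_c_spk_, mpi_sum, &
  !          & root_, icomm, info)
  !   end if
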
@ -190,20 +452,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_spk_,mpi_sum,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_c_spk_,mpi_sum,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_spk_,mpi_sum,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_c_spk_,mpi_sum,root_,icomm,info)
end if
end if
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_spk_,mpi_sum,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_spk_,mpi_sum,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_c_spk_,mpi_sum,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -258,20 +530,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_spk_,mpi_sum,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_c_spk_,mpi_sum,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_spk_,mpi_sum,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_c_spk_,mpi_sum,root_,icomm,info)
end if
end if
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_spk_,mpi_sum,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_c_spk_,mpi_sum,root_, icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_spk_,mpi_sum,root_, icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_c_spk_,mpi_sum,root_, icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -328,20 +610,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,&
call mpi_allreduce(mpi_in_place,dat,1,&
& psb_mpi_c_spk_,mpi_camx_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_c_spk_,mpi_camx_op,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,&
& psb_mpi_c_spk_,mpi_camx_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,&
& psb_mpi_c_spk_,mpi_camx_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_c_spk_,mpi_camx_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_c_spk_,mpi_camx_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_c_spk_,mpi_camx_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_c_spk_,mpi_camx_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -395,20 +687,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
psb_mpi_c_spk_,mpi_camx_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_spk_,mpi_camx_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_c_spk_,mpi_camx_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_spk_,mpi_camx_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_c_spk_,mpi_camx_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_spk_,mpi_camx_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_c_spk_,mpi_camx_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -463,20 +765,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_spk_,mpi_camx_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_c_spk_,mpi_camx_op,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_spk_,mpi_camx_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_c_spk_,mpi_camx_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_spk_,mpi_camx_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_spk_,mpi_camx_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_c_spk_,mpi_camx_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -532,20 +844,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,&
call mpi_allreduce(mpi_in_place,dat,1,&
& psb_mpi_c_spk_,mpi_camn_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_c_spk_,mpi_camn_op,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,&
& psb_mpi_c_spk_,mpi_camn_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,&
& psb_mpi_c_spk_,mpi_camn_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_c_spk_,mpi_camn_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_c_spk_,mpi_camn_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_c_spk_,mpi_camn_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_c_spk_,mpi_camn_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -599,20 +921,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_spk_,mpi_camn_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_spk_,mpi_camn_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_c_spk_,mpi_camn_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_spk_,mpi_camn_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_c_spk_,mpi_camn_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_spk_,mpi_camn_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_c_spk_,mpi_camn_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -667,20 +999,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_spk_,mpi_camn_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_c_spk_,mpi_camn_op,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_spk_,mpi_camn_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_c_spk_,mpi_camn_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_spk_,mpi_camn_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_c_spk_,mpi_camn_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_spk_,mpi_camn_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_c_spk_,mpi_camn_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -901,12 +1243,13 @@ contains
collective_start = .false.
collective_end = .false.
end if
dat_ = dat
if (collective_sync) then
call mpi_scan(MPI_IN_PLACE,dat,1,&
call mpi_scan(dat_,dat,1,&
& psb_mpi_c_spk_,mpi_sum,icomm,minfo)
else
if (collective_start) then
call mpi_iscan(MPI_IN_PLACE,dat,1,&
call mpi_iscan(dat_,dat,1,&
& psb_mpi_c_spk_,mpi_sum,icomm,request,minfo)
else if (collective_end) then
call mpi_wait(request,status,minfo)
@ -952,12 +1295,13 @@ contains
collective_start = .false.
collective_end = .false.
end if
dat_ = dat
if (collective_sync) then
call mpi_exscan(MPI_IN_PLACE,dat,1,&
call mpi_exscan(dat_,dat,1,&
& psb_mpi_c_spk_,mpi_sum,icomm,minfo)
else
if (collective_start) then
call mpi_iexscan(MPI_IN_PLACE,dat,1,&
call mpi_iexscan(dat_,dat,1,&
& psb_mpi_c_spk_,mpi_sum,icomm,request,minfo)
else if (collective_end) then
call mpi_wait(request,status,minfo)
@ -980,12 +1324,13 @@ contains
complex(psb_spk_), intent(inout) :: dat(:)
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
integer(psb_ipk_) :: iam, np, info
integer(psb_mpk_) :: minfo
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
complex(psb_spk_), allocatable :: dat_(:)
#if !defined(SERIAL_MPI)
call psb_info(ctxt,iam,np)
icomm = psb_get_mpi_comm(ctxt)
@ -1003,12 +1348,13 @@ contains
collective_start = .false.
collective_end = .false.
end if
dat_ = dat
if (collective_sync) then
call mpi_scan(MPI_IN_PLACE,dat,size(dat),&
call mpi_scan(dat_,dat,size(dat),&
& psb_mpi_c_spk_,mpi_sum,icomm,minfo)
else
if (collective_start) then
call mpi_iscan(MPI_IN_PLACE,dat,size(dat),&
call mpi_iscan(dat_,dat,size(dat),&
& psb_mpi_c_spk_,mpi_sum,icomm,request,info)
else if (collective_end) then
call mpi_wait(request,status,info)
@ -1029,12 +1375,13 @@ contains
complex(psb_spk_), intent(inout) :: dat(:)
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
complex(psb_spk_), allocatable :: dat_(:)
integer(psb_ipk_) :: iam, np, info
integer(psb_mpk_) :: minfo
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
complex(psb_spk_), allocatable :: dat_(:)
#if !defined(SERIAL_MPI)
call psb_info(ctxt,iam,np)
@ -1053,12 +1400,13 @@ contains
collective_start = .false.
collective_end = .false.
end if
dat_ = dat
if (collective_sync) then
call mpi_exscan(MPI_IN_PLACE,dat,size(dat),&
call mpi_exscan(dat_,dat,size(dat),&
& psb_mpi_c_spk_,mpi_sum,icomm,minfo)
else
if (collective_start) then
call mpi_iexscan(MPI_IN_PLACE,dat,size(dat),&
call mpi_iexscan(dat_,dat,size(dat),&
& psb_mpi_c_spk_,mpi_sum,icomm,request,info)
else if (collective_end) then
call mpi_wait(request,status,info)
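
  ! Editorial note: in the scan/exscan routines the in-place form is dropped in
  ! favour of copying the operand into a temporary dat_ and using that as the
  ! send buffer, so the prefix sum reads from the copy and writes back into
  ! dat. For the vector versions dat_ is an allocatable sized automatically by
  ! the assignment. A condensed view of the pattern (vector case):
  !
  !   complex(psb_spk_), allocatable :: dat_(:)
  !   dat_ = dat
  !   call mpi_scan(dat_, dat, size(dat), psb_mpi_c_spk_, mpi_sum, icomm, minfo)
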
@ -1271,6 +1619,5 @@ contains
Enddo
end subroutine psb_c_e_simple_triad_a2av
end module psi_c_collective_mod

@ -45,6 +45,14 @@ module psi_d_collective_mod
module procedure psb_d_nrm2s, psb_d_nrm2v
end interface psb_nrm2
interface psb_gather
module procedure psb_dgather_s, psb_dgather_v
end interface psb_gather
interface psb_gatherv
module procedure psb_dgatherv_v
end interface
interface psb_sum
module procedure psb_dsums, psb_dsumv, psb_dsumm
end interface
@ -110,6 +118,7 @@ contains
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
real(psb_dpk_) :: dat_
#if !defined(SERIAL_MPI)
call psb_info(ctxt,iam,np)
@ -134,20 +143,29 @@ contains
collective_start = .false.
collective_end = .false.
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,psb_mpi_r_dpk_,mpi_max,icomm,info)
call mpi_allreduce(mpi_in_place,dat,1,psb_mpi_r_dpk_,mpi_max,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,psb_mpi_r_dpk_,mpi_max,root_,icomm,info)
if (iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,psb_mpi_r_dpk_,mpi_max,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,psb_mpi_r_dpk_,mpi_max,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_r_dpk_,mpi_max,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_r_dpk_,mpi_max,root_,icomm,request,info)
if (iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_r_dpk_,mpi_max,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_r_dpk_,mpi_max,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -174,6 +192,7 @@ contains
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
real(psb_dpk_) :: dat_(1) ! This is a dummy
#if !defined(SERIAL_MPI)
@ -200,21 +219,31 @@ contains
collective_end = .false.
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
if (root_ == -1) then
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_max,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_max,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_max,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_max,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_max,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_max,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_max,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_max,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -242,6 +271,7 @@ contains
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
real(psb_dpk_) :: dat_(1,1) ! this is a dummy
#if !defined(SERIAL_MPI)
@ -268,28 +298,37 @@ contains
collective_start = .false.
collective_end = .false.
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_max,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_max,root_,icomm,info)
endif
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_max,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_max,root_,icomm,info)
endif
end if
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_max,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_max,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_max,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_max,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
end if
end if
#endif
end subroutine psb_dmaxm
@ -340,18 +379,27 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,psb_mpi_r_dpk_,mpi_min,icomm,info)
call mpi_allreduce(mpi_in_place,dat,1,psb_mpi_r_dpk_,mpi_min,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,psb_mpi_r_dpk_,mpi_min,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,psb_mpi_r_dpk_,mpi_min,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,psb_mpi_r_dpk_,mpi_min,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_r_dpk_,mpi_min,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_r_dpk_,mpi_min,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_r_dpk_,mpi_min,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_r_dpk_,mpi_min,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -405,20 +453,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_min,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_min,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_min,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_min,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_min,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_min,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_min,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_min,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -473,20 +531,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_min,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_min,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_min,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_min,root_,icomm,info)
end if
end if
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_min,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_min,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_min,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_min,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -545,20 +613,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,&
call mpi_allreduce(mpi_in_place,dat,1,&
& psb_mpi_r_dpk_,mpi_dnrm2_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_r_dpk_,mpi_dnrm2_op,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,&
& psb_mpi_r_dpk_,mpi_dnrm2_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,&
& psb_mpi_r_dpk_,mpi_dnrm2_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_r_dpk_,mpi_dnrm2_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_r_dpk_,mpi_dnrm2_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_r_dpk_,mpi_dnrm2_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_r_dpk_,mpi_dnrm2_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -612,20 +690,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),psb_mpi_r_dpk_,&
call mpi_allreduce(mpi_in_place,dat,size(dat),psb_mpi_r_dpk_,&
& mpi_dnrm2_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),psb_mpi_r_dpk_,&
& mpi_dnrm2_op,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),psb_mpi_r_dpk_,&
& mpi_dnrm2_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),psb_mpi_r_dpk_,&
& mpi_dnrm2_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_dnrm2_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_dnrm2_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_dnrm2_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_dnrm2_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -636,6 +724,250 @@ contains
end subroutine psb_d_nrm2v
!
! gather
!
subroutine psb_dgather_s(ctxt,dat,resv,root,mode,request)
#ifdef MPI_MOD
use mpi
#endif
implicit none
#ifdef MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
real(psb_dpk_), intent(inout) :: dat, resv(:)
integer(psb_mpk_), intent(in), optional :: root
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
integer(psb_mpk_) :: root_
integer(psb_mpk_) :: iam, np, info
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
#if defined(SERIAL_MPI)
resv(0) = dat
#else
call psb_info(ctxt,iam,np)
if (present(root)) then
root_ = root
else
root_ = -1
endif
icomm = psb_get_mpi_comm(ctxt)
if (present(mode)) then
collective_sync = .false.
collective_start = iand(mode,psb_collective_start_) /= 0
collective_end = iand(mode,psb_collective_end_) /= 0
if (.not.present(request)) then
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
else
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allgather(dat,1,psb_mpi_r_dpk_,&
& resv,1,psb_mpi_r_dpk_,icomm,info)
else
call mpi_gather(dat,1,psb_mpi_r_dpk_,&
& resv,1,psb_mpi_r_dpk_,root_,icomm,info)
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallgather(dat,1,psb_mpi_r_dpk_,&
& resv,1,psb_mpi_r_dpk_,icomm,request,info)
else
call mpi_igather(dat,1,psb_mpi_r_dpk_,&
& resv,1,psb_mpi_r_dpk_,root_,icomm,request,info)
endif
else if (collective_end) then
call mpi_wait(request,status,info)
end if
end if
#endif
end subroutine psb_dgather_s
subroutine psb_dgather_v(ctxt,dat,resv,root,mode,request)
#ifdef MPI_MOD
use mpi
#endif
implicit none
#ifdef MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
real(psb_dpk_), intent(inout) :: dat(:), resv(:)
integer(psb_mpk_), intent(in), optional :: root
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
integer(psb_mpk_) :: root_
integer(psb_mpk_) :: iam, np, info
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
#if defined(SERIAL_MPI)
resv(0) = dat
#else
call psb_info(ctxt,iam,np)
if (present(root)) then
root_ = root
else
root_ = -1
endif
icomm = psb_get_mpi_comm(ctxt)
if (present(mode)) then
collective_sync = .false.
collective_start = iand(mode,psb_collective_start_) /= 0
collective_end = iand(mode,psb_collective_end_) /= 0
if (.not.present(request)) then
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
else
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allgather(dat,size(dat),psb_mpi_r_dpk_,&
& resv,size(dat),psb_mpi_r_dpk_,icomm,info)
else
call mpi_gather(dat,size(dat),psb_mpi_r_dpk_,&
& resv,size(dat),psb_mpi_r_dpk_,root_,icomm,info)
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallgather(dat,size(dat),psb_mpi_r_dpk_,&
& resv,size(dat),psb_mpi_r_dpk_,icomm,request,info)
else
call mpi_igather(dat,size(dat),psb_mpi_r_dpk_,&
& resv,size(dat),psb_mpi_r_dpk_,root_,icomm,request,info)
endif
else if (collective_end) then
call mpi_wait(request,status,info)
end if
end if
#endif
end subroutine psb_dgather_v
subroutine psb_dgatherv_v(ctxt,dat,resv,szs,root,mode,request)
#ifdef MPI_MOD
use mpi
#endif
implicit none
#ifdef MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
real(psb_dpk_), intent(inout) :: dat(:), resv(:)
integer(psb_mpk_), intent(in), optional :: root
integer(psb_mpk_), intent(in), optional :: szs(:)
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
integer(psb_mpk_) :: root_
integer(psb_mpk_) :: iam, np, info,i
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
integer(psb_mpk_), allocatable :: displs(:)
logical :: collective_start, collective_end, collective_sync
#if defined(SERIAL_MPI)
resv(0) = dat
#else
call psb_info(ctxt,iam,np)
if (present(root)) then
root_ = root
else
root_ = -1
endif
icomm = psb_get_mpi_comm(ctxt)
if (present(mode)) then
collective_sync = .false.
collective_start = iand(mode,psb_collective_start_) /= 0
collective_end = iand(mode,psb_collective_end_) /= 0
if (.not.present(request)) then
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
else
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
if (collective_sync) then
if (root_ == -1) then
if (size(szs) < np) write(0,*) 'Error: bad input sizes'
allocate(displs(np))
displs(1) = 0
do i=2, np
displs(i) = displs(i-1) + szs(i-1)
end do
call mpi_allgatherv(dat,size(dat),psb_mpi_r_dpk_,&
& resv,szs,displs,psb_mpi_r_dpk_,icomm,info)
else
if (iam == root_) then
if (size(szs) < np) write(0,*) 'Error: bad input sizes'
allocate(displs(np))
displs(1) = 0
do i=2, np
displs(i) = displs(i-1) + szs(i-1)
end do
else
allocate(displs(0))
end if
call mpi_gatherv(dat,size(dat),psb_mpi_r_dpk_,&
& resv,szs,displs,psb_mpi_r_dpk_,root_,icomm,info)
endif
else
if (collective_start) then
if (root_ == -1) then
if (size(szs) < np) write(0,*) 'Error: bad input sizes'
allocate(displs(np))
displs(1) = 0
do i=2, np
displs(i) = displs(i-1) + szs(i-1)
end do
call mpi_iallgatherv(dat,size(dat),psb_mpi_r_dpk_,&
& resv,szs,displs,psb_mpi_r_dpk_,icomm,request,info)
else
if (iam == root_) then
if (size(szs) < np) write(0,*) 'Error: bad input sizes'
allocate(displs(np))
displs(1) = 0
do i=2, np
displs(i) = displs(i-1) + szs(i-1)
end do
else
allocate(displs(0))
end if
call mpi_igatherv(dat,size(dat),psb_mpi_r_dpk_,&
& resv,szs,displs,psb_mpi_r_dpk_,root_,icomm,request,info)
endif
else if (collective_end) then
call mpi_wait(request,status,info)
end if
end if
#endif
end subroutine psb_dgatherv_v
!
! SUM
!
@ -684,20 +1016,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,&
call mpi_allreduce(mpi_in_place,dat,1,&
& psb_mpi_r_dpk_,mpi_sum,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_r_dpk_,mpi_sum,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,&
& psb_mpi_r_dpk_,mpi_sum,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,&
& psb_mpi_r_dpk_,mpi_sum,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_r_dpk_,mpi_sum,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_r_dpk_,mpi_sum,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_r_dpk_,mpi_sum,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_r_dpk_,mpi_sum,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -750,20 +1092,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_sum,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_sum,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_sum,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_sum,root_,icomm,info)
end if
end if
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_sum,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_sum,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_sum,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -818,20 +1170,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_sum,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_sum,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_sum,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_sum,root_,icomm,info)
end if
end if
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_sum,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_sum,root_, icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_sum,root_, icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_sum,root_, icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -888,20 +1250,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,&
call mpi_allreduce(mpi_in_place,dat,1,&
& psb_mpi_r_dpk_,mpi_damx_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_r_dpk_,mpi_damx_op,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,&
& psb_mpi_r_dpk_,mpi_damx_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,&
& psb_mpi_r_dpk_,mpi_damx_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_r_dpk_,mpi_damx_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_r_dpk_,mpi_damx_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_r_dpk_,mpi_damx_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_r_dpk_,mpi_damx_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -955,20 +1327,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
psb_mpi_r_dpk_,mpi_damx_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_damx_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_damx_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_damx_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_damx_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_damx_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_damx_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -1023,20 +1405,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_damx_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_damx_op,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_damx_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_damx_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_damx_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_damx_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_damx_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -1092,20 +1484,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,&
call mpi_allreduce(mpi_in_place,dat,1,&
& psb_mpi_r_dpk_,mpi_damn_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,&
& psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,&
& psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_r_dpk_,mpi_damn_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -1159,20 +1561,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_damn_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_damn_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -1227,20 +1639,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_damn_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_damn_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -1461,12 +1883,13 @@ contains
collective_start = .false.
collective_end = .false.
end if
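    ! dat_ keeps a copy of the input value so that the scan below can use an
    ! explicit send buffer instead of MPI_IN_PLACE.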
dat_ = dat
if (collective_sync) then
call mpi_scan(MPI_IN_PLACE,dat,1,&
call mpi_scan(dat_,dat,1,&
& psb_mpi_r_dpk_,mpi_sum,icomm,minfo)
else
if (collective_start) then
call mpi_iscan(MPI_IN_PLACE,dat,1,&
call mpi_iscan(dat_,dat,1,&
& psb_mpi_r_dpk_,mpi_sum,icomm,request,minfo)
else if (collective_end) then
call mpi_wait(request,status,minfo)
@ -1512,12 +1935,13 @@ contains
collective_start = .false.
collective_end = .false.
end if
dat_ = dat
if (collective_sync) then
call mpi_exscan(MPI_IN_PLACE,dat,1,&
call mpi_exscan(dat_,dat,1,&
& psb_mpi_r_dpk_,mpi_sum,icomm,minfo)
else
if (collective_start) then
call mpi_iexscan(MPI_IN_PLACE,dat,1,&
call mpi_iexscan(dat_,dat,1,&
& psb_mpi_r_dpk_,mpi_sum,icomm,request,minfo)
else if (collective_end) then
call mpi_wait(request,status,minfo)
@ -1540,12 +1964,13 @@ contains
real(psb_dpk_), intent(inout) :: dat(:)
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
integer(psb_ipk_) :: iam, np, info
integer(psb_mpk_) :: minfo
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
real(psb_dpk_), allocatable :: dat_(:)
#if !defined(SERIAL_MPI)
call psb_info(ctxt,iam,np)
icomm = psb_get_mpi_comm(ctxt)
@ -1563,12 +1988,13 @@ contains
collective_start = .false.
collective_end = .false.
end if
dat_ = dat
if (collective_sync) then
call mpi_scan(MPI_IN_PLACE,dat,size(dat),&
call mpi_scan(dat_,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_sum,icomm,minfo)
else
if (collective_start) then
call mpi_iscan(MPI_IN_PLACE,dat,size(dat),&
call mpi_iscan(dat_,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_sum,icomm,request,info)
else if (collective_end) then
call mpi_wait(request,status,info)
@ -1589,12 +2015,13 @@ contains
real(psb_dpk_), intent(inout) :: dat(:)
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
real(psb_dpk_), allocatable :: dat_(:)
integer(psb_ipk_) :: iam, np, info
integer(psb_mpk_) :: minfo
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
real(psb_dpk_), allocatable :: dat_(:)
#if !defined(SERIAL_MPI)
call psb_info(ctxt,iam,np)
@ -1613,12 +2040,13 @@ contains
collective_start = .false.
collective_end = .false.
end if
dat_ = dat
if (collective_sync) then
call mpi_exscan(MPI_IN_PLACE,dat,size(dat),&
call mpi_exscan(dat_,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_sum,icomm,minfo)
else
if (collective_start) then
call mpi_iexscan(MPI_IN_PLACE,dat,size(dat),&
call mpi_iexscan(dat_,dat,size(dat),&
& psb_mpi_r_dpk_,mpi_sum,icomm,request,info)
else if (collective_end) then
call mpi_wait(request,status,info)
@ -1831,6 +2259,5 @@ contains
Enddo
end subroutine psb_d_e_simple_triad_a2av
end module psi_d_collective_mod
@ -42,6 +42,14 @@ module psi_e_collective_mod
end interface psb_min
interface psb_gather
module procedure psb_egather_s, psb_egather_v
end interface psb_gather
interface psb_gatherv
module procedure psb_egatherv_v
end interface
interface psb_sum
module procedure psb_esums, psb_esumv, psb_esumm
end interface
@ -107,6 +115,7 @@ contains
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
integer(psb_epk_) :: dat_
#if !defined(SERIAL_MPI)
call psb_info(ctxt,iam,np)
@ -131,20 +140,29 @@ contains
collective_start = .false.
collective_end = .false.
end if
if (collective_sync) then
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,psb_mpi_epk_,mpi_max,icomm,info)
call mpi_allreduce(mpi_in_place,dat,1,psb_mpi_epk_,mpi_max,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,psb_mpi_epk_,mpi_max,root_,icomm,info)
if (iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,psb_mpi_epk_,mpi_max,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,psb_mpi_epk_,mpi_max,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_epk_,mpi_max,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_epk_,mpi_max,root_,icomm,request,info)
if (iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_epk_,mpi_max,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_epk_,mpi_max,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -171,6 +189,7 @@ contains
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
integer(psb_epk_) :: dat_(1) ! This is a dummy
#if !defined(SERIAL_MPI)
@ -197,21 +216,31 @@ contains
collective_end = .false.
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
if (root_ == -1) then
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_max,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_epk_,mpi_max,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_max,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_epk_,mpi_max,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_max,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_epk_,mpi_max,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_max,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_epk_,mpi_max,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -239,6 +268,7 @@ contains
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
integer(psb_epk_) :: dat_(1,1) ! this is a dummy
#if !defined(SERIAL_MPI)
@ -265,28 +295,37 @@ contains
collective_start = .false.
collective_end = .false.
end if
if (collective_sync) then
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_max,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_epk_,mpi_max,root_,icomm,info)
endif
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_max,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_epk_,mpi_max,root_,icomm,info)
endif
end if
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_max,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_epk_,mpi_max,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_max,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_epk_,mpi_max,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
end if
end if
#endif
end subroutine psb_emaxm
@ -337,18 +376,27 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,psb_mpi_epk_,mpi_min,icomm,info)
call mpi_allreduce(mpi_in_place,dat,1,psb_mpi_epk_,mpi_min,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,psb_mpi_epk_,mpi_min,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,psb_mpi_epk_,mpi_min,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,psb_mpi_epk_,mpi_min,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_epk_,mpi_min,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_epk_,mpi_min,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_epk_,mpi_min,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_epk_,mpi_min,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -402,20 +450,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_min,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_epk_,mpi_min,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_min,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_epk_,mpi_min,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_min,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_epk_,mpi_min,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_min,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_epk_,mpi_min,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -470,20 +528,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_min,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_epk_,mpi_min,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_min,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_epk_,mpi_min,root_,icomm,info)
end if
end if
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_min,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_epk_,mpi_min,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_min,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_epk_,mpi_min,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -494,6 +562,250 @@ contains
!
! gather
!
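  ! psb_egather_s  : gathers one integer(psb_epk_) value per rank into resv
  ! psb_egather_v  : gathers equal-sized vectors into resv
  ! psb_egatherv_v : gathers variable-sized vectors; szs(i) holds the number
  !                  of entries contributed by rank i-1
  !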
subroutine psb_egather_s(ctxt,dat,resv,root,mode,request)
#ifdef MPI_MOD
use mpi
#endif
implicit none
#ifdef MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_epk_), intent(inout) :: dat, resv(:)
integer(psb_mpk_), intent(in), optional :: root
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
integer(psb_mpk_) :: root_
integer(psb_mpk_) :: iam, np, info
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
#if defined(SERIAL_MPI)
    resv(1) = dat
#else
call psb_info(ctxt,iam,np)
if (present(root)) then
root_ = root
else
root_ = -1
endif
icomm = psb_get_mpi_comm(ctxt)
if (present(mode)) then
collective_sync = .false.
collective_start = iand(mode,psb_collective_start_) /= 0
collective_end = iand(mode,psb_collective_end_) /= 0
if (.not.present(request)) then
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
else
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allgather(dat,1,psb_mpi_epk_,&
& resv,1,psb_mpi_epk_,icomm,info)
else
call mpi_gather(dat,1,psb_mpi_epk_,&
& resv,1,psb_mpi_epk_,root_,icomm,info)
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallgather(dat,1,psb_mpi_epk_,&
& resv,1,psb_mpi_epk_,icomm,request,info)
else
call mpi_igather(dat,1,psb_mpi_epk_,&
& resv,1,psb_mpi_epk_,root_,icomm,request,info)
endif
else if (collective_end) then
call mpi_wait(request,status,info)
end if
end if
#endif
end subroutine psb_egather_s
subroutine psb_egather_v(ctxt,dat,resv,root,mode,request)
#ifdef MPI_MOD
use mpi
#endif
implicit none
#ifdef MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_epk_), intent(inout) :: dat(:), resv(:)
integer(psb_mpk_), intent(in), optional :: root
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
integer(psb_mpk_) :: root_
integer(psb_mpk_) :: iam, np, info
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
#if defined(SERIAL_MPI)
    resv(1:size(dat)) = dat(:)
#else
call psb_info(ctxt,iam,np)
if (present(root)) then
root_ = root
else
root_ = -1
endif
icomm = psb_get_mpi_comm(ctxt)
if (present(mode)) then
collective_sync = .false.
collective_start = iand(mode,psb_collective_start_) /= 0
collective_end = iand(mode,psb_collective_end_) /= 0
if (.not.present(request)) then
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
else
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allgather(dat,size(dat),psb_mpi_epk_,&
& resv,size(dat),psb_mpi_epk_,icomm,info)
else
call mpi_gather(dat,size(dat),psb_mpi_epk_,&
& resv,size(dat),psb_mpi_epk_,root_,icomm,info)
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallgather(dat,size(dat),psb_mpi_epk_,&
& resv,size(dat),psb_mpi_epk_,icomm,request,info)
else
call mpi_igather(dat,size(dat),psb_mpi_epk_,&
& resv,size(dat),psb_mpi_epk_,root_,icomm,request,info)
endif
else if (collective_end) then
call mpi_wait(request,status,info)
end if
end if
#endif
end subroutine psb_egather_v
subroutine psb_egatherv_v(ctxt,dat,resv,szs,root,mode,request)
#ifdef MPI_MOD
use mpi
#endif
implicit none
#ifdef MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_epk_), intent(inout) :: dat(:), resv(:)
integer(psb_mpk_), intent(in), optional :: root
integer(psb_mpk_), intent(in), optional :: szs(:)
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
integer(psb_mpk_) :: root_
integer(psb_mpk_) :: iam, np, info,i
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
integer(psb_mpk_), allocatable :: displs(:)
logical :: collective_start, collective_end, collective_sync
#if defined(SERIAL_MPI)
    resv(1:size(dat)) = dat(:)
#else
call psb_info(ctxt,iam,np)
if (present(root)) then
root_ = root
else
root_ = -1
endif
icomm = psb_get_mpi_comm(ctxt)
if (present(mode)) then
collective_sync = .false.
collective_start = iand(mode,psb_collective_start_) /= 0
collective_end = iand(mode,psb_collective_end_) /= 0
if (.not.present(request)) then
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
else
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
if (collective_sync) then
if (root_ == -1) then
if (size(szs) < np) write(0,*) 'Error: bad input sizes'
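        ! displs is the exclusive prefix sum of szs: displs(i) is the offset
        ! in resv where the contribution of rank i-1 starts.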
allocate(displs(np))
displs(1) = 0
do i=2, np
displs(i) = displs(i-1) + szs(i-1)
end do
call mpi_allgatherv(dat,size(dat),psb_mpi_epk_,&
& resv,szs,displs,psb_mpi_epk_,icomm,info)
else
if (iam == root_) then
if (size(szs) < np) write(0,*) 'Error: bad input sizes'
allocate(displs(np))
displs(1) = 0
do i=2, np
displs(i) = displs(i-1) + szs(i-1)
end do
else
allocate(displs(0))
end if
call mpi_gatherv(dat,size(dat),psb_mpi_epk_,&
& resv,szs,displs,psb_mpi_epk_,root_,icomm,info)
endif
else
if (collective_start) then
if (root_ == -1) then
if (size(szs) < np) write(0,*) 'Error: bad input sizes'
allocate(displs(np))
displs(1) = 0
do i=2, np
displs(i) = displs(i-1) + szs(i-1)
end do
call mpi_iallgatherv(dat,size(dat),psb_mpi_epk_,&
& resv,szs,displs,psb_mpi_epk_,icomm,request,info)
else
if (iam == root_) then
if (size(szs) < np) write(0,*) 'Error: bad input sizes'
allocate(displs(np))
displs(1) = 0
do i=2, np
displs(i) = displs(i-1) + szs(i-1)
end do
else
allocate(displs(0))
end if
call mpi_igatherv(dat,size(dat),psb_mpi_epk_,&
& resv,szs,displs,psb_mpi_epk_,root_,icomm,request,info)
endif
else if (collective_end) then
call mpi_wait(request,status,info)
end if
end if
#endif
end subroutine psb_egatherv_v
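  !
  ! Minimal usage sketch (hypothetical caller; assumes ctxt, np and the
  ! local data lvals are already set up). The per-rank counts are gathered
  ! first and then passed as the szs argument of psb_gatherv; with no root
  ! argument both calls behave as all-gathers.
  !
  !   integer(psb_epk_)              :: nl
  !   integer(psb_epk_), allocatable :: nls(:), lvals(:), gvals(:)
  !   integer(psb_mpk_), allocatable :: counts(:)
  !
  !   nl = size(lvals)
  !   allocate(nls(np), counts(np))
  !   call psb_gather(ctxt,nl,nls)                 ! one count per rank
  !   counts = int(nls,psb_mpk_)
  !   allocate(gvals(sum(counts)))
  !   call psb_gatherv(ctxt,lvals,gvals,counts)    ! variable-sized gather
  !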
!
! SUM
!
@ -542,20 +854,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,&
call mpi_allreduce(mpi_in_place,dat,1,&
& psb_mpi_epk_,mpi_sum,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_epk_,mpi_sum,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,&
& psb_mpi_epk_,mpi_sum,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,&
& psb_mpi_epk_,mpi_sum,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_epk_,mpi_sum,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_epk_,mpi_sum,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_epk_,mpi_sum,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_epk_,mpi_sum,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -608,20 +930,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_sum,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_epk_,mpi_sum,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_sum,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_epk_,mpi_sum,root_,icomm,info)
end if
end if
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_sum,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_sum,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_epk_,mpi_sum,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -676,20 +1008,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_sum,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_epk_,mpi_sum,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_sum,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_epk_,mpi_sum,root_,icomm,info)
end if
end if
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_sum,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_epk_,mpi_sum,root_, icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_sum,root_, icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_epk_,mpi_sum,root_, icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -746,20 +1088,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,&
call mpi_allreduce(mpi_in_place,dat,1,&
& psb_mpi_epk_,mpi_eamx_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_epk_,mpi_eamx_op,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,&
& psb_mpi_epk_,mpi_eamx_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,&
& psb_mpi_epk_,mpi_eamx_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_epk_,mpi_eamx_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_epk_,mpi_eamx_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_epk_,mpi_eamx_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_epk_,mpi_eamx_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -813,20 +1165,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
psb_mpi_epk_,mpi_eamx_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_eamx_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_epk_,mpi_eamx_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_eamx_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_epk_,mpi_eamx_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_eamx_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_epk_,mpi_eamx_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -881,20 +1243,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_eamx_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_epk_,mpi_eamx_op,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_eamx_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_epk_,mpi_eamx_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_eamx_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_eamx_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_epk_,mpi_eamx_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -950,20 +1322,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,&
call mpi_allreduce(mpi_in_place,dat,1,&
& psb_mpi_epk_,mpi_eamn_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_epk_,mpi_eamn_op,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,&
& psb_mpi_epk_,mpi_eamn_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,&
& psb_mpi_epk_,mpi_eamn_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_epk_,mpi_eamn_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_epk_,mpi_eamn_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_epk_,mpi_eamn_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_epk_,mpi_eamn_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -1017,20 +1399,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_eamn_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_eamn_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_epk_,mpi_eamn_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_eamn_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_epk_,mpi_eamn_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_eamn_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_epk_,mpi_eamn_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -1085,20 +1477,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_eamn_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_epk_,mpi_eamn_op,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_eamn_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_epk_,mpi_eamn_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_eamn_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_epk_,mpi_eamn_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_epk_,mpi_eamn_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_epk_,mpi_eamn_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -1319,12 +1721,13 @@ contains
collective_start = .false.
collective_end = .false.
end if
dat_ = dat
if (collective_sync) then
call mpi_scan(MPI_IN_PLACE,dat,1,&
call mpi_scan(dat_,dat,1,&
& psb_mpi_epk_,mpi_sum,icomm,minfo)
else
if (collective_start) then
call mpi_iscan(MPI_IN_PLACE,dat,1,&
call mpi_iscan(dat_,dat,1,&
& psb_mpi_epk_,mpi_sum,icomm,request,minfo)
else if (collective_end) then
call mpi_wait(request,status,minfo)
@ -1370,12 +1773,13 @@ contains
collective_start = .false.
collective_end = .false.
end if
dat_ = dat
if (collective_sync) then
call mpi_exscan(MPI_IN_PLACE,dat,1,&
call mpi_exscan(dat_,dat,1,&
& psb_mpi_epk_,mpi_sum,icomm,minfo)
else
if (collective_start) then
call mpi_iexscan(MPI_IN_PLACE,dat,1,&
call mpi_iexscan(dat_,dat,1,&
& psb_mpi_epk_,mpi_sum,icomm,request,minfo)
else if (collective_end) then
call mpi_wait(request,status,minfo)
@ -1398,12 +1802,13 @@ contains
integer(psb_epk_), intent(inout) :: dat(:)
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
integer(psb_ipk_) :: iam, np, info
integer(psb_mpk_) :: minfo
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
integer(psb_epk_), allocatable :: dat_(:)
#if !defined(SERIAL_MPI)
call psb_info(ctxt,iam,np)
icomm = psb_get_mpi_comm(ctxt)
@ -1421,12 +1826,13 @@ contains
collective_start = .false.
collective_end = .false.
end if
dat_ = dat
if (collective_sync) then
call mpi_scan(MPI_IN_PLACE,dat,size(dat),&
call mpi_scan(dat_,dat,size(dat),&
& psb_mpi_epk_,mpi_sum,icomm,minfo)
else
if (collective_start) then
call mpi_iscan(MPI_IN_PLACE,dat,size(dat),&
call mpi_iscan(dat_,dat,size(dat),&
& psb_mpi_epk_,mpi_sum,icomm,request,info)
else if (collective_end) then
call mpi_wait(request,status,info)
@ -1447,12 +1853,13 @@ contains
integer(psb_epk_), intent(inout) :: dat(:)
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
integer(psb_epk_), allocatable :: dat_(:)
integer(psb_ipk_) :: iam, np, info
integer(psb_mpk_) :: minfo
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
integer(psb_epk_), allocatable :: dat_(:)
#if !defined(SERIAL_MPI)
call psb_info(ctxt,iam,np)
@ -1471,12 +1878,13 @@ contains
collective_start = .false.
collective_end = .false.
end if
dat_ = dat
if (collective_sync) then
call mpi_exscan(MPI_IN_PLACE,dat,size(dat),&
call mpi_exscan(dat_,dat,size(dat),&
& psb_mpi_epk_,mpi_sum,icomm,minfo)
else
if (collective_start) then
call mpi_iexscan(MPI_IN_PLACE,dat,size(dat),&
call mpi_iexscan(dat_,dat,size(dat),&
& psb_mpi_epk_,mpi_sum,icomm,request,info)
else if (collective_end) then
call mpi_wait(request,status,info)
@ -1689,6 +2097,5 @@ contains
Enddo
end subroutine psb_e_e_simple_triad_a2av
end module psi_e_collective_mod
@ -42,6 +42,14 @@ module psi_i2_collective_mod
end interface psb_min
interface psb_gather
module procedure psb_i2gather_s, psb_i2gather_v
end interface psb_gather
interface psb_gatherv
module procedure psb_i2gatherv_v
end interface
interface psb_sum
module procedure psb_i2sums, psb_i2sumv, psb_i2summ
end interface
@ -107,6 +115,7 @@ contains
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
integer(psb_i2pk_) :: dat_
#if !defined(SERIAL_MPI)
call psb_info(ctxt,iam,np)
@ -131,20 +140,29 @@ contains
collective_start = .false.
collective_end = .false.
end if
if (collective_sync) then
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,psb_mpi_i2pk_,mpi_max,icomm,info)
call mpi_allreduce(mpi_in_place,dat,1,psb_mpi_i2pk_,mpi_max,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,psb_mpi_i2pk_,mpi_max,root_,icomm,info)
if (iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,psb_mpi_i2pk_,mpi_max,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,psb_mpi_i2pk_,mpi_max,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_i2pk_,mpi_max,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_i2pk_,mpi_max,root_,icomm,request,info)
if (iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_i2pk_,mpi_max,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_i2pk_,mpi_max,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -171,6 +189,7 @@ contains
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
integer(psb_i2pk_) :: dat_(1) ! This is a dummy
#if !defined(SERIAL_MPI)
@ -197,21 +216,31 @@ contains
collective_end = .false.
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
if (root_ == -1) then
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_max,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_i2pk_,mpi_max,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_max,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_i2pk_,mpi_max,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_max,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_i2pk_,mpi_max,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_max,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_i2pk_,mpi_max,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -239,6 +268,7 @@ contains
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
integer(psb_i2pk_) :: dat_(1,1) ! this is a dummy
#if !defined(SERIAL_MPI)
@ -265,28 +295,37 @@ contains
collective_start = .false.
collective_end = .false.
end if
if (collective_sync) then
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_max,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_i2pk_,mpi_max,root_,icomm,info)
endif
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_max,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_i2pk_,mpi_max,root_,icomm,info)
endif
end if
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_max,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_i2pk_,mpi_max,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_max,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_i2pk_,mpi_max,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
end if
end if
#endif
end subroutine psb_i2maxm
@ -337,18 +376,27 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,psb_mpi_i2pk_,mpi_min,icomm,info)
call mpi_allreduce(mpi_in_place,dat,1,psb_mpi_i2pk_,mpi_min,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,psb_mpi_i2pk_,mpi_min,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,psb_mpi_i2pk_,mpi_min,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,psb_mpi_i2pk_,mpi_min,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_i2pk_,mpi_min,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_i2pk_,mpi_min,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_i2pk_,mpi_min,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_i2pk_,mpi_min,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -402,20 +450,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_min,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_i2pk_,mpi_min,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_min,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_i2pk_,mpi_min,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_min,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_i2pk_,mpi_min,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_min,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_i2pk_,mpi_min,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -470,20 +528,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_min,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_i2pk_,mpi_min,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_min,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_i2pk_,mpi_min,root_,icomm,info)
end if
end if
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_min,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_i2pk_,mpi_min,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_min,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_i2pk_,mpi_min,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -494,6 +562,250 @@ contains
!
! gather
!
subroutine psb_i2gather_s(ctxt,dat,resv,root,mode,request)
#ifdef MPI_MOD
use mpi
#endif
implicit none
#ifdef MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_i2pk_), intent(inout) :: dat, resv(:)
integer(psb_mpk_), intent(in), optional :: root
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
integer(psb_mpk_) :: root_
integer(psb_mpk_) :: iam, np, info
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
#if defined(SERIAL_MPI)
    resv(1) = dat
#else
call psb_info(ctxt,iam,np)
if (present(root)) then
root_ = root
else
root_ = -1
endif
icomm = psb_get_mpi_comm(ctxt)
if (present(mode)) then
collective_sync = .false.
collective_start = iand(mode,psb_collective_start_) /= 0
collective_end = iand(mode,psb_collective_end_) /= 0
if (.not.present(request)) then
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
else
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allgather(dat,1,psb_mpi_i2pk_,&
& resv,1,psb_mpi_i2pk_,icomm,info)
else
call mpi_gather(dat,1,psb_mpi_i2pk_,&
& resv,1,psb_mpi_i2pk_,root_,icomm,info)
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallgather(dat,1,psb_mpi_i2pk_,&
& resv,1,psb_mpi_i2pk_,icomm,request,info)
else
call mpi_igather(dat,1,psb_mpi_i2pk_,&
& resv,1,psb_mpi_i2pk_,root_,icomm,request,info)
endif
else if (collective_end) then
call mpi_wait(request,status,info)
end if
end if
#endif
end subroutine psb_i2gather_s
subroutine psb_i2gather_v(ctxt,dat,resv,root,mode,request)
#ifdef MPI_MOD
use mpi
#endif
implicit none
#ifdef MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_i2pk_), intent(inout) :: dat(:), resv(:)
integer(psb_mpk_), intent(in), optional :: root
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
integer(psb_mpk_) :: root_
integer(psb_mpk_) :: iam, np, info
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
#if defined(SERIAL_MPI)
    resv(1:size(dat)) = dat(:)
#else
call psb_info(ctxt,iam,np)
if (present(root)) then
root_ = root
else
root_ = -1
endif
icomm = psb_get_mpi_comm(ctxt)
if (present(mode)) then
collective_sync = .false.
collective_start = iand(mode,psb_collective_start_) /= 0
collective_end = iand(mode,psb_collective_end_) /= 0
if (.not.present(request)) then
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
else
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allgather(dat,size(dat),psb_mpi_i2pk_,&
& resv,size(dat),psb_mpi_i2pk_,icomm,info)
else
call mpi_gather(dat,size(dat),psb_mpi_i2pk_,&
& resv,size(dat),psb_mpi_i2pk_,root_,icomm,info)
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallgather(dat,size(dat),psb_mpi_i2pk_,&
& resv,size(dat),psb_mpi_i2pk_,icomm,request,info)
else
call mpi_igather(dat,size(dat),psb_mpi_i2pk_,&
& resv,size(dat),psb_mpi_i2pk_,root_,icomm,request,info)
endif
else if (collective_end) then
call mpi_wait(request,status,info)
end if
end if
#endif
end subroutine psb_i2gather_v
subroutine psb_i2gatherv_v(ctxt,dat,resv,szs,root,mode,request)
#ifdef MPI_MOD
use mpi
#endif
implicit none
#ifdef MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_i2pk_), intent(inout) :: dat(:), resv(:)
integer(psb_mpk_), intent(in), optional :: root
integer(psb_mpk_), intent(in), optional :: szs(:)
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
integer(psb_mpk_) :: root_
integer(psb_mpk_) :: iam, np, info,i
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
integer(psb_mpk_), allocatable :: displs(:)
logical :: collective_start, collective_end, collective_sync
#if defined(SERIAL_MPI)
    resv(1:size(dat)) = dat(:)
#else
call psb_info(ctxt,iam,np)
if (present(root)) then
root_ = root
else
root_ = -1
endif
icomm = psb_get_mpi_comm(ctxt)
if (present(mode)) then
collective_sync = .false.
collective_start = iand(mode,psb_collective_start_) /= 0
collective_end = iand(mode,psb_collective_end_) /= 0
if (.not.present(request)) then
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
else
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
if (collective_sync) then
if (root_ == -1) then
if (size(szs) < np) write(0,*) 'Error: bad input sizes'
allocate(displs(np))
displs(1) = 0
do i=2, np
displs(i) = displs(i-1) + szs(i-1)
end do
call mpi_allgatherv(dat,size(dat),psb_mpi_i2pk_,&
& resv,szs,displs,psb_mpi_i2pk_,icomm,info)
else
if (iam == root_) then
if (size(szs) < np) write(0,*) 'Error: bad input sizes'
allocate(displs(np))
displs(1) = 0
do i=2, np
displs(i) = displs(i-1) + szs(i-1)
end do
else
allocate(displs(0))
end if
call mpi_gatherv(dat,size(dat),psb_mpi_i2pk_,&
& resv,szs,displs,psb_mpi_i2pk_,root_,icomm,info)
endif
else
if (collective_start) then
if (root_ == -1) then
if (size(szs) < np) write(0,*) 'Error: bad input sizes'
allocate(displs(np))
displs(1) = 0
do i=2, np
displs(i) = displs(i-1) + szs(i-1)
end do
call mpi_iallgatherv(dat,size(dat),psb_mpi_i2pk_,&
& resv,szs,displs,psb_mpi_i2pk_,icomm,request,info)
else
if (iam == root_) then
if (size(szs) < np) write(0,*) 'Error: bad input sizes'
allocate(displs(np))
displs(1) = 0
do i=2, np
displs(i) = displs(i-1) + szs(i-1)
end do
else
allocate(displs(0))
end if
call mpi_igatherv(dat,size(dat),psb_mpi_i2pk_,&
& resv,szs,displs,psb_mpi_i2pk_,root_,icomm,request,info)
endif
else if (collective_end) then
call mpi_wait(request,status,info)
end if
end if
#endif
end subroutine psb_i2gatherv_v
!
! SUM
!
@ -542,20 +854,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,&
call mpi_allreduce(mpi_in_place,dat,1,&
& psb_mpi_i2pk_,mpi_sum,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_i2pk_,mpi_sum,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,&
& psb_mpi_i2pk_,mpi_sum,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,&
& psb_mpi_i2pk_,mpi_sum,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_i2pk_,mpi_sum,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_i2pk_,mpi_sum,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_i2pk_,mpi_sum,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_i2pk_,mpi_sum,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -608,20 +930,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_sum,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_i2pk_,mpi_sum,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_sum,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_i2pk_,mpi_sum,root_,icomm,info)
end if
end if
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_sum,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_sum,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_i2pk_,mpi_sum,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -676,20 +1008,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_sum,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_i2pk_,mpi_sum,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_sum,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_i2pk_,mpi_sum,root_,icomm,info)
end if
end if
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_sum,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_i2pk_,mpi_sum,root_, icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_sum,root_, icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_i2pk_,mpi_sum,root_, icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -746,20 +1088,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,&
call mpi_allreduce(mpi_in_place,dat,1,&
& psb_mpi_i2pk_,mpi_i2amx_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_i2pk_,mpi_i2amx_op,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,&
& psb_mpi_i2pk_,mpi_i2amx_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,&
& psb_mpi_i2pk_,mpi_i2amx_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_i2pk_,mpi_i2amx_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_i2pk_,mpi_i2amx_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_i2pk_,mpi_i2amx_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_i2pk_,mpi_i2amx_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -813,20 +1165,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
psb_mpi_i2pk_,mpi_i2amx_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_i2amx_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_i2pk_,mpi_i2amx_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_i2amx_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_i2pk_,mpi_i2amx_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_i2amx_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_i2pk_,mpi_i2amx_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -881,20 +1243,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_i2amx_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_i2pk_,mpi_i2amx_op,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_i2amx_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_i2pk_,mpi_i2amx_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_i2amx_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_i2amx_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_i2pk_,mpi_i2amx_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -950,20 +1322,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,&
call mpi_allreduce(mpi_in_place,dat,1,&
& psb_mpi_i2pk_,mpi_i2amn_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,&
& psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,&
& psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_i2pk_,mpi_i2amn_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -1017,20 +1399,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_i2amn_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_i2amn_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -1085,20 +1477,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_i2amn_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_i2amn_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -1319,12 +1721,13 @@ contains
collective_start = .false.
collective_end = .false.
end if
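! Work on a local copy of the input so that dat can act as the receive
! buffer of the scan without using mpi_in_place.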
dat_ = dat
if (collective_sync) then
call mpi_scan(MPI_IN_PLACE,dat,1,&
call mpi_scan(dat_,dat,1,&
& psb_mpi_i2pk_,mpi_sum,icomm,minfo)
else
if (collective_start) then
call mpi_iscan(MPI_IN_PLACE,dat,1,&
call mpi_iscan(dat_,dat,1,&
& psb_mpi_i2pk_,mpi_sum,icomm,request,minfo)
else if (collective_end) then
call mpi_wait(request,status,minfo)
@ -1370,12 +1773,13 @@ contains
collective_start = .false.
collective_end = .false.
end if
dat_ = dat
if (collective_sync) then
call mpi_exscan(MPI_IN_PLACE,dat,1,&
call mpi_exscan(dat_,dat,1,&
& psb_mpi_i2pk_,mpi_sum,icomm,minfo)
else
if (collective_start) then
call mpi_iexscan(MPI_IN_PLACE,dat,1,&
call mpi_iexscan(dat_,dat,1,&
& psb_mpi_i2pk_,mpi_sum,icomm,request,minfo)
else if (collective_end) then
call mpi_wait(request,status,minfo)
@ -1398,12 +1802,13 @@ contains
integer(psb_i2pk_), intent(inout) :: dat(:)
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
integer(psb_ipk_) :: iam, np, info
integer(psb_mpk_) :: minfo
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
integer(psb_i2pk_), allocatable :: dat_(:)
#if !defined(SERIAL_MPI)
call psb_info(ctxt,iam,np)
icomm = psb_get_mpi_comm(ctxt)
@ -1421,12 +1826,13 @@ contains
collective_start = .false.
collective_end = .false.
end if
dat_ = dat
if (collective_sync) then
call mpi_scan(MPI_IN_PLACE,dat,size(dat),&
call mpi_scan(dat_,dat,size(dat),&
& psb_mpi_i2pk_,mpi_sum,icomm,minfo)
else
if (collective_start) then
call mpi_iscan(MPI_IN_PLACE,dat,size(dat),&
call mpi_iscan(dat_,dat,size(dat),&
& psb_mpi_i2pk_,mpi_sum,icomm,request,info)
else if (collective_end) then
call mpi_wait(request,status,info)
@ -1447,12 +1853,13 @@ contains
integer(psb_i2pk_), intent(inout) :: dat(:)
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
integer(psb_i2pk_), allocatable :: dat_(:)
integer(psb_ipk_) :: iam, np, info
integer(psb_mpk_) :: minfo
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
integer(psb_i2pk_), allocatable :: dat_(:)
#if !defined(SERIAL_MPI)
call psb_info(ctxt,iam,np)
@ -1471,12 +1878,13 @@ contains
collective_start = .false.
collective_end = .false.
end if
dat_ = dat
if (collective_sync) then
call mpi_exscan(MPI_IN_PLACE,dat,size(dat),&
call mpi_exscan(dat_,dat,size(dat),&
& psb_mpi_i2pk_,mpi_sum,icomm,minfo)
else
if (collective_start) then
call mpi_iexscan(MPI_IN_PLACE,dat,size(dat),&
call mpi_iexscan(dat_,dat,size(dat),&
& psb_mpi_i2pk_,mpi_sum,icomm,request,info)
else if (collective_end) then
call mpi_wait(request,status,info)
@ -1689,6 +2097,5 @@ contains
Enddo
end subroutine psb_i2_e_simple_triad_a2av
end module psi_i2_collective_mod
@ -42,6 +42,14 @@ module psi_m_collective_mod
end interface psb_min
interface psb_gather
module procedure psb_mgather_s, psb_mgather_v
end interface psb_gather
interface psb_gatherv
module procedure psb_mgatherv_v
end interface
interface psb_sum
module procedure psb_msums, psb_msumv, psb_msumm
end interface
@ -107,6 +115,7 @@ contains
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
integer(psb_mpk_) :: dat_
#if !defined(SERIAL_MPI)
call psb_info(ctxt,iam,np)
@ -131,20 +140,29 @@ contains
collective_start = .false.
collective_end = .false.
end if
if (collective_sync) then
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,psb_mpi_mpk_,mpi_max,icomm,info)
call mpi_allreduce(mpi_in_place,dat,1,psb_mpi_mpk_,mpi_max,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,psb_mpi_mpk_,mpi_max,root_,icomm,info)
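! mpi_in_place is valid as the send buffer only on the root of mpi_reduce;
! non-root ranks pass dat directly (their receive argument is not significant there).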
if (iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,psb_mpi_mpk_,mpi_max,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,psb_mpi_mpk_,mpi_max,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_mpk_,mpi_max,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_mpk_,mpi_max,root_,icomm,request,info)
if (iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_mpk_,mpi_max,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_mpk_,mpi_max,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -171,6 +189,7 @@ contains
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
integer(psb_mpk_) :: dat_(1) ! This is a dummy
#if !defined(SERIAL_MPI)
@ -197,21 +216,31 @@ contains
collective_end = .false.
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
if (root_ == -1) then
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_max,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_mpk_,mpi_max,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_max,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_mpk_,mpi_max,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_max,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_mpk_,mpi_max,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_max,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_mpk_,mpi_max,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -239,6 +268,7 @@ contains
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
integer(psb_mpk_) :: dat_(1,1) ! this is a dummy
#if !defined(SERIAL_MPI)
@ -265,28 +295,37 @@ contains
collective_start = .false.
collective_end = .false.
end if
if (collective_sync) then
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_max,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_mpk_,mpi_max,root_,icomm,info)
endif
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_max,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_mpk_,mpi_max,root_,icomm,info)
endif
end if
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_max,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_mpk_,mpi_max,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_max,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_mpk_,mpi_max,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
end if
end if
#endif
end subroutine psb_mmaxm
@ -337,18 +376,27 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,psb_mpi_mpk_,mpi_min,icomm,info)
call mpi_allreduce(mpi_in_place,dat,1,psb_mpi_mpk_,mpi_min,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,psb_mpi_mpk_,mpi_min,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,psb_mpi_mpk_,mpi_min,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,psb_mpi_mpk_,mpi_min,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_mpk_,mpi_min,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_mpk_,mpi_min,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_mpk_,mpi_min,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_mpk_,mpi_min,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -402,20 +450,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_min,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_mpk_,mpi_min,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_min,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_mpk_,mpi_min,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_min,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_mpk_,mpi_min,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_min,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_mpk_,mpi_min,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -470,20 +528,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_min,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_mpk_,mpi_min,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_min,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_mpk_,mpi_min,root_,icomm,info)
end if
end if
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_min,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_mpk_,mpi_min,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_min,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_mpk_,mpi_min,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -494,6 +562,250 @@ contains
!
! gather
!
subroutine psb_mgather_s(ctxt,dat,resv,root,mode,request)
#ifdef MPI_MOD
use mpi
#endif
implicit none
#ifdef MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_mpk_), intent(inout) :: dat, resv(:)
integer(psb_mpk_), intent(in), optional :: root
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
integer(psb_mpk_) :: root_
integer(psb_mpk_) :: iam, np, info
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
#if defined(SERIAL_MPI)
resv(1) = dat
#else
call psb_info(ctxt,iam,np)
if (present(root)) then
root_ = root
else
root_ = -1
endif
icomm = psb_get_mpi_comm(ctxt)
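! The optional mode argument selects a split-phase collective:
! psb_collective_start_ posts the non-blocking call and returns a request,
! psb_collective_end_ completes it with mpi_wait. Without a request
! argument the call falls back to the blocking form.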
if (present(mode)) then
collective_sync = .false.
collective_start = iand(mode,psb_collective_start_) /= 0
collective_end = iand(mode,psb_collective_end_) /= 0
if (.not.present(request)) then
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
else
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
if (collective_sync) then
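! root == -1 requests an all-gather, so every rank receives the result;
! otherwise the data are collected on root_ only.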
if (root_ == -1) then
call mpi_allgather(dat,1,psb_mpi_mpk_,&
& resv,1,psb_mpi_mpk_,icomm,info)
else
call mpi_gather(dat,1,psb_mpi_mpk_,&
& resv,1,psb_mpi_mpk_,root_,icomm,info)
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallgather(dat,1,psb_mpi_mpk_,&
& resv,1,psb_mpi_mpk_,icomm,request,info)
else
call mpi_igather(dat,1,psb_mpi_mpk_,&
& resv,1,psb_mpi_mpk_,root_,icomm,request,info)
endif
else if (collective_end) then
call mpi_wait(request,status,info)
end if
end if
#endif
end subroutine psb_mgather_s
subroutine psb_mgather_v(ctxt,dat,resv,root,mode,request)
#ifdef MPI_MOD
use mpi
#endif
implicit none
#ifdef MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_mpk_), intent(inout) :: dat(:), resv(:)
integer(psb_mpk_), intent(in), optional :: root
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
integer(psb_mpk_) :: root_
integer(psb_mpk_) :: iam, np, info
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
#if defined(SERIAL_MPI)
resv(1:size(dat)) = dat
#else
call psb_info(ctxt,iam,np)
if (present(root)) then
root_ = root
else
root_ = -1
endif
icomm = psb_get_mpi_comm(ctxt)
if (present(mode)) then
collective_sync = .false.
collective_start = iand(mode,psb_collective_start_) /= 0
collective_end = iand(mode,psb_collective_end_) /= 0
if (.not.present(request)) then
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
else
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allgather(dat,size(dat),psb_mpi_mpk_,&
& resv,size(dat),psb_mpi_mpk_,icomm,info)
else
call mpi_gather(dat,size(dat),psb_mpi_mpk_,&
& resv,size(dat),psb_mpi_mpk_,root_,icomm,info)
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallgather(dat,size(dat),psb_mpi_mpk_,&
& resv,size(dat),psb_mpi_mpk_,icomm,request,info)
else
call mpi_igather(dat,size(dat),psb_mpi_mpk_,&
& resv,size(dat),psb_mpi_mpk_,root_,icomm,request,info)
endif
else if (collective_end) then
call mpi_wait(request,status,info)
end if
end if
#endif
end subroutine psb_mgather_v
subroutine psb_mgatherv_v(ctxt,dat,resv,szs,root,mode,request)
#ifdef MPI_MOD
use mpi
#endif
implicit none
#ifdef MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_mpk_), intent(inout) :: dat(:), resv(:)
integer(psb_mpk_), intent(in), optional :: root
integer(psb_mpk_), intent(in), optional :: szs(:)
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
integer(psb_mpk_) :: root_
integer(psb_mpk_) :: iam, np, info,i
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
integer(psb_mpk_), allocatable :: displs(:)
logical :: collective_start, collective_end, collective_sync
#if defined(SERIAL_MPI)
resv(1:size(dat)) = dat
#else
call psb_info(ctxt,iam,np)
if (present(root)) then
root_ = root
else
root_ = -1
endif
icomm = psb_get_mpi_comm(ctxt)
if (present(mode)) then
collective_sync = .false.
collective_start = iand(mode,psb_collective_start_) /= 0
collective_end = iand(mode,psb_collective_end_) /= 0
if (.not.present(request)) then
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
else
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
if (collective_sync) then
if (root_ == -1) then
if (size(szs) < np) write(0,*) 'Error: bad input sizes'
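! Receive displacements are the running sum of the per-rank sizes:
! displs(i) = szs(1) + ... + szs(i-1).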
allocate(displs(np))
displs(1) = 0
do i=2, np
displs(i) = displs(i-1) + szs(i-1)
end do
call mpi_allgatherv(dat,size(dat),psb_mpi_mpk_,&
& resv,szs,displs,psb_mpi_mpk_,icomm,info)
else
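! Sizes and displacements are significant only at the root;
! the other ranks pass a zero-sized displs array.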
if (iam == root_) then
if (size(szs) < np) write(0,*) 'Error: bad input sizes'
allocate(displs(np))
displs(1) = 0
do i=2, np
displs(i) = displs(i-1) + szs(i-1)
end do
else
allocate(displs(0))
end if
call mpi_gatherv(dat,size(dat),psb_mpi_mpk_,&
& resv,szs,displs,psb_mpi_mpk_,root_,icomm,info)
endif
else
if (collective_start) then
if (root_ == -1) then
if (size(szs) < np) write(0,*) 'Error: bad input sizes'
allocate(displs(np))
displs(1) = 0
do i=2, np
displs(i) = displs(i-1) + szs(i-1)
end do
call mpi_iallgatherv(dat,size(dat),psb_mpi_mpk_,&
& resv,szs,displs,psb_mpi_mpk_,icomm,request,info)
else
if (iam == root_) then
if (size(szs) < np) write(0,*) 'Error: bad input sizes'
allocate(displs(np))
displs(1) = 0
do i=2, np
displs(i) = displs(i-1) + szs(i-1)
end do
else
allocate(displs(0))
end if
call mpi_igatherv(dat,size(dat),psb_mpi_mpk_,&
& resv,szs,displs,psb_mpi_mpk_,root_,icomm,request,info)
endif
else if (collective_end) then
call mpi_wait(request,status,info)
end if
end if
#endif
end subroutine psb_mgatherv_v
!
! SUM
!
@ -542,20 +854,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,&
call mpi_allreduce(mpi_in_place,dat,1,&
& psb_mpi_mpk_,mpi_sum,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_mpk_,mpi_sum,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,&
& psb_mpi_mpk_,mpi_sum,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,&
& psb_mpi_mpk_,mpi_sum,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_mpk_,mpi_sum,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_mpk_,mpi_sum,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_mpk_,mpi_sum,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_mpk_,mpi_sum,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -608,20 +930,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_sum,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_mpk_,mpi_sum,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_sum,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_mpk_,mpi_sum,root_,icomm,info)
end if
end if
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_sum,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_sum,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_mpk_,mpi_sum,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -676,20 +1008,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_sum,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_mpk_,mpi_sum,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_sum,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_mpk_,mpi_sum,root_,icomm,info)
end if
end if
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_sum,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_mpk_,mpi_sum,root_, icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_sum,root_, icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_mpk_,mpi_sum,root_, icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -746,20 +1088,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,&
call mpi_allreduce(mpi_in_place,dat,1,&
& psb_mpi_mpk_,mpi_mamx_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_mpk_,mpi_mamx_op,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,&
& psb_mpi_mpk_,mpi_mamx_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,&
& psb_mpi_mpk_,mpi_mamx_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_mpk_,mpi_mamx_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_mpk_,mpi_mamx_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_mpk_,mpi_mamx_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_mpk_,mpi_mamx_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -813,20 +1165,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
psb_mpi_mpk_,mpi_mamx_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_mamx_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_mpk_,mpi_mamx_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_mamx_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_mpk_,mpi_mamx_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_mamx_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_mpk_,mpi_mamx_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -881,20 +1243,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_mamx_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_mpk_,mpi_mamx_op,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_mamx_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_mpk_,mpi_mamx_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_mamx_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_mamx_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_mpk_,mpi_mamx_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -950,20 +1322,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,&
call mpi_allreduce(mpi_in_place,dat,1,&
& psb_mpi_mpk_,mpi_mamn_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_mpk_,mpi_mamn_op,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,&
& psb_mpi_mpk_,mpi_mamn_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,&
& psb_mpi_mpk_,mpi_mamn_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_mpk_,mpi_mamn_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_mpk_,mpi_mamn_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_mpk_,mpi_mamn_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_mpk_,mpi_mamn_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -1017,20 +1399,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_mamn_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_mamn_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_mpk_,mpi_mamn_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_mamn_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_mpk_,mpi_mamn_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_mamn_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_mpk_,mpi_mamn_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -1085,20 +1477,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_mamn_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_mpk_,mpi_mamn_op,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_mamn_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_mpk_,mpi_mamn_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_mamn_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_mpk_,mpi_mamn_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_mpk_,mpi_mamn_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_mpk_,mpi_mamn_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -1319,12 +1721,13 @@ contains
collective_start = .false.
collective_end = .false.
end if
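! Work on a local copy of the input so that dat can act as the receive
! buffer of the scan without using mpi_in_place.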
dat_ = dat
if (collective_sync) then
call mpi_scan(MPI_IN_PLACE,dat,1,&
call mpi_scan(dat_,dat,1,&
& psb_mpi_mpk_,mpi_sum,icomm,minfo)
else
if (collective_start) then
call mpi_iscan(MPI_IN_PLACE,dat,1,&
call mpi_iscan(dat_,dat,1,&
& psb_mpi_mpk_,mpi_sum,icomm,request,minfo)
else if (collective_end) then
call mpi_wait(request,status,minfo)
@ -1370,12 +1773,13 @@ contains
collective_start = .false.
collective_end = .false.
end if
dat_ = dat
if (collective_sync) then
call mpi_exscan(MPI_IN_PLACE,dat,1,&
call mpi_exscan(dat_,dat,1,&
& psb_mpi_mpk_,mpi_sum,icomm,minfo)
else
if (collective_start) then
call mpi_iexscan(MPI_IN_PLACE,dat,1,&
call mpi_iexscan(dat_,dat,1,&
& psb_mpi_mpk_,mpi_sum,icomm,request,minfo)
else if (collective_end) then
call mpi_wait(request,status,minfo)
@ -1398,12 +1802,13 @@ contains
integer(psb_mpk_), intent(inout) :: dat(:)
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
integer(psb_ipk_) :: iam, np, info
integer(psb_mpk_) :: minfo
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
integer(psb_mpk_), allocatable :: dat_(:)
#if !defined(SERIAL_MPI)
call psb_info(ctxt,iam,np)
icomm = psb_get_mpi_comm(ctxt)
@ -1421,12 +1826,13 @@ contains
collective_start = .false.
collective_end = .false.
end if
dat_ = dat
if (collective_sync) then
call mpi_scan(MPI_IN_PLACE,dat,size(dat),&
call mpi_scan(dat_,dat,size(dat),&
& psb_mpi_mpk_,mpi_sum,icomm,minfo)
else
if (collective_start) then
call mpi_iscan(MPI_IN_PLACE,dat,size(dat),&
call mpi_iscan(dat_,dat,size(dat),&
& psb_mpi_mpk_,mpi_sum,icomm,request,info)
else if (collective_end) then
call mpi_wait(request,status,info)
@ -1447,12 +1853,13 @@ contains
integer(psb_mpk_), intent(inout) :: dat(:)
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
integer(psb_mpk_), allocatable :: dat_(:)
integer(psb_ipk_) :: iam, np, info
integer(psb_mpk_) :: minfo
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
integer(psb_mpk_), allocatable :: dat_(:)
#if !defined(SERIAL_MPI)
call psb_info(ctxt,iam,np)
@ -1471,12 +1878,13 @@ contains
collective_start = .false.
collective_end = .false.
end if
dat_ = dat
if (collective_sync) then
call mpi_exscan(MPI_IN_PLACE,dat,size(dat),&
call mpi_exscan(dat_,dat,size(dat),&
& psb_mpi_mpk_,mpi_sum,icomm,minfo)
else
if (collective_start) then
call mpi_iexscan(MPI_IN_PLACE,dat,size(dat),&
call mpi_iexscan(dat_,dat,size(dat),&
& psb_mpi_mpk_,mpi_sum,icomm,request,info)
else if (collective_end) then
call mpi_wait(request,status,info)
@ -1689,6 +2097,5 @@ contains
Enddo
end subroutine psb_m_e_simple_triad_a2av
end module psi_m_collective_mod
@ -45,6 +45,14 @@ module psi_s_collective_mod
module procedure psb_s_nrm2s, psb_s_nrm2v
end interface psb_nrm2
interface psb_gather
module procedure psb_sgather_s, psb_sgather_v
end interface psb_gather
interface psb_gatherv
module procedure psb_sgatherv_v
end interface
interface psb_sum
module procedure psb_ssums, psb_ssumv, psb_ssumm
end interface
@ -110,6 +118,7 @@ contains
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
real(psb_spk_) :: dat_
#if !defined(SERIAL_MPI)
call psb_info(ctxt,iam,np)
@ -134,20 +143,29 @@ contains
collective_start = .false.
collective_end = .false.
end if
if (collective_sync) then
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,psb_mpi_r_spk_,mpi_max,icomm,info)
call mpi_allreduce(mpi_in_place,dat,1,psb_mpi_r_spk_,mpi_max,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,psb_mpi_r_spk_,mpi_max,root_,icomm,info)
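! mpi_in_place is valid as the send buffer only on the root of mpi_reduce;
! non-root ranks pass dat directly (their receive argument is not significant there).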
if (iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,psb_mpi_r_spk_,mpi_max,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,psb_mpi_r_spk_,mpi_max,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_r_spk_,mpi_max,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_r_spk_,mpi_max,root_,icomm,request,info)
if (iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_r_spk_,mpi_max,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_r_spk_,mpi_max,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -174,6 +192,7 @@ contains
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
real(psb_spk_) :: dat_(1) ! This is a dummy
#if !defined(SERIAL_MPI)
@ -200,21 +219,31 @@ contains
collective_end = .false.
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
if (root_ == -1) then
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_max,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_spk_,mpi_max,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_max,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_r_spk_,mpi_max,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_max,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_spk_,mpi_max,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_max,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_r_spk_,mpi_max,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -242,6 +271,7 @@ contains
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
real(psb_spk_) :: dat_(1,1) ! this is a dummy
#if !defined(SERIAL_MPI)
@ -268,28 +298,37 @@ contains
collective_start = .false.
collective_end = .false.
end if
if (collective_sync) then
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_max,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_spk_,mpi_max,root_,icomm,info)
endif
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_max,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_r_spk_,mpi_max,root_,icomm,info)
endif
end if
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_max,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_spk_,mpi_max,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_max,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_r_spk_,mpi_max,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
end if
end if
#endif
end subroutine psb_smaxm
@ -340,18 +379,27 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,psb_mpi_r_spk_,mpi_min,icomm,info)
call mpi_allreduce(mpi_in_place,dat,1,psb_mpi_r_spk_,mpi_min,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,psb_mpi_r_spk_,mpi_min,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,psb_mpi_r_spk_,mpi_min,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,psb_mpi_r_spk_,mpi_min,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_r_spk_,mpi_min,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_r_spk_,mpi_min,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_r_spk_,mpi_min,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_r_spk_,mpi_min,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -405,20 +453,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_min,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_spk_,mpi_min,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_min,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_r_spk_,mpi_min,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_min,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_spk_,mpi_min,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_min,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_r_spk_,mpi_min,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -473,20 +531,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_min,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_spk_,mpi_min,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_min,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_r_spk_,mpi_min,root_,icomm,info)
end if
end if
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_min,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_spk_,mpi_min,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_min,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_r_spk_,mpi_min,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -545,20 +613,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,&
call mpi_allreduce(mpi_in_place,dat,1,&
& psb_mpi_r_spk_,mpi_snrm2_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_r_spk_,mpi_snrm2_op,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,&
& psb_mpi_r_spk_,mpi_snrm2_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,&
& psb_mpi_r_spk_,mpi_snrm2_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_r_spk_,mpi_snrm2_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_r_spk_,mpi_snrm2_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_r_spk_,mpi_snrm2_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_r_spk_,mpi_snrm2_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -612,20 +690,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),psb_mpi_r_spk_,&
call mpi_allreduce(mpi_in_place,dat,size(dat),psb_mpi_r_spk_,&
& mpi_snrm2_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),psb_mpi_r_spk_,&
& mpi_snrm2_op,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),psb_mpi_r_spk_,&
& mpi_snrm2_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),psb_mpi_r_spk_,&
& mpi_snrm2_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_snrm2_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_spk_,mpi_snrm2_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_snrm2_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_r_spk_,mpi_snrm2_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -636,6 +724,250 @@ contains
end subroutine psb_s_nrm2v
!
! gather
!
subroutine psb_sgather_s(ctxt,dat,resv,root,mode,request)
#ifdef MPI_MOD
use mpi
#endif
implicit none
#ifdef MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
real(psb_spk_), intent(inout) :: dat, resv(:)
integer(psb_mpk_), intent(in), optional :: root
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
integer(psb_mpk_) :: root_
integer(psb_mpk_) :: iam, np, info
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
#if defined(SERIAL_MPI)
resv(1) = dat
#else
call psb_info(ctxt,iam,np)
if (present(root)) then
root_ = root
else
root_ = -1
endif
icomm = psb_get_mpi_comm(ctxt)
if (present(mode)) then
collective_sync = .false.
collective_start = iand(mode,psb_collective_start_) /= 0
collective_end = iand(mode,psb_collective_end_) /= 0
if (.not.present(request)) then
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
else
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allgather(dat,1,psb_mpi_r_spk_,&
& resv,1,psb_mpi_r_spk_,icomm,info)
else
call mpi_gather(dat,1,psb_mpi_r_spk_,&
& resv,1,psb_mpi_r_spk_,root_,icomm,info)
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallgather(dat,1,psb_mpi_r_spk_,&
& resv,1,psb_mpi_r_spk_,icomm,request,info)
else
call mpi_igather(dat,1,psb_mpi_r_spk_,&
& resv,1,psb_mpi_r_spk_,root_,icomm,request,info)
endif
else if (collective_end) then
call mpi_wait(request,status,info)
end if
end if
#endif
end subroutine psb_sgather_s
subroutine psb_sgather_v(ctxt,dat,resv,root,mode,request)
#ifdef MPI_MOD
use mpi
#endif
implicit none
#ifdef MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
real(psb_spk_), intent(inout) :: dat(:), resv(:)
integer(psb_mpk_), intent(in), optional :: root
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
integer(psb_mpk_) :: root_
integer(psb_mpk_) :: iam, np, info
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
#if defined(SERIAL_MPI)
resv(1:size(dat)) = dat
#else
call psb_info(ctxt,iam,np)
if (present(root)) then
root_ = root
else
root_ = -1
endif
icomm = psb_get_mpi_comm(ctxt)
if (present(mode)) then
collective_sync = .false.
collective_start = iand(mode,psb_collective_start_) /= 0
collective_end = iand(mode,psb_collective_end_) /= 0
if (.not.present(request)) then
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
else
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allgather(dat,size(dat),psb_mpi_r_spk_,&
& resv,size(dat),psb_mpi_r_spk_,icomm,info)
else
call mpi_gather(dat,size(dat),psb_mpi_r_spk_,&
& resv,size(dat),psb_mpi_r_spk_,root_,icomm,info)
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallgather(dat,size(dat),psb_mpi_r_spk_,&
& resv,size(dat),psb_mpi_r_spk_,icomm,request,info)
else
call mpi_igather(dat,size(dat),psb_mpi_r_spk_,&
& resv,size(dat),psb_mpi_r_spk_,root_,icomm,request,info)
endif
else if (collective_end) then
call mpi_wait(request,status,info)
end if
end if
#endif
end subroutine psb_sgather_v
subroutine psb_sgatherv_v(ctxt,dat,resv,szs,root,mode,request)
#ifdef MPI_MOD
use mpi
#endif
implicit none
#ifdef MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
real(psb_spk_), intent(inout) :: dat(:), resv(:)
integer(psb_mpk_), intent(in), optional :: root
integer(psb_mpk_), intent(in), optional :: szs(:)
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
integer(psb_mpk_) :: root_
integer(psb_mpk_) :: iam, np, info,i
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
integer(psb_mpk_), allocatable :: displs(:)
logical :: collective_start, collective_end, collective_sync
#if defined(SERIAL_MPI)
resv(1:size(dat)) = dat
#else
call psb_info(ctxt,iam,np)
if (present(root)) then
root_ = root
else
root_ = -1
endif
icomm = psb_get_mpi_comm(ctxt)
if (present(mode)) then
collective_sync = .false.
collective_start = iand(mode,psb_collective_start_) /= 0
collective_end = iand(mode,psb_collective_end_) /= 0
if (.not.present(request)) then
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
else
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
if (collective_sync) then
if (root_ == -1) then
if (size(szs) < np) write(0,*) 'Error: bad input sizes'
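! Receive displacements are the running sum of the per-rank sizes:
! displs(i) = szs(1) + ... + szs(i-1).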
allocate(displs(np))
displs(1) = 0
do i=2, np
displs(i) = displs(i-1) + szs(i-1)
end do
call mpi_allgatherv(dat,size(dat),psb_mpi_r_spk_,&
& resv,szs,displs,psb_mpi_r_spk_,icomm,info)
else
if (iam == root_) then
if (size(szs) < np) write(0,*) 'Error: bad input sizes'
allocate(displs(np))
displs(1) = 0
do i=2, np
displs(i) = displs(i-1) + szs(i-1)
end do
else
allocate(displs(0))
end if
call mpi_gatherv(dat,size(dat),psb_mpi_r_spk_,&
& resv,szs,displs,psb_mpi_r_spk_,root_,icomm,info)
endif
else
if (collective_start) then
if (root_ == -1) then
if (size(szs) < np) write(0,*) 'Error: bad input sizes'
allocate(displs(np))
displs(1) = 0
do i=2, np
displs(i) = displs(i-1) + szs(i-1)
end do
call mpi_iallgatherv(dat,size(dat),psb_mpi_r_spk_,&
& resv,szs,displs,psb_mpi_r_spk_,icomm,request,info)
else
if (iam == root_) then
if (size(szs) < np) write(0,*) 'Error: bad input sizes'
allocate(displs(np))
displs(1) = 0
do i=2, np
displs(i) = displs(i-1) + szs(i-1)
end do
else
allocate(displs(0))
end if
call mpi_igatherv(dat,size(dat),psb_mpi_r_spk_,&
& resv,szs,displs,psb_mpi_r_spk_,root_,icomm,request,info)
endif
else if (collective_end) then
call mpi_wait(request,status,info)
end if
end if
#endif
end subroutine psb_sgatherv_v
!
! SUM
!
@ -684,20 +1016,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,&
call mpi_allreduce(mpi_in_place,dat,1,&
& psb_mpi_r_spk_,mpi_sum,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_r_spk_,mpi_sum,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,&
& psb_mpi_r_spk_,mpi_sum,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,&
& psb_mpi_r_spk_,mpi_sum,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_r_spk_,mpi_sum,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_r_spk_,mpi_sum,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_r_spk_,mpi_sum,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_r_spk_,mpi_sum,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -750,20 +1092,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_sum,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_spk_,mpi_sum,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_sum,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_r_spk_,mpi_sum,root_,icomm,info)
end if
end if
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_sum,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_sum,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_r_spk_,mpi_sum,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -818,20 +1170,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_sum,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_spk_,mpi_sum,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_sum,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_r_spk_,mpi_sum,root_,icomm,info)
end if
end if
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_sum,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_spk_,mpi_sum,root_, icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_sum,root_, icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_r_spk_,mpi_sum,root_, icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -888,20 +1250,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,&
call mpi_allreduce(mpi_in_place,dat,1,&
& psb_mpi_r_spk_,mpi_samx_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_r_spk_,mpi_samx_op,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,&
& psb_mpi_r_spk_,mpi_samx_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,&
& psb_mpi_r_spk_,mpi_samx_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_r_spk_,mpi_samx_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_r_spk_,mpi_samx_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_r_spk_,mpi_samx_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_r_spk_,mpi_samx_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -955,20 +1327,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
psb_mpi_r_spk_,mpi_samx_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_samx_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_r_spk_,mpi_samx_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_samx_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_spk_,mpi_samx_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_samx_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_r_spk_,mpi_samx_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -1023,20 +1405,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_samx_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_spk_,mpi_samx_op,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_samx_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_r_spk_,mpi_samx_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_samx_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_samx_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_r_spk_,mpi_samx_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -1092,20 +1484,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,&
call mpi_allreduce(mpi_in_place,dat,1,&
& psb_mpi_r_spk_,mpi_samn_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_r_spk_,mpi_samn_op,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,&
& psb_mpi_r_spk_,mpi_samn_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,&
& psb_mpi_r_spk_,mpi_samn_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_r_spk_,mpi_samn_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_r_spk_,mpi_samn_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_r_spk_,mpi_samn_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_r_spk_,mpi_samn_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -1159,20 +1561,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_samn_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_samn_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_r_spk_,mpi_samn_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_samn_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_spk_,mpi_samn_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_samn_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_r_spk_,mpi_samn_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -1227,20 +1639,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_samn_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_spk_,mpi_samn_op,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_samn_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_r_spk_,mpi_samn_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_samn_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_r_spk_,mpi_samn_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_r_spk_,mpi_samn_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_r_spk_,mpi_samn_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -1461,12 +1883,13 @@ contains
collective_start = .false.
collective_end = .false.
end if
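! Work on a local copy of the input so that dat can act as the receive
! buffer of the scan without using mpi_in_place.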
dat_ = dat
if (collective_sync) then
call mpi_scan(MPI_IN_PLACE,dat,1,&
call mpi_scan(dat_,dat,1,&
& psb_mpi_r_spk_,mpi_sum,icomm,minfo)
else
if (collective_start) then
call mpi_iscan(MPI_IN_PLACE,dat,1,&
call mpi_iscan(dat_,dat,1,&
& psb_mpi_r_spk_,mpi_sum,icomm,request,minfo)
else if (collective_end) then
call mpi_wait(request,status,minfo)
@ -1512,12 +1935,13 @@ contains
collective_start = .false.
collective_end = .false.
end if
dat_ = dat
if (collective_sync) then
call mpi_exscan(MPI_IN_PLACE,dat,1,&
call mpi_exscan(dat_,dat,1,&
& psb_mpi_r_spk_,mpi_sum,icomm,minfo)
else
if (collective_start) then
call mpi_iexscan(MPI_IN_PLACE,dat,1,&
call mpi_iexscan(dat_,dat,1,&
& psb_mpi_r_spk_,mpi_sum,icomm,request,minfo)
else if (collective_end) then
call mpi_wait(request,status,minfo)
@ -1540,12 +1964,13 @@ contains
real(psb_spk_), intent(inout) :: dat(:)
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
integer(psb_ipk_) :: iam, np, info
integer(psb_mpk_) :: minfo
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
real(psb_spk_), allocatable :: dat_(:)
#if !defined(SERIAL_MPI)
call psb_info(ctxt,iam,np)
icomm = psb_get_mpi_comm(ctxt)
@ -1563,12 +1988,13 @@ contains
collective_start = .false.
collective_end = .false.
end if
dat_ = dat
if (collective_sync) then
call mpi_scan(MPI_IN_PLACE,dat,size(dat),&
call mpi_scan(dat_,dat,size(dat),&
& psb_mpi_r_spk_,mpi_sum,icomm,minfo)
else
if (collective_start) then
call mpi_iscan(MPI_IN_PLACE,dat,size(dat),&
call mpi_iscan(dat_,dat,size(dat),&
& psb_mpi_r_spk_,mpi_sum,icomm,request,info)
else if (collective_end) then
call mpi_wait(request,status,info)
@ -1589,12 +2015,13 @@ contains
real(psb_spk_), intent(inout) :: dat(:)
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
real(psb_spk_), allocatable :: dat_(:)
integer(psb_ipk_) :: iam, np, info
integer(psb_mpk_) :: minfo
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
real(psb_spk_), allocatable :: dat_(:)
#if !defined(SERIAL_MPI)
call psb_info(ctxt,iam,np)
@ -1613,12 +2040,13 @@ contains
collective_start = .false.
collective_end = .false.
end if
dat_ = dat
if (collective_sync) then
call mpi_exscan(MPI_IN_PLACE,dat,size(dat),&
call mpi_exscan(dat_,dat,size(dat),&
& psb_mpi_r_spk_,mpi_sum,icomm,minfo)
else
if (collective_start) then
call mpi_iexscan(MPI_IN_PLACE,dat,size(dat),&
call mpi_iexscan(dat_,dat,size(dat),&
& psb_mpi_r_spk_,mpi_sum,icomm,request,info)
else if (collective_end) then
call mpi_wait(request,status,info)
@ -1831,6 +2259,5 @@ contains
Enddo
end subroutine psb_s_e_simple_triad_a2av
end module psi_s_collective_mod

@ -34,6 +34,14 @@ module psi_z_collective_mod
use psb_desc_const_mod
interface psb_gather
module procedure psb_zgather_s, psb_zgather_v
end interface psb_gather
interface psb_gatherv
module procedure psb_zgatherv_v
end interface
interface psb_sum
module procedure psb_zsums, psb_zsumv, psb_zsumm
end interface
@ -76,6 +84,250 @@ contains
!
! gather
!
subroutine psb_zgather_s(ctxt,dat,resv,root,mode,request)
#ifdef MPI_MOD
use mpi
#endif
implicit none
#ifdef MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
complex(psb_dpk_), intent(inout) :: dat, resv(:)
integer(psb_mpk_), intent(in), optional :: root
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
integer(psb_mpk_) :: root_
integer(psb_mpk_) :: iam, np, info
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
#if defined(SERIAL_MPI)
resv(1) = dat
#else
call psb_info(ctxt,iam,np)
if (present(root)) then
root_ = root
else
root_ = -1
endif
icomm = psb_get_mpi_comm(ctxt)
if (present(mode)) then
collective_sync = .false.
collective_start = iand(mode,psb_collective_start_) /= 0
collective_end = iand(mode,psb_collective_end_) /= 0
if (.not.present(request)) then
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
else
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
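! mode/request protocol: without mode (or without request) the collective runs
! synchronously; otherwise the psb_collective_start_/psb_collective_end_ bits
! select whether this call starts the operation or waits on request.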
if (collective_sync) then
if (root_ == -1) then
call mpi_allgather(dat,1,psb_mpi_c_dpk_,&
& resv,1,psb_mpi_c_dpk_,icomm,info)
else
call mpi_gather(dat,1,psb_mpi_c_dpk_,&
& resv,1,psb_mpi_c_dpk_,root_,icomm,info)
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallgather(dat,1,psb_mpi_c_dpk_,&
& resv,1,psb_mpi_c_dpk_,icomm,request,info)
else
call mpi_igather(dat,1,psb_mpi_c_dpk_,&
& resv,1,psb_mpi_c_dpk_,root_,icomm,request,info)
endif
else if (collective_end) then
call mpi_wait(request,status,info)
end if
end if
#endif
end subroutine psb_zgather_s
subroutine psb_zgather_v(ctxt,dat,resv,root,mode,request)
#ifdef MPI_MOD
use mpi
#endif
implicit none
#ifdef MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
complex(psb_dpk_), intent(inout) :: dat(:), resv(:)
integer(psb_mpk_), intent(in), optional :: root
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
integer(psb_mpk_) :: root_
integer(psb_mpk_) :: iam, np, info
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
#if defined(SERIAL_MPI)
resv(1:size(dat)) = dat
#else
call psb_info(ctxt,iam,np)
if (present(root)) then
root_ = root
else
root_ = -1
endif
icomm = psb_get_mpi_comm(ctxt)
if (present(mode)) then
collective_sync = .false.
collective_start = iand(mode,psb_collective_start_) /= 0
collective_end = iand(mode,psb_collective_end_) /= 0
if (.not.present(request)) then
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
else
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allgather(dat,size(dat),psb_mpi_c_dpk_,&
& resv,size(dat),psb_mpi_c_dpk_,icomm,info)
else
call mpi_gather(dat,size(dat),psb_mpi_c_dpk_,&
& resv,size(dat),psb_mpi_c_dpk_,root_,icomm,info)
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallgather(dat,size(dat),psb_mpi_c_dpk_,&
& resv,size(dat),psb_mpi_c_dpk_,icomm,request,info)
else
call mpi_igather(dat,size(dat),psb_mpi_c_dpk_,&
& resv,size(dat),psb_mpi_c_dpk_,root_,icomm,request,info)
endif
else if (collective_end) then
call mpi_wait(request,status,info)
end if
end if
#endif
end subroutine psb_zgather_v
subroutine psb_zgatherv_v(ctxt,dat,resv,szs,root,mode,request)
#ifdef MPI_MOD
use mpi
#endif
implicit none
#ifdef MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
complex(psb_dpk_), intent(inout) :: dat(:), resv(:)
integer(psb_mpk_), intent(in), optional :: root
integer(psb_mpk_), intent(in), optional :: szs(:)
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
integer(psb_mpk_) :: root_
integer(psb_mpk_) :: iam, np, info,i
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
integer(psb_mpk_), allocatable :: displs(:)
logical :: collective_start, collective_end, collective_sync
#if defined(SERIAL_MPI)
resv(1:size(dat)) = dat
#else
call psb_info(ctxt,iam,np)
if (present(root)) then
root_ = root
else
root_ = -1
endif
icomm = psb_get_mpi_comm(ctxt)
if (present(mode)) then
collective_sync = .false.
collective_start = iand(mode,psb_collective_start_) /= 0
collective_end = iand(mode,psb_collective_end_) /= 0
if (.not.present(request)) then
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
else
collective_sync = .true.
collective_start = .false.
collective_end = .false.
end if
if (collective_sync) then
if (root_ == -1) then
if (size(szs) < np) write(0,*) 'Error: bad input sizes'
allocate(displs(np))
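! displs holds the exclusive prefix sum of szs: displs(i) is the offset in resv
! at which the block contributed by rank i-1 is placed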
displs(1) = 0
do i=2, np
displs(i) = displs(i-1) + szs(i-1)
end do
call mpi_allgatherv(dat,size(dat),psb_mpi_c_dpk_,&
& resv,szs,displs,psb_mpi_c_dpk_,icomm,info)
else
if (iam == root_) then
if (size(szs) < np) write(0,*) 'Error: bad input sizes'
allocate(displs(np))
displs(1) = 0
do i=2, np
displs(i) = displs(i-1) + szs(i-1)
end do
else
allocate(displs(0))
end if
call mpi_gatherv(dat,size(dat),psb_mpi_c_dpk_,&
& resv,szs,displs,psb_mpi_c_dpk_,root_,icomm,info)
endif
else
if (collective_start) then
if (root_ == -1) then
if (size(szs) < np) write(0,*) 'Error: bad input sizes'
allocate(displs(np))
displs(1) = 0
do i=2, np
displs(i) = displs(i-1) + szs(i-1)
end do
call mpi_iallgatherv(dat,size(dat),psb_mpi_c_dpk_,&
& resv,szs,displs,psb_mpi_c_dpk_,icomm,request,info)
else
if (iam == root_) then
if (size(szs) < np) write(0,*) 'Error: bad input sizes'
allocate(displs(np))
displs(1) = 0
do i=2, np
displs(i) = displs(i-1) + szs(i-1)
end do
else
allocate(displs(0))
end if
call mpi_igatherv(dat,size(dat),psb_mpi_c_dpk_,&
& resv,szs,displs,psb_mpi_c_dpk_,root_,icomm,request,info)
endif
else if (collective_end) then
call mpi_wait(request,status,info)
end if
end if
#endif
end subroutine psb_zgatherv_v
!
! SUM
!
@ -124,20 +376,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,&
call mpi_allreduce(mpi_in_place,dat,1,&
& psb_mpi_c_dpk_,mpi_sum,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_c_dpk_,mpi_sum,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,&
& psb_mpi_c_dpk_,mpi_sum,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,&
& psb_mpi_c_dpk_,mpi_sum,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_c_dpk_,mpi_sum,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_c_dpk_,mpi_sum,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_c_dpk_,mpi_sum,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_c_dpk_,mpi_sum,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -190,20 +452,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_sum,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_sum,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_sum,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_sum,root_,icomm,info)
end if
end if
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_sum,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_sum,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_sum,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -258,20 +530,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_sum,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_sum,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_sum,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_sum,root_,icomm,info)
end if
end if
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_sum,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_sum,root_, icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_sum,root_, icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_sum,root_, icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -328,20 +610,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,&
call mpi_allreduce(mpi_in_place,dat,1,&
& psb_mpi_c_dpk_,mpi_zamx_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_c_dpk_,mpi_zamx_op,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,&
& psb_mpi_c_dpk_,mpi_zamx_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,&
& psb_mpi_c_dpk_,mpi_zamx_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_c_dpk_,mpi_zamx_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_c_dpk_,mpi_zamx_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_c_dpk_,mpi_zamx_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_c_dpk_,mpi_zamx_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -395,20 +687,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_zamx_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_zamx_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_zamx_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_zamx_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_zamx_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_zamx_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_zamx_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -463,20 +765,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_zamx_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_zamx_op,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_zamx_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_zamx_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_zamx_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_zamx_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_zamx_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -532,20 +844,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,1,&
call mpi_allreduce(mpi_in_place,dat,1,&
& psb_mpi_c_dpk_,mpi_zamn_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,1,&
& psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,1,&
& psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,1,&
call mpi_iallreduce(mpi_in_place,dat,1,&
& psb_mpi_c_dpk_,mpi_zamn_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,1,&
& psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,1,&
& psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,1,&
& psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -599,20 +921,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_zamn_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_zamn_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -667,20 +999,30 @@ contains
end if
if (collective_sync) then
if (root_ == -1) then
call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_allreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_zamn_op,icomm,info)
else
call mpi_reduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,info)
if(iam==root_) then
call mpi_reduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,info)
else
call mpi_reduce(dat,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,info)
end if
endif
else
if (collective_start) then
if (root_ == -1) then
call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),&
call mpi_iallreduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_zamn_op,icomm,request,info)
else
call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,request,info)
if(iam==root_) then
call mpi_ireduce(mpi_in_place,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,request,info)
else
call mpi_ireduce(dat,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,request,info)
end if
end if
else if (collective_end) then
call mpi_wait(request,status,info)
@ -901,12 +1243,13 @@ contains
collective_start = .false.
collective_end = .false.
end if
dat_ = dat
if (collective_sync) then
call mpi_scan(MPI_IN_PLACE,dat,1,&
call mpi_scan(dat_,dat,1,&
& psb_mpi_c_dpk_,mpi_sum,icomm,minfo)
else
if (collective_start) then
call mpi_iscan(MPI_IN_PLACE,dat,1,&
call mpi_iscan(dat_,dat,1,&
& psb_mpi_c_dpk_,mpi_sum,icomm,request,minfo)
else if (collective_end) then
call mpi_wait(request,status,minfo)
@ -952,12 +1295,13 @@ contains
collective_start = .false.
collective_end = .false.
end if
dat_ = dat
if (collective_sync) then
call mpi_exscan(MPI_IN_PLACE,dat,1,&
call mpi_exscan(dat_,dat,1,&
& psb_mpi_c_dpk_,mpi_sum,icomm,minfo)
else
if (collective_start) then
call mpi_iexscan(MPI_IN_PLACE,dat,1,&
call mpi_iexscan(dat_,dat,1,&
& psb_mpi_c_dpk_,mpi_sum,icomm,request,minfo)
else if (collective_end) then
call mpi_wait(request,status,minfo)
@ -980,12 +1324,13 @@ contains
complex(psb_dpk_), intent(inout) :: dat(:)
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
integer(psb_ipk_) :: iam, np, info
integer(psb_mpk_) :: minfo
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
complex(psb_dpk_), allocatable :: dat_(:)
#if !defined(SERIAL_MPI)
call psb_info(ctxt,iam,np)
icomm = psb_get_mpi_comm(ctxt)
@ -1003,12 +1348,13 @@ contains
collective_start = .false.
collective_end = .false.
end if
dat_ = dat
if (collective_sync) then
call mpi_scan(MPI_IN_PLACE,dat,size(dat),&
call mpi_scan(dat_,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_sum,icomm,minfo)
else
if (collective_start) then
call mpi_iscan(MPI_IN_PLACE,dat,size(dat),&
call mpi_iscan(dat_,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_sum,icomm,request,info)
else if (collective_end) then
call mpi_wait(request,status,info)
@ -1029,12 +1375,13 @@ contains
complex(psb_dpk_), intent(inout) :: dat(:)
integer(psb_ipk_), intent(in), optional :: mode
integer(psb_mpk_), intent(inout), optional :: request
complex(psb_dpk_), allocatable :: dat_(:)
integer(psb_ipk_) :: iam, np, info
integer(psb_mpk_) :: minfo
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: status(mpi_status_size)
logical :: collective_start, collective_end, collective_sync
complex(psb_dpk_), allocatable :: dat_(:)
#if !defined(SERIAL_MPI)
call psb_info(ctxt,iam,np)
@ -1053,12 +1400,13 @@ contains
collective_start = .false.
collective_end = .false.
end if
dat_ = dat
if (collective_sync) then
call mpi_exscan(MPI_IN_PLACE,dat,size(dat),&
call mpi_exscan(dat_,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_sum,icomm,minfo)
else
if (collective_start) then
call mpi_iexscan(MPI_IN_PLACE,dat,size(dat),&
call mpi_iexscan(dat_,dat,size(dat),&
& psb_mpi_c_dpk_,mpi_sum,icomm,request,info)
else if (collective_end) then
call mpi_wait(request,status,info)
@ -1271,6 +1619,5 @@ contains
Enddo
end subroutine psb_z_e_simple_triad_a2av
end module psi_z_collective_mod

@ -143,6 +143,20 @@ module psb_c_psblas_mod
end subroutine psb_caxpby
end interface
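!
! ABGDXYZ: fused pair of AXPBY updates,
!   y = alpha*x + beta*y,  z = gamma*y + delta*z
! (see c_base_abgdxyz in psb_c_base_vect_mod).
!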
interface psb_abgdxyz
subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
import :: psb_desc_type, psb_spk_, psb_ipk_, &
& psb_c_vect_type, psb_cspmat_type
type(psb_c_vect_type), intent (inout) :: x
type(psb_c_vect_type), intent (inout) :: y
type(psb_c_vect_type), intent (inout) :: z
complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
end subroutine psb_cabgdxyz_vect
end interface psb_abgdxyz
interface psb_geamax
function psb_camax(x, desc_a, info, jx,global)
import :: psb_desc_type, psb_spk_, psb_ipk_, &

@ -143,6 +143,20 @@ module psb_d_psblas_mod
end subroutine psb_daxpby
end interface
interface psb_abgdxyz
subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
import :: psb_desc_type, psb_dpk_, psb_ipk_, &
& psb_d_vect_type, psb_dspmat_type
type(psb_d_vect_type), intent (inout) :: x
type(psb_d_vect_type), intent (inout) :: y
type(psb_d_vect_type), intent (inout) :: z
real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
end subroutine psb_dabgdxyz_vect
end interface psb_abgdxyz
interface psb_geamax
function psb_damax(x, desc_a, info, jx,global)
import :: psb_desc_type, psb_dpk_, psb_ipk_, &

@ -143,6 +143,20 @@ module psb_s_psblas_mod
end subroutine psb_saxpby
end interface
interface psb_abgdxyz
subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
import :: psb_desc_type, psb_spk_, psb_ipk_, &
& psb_s_vect_type, psb_sspmat_type
type(psb_s_vect_type), intent (inout) :: x
type(psb_s_vect_type), intent (inout) :: y
type(psb_s_vect_type), intent (inout) :: z
real(psb_spk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
end subroutine psb_sabgdxyz_vect
end interface psb_abgdxyz
interface psb_geamax
function psb_samax(x, desc_a, info, jx,global)
import :: psb_desc_type, psb_spk_, psb_ipk_, &

@ -143,6 +143,20 @@ module psb_z_psblas_mod
end subroutine psb_zaxpby
end interface
interface psb_abgdxyz
subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
import :: psb_desc_type, psb_dpk_, psb_ipk_, &
& psb_z_vect_type, psb_zspmat_type
type(psb_z_vect_type), intent (inout) :: x
type(psb_z_vect_type), intent (inout) :: y
type(psb_z_vect_type), intent (inout) :: z
complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
end subroutine psb_zabgdxyz_vect
end interface psb_abgdxyz
interface psb_geamax
function psb_zamax(x, desc_a, info, jx,global)
import :: psb_desc_type, psb_dpk_, psb_ipk_, &

@ -155,6 +155,9 @@ module psb_c_base_vect_mod
procedure, pass(z) :: axpby_v2 => c_base_axpby_v2
procedure, pass(z) :: axpby_a2 => c_base_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: abgdxyz => c_base_abgdxyz
procedure, pass(w) :: xyzw => c_base_xyzw
!
! Vector by vector multiplication. Need all variants
! to handle multiple requirements from preconditioners
@ -1018,7 +1021,7 @@ contains
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param x The class(base_vect) to be added
!! \param beta scalar alpha
!! \param beta scalar beta
!! \param info return code
!!
subroutine c_base_axpby_v(m,alpha, x, beta, y, info)
@ -1047,7 +1050,7 @@ contains
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param x The class(base_vect) to be added
!! \param beta scalar alpha
!! \param beta scalar beta
!! \param y The class(base_vect) to be added
!! \param z The class(base_vect) to be returned
!! \param info return code
@ -1078,7 +1081,7 @@ contains
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param x(:) The array to be added
!! \param beta scalar alpha
!! \param beta scalar beta
!! \param info return code
!!
subroutine c_base_axpby_a(m,alpha, x, beta, y, info)
@ -1126,6 +1129,64 @@ contains
end subroutine c_base_axpby_a2
!
! ABGDXYZ is invoked via Z, hence the structure below.
!
!
!> Function base_abgdxyz
!! \memberof psb_c_base_vect_type
!! \brief ABGDXYZ combines two AXPBYs: y=alpha*x+beta*y, z=gamma*y+delta*z
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param beta scalar beta
!! \param gamma scalar gamma
!! \param delta scalar delta
!! \param x The class(base_vect) to be added
!! \param y The class(base_vect) to be added
!! \param z The class(base_vect) to be added
!! \param info return code
!!
subroutine c_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_c_base_vect_type), intent(inout) :: x
class(psb_c_base_vect_type), intent(inout) :: y
class(psb_c_base_vect_type), intent(inout) :: z
complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
if (x%is_dev().and.(alpha/=czero)) call x%sync()
if (y%is_dev().and.(beta/=czero)) call y%sync()
if (z%is_dev().and.(delta/=czero)) call z%sync()
call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call y%set_host()
call z%set_host()
end subroutine c_base_abgdxyz
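!
! For reference, the update delegated to psi_abgdxyz above is equivalent to the
! following elementwise loop (illustrative sketch only; the actual psi_abgdxyz
! kernel may fuse or vectorize the two updates differently):
!
!   do i = 1, m
!     y%v(i) = alpha*x%v(i) + beta*y%v(i)
!     z%v(i) = gamma*y%v(i) + delta*z%v(i)
!   end do
!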
subroutine c_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_c_base_vect_type), intent(inout) :: x
class(psb_c_base_vect_type), intent(inout) :: y
class(psb_c_base_vect_type), intent(inout) :: z
class(psb_c_base_vect_type), intent(inout) :: w
complex(psb_spk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
if (x%is_dev().and.(a/=czero)) call x%sync()
if (y%is_dev().and.(b/=czero)) call y%sync()
if (z%is_dev().and.(d/=czero)) call z%sync()
if (w%is_dev().and.(f/=czero)) call w%sync()
call psi_xyzw(m,a,b,c,d,e,f,x%v, y%v, z%v, w%v, info)
call y%set_host()
call z%set_host()
call w%set_host()
end subroutine c_base_xyzw
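!
! Note: xyzw is undocumented here; judging from the argument list and the sync
! guards above, it presumably chains three AXPBY-style updates,
!   y = a*x + b*y,  z = c*y + d*z,  w = e*z + f*w,
! with the actual work delegated to psi_xyzw (this reading is an assumption).
!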
!
! Multiple variants of two operations:

@ -579,7 +579,111 @@ module psb_c_csr_mat_mod
end subroutine psb_c_csr_scals
end interface
!> \namespace psb_base_mod \class psb_c_ecsr_sparse_mat
type, extends(psb_c_csr_sparse_mat) :: psb_c_ecsr_sparse_mat
!> Number of non-empty rows
integer(psb_ipk_) :: nnerws
!> Indices of non-empty rows
integer(psb_ipk_), allocatable :: nerwp(:)
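!
! The ECSR variant keeps, on top of the plain CSR data, the list of rows that
! actually hold nonzeros, so that csmv can presumably restrict its loop to
! nerwp(1:nnerws); this pays off when many rows are empty.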
contains
procedure, nopass :: get_fmt => c_ecsr_get_fmt
! procedure, pass(a) :: csmm => psb_c_ecsr_csmm
procedure, pass(a) :: csmv => psb_c_ecsr_csmv
procedure, pass(a) :: cp_from_coo => psb_c_cp_ecsr_from_coo
procedure, pass(a) :: cp_from_fmt => psb_c_cp_ecsr_from_fmt
procedure, pass(a) :: mv_from_coo => psb_c_mv_ecsr_from_coo
procedure, pass(a) :: mv_from_fmt => psb_c_mv_ecsr_from_fmt
procedure, pass(a) :: cmp_nerwp => psb_c_ecsr_cmp_nerwp
procedure, pass(a) :: free => c_ecsr_free
procedure, pass(a) :: mold => psb_c_ecsr_mold
end type psb_c_ecsr_sparse_mat
!> \memberof psb_c_ecsr_sparse_mat
!! \see psb_c_base_mat_mod::psb_c_base_csmv
interface
subroutine psb_c_ecsr_csmv(alpha,a,x,beta,y,info,trans)
import
class(psb_c_ecsr_sparse_mat), intent(in) :: a
complex(psb_spk_), intent(in) :: alpha, beta, x(:)
complex(psb_spk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
end subroutine psb_c_ecsr_csmv
end interface
!> \memberof psb_c_ecsr_sparse_mat
!! \see psb_c_base_mat_mod::psb_c_base_cp_from_coo
interface
subroutine psb_c_ecsr_cmp_nerwp(a,info)
import
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
end subroutine psb_c_ecsr_cmp_nerwp
end interface
!> \memberof psb_c_ecsr_sparse_mat
!! \see psb_c_base_mat_mod::psb_c_base_cp_from_coo
interface
subroutine psb_c_cp_ecsr_from_coo(a,b,info)
import
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
class(psb_c_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_c_cp_ecsr_from_coo
end interface
!> \memberof psb_c_ecsr_sparse_mat
!! \see psb_c_base_mat_mod::psb_c_base_cp_from_fmt
interface
subroutine psb_c_cp_ecsr_from_fmt(a,b,info)
import
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
class(psb_c_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_c_cp_ecsr_from_fmt
end interface
!> \memberof psb_c_ecsr_sparse_mat
!! \see psb_c_base_mat_mod::psb_c_base_mv_from_coo
interface
subroutine psb_c_mv_ecsr_from_coo(a,b,info)
import
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
class(psb_c_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_c_mv_ecsr_from_coo
end interface
!> \memberof psb_c_ecsr_sparse_mat
!! \see psb_c_base_mat_mod::psb_c_base_mv_from_fmt
interface
subroutine psb_c_mv_ecsr_from_fmt(a,b,info)
import
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
class(psb_c_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_c_mv_ecsr_from_fmt
end interface
!> \memberof psb_c_ecsr_sparse_mat
!! \see psb_base_mat_mod::psb_base_mold
interface
subroutine psb_c_ecsr_mold(a,b,info)
import
class(psb_c_ecsr_sparse_mat), intent(in) :: a
class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_c_ecsr_mold
end interface
!> \namespace psb_base_mod \class psb_lc_csr_sparse_mat
!! \extends psb_lc_base_mat_mod::psb_lc_base_sparse_mat
!!
!! psb_lc_csr_sparse_mat type and the related methods.
@ -1178,6 +1282,26 @@ contains
function c_ecsr_get_fmt() result(res)
implicit none
character(len=5) :: res
res = 'ECSR'
end function c_ecsr_get_fmt
subroutine c_ecsr_free(a)
implicit none
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
if (allocated(a%nerwp)) deallocate(a%nerwp)
a%nnerws = 0
call a%psb_c_csr_sparse_mat%free()
return
end subroutine c_ecsr_free
! == ===================================
!
!

@ -79,12 +79,14 @@
module psb_c_mat_mod
use psb_c_base_mat_mod
use psb_c_csr_mat_mod, only : psb_c_csr_sparse_mat, psb_lc_csr_sparse_mat
use psb_c_csr_mat_mod, only : psb_c_csr_sparse_mat, psb_lc_csr_sparse_mat,&
& psb_c_ecsr_sparse_mat
use psb_c_csc_mat_mod, only : psb_c_csc_sparse_mat, psb_lc_csc_sparse_mat
type :: psb_cspmat_type
class(psb_c_base_sparse_mat), allocatable :: a
class(psb_c_base_sparse_mat), allocatable :: ad, and
integer(psb_ipk_) :: remote_build=psb_matbld_noremote_
type(psb_lc_coo_sparse_mat), allocatable :: rmta
@ -202,6 +204,7 @@ module psb_c_mat_mod
procedure, pass(a) :: cscnv_ip => psb_c_cscnv_ip
procedure, pass(a) :: cscnv_base => psb_c_cscnv_base
generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base
procedure, pass(a) :: split_nd => psb_c_split_nd
procedure, pass(a) :: clone => psb_cspmat_clone
procedure, pass(a) :: move_alloc => psb_cspmat_type_move
!
@ -840,6 +843,18 @@ module psb_c_mat_mod
!
!
interface
subroutine psb_c_split_nd(a,n_rows,n_cols,info)
import :: psb_ipk_, psb_lpk_, psb_cspmat_type, psb_spk_, psb_c_base_sparse_mat
class(psb_cspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
!!$ integer(psb_ipk_),optional, intent(in) :: dupl
!!$ character(len=*), optional, intent(in) :: type
!!$ class(psb_c_base_sparse_mat), intent(in), optional :: mold
end subroutine psb_c_split_nd
end interface
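!
! split_nd presumably separates the matrix into an n_rows x n_cols local
! ("diagonal") block and the remaining part, stored in the new ad / and
! components of psb_cspmat_type (this reading is an assumption; the
! commented-out arguments hint at planned dupl/type/mold options).
!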
!
! CSCNV: switches to a different internal derived type.
! 3 versions: copying to target
@ -859,7 +874,6 @@ module psb_c_mat_mod
end subroutine psb_c_cscnv
end interface
interface
subroutine psb_c_cscnv_ip(a,iinfo,type,mold,dupl)
import :: psb_ipk_, psb_lpk_, psb_cspmat_type, psb_spk_, psb_c_base_sparse_mat
@ -871,7 +885,6 @@ module psb_c_mat_mod
end subroutine psb_c_cscnv_ip
end interface
interface
subroutine psb_c_cscnv_base(a,b,info,dupl)
import :: psb_ipk_, psb_lpk_, psb_cspmat_type, psb_spk_, psb_c_base_sparse_mat

@ -102,6 +102,9 @@ module psb_c_vect_mod
procedure, pass(z) :: axpby_v2 => c_vect_axpby_v2
procedure, pass(z) :: axpby_a2 => c_vect_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: abgdxyz => c_vect_abgdxyz
procedure, pass(z) :: xyzw => c_vect_xyzw
procedure, pass(y) :: mlt_v => c_vect_mlt_v
procedure, pass(y) :: mlt_a => c_vect_mlt_a
procedure, pass(z) :: mlt_a_2 => c_vect_mlt_a_2
@ -771,6 +774,38 @@ contains
end subroutine c_vect_axpby_a2
subroutine c_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_c_vect_type), intent(inout) :: x
class(psb_c_vect_type), intent(inout) :: y
class(psb_c_vect_type), intent(inout) :: z
complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
if (allocated(z%v)) &
call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
end subroutine c_vect_abgdxyz
subroutine c_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_c_vect_type), intent(inout) :: x
class(psb_c_vect_type), intent(inout) :: y
class(psb_c_vect_type), intent(inout) :: z
class(psb_c_vect_type), intent(inout) :: w
complex(psb_spk_), intent (in) :: a, b, c, d, e, f
integer(psb_ipk_), intent(out) :: info
if (allocated(w%v)) &
call w%v%xyzw(m,a,b,c,d,e,f,x%v,y%v,z%v,info)
end subroutine c_vect_xyzw
subroutine c_vect_mlt_v(x, y, info)
use psi_serial_mod
implicit none
@ -1134,7 +1169,7 @@ contains
end if
end function c_vect_nrm2_weight
function c_vect_nrm2_weight_mask(n,x,w,id,info,aux) result(res)
use psi_serial_mod
implicit none

@ -155,6 +155,9 @@ module psb_d_base_vect_mod
procedure, pass(z) :: axpby_v2 => d_base_axpby_v2
procedure, pass(z) :: axpby_a2 => d_base_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: abgdxyz => d_base_abgdxyz
procedure, pass(w) :: xyzw => d_base_xyzw
!
! Vector by vector multiplication. Need all variants
! to handle multiple requirements from preconditioners
@ -1025,7 +1028,7 @@ contains
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param x The class(base_vect) to be added
!! \param beta scalar alpha
!! \param beta scalar beta
!! \param info return code
!!
subroutine d_base_axpby_v(m,alpha, x, beta, y, info)
@ -1054,7 +1057,7 @@ contains
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param x The class(base_vect) to be added
!! \param beta scalar alpha
!! \param beta scalar beta
!! \param y The class(base_vect) to be added
!! \param z The class(base_vect) to be returned
!! \param info return code
@ -1085,7 +1088,7 @@ contains
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param x(:) The array to be added
!! \param beta scalar alpha
!! \param beta scalar beta
!! \param info return code
!!
subroutine d_base_axpby_a(m,alpha, x, beta, y, info)
@ -1133,6 +1136,64 @@ contains
end subroutine d_base_axpby_a2
!
! ABGDXYZ is invoked via Z, hence the structure below.
!
!
!> Function base_abgdxyz
!! \memberof psb_d_base_vect_type
!! \brief ABGDXYZ combines two AXPBYs: y=alpha*x+beta*y, z=gamma*y+delta*z
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param beta scalar beta
!! \param gamma scalar gamma
!! \param delta scalar delta
!! \param x The class(base_vect) to be added
!! \param y The class(base_vect) to be added
!! \param z The class(base_vect) to be added
!! \param info return code
!!
subroutine d_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_d_base_vect_type), intent(inout) :: x
class(psb_d_base_vect_type), intent(inout) :: y
class(psb_d_base_vect_type), intent(inout) :: z
real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
if (x%is_dev().and.(alpha/=dzero)) call x%sync()
if (y%is_dev().and.(beta/=dzero)) call y%sync()
if (z%is_dev().and.(delta/=dzero)) call z%sync()
call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call y%set_host()
call z%set_host()
end subroutine d_base_abgdxyz
subroutine d_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_d_base_vect_type), intent(inout) :: x
class(psb_d_base_vect_type), intent(inout) :: y
class(psb_d_base_vect_type), intent(inout) :: z
class(psb_d_base_vect_type), intent(inout) :: w
real(psb_dpk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
if (x%is_dev().and.(a/=dzero)) call x%sync()
if (y%is_dev().and.(b/=dzero)) call y%sync()
if (z%is_dev().and.(d/=dzero)) call z%sync()
if (w%is_dev().and.(f/=dzero)) call w%sync()
call psi_xyzw(m,a,b,c,d,e,f,x%v, y%v, z%v, w%v, info)
call y%set_host()
call z%set_host()
call w%set_host()
end subroutine d_base_xyzw
!
! Multiple variants of two operations:

@ -579,7 +579,111 @@ module psb_d_csr_mat_mod
end subroutine psb_d_csr_scals
end interface
!> \namespace psb_base_mod \class psb_d_ecsr_sparse_mat
type, extends(psb_d_csr_sparse_mat) :: psb_d_ecsr_sparse_mat
!> Number of non-empty rows
integer(psb_ipk_) :: nnerws
!> Indices of non-empty rows
integer(psb_ipk_), allocatable :: nerwp(:)
contains
procedure, nopass :: get_fmt => d_ecsr_get_fmt
! procedure, pass(a) :: csmm => psb_d_ecsr_csmm
procedure, pass(a) :: csmv => psb_d_ecsr_csmv
procedure, pass(a) :: cp_from_coo => psb_d_cp_ecsr_from_coo
procedure, pass(a) :: cp_from_fmt => psb_d_cp_ecsr_from_fmt
procedure, pass(a) :: mv_from_coo => psb_d_mv_ecsr_from_coo
procedure, pass(a) :: mv_from_fmt => psb_d_mv_ecsr_from_fmt
procedure, pass(a) :: cmp_nerwp => psb_d_ecsr_cmp_nerwp
procedure, pass(a) :: free => d_ecsr_free
procedure, pass(a) :: mold => psb_d_ecsr_mold
end type psb_d_ecsr_sparse_mat
!> \memberof psb_d_ecsr_sparse_mat
!! \see psb_d_base_mat_mod::psb_d_base_csmv
interface
subroutine psb_d_ecsr_csmv(alpha,a,x,beta,y,info,trans)
import
class(psb_d_ecsr_sparse_mat), intent(in) :: a
real(psb_dpk_), intent(in) :: alpha, beta, x(:)
real(psb_dpk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
end subroutine psb_d_ecsr_csmv
end interface
!> \memberof psb_d_ecsr_sparse_mat
!! \see psb_d_base_mat_mod::psb_d_base_cp_from_coo
interface
subroutine psb_d_ecsr_cmp_nerwp(a,info)
import
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_ecsr_cmp_nerwp
end interface
!> \memberof psb_d_ecsr_sparse_mat
!! \see psb_d_base_mat_mod::psb_d_base_cp_from_coo
interface
subroutine psb_d_cp_ecsr_from_coo(a,b,info)
import
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
class(psb_d_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_cp_ecsr_from_coo
end interface
!> \memberof psb_d_ecsr_sparse_mat
!! \see psb_d_base_mat_mod::psb_d_base_cp_from_fmt
interface
subroutine psb_d_cp_ecsr_from_fmt(a,b,info)
import
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
class(psb_d_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_cp_ecsr_from_fmt
end interface
!> \memberof psb_d_ecsr_sparse_mat
!! \see psb_d_base_mat_mod::psb_d_base_mv_from_coo
interface
subroutine psb_d_mv_ecsr_from_coo(a,b,info)
import
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
class(psb_d_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_mv_ecsr_from_coo
end interface
!> \memberof psb_d_ecsr_sparse_mat
!! \see psb_d_base_mat_mod::psb_d_base_mv_from_fmt
interface
subroutine psb_d_mv_ecsr_from_fmt(a,b,info)
import
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
class(psb_d_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_mv_ecsr_from_fmt
end interface
!> \memberof psb_d_ecsr_sparse_mat
!! \see psb_base_mat_mod::psb_base_mold
interface
subroutine psb_d_ecsr_mold(a,b,info)
import
class(psb_d_ecsr_sparse_mat), intent(in) :: a
class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_ecsr_mold
end interface
!> \namespace psb_base_mod \class psb_ld_csr_sparse_mat
!! \extends psb_ld_base_mat_mod::psb_ld_base_sparse_mat
!!
!! psb_ld_csr_sparse_mat type and the related methods.
@ -1178,6 +1282,26 @@ contains
function d_ecsr_get_fmt() result(res)
implicit none
character(len=5) :: res
res = 'ECSR'
end function d_ecsr_get_fmt
subroutine d_ecsr_free(a)
implicit none
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
if (allocated(a%nerwp)) deallocate(a%nerwp)
a%nnerws = 0
call a%psb_d_csr_sparse_mat%free()
return
end subroutine d_ecsr_free
! == ===================================
!
!

@ -79,12 +79,14 @@
module psb_d_mat_mod
use psb_d_base_mat_mod
use psb_d_csr_mat_mod, only : psb_d_csr_sparse_mat, psb_ld_csr_sparse_mat
use psb_d_csr_mat_mod, only : psb_d_csr_sparse_mat, psb_ld_csr_sparse_mat,&
& psb_d_ecsr_sparse_mat
use psb_d_csc_mat_mod, only : psb_d_csc_sparse_mat, psb_ld_csc_sparse_mat
type :: psb_dspmat_type
class(psb_d_base_sparse_mat), allocatable :: a
class(psb_d_base_sparse_mat), allocatable :: ad, and
integer(psb_ipk_) :: remote_build=psb_matbld_noremote_
type(psb_ld_coo_sparse_mat), allocatable :: rmta
@ -202,6 +204,7 @@ module psb_d_mat_mod
procedure, pass(a) :: cscnv_ip => psb_d_cscnv_ip
procedure, pass(a) :: cscnv_base => psb_d_cscnv_base
generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base
procedure, pass(a) :: split_nd => psb_d_split_nd
procedure, pass(a) :: clone => psb_dspmat_clone
procedure, pass(a) :: move_alloc => psb_dspmat_type_move
!
@ -840,6 +843,18 @@ module psb_d_mat_mod
!
!
interface
subroutine psb_d_split_nd(a,n_rows,n_cols,info)
import :: psb_ipk_, psb_lpk_, psb_dspmat_type, psb_dpk_, psb_d_base_sparse_mat
class(psb_dspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
!!$ integer(psb_ipk_),optional, intent(in) :: dupl
!!$ character(len=*), optional, intent(in) :: type
!!$ class(psb_d_base_sparse_mat), intent(in), optional :: mold
end subroutine psb_d_split_nd
end interface
!
! CSCNV: switches to a different internal derived type.
! 3 versions: copying to target
@ -859,7 +874,6 @@ module psb_d_mat_mod
end subroutine psb_d_cscnv
end interface
interface
subroutine psb_d_cscnv_ip(a,iinfo,type,mold,dupl)
import :: psb_ipk_, psb_lpk_, psb_dspmat_type, psb_dpk_, psb_d_base_sparse_mat
@ -871,7 +885,6 @@ module psb_d_mat_mod
end subroutine psb_d_cscnv_ip
end interface
interface
subroutine psb_d_cscnv_base(a,b,info,dupl)
import :: psb_ipk_, psb_lpk_, psb_dspmat_type, psb_dpk_, psb_d_base_sparse_mat

@ -102,6 +102,9 @@ module psb_d_vect_mod
procedure, pass(z) :: axpby_v2 => d_vect_axpby_v2
procedure, pass(z) :: axpby_a2 => d_vect_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: abgdxyz => d_vect_abgdxyz
procedure, pass(z) :: xyzw => d_vect_xyzw
procedure, pass(y) :: mlt_v => d_vect_mlt_v
procedure, pass(y) :: mlt_a => d_vect_mlt_a
procedure, pass(z) :: mlt_a_2 => d_vect_mlt_a_2
@ -778,6 +781,38 @@ contains
end subroutine d_vect_axpby_a2
subroutine d_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_d_vect_type), intent(inout) :: x
class(psb_d_vect_type), intent(inout) :: y
class(psb_d_vect_type), intent(inout) :: z
real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
if (allocated(z%v)) &
call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
end subroutine d_vect_abgdxyz
subroutine d_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_d_vect_type), intent(inout) :: x
class(psb_d_vect_type), intent(inout) :: y
class(psb_d_vect_type), intent(inout) :: z
class(psb_d_vect_type), intent(inout) :: w
real(psb_dpk_), intent (in) :: a, b, c, d, e, f
integer(psb_ipk_), intent(out) :: info
if (allocated(w%v)) &
call w%v%xyzw(m,a,b,c,d,e,f,x%v,y%v,z%v,info)
end subroutine d_vect_xyzw
subroutine d_vect_mlt_v(x, y, info)
use psi_serial_mod
implicit none
@ -1141,7 +1176,7 @@ contains
end if
end function d_vect_nrm2_weight
function d_vect_nrm2_weight_mask(n,x,w,id,info,aux) result(res)
use psi_serial_mod
implicit none

@ -155,6 +155,9 @@ module psb_s_base_vect_mod
procedure, pass(z) :: axpby_v2 => s_base_axpby_v2
procedure, pass(z) :: axpby_a2 => s_base_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: abgdxyz => s_base_abgdxyz
procedure, pass(w) :: xyzw => s_base_xyzw
!
! Vector by vector multiplication. Need all variants
! to handle multiple requirements from preconditioners
@ -1025,7 +1028,7 @@ contains
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param x The class(base_vect) to be added
!! \param beta scalar alpha
!! \param beta scalar beta
!! \param info return code
!!
subroutine s_base_axpby_v(m,alpha, x, beta, y, info)
@ -1054,7 +1057,7 @@ contains
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param x The class(base_vect) to be added
!! \param beta scalar alpha
!! \param beta scalar beta
!! \param y The class(base_vect) to be added
!! \param z The class(base_vect) to be returned
!! \param info return code
@ -1085,7 +1088,7 @@ contains
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param x(:) The array to be added
!! \param beta scalar alpha
!! \param beta scalar beta
!! \param info return code
!!
subroutine s_base_axpby_a(m,alpha, x, beta, y, info)
@ -1133,6 +1136,64 @@ contains
end subroutine s_base_axpby_a2
!
! ABGDXYZ is invoked via Z, hence the structure below.
!
!
!> Function base_abgdxyz
!! \memberof psb_s_base_vect_type
!! \brief ABGDXYZ combines two AXPBYs: y=alpha*x+beta*y, z=gamma*y+delta*z
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param beta scalar beta
!! \param gamma scalar gamma
!! \param delta scalar delta
!! \param x The class(base_vect) to be added
!! \param y The class(base_vect) to be added
!! \param z The class(base_vect) to be added
!! \param info return code
!!
subroutine s_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_s_base_vect_type), intent(inout) :: x
class(psb_s_base_vect_type), intent(inout) :: y
class(psb_s_base_vect_type), intent(inout) :: z
real(psb_spk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
if (x%is_dev().and.(alpha/=szero)) call x%sync()
if (y%is_dev().and.(beta/=szero)) call y%sync()
if (z%is_dev().and.(delta/=szero)) call z%sync()
call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call y%set_host()
call z%set_host()
end subroutine s_base_abgdxyz
subroutine s_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_s_base_vect_type), intent(inout) :: x
class(psb_s_base_vect_type), intent(inout) :: y
class(psb_s_base_vect_type), intent(inout) :: z
class(psb_s_base_vect_type), intent(inout) :: w
real(psb_spk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
if (x%is_dev().and.(a/=szero)) call x%sync()
if (y%is_dev().and.(b/=szero)) call y%sync()
if (z%is_dev().and.(d/=szero)) call z%sync()
if (w%is_dev().and.(f/=szero)) call w%sync()
call psi_xyzw(m,a,b,c,d,e,f,x%v, y%v, z%v, w%v, info)
call y%set_host()
call z%set_host()
call w%set_host()
end subroutine s_base_xyzw
!
! Multiple variants of two operations:

@ -579,7 +579,111 @@ module psb_s_csr_mat_mod
end subroutine psb_s_csr_scals
end interface
!> \namespace psb_base_mod \class psb_s_ecsr_sparse_mat
type, extends(psb_s_csr_sparse_mat) :: psb_s_ecsr_sparse_mat
!> Number of non-empty rows
integer(psb_ipk_) :: nnerws
!> Indices of non-empty rows
integer(psb_ipk_), allocatable :: nerwp(:)
contains
procedure, nopass :: get_fmt => s_ecsr_get_fmt
! procedure, pass(a) :: csmm => psb_s_ecsr_csmm
procedure, pass(a) :: csmv => psb_s_ecsr_csmv
procedure, pass(a) :: cp_from_coo => psb_s_cp_ecsr_from_coo
procedure, pass(a) :: cp_from_fmt => psb_s_cp_ecsr_from_fmt
procedure, pass(a) :: mv_from_coo => psb_s_mv_ecsr_from_coo
procedure, pass(a) :: mv_from_fmt => psb_s_mv_ecsr_from_fmt
procedure, pass(a) :: cmp_nerwp => psb_s_ecsr_cmp_nerwp
procedure, pass(a) :: free => s_ecsr_free
procedure, pass(a) :: mold => psb_s_ecsr_mold
end type psb_s_ecsr_sparse_mat
!> \memberof psb_s_ecsr_sparse_mat
!! \see psb_s_base_mat_mod::psb_s_base_csmv
interface
subroutine psb_s_ecsr_csmv(alpha,a,x,beta,y,info,trans)
import
class(psb_s_ecsr_sparse_mat), intent(in) :: a
real(psb_spk_), intent(in) :: alpha, beta, x(:)
real(psb_spk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
end subroutine psb_s_ecsr_csmv
end interface
!> \memberof psb_s_ecsr_sparse_mat
!! \see psb_s_base_mat_mod::psb_s_base_cp_from_coo
interface
subroutine psb_s_ecsr_cmp_nerwp(a,info)
import
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
end subroutine psb_s_ecsr_cmp_nerwp
end interface
!> \memberof psb_s_ecsr_sparse_mat
!! \see psb_s_base_mat_mod::psb_s_base_cp_from_coo
interface
subroutine psb_s_cp_ecsr_from_coo(a,b,info)
import
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
class(psb_s_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_s_cp_ecsr_from_coo
end interface
!> \memberof psb_s_ecsr_sparse_mat
!! \see psb_s_base_mat_mod::psb_s_base_cp_from_fmt
interface
subroutine psb_s_cp_ecsr_from_fmt(a,b,info)
import
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
class(psb_s_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_s_cp_ecsr_from_fmt
end interface
!> \memberof psb_s_ecsr_sparse_mat
!! \see psb_s_base_mat_mod::psb_s_base_mv_from_coo
interface
subroutine psb_s_mv_ecsr_from_coo(a,b,info)
import
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
class(psb_s_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_s_mv_ecsr_from_coo
end interface
!> \memberof psb_s_ecsr_sparse_mat
!! \see psb_s_base_mat_mod::psb_s_base_mv_from_fmt
interface
subroutine psb_s_mv_ecsr_from_fmt(a,b,info)
import
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
class(psb_s_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_s_mv_ecsr_from_fmt
end interface
!> \memberof psb_s_ecsr_sparse_mat
!! \see psb_base_mat_mod::psb_base_mold
interface
subroutine psb_s_ecsr_mold(a,b,info)
import
class(psb_s_ecsr_sparse_mat), intent(in) :: a
class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_s_ecsr_mold
end interface
!> \namespace psb_base_mod \class psb_ls_csr_sparse_mat
!! \extends psb_ls_base_mat_mod::psb_ls_base_sparse_mat
!!
!! psb_ls_csr_sparse_mat type and the related methods.
@ -1178,6 +1282,26 @@ contains
function s_ecsr_get_fmt() result(res)
implicit none
character(len=5) :: res
res = 'ECSR'
end function s_ecsr_get_fmt
subroutine s_ecsr_free(a)
implicit none
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
if (allocated(a%nerwp)) deallocate(a%nerwp)
a%nnerws = 0
call a%psb_s_csr_sparse_mat%free()
return
end subroutine s_ecsr_free
! == ===================================
!
!

@ -79,12 +79,14 @@
module psb_s_mat_mod
use psb_s_base_mat_mod
use psb_s_csr_mat_mod, only : psb_s_csr_sparse_mat, psb_ls_csr_sparse_mat
use psb_s_csr_mat_mod, only : psb_s_csr_sparse_mat, psb_ls_csr_sparse_mat,&
& psb_s_ecsr_sparse_mat
use psb_s_csc_mat_mod, only : psb_s_csc_sparse_mat, psb_ls_csc_sparse_mat
type :: psb_sspmat_type
class(psb_s_base_sparse_mat), allocatable :: a
class(psb_s_base_sparse_mat), allocatable :: ad, and
integer(psb_ipk_) :: remote_build=psb_matbld_noremote_
type(psb_ls_coo_sparse_mat), allocatable :: rmta
@ -202,6 +204,7 @@ module psb_s_mat_mod
procedure, pass(a) :: cscnv_ip => psb_s_cscnv_ip
procedure, pass(a) :: cscnv_base => psb_s_cscnv_base
generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base
procedure, pass(a) :: split_nd => psb_s_split_nd
procedure, pass(a) :: clone => psb_sspmat_clone
procedure, pass(a) :: move_alloc => psb_sspmat_type_move
!
@ -840,6 +843,18 @@ module psb_s_mat_mod
!
!
interface
subroutine psb_s_split_nd(a,n_rows,n_cols,info)
import :: psb_ipk_, psb_lpk_, psb_sspmat_type, psb_spk_, psb_s_base_sparse_mat
class(psb_sspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
!!$ integer(psb_ipk_),optional, intent(in) :: dupl
!!$ character(len=*), optional, intent(in) :: type
!!$ class(psb_s_base_sparse_mat), intent(in), optional :: mold
end subroutine psb_s_split_nd
end interface
!
! CSCNV: switches to a different internal derived type.
! 3 versions: copying to target
@ -859,7 +874,6 @@ module psb_s_mat_mod
end subroutine psb_s_cscnv
end interface
interface
subroutine psb_s_cscnv_ip(a,iinfo,type,mold,dupl)
import :: psb_ipk_, psb_lpk_, psb_sspmat_type, psb_spk_, psb_s_base_sparse_mat
@ -871,7 +885,6 @@ module psb_s_mat_mod
end subroutine psb_s_cscnv_ip
end interface
interface
subroutine psb_s_cscnv_base(a,b,info,dupl)
import :: psb_ipk_, psb_lpk_, psb_sspmat_type, psb_spk_, psb_s_base_sparse_mat

@ -102,6 +102,9 @@ module psb_s_vect_mod
procedure, pass(z) :: axpby_v2 => s_vect_axpby_v2
procedure, pass(z) :: axpby_a2 => s_vect_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: abgdxyz => s_vect_abgdxyz
procedure, pass(z) :: xyzw => s_vect_xyzw
procedure, pass(y) :: mlt_v => s_vect_mlt_v
procedure, pass(y) :: mlt_a => s_vect_mlt_a
procedure, pass(z) :: mlt_a_2 => s_vect_mlt_a_2
@ -778,6 +781,38 @@ contains
end subroutine s_vect_axpby_a2
subroutine s_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_s_vect_type), intent(inout) :: x
class(psb_s_vect_type), intent(inout) :: y
class(psb_s_vect_type), intent(inout) :: z
real(psb_spk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
if (allocated(z%v)) &
call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
end subroutine s_vect_abgdxyz
subroutine s_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_s_vect_type), intent(inout) :: x
class(psb_s_vect_type), intent(inout) :: y
class(psb_s_vect_type), intent(inout) :: z
class(psb_s_vect_type), intent(inout) :: w
real(psb_spk_), intent (in) :: a, b, c, d, e, f
integer(psb_ipk_), intent(out) :: info
if (allocated(w%v)) &
call w%v%xyzw(m,a,b,c,d,e,f,x%v,y%v,z%v,info)
end subroutine s_vect_xyzw
subroutine s_vect_mlt_v(x, y, info)
use psi_serial_mod
implicit none
@ -1141,7 +1176,7 @@ contains
end if
end function s_vect_nrm2_weight
function s_vect_nrm2_weight_mask(n,x,w,id,info,aux) result(res)
use psi_serial_mod
implicit none

@ -155,6 +155,9 @@ module psb_z_base_vect_mod
procedure, pass(z) :: axpby_v2 => z_base_axpby_v2
procedure, pass(z) :: axpby_a2 => z_base_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: abgdxyz => z_base_abgdxyz
procedure, pass(w) :: xyzw => z_base_xyzw
!
! Vector by vector multiplication. Need all variants
! to handle multiple requirements from preconditioners
@ -1018,7 +1021,7 @@ contains
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param x The class(base_vect) to be added
!! \param beta scalar alpha
!! \param beta scalar beta
!! \param info return code
!!
subroutine z_base_axpby_v(m,alpha, x, beta, y, info)
@ -1047,7 +1050,7 @@ contains
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param x The class(base_vect) to be added
!! \param beta scalar alpha
!! \param beta scalar beta
!! \param y The class(base_vect) to be added
!! \param z The class(base_vect) to be returned
!! \param info return code
@ -1078,7 +1081,7 @@ contains
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param x(:) The array to be added
!! \param beta scalar alpha
!! \param beta scalar beta
!! \param info return code
!!
subroutine z_base_axpby_a(m,alpha, x, beta, y, info)
@ -1126,6 +1129,64 @@ contains
end subroutine z_base_axpby_a2
!
! ABGDXYZ is invoked via Z, hence the structure below.
!
!
!> Function base_abgdxyz
!! \memberof psb_z_base_vect_type
!! \brief ABGDXYZ combines two AXPBYs: y=alpha*x+beta*y, z=gamma*y+delta*z
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param beta scalar beta
!! \param gamma scalar gamma
!! \param delta scalar delta
!! \param x The class(base_vect) to be added
!! \param y The class(base_vect) to be added
!! \param z The class(base_vect) to be added
!! \param info return code
!!
subroutine z_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_z_base_vect_type), intent(inout) :: x
class(psb_z_base_vect_type), intent(inout) :: y
class(psb_z_base_vect_type), intent(inout) :: z
complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
if (x%is_dev().and.(alpha/=zzero)) call x%sync()
if (y%is_dev().and.(beta/=zzero)) call y%sync()
if (z%is_dev().and.(delta/=zzero)) call z%sync()
call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call y%set_host()
call z%set_host()
end subroutine z_base_abgdxyz
subroutine z_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_z_base_vect_type), intent(inout) :: x
class(psb_z_base_vect_type), intent(inout) :: y
class(psb_z_base_vect_type), intent(inout) :: z
class(psb_z_base_vect_type), intent(inout) :: w
complex(psb_dpk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
if (x%is_dev().and.(a/=zzero)) call x%sync()
if (y%is_dev().and.(b/=zzero)) call y%sync()
if (z%is_dev().and.(d/=zzero)) call z%sync()
if (w%is_dev().and.(f/=zzero)) call w%sync()
call psi_xyzw(m,a,b,c,d,e,f,x%v, y%v, z%v, w%v, info)
call y%set_host()
call z%set_host()
call w%set_host()
end subroutine z_base_xyzw
!
! Multiple variants of two operations:

@ -579,7 +579,111 @@ module psb_z_csr_mat_mod
end subroutine psb_z_csr_scals
end interface
!> \namespace psb_base_mod \class psb_z_ecsr_sparse_mat
type, extends(psb_z_csr_sparse_mat) :: psb_z_ecsr_sparse_mat
!> Number of non-empty rows
integer(psb_ipk_) :: nnerws
!> Indices of non-empty rows
integer(psb_ipk_), allocatable :: nerwp(:)
contains
procedure, nopass :: get_fmt => z_ecsr_get_fmt
! procedure, pass(a) :: csmm => psb_z_ecsr_csmm
procedure, pass(a) :: csmv => psb_z_ecsr_csmv
procedure, pass(a) :: cp_from_coo => psb_z_cp_ecsr_from_coo
procedure, pass(a) :: cp_from_fmt => psb_z_cp_ecsr_from_fmt
procedure, pass(a) :: mv_from_coo => psb_z_mv_ecsr_from_coo
procedure, pass(a) :: mv_from_fmt => psb_z_mv_ecsr_from_fmt
procedure, pass(a) :: cmp_nerwp => psb_z_ecsr_cmp_nerwp
procedure, pass(a) :: free => z_ecsr_free
procedure, pass(a) :: mold => psb_z_ecsr_mold
end type psb_z_ecsr_sparse_mat
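! --- Editor's illustrative sketch (not part of the original patch) ----------
! Besides the usual CSR arrays, the ECSR variant keeps the list of rows that
! actually contain nonzeros (nerwp(1:nnerws)).  The y = y + alpha*A*x kernel
! can then skip empty rows entirely, as in this standalone sketch with
! hypothetical names and default real kind:
subroutine ecsr_spmv_sketch(nnerws,nerwp,irp,ja,val,alpha,x,y)
  implicit none
  integer, intent(in) :: nnerws, nerwp(*), irp(*), ja(*)
  real, intent(in)    :: alpha, val(*), x(*)
  real, intent(inout) :: y(*)
  integer :: ir, i, j
  real :: acc
  do ir = 1, nnerws            ! loop only over non-empty rows
    i   = nerwp(ir)
    acc = 0.0
    do j = irp(i), irp(i+1)-1
      acc = acc + val(j)*x(ja(j))
    end do
    y(i) = y(i) + alpha*acc    ! rows with no entries are never touched
  end do
end subroutine ecsr_spmv_sketch
! ----------------------------------------------------------------------------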
!> \memberof psb_z_ecsr_sparse_mat
!! \see psb_z_base_mat_mod::psb_z_base_csmv
interface
subroutine psb_z_ecsr_csmv(alpha,a,x,beta,y,info,trans)
import
class(psb_z_ecsr_sparse_mat), intent(in) :: a
complex(psb_dpk_), intent(in) :: alpha, beta, x(:)
complex(psb_dpk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
end subroutine psb_z_ecsr_csmv
end interface
!> \memberof psb_z_ecsr_sparse_mat
!! \brief Compute the list of non-empty rows (nerwp)
interface
subroutine psb_z_ecsr_cmp_nerwp(a,info)
import
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
end subroutine psb_z_ecsr_cmp_nerwp
end interface
!> \memberof psb_z_ecsr_sparse_mat
!! \see psb_z_base_mat_mod::psb_z_base_cp_from_coo
interface
subroutine psb_z_cp_ecsr_from_coo(a,b,info)
import
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
class(psb_z_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_z_cp_ecsr_from_coo
end interface
!> \memberof psb_z_ecsr_sparse_mat
!! \see psb_z_base_mat_mod::psb_z_base_cp_from_fmt
interface
subroutine psb_z_cp_ecsr_from_fmt(a,b,info)
import
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
class(psb_z_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_z_cp_ecsr_from_fmt
end interface
!> \memberof psb_z_ecsr_sparse_mat
!! \see psb_z_base_mat_mod::psb_z_base_mv_from_coo
interface
subroutine psb_z_mv_ecsr_from_coo(a,b,info)
import
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
class(psb_z_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_z_mv_ecsr_from_coo
end interface
!> \memberof psb_z_ecsr_sparse_mat
!! \see psb_z_base_mat_mod::psb_z_base_mv_from_fmt
interface
subroutine psb_z_mv_ecsr_from_fmt(a,b,info)
import
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
class(psb_z_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_z_mv_ecsr_from_fmt
end interface
!> \memberof psb_z_ecsr_sparse_mat
!! \see psb_base_mat_mod::psb_base_mold
interface
subroutine psb_z_ecsr_mold(a,b,info)
import
class(psb_z_ecsr_sparse_mat), intent(in) :: a
class(psb_z_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_z_ecsr_mold
end interface
!> \namespace psb_base_mod \class psb_lz_csr_sparse_mat
!! \extends psb_lz_base_mat_mod::psb_lz_base_sparse_mat
!!
!! psb_lz_csr_sparse_mat type and the related methods.
@ -1178,6 +1282,26 @@ contains
function z_ecsr_get_fmt() result(res)
implicit none
character(len=5) :: res
res = 'ECSR'
end function z_ecsr_get_fmt
subroutine z_ecsr_free(a)
implicit none
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
if (allocated(a%nerwp)) deallocate(a%nerwp)
a%nnerws = 0
call a%psb_z_csr_sparse_mat%free()
return
end subroutine z_ecsr_free
! == ===================================
!
!

@ -79,12 +79,14 @@
module psb_z_mat_mod
use psb_z_base_mat_mod
use psb_z_csr_mat_mod, only : psb_z_csr_sparse_mat, psb_lz_csr_sparse_mat
use psb_z_csr_mat_mod, only : psb_z_csr_sparse_mat, psb_lz_csr_sparse_mat,&
& psb_z_ecsr_sparse_mat
use psb_z_csc_mat_mod, only : psb_z_csc_sparse_mat, psb_lz_csc_sparse_mat
type :: psb_zspmat_type
class(psb_z_base_sparse_mat), allocatable :: a
class(psb_z_base_sparse_mat), allocatable :: ad, and
integer(psb_ipk_) :: remote_build=psb_matbld_noremote_
type(psb_lz_coo_sparse_mat), allocatable :: rmta
@ -202,6 +204,7 @@ module psb_z_mat_mod
procedure, pass(a) :: cscnv_ip => psb_z_cscnv_ip
procedure, pass(a) :: cscnv_base => psb_z_cscnv_base
generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base
procedure, pass(a) :: split_nd => psb_z_split_nd
procedure, pass(a) :: clone => psb_zspmat_clone
procedure, pass(a) :: move_alloc => psb_zspmat_type_move
!
@ -840,6 +843,18 @@ module psb_z_mat_mod
!
!
interface
subroutine psb_z_split_nd(a,n_rows,n_cols,info)
import :: psb_ipk_, psb_lpk_, psb_zspmat_type, psb_dpk_, psb_z_base_sparse_mat
class(psb_zspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
!!$ integer(psb_ipk_),optional, intent(in) :: dupl
!!$ character(len=*), optional, intent(in) :: type
!!$ class(psb_z_base_sparse_mat), intent(in), optional :: mold
end subroutine psb_z_split_nd
end interface
!
! CSCNV: switches to a different internal derived type.
! 3 versions: copying to target
@ -859,7 +874,6 @@ module psb_z_mat_mod
end subroutine psb_z_cscnv
end interface
interface
subroutine psb_z_cscnv_ip(a,iinfo,type,mold,dupl)
import :: psb_ipk_, psb_lpk_, psb_zspmat_type, psb_dpk_, psb_z_base_sparse_mat
@ -871,7 +885,6 @@ module psb_z_mat_mod
end subroutine psb_z_cscnv_ip
end interface
interface
subroutine psb_z_cscnv_base(a,b,info,dupl)
import :: psb_ipk_, psb_lpk_, psb_zspmat_type, psb_dpk_, psb_z_base_sparse_mat

@ -102,6 +102,9 @@ module psb_z_vect_mod
procedure, pass(z) :: axpby_v2 => z_vect_axpby_v2
procedure, pass(z) :: axpby_a2 => z_vect_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: abgdxyz => z_vect_abgdxyz
procedure, pass(z) :: xyzw => z_vect_xyzw
procedure, pass(y) :: mlt_v => z_vect_mlt_v
procedure, pass(y) :: mlt_a => z_vect_mlt_a
procedure, pass(z) :: mlt_a_2 => z_vect_mlt_a_2
@ -771,6 +774,38 @@ contains
end subroutine z_vect_axpby_a2
subroutine z_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_z_vect_type), intent(inout) :: x
class(psb_z_vect_type), intent(inout) :: y
class(psb_z_vect_type), intent(inout) :: z
complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
if (allocated(z%v)) &
call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
end subroutine z_vect_abgdxyz
subroutine z_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_z_vect_type), intent(inout) :: x
class(psb_z_vect_type), intent(inout) :: y
class(psb_z_vect_type), intent(inout) :: z
class(psb_z_vect_type), intent(inout) :: w
complex(psb_dpk_), intent (in) :: a, b, c, d, e, f
integer(psb_ipk_), intent(out) :: info
if (allocated(w%v)) &
call w%v%xyzw(m,a,b,c,d,e,f,x%v,y%v,z%v,info)
end subroutine z_vect_xyzw
subroutine z_vect_mlt_v(x, y, info)
use psi_serial_mod
implicit none
@ -1134,7 +1169,7 @@ contains
end if
end function z_vect_nrm2_weight
function z_vect_nrm2_weight_mask(n,x,w,id,info,aux) result(res)
use psi_serial_mod
implicit none

@ -250,7 +250,7 @@ Module psb_c_tools_mod
end interface
interface psb_spasb
subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold)
subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold, bld_and)
import
implicit none
type(psb_cspmat_type), intent (inout) :: a
@ -259,6 +259,7 @@ Module psb_c_tools_mod
integer(psb_ipk_),optional, intent(in) :: upd
character(len=*), optional, intent(in) :: afmt
class(psb_c_base_sparse_mat), intent(in), optional :: mold
logical, intent(in), optional :: bld_and
end subroutine psb_cspasb
end interface

@ -250,7 +250,7 @@ Module psb_d_tools_mod
end interface
interface psb_spasb
subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold)
subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold, bld_and)
import
implicit none
type(psb_dspmat_type), intent (inout) :: a
@ -259,6 +259,7 @@ Module psb_d_tools_mod
integer(psb_ipk_),optional, intent(in) :: upd
character(len=*), optional, intent(in) :: afmt
class(psb_d_base_sparse_mat), intent(in), optional :: mold
logical, intent(in), optional :: bld_and
end subroutine psb_dspasb
end interface

@ -250,7 +250,7 @@ Module psb_s_tools_mod
end interface
interface psb_spasb
subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold)
subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold, bld_and)
import
implicit none
type(psb_sspmat_type), intent (inout) :: a
@ -259,6 +259,7 @@ Module psb_s_tools_mod
integer(psb_ipk_),optional, intent(in) :: upd
character(len=*), optional, intent(in) :: afmt
class(psb_s_base_sparse_mat), intent(in), optional :: mold
logical, intent(in), optional :: bld_and
end subroutine psb_sspasb
end interface

@ -250,7 +250,7 @@ Module psb_z_tools_mod
end interface
interface psb_spasb
subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold)
subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold, bld_and)
import
implicit none
type(psb_zspmat_type), intent (inout) :: a
@ -259,6 +259,7 @@ Module psb_z_tools_mod
integer(psb_ipk_),optional, intent(in) :: upd
character(len=*), optional, intent(in) :: afmt
class(psb_z_base_sparse_mat), intent(in), optional :: mold
logical, intent(in), optional :: bld_and
end subroutine psb_zspasb
end interface
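! --- Editor's note (illustrative, not part of the original patch) -----------
! The new optional bld_and argument is assumed to request, at assembly time,
! the additional split of the local matrix into its diagonal block (a%ad) and
! off-diagonal block (a%and), enabling the communication/computation overlap
! used in the spmv routines below.  A hypothetical call site:
!
!   call psb_spasb(a, desc_a, info, afmt='CSR', bld_and=.true.)
! -----------------------------------------------------------------------------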

@ -741,3 +741,86 @@ subroutine psb_caddconst_vect(x,b,z,desc_a,info)
return
end subroutine psb_caddconst_vect
subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
use psb_base_mod, psb_protect_name => psb_cabgdxyz_vect
implicit none
type(psb_c_vect_type), intent (inout) :: x
type(psb_c_vect_type), intent (inout) :: y
type(psb_c_vect_type), intent (inout) :: z
complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np, me,&
& err_act, iix, jjx, iiy, jjy, nr
integer(psb_lpk_) :: ix, ijx, iy, ijy, m
character(len=20) :: name, ch_err
name='psb_cabgdxyz_vect'
if (psb_errstatus_fatal()) return
info=psb_success_
call psb_erractionsave(err_act)
ctxt=desc_a%get_context()
call psb_info(ctxt, me, np)
if (np == -ione) then
info = psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(x%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(y%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(z%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
ix = ione
iy = ione
m = desc_a%get_global_rows()
nr = desc_a%get_local_rows()
! check vector correctness
call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_chkvect 1'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
call psb_chkvect(m,lone,z%get_nrows(),iy,lone,desc_a,info,iiy,jjy)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_chkvect 2'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
if(desc_a%get_local_rows() > 0) then
call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info)
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psb_cabgdxyz_vect
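! --- Editor's note (illustrative, not part of the original patch) -----------
! Hypothetical call site: for assembled vectors x, y, z on descriptor desc_a,
! the single call
!
!   call psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z, desc_a, info)
!
! is assumed to replace the two separate updates
!   call psb_geaxpby(alpha, x, beta,  y, desc_a, info)
!   call psb_geaxpby(gamma, y, delta, z, desc_a, info)
! fusing them into one pass over the local entries.
! -----------------------------------------------------------------------------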

@ -83,6 +83,9 @@ subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,&
character(len=20) :: name, ch_err
logical :: aliw, doswap_
integer(psb_ipk_) :: debug_level, debug_unit
logical, parameter :: do_timings=.true.
integer(psb_ipk_), save :: mv_phase1=-1, mv_phase2=-1, mv_phase3=-1, mv_phase4=-1
integer(psb_ipk_), save :: mv_phase11=-1, mv_phase12=-1
name='psb_cspmv'
info=psb_success_
@ -130,6 +133,19 @@ subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,&
call psb_errpush(info,name)
goto 9999
end if
if ((do_timings).and.(mv_phase1==-1)) &
& mv_phase1 = psb_get_timer_idx("SPMM: and send ")
if ((do_timings).and.(mv_phase2==-1)) &
& mv_phase2 = psb_get_timer_idx("SPMM: and cmp ad")
if ((do_timings).and.(mv_phase3==-1)) &
& mv_phase3 = psb_get_timer_idx("SPMM: and rcv")
if ((do_timings).and.(mv_phase4==-1)) &
& mv_phase4 = psb_get_timer_idx("SPMM: and cmp and")
if ((do_timings).and.(mv_phase11==-1)) &
& mv_phase11 = psb_get_timer_idx("SPMM: noand exch ")
if ((do_timings).and.(mv_phase12==-1)) &
& mv_phase12 = psb_get_timer_idx("SPMM: noand cmp")
m = desc_a%get_global_rows()
n = desc_a%get_global_cols()
@ -178,14 +194,46 @@ subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,&
if (trans_ == 'N') then
! Matrix is not transposed
if (doswap_) then
call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),&
& czero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
if (allocated(a%ad)) then
block
logical, parameter :: do_timings=.true.
real(psb_dpk_) :: t1, t2, t3, t4, t5
!if (me==0) write(0,*) 'going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt()
if (do_timings) call psb_barrier(ctxt)
if (do_timings) call psb_tic(mv_phase1)
if (doswap_) call psi_swapdata(psb_swap_send_,&
& czero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
if (do_timings) call psb_toc(mv_phase1)
if (do_timings) call psb_tic(mv_phase2)
call a%ad%spmm(alpha,x%v,beta,y%v,info)
if (do_timings) call psb_tic(mv_phase3)
if (doswap_) call psi_swapdata(psb_swap_recv_,&
& czero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
if (do_timings) call psb_toc(mv_phase3)
if (do_timings) call psb_tic(mv_phase4)
call a%and%spmm(alpha,x%v,cone,y%v,info)
if (do_timings) call psb_toc(mv_phase4)
end block
else
block
logical, parameter :: do_timings=.true.
real(psb_dpk_) :: t1, t2, t3, t4, t5
if (do_timings) call psb_barrier(ctxt)
if (do_timings) call psb_tic(mv_phase11)
if (doswap_) then
call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),&
& czero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
end if
if (do_timings) call psb_toc(mv_phase11)
if (do_timings) call psb_tic(mv_phase12)
call psb_csmm(alpha,a,x,beta,y,info)
if (do_timings) call psb_toc(mv_phase12)
end block
end if
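! --- Editor's note (illustrative, not part of the original patch) -----------
! When the ad/and split is available, the branch above overlaps halo exchange
! with computation, schematically:
!   1. post halo sends for x                  (psi_swapdata, send only)
!   2. y = alpha*ad*x + beta*y                (purely local block)
!   3. complete halo receives for x           (psi_swapdata, receive only)
!   4. y = alpha*and*x + y                    (block needing halo entries)
! Otherwise the original exchange-then-multiply path (psb_csmm) is taken.
! -----------------------------------------------------------------------------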
call psb_csmm(alpha,a,x,beta,y,info)
if(info /= psb_success_) then
info = psb_err_from_subroutine_non_
call psb_errpush(info,name)

@ -741,3 +741,86 @@ subroutine psb_daddconst_vect(x,b,z,desc_a,info)
return
end subroutine psb_daddconst_vect
subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
use psb_base_mod, psb_protect_name => psb_dabgdxyz_vect
implicit none
type(psb_d_vect_type), intent (inout) :: x
type(psb_d_vect_type), intent (inout) :: y
type(psb_d_vect_type), intent (inout) :: z
real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np, me,&
& err_act, iix, jjx, iiy, jjy, nr
integer(psb_lpk_) :: ix, ijx, iy, ijy, m
character(len=20) :: name, ch_err
name='psb_dabgdxyz_vect'
if (psb_errstatus_fatal()) return
info=psb_success_
call psb_erractionsave(err_act)
ctxt=desc_a%get_context()
call psb_info(ctxt, me, np)
if (np == -ione) then
info = psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(x%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(y%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(z%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
ix = ione
iy = ione
m = desc_a%get_global_rows()
nr = desc_a%get_local_rows()
! check vector correctness
call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_chkvect 1'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
call psb_chkvect(m,lone,z%get_nrows(),iy,lone,desc_a,info,iiy,jjy)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_chkvect 2'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
if(desc_a%get_local_rows() > 0) then
call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info)
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psb_dabgdxyz_vect

@ -83,6 +83,9 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,&
character(len=20) :: name, ch_err
logical :: aliw, doswap_
integer(psb_ipk_) :: debug_level, debug_unit
logical, parameter :: do_timings=.true.
integer(psb_ipk_), save :: mv_phase1=-1, mv_phase2=-1, mv_phase3=-1, mv_phase4=-1
integer(psb_ipk_), save :: mv_phase11=-1, mv_phase12=-1
name='psb_dspmv'
info=psb_success_
@ -130,6 +133,19 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,&
call psb_errpush(info,name)
goto 9999
end if
if ((do_timings).and.(mv_phase1==-1)) &
& mv_phase1 = psb_get_timer_idx("SPMM: and send ")
if ((do_timings).and.(mv_phase2==-1)) &
& mv_phase2 = psb_get_timer_idx("SPMM: and cmp ad")
if ((do_timings).and.(mv_phase3==-1)) &
& mv_phase3 = psb_get_timer_idx("SPMM: and rcv")
if ((do_timings).and.(mv_phase4==-1)) &
& mv_phase4 = psb_get_timer_idx("SPMM: and cmp and")
if ((do_timings).and.(mv_phase11==-1)) &
& mv_phase11 = psb_get_timer_idx("SPMM: noand exch ")
if ((do_timings).and.(mv_phase12==-1)) &
& mv_phase12 = psb_get_timer_idx("SPMM: noand cmp")
m = desc_a%get_global_rows()
n = desc_a%get_global_cols()
@ -178,14 +194,46 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,&
if (trans_ == 'N') then
! Matrix is not transposed
if (doswap_) then
call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),&
& dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
if (allocated(a%ad)) then
block
logical, parameter :: do_timings=.true.
real(psb_dpk_) :: t1, t2, t3, t4, t5
!if (me==0) write(0,*) 'going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt()
if (do_timings) call psb_barrier(ctxt)
if (do_timings) call psb_tic(mv_phase1)
if (doswap_) call psi_swapdata(psb_swap_send_,&
& dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
if (do_timings) call psb_toc(mv_phase1)
if (do_timings) call psb_tic(mv_phase2)
call a%ad%spmm(alpha,x%v,beta,y%v,info)
if (do_timings) call psb_tic(mv_phase3)
if (doswap_) call psi_swapdata(psb_swap_recv_,&
& dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
if (do_timings) call psb_toc(mv_phase3)
if (do_timings) call psb_tic(mv_phase4)
call a%and%spmm(alpha,x%v,done,y%v,info)
if (do_timings) call psb_toc(mv_phase4)
end block
else
block
logical, parameter :: do_timings=.true.
real(psb_dpk_) :: t1, t2, t3, t4, t5
if (do_timings) call psb_barrier(ctxt)
if (do_timings) call psb_tic(mv_phase11)
if (doswap_) then
call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),&
& dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
end if
if (do_timings) call psb_toc(mv_phase11)
if (do_timings) call psb_tic(mv_phase12)
call psb_csmm(alpha,a,x,beta,y,info)
if (do_timings) call psb_toc(mv_phase12)
end block
end if
call psb_csmm(alpha,a,x,beta,y,info)
if(info /= psb_success_) then
info = psb_err_from_subroutine_non_
call psb_errpush(info,name)

@ -741,3 +741,86 @@ subroutine psb_saddconst_vect(x,b,z,desc_a,info)
return
end subroutine psb_saddconst_vect
subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
use psb_base_mod, psb_protect_name => psb_sabgdxyz_vect
implicit none
type(psb_s_vect_type), intent (inout) :: x
type(psb_s_vect_type), intent (inout) :: y
type(psb_s_vect_type), intent (inout) :: z
real(psb_spk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np, me,&
& err_act, iix, jjx, iiy, jjy, nr
integer(psb_lpk_) :: ix, ijx, iy, ijy, m
character(len=20) :: name, ch_err
name='psb_sabgdxyz_vect'
if (psb_errstatus_fatal()) return
info=psb_success_
call psb_erractionsave(err_act)
ctxt=desc_a%get_context()
call psb_info(ctxt, me, np)
if (np == -ione) then
info = psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(x%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(y%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(z%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
ix = ione
iy = ione
m = desc_a%get_global_rows()
nr = desc_a%get_local_rows()
! check vector correctness
call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_chkvect 1'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
call psb_chkvect(m,lone,z%get_nrows(),iy,lone,desc_a,info,iiy,jjy)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_chkvect 2'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
if(desc_a%get_local_rows() > 0) then
call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info)
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psb_sabgdxyz_vect

@ -83,6 +83,9 @@ subroutine psb_sspmv_vect(alpha,a,x,beta,y,desc_a,info,&
character(len=20) :: name, ch_err
logical :: aliw, doswap_
integer(psb_ipk_) :: debug_level, debug_unit
logical, parameter :: do_timings=.true.
integer(psb_ipk_), save :: mv_phase1=-1, mv_phase2=-1, mv_phase3=-1, mv_phase4=-1
integer(psb_ipk_), save :: mv_phase11=-1, mv_phase12=-1
name='psb_sspmv'
info=psb_success_
@ -130,6 +133,19 @@ subroutine psb_sspmv_vect(alpha,a,x,beta,y,desc_a,info,&
call psb_errpush(info,name)
goto 9999
end if
if ((do_timings).and.(mv_phase1==-1)) &
& mv_phase1 = psb_get_timer_idx("SPMM: and send ")
if ((do_timings).and.(mv_phase2==-1)) &
& mv_phase2 = psb_get_timer_idx("SPMM: and cmp ad")
if ((do_timings).and.(mv_phase3==-1)) &
& mv_phase3 = psb_get_timer_idx("SPMM: and rcv")
if ((do_timings).and.(mv_phase4==-1)) &
& mv_phase4 = psb_get_timer_idx("SPMM: and cmp and")
if ((do_timings).and.(mv_phase11==-1)) &
& mv_phase11 = psb_get_timer_idx("SPMM: noand exch ")
if ((do_timings).and.(mv_phase12==-1)) &
& mv_phase12 = psb_get_timer_idx("SPMM: noand cmp")
m = desc_a%get_global_rows()
n = desc_a%get_global_cols()
@ -178,14 +194,46 @@ subroutine psb_sspmv_vect(alpha,a,x,beta,y,desc_a,info,&
if (trans_ == 'N') then
! Matrix is not transposed
if (doswap_) then
call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),&
& szero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
if (allocated(a%ad)) then
block
logical, parameter :: do_timings=.true.
real(psb_dpk_) :: t1, t2, t3, t4, t5
!if (me==0) write(0,*) 'going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt()
if (do_timings) call psb_barrier(ctxt)
if (do_timings) call psb_tic(mv_phase1)
if (doswap_) call psi_swapdata(psb_swap_send_,&
& szero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
if (do_timings) call psb_toc(mv_phase1)
if (do_timings) call psb_tic(mv_phase2)
call a%ad%spmm(alpha,x%v,beta,y%v,info)
if (do_timings) call psb_tic(mv_phase3)
if (doswap_) call psi_swapdata(psb_swap_recv_,&
& szero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
if (do_timings) call psb_toc(mv_phase3)
if (do_timings) call psb_tic(mv_phase4)
call a%and%spmm(alpha,x%v,sone,y%v,info)
if (do_timings) call psb_toc(mv_phase4)
end block
else
block
logical, parameter :: do_timings=.true.
real(psb_dpk_) :: t1, t2, t3, t4, t5
if (do_timings) call psb_barrier(ctxt)
if (do_timings) call psb_tic(mv_phase11)
if (doswap_) then
call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),&
& szero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
end if
if (do_timings) call psb_toc(mv_phase11)
if (do_timings) call psb_tic(mv_phase12)
call psb_csmm(alpha,a,x,beta,y,info)
if (do_timings) call psb_toc(mv_phase12)
end block
end if
call psb_csmm(alpha,a,x,beta,y,info)
if(info /= psb_success_) then
info = psb_err_from_subroutine_non_
call psb_errpush(info,name)

@ -741,3 +741,86 @@ subroutine psb_zaddconst_vect(x,b,z,desc_a,info)
return
end subroutine psb_zaddconst_vect
subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
use psb_base_mod, psb_protect_name => psb_zabgdxyz_vect
implicit none
type(psb_z_vect_type), intent (inout) :: x
type(psb_z_vect_type), intent (inout) :: y
type(psb_z_vect_type), intent (inout) :: z
complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np, me,&
& err_act, iix, jjx, iiy, jjy, nr
integer(psb_lpk_) :: ix, ijx, iy, ijy, m
character(len=20) :: name, ch_err
name='psb_zabgdxyz_vect'
if (psb_errstatus_fatal()) return
info=psb_success_
call psb_erractionsave(err_act)
ctxt=desc_a%get_context()
call psb_info(ctxt, me, np)
if (np == -ione) then
info = psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(x%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(y%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(z%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
ix = ione
iy = ione
m = desc_a%get_global_rows()
nr = desc_a%get_local_rows()
! check vector correctness
call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_chkvect 1'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
call psb_chkvect(m,lone,z%get_nrows(),iy,lone,desc_a,info,iiy,jjy)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_chkvect 2'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
if(desc_a%get_local_rows() > 0) then
call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info)
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psb_zabgdxyz_vect

@ -83,6 +83,9 @@ subroutine psb_zspmv_vect(alpha,a,x,beta,y,desc_a,info,&
character(len=20) :: name, ch_err
logical :: aliw, doswap_
integer(psb_ipk_) :: debug_level, debug_unit
logical, parameter :: do_timings=.true.
integer(psb_ipk_), save :: mv_phase1=-1, mv_phase2=-1, mv_phase3=-1, mv_phase4=-1
integer(psb_ipk_), save :: mv_phase11=-1, mv_phase12=-1
name='psb_zspmv'
info=psb_success_
@ -130,6 +133,19 @@ subroutine psb_zspmv_vect(alpha,a,x,beta,y,desc_a,info,&
call psb_errpush(info,name)
goto 9999
end if
if ((do_timings).and.(mv_phase1==-1)) &
& mv_phase1 = psb_get_timer_idx("SPMM: and send ")
if ((do_timings).and.(mv_phase2==-1)) &
& mv_phase2 = psb_get_timer_idx("SPMM: and cmp ad")
if ((do_timings).and.(mv_phase3==-1)) &
& mv_phase3 = psb_get_timer_idx("SPMM: and rcv")
if ((do_timings).and.(mv_phase4==-1)) &
& mv_phase4 = psb_get_timer_idx("SPMM: and cmp and")
if ((do_timings).and.(mv_phase11==-1)) &
& mv_phase11 = psb_get_timer_idx("SPMM: noand exch ")
if ((do_timings).and.(mv_phase12==-1)) &
& mv_phase12 = psb_get_timer_idx("SPMM: noand cmp")
m = desc_a%get_global_rows()
n = desc_a%get_global_cols()
@ -178,14 +194,46 @@ subroutine psb_zspmv_vect(alpha,a,x,beta,y,desc_a,info,&
if (trans_ == 'N') then
! Matrix is not transposed
if (doswap_) then
call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),&
& zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
if (allocated(a%ad)) then
block
logical, parameter :: do_timings=.true.
real(psb_dpk_) :: t1, t2, t3, t4, t5
!if (me==0) write(0,*) 'going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt()
if (do_timings) call psb_barrier(ctxt)
if (do_timings) call psb_tic(mv_phase1)
if (doswap_) call psi_swapdata(psb_swap_send_,&
& zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
if (do_timings) call psb_toc(mv_phase1)
if (do_timings) call psb_tic(mv_phase2)
call a%ad%spmm(alpha,x%v,beta,y%v,info)
if (do_timings) call psb_tic(mv_phase3)
if (doswap_) call psi_swapdata(psb_swap_recv_,&
& zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
if (do_timings) call psb_toc(mv_phase3)
if (do_timings) call psb_tic(mv_phase4)
call a%and%spmm(alpha,x%v,zone,y%v,info)
if (do_timings) call psb_toc(mv_phase4)
end block
else
block
logical, parameter :: do_timings=.true.
real(psb_dpk_) :: t1, t2, t3, t4, t5
if (do_timings) call psb_barrier(ctxt)
if (do_timings) call psb_tic(mv_phase11)
if (doswap_) then
call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),&
& zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
end if
if (do_timings) call psb_toc(mv_phase11)
if (do_timings) call psb_tic(mv_phase12)
call psb_csmm(alpha,a,x,beta,y,info)
if (do_timings) call psb_toc(mv_phase12)
end block
end if
call psb_csmm(alpha,a,x,beta,y,info)
if(info /= psb_success_) then
info = psb_err_from_subroutine_non_
call psb_errpush(info,name)

@ -4312,6 +4312,266 @@ contains
end subroutine psb_ccsrspspmm
#endif
subroutine psb_c_ecsr_mold(a,b,info)
use psb_c_csr_mat_mod, psb_protect_name => psb_c_ecsr_mold
use psb_error_mod
implicit none
class(psb_c_ecsr_sparse_mat), intent(in) :: a
class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
character(len=20) :: name='ecsr_mold'
logical, parameter :: debug=.false.
call psb_get_erraction(err_act)
info = 0
if (allocated(b)) then
call b%free()
deallocate(b,stat=info)
end if
if (info == 0) allocate(psb_c_ecsr_sparse_mat :: b, stat=info)
if (info /= 0) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
goto 9999
end if
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_ecsr_mold
subroutine psb_c_ecsr_csmv(alpha,a,x,beta,y,info,trans)
use psb_error_mod
use psb_string_mod
use psb_c_csr_mat_mod, psb_protect_name => psb_c_ecsr_csmv
implicit none
class(psb_c_ecsr_sparse_mat), intent(in) :: a
complex(psb_spk_), intent(in) :: alpha, beta, x(:)
complex(psb_spk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer(psb_ipk_) :: m, n
logical :: tra, ctra
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name='c_ecsr_csmv'
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
if (a%is_dev()) call a%sync()
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T')
ctra = (psb_toupper(trans_) == 'C')
if (tra.or.ctra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1)<n) then
info = psb_err_input_asize_small_i_
ierr(1) = 3; ierr(2) = size(x); ierr(3) = n;
call psb_errpush(info,name,i_err=ierr)
goto 9999
end if
if (size(y,1)<m) then
info = psb_err_input_asize_small_i_
ierr(1) = 5; ierr(2) = size(y); ierr(3) = m;
call psb_errpush(info,name,i_err=ierr)
goto 9999
end if
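! Editor's note: the specialised kernel below skips rows with no entries,
! leaving the corresponding y(i) untouched; that is only equivalent to
! y = alpha*A*x + beta*y when beta == 1 (and in the plain, non-transposed,
! non-triangular case), hence the guard.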
if ((beta == cone).and.&
& .not.(tra.or.ctra.or.(a%is_triangle()).or.(a%is_unit()))) then
call psb_c_ecsr_csmv_inner(m,n,alpha,a%irp,a%ja,a%val,&
& a%nnerws,a%nerwp,x,y)
else
call a%psb_c_csr_sparse_mat%csmv(alpha,x,beta,y,info,trans)
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
contains
subroutine psb_c_ecsr_csmv_inner(m,n,alpha,irp,ja,val,&
& nnerws,nerwp,x,y)
integer(psb_ipk_), intent(in) :: m,n,nnerws,irp(*),ja(*),nerwp(*)
complex(psb_spk_), intent(in) :: alpha, x(*),val(*)
complex(psb_spk_), intent(inout) :: y(*)
integer(psb_ipk_) :: i,j,ir
complex(psb_spk_) :: acc
if (alpha == czero) return
if (alpha == cone) then
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = czero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) + acc
end do
else if (alpha == -cone) then
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = czero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) - acc
end do
else
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = czero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) + alpha*acc
end do
end if
end subroutine psb_c_ecsr_csmv_inner
end subroutine psb_c_ecsr_csmv
subroutine psb_c_ecsr_cmp_nerwp(a,info)
use psb_const_mod
use psb_realloc_mod
use psb_c_base_mat_mod
use psb_c_csr_mat_mod, psb_protect_name => psb_c_ecsr_cmp_nerwp
implicit none
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nnerws, i, nr, nzr
info = psb_success_
nr = a%get_nrows()
call psb_realloc(nr,a%nerwp,info)
nnerws = 0
do i=1, nr
nzr = a%irp(i+1)-a%irp(i)
if (nzr>0) then
nnerws = nnerws + 1
a%nerwp(nnerws) = i
end if
end do
call psb_realloc(nnerws,a%nerwp,info)
a%nnerws = nnerws
end subroutine psb_c_ecsr_cmp_nerwp
subroutine psb_c_cp_ecsr_from_coo(a,b,info)
use psb_const_mod
use psb_realloc_mod
use psb_c_base_mat_mod
use psb_c_csr_mat_mod, psb_protect_name => psb_c_cp_ecsr_from_coo
implicit none
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
class(psb_c_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_c_csr_sparse_mat%cp_from_coo(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_c_cp_ecsr_from_coo
subroutine psb_c_mv_ecsr_from_coo(a,b,info)
use psb_const_mod
use psb_realloc_mod
use psb_error_mod
use psb_c_base_mat_mod
use psb_c_csr_mat_mod, psb_protect_name => psb_c_mv_ecsr_from_coo
implicit none
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
class(psb_c_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_c_csr_sparse_mat%mv_from_coo(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_c_mv_ecsr_from_coo
subroutine psb_c_mv_ecsr_from_fmt(a,b,info)
use psb_const_mod
use psb_c_base_mat_mod
use psb_c_csr_mat_mod, psb_protect_name => psb_c_mv_ecsr_from_fmt
implicit none
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
class(psb_c_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_c_csr_sparse_mat%mv_from_fmt(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_c_mv_ecsr_from_fmt
subroutine psb_c_cp_ecsr_from_fmt(a,b,info)
use psb_const_mod
use psb_c_base_mat_mod
use psb_realloc_mod
use psb_c_csr_mat_mod, psb_protect_name => psb_c_cp_ecsr_from_fmt
implicit none
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
class(psb_c_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_c_csr_sparse_mat%cp_from_fmt(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_c_cp_ecsr_from_fmt
!
!
! lc version

@ -1213,6 +1213,56 @@ subroutine psb_c_b_csclip(a,b,info,&
end subroutine psb_c_b_csclip
subroutine psb_c_split_nd(a,n_rows,n_cols,info)
use psb_error_mod
use psb_string_mod
use psb_c_mat_mod, psb_protect_name => psb_c_split_nd
implicit none
class(psb_cspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
!!$ integer(psb_ipk_),optional, intent(in) :: dupl
!!$ character(len=*), optional, intent(in) :: type
!!$ class(psb_c_base_sparse_mat), intent(in), optional :: mold
type(psb_c_coo_sparse_mat) :: acoo
type(psb_c_csr_sparse_mat), allocatable :: aclip
type(psb_c_ecsr_sparse_mat), allocatable :: andclip
logical, parameter :: use_ecsr=.true.
character(len=20) :: name, ch_err
integer(psb_ipk_) :: err_act
info = psb_success_
name = 'psb_split'
call psb_erractionsave(err_act)
allocate(aclip)
call a%a%csclip(acoo,info,jmax=n_rows,rscale=.false.,cscale=.false.)
allocate(a%ad,mold=a%a)
call a%ad%mv_from_coo(acoo,info)
call a%a%csclip(acoo,info,jmin=n_rows+1,jmax=n_cols,rscale=.false.,cscale=.false.)
if (use_ecsr) then
allocate(andclip)
call andclip%mv_from_coo(acoo,info)
call move_alloc(andclip,a%and)
else
allocate(a%and,mold=a%a)
call a%and%mv_from_coo(acoo,info)
end if
if (psb_errstatus_fatal()) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='cscnv')
goto 9999
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_split_nd
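! --- Editor's note (illustrative, not part of the original patch) -----------
! split_nd clips the assembled local matrix by column index: columns 1..n_rows
! go into a%ad (the block acting on locally owned entries) and columns
! n_rows+1..n_cols into a%and (the block acting on halo entries), the latter
! stored in ECSR format when use_ecsr is enabled.  A hypothetical call site,
! with the sizes taken from the descriptor:
!
!   nr = desc_a%get_local_rows()
!   nc = desc_a%get_local_cols()
!   call a%split_nd(nr, nc, info)
! -----------------------------------------------------------------------------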
subroutine psb_c_cscnv(a,b,info,type,mold,upd,dupl)
use psb_error_mod
use psb_string_mod
@ -1246,54 +1296,65 @@ subroutine psb_c_cscnv(a,b,info,type,mold,upd,dupl)
goto 9999
end if
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
if (.false.) then
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
select case (psb_toupper(type))
case ('CSR')
allocate(psb_c_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_c_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_c_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
else
allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
select case (psb_toupper(type))
case ('CSR')
allocate(psb_c_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_c_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_c_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end select
else
allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
end if
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
if (present(dupl)) then
call altmp%set_dupl(dupl)
else if (a%is_bld()) then
! Does this make sense at all?? Who knows..
call altmp%set_dupl(psb_dupl_def_)
end if
if (present(dupl)) then
call altmp%set_dupl(dupl)
else if (a%is_bld()) then
! Does this make sense at all?? Who knows..
call altmp%set_dupl(psb_dupl_def_)
end if
if (debug) write(psb_err_unit,*) 'Converting from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
if (debug) write(psb_err_unit,*) 'Converting from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%cp_from_fmt(a%a, info)
call altmp%cp_from_fmt(a%a, info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
call move_alloc(altmp,b%a)
else
call inner_cp_fmt(a%a,b%a,info,type,mold,dupl)
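! Editor's note: if the ad/and split is present on input, each part is
! converted with the same target format so that b carries a consistent split.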
if (allocated(a%ad)) then
call inner_cp_fmt(a%ad,b%ad,info,type,mold,dupl)
end if
if (allocated(a%and)) then
call inner_cp_fmt(a%and,b%and,info,type,mold,dupl)
end if
end if
call move_alloc(altmp,b%a)
call b%trim()
call b%set_asb()
call psb_erractionrestore(err_act)
@ -1303,7 +1364,79 @@ subroutine psb_c_cscnv(a,b,info,type,mold,upd,dupl)
9999 call psb_error_handler(err_act)
return
contains
subroutine inner_cp_fmt(a,b,info,type,mold,dupl)
class(psb_c_base_sparse_mat), intent(in) :: a
class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_),optional, intent(in) :: dupl
character(len=*), optional, intent(in) :: type
class(psb_c_base_sparse_mat), intent(in), optional :: mold
class(psb_c_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act
info = psb_success_
call psb_erractionsave(err_act)
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
select case (psb_toupper(type))
case ('CSR')
allocate(psb_c_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_c_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_c_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
else
allocate(psb_c_csr_sparse_mat :: altmp, stat=info)
!allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
if (present(dupl)) then
call altmp%set_dupl(dupl)
else if (a%is_bld()) then
! Does this make sense at all?? Who knows..
call altmp%set_dupl(psb_dupl_def_)
end if
if (debug) write(psb_err_unit,*) 'Converting from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%cp_from_fmt(a, info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
call move_alloc(altmp,b)
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine inner_cp_fmt
end subroutine psb_c_cscnv
subroutine psb_c_cscnv_ip(a,info,type,mold,dupl)
@ -1312,13 +1445,12 @@ subroutine psb_c_cscnv_ip(a,info,type,mold,dupl)
use psb_c_mat_mod, psb_protect_name => psb_c_cscnv_ip
implicit none
class(psb_cspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_),optional, intent(in) :: dupl
character(len=*), optional, intent(in) :: type
class(psb_cspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_),optional, intent(in) :: dupl
character(len=*), optional, intent(in) :: type
class(psb_c_base_sparse_mat), intent(in), optional :: mold
class(psb_c_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act
character(len=20) :: name='cscnv_ip'
@ -1345,46 +1477,55 @@ subroutine psb_c_cscnv_ip(a,info,type,mold,dupl)
goto 9999
end if
if (present(mold)) then
if (.false.) then
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
select case (psb_toupper(type))
case ('CSR')
allocate(psb_c_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_c_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_c_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
else
allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
allocate(altmp, mold=mold,stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
else if (present(type)) then
if (debug) write(psb_err_unit,*) 'Converting in-place from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
select case (psb_toupper(type))
case ('CSR')
allocate(psb_c_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_c_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_c_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
call altmp%mv_from_fmt(a%a, info)
call move_alloc(altmp,a%a)
else
allocate(altmp, mold=psb_get_mat_default(a),stat=info)
call inner_mv_fmt(a%a,info,type,mold,dupl)
if (allocated(a%ad)) then
call inner_mv_fmt(a%ad,info,type,mold,dupl)
end if
if (allocated(a%and)) then
call inner_mv_fmt(a%and,info,type,mold,dupl)
end if
end if
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
if (debug) write(psb_err_unit,*) 'Converting in-place from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%mv_from_fmt(a%a, info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
call move_alloc(altmp,a%a)
call a%trim()
call a%set_asb()
call psb_erractionrestore(err_act)
@ -1394,6 +1535,77 @@ subroutine psb_c_cscnv_ip(a,info,type,mold,dupl)
9999 call psb_error_handler(err_act)
return
contains
subroutine inner_mv_fmt(a,info,type,mold,dupl)
class(psb_c_base_sparse_mat), intent(inout), allocatable :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_),optional, intent(in) :: dupl
character(len=*), optional, intent(in) :: type
class(psb_c_base_sparse_mat), intent(in), optional :: mold
class(psb_c_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act
info = psb_success_
call psb_erractionsave(err_act)
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
select case (psb_toupper(type))
case ('CSR')
allocate(psb_c_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_c_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_c_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
else
allocate(psb_c_csr_sparse_mat :: altmp, stat=info)
!allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
if (present(dupl)) then
call altmp%set_dupl(dupl)
else if (a%is_bld()) then
! Does this make sense at all?? Who knows..
call altmp%set_dupl(psb_dupl_def_)
end if
if (debug) write(psb_err_unit,*) 'Converting from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%mv_from_fmt(a, info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
call move_alloc(altmp,a)
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine inner_mv_fmt
end subroutine psb_c_cscnv_ip

@ -4312,6 +4312,266 @@ contains
end subroutine psb_dcsrspspmm
#endif
subroutine psb_d_ecsr_mold(a,b,info)
use psb_d_csr_mat_mod, psb_protect_name => psb_d_ecsr_mold
use psb_error_mod
implicit none
class(psb_d_ecsr_sparse_mat), intent(in) :: a
class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
character(len=20) :: name='ecsr_mold'
logical, parameter :: debug=.false.
call psb_get_erraction(err_act)
info = 0
if (allocated(b)) then
call b%free()
deallocate(b,stat=info)
end if
if (info == 0) allocate(psb_d_ecsr_sparse_mat :: b, stat=info)
if (info /= 0) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
goto 9999
end if
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_d_ecsr_mold
subroutine psb_d_ecsr_csmv(alpha,a,x,beta,y,info,trans)
use psb_error_mod
use psb_string_mod
use psb_d_csr_mat_mod, psb_protect_name => psb_d_ecsr_csmv
implicit none
class(psb_d_ecsr_sparse_mat), intent(in) :: a
real(psb_dpk_), intent(in) :: alpha, beta, x(:)
real(psb_dpk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer(psb_ipk_) :: m, n
logical :: tra, ctra
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name='d_ecsr_csmv'
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
if (a%is_dev()) call a%sync()
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T')
ctra = (psb_toupper(trans_) == 'C')
if (tra.or.ctra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1)<n) then
info = psb_err_input_asize_small_i_
ierr(1) = 3; ierr(2) = size(x); ierr(3) = n;
call psb_errpush(info,name,i_err=ierr)
goto 9999
end if
if (size(y,1)<m) then
info = psb_err_input_asize_small_i_
ierr(1) = 5; ierr(2) = size(y); ierr(3) = m;
call psb_errpush(info,name,i_err=ierr)
goto 9999
end if
if ((beta == done).and.&
& .not.(tra.or.ctra.or.(a%is_triangle()).or.(a%is_unit()))) then
call psb_d_ecsr_csmv_inner(m,n,alpha,a%irp,a%ja,a%val,&
& a%nnerws,a%nerwp,x,y)
else
call a%psb_d_csr_sparse_mat%csmv(alpha,x,beta,y,info,trans)
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
contains
subroutine psb_d_ecsr_csmv_inner(m,n,alpha,irp,ja,val,&
& nnerws,nerwp,x,y)
integer(psb_ipk_), intent(in) :: m,n,nnerws,irp(*),ja(*),nerwp(*)
real(psb_dpk_), intent(in) :: alpha, x(*),val(*)
real(psb_dpk_), intent(inout) :: y(*)
integer(psb_ipk_) :: i,j,ir
real(psb_dpk_) :: acc
if (alpha == dzero) return
if (alpha == done) then
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = dzero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) + acc
end do
else if (alpha == -done) then
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = dzero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) - acc
end do
else
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = dzero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) + alpha*acc
end do
end if
end subroutine psb_d_ecsr_csmv_inner
end subroutine psb_d_ecsr_csmv
subroutine psb_d_ecsr_cmp_nerwp(a,info)
use psb_const_mod
use psb_realloc_mod
use psb_d_base_mat_mod
use psb_d_csr_mat_mod, psb_protect_name => psb_d_ecsr_cmp_nerwp
implicit none
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nnerws, i, nr, nzr
info = psb_success_
nr = a%get_nrows()
call psb_realloc(nr,a%nerwp,info)
nnerws = 0
do i=1, nr
nzr = a%irp(i+1)-a%irp(i)
if (nzr>0) then
nnerws = nnerws + 1
a%nerwp(nnerws) = i
end if
end do
call psb_realloc(nnerws,a%nerwp,info)
a%nnerws = nnerws
end subroutine psb_d_ecsr_cmp_nerwp
subroutine psb_d_cp_ecsr_from_coo(a,b,info)
use psb_const_mod
use psb_realloc_mod
use psb_d_base_mat_mod
use psb_d_csr_mat_mod, psb_protect_name => psb_d_cp_ecsr_from_coo
implicit none
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
class(psb_d_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_d_csr_sparse_mat%cp_from_coo(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_d_cp_ecsr_from_coo
subroutine psb_d_mv_ecsr_from_coo(a,b,info)
use psb_const_mod
use psb_realloc_mod
use psb_error_mod
use psb_d_base_mat_mod
use psb_d_csr_mat_mod, psb_protect_name => psb_d_mv_ecsr_from_coo
implicit none
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
class(psb_d_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_d_csr_sparse_mat%mv_from_coo(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_d_mv_ecsr_from_coo
subroutine psb_d_mv_ecsr_from_fmt(a,b,info)
use psb_const_mod
use psb_d_base_mat_mod
use psb_d_csr_mat_mod, psb_protect_name => psb_d_mv_ecsr_from_fmt
implicit none
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
class(psb_d_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_d_csr_sparse_mat%mv_from_fmt(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_d_mv_ecsr_from_fmt
subroutine psb_d_cp_ecsr_from_fmt(a,b,info)
use psb_const_mod
use psb_d_base_mat_mod
use psb_realloc_mod
use psb_d_csr_mat_mod, psb_protect_name => psb_d_cp_ecsr_from_fmt
implicit none
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
class(psb_d_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_d_csr_sparse_mat%cp_from_fmt(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_d_cp_ecsr_from_fmt
!
!
! ld version

@ -1213,6 +1213,56 @@ subroutine psb_d_b_csclip(a,b,info,&
end subroutine psb_d_b_csclip
subroutine psb_d_split_nd(a,n_rows,n_cols,info)
use psb_error_mod
use psb_string_mod
use psb_d_mat_mod, psb_protect_name => psb_d_split_nd
implicit none
class(psb_dspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
!!$ integer(psb_ipk_),optional, intent(in) :: dupl
!!$ character(len=*), optional, intent(in) :: type
!!$ class(psb_d_base_sparse_mat), intent(in), optional :: mold
type(psb_d_coo_sparse_mat) :: acoo
type(psb_d_csr_sparse_mat), allocatable :: aclip
type(psb_d_ecsr_sparse_mat), allocatable :: andclip
logical, parameter :: use_ecsr=.true.
character(len=20) :: name, ch_err
integer(psb_ipk_) :: err_act
info = psb_success_
name = 'psb_split'
call psb_erractionsave(err_act)
allocate(aclip)
call a%a%csclip(acoo,info,jmax=n_rows,rscale=.false.,cscale=.false.)
allocate(a%ad,mold=a%a)
call a%ad%mv_from_coo(acoo,info)
call a%a%csclip(acoo,info,jmin=n_rows+1,jmax=n_cols,rscale=.false.,cscale=.false.)
if (use_ecsr) then
allocate(andclip)
call andclip%mv_from_coo(acoo,info)
call move_alloc(andclip,a%and)
else
allocate(a%and,mold=a%a)
call a%and%mv_from_coo(acoo,info)
end if
if (psb_errstatus_fatal()) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='cscnv')
goto 9999
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_d_split_nd
subroutine psb_d_cscnv(a,b,info,type,mold,upd,dupl)
use psb_error_mod
use psb_string_mod
@ -1246,54 +1296,65 @@ subroutine psb_d_cscnv(a,b,info,type,mold,upd,dupl)
goto 9999
end if
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
if (.false.) then
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
select case (psb_toupper(type))
case ('CSR')
allocate(psb_d_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_d_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_d_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
else
allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
select case (psb_toupper(type))
case ('CSR')
allocate(psb_d_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_d_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_d_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end select
else
allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
end if
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
if (present(dupl)) then
call altmp%set_dupl(dupl)
else if (a%is_bld()) then
! Does this make sense at all?? Who knows..
call altmp%set_dupl(psb_dupl_def_)
end if
if (present(dupl)) then
call altmp%set_dupl(dupl)
else if (a%is_bld()) then
! Does this make sense at all?? Who knows..
call altmp%set_dupl(psb_dupl_def_)
end if
if (debug) write(psb_err_unit,*) 'Converting from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
if (debug) write(psb_err_unit,*) 'Converting from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%cp_from_fmt(a%a, info)
call altmp%cp_from_fmt(a%a, info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
call move_alloc(altmp,b%a)
else
call inner_cp_fmt(a%a,b%a,info,type,mold,dupl)
if (allocated(a%ad)) then
call inner_cp_fmt(a%ad,b%ad,info,type,mold,dupl)
end if
if (allocated(a%and)) then
call inner_cp_fmt(a%and,b%and,info,type,mold,dupl)
end if
end if
call move_alloc(altmp,b%a)
call b%trim()
call b%set_asb()
call psb_erractionrestore(err_act)
@ -1303,7 +1364,79 @@ subroutine psb_d_cscnv(a,b,info,type,mold,upd,dupl)
9999 call psb_error_handler(err_act)
return
contains
subroutine inner_cp_fmt(a,b,info,type,mold,dupl)
class(psb_d_base_sparse_mat), intent(in) :: a
class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_),optional, intent(in) :: dupl
character(len=*), optional, intent(in) :: type
class(psb_d_base_sparse_mat), intent(in), optional :: mold
class(psb_d_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act
info = psb_success_
call psb_erractionsave(err_act)
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
select case (psb_toupper(type))
case ('CSR')
allocate(psb_d_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_d_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_d_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
else
allocate(psb_d_csr_sparse_mat :: altmp, stat=info)
!allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
if (present(dupl)) then
call altmp%set_dupl(dupl)
else if (a%is_bld()) then
! Does this make sense at all?? Who knows..
call altmp%set_dupl(psb_dupl_def_)
end if
if (debug) write(psb_err_unit,*) 'Converting from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%cp_from_fmt(a, info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="cp_from_fmt")
goto 9999
end if
call move_alloc(altmp,b)
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine inner_cp_fmt
end subroutine psb_d_cscnv
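!
! With the ND split in place, psb_d_cscnv no longer converts just a%a: the
! inner_cp_fmt helper is applied to a%a and, when allocated, to a%ad and
! a%and as well, so the converted copy b keeps the same split.  The
! (.false.) branch above preserves the previous single-matrix code path
! for reference only.
!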
subroutine psb_d_cscnv_ip(a,info,type,mold,dupl)
@ -1312,13 +1445,12 @@ subroutine psb_d_cscnv_ip(a,info,type,mold,dupl)
use psb_d_mat_mod, psb_protect_name => psb_d_cscnv_ip
implicit none
class(psb_dspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_),optional, intent(in) :: dupl
character(len=*), optional, intent(in) :: type
class(psb_dspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_),optional, intent(in) :: dupl
character(len=*), optional, intent(in) :: type
class(psb_d_base_sparse_mat), intent(in), optional :: mold
class(psb_d_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act
character(len=20) :: name='cscnv_ip'
@ -1345,46 +1477,55 @@ subroutine psb_d_cscnv_ip(a,info,type,mold,dupl)
goto 9999
end if
if (present(mold)) then
if (.false.) then
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
select case (psb_toupper(type))
case ('CSR')
allocate(psb_d_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_d_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_d_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
else
allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
allocate(altmp, mold=mold,stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
else if (present(type)) then
if (debug) write(psb_err_unit,*) 'Converting in-place from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
select case (psb_toupper(type))
case ('CSR')
allocate(psb_d_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_d_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_d_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
call altmp%mv_from_fmt(a%a, info)
call move_alloc(altmp,a%a)
else
allocate(altmp, mold=psb_get_mat_default(a),stat=info)
call inner_mv_fmt(a%a,info,type,mold,dupl)
if (allocated(a%ad)) then
call inner_mv_fmt(a%ad,info,type,mold,dupl)
end if
if (allocated(a%and)) then
call inner_mv_fmt(a%and,info,type,mold,dupl)
end if
end if
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
if (debug) write(psb_err_unit,*) 'Converting in-place from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%mv_from_fmt(a%a, info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
call move_alloc(altmp,a%a)
call a%trim()
call a%set_asb()
call psb_erractionrestore(err_act)
@ -1394,6 +1535,77 @@ subroutine psb_d_cscnv_ip(a,info,type,mold,dupl)
9999 call psb_error_handler(err_act)
return
contains
subroutine inner_mv_fmt(a,info,type,mold,dupl)
class(psb_d_base_sparse_mat), intent(inout), allocatable :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_),optional, intent(in) :: dupl
character(len=*), optional, intent(in) :: type
class(psb_d_base_sparse_mat), intent(in), optional :: mold
class(psb_d_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act
info = psb_success_
call psb_erractionsave(err_act)
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
select case (psb_toupper(type))
case ('CSR')
allocate(psb_d_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_d_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_d_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
else
allocate(psb_d_csr_sparse_mat :: altmp, stat=info)
!allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
if (present(dupl)) then
call altmp%set_dupl(dupl)
else if (a%is_bld()) then
! Does this make sense at all?? Who knows..
call altmp%set_dupl(psb_dupl_def_)
end if
if (debug) write(psb_err_unit,*) 'Converting from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%mv_from_fmt(a, info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
call move_alloc(altmp,a)
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine inner_mv_fmt
end subroutine psb_d_cscnv_ip
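!
! In-place counterpart of the above: inner_mv_fmt moves (rather than
! copies) each of a%a, a%ad and a%and into the requested format; when
! neither MOLD nor TYPE is given it currently falls back to plain CSR (the
! psb_get_mat_default call is commented out in the helper).  A hypothetical
! call converting a matrix in place to CSR would be
!
!   call a%cscnv(info,type='CSR')
!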

@ -4312,6 +4312,266 @@ contains
end subroutine psb_scsrspspmm
#endif
subroutine psb_s_ecsr_mold(a,b,info)
use psb_s_csr_mat_mod, psb_protect_name => psb_s_ecsr_mold
use psb_error_mod
implicit none
class(psb_s_ecsr_sparse_mat), intent(in) :: a
class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
character(len=20) :: name='ecsr_mold'
logical, parameter :: debug=.false.
call psb_get_erraction(err_act)
info = 0
if (allocated(b)) then
call b%free()
deallocate(b,stat=info)
end if
if (info == 0) allocate(psb_s_ecsr_sparse_mat :: b, stat=info)
if (info /= 0) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
goto 9999
end if
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_s_ecsr_mold
subroutine psb_s_ecsr_csmv(alpha,a,x,beta,y,info,trans)
use psb_error_mod
use psb_string_mod
use psb_s_csr_mat_mod, psb_protect_name => psb_s_ecsr_csmv
implicit none
class(psb_s_ecsr_sparse_mat), intent(in) :: a
real(psb_spk_), intent(in) :: alpha, beta, x(:)
real(psb_spk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer(psb_ipk_) :: m, n
logical :: tra, ctra
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name='s_ecsr_csmv'
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
if (a%is_dev()) call a%sync()
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T')
ctra = (psb_toupper(trans_) == 'C')
if (tra.or.ctra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1)<n) then
info = psb_err_input_asize_small_i_
ierr(1) = 3; ierr(2) = size(x); ierr(3) = n;
call psb_errpush(info,name,i_err=ierr)
goto 9999
end if
if (size(y,1)<m) then
info = psb_err_input_asize_small_i_
ierr(1) = 5; ierr(2) = size(y); ierr(3) = m;
call psb_errpush(info,name,i_err=ierr)
goto 9999
end if
if ((beta == sone).and.&
& .not.(tra.or.ctra.or.(a%is_triangle()).or.(a%is_unit()))) then
call psb_s_ecsr_csmv_inner(m,n,alpha,a%irp,a%ja,a%val,&
& a%nnerws,a%nerwp,x,y)
else
call a%psb_s_csr_sparse_mat%csmv(alpha,x,beta,y,info,trans)
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
contains
subroutine psb_s_ecsr_csmv_inner(m,n,alpha,irp,ja,val,&
& nnerws,nerwp,x,y)
integer(psb_ipk_), intent(in) :: m,n,nnerws,irp(*),ja(*),nerwp(*)
real(psb_spk_), intent(in) :: alpha, x(*),val(*)
real(psb_spk_), intent(inout) :: y(*)
integer(psb_ipk_) :: i,j,ir
real(psb_spk_) :: acc
if (alpha == szero) return
if (alpha == sone) then
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = szero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) + acc
end do
else if (alpha == -sone) then
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = szero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) - acc
end do
else
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = szero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) + alpha*acc
end do
end if
end subroutine psb_s_ecsr_csmv_inner
end subroutine psb_s_ecsr_csmv
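!
! ECSR matrix-vector product: when beta == 1 and no transpose, triangle or
! unit-diagonal handling is required, only the nnerws rows listed in nerwp
! are touched, i.e. for each non-empty row i
!
!   y(i) = y(i) + alpha * sum( val(irp(i):irp(i+1)-1) * x(ja(irp(i):irp(i+1)-1)) )
!
! Rows with no entries are skipped, which is exactly what beta == 1
! implies for them, so the result matches the plain CSR kernel.  Every
! other case falls back to the parent psb_s_csr_sparse_mat%csmv.
!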
subroutine psb_s_ecsr_cmp_nerwp(a,info)
use psb_const_mod
use psb_realloc_mod
use psb_s_base_mat_mod
use psb_s_csr_mat_mod, psb_protect_name => psb_s_ecsr_cmp_nerwp
implicit none
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nnerws, i, nr, nzr
info = psb_success_
nr = a%get_nrows()
call psb_realloc(nr,a%nerwp,info)
nnerws = 0
do i=1, nr
nzr = a%irp(i+1)-a%irp(i)
if (nzr>0) then
nnerws = nnerws + 1
a%nerwp(nnerws) = i
end if
end do
call psb_realloc(nnerws,a%nerwp,info)
a%nnerws = nnerws
end subroutine psb_s_ecsr_cmp_nerwp
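!
! cmp_nerwp scans the CSR row pointers and records in nerwp(1:nnerws) the
! indices of the rows that actually hold entries.  For example, with
!
!   irp = (/ 1, 3, 3, 6 /)       ! 3 rows, row 2 empty
!
! it produces nnerws = 2 and nerwp = (/ 1, 3 /).
!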
subroutine psb_s_cp_ecsr_from_coo(a,b,info)
use psb_const_mod
use psb_realloc_mod
use psb_s_base_mat_mod
use psb_s_csr_mat_mod, psb_protect_name => psb_s_cp_ecsr_from_coo
implicit none
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
class(psb_s_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_s_csr_sparse_mat%cp_from_coo(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_s_cp_ecsr_from_coo
subroutine psb_s_mv_ecsr_from_coo(a,b,info)
use psb_const_mod
use psb_realloc_mod
use psb_error_mod
use psb_s_base_mat_mod
use psb_s_csr_mat_mod, psb_protect_name => psb_s_mv_ecsr_from_coo
implicit none
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
class(psb_s_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_s_csr_sparse_mat%mv_from_coo(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_s_mv_ecsr_from_coo
subroutine psb_s_mv_ecsr_from_fmt(a,b,info)
use psb_const_mod
use psb_s_base_mat_mod
use psb_s_csr_mat_mod, psb_protect_name => psb_s_mv_ecsr_from_fmt
implicit none
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
class(psb_s_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_s_csr_sparse_mat%mv_from_fmt(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_s_mv_ecsr_from_fmt
subroutine psb_s_cp_ecsr_from_fmt(a,b,info)
use psb_const_mod
use psb_s_base_mat_mod
use psb_realloc_mod
use psb_s_csr_mat_mod, psb_protect_name => psb_s_cp_ecsr_from_fmt
implicit none
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
class(psb_s_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_s_csr_sparse_mat%cp_from_fmt(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_s_cp_ecsr_from_fmt
!
!
! ls version

@ -1213,6 +1213,56 @@ subroutine psb_s_b_csclip(a,b,info,&
end subroutine psb_s_b_csclip
subroutine psb_s_split_nd(a,n_rows,n_cols,info)
use psb_error_mod
use psb_string_mod
use psb_s_mat_mod, psb_protect_name => psb_s_split_nd
implicit none
class(psb_sspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
!!$ integer(psb_ipk_),optional, intent(in) :: dupl
!!$ character(len=*), optional, intent(in) :: type
!!$ class(psb_s_base_sparse_mat), intent(in), optional :: mold
type(psb_s_coo_sparse_mat) :: acoo
type(psb_s_csr_sparse_mat), allocatable :: aclip
type(psb_s_ecsr_sparse_mat), allocatable :: andclip
logical, parameter :: use_ecsr=.true.
character(len=20) :: name, ch_err
integer(psb_ipk_) :: err_act
info = psb_success_
name = 'psb_split'
call psb_erractionsave(err_act)
allocate(aclip)
call a%a%csclip(acoo,info,jmax=n_rows,rscale=.false.,cscale=.false.)
allocate(a%ad,mold=a%a)
call a%ad%mv_from_coo(acoo,info)
call a%a%csclip(acoo,info,jmin=n_rows+1,jmax=n_cols,rscale=.false.,cscale=.false.)
if (use_ecsr) then
allocate(andclip)
call andclip%mv_from_coo(acoo,info)
call move_alloc(andclip,a%and)
else
allocate(a%and,mold=a%a)
call a%and%mv_from_coo(acoo,info)
end if
if (psb_errstatus_fatal()) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='cscnv')
goto 9999
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_s_split_nd
subroutine psb_s_cscnv(a,b,info,type,mold,upd,dupl)
use psb_error_mod
use psb_string_mod
@ -1246,54 +1296,65 @@ subroutine psb_s_cscnv(a,b,info,type,mold,upd,dupl)
goto 9999
end if
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
if (.false.) then
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
select case (psb_toupper(type))
case ('CSR')
allocate(psb_s_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_s_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_s_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
else
allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
select case (psb_toupper(type))
case ('CSR')
allocate(psb_s_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_s_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_s_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end select
else
allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
end if
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
if (present(dupl)) then
call altmp%set_dupl(dupl)
else if (a%is_bld()) then
! Does this make sense at all?? Who knows..
call altmp%set_dupl(psb_dupl_def_)
end if
if (present(dupl)) then
call altmp%set_dupl(dupl)
else if (a%is_bld()) then
! Does this make sense at all?? Who knows..
call altmp%set_dupl(psb_dupl_def_)
end if
if (debug) write(psb_err_unit,*) 'Converting from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
if (debug) write(psb_err_unit,*) 'Converting from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%cp_from_fmt(a%a, info)
call altmp%cp_from_fmt(a%a, info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
call move_alloc(altmp,b%a)
else
call inner_cp_fmt(a%a,b%a,info,type,mold,dupl)
if (allocated(a%ad)) then
call inner_cp_fmt(a%ad,b%ad,info,type,mold,dupl)
end if
if (allocated(a%and)) then
call inner_cp_fmt(a%and,b%and,info,type,mold,dupl)
end if
end if
call move_alloc(altmp,b%a)
call b%trim()
call b%set_asb()
call psb_erractionrestore(err_act)
@ -1303,7 +1364,79 @@ subroutine psb_s_cscnv(a,b,info,type,mold,upd,dupl)
9999 call psb_error_handler(err_act)
return
contains
subroutine inner_cp_fmt(a,b,info,type,mold,dupl)
class(psb_s_base_sparse_mat), intent(in) :: a
class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_),optional, intent(in) :: dupl
character(len=*), optional, intent(in) :: type
class(psb_s_base_sparse_mat), intent(in), optional :: mold
class(psb_s_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act
info = psb_success_
call psb_erractionsave(err_act)
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
select case (psb_toupper(type))
case ('CSR')
allocate(psb_s_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_s_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_s_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
else
allocate(psb_s_csr_sparse_mat :: altmp, stat=info)
!allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
if (present(dupl)) then
call altmp%set_dupl(dupl)
else if (a%is_bld()) then
! Does this make sense at all?? Who knows..
call altmp%set_dupl(psb_dupl_def_)
end if
if (debug) write(psb_err_unit,*) 'Converting from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%cp_from_fmt(a, info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="cp_from_fmt")
goto 9999
end if
call move_alloc(altmp,b)
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine inner_cp_fmt
end subroutine psb_s_cscnv
subroutine psb_s_cscnv_ip(a,info,type,mold,dupl)
@ -1312,13 +1445,12 @@ subroutine psb_s_cscnv_ip(a,info,type,mold,dupl)
use psb_s_mat_mod, psb_protect_name => psb_s_cscnv_ip
implicit none
class(psb_sspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_),optional, intent(in) :: dupl
character(len=*), optional, intent(in) :: type
class(psb_sspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_),optional, intent(in) :: dupl
character(len=*), optional, intent(in) :: type
class(psb_s_base_sparse_mat), intent(in), optional :: mold
class(psb_s_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act
character(len=20) :: name='cscnv_ip'
@ -1345,46 +1477,55 @@ subroutine psb_s_cscnv_ip(a,info,type,mold,dupl)
goto 9999
end if
if (present(mold)) then
if (.false.) then
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
select case (psb_toupper(type))
case ('CSR')
allocate(psb_s_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_s_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_s_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
else
allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
allocate(altmp, mold=mold,stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
else if (present(type)) then
if (debug) write(psb_err_unit,*) 'Converting in-place from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
select case (psb_toupper(type))
case ('CSR')
allocate(psb_s_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_s_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_s_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
call altmp%mv_from_fmt(a%a, info)
call move_alloc(altmp,a%a)
else
allocate(altmp, mold=psb_get_mat_default(a),stat=info)
call inner_mv_fmt(a%a,info,type,mold,dupl)
if (allocated(a%ad)) then
call inner_mv_fmt(a%ad,info,type,mold,dupl)
end if
if (allocated(a%and)) then
call inner_mv_fmt(a%and,info,type,mold,dupl)
end if
end if
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
if (debug) write(psb_err_unit,*) 'Converting in-place from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%mv_from_fmt(a%a, info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
call move_alloc(altmp,a%a)
call a%trim()
call a%set_asb()
call psb_erractionrestore(err_act)
@ -1394,6 +1535,77 @@ subroutine psb_s_cscnv_ip(a,info,type,mold,dupl)
9999 call psb_error_handler(err_act)
return
contains
subroutine inner_mv_fmt(a,info,type,mold,dupl)
class(psb_s_base_sparse_mat), intent(inout), allocatable :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_),optional, intent(in) :: dupl
character(len=*), optional, intent(in) :: type
class(psb_s_base_sparse_mat), intent(in), optional :: mold
class(psb_s_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act
info = psb_success_
call psb_erractionsave(err_act)
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
select case (psb_toupper(type))
case ('CSR')
allocate(psb_s_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_s_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_s_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
else
allocate(psb_s_csr_sparse_mat :: altmp, stat=info)
!allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
if (present(dupl)) then
call altmp%set_dupl(dupl)
else if (a%is_bld()) then
! Does this make sense at all?? Who knows..
call altmp%set_dupl(psb_dupl_def_)
end if
if (debug) write(psb_err_unit,*) 'Converting from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%mv_from_fmt(a, info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
call move_alloc(altmp,a)
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine inner_mv_fmt
end subroutine psb_s_cscnv_ip

@ -4312,6 +4312,266 @@ contains
end subroutine psb_zcsrspspmm
#endif
subroutine psb_z_ecsr_mold(a,b,info)
use psb_z_csr_mat_mod, psb_protect_name => psb_z_ecsr_mold
use psb_error_mod
implicit none
class(psb_z_ecsr_sparse_mat), intent(in) :: a
class(psb_z_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
character(len=20) :: name='ecsr_mold'
logical, parameter :: debug=.false.
call psb_get_erraction(err_act)
info = 0
if (allocated(b)) then
call b%free()
deallocate(b,stat=info)
end if
if (info == 0) allocate(psb_z_ecsr_sparse_mat :: b, stat=info)
if (info /= 0) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
goto 9999
end if
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_z_ecsr_mold
subroutine psb_z_ecsr_csmv(alpha,a,x,beta,y,info,trans)
use psb_error_mod
use psb_string_mod
use psb_z_csr_mat_mod, psb_protect_name => psb_z_ecsr_csmv
implicit none
class(psb_z_ecsr_sparse_mat), intent(in) :: a
complex(psb_dpk_), intent(in) :: alpha, beta, x(:)
complex(psb_dpk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer(psb_ipk_) :: m, n
logical :: tra, ctra
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name='z_ecsr_csmv'
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
if (a%is_dev()) call a%sync()
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T')
ctra = (psb_toupper(trans_) == 'C')
if (tra.or.ctra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1)<n) then
info = psb_err_input_asize_small_i_
ierr(1) = 3; ierr(2) = size(x); ierr(3) = n;
call psb_errpush(info,name,i_err=ierr)
goto 9999
end if
if (size(y,1)<m) then
info = psb_err_input_asize_small_i_
ierr(1) = 5; ierr(2) = size(y); ierr(3) = m;
call psb_errpush(info,name,i_err=ierr)
goto 9999
end if
if ((beta == zone).and.&
& .not.(tra.or.ctra.or.(a%is_triangle()).or.(a%is_unit()))) then
call psb_z_ecsr_csmv_inner(m,n,alpha,a%irp,a%ja,a%val,&
& a%nnerws,a%nerwp,x,y)
else
call a%psb_z_csr_sparse_mat%csmv(alpha,x,beta,y,info,trans)
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
contains
subroutine psb_z_ecsr_csmv_inner(m,n,alpha,irp,ja,val,&
& nnerws,nerwp,x,y)
integer(psb_ipk_), intent(in) :: m,n,nnerws,irp(*),ja(*),nerwp(*)
complex(psb_dpk_), intent(in) :: alpha, x(*),val(*)
complex(psb_dpk_), intent(inout) :: y(*)
integer(psb_ipk_) :: i,j,ir
complex(psb_dpk_) :: acc
if (alpha == zzero) return
if (alpha == zone) then
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = zzero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) + acc
end do
else if (alpha == -zone) then
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = zzero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) - acc
end do
else
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = zzero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) + alpha*acc
end do
end if
end subroutine psb_z_ecsr_csmv_inner
end subroutine psb_z_ecsr_csmv
subroutine psb_z_ecsr_cmp_nerwp(a,info)
use psb_const_mod
use psb_realloc_mod
use psb_z_base_mat_mod
use psb_z_csr_mat_mod, psb_protect_name => psb_z_ecsr_cmp_nerwp
implicit none
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nnerws, i, nr, nzr
info = psb_success_
nr = a%get_nrows()
call psb_realloc(nr,a%nerwp,info)
nnerws = 0
do i=1, nr
nzr = a%irp(i+1)-a%irp(i)
if (nzr>0) then
nnerws = nnerws + 1
a%nerwp(nnerws) = i
end if
end do
call psb_realloc(nnerws,a%nerwp,info)
a%nnerws = nnerws
end subroutine psb_z_ecsr_cmp_nerwp
subroutine psb_z_cp_ecsr_from_coo(a,b,info)
use psb_const_mod
use psb_realloc_mod
use psb_z_base_mat_mod
use psb_z_csr_mat_mod, psb_protect_name => psb_z_cp_ecsr_from_coo
implicit none
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
class(psb_z_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_z_csr_sparse_mat%cp_from_coo(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_z_cp_ecsr_from_coo
subroutine psb_z_mv_ecsr_from_coo(a,b,info)
use psb_const_mod
use psb_realloc_mod
use psb_error_mod
use psb_z_base_mat_mod
use psb_z_csr_mat_mod, psb_protect_name => psb_z_mv_ecsr_from_coo
implicit none
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
class(psb_z_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_z_csr_sparse_mat%mv_from_coo(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_z_mv_ecsr_from_coo
subroutine psb_z_mv_ecsr_from_fmt(a,b,info)
use psb_const_mod
use psb_z_base_mat_mod
use psb_z_csr_mat_mod, psb_protect_name => psb_z_mv_ecsr_from_fmt
implicit none
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
class(psb_z_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_z_csr_sparse_mat%mv_from_fmt(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_z_mv_ecsr_from_fmt
subroutine psb_z_cp_ecsr_from_fmt(a,b,info)
use psb_const_mod
use psb_z_base_mat_mod
use psb_realloc_mod
use psb_z_csr_mat_mod, psb_protect_name => psb_z_cp_ecsr_from_fmt
implicit none
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
class(psb_z_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_z_csr_sparse_mat%cp_from_fmt(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_z_cp_ecsr_from_fmt
!
!
! lz version

@ -1213,6 +1213,56 @@ subroutine psb_z_b_csclip(a,b,info,&
end subroutine psb_z_b_csclip
subroutine psb_z_split_nd(a,n_rows,n_cols,info)
use psb_error_mod
use psb_string_mod
use psb_z_mat_mod, psb_protect_name => psb_z_split_nd
implicit none
class(psb_zspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
!!$ integer(psb_ipk_),optional, intent(in) :: dupl
!!$ character(len=*), optional, intent(in) :: type
!!$ class(psb_z_base_sparse_mat), intent(in), optional :: mold
type(psb_z_coo_sparse_mat) :: acoo
type(psb_z_csr_sparse_mat), allocatable :: aclip
type(psb_z_ecsr_sparse_mat), allocatable :: andclip
logical, parameter :: use_ecsr=.true.
character(len=20) :: name, ch_err
integer(psb_ipk_) :: err_act
info = psb_success_
name = 'psb_split'
call psb_erractionsave(err_act)
allocate(aclip)
call a%a%csclip(acoo,info,jmax=n_rows,rscale=.false.,cscale=.false.)
allocate(a%ad,mold=a%a)
call a%ad%mv_from_coo(acoo,info)
call a%a%csclip(acoo,info,jmin=n_rows+1,jmax=n_cols,rscale=.false.,cscale=.false.)
if (use_ecsr) then
allocate(andclip)
call andclip%mv_from_coo(acoo,info)
call move_alloc(andclip,a%and)
else
allocate(a%and,mold=a%a)
call a%and%mv_from_coo(acoo,info)
end if
if (psb_errstatus_fatal()) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='cscnv')
goto 9999
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_z_split_nd
subroutine psb_z_cscnv(a,b,info,type,mold,upd,dupl)
use psb_error_mod
use psb_string_mod
@ -1246,54 +1296,65 @@ subroutine psb_z_cscnv(a,b,info,type,mold,upd,dupl)
goto 9999
end if
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
if (.false.) then
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
select case (psb_toupper(type))
case ('CSR')
allocate(psb_z_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_z_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_z_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
else
allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
select case (psb_toupper(type))
case ('CSR')
allocate(psb_z_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_z_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_z_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end select
else
allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
end if
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
if (present(dupl)) then
call altmp%set_dupl(dupl)
else if (a%is_bld()) then
! Does this make sense at all?? Who knows..
call altmp%set_dupl(psb_dupl_def_)
end if
if (present(dupl)) then
call altmp%set_dupl(dupl)
else if (a%is_bld()) then
! Does this make sense at all?? Who knows..
call altmp%set_dupl(psb_dupl_def_)
end if
if (debug) write(psb_err_unit,*) 'Converting from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
if (debug) write(psb_err_unit,*) 'Converting from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%cp_from_fmt(a%a, info)
call altmp%cp_from_fmt(a%a, info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
call move_alloc(altmp,b%a)
else
call inner_cp_fmt(a%a,b%a,info,type,mold,dupl)
if (allocated(a%ad)) then
call inner_cp_fmt(a%ad,b%ad,info,type,mold,dupl)
end if
if (allocated(a%and)) then
call inner_cp_fmt(a%and,b%and,info,type,mold,dupl)
end if
end if
call move_alloc(altmp,b%a)
call b%trim()
call b%set_asb()
call psb_erractionrestore(err_act)
@ -1303,7 +1364,79 @@ subroutine psb_z_cscnv(a,b,info,type,mold,upd,dupl)
9999 call psb_error_handler(err_act)
return
contains
subroutine inner_cp_fmt(a,b,info,type,mold,dupl)
class(psb_z_base_sparse_mat), intent(in) :: a
class(psb_z_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_),optional, intent(in) :: dupl
character(len=*), optional, intent(in) :: type
class(psb_z_base_sparse_mat), intent(in), optional :: mold
class(psb_z_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act
info = psb_success_
call psb_erractionsave(err_act)
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
select case (psb_toupper(type))
case ('CSR')
allocate(psb_z_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_z_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_z_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
else
allocate(psb_z_csr_sparse_mat :: altmp, stat=info)
!allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
if (present(dupl)) then
call altmp%set_dupl(dupl)
else if (a%is_bld()) then
! Does this make sense at all?? Who knows..
call altmp%set_dupl(psb_dupl_def_)
end if
if (debug) write(psb_err_unit,*) 'Converting from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%cp_from_fmt(a, info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="cp_from_fmt")
goto 9999
end if
call move_alloc(altmp,b)
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine inner_cp_fmt
end subroutine psb_z_cscnv
subroutine psb_z_cscnv_ip(a,info,type,mold,dupl)
@ -1312,13 +1445,12 @@ subroutine psb_z_cscnv_ip(a,info,type,mold,dupl)
use psb_z_mat_mod, psb_protect_name => psb_z_cscnv_ip
implicit none
class(psb_zspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_),optional, intent(in) :: dupl
character(len=*), optional, intent(in) :: type
class(psb_zspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_),optional, intent(in) :: dupl
character(len=*), optional, intent(in) :: type
class(psb_z_base_sparse_mat), intent(in), optional :: mold
class(psb_z_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act
character(len=20) :: name='cscnv_ip'
@ -1345,46 +1477,55 @@ subroutine psb_z_cscnv_ip(a,info,type,mold,dupl)
goto 9999
end if
if (present(mold)) then
if (.false.) then
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
select case (psb_toupper(type))
case ('CSR')
allocate(psb_z_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_z_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_z_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
else
allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
allocate(altmp, mold=mold,stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
else if (present(type)) then
if (debug) write(psb_err_unit,*) 'Converting in-place from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
select case (psb_toupper(type))
case ('CSR')
allocate(psb_z_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_z_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_z_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
call altmp%mv_from_fmt(a%a, info)
call move_alloc(altmp,a%a)
else
allocate(altmp, mold=psb_get_mat_default(a),stat=info)
call inner_mv_fmt(a%a,info,type,mold,dupl)
if (allocated(a%ad)) then
call inner_mv_fmt(a%ad,info,type,mold,dupl)
end if
if (allocated(a%and)) then
call inner_mv_fmt(a%and,info,type,mold,dupl)
end if
end if
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
if (debug) write(psb_err_unit,*) 'Converting in-place from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%mv_from_fmt(a%a, info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
call move_alloc(altmp,a%a)
call a%trim()
call a%set_asb()
call psb_erractionrestore(err_act)
@ -1394,6 +1535,77 @@ subroutine psb_z_cscnv_ip(a,info,type,mold,dupl)
9999 call psb_error_handler(err_act)
return
contains
subroutine inner_mv_fmt(a,info,type,mold,dupl)
class(psb_z_base_sparse_mat), intent(inout), allocatable :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_),optional, intent(in) :: dupl
character(len=*), optional, intent(in) :: type
class(psb_z_base_sparse_mat), intent(in), optional :: mold
class(psb_z_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act
info = psb_success_
call psb_erractionsave(err_act)
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
select case (psb_toupper(type))
case ('CSR')
allocate(psb_z_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_z_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_z_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
else
allocate(psb_z_csr_sparse_mat :: altmp, stat=info)
!allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
if (present(dupl)) then
call altmp%set_dupl(dupl)
else if (a%is_bld()) then
! Does this make sense at all?? Who knows..
call altmp%set_dupl(psb_dupl_def_)
end if
if (debug) write(psb_err_unit,*) 'Converting from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%mv_from_fmt(a, info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
call move_alloc(altmp,a)
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine inner_mv_fmt
end subroutine psb_z_cscnv_ip

@ -1567,3 +1567,300 @@ subroutine caxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
return
end subroutine caxpbyv2
subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
complex(psb_spk_), intent (in) :: x(:)
complex(psb_spk_), intent (inout) :: y(:)
complex(psb_spk_), intent (inout) :: z(:)
complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character(len=20) :: name
name='cabgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if (beta == czero) then
if (gamma == czero) then
if (alpha == czero) then
if (delta == czero) then
! a 0 b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = czero
z(i) = czero
end do
else if (delta /= czero) then
! a 0 b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = czero
z(i) = delta*z(i)
end do
end if
else if (alpha /= czero) then
if (delta == czero) then
! a n b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = czero
end do
else if (delta /= czero) then
! a n b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= czero) then
if (alpha == czero) then
if (delta == czero) then
! a 0 b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = czero
z(i) = czero ! gamma*y(i)
end do
else if (delta /= czero) then
! a 0 b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = czero
z(i) = delta*z(i)
end do
end if
else if (alpha /= czero) then
if (delta == czero) then
! a n b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)
end do
else if (delta /= czero) then
! a n b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
else if (beta /= czero) then
if (gamma == czero) then
if (alpha == czero) then
if (delta == czero) then
! a 0 b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = czero
end do
else if (delta /= czero) then
! a 0 b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = delta*z(i)
end do
end if
else if (alpha /= czero) then
if (delta == czero) then
! a n b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = czero
end do
else if (delta /= czero) then
! a n b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= czero) then
if (alpha == czero) then
if (delta == czero) then
! a 0 b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= czero) then
! a 0 b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
else if (alpha /= czero) then
if (delta == czero) then
! a n b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= czero) then
! a n b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_cabgdxyz
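!
! psi_cabgdxyz fuses two axpby updates in a single sweep: for each i it
! computes
!
!   y(i) = alpha*x(i) + beta*y(i)
!   z(i) = gamma*y(i) + delta*z(i)     ! uses the updated y(i)
!
! The nested if-ladder above only specializes the cases where one or more
! of the coefficients is zero, so the corresponding multiplications (and
! the read of the untouched operand) are skipped.
!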
subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
complex(psb_spk_), intent (in) :: x(:)
complex(psb_spk_), intent (inout) :: y(:)
complex(psb_spk_), intent (inout) :: z(:)
complex(psb_spk_), intent (inout) :: w(:)
complex(psb_spk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character(len=20) :: name
name='cxyzw'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(w).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=9
int_err(2)=1
int_err(3)=size(w)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if ((a==czero).or.(b==czero).or. &
& (c==czero).or.(d==czero).or.&
& (e==czero).or.(f==czero)) then
write(psb_err_unit,*) 'XYZW assumes a,b,c,d,e,f are all nonzero'
else
!$omp parallel do private(i)
do i=1,m
y(i) = a*x(i)+b*y(i)
z(i) = c*y(i)+d*z(i)
w(i) = e*z(i)+f*w(i)
end do
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_cxyzw
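!
! psi_cxyzw chains three fused updates per sweep,
!
!   y(i) = a*x(i) + b*y(i)
!   z(i) = c*y(i) + d*z(i)     ! uses the updated y(i)
!   w(i) = e*z(i) + f*w(i)     ! uses the updated z(i)
!
! and is only implemented for all six coefficients nonzero; otherwise it
! prints a warning and leaves y, z and w untouched.  Callers needing a
! zero coefficient should fall back to psi_cabgdxyz or psb_geaxpby.
!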

@ -1567,3 +1567,300 @@ subroutine daxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
return
end subroutine daxpbyv2
subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
real(psb_dpk_), intent (in) :: x(:)
real(psb_dpk_), intent (inout) :: y(:)
real(psb_dpk_), intent (inout) :: z(:)
real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character(len=20) :: name
name='dabgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if (beta == dzero) then
if (gamma == dzero) then
if (alpha == dzero) then
if (delta == dzero) then
! a 0 b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = dzero
z(i) = dzero
end do
else if (delta /= dzero) then
! a 0 b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = dzero
z(i) = delta*z(i)
end do
end if
else if (alpha /= dzero) then
if (delta == dzero) then
! a n b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = dzero
end do
else if (delta /= dzero) then
! a n b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= dzero) then
if (alpha == dzero) then
if (delta == dzero) then
! a 0 b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = dzero
z(i) = dzero ! gamma*y(i)
end do
else if (delta /= dzero) then
! a 0 b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = dzero
z(i) = delta*z(i)
end do
end if
else if (alpha /= dzero) then
if (delta == dzero) then
! a n b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)
end do
else if (delta /= dzero) then
! a n b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
else if (beta /= dzero) then
if (gamma == dzero) then
if (alpha == dzero) then
if (delta == dzero) then
! a 0 b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = dzero
end do
else if (delta /= dzero) then
! a 0 b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = delta*z(i)
end do
end if
else if (alpha /= dzero) then
if (delta == dzero) then
! a n b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = dzero
end do
else if (delta /= dzero) then
! a n b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= dzero) then
if (alpha == dzero) then
if (delta == dzero) then
! a 0 b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= dzero) then
! a 0 b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
else if (alpha /= dzero) then
if (delta == dzero) then
! a n b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= dzero) then
! a n b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_dabgdxyz
subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
real(psb_dpk_), intent (in) :: x(:)
real(psb_dpk_), intent (inout) :: y(:)
real(psb_dpk_), intent (inout) :: z(:)
real(psb_dpk_), intent (inout) :: w(:)
real(psb_dpk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character(len=20) :: name
name='dxyzw'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(w).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=9
int_err(2)=1
int_err(3)=size(w)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if ((a==dzero).or.(b==dzero).or. &
& (c==dzero).or.(d==dzero).or.&
& (e==dzero).or.(f==dzero)) then
write(psb_err_unit,*) 'XYZW assumes a,b,c,d,e,f are all nonzero'
else
!$omp parallel do private(i)
do i=1,m
y(i) = a*x(i)+b*y(i)
z(i) = c*y(i)+d*z(i)
w(i) = e*z(i)+f*w(i)
end do
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_dxyzw

@ -1567,3 +1567,300 @@ subroutine eaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
return
end subroutine eaxpbyv2
subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_epk_), intent (in) :: x(:)
integer(psb_epk_), intent (inout) :: y(:)
integer(psb_epk_), intent (inout) :: z(:)
integer(psb_epk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character(len=20) :: name
name='eabgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if (beta == ezero) then
if (gamma == ezero) then
if (alpha == ezero) then
if (delta == ezero) then
! a 0 b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = ezero
z(i) = ezero
end do
else if (delta /= ezero) then
! a 0 b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = ezero
z(i) = delta*z(i)
end do
end if
else if (alpha /= ezero) then
if (delta == ezero) then
! a n b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = ezero
end do
else if (delta /= ezero) then
! a n b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= ezero) then
if (alpha == ezero) then
if (delta == ezero) then
! a 0 b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = ezero
z(i) = ezero ! gamma*y(i)
end do
else if (delta /= ezero) then
! a 0 b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = ezero
z(i) = delta*z(i)
end do
end if
else if (alpha /= ezero) then
if (delta == ezero) then
! a n b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)
end do
else if (delta /= ezero) then
! a n b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
else if (beta /= ezero) then
if (gamma == ezero) then
if (alpha == ezero) then
if (delta == ezero) then
! a 0 b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = ezero
end do
else if (delta /= ezero) then
! a 0 b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = delta*z(i)
end do
end if
else if (alpha /= ezero) then
if (delta == ezero) then
! a n b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = ezero
end do
else if (delta /= ezero) then
! a n b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= ezero) then
if (alpha == ezero) then
if (delta == ezero) then
! a 0 b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= ezero) then
! a 0 b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
else if (alpha /= ezero) then
if (delta == ezero) then
! a n b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= ezero) then
! a n b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_eabgdxyz
subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_epk_), intent (in) :: x(:)
integer(psb_epk_), intent (inout) :: y(:)
integer(psb_epk_), intent (inout) :: z(:)
integer(psb_epk_), intent (inout) :: w(:)
integer(psb_epk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character(len=20) :: name
name='exyzw'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(w).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=9
int_err(2)=1
int_err(3)=size(w)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if ((a==ezero).or.(b==ezero).or. &
& (c==ezero).or.(d==ezero).or.&
& (e==ezero).or.(f==ezero)) then
write(psb_err_unit,*) 'XYZW assumes a,b,c,d,e,f are all nonzero'
else
!$omp parallel do private(i)
do i=1,m
y(i) = a*x(i)+b*y(i)
z(i) = c*y(i)+d*z(i)
w(i) = e*z(i)+f*w(i)
end do
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_exyzw

@ -1567,3 +1567,300 @@ subroutine i2axpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
return
end subroutine i2axpbyv2
subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_i2pk_), intent (in) :: x(:)
integer(psb_i2pk_), intent (inout) :: y(:)
integer(psb_i2pk_), intent (inout) :: z(:)
integer(psb_i2pk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character(len=20) :: name
name='i2abgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if (beta == i2zero) then
if (gamma == i2zero) then
if (alpha == i2zero) then
if (delta == i2zero) then
! a 0 b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = i2zero
z(i) = i2zero
end do
else if (delta /= i2zero) then
! a 0 b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = i2zero
z(i) = delta*z(i)
end do
end if
else if (alpha /= i2zero) then
if (delta == i2zero) then
! a n b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = i2zero
end do
else if (delta /= i2zero) then
! a n b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= i2zero) then
if (alpha == i2zero) then
if (delta == i2zero) then
! a 0 b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = i2zero
z(i) = i2zero ! gamma*y(i)
end do
else if (delta /= i2zero) then
! a 0 b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = i2zero
z(i) = delta*z(i)
end do
end if
else if (alpha /= i2zero) then
if (delta == i2zero) then
! a n b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)
end do
else if (delta /= i2zero) then
! a n b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
else if (beta /= i2zero) then
if (gamma == i2zero) then
if (alpha == i2zero) then
if (delta == i2zero) then
! a 0 b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = i2zero
end do
else if (delta /= i2zero) then
! a 0 b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = delta*z(i)
end do
end if
else if (alpha /= i2zero) then
if (delta == i2zero) then
! a n b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = i2zero
end do
else if (delta /= i2zero) then
! a n b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= i2zero) then
if (alpha == i2zero) then
if (delta == i2zero) then
! a 0 b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= i2zero) then
! a 0 b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
else if (alpha /= i2zero) then
if (delta == i2zero) then
! a n b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= i2zero) then
! a n b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_i2abgdxyz
subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_i2pk_), intent (in) :: x(:)
integer(psb_i2pk_), intent (inout) :: y(:)
integer(psb_i2pk_), intent (inout) :: z(:)
integer(psb_i2pk_), intent (inout) :: w(:)
integer(psb_i2pk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
  name='i2xyzw'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if ((a==i2zero).or.(b==i2zero).or. &
& (c==i2zero).or.(d==i2zero).or.&
& (e==i2zero).or.(f==i2zero)) then
write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero'
else
!$omp parallel do private(i)
do i=1,m
y(i) = a*x(i)+b*y(i)
z(i) = c*y(i)+d*z(i)
w(i) = e*z(i)+f*w(i)
end do
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_i2xyzw

@ -1567,3 +1567,300 @@ subroutine maxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
return
end subroutine maxpbyv2
subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_mpk_), intent (in) :: x(:)
integer(psb_mpk_), intent (inout) :: y(:)
integer(psb_mpk_), intent (inout) :: z(:)
integer(psb_mpk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='mabgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if (beta == mzero) then
if (gamma == mzero) then
if (alpha == mzero) then
if (delta == mzero) then
! a 0 b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = mzero
z(i) = mzero
end do
else if (delta /= mzero) then
! a 0 b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = mzero
z(i) = delta*z(i)
end do
end if
else if (alpha /= mzero) then
if (delta == mzero) then
! a n b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = mzero
end do
else if (delta /= mzero) then
! a n b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= mzero) then
if (alpha == mzero) then
if (delta == mzero) then
! a 0 b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = mzero
z(i) = mzero ! gamma*y(i)
end do
else if (delta /= mzero) then
! a 0 b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = mzero
z(i) = delta*z(i)
end do
end if
else if (alpha /= mzero) then
if (delta == mzero) then
! a n b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)
end do
else if (delta /= mzero) then
! a n b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
else if (beta /= mzero) then
if (gamma == mzero) then
if (alpha == mzero) then
if (delta == mzero) then
! a 0 b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = mzero
end do
else if (delta /= mzero) then
! a 0 b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = delta*z(i)
end do
end if
else if (alpha /= mzero) then
if (delta == mzero) then
! a n b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = mzero
end do
else if (delta /= mzero) then
! a n b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= mzero) then
if (alpha == mzero) then
if (delta == mzero) then
! a 0 b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= mzero) then
! a 0 b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
else if (alpha /= mzero) then
if (delta == mzero) then
! a n b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= mzero) then
! a n b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_mabgdxyz
subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_mpk_), intent (in) :: x(:)
integer(psb_mpk_), intent (inout) :: y(:)
integer(psb_mpk_), intent (inout) :: z(:)
integer(psb_mpk_), intent (inout) :: w(:)
integer(psb_mpk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
  name='mxyzw'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if ((a==mzero).or.(b==mzero).or. &
& (c==mzero).or.(d==mzero).or.&
& (e==mzero).or.(f==mzero)) then
write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero'
else
!$omp parallel do private(i)
do i=1,m
y(i) = a*x(i)+b*y(i)
z(i) = c*y(i)+d*z(i)
w(i) = e*z(i)+f*w(i)
end do
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_mxyzw

@ -1567,3 +1567,300 @@ subroutine saxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
return
end subroutine saxpbyv2
subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
real(psb_spk_), intent (in) :: x(:)
real(psb_spk_), intent (inout) :: y(:)
real(psb_spk_), intent (inout) :: z(:)
real(psb_spk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='sabgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if (beta == szero) then
if (gamma == szero) then
if (alpha == szero) then
if (delta == szero) then
! a 0 b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = szero
z(i) = szero
end do
else if (delta /= szero) then
! a 0 b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = szero
z(i) = delta*z(i)
end do
end if
else if (alpha /= szero) then
if (delta == szero) then
! a n b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = szero
end do
else if (delta /= szero) then
! a n b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= szero) then
if (alpha == szero) then
if (delta == szero) then
! a 0 b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = szero
z(i) = szero ! gamma*y(i)
end do
else if (delta /= szero) then
! a 0 b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = szero
z(i) = delta*z(i)
end do
end if
else if (alpha /= szero) then
if (delta == szero) then
! a n b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)
end do
else if (delta /= szero) then
! a n b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
else if (beta /= szero) then
if (gamma == szero) then
if (alpha == szero) then
if (delta == szero) then
! a 0 b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = szero
end do
else if (delta /= szero) then
! a 0 b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = delta*z(i)
end do
end if
else if (alpha /= szero) then
if (delta == szero) then
! a n b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = szero
end do
else if (delta /= szero) then
! a n b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= szero) then
if (alpha == szero) then
if (delta == szero) then
! a 0 b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= szero) then
! a 0 b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
else if (alpha /= szero) then
if (delta == szero) then
! a n b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= szero) then
! a n b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_sabgdxyz
subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
real(psb_spk_), intent (in) :: x(:)
real(psb_spk_), intent (inout) :: y(:)
real(psb_spk_), intent (inout) :: z(:)
real(psb_spk_), intent (inout) :: w(:)
real(psb_spk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
  name='sxyzw'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if ((a==szero).or.(b==szero).or. &
& (c==szero).or.(d==szero).or.&
& (e==szero).or.(f==szero)) then
write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero'
else
!$omp parallel do private(i)
do i=1,m
y(i) = a*x(i)+b*y(i)
z(i) = c*y(i)+d*z(i)
w(i) = e*z(i)+f*w(i)
end do
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_sxyzw

@ -1567,3 +1567,300 @@ subroutine zaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
return
end subroutine zaxpbyv2
subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
complex(psb_dpk_), intent (in) :: x(:)
complex(psb_dpk_), intent (inout) :: y(:)
complex(psb_dpk_), intent (inout) :: z(:)
complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='zabgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if (beta == zzero) then
if (gamma == zzero) then
if (alpha == zzero) then
if (delta == zzero) then
! a 0 b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = zzero
z(i) = zzero
end do
else if (delta /= zzero) then
! a 0 b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = zzero
z(i) = delta*z(i)
end do
end if
else if (alpha /= zzero) then
if (delta == zzero) then
! a n b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = zzero
end do
else if (delta /= zzero) then
! a n b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= zzero) then
if (alpha == zzero) then
if (delta == zzero) then
! a 0 b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = zzero
z(i) = zzero ! gamma*y(i)
end do
else if (delta /= zzero) then
! a 0 b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = zzero
z(i) = delta*z(i)
end do
end if
else if (alpha /= zzero) then
if (delta == zzero) then
! a n b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)
end do
else if (delta /= zzero) then
! a n b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
else if (beta /= zzero) then
if (gamma == zzero) then
if (alpha == zzero) then
if (delta == zzero) then
! a 0 b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = zzero
end do
else if (delta /= zzero) then
! a 0 b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = delta*z(i)
end do
end if
else if (alpha /= zzero) then
if (delta == zzero) then
! a n b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = zzero
end do
else if (delta /= zzero) then
! a n b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= zzero) then
if (alpha == zzero) then
if (delta == zzero) then
! a 0 b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= zzero) then
! a 0 b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
else if (alpha /= zzero) then
if (delta == zzero) then
! a n b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= zzero) then
! a n b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_zabgdxyz
subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
complex(psb_dpk_), intent (in) :: x(:)
complex(psb_dpk_), intent (inout) :: y(:)
complex(psb_dpk_), intent (inout) :: z(:)
complex(psb_dpk_), intent (inout) :: w(:)
complex(psb_dpk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
  name='zxyzw'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if ((a==zzero).or.(b==zzero).or. &
& (c==zzero).or.(d==zzero).or.&
& (e==zzero).or.(f==zzero)) then
write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero'
else
!$omp parallel do private(i)
do i=1,m
y(i) = a*x(i)+b*y(i)
z(i) = c*y(i)+d*z(i)
w(i) = e*z(i)+f*w(i)
end do
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_zxyzw

@ -44,7 +44,7 @@
! psb_upd_perm_ Permutation(more memory)
!
!
subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold)
subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold, bld_and)
use psb_base_mod, psb_protect_name => psb_cspasb
use psb_sort_mod
use psi_mod
@ -58,6 +58,7 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold)
integer(psb_ipk_), optional, intent(in) :: upd
character(len=*), optional, intent(in) :: afmt
class(psb_c_base_sparse_mat), intent(in), optional :: mold
logical, intent(in), optional :: bld_and
!....Locals....
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np,me, err_act
@ -65,6 +66,7 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold)
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name, ch_err
class(psb_i_base_vect_type), allocatable :: ivm
logical :: bld_and_
info = psb_success_
name = 'psb_spasb'
@ -93,7 +95,11 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold)
if (debug_level >= psb_debug_ext_)&
& write(debug_unit, *) me,' ',trim(name),&
& ' Begin matrix assembly...'
if (present(bld_and)) then
bld_and_ = bld_and
else
bld_and_ = .false.
end if
!check on errors encountered in psdspins
if (a%is_bld()) then
@ -171,7 +177,49 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold)
end if
if (bld_and_) then
!!$ allocate(a%ad,mold=a%a)
!!$  allocate(a%and,mold=a%a)
call a%split_nd(n_row,n_col,info)
!!$ block
!!$ character(len=1024) :: fname
!!$ type(psb_c_coo_sparse_mat) :: acoo
!!$ type(psb_c_csr_sparse_mat), allocatable :: aclip
!!$ type(psb_c_ecsr_sparse_mat), allocatable :: andclip
!!$ logical, parameter :: use_ecsr=.true.
!!$ allocate(aclip)
!!$ call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.)
!!$ allocate(a%ad,mold=a%a)
!!$ call a%ad%mv_from_coo(acoo,info)
!!$ call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.)
!!$ if (use_ecsr) then
!!$ allocate(andclip)
!!$ call andclip%mv_from_coo(acoo,info)
!!$ call move_alloc(andclip,a%and)
!!$ else
!!$ allocate(a%and,mold=a%a)
!!$ call a%and%mv_from_coo(acoo,info)
!!$ end if
!!$ if (.false.) then
!!$ write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx'
!!$ open(25,file=fname)
!!$ call a%ad%print(25)
!!$ close(25)
!!$ write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx'
!!$ open(25,file=fname)
!!$ call a%and%print(25)
!!$ close(25)
!!$ !call andclip%set_cols(n_col)
!!$ write(*,*) me,' ',trim(name),' ad ',&
!!$ &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col
!!$ write(*,*) me,' ',trim(name),' and ',&
!!$ &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col
!!$ end if
!!$ end block
else
if (allocated(a%ad)) deallocate(a%ad)
if (allocated(a%and)) deallocate(a%and)
end if
if (debug_level >= psb_debug_ext_) then
ch_err=a%get_fmt()
write(debug_unit, *) me,' ',trim(name),': From SPCNV',&

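
The same bld_and handling is repeated verbatim for the d, s and z variants below. A minimal sketch of how a caller would request the split, assuming the generic interface psb_spasb has been extended with the same optional argument and that matrix and descriptor have already been built and filled (the wrapper itself is hypothetical):

subroutine assemble_with_split_sketch(a, desc_a, info)
  use psb_base_mod
  implicit none
  type(psb_dspmat_type), intent(inout) :: a
  type(psb_desc_type),   intent(inout) :: desc_a
  integer(psb_ipk_),     intent(out)   :: info
  ! bld_and=.true. triggers the a%split_nd(n_row,n_col,info) branch above,
  ! building the local-column block a%ad and the halo-column block a%and;
  ! the default (.false.) instead deallocates any previously built split.
  call psb_spasb(a, desc_a, info, afmt='CSR', bld_and=.true.)
end subroutine assemble_with_split_sketch
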
@ -44,7 +44,7 @@
! psb_upd_perm_ Permutation(more memory)
!
!
subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold)
subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold, bld_and)
use psb_base_mod, psb_protect_name => psb_dspasb
use psb_sort_mod
use psi_mod
@ -58,6 +58,7 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold)
integer(psb_ipk_), optional, intent(in) :: upd
character(len=*), optional, intent(in) :: afmt
class(psb_d_base_sparse_mat), intent(in), optional :: mold
logical, intent(in), optional :: bld_and
!....Locals....
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np,me, err_act
@ -65,6 +66,7 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold)
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name, ch_err
class(psb_i_base_vect_type), allocatable :: ivm
logical :: bld_and_
info = psb_success_
name = 'psb_spasb'
@ -93,7 +95,11 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold)
if (debug_level >= psb_debug_ext_)&
& write(debug_unit, *) me,' ',trim(name),&
& ' Begin matrix assembly...'
if (present(bld_and)) then
bld_and_ = bld_and
else
bld_and_ = .false.
end if
!check on errors encountered in psdspins
if (a%is_bld()) then
@ -171,7 +177,49 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold)
end if
if (bld_and_) then
!!$ allocate(a%ad,mold=a%a)
!!$  allocate(a%and,mold=a%a)
call a%split_nd(n_row,n_col,info)
!!$ block
!!$ character(len=1024) :: fname
!!$ type(psb_d_coo_sparse_mat) :: acoo
!!$ type(psb_d_csr_sparse_mat), allocatable :: aclip
!!$ type(psb_d_ecsr_sparse_mat), allocatable :: andclip
!!$ logical, parameter :: use_ecsr=.true.
!!$ allocate(aclip)
!!$ call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.)
!!$ allocate(a%ad,mold=a%a)
!!$ call a%ad%mv_from_coo(acoo,info)
!!$ call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.)
!!$ if (use_ecsr) then
!!$ allocate(andclip)
!!$ call andclip%mv_from_coo(acoo,info)
!!$ call move_alloc(andclip,a%and)
!!$ else
!!$ allocate(a%and,mold=a%a)
!!$ call a%and%mv_from_coo(acoo,info)
!!$ end if
!!$ if (.false.) then
!!$ write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx'
!!$ open(25,file=fname)
!!$ call a%ad%print(25)
!!$ close(25)
!!$ write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx'
!!$ open(25,file=fname)
!!$ call a%and%print(25)
!!$ close(25)
!!$ !call andclip%set_cols(n_col)
!!$ write(*,*) me,' ',trim(name),' ad ',&
!!$ &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col
!!$ write(*,*) me,' ',trim(name),' and ',&
!!$ &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col
!!$ end if
!!$ end block
else
if (allocated(a%ad)) deallocate(a%ad)
if (allocated(a%and)) deallocate(a%and)
end if
if (debug_level >= psb_debug_ext_) then
ch_err=a%get_fmt()
write(debug_unit, *) me,' ',trim(name),': From SPCNV',&

@ -44,7 +44,7 @@
! psb_upd_perm_ Permutation(more memory)
!
!
subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold)
subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold, bld_and)
use psb_base_mod, psb_protect_name => psb_sspasb
use psb_sort_mod
use psi_mod
@ -58,6 +58,7 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold)
integer(psb_ipk_), optional, intent(in) :: upd
character(len=*), optional, intent(in) :: afmt
class(psb_s_base_sparse_mat), intent(in), optional :: mold
logical, intent(in), optional :: bld_and
!....Locals....
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np,me, err_act
@ -65,6 +66,7 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold)
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name, ch_err
class(psb_i_base_vect_type), allocatable :: ivm
logical :: bld_and_
info = psb_success_
name = 'psb_spasb'
@ -93,7 +95,11 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold)
if (debug_level >= psb_debug_ext_)&
& write(debug_unit, *) me,' ',trim(name),&
& ' Begin matrix assembly...'
if (present(bld_and)) then
bld_and_ = bld_and
else
bld_and_ = .false.
end if
!check on errors encountered in psdspins
if (a%is_bld()) then
@ -171,7 +177,49 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold)
end if
if (bld_and_) then
!!$ allocate(a%ad,mold=a%a)
!!$  allocate(a%and,mold=a%a)
call a%split_nd(n_row,n_col,info)
!!$ block
!!$ character(len=1024) :: fname
!!$ type(psb_s_coo_sparse_mat) :: acoo
!!$ type(psb_s_csr_sparse_mat), allocatable :: aclip
!!$ type(psb_s_ecsr_sparse_mat), allocatable :: andclip
!!$ logical, parameter :: use_ecsr=.true.
!!$ allocate(aclip)
!!$ call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.)
!!$ allocate(a%ad,mold=a%a)
!!$ call a%ad%mv_from_coo(acoo,info)
!!$ call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.)
!!$ if (use_ecsr) then
!!$ allocate(andclip)
!!$ call andclip%mv_from_coo(acoo,info)
!!$ call move_alloc(andclip,a%and)
!!$ else
!!$ allocate(a%and,mold=a%a)
!!$ call a%and%mv_from_coo(acoo,info)
!!$ end if
!!$ if (.false.) then
!!$ write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx'
!!$ open(25,file=fname)
!!$ call a%ad%print(25)
!!$ close(25)
!!$ write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx'
!!$ open(25,file=fname)
!!$ call a%and%print(25)
!!$ close(25)
!!$ !call andclip%set_cols(n_col)
!!$ write(*,*) me,' ',trim(name),' ad ',&
!!$ &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col
!!$ write(*,*) me,' ',trim(name),' and ',&
!!$ &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col
!!$ end if
!!$ end block
else
if (allocated(a%ad)) deallocate(a%ad)
if (allocated(a%and)) deallocate(a%and)
end if
if (debug_level >= psb_debug_ext_) then
ch_err=a%get_fmt()
write(debug_unit, *) me,' ',trim(name),': From SPCNV',&

@ -44,7 +44,7 @@
! psb_upd_perm_ Permutation(more memory)
!
!
subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold)
subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold, bld_and)
use psb_base_mod, psb_protect_name => psb_zspasb
use psb_sort_mod
use psi_mod
@ -58,6 +58,7 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold)
integer(psb_ipk_), optional, intent(in) :: upd
character(len=*), optional, intent(in) :: afmt
class(psb_z_base_sparse_mat), intent(in), optional :: mold
logical, intent(in), optional :: bld_and
!....Locals....
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np,me, err_act
@ -65,6 +66,7 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold)
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name, ch_err
class(psb_i_base_vect_type), allocatable :: ivm
logical :: bld_and_
info = psb_success_
name = 'psb_spasb'
@ -93,7 +95,11 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold)
if (debug_level >= psb_debug_ext_)&
& write(debug_unit, *) me,' ',trim(name),&
& ' Begin matrix assembly...'
if (present(bld_and)) then
bld_and_ = bld_and
else
bld_and_ = .false.
end if
!check on errors encountered in psdspins
if (a%is_bld()) then
@ -171,7 +177,49 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold)
end if
if (bld_and_) then
!!$ allocate(a%ad,mold=a%a)
!!$  allocate(a%and,mold=a%a)
call a%split_nd(n_row,n_col,info)
!!$ block
!!$ character(len=1024) :: fname
!!$ type(psb_z_coo_sparse_mat) :: acoo
!!$ type(psb_z_csr_sparse_mat), allocatable :: aclip
!!$ type(psb_z_ecsr_sparse_mat), allocatable :: andclip
!!$ logical, parameter :: use_ecsr=.true.
!!$ allocate(aclip)
!!$ call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.)
!!$ allocate(a%ad,mold=a%a)
!!$ call a%ad%mv_from_coo(acoo,info)
!!$ call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.)
!!$ if (use_ecsr) then
!!$ allocate(andclip)
!!$ call andclip%mv_from_coo(acoo,info)
!!$ call move_alloc(andclip,a%and)
!!$ else
!!$ allocate(a%and,mold=a%a)
!!$ call a%and%mv_from_coo(acoo,info)
!!$ end if
!!$ if (.false.) then
!!$ write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx'
!!$ open(25,file=fname)
!!$ call a%ad%print(25)
!!$ close(25)
!!$ write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx'
!!$ open(25,file=fname)
!!$ call a%and%print(25)
!!$ close(25)
!!$ !call andclip%set_cols(n_col)
!!$ write(*,*) me,' ',trim(name),' ad ',&
!!$ &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col
!!$ write(*,*) me,' ',trim(name),' and ',&
!!$ &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col
!!$ end if
!!$ end block
else
if (allocated(a%ad)) deallocate(a%ad)
if (allocated(a%and)) deallocate(a%and)
end if
if (debug_level >= psb_debug_ext_) then
ch_err=a%get_fmt()
write(debug_unit, *) me,' ',trim(name),': From SPCNV',&

@ -255,6 +255,46 @@ int axpbyMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha, void* devMultiVe
return(i);
}
int abgdxyzMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha,cuFloatComplex beta,
cuFloatComplex gamma, cuFloatComplex delta,
void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ)
{ int j=0, i=0;
int pitch = 0;
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY;
struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ;
spgpuHandle_t handle=psb_cudaGetHandle();
pitch = devVecY->pitch_;
if ((n > devVecY->size_) || (n>devVecX->size_ ))
return SPGPU_UNSUPPORTED;
spgpuCabgdxyz(handle,n, alpha,beta,gamma,delta,
(cuFloatComplex *)devVecX->v_,(cuFloatComplex *) devVecY->v_,(cuFloatComplex *) devVecZ->v_);
return(i);
}
int xyzwMultiVecDeviceFloatComplex(int n,cuFloatComplex a,cuFloatComplex b,
cuFloatComplex c, cuFloatComplex d,
cuFloatComplex e, cuFloatComplex f,
void* devMultiVecX, void* devMultiVecY,
void* devMultiVecZ, void* devMultiVecW)
{ int j=0, i=0;
int pitch = 0;
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY;
struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ;
struct MultiVectDevice *devVecW = (struct MultiVectDevice *) devMultiVecW;
spgpuHandle_t handle=psb_cudaGetHandle();
pitch = devVecY->pitch_;
if ((n > devVecY->size_) || (n>devVecX->size_ ))
return SPGPU_UNSUPPORTED;
spgpuCxyzw(handle,n, a,b,c,d,e,f,
(cuFloatComplex *)devVecX->v_,(cuFloatComplex *) devVecY->v_,
(cuFloatComplex *) devVecZ->v_,(cuFloatComplex *) devVecW->v_);
return(i);
}
int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha,
void *deviceVecA, void *deviceVecB)
{ int i = 0;

@ -69,6 +69,14 @@ int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA);
int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA, void* devVecB);
int axpbyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void* devVecX, cuFloatComplex beta, void* devVecY);
int abgdxyzMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha,cuFloatComplex beta,
cuFloatComplex gamma, cuFloatComplex delta,
void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ);
int xyzwMultiVecDeviceFloatComplex(int n,cuFloatComplex a,cuFloatComplex b,
cuFloatComplex c, cuFloatComplex d,
cuFloatComplex e, cuFloatComplex f,
void* devMultiVecX, void* devMultiVecY,
void* devMultiVecZ, void* devMultiVecW);
int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA, void *deviceVecB);
int axybzMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA,
void *deviceVecB, cuFloatComplex beta, void *deviceVecZ);

@ -241,6 +241,42 @@ int axpbyMultiVecDeviceDouble(int n,double alpha, void* devMultiVecX,
return(i);
}
int abgdxyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, double delta,
void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ)
{ int j=0, i=0;
int pitch = 0;
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY;
struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ;
spgpuHandle_t handle=psb_cudaGetHandle();
pitch = devVecY->pitch_;
if ((n > devVecY->size_) || (n>devVecX->size_ ))
return SPGPU_UNSUPPORTED;
spgpuDabgdxyz(handle,n, alpha,beta,gamma,delta,
(double*)devVecX->v_,(double*) devVecY->v_,(double*) devVecZ->v_);
return(i);
}
int xyzwMultiVecDeviceDouble(int n,double a, double b, double c, double d, double e, double f,
void* devMultiVecX, void* devMultiVecY,
void* devMultiVecZ, void* devMultiVecW)
{ int j=0, i=0;
int pitch = 0;
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY;
struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ;
struct MultiVectDevice *devVecW = (struct MultiVectDevice *) devMultiVecW;
spgpuHandle_t handle=psb_cudaGetHandle();
pitch = devVecY->pitch_;
if ((n > devVecY->size_) || (n>devVecX->size_ ))
return SPGPU_UNSUPPORTED;
spgpuDxyzw(handle,n, a,b,c,d,e,f,
(double*)devVecX->v_,(double*) devVecY->v_,(double*) devVecZ->v_,(double*) devVecW->v_);
return(i);
}
int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceVecB)
{ int i = 0;
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;

@ -67,6 +67,11 @@ int asumMultiVecDeviceDouble(double* y_res, int n, void* devVecA);
int dotMultiVecDeviceDouble(double* y_res, int n, void* devVecA, void* devVecB);
int axpbyMultiVecDeviceDouble(int n, double alpha, void* devVecX, double beta, void* devVecY);
int abgdxyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, double delta,
void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ);
int xyzwMultiVecDeviceDouble(int n,double a, double b, double c, double d, double e, double f,
void* devMultiVecX, void* devMultiVecY,
void* devMultiVecZ, void* devMultiVecW);
int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceVecB);
int axybzMultiVecDeviceDouble(int n, double alpha, void *deviceVecA,
void *deviceVecB, double beta, void *deviceVecZ);

@ -30,7 +30,7 @@
!
module psb_c_cuda_vect_mod
use iso_c_binding
use psb_const_mod
use psb_error_mod
@ -90,6 +90,7 @@ module psb_c_cuda_vect_mod
procedure, pass(x) :: dot_a => c_cuda_dot_a
procedure, pass(y) :: axpby_v => c_cuda_axpby_v
procedure, pass(y) :: axpby_a => c_cuda_axpby_a
procedure, pass(z) :: abgdxyz => c_cuda_abgdxyz
procedure, pass(y) :: mlt_v => c_cuda_mlt_v
procedure, pass(y) :: mlt_a => c_cuda_mlt_a
procedure, pass(z) :: mlt_a_2 => c_cuda_mlt_a_2
@ -667,7 +668,9 @@ contains
use psi_serial_mod
implicit none
class(psb_c_vect_cuda), intent(inout) :: x
! Since we are overwriting, make sure to do it
! on the GPU side
call x%set_dev()
call x%set_scal(czero)
end subroutine c_cuda_zero
@ -909,6 +912,131 @@ contains
end subroutine c_cuda_axpby_v
subroutine c_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_c_base_vect_type), intent(inout) :: x
class(psb_c_base_vect_type), intent(inout) :: y
class(psb_c_vect_cuda), intent(inout) :: z
complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nx, ny, nz
logical :: gpu_done
info = psb_success_
if (.true.) then
gpu_done = .false.
select type(xx => x)
class is (psb_c_vect_cuda)
select type(yy => y)
class is (psb_c_vect_cuda)
select type(zz => z)
class is (psb_c_vect_cuda)
! Do something different here
if ((beta /= czero).and.yy%is_host())&
& call yy%sync()
if ((delta /= czero).and.zz%is_host())&
& call zz%sync()
if (xx%is_host()) call xx%sync()
nx = getMultiVecDeviceSize(xx%deviceVect)
ny = getMultiVecDeviceSize(yy%deviceVect)
nz = getMultiVecDeviceSize(zz%deviceVect)
if ((nx<m).or.(ny<m).or.(nz<m)) then
info = psb_err_internal_error_
else
info = abgdxyzMultiVecDevice(m,alpha,beta,gamma,delta,&
& xx%deviceVect,yy%deviceVect,zz%deviceVect)
end if
call yy%set_dev()
call zz%set_dev()
gpu_done = .true.
end select
end select
end select
if (.not.gpu_done) then
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
if (z%is_host()) call z%sync()
call y%axpby(m,alpha,x,beta,info)
call z%axpby(m,gamma,y,delta,info)
end if
else
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
if (z%is_host()) call z%sync()
call y%axpby(m,alpha,x,beta,info)
call z%axpby(m,gamma,y,delta,info)
end if
end subroutine c_cuda_abgdxyz
subroutine c_cuda_xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_c_base_vect_type), intent(inout) :: x
class(psb_c_base_vect_type), intent(inout) :: y
class(psb_c_base_vect_type), intent(inout) :: z
class(psb_c_vect_cuda), intent(inout) :: w
complex(psb_spk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nx, ny, nz, nw
logical :: gpu_done
info = psb_success_
gpu_done = .false.
if ((a==czero).or.(b==czero).or. &
& (c==czero).or.(d==czero).or.&
& (e==czero).or.(f==czero)) then
write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero'
else
select type(xx => x)
class is (psb_c_vect_cuda)
select type(yy => y)
class is (psb_c_vect_cuda)
select type(zz => z)
class is (psb_c_vect_cuda)
! Do something different here
if (xx%is_host()) call xx%sync()
if (yy%is_host()) call yy%sync()
if (zz%is_host()) call zz%sync()
if (w%is_host()) call w%sync()
nx = getMultiVecDeviceSize(xx%deviceVect)
ny = getMultiVecDeviceSize(yy%deviceVect)
nz = getMultiVecDeviceSize(zz%deviceVect)
nw = getMultiVecDeviceSize(w%deviceVect)
if ((nx<m).or.(ny<m).or.(nz<m).or.(nw<m)) then
info = psb_err_internal_error_
else
info = xyzwMultiVecDevice(m,a,b,c,d,e,f,&
& xx%deviceVect,yy%deviceVect,zz%deviceVect,w%deviceVect)
end if
call yy%set_dev()
call zz%set_dev()
call w%set_dev()
gpu_done = .true.
end select
end select
end select
if (.not.gpu_done) then
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
if (z%is_host()) call z%sync()
if (w%is_host()) call w%sync()
call y%axpby(m,a,x,b,info)
call z%axpby(m,c,y,d,info)
call w%axpby(m,e,z,f,info)
end if
end if
end subroutine c_cuda_xyzw
subroutine c_cuda_axpby_a(m,alpha, x, beta, y, info)
use psi_serial_mod
implicit none
@ -1488,7 +1616,7 @@ contains
!!$ complex(psb_spk_), external :: ddot
!!$ integer(psb_ipk_) :: info
!!$
!!$ res = dzero
!!$ res = czero
!!$ !
!!$ ! Note: this is the gpu implementation.
!!$ ! When we get here, we are sure that X is of
@ -1542,13 +1670,13 @@ contains
!!$
!!$ select type(xx => x)
!!$ type is (psb_c_base_multivect_type)
!!$ if ((beta /= dzero).and.(y%is_dev()))&
!!$ if ((beta /= czero).and.(y%is_dev()))&
!!$ & call y%sync()
!!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info)
!!$ call y%set_host()
!!$ type is (psb_c_multivect_cuda)
!!$ ! Do something different here
!!$ if ((beta /= dzero).and.y%is_host())&
!!$ if ((beta /= czero).and.y%is_host())&
!!$ & call y%sync()
!!$ if (xx%is_host()) call xx%sync()
!!$ nx = getMultiVecDeviceSize(xx%deviceVect)
@ -1793,7 +1921,7 @@ contains
implicit none
class(psb_c_multivect_cuda), intent(inout) :: x
if (allocated(x%v)) x%v=dzero
if (allocated(x%v)) x%v=czero
call x%set_host()
end subroutine c_cuda_multi_zero

@ -28,8 +28,6 @@
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
module psb_c_vectordev_mod
use psb_base_vectordev_mod
@ -304,7 +302,6 @@ module psb_c_vectordev_mod
end function asumMultiVecDeviceFloatComplex
end interface
interface axpbyMultiVecDevice
function axpbyMultiVecDeviceFloatComplex(n,alpha,deviceVecA,beta,deviceVecB) &
& result(res) bind(c,name='axpbyMultiVecDeviceFloatComplex')
@ -316,6 +313,30 @@ module psb_c_vectordev_mod
end function axpbyMultiVecDeviceFloatComplex
end interface
interface abgdxyzMultiVecDevice
function abgdxyzMultiVecDeviceFloatComplex(n,alpha,beta,gamma,delta,deviceVecX,&
& deviceVecY,deviceVecZ) &
& result(res) bind(c,name='abgdxyzMultiVecDeviceFloatComplex')
use iso_c_binding
integer(c_int) :: res
integer(c_int), value :: n
complex(c_float_complex), value :: alpha, beta,gamma,delta
type(c_ptr), value :: deviceVecX, deviceVecY, deviceVecZ
end function abgdxyzMultiVecDeviceFloatComplex
end interface
interface xyzwMultiVecDevice
function xyzwMultiVecDeviceFloatComplex(n,a,b,c,d,e,f,deviceVecX,&
& deviceVecY,deviceVecZ,deviceVecW) &
& result(res) bind(c,name='xyzwMultiVecDeviceFloatComplex')
use iso_c_binding
integer(c_int) :: res
integer(c_int), value :: n
complex(c_float_complex), value :: a,b,c,d,e,f
type(c_ptr), value :: deviceVecX, deviceVecY, deviceVecZ, deviceVecW
end function xyzwMultiVecDeviceFloatComplex
end interface
interface axyMultiVecDevice
function axyMultiVecDeviceFloatComplex(n,alpha,deviceVecA,deviceVecB) &
& result(res) bind(c,name='axyMultiVecDeviceFloatComplex')

@ -90,6 +90,7 @@ module psb_d_cuda_vect_mod
procedure, pass(x) :: dot_a => d_cuda_dot_a
procedure, pass(y) :: axpby_v => d_cuda_axpby_v
procedure, pass(y) :: axpby_a => d_cuda_axpby_a
procedure, pass(z) :: abgdxyz => d_cuda_abgdxyz
procedure, pass(y) :: mlt_v => d_cuda_mlt_v
procedure, pass(y) :: mlt_a => d_cuda_mlt_a
procedure, pass(z) :: mlt_a_2 => d_cuda_mlt_a_2
@ -667,7 +668,9 @@ contains
use psi_serial_mod
implicit none
class(psb_d_vect_cuda), intent(inout) :: x
! Since we are overwriting, make sure to do it
! on the GPU side
call x%set_dev()
call x%set_scal(dzero)
end subroutine d_cuda_zero
@ -909,6 +912,131 @@ contains
end subroutine d_cuda_axpby_v
subroutine d_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_d_base_vect_type), intent(inout) :: x
class(psb_d_base_vect_type), intent(inout) :: y
class(psb_d_vect_cuda), intent(inout) :: z
real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nx, ny, nz
logical :: gpu_done
info = psb_success_
if (.true.) then
gpu_done = .false.
select type(xx => x)
class is (psb_d_vect_cuda)
select type(yy => y)
class is (psb_d_vect_cuda)
select type(zz => z)
class is (psb_d_vect_cuda)
! Do something different here
if ((beta /= dzero).and.yy%is_host())&
& call yy%sync()
if ((delta /= dzero).and.zz%is_host())&
& call zz%sync()
if (xx%is_host()) call xx%sync()
nx = getMultiVecDeviceSize(xx%deviceVect)
ny = getMultiVecDeviceSize(yy%deviceVect)
nz = getMultiVecDeviceSize(zz%deviceVect)
if ((nx<m).or.(ny<m).or.(nz<m)) then
info = psb_err_internal_error_
else
info = abgdxyzMultiVecDevice(m,alpha,beta,gamma,delta,&
& xx%deviceVect,yy%deviceVect,zz%deviceVect)
end if
call yy%set_dev()
call zz%set_dev()
gpu_done = .true.
end select
end select
end select
if (.not.gpu_done) then
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
if (z%is_host()) call z%sync()
call y%axpby(m,alpha,x,beta,info)
call z%axpby(m,gamma,y,delta,info)
end if
else
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
if (z%is_host()) call z%sync()
call y%axpby(m,alpha,x,beta,info)
call z%axpby(m,gamma,y,delta,info)
end if
end subroutine d_cuda_abgdxyz
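
A usage sketch for the new type-bound fused update added to the CUDA vector type, assuming vectors already assembled on the device and the module names as above (the wrapper name is hypothetical):

subroutine device_fused_update_sketch(m, x, y, z, info)
  use psb_base_mod
  use psb_d_cuda_vect_mod
  implicit none
  integer(psb_ipk_), intent(in)        :: m
  type(psb_d_vect_cuda), intent(inout) :: x, y, z
  integer(psb_ipk_), intent(out)       :: info
  real(psb_dpk_), parameter :: alpha = 2.0_psb_dpk_, beta  = 1.0_psb_dpk_, &
       &                       gamma = 0.5_psb_dpk_, delta = 1.0_psb_dpk_
  ! With all three arguments CUDA vectors the call takes the device branch
  ! above (abgdxyzMultiVecDevice); otherwise it syncs to the host and falls
  ! back to the two chained axpby calls.
  call z%abgdxyz(m, alpha, beta, gamma, delta, x, y, info)
end subroutine device_fused_update_sketch
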
subroutine d_cuda_xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_d_base_vect_type), intent(inout) :: x
class(psb_d_base_vect_type), intent(inout) :: y
class(psb_d_base_vect_type), intent(inout) :: z
class(psb_d_vect_cuda), intent(inout) :: w
real(psb_dpk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nx, ny, nz, nw
logical :: gpu_done
info = psb_success_
gpu_done = .false.
if ((a==dzero).or.(b==dzero).or. &
& (c==dzero).or.(d==dzero).or.&
& (e==dzero).or.(f==dzero)) then
write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero'
else
select type(xx => x)
class is (psb_d_vect_cuda)
select type(yy => y)
class is (psb_d_vect_cuda)
select type(zz => z)
class is (psb_d_vect_cuda)
! Do something different here
if (xx%is_host()) call xx%sync()
if (yy%is_host()) call yy%sync()
if (zz%is_host()) call zz%sync()
if (w%is_host()) call w%sync()
nx = getMultiVecDeviceSize(xx%deviceVect)
ny = getMultiVecDeviceSize(yy%deviceVect)
nz = getMultiVecDeviceSize(zz%deviceVect)
nw = getMultiVecDeviceSize(w%deviceVect)
if ((nx<m).or.(ny<m).or.(nz<m).or.(nw<m)) then
info = psb_err_internal_error_
else
info = xyzwMultiVecDevice(m,a,b,c,d,e,f,&
& xx%deviceVect,yy%deviceVect,zz%deviceVect,w%deviceVect)
end if
call yy%set_dev()
call zz%set_dev()
call w%set_dev()
gpu_done = .true.
end select
end select
end select
if (.not.gpu_done) then
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
if (z%is_host()) call z%sync()
if (w%is_host()) call w%sync()
call y%axpby(m,a,x,b,info)
call z%axpby(m,c,y,d,info)
call w%axpby(m,e,z,f,info)
end if
end if
end subroutine d_cuda_xyzw
subroutine d_cuda_axpby_a(m,alpha, x, beta, y, info)
use psi_serial_mod
implicit none

@ -28,8 +28,6 @@
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
module psb_d_vectordev_mod
use psb_base_vectordev_mod
@ -304,7 +302,6 @@ module psb_d_vectordev_mod
end function asumMultiVecDeviceDouble
end interface
interface axpbyMultiVecDevice
function axpbyMultiVecDeviceDouble(n,alpha,deviceVecA,beta,deviceVecB) &
& result(res) bind(c,name='axpbyMultiVecDeviceDouble')
@ -316,6 +313,30 @@ module psb_d_vectordev_mod
end function axpbyMultiVecDeviceDouble
end interface
interface abgdxyzMultiVecDevice
function abgdxyzMultiVecDeviceDouble(n,alpha,beta,gamma,delta,deviceVecX,&
& deviceVecY,deviceVecZ) &
& result(res) bind(c,name='abgdxyzMultiVecDeviceDouble')
use iso_c_binding
integer(c_int) :: res
integer(c_int), value :: n
real(c_double), value :: alpha, beta,gamma,delta
type(c_ptr), value :: deviceVecX, deviceVecY, deviceVecZ
end function abgdxyzMultiVecDeviceDouble
end interface
interface xyzwMultiVecDevice
function xyzwMultiVecDeviceDouble(n,a,b,c,d,e,f,deviceVecX,&
& deviceVecY,deviceVecZ,deviceVecW) &
& result(res) bind(c,name='xyzwMultiVecDeviceDouble')
use iso_c_binding
integer(c_int) :: res
integer(c_int), value :: n
real(c_double), value :: a,b,c,d,e,f
type(c_ptr), value :: deviceVecX, deviceVecY, deviceVecZ, deviceVecW
end function xyzwMultiVecDeviceDouble
end interface
interface axyMultiVecDevice
function axyMultiVecDeviceDouble(n,alpha,deviceVecA,deviceVecB) &
& result(res) bind(c,name='axyMultiVecDeviceDouble')

@ -650,7 +650,9 @@ contains
use psi_serial_mod
implicit none
class(psb_i_vect_cuda), intent(inout) :: x
! Since we are overwriting, make sure to do it
! on the GPU side
call x%set_dev()
call x%set_scal(izero)
end subroutine i_cuda_zero
@ -1170,7 +1172,7 @@ contains
!!$ integer(psb_ipk_), external :: ddot
!!$ integer(psb_ipk_) :: info
!!$
!!$ res = dzero
!!$ res = izero
!!$ !
!!$ ! Note: this is the gpu implementation.
!!$ ! When we get here, we are sure that X is of
@ -1224,13 +1226,13 @@ contains
!!$
!!$ select type(xx => x)
!!$ type is (psb_i_base_multivect_type)
!!$ if ((beta /= dzero).and.(y%is_dev()))&
!!$ if ((beta /= izero).and.(y%is_dev()))&
!!$ & call y%sync()
!!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info)
!!$ call y%set_host()
!!$ type is (psb_i_multivect_cuda)
!!$ ! Do something different here
!!$ if ((beta /= dzero).and.y%is_host())&
!!$ if ((beta /= izero).and.y%is_host())&
!!$ & call y%sync()
!!$ if (xx%is_host()) call xx%sync()
!!$ nx = getMultiVecDeviceSize(xx%deviceVect)
@ -1475,7 +1477,7 @@ contains
implicit none
class(psb_i_multivect_cuda), intent(inout) :: x
if (allocated(x%v)) x%v=dzero
if (allocated(x%v)) x%v=izero
call x%set_host()
end subroutine i_cuda_multi_zero

@ -28,8 +28,6 @@
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
module psb_i_vectordev_mod
use psb_base_vectordev_mod

@ -90,6 +90,7 @@ module psb_s_cuda_vect_mod
procedure, pass(x) :: dot_a => s_cuda_dot_a
procedure, pass(y) :: axpby_v => s_cuda_axpby_v
procedure, pass(y) :: axpby_a => s_cuda_axpby_a
procedure, pass(z) :: abgdxyz => s_cuda_abgdxyz
procedure, pass(y) :: mlt_v => s_cuda_mlt_v
procedure, pass(y) :: mlt_a => s_cuda_mlt_a
procedure, pass(z) :: mlt_a_2 => s_cuda_mlt_a_2
@ -667,7 +668,9 @@ contains
use psi_serial_mod
implicit none
class(psb_s_vect_cuda), intent(inout) :: x
! Since we are overwriting, make sure to do it
! on the GPU side
call x%set_dev()
call x%set_scal(szero)
end subroutine s_cuda_zero
@ -909,6 +912,131 @@ contains
end subroutine s_cuda_axpby_v
subroutine s_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_s_base_vect_type), intent(inout) :: x
class(psb_s_base_vect_type), intent(inout) :: y
class(psb_s_vect_cuda), intent(inout) :: z
real(psb_spk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nx, ny, nz
logical :: gpu_done
info = psb_success_
if (.true.) then
gpu_done = .false.
select type(xx => x)
class is (psb_s_vect_cuda)
select type(yy => y)
class is (psb_s_vect_cuda)
select type(zz => z)
class is (psb_s_vect_cuda)
! Do something different here
if ((beta /= szero).and.yy%is_host())&
& call yy%sync()
if ((delta /= szero).and.zz%is_host())&
& call zz%sync()
if (xx%is_host()) call xx%sync()
nx = getMultiVecDeviceSize(xx%deviceVect)
ny = getMultiVecDeviceSize(yy%deviceVect)
nz = getMultiVecDeviceSize(zz%deviceVect)
if ((nx<m).or.(ny<m).or.(nz<m)) then
info = psb_err_internal_error_
else
info = abgdxyzMultiVecDevice(m,alpha,beta,gamma,delta,&
& xx%deviceVect,yy%deviceVect,zz%deviceVect)
end if
call yy%set_dev()
call zz%set_dev()
gpu_done = .true.
end select
end select
end select
if (.not.gpu_done) then
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
if (z%is_host()) call z%sync()
call y%axpby(m,alpha,x,beta,info)
call z%axpby(m,gamma,y,delta,info)
end if
else
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
if (z%is_host()) call z%sync()
call y%axpby(m,alpha,x,beta,info)
call z%axpby(m,gamma,y,delta,info)
end if
end subroutine s_cuda_abgdxyz
subroutine s_cuda_xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_s_base_vect_type), intent(inout) :: x
class(psb_s_base_vect_type), intent(inout) :: y
class(psb_s_base_vect_type), intent(inout) :: z
class(psb_s_vect_cuda), intent(inout) :: w
real(psb_spk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nx, ny, nz, nw
logical :: gpu_done
info = psb_success_
gpu_done = .false.
if ((a==szero).or.(b==szero).or. &
& (c==szero).or.(d==szero).or.&
& (e==szero).or.(f==szero)) then
write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero'
else
select type(xx => x)
class is (psb_s_vect_cuda)
select type(yy => y)
class is (psb_s_vect_cuda)
select type(zz => z)
class is (psb_s_vect_cuda)
! Do something different here
if (xx%is_host()) call xx%sync()
if (yy%is_host()) call yy%sync()
if (zz%is_host()) call zz%sync()
if (w%is_host()) call w%sync()
nx = getMultiVecDeviceSize(xx%deviceVect)
ny = getMultiVecDeviceSize(yy%deviceVect)
nz = getMultiVecDeviceSize(zz%deviceVect)
nw = getMultiVecDeviceSize(w%deviceVect)
if ((nx<m).or.(ny<m).or.(nz<m).or.(nw<m)) then
info = psb_err_internal_error_
else
info = xyzwMultiVecDevice(m,a,b,c,d,e,f,&
& xx%deviceVect,yy%deviceVect,zz%deviceVect,w%deviceVect)
end if
call yy%set_dev()
call zz%set_dev()
call w%set_dev()
gpu_done = .true.
end select
end select
end select
if (.not.gpu_done) then
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
if (z%is_host()) call z%sync()
if (w%is_host()) call w%sync()
call y%axpby(m,a,x,b,info)
call z%axpby(m,c,y,d,info)
call w%axpby(m,e,z,f,info)
end if
end if
end subroutine s_cuda_xyzw
subroutine s_cuda_axpby_a(m,alpha, x, beta, y, info)
use psi_serial_mod
implicit none
@ -1488,7 +1616,7 @@ contains
!!$ real(psb_spk_), external :: ddot
!!$ integer(psb_ipk_) :: info
!!$
!!$ res = dzero
!!$ res = szero
!!$ !
!!$ ! Note: this is the gpu implementation.
!!$ ! When we get here, we are sure that X is of
@ -1542,13 +1670,13 @@ contains
!!$
!!$ select type(xx => x)
!!$ type is (psb_s_base_multivect_type)
!!$ if ((beta /= dzero).and.(y%is_dev()))&
!!$ if ((beta /= szero).and.(y%is_dev()))&
!!$ & call y%sync()
!!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info)
!!$ call y%set_host()
!!$ type is (psb_s_multivect_cuda)
!!$ ! Do something different here
!!$ if ((beta /= dzero).and.y%is_host())&
!!$ if ((beta /= szero).and.y%is_host())&
!!$ & call y%sync()
!!$ if (xx%is_host()) call xx%sync()
!!$ nx = getMultiVecDeviceSize(xx%deviceVect)
@ -1793,7 +1921,7 @@ contains
implicit none
class(psb_s_multivect_cuda), intent(inout) :: x
if (allocated(x%v)) x%v=dzero
if (allocated(x%v)) x%v=szero
call x%set_host()
end subroutine s_cuda_multi_zero

@ -28,8 +28,6 @@
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
module psb_s_vectordev_mod
use psb_base_vectordev_mod
@ -304,7 +302,6 @@ module psb_s_vectordev_mod
end function asumMultiVecDeviceFloat
end interface
interface axpbyMultiVecDevice
function axpbyMultiVecDeviceFloat(n,alpha,deviceVecA,beta,deviceVecB) &
& result(res) bind(c,name='axpbyMultiVecDeviceFloat')
@ -316,6 +313,30 @@ module psb_s_vectordev_mod
end function axpbyMultiVecDeviceFloat
end interface
interface abgdxyzMultiVecDevice
function abgdxyzMultiVecDeviceFloat(n,alpha,beta,gamma,delta,deviceVecX,&
& deviceVecY,deviceVecZ) &
& result(res) bind(c,name='abgdxyzMultiVecDeviceFloat')
use iso_c_binding
integer(c_int) :: res
integer(c_int), value :: n
real(c_float), value :: alpha, beta,gamma,delta
type(c_ptr), value :: deviceVecX, deviceVecY, deviceVecZ
end function abgdxyzMultiVecDeviceFloat
end interface
interface xyzwMultiVecDevice
function xyzwMultiVecDeviceFloat(n,a,b,c,d,e,f,deviceVecX,&
& deviceVecY,deviceVecZ,deviceVecW) &
& result(res) bind(c,name='xyzwMultiVecDeviceFloat')
use iso_c_binding
integer(c_int) :: res
integer(c_int), value :: n
real(c_float), value :: a,b,c,d,e,f
type(c_ptr), value :: deviceVecX, deviceVecY, deviceVecZ, deviceVecW
end function xyzwMultiVecDeviceFloat
end interface
interface axyMultiVecDevice
function axyMultiVecDeviceFloat(n,alpha,deviceVecA,deviceVecB) &
& result(res) bind(c,name='axyMultiVecDeviceFloat')

@ -90,6 +90,7 @@ module psb_z_cuda_vect_mod
procedure, pass(x) :: dot_a => z_cuda_dot_a
procedure, pass(y) :: axpby_v => z_cuda_axpby_v
procedure, pass(y) :: axpby_a => z_cuda_axpby_a
procedure, pass(z) :: abgdxyz => z_cuda_abgdxyz
procedure, pass(y) :: mlt_v => z_cuda_mlt_v
procedure, pass(y) :: mlt_a => z_cuda_mlt_a
procedure, pass(z) :: mlt_a_2 => z_cuda_mlt_a_2
@ -667,7 +668,9 @@ contains
use psi_serial_mod
implicit none
class(psb_z_vect_cuda), intent(inout) :: x
! Since we are overwriting, make sure to do it
! on the GPU side
call x%set_dev()
call x%set_scal(zzero)
end subroutine z_cuda_zero
@ -909,6 +912,131 @@ contains
end subroutine z_cuda_axpby_v
subroutine z_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_z_base_vect_type), intent(inout) :: x
class(psb_z_base_vect_type), intent(inout) :: y
class(psb_z_vect_cuda), intent(inout) :: z
complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nx, ny, nz
logical :: gpu_done
info = psb_success_
if (.true.) then
gpu_done = .false.
select type(xx => x)
class is (psb_z_vect_cuda)
select type(yy => y)
class is (psb_z_vect_cuda)
select type(zz => z)
class is (psb_z_vect_cuda)
! Do something different here
if ((beta /= zzero).and.yy%is_host())&
& call yy%sync()
if ((delta /= zzero).and.zz%is_host())&
& call zz%sync()
if (xx%is_host()) call xx%sync()
nx = getMultiVecDeviceSize(xx%deviceVect)
ny = getMultiVecDeviceSize(yy%deviceVect)
nz = getMultiVecDeviceSize(zz%deviceVect)
if ((nx<m).or.(ny<m).or.(nz<m)) then
info = psb_err_internal_error_
else
info = abgdxyzMultiVecDevice(m,alpha,beta,gamma,delta,&
& xx%deviceVect,yy%deviceVect,zz%deviceVect)
end if
call yy%set_dev()
call zz%set_dev()
gpu_done = .true.
end select
end select
end select
if (.not.gpu_done) then
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
if (z%is_host()) call z%sync()
call y%axpby(m,alpha,x,beta,info)
call z%axpby(m,gamma,y,delta,info)
end if
else
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
if (z%is_host()) call z%sync()
call y%axpby(m,alpha,x,beta,info)
call z%axpby(m,gamma,y,delta,info)
end if
end subroutine z_cuda_abgdxyz
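For reference, the fused abgdxyz update performed by the CUDA path above is the same operation as the two-step axpby fallback; the following plain-C loop is an illustrative sketch of that semantics (real-valued for clarity, not library code; the kernel's beta==0 and delta==0 branches only avoid reading possibly uninitialized y or z).

/* Sketch: reference semantics of abgdxyz, i.e. y = alpha*x + beta*y followed
 * by z = gamma*y + delta*z, using the updated y. Illustrative only. */
void abgdxyz_ref(int n, double alpha, double beta, double gamma, double delta,
                 const double *x, double *y, double *z)
{
  for (int i = 0; i < n; ++i) {
    double t = alpha * x[i] + beta * y[i];  /* first axpby  */
    z[i] = gamma * t + delta * z[i];        /* second axpby */
    y[i] = t;
  }
}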
subroutine z_cuda_xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_z_base_vect_type), intent(inout) :: x
class(psb_z_base_vect_type), intent(inout) :: y
class(psb_z_base_vect_type), intent(inout) :: z
class(psb_z_vect_cuda), intent(inout) :: w
complex(psb_dpk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nx, ny, nz, nw
logical :: gpu_done
info = psb_success_
gpu_done = .false.
if ((a==zzero).or.(b==zzero).or. &
& (c==zzero).or.(d==zzero).or.&
& (e==zzero).or.(f==zzero)) then
write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero'
else
select type(xx => x)
class is (psb_z_vect_cuda)
select type(yy => y)
class is (psb_z_vect_cuda)
select type(zz => z)
class is (psb_z_vect_cuda)
! Do something different here
if (xx%is_host()) call xx%sync()
if (yy%is_host()) call yy%sync()
if (zz%is_host()) call zz%sync()
if (w%is_host()) call w%sync()
nx = getMultiVecDeviceSize(xx%deviceVect)
ny = getMultiVecDeviceSize(yy%deviceVect)
nz = getMultiVecDeviceSize(zz%deviceVect)
nw = getMultiVecDeviceSize(w%deviceVect)
if ((nx<m).or.(ny<m).or.(nz<m).or.(nw<m)) then
info = psb_err_internal_error_
else
info = xyzwMultiVecDevice(m,a,b,c,d,e,f,&
& xx%deviceVect,yy%deviceVect,zz%deviceVect,w%deviceVect)
end if
call yy%set_dev()
call zz%set_dev()
call w%set_dev()
gpu_done = .true.
end select
end select
end select
if (.not.gpu_done) then
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
if (z%is_host()) call z%sync()
if (w%is_host()) call w%sync()
call y%axpby(m,a,x,b,info)
call z%axpby(m,c,y,d,info)
call w%axpby(m,e,z,f,info)
end if
end if
end subroutine z_cuda_xyzw
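Similarly, xyzw chains three axpby updates that reuse each intermediate result, which is what makes fusing them into a single kernel worthwhile. A plain-C sketch of the intended semantics (illustrative only, real-valued for clarity):

/* Sketch: reference semantics of xyzw. Illustrative only. */
void xyzw_ref(int n, double a, double b, double c, double d, double e, double f,
              const double *x, double *y, double *z, double *w)
{
  for (int i = 0; i < n; ++i) {
    double ty = a * x[i] + b * y[i];   /* y = a*x + b*y */
    double tz = c * ty + d * z[i];     /* z = c*y + d*z */
    w[i] = e * tz + f * w[i];          /* w = e*z + f*w */
    y[i] = ty;
    z[i] = tz;
  }
}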
subroutine z_cuda_axpby_a(m,alpha, x, beta, y, info)
use psi_serial_mod
implicit none
@ -1488,7 +1616,7 @@ contains
!!$ complex(psb_dpk_), external :: ddot
!!$ integer(psb_ipk_) :: info
!!$
!!$ res = dzero
!!$ res = zzero
!!$ !
!!$ ! Note: this is the gpu implementation.
!!$ ! When we get here, we are sure that X is of
@ -1542,13 +1670,13 @@ contains
!!$
!!$ select type(xx => x)
!!$ type is (psb_z_base_multivect_type)
!!$ if ((beta /= dzero).and.(y%is_dev()))&
!!$ if ((beta /= zzero).and.(y%is_dev()))&
!!$ & call y%sync()
!!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info)
!!$ call y%set_host()
!!$ type is (psb_z_multivect_cuda)
!!$ ! Do something different here
!!$ if ((beta /= dzero).and.y%is_host())&
!!$ if ((beta /= zzero).and.y%is_host())&
!!$ & call y%sync()
!!$ if (xx%is_host()) call xx%sync()
!!$ nx = getMultiVecDeviceSize(xx%deviceVect)
@ -1793,7 +1921,7 @@ contains
implicit none
class(psb_z_multivect_cuda), intent(inout) :: x
if (allocated(x%v)) x%v=dzero
if (allocated(x%v)) x%v=zzero
call x%set_host()
end subroutine z_cuda_multi_zero

@ -28,8 +28,6 @@
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
module psb_z_vectordev_mod
use psb_base_vectordev_mod
@ -304,7 +302,6 @@ module psb_z_vectordev_mod
end function asumMultiVecDeviceDoubleComplex
end interface
interface axpbyMultiVecDevice
function axpbyMultiVecDeviceDoubleComplex(n,alpha,deviceVecA,beta,deviceVecB) &
& result(res) bind(c,name='axpbyMultiVecDeviceDoubleComplex')
@ -316,6 +313,30 @@ module psb_z_vectordev_mod
end function axpbyMultiVecDeviceDoubleComplex
end interface
interface abgdxyzMultiVecDevice
function abgdxyzMultiVecDeviceDoubleComplex(n,alpha,beta,gamma,delta,deviceVecX,&
& deviceVecY,deviceVecZ) &
& result(res) bind(c,name='abgdxyzMultiVecDeviceDoubleComplex')
use iso_c_binding
integer(c_int) :: res
integer(c_int), value :: n
complex(c_double_complex), value :: alpha, beta,gamma,delta
type(c_ptr), value :: deviceVecX, deviceVecY, deviceVecZ
end function abgdxyzMultiVecDeviceDoubleComplex
end interface
interface xyzwMultiVecDevice
function xyzwMultiVecDeviceDoubleComplex(n,a,b,c,d,e,f,deviceVecX,&
& deviceVecY,deviceVecZ,deviceVecW) &
& result(res) bind(c,name='xyzwMultiVecDeviceDoubleComplex')
use iso_c_binding
integer(c_int) :: res
integer(c_int), value :: n
complex(c_double_complex), value :: a,b,c,d,e,f
type(c_ptr), value :: deviceVecX, deviceVecY, deviceVecZ, deviceVecW
end function xyzwMultiVecDeviceDoubleComplex
end interface
interface axyMultiVecDevice
function axyMultiVecDeviceDoubleComplex(n,alpha,deviceVecA,deviceVecB) &
& result(res) bind(c,name='axyMultiVecDeviceDoubleComplex')

@ -11,15 +11,16 @@ LIBNAME=$(UP)/libspgpu.a
CINCLUDES=-I$(INCDIR)
OBJS=cabs.o camax.o casum.o caxpby.o caxy.o cdot.o cgath.o \
cnrm2.o cscal.o cscat.o csetscal.o \
dabs.o damax.o dasum.o daxpby.o daxy.o ddot.o dgath.o \
cnrm2.o cscal.o cscat.o csetscal.o cabgdxyz.o\
dabs.o damax.o dasum.o daxpby.o daxy.o ddot.o dgath.o dabgdxyz.o\
dia_cspmv.o dia_dspmv.o dia_sspmv.o dia_zspmv.o dnrm2.o \
dscal.o dscat.o dsetscal.o ell_ccsput.o ell_cspmv.o \
ell_dcsput.o ell_dspmv.o ell_scsput.o ell_sspmv.o ell_zcsput.o ell_zspmv.o \
hdia_cspmv.o hdia_dspmv.o hdia_sspmv.o hdia_zspmv.o hell_cspmv.o hell_dspmv.o \
hell_sspmv.o hell_zspmv.o igath.o iscat.o isetscal.o sabs.o samax.o sasum.o \
saxpby.o saxy.o sdot.o sgath.o snrm2.o sscal.o sscat.o ssetscal.o zabs.o zamax.o \
zasum.o zaxpby.o zaxy.o zdot.o zgath.o znrm2.o zscal.o zscat.o zsetscal.o
saxpby.o saxy.o sdot.o sgath.o snrm2.o sscal.o sscat.o ssetscal.o zabs.o zamax.o sabgdxyz.o\
zasum.o zaxpby.o zaxy.o zdot.o zgath.o znrm2.o zscal.o zscat.o zsetscal.o zabgdxyz.o \
sxyzw.o cxyzw.o dxyzw.o zxyzw.o
objs: $(OBJS)
lib: objs

@ -0,0 +1,80 @@
/*
* spGPU - Sparse matrices on GPU library.
*
* Copyright (C) 2010 - 2012
* Davide Barbieri - University of Rome Tor Vergata
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 3 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include "cudadebug.h"
#include "cudalang.h"
#include <cuda_runtime.h>
extern "C"
{
#include "core.h"
#include "vector.h"
int getGPUMultiProcessors();
int getGPUMaxThreadsPerMP();
}
#include "debug.h"
#define BLOCK_SIZE 512
__global__ void spgpuCabgdxyz_krn(int n, cuFloatComplex alpha, cuFloatComplex beta,
cuFloatComplex gamma, cuFloatComplex delta,
cuFloatComplex * x, cuFloatComplex *y, cuFloatComplex *z)
{
int id = threadIdx.x + BLOCK_SIZE*blockIdx.x;
unsigned int gridSize = blockDim.x * gridDim.x;
cuFloatComplex t;
for ( ; id < n; id +=gridSize)
//if (id,n)
{
if (cuFloatComplex_isZero(beta))
t = cuCmulf(alpha,x[id]);
else
t = cuCfmaf(alpha, x[id], cuCmulf(beta,y[id]));
if (cuFloatComplex_isZero(delta))
z[id] = cuCmulf(gamma, t);
else
z[id] = cuCfmaf(gamma, t, cuCmulf(delta,z[id]));
y[id] = t;
}
}
void spgpuCabgdxyz(spgpuHandle_t handle,
int n,
cuFloatComplex alpha,
cuFloatComplex beta,
cuFloatComplex gamma,
cuFloatComplex delta,
__device cuFloatComplex * x,
__device cuFloatComplex * y,
__device cuFloatComplex *z)
{
int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE;
int num_mp, max_threads_mp, num_blocks_mp, num_blocks;
dim3 block(BLOCK_SIZE);
num_mp = getGPUMultiProcessors();
max_threads_mp = getGPUMaxThreadsPerMP();
num_blocks_mp = max_threads_mp/BLOCK_SIZE;
num_blocks = num_blocks_mp*num_mp;
dim3 grid(num_blocks);
spgpuCabgdxyz_krn<<<grid, block, 0, handle->currentStream>>>(n, alpha, beta, gamma, delta,
x, y, z);
}
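The host wrappers in these new files all follow the same launch scheme: a fixed 512-thread block, a grid sized from the device's multiprocessor count, and a grid-stride loop inside the kernel so that the fixed-size grid covers any n. A minimal standalone sketch of that pattern (kernel and function names here are illustrative, not part of spGPU):

#include <cuda_runtime.h>

__global__ void scale_krn(int n, float alpha, float *x)
{
  /* grid-stride loop: each thread advances by the total number of threads */
  for (int id = threadIdx.x + blockDim.x * blockIdx.x; id < n;
       id += blockDim.x * gridDim.x)
    x[id] *= alpha;
}

void launch_scale(cudaStream_t stream, int n, float alpha, float *x)
{
  dim3 block(512);
  dim3 grid(128);  /* spGPU derives this from the MP count and max threads per MP */
  scale_krn<<<grid, block, 0, stream>>>(n, alpha, x);
}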

@ -22,6 +22,9 @@ extern "C"
{
#include "core.h"
#include "vector.h"
int getGPUMultiProcessors();
int getGPUMaxThreadsPerMP();
//#include "cuda_util.h"
}
@ -29,6 +32,53 @@ extern "C"
#define BLOCK_SIZE 512
#if 1
__global__ void spgpuCaxpby_krn(cuFloatComplex *z, int n, cuFloatComplex beta, cuFloatComplex *y, cuFloatComplex alpha, cuFloatComplex* x)
{
int id = threadIdx.x + BLOCK_SIZE*blockIdx.x;
unsigned int gridSize = blockDim.x * gridDim.x;
if (cuFloatComplex_isZero(beta)) {
for ( ; id < n; id +=gridSize)
//if (id,n)
{
// Since z, x and y are accessed with the same offset by the same thread,
// and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing).
z[id] = cuCmulf(alpha,x[id]);
}
} else {
for ( ; id < n; id +=gridSize)
//if (id,n)
{
z[id] = cuCfmaf(beta, y[id], cuCmulf(alpha, x[id]));
}
}
}
void spgpuCaxpby(spgpuHandle_t handle,
__device cuFloatComplex *z,
int n,
cuFloatComplex beta,
__device cuFloatComplex *y,
cuFloatComplex alpha,
__device cuFloatComplex* x)
{
int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE;
int num_mp, max_threads_mp, num_blocks_mp, num_blocks;
dim3 block(BLOCK_SIZE);
num_mp = getGPUMultiProcessors();
max_threads_mp = getGPUMaxThreadsPerMP();
num_blocks_mp = max_threads_mp/BLOCK_SIZE;
num_blocks = num_blocks_mp*num_mp;
dim3 grid(num_blocks);
spgpuCaxpby_krn<<<grid, block, 0, handle->currentStream>>>(z, n, beta, y, alpha, x);
}
#else
__global__ void spgpuCaxpby_krn(cuFloatComplex *z, int n, cuFloatComplex beta, cuFloatComplex *y, cuFloatComplex alpha, cuFloatComplex* x)
{
int id = threadIdx.x + BLOCK_SIZE*blockIdx.x;
@ -86,7 +136,7 @@ void spgpuCaxpby(spgpuHandle_t handle,
cudaCheckError("CUDA error on saxpby");
}
#endif
void spgpuCmaxpby(spgpuHandle_t handle,
__device cuFloatComplex *z,
int n,

@ -0,0 +1,78 @@
/*
* spGPU - Sparse matrices on GPU library.
*
* Copyright (C) 2010 - 2012
* Davide Barbieri - University of Rome Tor Vergata
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 3 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include "cudadebug.h"
#include "cudalang.h"
#include <cuda_runtime.h>
extern "C"
{
#include "core.h"
#include "vector.h"
int getGPUMultiProcessors();
int getGPUMaxThreadsPerMP();
}
#include "debug.h"
#define BLOCK_SIZE 512
__global__ void spgpuCxyzw_krn(int n, cuFloatComplex a, cuFloatComplex b,
cuFloatComplex c, cuFloatComplex d,
cuFloatComplex e, cuFloatComplex f,
cuFloatComplex * x, cuFloatComplex *y,
cuFloatComplex *z, cuFloatComplex *w)
{
int id = threadIdx.x + BLOCK_SIZE*blockIdx.x;
unsigned int gridSize = blockDim.x * gridDim.x;
cuFloatComplex ty, tz;
for ( ; id < n; id +=gridSize)
//if (id,n)
{
ty = cuCfmaf(a, x[id], cuCmulf(b,y[id]));
tz = cuCfmaf(c, ty, cuCmulf(d,z[id]));
w[id] = cuCfmaf(e, tz, cuCmulf(f,w[id]));
y[id] = ty;
z[id] = tz;
}
}
void spgpuCxyzw(spgpuHandle_t handle,
int n,
cuFloatComplex a, cuFloatComplex b,
cuFloatComplex c, cuFloatComplex d,
cuFloatComplex e, cuFloatComplex f,
__device cuFloatComplex * x,
__device cuFloatComplex * y,
__device cuFloatComplex * z,
__device cuFloatComplex *w)
{
int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE;
int num_mp, max_threads_mp, num_blocks_mp, num_blocks;
dim3 block(BLOCK_SIZE);
num_mp = getGPUMultiProcessors();
max_threads_mp = getGPUMaxThreadsPerMP();
num_blocks_mp = max_threads_mp/BLOCK_SIZE;
num_blocks = num_blocks_mp*num_mp;
dim3 grid(num_blocks);
spgpuCxyzw_krn<<<grid, block, 0, handle->currentStream>>>(n, a,b,c,d,e,f,
x, y, z,w);
}

@ -0,0 +1,79 @@
/*
* spGPU - Sparse matrices on GPU library.
*
* Copyright (C) 2010 - 2012
* Davide Barbieri - University of Rome Tor Vergata
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 3 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include "cudadebug.h"
#include "cudalang.h"
#include <cuda_runtime.h>
extern "C"
{
#include "core.h"
#include "vector.h"
int getGPUMultiProcessors();
int getGPUMaxThreadsPerMP();
}
#include "debug.h"
#define BLOCK_SIZE 512
__global__ void spgpuDabgdxyz_krn(int n, double alpha, double beta, double gamma, double delta,
double* x, double *y, double *z)
{
int id = threadIdx.x + BLOCK_SIZE*blockIdx.x;
unsigned int gridSize = blockDim.x * gridDim.x;
double t;
for ( ; id < n; id +=gridSize)
//if (id,n)
{
if (beta == 0.0)
t = PREC_DMUL(alpha,x[id]);
else
t = PREC_DADD(PREC_DMUL(alpha, x[id]), PREC_DMUL(beta,y[id]));
if (delta == 0.0)
z[id] = gamma * t;
else
z[id] = PREC_DADD(PREC_DMUL(gamma, t), PREC_DMUL(delta,z[id]));
y[id] = t;
}
}
void spgpuDabgdxyz(spgpuHandle_t handle,
int n,
double alpha,
double beta,
double gamma,
double delta,
__device double* x,
__device double* y,
__device double *z)
{
int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE;
int num_mp, max_threads_mp, num_blocks_mp, num_blocks;
dim3 block(BLOCK_SIZE);
num_mp = getGPUMultiProcessors();
max_threads_mp = getGPUMaxThreadsPerMP();
num_blocks_mp = max_threads_mp/BLOCK_SIZE;
num_blocks = num_blocks_mp*num_mp;
dim3 grid(num_blocks);
spgpuDabgdxyz_krn<<<grid, block, 0, handle->currentStream>>>(n, alpha, beta, gamma, delta,
x, y, z);
}

@ -16,11 +16,15 @@
#include "cudadebug.h"
#include "cudalang.h"
#include <cuda_runtime.h>
extern "C"
{
#include "core.h"
#include "vector.h"
int getGPUMultiProcessors();
int getGPUMaxThreadsPerMP();
//#include "cuda_util.h"
}
@ -28,6 +32,47 @@ extern "C"
#define BLOCK_SIZE 512
#if 1
__global__ void spgpuDaxpby_krn(double *z, int n, double beta, double *y, double alpha, double* x)
{
int id = threadIdx.x + BLOCK_SIZE*blockIdx.x;
unsigned int gridSize = blockDim.x * gridDim.x;
if (beta == 0.0) {
for ( ; id < n; id +=gridSize)
{
z[id] = PREC_DMUL(alpha,x[id]);
}
} else {
for ( ; id < n; id +=gridSize)
{
z[id] = PREC_DADD(PREC_DMUL(alpha, x[id]), PREC_DMUL(beta,y[id]));
}
}
}
void spgpuDaxpby(spgpuHandle_t handle,
__device double *z,
int n,
double beta,
__device double *y,
double alpha,
__device double* x)
{
int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE;
int num_mp, max_threads_mp, num_blocks_mp, num_blocks;
dim3 block(BLOCK_SIZE);
num_mp = getGPUMultiProcessors();
max_threads_mp = getGPUMaxThreadsPerMP();
num_blocks_mp = max_threads_mp/BLOCK_SIZE;
num_blocks = num_blocks_mp*num_mp;
dim3 grid(num_blocks);
spgpuDaxpby_krn<<<grid, block, 0, handle->currentStream>>>(z, n, beta, y, alpha, x);
}
#else
__global__ void spgpuDaxpby_krn(double *z, int n, double beta, double *y, double alpha, double* x)
{
int id = threadIdx.x + BLOCK_SIZE*blockIdx.x;
@ -85,6 +130,7 @@ void spgpuDaxpby(spgpuHandle_t handle,
cudaCheckError("CUDA error on daxpby");
}
#endif
void spgpuDmaxpby(spgpuHandle_t handle,
__device double *z,
int n,

@ -0,0 +1,78 @@
/*
* spGPU - Sparse matrices on GPU library.
*
* Copyright (C) 2010 - 2012
* Davide Barbieri - University of Rome Tor Vergata
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 3 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include "cudadebug.h"
#include "cudalang.h"
#include <cuda_runtime.h>
extern "C"
{
#include "core.h"
#include "vector.h"
int getGPUMultiProcessors();
int getGPUMaxThreadsPerMP();
}
#include "debug.h"
#define BLOCK_SIZE 512
__global__ void spgpuDxyzw_krn(int n, double a, double b,
double c, double d,
double e, double f,
double * x, double *y,
double *z, double *w)
{
int id = threadIdx.x + BLOCK_SIZE*blockIdx.x;
unsigned int gridSize = blockDim.x * gridDim.x;
double ty, tz;
for ( ; id < n; id +=gridSize)
//if (id,n)
{
ty = PREC_DADD(PREC_DMUL(a, x[id]), PREC_DMUL(b,y[id]));
tz = PREC_DADD(PREC_DMUL(c, ty), PREC_DMUL(d,z[id]));
w[id] = PREC_DADD(PREC_DMUL(e, tz), PREC_DMUL(f,w[id]));
ty = PREC_DADD(PREC_DMUL(a, x[id]), PREC_DMUL(b,y[id]));
tz = PREC_DADD(PREC_DMUL(c, ty), PREC_DMUL(d,z[id]));
w[id] = PREC_DADD(PREC_DMUL(e, tz), PREC_DMUL(f,w[id]));
y[id] = ty;
z[id] = tz;
}
}
void spgpuDxyzw(spgpuHandle_t handle,
int n,
double a, double b,
double c, double d,
double e, double f,
__device double * x,
__device double * y,
__device double * z,
__device double *w)
{
int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE;
int num_mp, max_threads_mp, num_blocks_mp, num_blocks;
dim3 block(BLOCK_SIZE);
num_mp = getGPUMultiProcessors();
max_threads_mp = getGPUMaxThreadsPerMP();
num_blocks_mp = max_threads_mp/BLOCK_SIZE;
num_blocks = num_blocks_mp*num_mp;
dim3 grid(num_blocks);
spgpuDxyzw_krn<<<grid, block, 0, handle->currentStream>>>(n, a,b,c,d,e,f,
x, y, z,w);
}

@ -0,0 +1,79 @@
/*
* spGPU - Sparse matrices on GPU library.
*
* Copyright (C) 2010 - 2012
* Davide Barbieri - University of Rome Tor Vergata
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 3 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include "cudadebug.h"
#include "cudalang.h"
#include <cuda_runtime.h>
extern "C"
{
#include "core.h"
#include "vector.h"
int getGPUMultiProcessors();
int getGPUMaxThreadsPerMP();
}
#include "debug.h"
#define BLOCK_SIZE 512
__global__ void spgpuSabgdxyz_krn(int n, float alpha, float beta, float gamma, float delta,
float* x, float *y, float *z)
{
int id = threadIdx.x + BLOCK_SIZE*blockIdx.x;
unsigned int gridSize = blockDim.x * gridDim.x;
float t;
for ( ; id < n; id +=gridSize)
//if (id,n)
{
if (beta == 0.0)
t = PREC_FMUL(alpha,x[id]);
else
t = PREC_FADD(PREC_FMUL(alpha, x[id]), PREC_FMUL(beta,y[id]));
if (delta == 0.0)
z[id] = gamma * t;
else
z[id] = PREC_FADD(PREC_FMUL(gamma, t), PREC_FMUL(delta,z[id]));
y[id] = t;
}
}
void spgpuSabgdxyz(spgpuHandle_t handle,
int n,
float alpha,
float beta,
float gamma,
float delta,
__device float* x,
__device float* y,
__device float *z)
{
int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE;
int num_mp, max_threads_mp, num_blocks_mp, num_blocks;
dim3 block(BLOCK_SIZE);
num_mp = getGPUMultiProcessors();
max_threads_mp = getGPUMaxThreadsPerMP();
num_blocks_mp = max_threads_mp/BLOCK_SIZE;
num_blocks = num_blocks_mp*num_mp;
dim3 grid(num_blocks);
spgpuSabgdxyz_krn<<<grid, block, 0, handle->currentStream>>>(n, alpha, beta, gamma, delta,
x, y, z);
}

@ -20,6 +20,9 @@ extern "C"
{
#include "core.h"
#include "vector.h"
int getGPUMultiProcessors();
int getGPUMaxThreadsPerMP();
//#include "cuda_util.h"
}
@ -27,6 +30,52 @@ extern "C"
#define BLOCK_SIZE 512
#if 1
__global__ void spgpuSaxpby_krn(float *z, int n, float beta, float *y, float alpha, float* x)
{
int id = threadIdx.x + BLOCK_SIZE*blockIdx.x;
unsigned int gridSize = blockDim.x * gridDim.x;
if (beta == 0.0f) {
for ( ; id < n; id +=gridSize)
{
// Since z, x and y are accessed with the same offset by the same thread,
// and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing).
z[id] = PREC_FMUL(alpha,x[id]);
}
} else {
for ( ; id < n; id +=gridSize)
{
// Since z, x and y are accessed with the same offset by the same thread,
// and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing).
z[id] = PREC_FADD(PREC_FMUL(alpha, x[id]), PREC_FMUL(beta,y[id]));
}
}
}
void spgpuSaxpby(spgpuHandle_t handle,
__device float *z,
int n,
float beta,
__device float *y,
float alpha,
__device float* x)
{
int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE;
int num_mp, max_threads_mp, num_blocks_mp, num_blocks;
dim3 block(BLOCK_SIZE);
num_mp = getGPUMultiProcessors();
max_threads_mp = getGPUMaxThreadsPerMP();
num_blocks_mp = max_threads_mp/BLOCK_SIZE;
num_blocks = num_blocks_mp*num_mp;
dim3 grid(num_blocks);
spgpuSaxpby_krn<<<grid, block, 0, handle->currentStream>>>(z, n, beta, y, alpha, x);
}
#else
__global__ void spgpuSaxpby_krn(float *z, int n, float beta, float *y, float alpha, float* x)
{
int id = threadIdx.x + BLOCK_SIZE*blockIdx.x;
@ -44,6 +93,7 @@ __global__ void spgpuSaxpby_krn(float *z, int n, float beta, float *y, float alp
}
void spgpuSaxpby_(spgpuHandle_t handle,
__device float *z,
int n,
@ -83,7 +133,7 @@ void spgpuSaxpby(spgpuHandle_t handle,
cudaCheckError("CUDA error on saxpby");
}
#endif
void spgpuSmaxpby(spgpuHandle_t handle,
__device float *z,
int n,

@ -0,0 +1,78 @@
/*
* spGPU - Sparse matrices on GPU library.
*
* Copyright (C) 2010 - 2012
* Davide Barbieri - University of Rome Tor Vergata
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 3 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include "cudadebug.h"
#include "cudalang.h"
#include <cuda_runtime.h>
extern "C"
{
#include "core.h"
#include "vector.h"
int getGPUMultiProcessors();
int getGPUMaxThreadsPerMP();
}
#include "debug.h"
#define BLOCK_SIZE 512
__global__ void spgpuSxyzw_krn(int n, float a, float b,
float c, float d,
float e, float f,
float * x, float *y,
float *z, float *w)
{
int id = threadIdx.x + BLOCK_SIZE*blockIdx.x;
unsigned int gridSize = blockDim.x * gridDim.x;
float ty, tz;
for ( ; id < n; id +=gridSize)
//if (id,n)
{
ty = PREC_FADD(PREC_FMUL(a, x[id]), PREC_FMUL(b,y[id]));
tz = PREC_FADD(PREC_FMUL(c, ty), PREC_FMUL(d,z[id]));
w[id] = PREC_FADD(PREC_FMUL(e, tz), PREC_FMUL(f,w[id]));
y[id] = ty;
z[id] = tz;
}
}
void spgpuSxyzw(spgpuHandle_t handle,
int n,
float a, float b,
float c, float d,
float e, float f,
__device float * x,
__device float * y,
__device float * z,
__device float *w)
{
int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE;
int num_mp, max_threads_mp, num_blocks_mp, num_blocks;
dim3 block(BLOCK_SIZE);
num_mp = getGPUMultiProcessors();
max_threads_mp = getGPUMaxThreadsPerMP();
num_blocks_mp = max_threads_mp/BLOCK_SIZE;
num_blocks = num_blocks_mp*num_mp;
dim3 grid(num_blocks);
spgpuSxyzw_krn<<<grid, block, 0, handle->currentStream>>>(n, a,b,c,d,e,f,
x, y, z,w);
}

@ -0,0 +1,80 @@
/*
* spGPU - Sparse matrices on GPU library.
*
* Copyright (C) 2010 - 2012
* Davide Barbieri - University of Rome Tor Vergata
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 3 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include "cudadebug.h"
#include "cudalang.h"
#include <cuda_runtime.h>
extern "C"
{
#include "core.h"
#include "vector.h"
int getGPUMultiProcessors();
int getGPUMaxThreadsPerMP();
}
#include "debug.h"
#define BLOCK_SIZE 512
__global__ void spgpuZabgdxyz_krn(int n, cuDoubleComplex alpha, cuDoubleComplex beta,
cuDoubleComplex gamma, cuDoubleComplex delta,
cuDoubleComplex * x, cuDoubleComplex *y, cuDoubleComplex *z)
{
int id = threadIdx.x + BLOCK_SIZE*blockIdx.x;
unsigned int gridSize = blockDim.x * gridDim.x;
cuDoubleComplex t;
for ( ; id < n; id +=gridSize)
//if (id,n)
{
if (cuDoubleComplex_isZero(beta))
t = cuCmul(alpha,x[id]);
else
t = cuCfma(alpha, x[id], cuCmul(beta,y[id]));
if (cuDoubleComplex_isZero(delta))
z[id] = cuCmul(gamma, t);
else
z[id] = cuCfma(gamma, t, cuCmul(delta,z[id]));
y[id] = t;
}
}
void spgpuZabgdxyz(spgpuHandle_t handle,
int n,
cuDoubleComplex alpha,
cuDoubleComplex beta,
cuDoubleComplex gamma,
cuDoubleComplex delta,
__device cuDoubleComplex * x,
__device cuDoubleComplex * y,
__device cuDoubleComplex *z)
{
int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE;
int num_mp, max_threads_mp, num_blocks_mp, num_blocks;
dim3 block(BLOCK_SIZE);
num_mp = getGPUMultiProcessors();
max_threads_mp = getGPUMaxThreadsPerMP();
num_blocks_mp = max_threads_mp/BLOCK_SIZE;
num_blocks = num_blocks_mp*num_mp;
dim3 grid(num_blocks);
spgpuZabgdxyz_krn<<<grid, block, 0, handle->currentStream>>>(n, alpha, beta, gamma, delta,
x, y, z);
}

@ -23,6 +23,9 @@ extern "C"
{
#include "core.h"
#include "vector.h"
int getGPUMultiProcessors();
int getGPUMaxThreadsPerMP();
//#include "cuda_util.h"
}
@ -30,6 +33,49 @@ extern "C"
#define BLOCK_SIZE 512
#if 1
__global__ void spgpuZaxpby_krn(cuDoubleComplex *z, int n, cuDoubleComplex beta, cuDoubleComplex *y, cuDoubleComplex alpha, cuDoubleComplex* x)
{
int id = threadIdx.x + BLOCK_SIZE*blockIdx.x;
unsigned int gridSize = blockDim.x * gridDim.x;
if (cuDoubleComplex_isZero(beta)) {
for ( ; id < n; id +=gridSize)
//if (id,n)
{
// Since z, x and y are accessed with the same offset by the same thread,
// and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing).
z[id] = cuCmul(alpha,x[id]);
}
} else {
for ( ; id < n; id +=gridSize)
//if (id,n)
{
z[id] = cuCfma(beta, y[id], cuCmul(alpha, x[id]));
}
}
}
void spgpuZaxpby(spgpuHandle_t handle,
__device cuDoubleComplex *z,
int n,
cuDoubleComplex beta,
__device cuDoubleComplex *y,
cuDoubleComplex alpha,
__device cuDoubleComplex* x)
{
int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE;
int num_mp, max_threads_mp, num_blocks_mp, num_blocks;
dim3 block(BLOCK_SIZE);
num_mp = getGPUMultiProcessors();
max_threads_mp = getGPUMaxThreadsPerMP();
num_blocks_mp = max_threads_mp/BLOCK_SIZE;
num_blocks = num_blocks_mp*num_mp;
dim3 grid(num_blocks);
spgpuZaxpby_krn<<<grid, block, 0, handle->currentStream>>>(z, n, beta, y, alpha, x);
}
#else
__global__ void spgpuZaxpby_krn(cuDoubleComplex *z, int n, cuDoubleComplex beta, cuDoubleComplex *y, cuDoubleComplex alpha, cuDoubleComplex* x)
{
int id = threadIdx.x + BLOCK_SIZE*blockIdx.x;
@ -86,7 +132,7 @@ void spgpuZaxpby(spgpuHandle_t handle,
cudaCheckError("CUDA error on daxpby");
}
#endif
void spgpuZmaxpby(spgpuHandle_t handle,
__device cuDoubleComplex *z,
int n,

@ -0,0 +1,78 @@
/*
* spGPU - Sparse matrices on GPU library.
*
* Copyright (C) 2010 - 2012
* Davide Barbieri - University of Rome Tor Vergata
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 3 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include "cudadebug.h"
#include "cudalang.h"
#include <cuda_runtime.h>
extern "C"
{
#include "core.h"
#include "vector.h"
int getGPUMultiProcessors();
int getGPUMaxThreadsPerMP();
}
#include "debug.h"
#define BLOCK_SIZE 512
__global__ void spgpuZxyzw_krn(int n, cuDoubleComplex a, cuDoubleComplex b,
cuDoubleComplex c, cuDoubleComplex d,
cuDoubleComplex e, cuDoubleComplex f,
cuDoubleComplex * x, cuDoubleComplex *y,
cuDoubleComplex *z, cuDoubleComplex *w)
{
int id = threadIdx.x + BLOCK_SIZE*blockIdx.x;
unsigned int gridSize = blockDim.x * gridDim.x;
cuDoubleComplex ty, tz;
for ( ; id < n; id +=gridSize)
//if (id,n)
{
ty = cuCfma(a, x[id], cuCmul(b,y[id]));
tz = cuCfma(c, ty, cuCmul(d,z[id]));
w[id] = cuCfma(e, tz, cuCmul(f,w[id]));
y[id] = ty;
z[id] = tz;
}
}
void spgpuZxyzw(spgpuHandle_t handle,
int n,
cuDoubleComplex a, cuDoubleComplex b,
cuDoubleComplex c, cuDoubleComplex d,
cuDoubleComplex e, cuDoubleComplex f,
__device cuDoubleComplex * x,
__device cuDoubleComplex * y,
__device cuDoubleComplex * z,
__device cuDoubleComplex *w)
{
int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE;
int num_mp, max_threads_mp, num_blocks_mp, num_blocks;
dim3 block(BLOCK_SIZE);
num_mp = getGPUMultiProcessors();
max_threads_mp = getGPUMaxThreadsPerMP();
num_blocks_mp = max_threads_mp/BLOCK_SIZE;
num_blocks = num_blocks_mp*num_mp;
dim3 grid(num_blocks);
spgpuZxyzw_krn<<<grid, block, 0, handle->currentStream>>>(n, a,b,c,d,e,f,
x, y, z,w);
}

@ -181,6 +181,29 @@ void spgpuSaxpby(spgpuHandle_t handle,
float alpha,
__device float* x);
void spgpuSabgdxyz(spgpuHandle_t handle,
int n,
float alpha,
float beta,
float gamma,
float delta,
__device float* x,
__device float *y,
__device float *z)
;
void spgpuSxyzw(spgpuHandle_t handle,
int n,
float a, float b,
float c, float d,
float e, float f,
__device float* x,
__device float *y,
__device float *z,
__device float *w)
;
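A hedged sketch of how the new prototypes above might be called from host code; the function update_vectors, the device pointers and the coefficient values are illustrative, and the only thing assumed about spGPU itself is the signatures declared here (with all vectors already resident on the device and of length at least n).

/* Sketch: chaining the fused routines; illustrative names and coefficients. */
void update_vectors(spgpuHandle_t handle, int n,
                    float *d_x, float *d_y, float *d_z, float *d_w)
{
  /* y = 2*x + y ; z = 0.5*y + z */
  spgpuSabgdxyz(handle, n, 2.0f, 1.0f, 0.5f, 1.0f, d_x, d_y, d_z);
  /* y = a*x + b*y ; z = c*y + d*z ; w = e*z + f*w (all coefficients nonzero) */
  spgpuSxyzw(handle, n, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, d_x, d_y, d_z, d_w);
}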
/**
* \fn void spgpuSmaxpby(spgpuHandle_t handle, __device float *z, int n, float beta, __device float *y, float alpha, __device float* x, int count, int pitch)
* Computes the single precision z = beta * y + alpha * x of x and y multivectors. z could be exactly x or y (without offset) or another vector.
@ -463,7 +486,30 @@ void spgpuDaxpby(spgpuHandle_t handle,
double alpha,
__device double* x);
/**
void spgpuDabgdxyz(spgpuHandle_t handle,
int n,
double alpha,
double beta,
double gamma,
double delta,
__device double* x,
__device double *y,
__device double *z)
;
void spgpuDxyzw(spgpuHandle_t handle,
int n,
double a, double b,
double c, double d,
double e, double f,
__device double* x,
__device double *y,
__device double *z,
__device double *w)
;
/**
* \fn void spgpuDmaxpby(spgpuHandle_t handle, __device double *z, int n, double beta, __device double *y, double alpha, __device double* x, int count, int pitch)
* Computes the double precision z = beta * y + alpha * x of x and y multivectors. z could be exactly x or y (without offset) or another vector.
* \param handle the spgpu handle used to call this routine
@ -742,6 +788,30 @@ void spgpuCaxpby(spgpuHandle_t handle,
cuFloatComplex alpha,
__device cuFloatComplex* x);
void spgpuCabgdxyz(spgpuHandle_t handle,
int n,
cuFloatComplex alpha,
cuFloatComplex beta,
cuFloatComplex gamma,
cuFloatComplex delta,
__device cuFloatComplex* x,
__device cuFloatComplex *y,
__device cuFloatComplex *z)
;
void spgpuCxyzw(spgpuHandle_t handle,
int n,
cuFloatComplex a, cuFloatComplex b,
cuFloatComplex c, cuFloatComplex d,
cuFloatComplex e, cuFloatComplex f,
__device cuFloatComplex* x,
__device cuFloatComplex *y,
__device cuFloatComplex *z,
__device cuFloatComplex *w)
;
/**
* \fn void spgpuCmaxpby(spgpuHandle_t handle, __device cuFloatComplex *z, int n, cuFloatComplex beta, __device cuFloatComplex *y, cuFloatComplex alpha, __device cuFloatComplex* x, int count, int pitch)
* Computes the single precision complex z = beta * y + alpha * x of x and y multivectors. z could be exactly x or y (without offset) or another vector.
@ -1021,6 +1091,31 @@ void spgpuZaxpby(spgpuHandle_t handle,
cuDoubleComplex alpha,
__device cuDoubleComplex* x);
void spgpuZabgdxyz(spgpuHandle_t handle,
int n,
cuDoubleComplex alpha,
cuDoubleComplex beta,
cuDoubleComplex gamma,
cuDoubleComplex delta,
__device cuDoubleComplex* x,
__device cuDoubleComplex *y,
__device cuDoubleComplex *z)
;
void spgpuZxyzw(spgpuHandle_t handle,
int n,
cuDoubleComplex a, cuDoubleComplex b,
cuDoubleComplex c, cuDoubleComplex d,
cuDoubleComplex e, cuDoubleComplex f,
__device cuDoubleComplex* x,
__device cuDoubleComplex *y,
__device cuDoubleComplex *z,
__device cuDoubleComplex *w)
;
/**
* \fn void spgpuZmaxpby(spgpuHandle_t handle, __device cuDoubleComplex *z, int n, cuDoubleComplex beta, __device cuDoubleComplex *y, cuDoubleComplex alpha, __device cuDoubleComplex* x, int count, int pitch)
* Computes the double precision complex z = beta * y + alpha * x of x and y multivectors. z could be exactly x or y (without offset) or another vector.
