Implement psi_exscan and use in _from_coo

omp-threadsafe
sfilippone 3 years ago
parent dbd55321f8
commit 02dd204351

@ -156,4 +156,16 @@ module psi_c_serial_mod
end subroutine psi_csctv
end interface psi_sct
interface psi_exscan
subroutine psi_c_exscanv(n,x,info,shift,ibase)
import :: psb_ipk_, psb_spk_
implicit none
integer(psb_ipk_), intent(in) :: n
complex(psb_spk_), intent (inout) :: x(:)
integer(psb_ipk_), intent(out) :: info
complex(psb_spk_), intent(in), optional :: shift
integer(psb_ipk_), intent(in), optional :: ibase
end subroutine psi_c_exscanv
end interface psi_exscan
end module psi_c_serial_mod

@ -156,4 +156,16 @@ module psi_d_serial_mod
end subroutine psi_dsctv
end interface psi_sct
interface psi_exscan
subroutine psi_d_exscanv(n,x,info,shift,ibase)
import :: psb_ipk_, psb_dpk_
implicit none
integer(psb_ipk_), intent(in) :: n
real(psb_dpk_), intent (inout) :: x(:)
integer(psb_ipk_), intent(out) :: info
real(psb_dpk_), intent(in), optional :: shift
integer(psb_ipk_), intent(in), optional :: ibase
end subroutine psi_d_exscanv
end interface psi_exscan
end module psi_d_serial_mod

@ -156,4 +156,16 @@ module psi_e_serial_mod
end subroutine psi_esctv
end interface psi_sct
interface psi_exscan
subroutine psi_e_exscanv(n,x,info,shift,ibase)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: n
integer(psb_epk_), intent (inout) :: x(:)
integer(psb_ipk_), intent(out) :: info
integer(psb_epk_), intent(in), optional :: shift
integer(psb_ipk_), intent(in), optional :: ibase
end subroutine psi_e_exscanv
end interface psi_exscan
end module psi_e_serial_mod

@ -156,4 +156,16 @@ module psi_i2_serial_mod
end subroutine psi_i2sctv
end interface psi_sct
interface psi_exscan
subroutine psi_i2_exscanv(n,x,info,shift,ibase)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: n
integer(psb_i2pk_), intent (inout) :: x(:)
integer(psb_ipk_), intent(out) :: info
integer(psb_i2pk_), intent(in), optional :: shift
integer(psb_ipk_), intent(in), optional :: ibase
end subroutine psi_i2_exscanv
end interface psi_exscan
end module psi_i2_serial_mod

@ -156,4 +156,16 @@ module psi_m_serial_mod
end subroutine psi_msctv
end interface psi_sct
interface psi_exscan
subroutine psi_m_exscanv(n,x,info,shift,ibase)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: n
integer(psb_mpk_), intent (inout) :: x(:)
integer(psb_ipk_), intent(out) :: info
integer(psb_mpk_), intent(in), optional :: shift
integer(psb_ipk_), intent(in), optional :: ibase
end subroutine psi_m_exscanv
end interface psi_exscan
end module psi_m_serial_mod

@ -156,4 +156,16 @@ module psi_s_serial_mod
end subroutine psi_ssctv
end interface psi_sct
interface psi_exscan
subroutine psi_s_exscanv(n,x,info,shift,ibase)
import :: psb_ipk_, psb_spk_
implicit none
integer(psb_ipk_), intent(in) :: n
real(psb_spk_), intent (inout) :: x(:)
integer(psb_ipk_), intent(out) :: info
real(psb_spk_), intent(in), optional :: shift
integer(psb_ipk_), intent(in), optional :: ibase
end subroutine psi_s_exscanv
end interface psi_exscan
end module psi_s_serial_mod

@ -156,4 +156,16 @@ module psi_z_serial_mod
end subroutine psi_zsctv
end interface psi_sct
interface psi_exscan
subroutine psi_z_exscanv(n,x,info,shift,ibase)
import :: psb_ipk_, psb_dpk_
implicit none
integer(psb_ipk_), intent(in) :: n
complex(psb_dpk_), intent (inout) :: x(:)
integer(psb_ipk_), intent(out) :: info
complex(psb_dpk_), intent(in), optional :: shift
integer(psb_ipk_), intent(in), optional :: ibase
end subroutine psi_z_exscanv
end interface psi_exscan
end module psi_z_serial_mod

@ -2189,6 +2189,9 @@ subroutine psb_c_mv_csc_from_coo(a,b,info)
use psb_error_mod
use psb_c_base_mat_mod
use psb_c_csc_mat_mod, psb_protect_name => psb_c_mv_csc_from_coo
#if defined(OPENMP)
use omp_lib
#endif
implicit none
class(psb_c_csc_sparse_mat), intent(inout) :: a
@ -2223,18 +2226,35 @@ subroutine psb_c_mv_csc_from_coo(a,b,info)
call psb_realloc(nc+1,a%icp,info)
call b%free()
#if defined(OPENMP)
!$OMP PARALLEL default(none) &
!$OMP shared(nr,a,itemp,nza) &
!$OMP private(i,info)
!$OMP WORKSHARE
a%icp(:) = 0
!$OMP END WORKSHARE
!$OMP DO schedule(STATIC) &
!$OMP private(k)
do k=1,nza
i = itemp(k)
a%icp(i) = a%icp(i) + 1
!$OMP ATOMIC UPDATE
a%icp(i+1) = a%icp(i+1) + 1
!$OMP END ATOMIC
end do
ip = 1
do i=1,nc
nrl = a%icp(i)
a%icp(i) = ip
ip = ip + nrl
!$OMP END DO
!$OMP END PARALLEL
#else
a%icp(:) = 0
do k=1,nza
i = itemp(k)
a%icp(i) = a%icp(i) + 1
end do
a%icp(nc+1) = ip
#endif
call psi_exscan(nc+1,a%icp,info,shift=ione,ibase=ione)
call a%set_host()

@ -2876,12 +2876,6 @@ subroutine psb_c_cp_csr_from_coo(a,b,info)
character(len=20) :: name='c_cp_csr_from_coo'
logical :: use_openmp = .false.
#if defined(OPENMP)
integer(psb_ipk_), allocatable :: suma(:)
integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j
integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads
#endif
info = psb_success_
debug_unit = psb_get_debug_unit()
debug_level = psb_get_debug_level()
@ -2927,8 +2921,8 @@ subroutine psb_c_cp_csr_from_coo(a,b,info)
#if defined(OPENMP)
!$OMP PARALLEL default(none) &
!$OMP shared(suma,nthreads,nr,a,itemp,nza) &
!$OMP private(ithread,work,i,first_idx,last_idx,s)
!$OMP shared(nr,a,itemp,nza) &
!$OMP private(i,info)
!$OMP WORKSHARE
a%irp(:) = 0
@ -2943,51 +2937,6 @@ subroutine psb_c_cp_csr_from_coo(a,b,info)
!$OMP END ATOMIC
end do
!$OMP END DO
!$OMP SINGLE
nthreads = omp_get_num_threads()
allocate(suma(nthreads+1))
suma(:) = 0
!suma(1) = 1
!$OMP END SINGLE
ithread = omp_get_thread_num()
work = (nr+1)/nthreads
if (ithread < MOD((nr+1),nthreads)) then
work = work + 1
first_idx = ithread*work + 1
else
first_idx = ithread*work + MOD((nr+1),nthreads) + 1
end if
last_idx = min(first_idx + work - 1,nr+1)
s = 0
if (first_idx<=last_idx) then
suma(ithread+2) = suma(ithread+2) + a%irp(first_idx)
do i=first_idx+1,last_idx
suma(ithread+2) = suma(ithread+2) + a%irp(i)
a%irp(i) = a%irp(i)+a%irp(i-1)
end do
end if
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
suma(i) = suma(i) + suma(i-1)
end do
!$OMP END SINGLE
!$OMP BARRIER
!$OMP DO SCHEDULE(STATIC)
do i=1,nr+1
a%irp(i) = suma(ithread+1) + a%irp(i) +1
end do
!$OMP END DO
!$OMP SINGLE
a%irp(1) = 1
!$OMP END SINGLE
!$OMP END PARALLEL
#else
a%irp(:) = 0
@ -2995,14 +2944,8 @@ subroutine psb_c_cp_csr_from_coo(a,b,info)
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
#endif
call psi_exscan(nr+1,a%irp,info,shift=ione,ibase=ione)
call a%set_host()
@ -3122,12 +3065,6 @@ subroutine psb_c_mv_csr_from_coo(a,b,info)
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='mv_from_coo'
#if defined(OPENMP)
integer(psb_ipk_), allocatable :: suma(:)
integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s
integer(psb_ipk_) :: nxt_val,old_val,saved_elem
#endif
info = psb_success_
debug_unit = psb_get_debug_unit()
debug_level = psb_get_debug_level()
@ -3153,8 +3090,8 @@ subroutine psb_c_mv_csr_from_coo(a,b,info)
#if defined(OPENMP)
!$OMP PARALLEL default(none) &
!$OMP shared(suma,nthreads,nr,a,itemp,nza) &
!$OMP private(ithread,work,i,first_idx,last_idx,s)
!$OMP shared(nr,a,itemp,nza) &
!$OMP private(i,info)
!$OMP WORKSHARE
a%irp(:) = 0
@ -3169,51 +3106,6 @@ subroutine psb_c_mv_csr_from_coo(a,b,info)
!$OMP END ATOMIC
end do
!$OMP END DO
!$OMP SINGLE
nthreads = omp_get_num_threads()
allocate(suma(nthreads+1))
suma(:) = 0
!suma(1) = 1
!$OMP END SINGLE
ithread = omp_get_thread_num()
work = (nr+1)/nthreads
if (ithread < MOD((nr+1),nthreads)) then
work = work + 1
first_idx = ithread*work + 1
else
first_idx = ithread*work + MOD((nr+1),nthreads) + 1
end if
last_idx = min(first_idx + work - 1,nr+1)
s = 0
if (first_idx<=last_idx) then
suma(ithread+2) = suma(ithread+2) + a%irp(first_idx)
do i=first_idx+1,last_idx
suma(ithread+2) = suma(ithread+2) + a%irp(i)
a%irp(i) = a%irp(i)+a%irp(i-1)
end do
end if
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
suma(i) = suma(i) + suma(i-1)
end do
!$OMP END SINGLE
!$OMP BARRIER
!$OMP DO SCHEDULE(STATIC)
do i=1,nr+1
a%irp(i) = suma(ithread+1) + a%irp(i) +1
end do
!$OMP END DO
!$OMP SINGLE
a%irp(1) = 1
!$OMP END SINGLE
!$OMP END PARALLEL
#else
a%irp(:) = 0
@ -3221,14 +3113,8 @@ subroutine psb_c_mv_csr_from_coo(a,b,info)
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
#endif
call psi_exscan(nr+1,a%irp,info,shift=ione,ibase=ione)
!write(0,*) name,' Check:',a%irp(nr+1),all(a%irp(1:nr) < a%irp(nr+1))
!write(0,*) name,a%irp(:)

@ -2189,6 +2189,9 @@ subroutine psb_d_mv_csc_from_coo(a,b,info)
use psb_error_mod
use psb_d_base_mat_mod
use psb_d_csc_mat_mod, psb_protect_name => psb_d_mv_csc_from_coo
#if defined(OPENMP)
use omp_lib
#endif
implicit none
class(psb_d_csc_sparse_mat), intent(inout) :: a
@ -2223,18 +2226,35 @@ subroutine psb_d_mv_csc_from_coo(a,b,info)
call psb_realloc(nc+1,a%icp,info)
call b%free()
#if defined(OPENMP)
!$OMP PARALLEL default(none) &
!$OMP shared(nr,a,itemp,nza) &
!$OMP private(i,info)
!$OMP WORKSHARE
a%icp(:) = 0
!$OMP END WORKSHARE
!$OMP DO schedule(STATIC) &
!$OMP private(k)
do k=1,nza
i = itemp(k)
a%icp(i) = a%icp(i) + 1
!$OMP ATOMIC UPDATE
a%icp(i+1) = a%icp(i+1) + 1
!$OMP END ATOMIC
end do
ip = 1
do i=1,nc
nrl = a%icp(i)
a%icp(i) = ip
ip = ip + nrl
!$OMP END DO
!$OMP END PARALLEL
#else
a%icp(:) = 0
do k=1,nza
i = itemp(k)
a%icp(i) = a%icp(i) + 1
end do
a%icp(nc+1) = ip
#endif
call psi_exscan(nc+1,a%icp,info,shift=ione,ibase=ione)
call a%set_host()

@ -2876,12 +2876,6 @@ subroutine psb_d_cp_csr_from_coo(a,b,info)
character(len=20) :: name='d_cp_csr_from_coo'
logical :: use_openmp = .false.
#if defined(OPENMP)
integer(psb_ipk_), allocatable :: suma(:)
integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j
integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads
#endif
info = psb_success_
debug_unit = psb_get_debug_unit()
debug_level = psb_get_debug_level()
@ -2927,8 +2921,8 @@ subroutine psb_d_cp_csr_from_coo(a,b,info)
#if defined(OPENMP)
!$OMP PARALLEL default(none) &
!$OMP shared(suma,nthreads,nr,a,itemp,nza) &
!$OMP private(ithread,work,i,first_idx,last_idx,s)
!$OMP shared(nr,a,itemp,nza) &
!$OMP private(i,info)
!$OMP WORKSHARE
a%irp(:) = 0
@ -2943,51 +2937,6 @@ subroutine psb_d_cp_csr_from_coo(a,b,info)
!$OMP END ATOMIC
end do
!$OMP END DO
!$OMP SINGLE
nthreads = omp_get_num_threads()
allocate(suma(nthreads+1))
suma(:) = 0
!suma(1) = 1
!$OMP END SINGLE
ithread = omp_get_thread_num()
work = (nr+1)/nthreads
if (ithread < MOD((nr+1),nthreads)) then
work = work + 1
first_idx = ithread*work + 1
else
first_idx = ithread*work + MOD((nr+1),nthreads) + 1
end if
last_idx = min(first_idx + work - 1,nr+1)
s = 0
if (first_idx<=last_idx) then
suma(ithread+2) = suma(ithread+2) + a%irp(first_idx)
do i=first_idx+1,last_idx
suma(ithread+2) = suma(ithread+2) + a%irp(i)
a%irp(i) = a%irp(i)+a%irp(i-1)
end do
end if
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
suma(i) = suma(i) + suma(i-1)
end do
!$OMP END SINGLE
!$OMP BARRIER
!$OMP DO SCHEDULE(STATIC)
do i=1,nr+1
a%irp(i) = suma(ithread+1) + a%irp(i) +1
end do
!$OMP END DO
!$OMP SINGLE
a%irp(1) = 1
!$OMP END SINGLE
!$OMP END PARALLEL
#else
a%irp(:) = 0
@ -2995,14 +2944,8 @@ subroutine psb_d_cp_csr_from_coo(a,b,info)
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
#endif
call psi_exscan(nr+1,a%irp,info,shift=ione,ibase=ione)
call a%set_host()
@ -3122,12 +3065,6 @@ subroutine psb_d_mv_csr_from_coo(a,b,info)
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='mv_from_coo'
#if defined(OPENMP)
integer(psb_ipk_), allocatable :: suma(:)
integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s
integer(psb_ipk_) :: nxt_val,old_val,saved_elem
#endif
info = psb_success_
debug_unit = psb_get_debug_unit()
debug_level = psb_get_debug_level()
@ -3153,8 +3090,8 @@ subroutine psb_d_mv_csr_from_coo(a,b,info)
#if defined(OPENMP)
!$OMP PARALLEL default(none) &
!$OMP shared(suma,nthreads,nr,a,itemp,nza) &
!$OMP private(ithread,work,i,first_idx,last_idx,s)
!$OMP shared(nr,a,itemp,nza) &
!$OMP private(i,info)
!$OMP WORKSHARE
a%irp(:) = 0
@ -3169,51 +3106,6 @@ subroutine psb_d_mv_csr_from_coo(a,b,info)
!$OMP END ATOMIC
end do
!$OMP END DO
!$OMP SINGLE
nthreads = omp_get_num_threads()
allocate(suma(nthreads+1))
suma(:) = 0
!suma(1) = 1
!$OMP END SINGLE
ithread = omp_get_thread_num()
work = (nr+1)/nthreads
if (ithread < MOD((nr+1),nthreads)) then
work = work + 1
first_idx = ithread*work + 1
else
first_idx = ithread*work + MOD((nr+1),nthreads) + 1
end if
last_idx = min(first_idx + work - 1,nr+1)
s = 0
if (first_idx<=last_idx) then
suma(ithread+2) = suma(ithread+2) + a%irp(first_idx)
do i=first_idx+1,last_idx
suma(ithread+2) = suma(ithread+2) + a%irp(i)
a%irp(i) = a%irp(i)+a%irp(i-1)
end do
end if
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
suma(i) = suma(i) + suma(i-1)
end do
!$OMP END SINGLE
!$OMP BARRIER
!$OMP DO SCHEDULE(STATIC)
do i=1,nr+1
a%irp(i) = suma(ithread+1) + a%irp(i) +1
end do
!$OMP END DO
!$OMP SINGLE
a%irp(1) = 1
!$OMP END SINGLE
!$OMP END PARALLEL
#else
a%irp(:) = 0
@ -3221,14 +3113,8 @@ subroutine psb_d_mv_csr_from_coo(a,b,info)
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
#endif
call psi_exscan(nr+1,a%irp,info,shift=ione,ibase=ione)
!write(0,*) name,' Check:',a%irp(nr+1),all(a%irp(1:nr) < a%irp(nr+1))
!write(0,*) name,a%irp(:)

@ -2189,6 +2189,9 @@ subroutine psb_s_mv_csc_from_coo(a,b,info)
use psb_error_mod
use psb_s_base_mat_mod
use psb_s_csc_mat_mod, psb_protect_name => psb_s_mv_csc_from_coo
#if defined(OPENMP)
use omp_lib
#endif
implicit none
class(psb_s_csc_sparse_mat), intent(inout) :: a
@ -2223,18 +2226,35 @@ subroutine psb_s_mv_csc_from_coo(a,b,info)
call psb_realloc(nc+1,a%icp,info)
call b%free()
#if defined(OPENMP)
!$OMP PARALLEL default(none) &
!$OMP shared(nr,a,itemp,nza) &
!$OMP private(i,info)
!$OMP WORKSHARE
a%icp(:) = 0
!$OMP END WORKSHARE
!$OMP DO schedule(STATIC) &
!$OMP private(k)
do k=1,nza
i = itemp(k)
a%icp(i) = a%icp(i) + 1
!$OMP ATOMIC UPDATE
a%icp(i+1) = a%icp(i+1) + 1
!$OMP END ATOMIC
end do
ip = 1
do i=1,nc
nrl = a%icp(i)
a%icp(i) = ip
ip = ip + nrl
!$OMP END DO
!$OMP END PARALLEL
#else
a%icp(:) = 0
do k=1,nza
i = itemp(k)
a%icp(i) = a%icp(i) + 1
end do
a%icp(nc+1) = ip
#endif
call psi_exscan(nc+1,a%icp,info,shift=ione,ibase=ione)
call a%set_host()

@ -2876,12 +2876,6 @@ subroutine psb_s_cp_csr_from_coo(a,b,info)
character(len=20) :: name='s_cp_csr_from_coo'
logical :: use_openmp = .false.
#if defined(OPENMP)
integer(psb_ipk_), allocatable :: suma(:)
integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j
integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads
#endif
info = psb_success_
debug_unit = psb_get_debug_unit()
debug_level = psb_get_debug_level()
@ -2927,8 +2921,8 @@ subroutine psb_s_cp_csr_from_coo(a,b,info)
#if defined(OPENMP)
!$OMP PARALLEL default(none) &
!$OMP shared(suma,nthreads,nr,a,itemp,nza) &
!$OMP private(ithread,work,i,first_idx,last_idx,s)
!$OMP shared(nr,a,itemp,nza) &
!$OMP private(i,info)
!$OMP WORKSHARE
a%irp(:) = 0
@ -2943,51 +2937,6 @@ subroutine psb_s_cp_csr_from_coo(a,b,info)
!$OMP END ATOMIC
end do
!$OMP END DO
!$OMP SINGLE
nthreads = omp_get_num_threads()
allocate(suma(nthreads+1))
suma(:) = 0
!suma(1) = 1
!$OMP END SINGLE
ithread = omp_get_thread_num()
work = (nr+1)/nthreads
if (ithread < MOD((nr+1),nthreads)) then
work = work + 1
first_idx = ithread*work + 1
else
first_idx = ithread*work + MOD((nr+1),nthreads) + 1
end if
last_idx = min(first_idx + work - 1,nr+1)
s = 0
if (first_idx<=last_idx) then
suma(ithread+2) = suma(ithread+2) + a%irp(first_idx)
do i=first_idx+1,last_idx
suma(ithread+2) = suma(ithread+2) + a%irp(i)
a%irp(i) = a%irp(i)+a%irp(i-1)
end do
end if
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
suma(i) = suma(i) + suma(i-1)
end do
!$OMP END SINGLE
!$OMP BARRIER
!$OMP DO SCHEDULE(STATIC)
do i=1,nr+1
a%irp(i) = suma(ithread+1) + a%irp(i) +1
end do
!$OMP END DO
!$OMP SINGLE
a%irp(1) = 1
!$OMP END SINGLE
!$OMP END PARALLEL
#else
a%irp(:) = 0
@ -2995,14 +2944,8 @@ subroutine psb_s_cp_csr_from_coo(a,b,info)
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
#endif
call psi_exscan(nr+1,a%irp,info,shift=ione,ibase=ione)
call a%set_host()
@ -3122,12 +3065,6 @@ subroutine psb_s_mv_csr_from_coo(a,b,info)
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='mv_from_coo'
#if defined(OPENMP)
integer(psb_ipk_), allocatable :: suma(:)
integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s
integer(psb_ipk_) :: nxt_val,old_val,saved_elem
#endif
info = psb_success_
debug_unit = psb_get_debug_unit()
debug_level = psb_get_debug_level()
@ -3153,8 +3090,8 @@ subroutine psb_s_mv_csr_from_coo(a,b,info)
#if defined(OPENMP)
!$OMP PARALLEL default(none) &
!$OMP shared(suma,nthreads,nr,a,itemp,nza) &
!$OMP private(ithread,work,i,first_idx,last_idx,s)
!$OMP shared(nr,a,itemp,nza) &
!$OMP private(i,info)
!$OMP WORKSHARE
a%irp(:) = 0
@ -3169,51 +3106,6 @@ subroutine psb_s_mv_csr_from_coo(a,b,info)
!$OMP END ATOMIC
end do
!$OMP END DO
!$OMP SINGLE
nthreads = omp_get_num_threads()
allocate(suma(nthreads+1))
suma(:) = 0
!suma(1) = 1
!$OMP END SINGLE
ithread = omp_get_thread_num()
work = (nr+1)/nthreads
if (ithread < MOD((nr+1),nthreads)) then
work = work + 1
first_idx = ithread*work + 1
else
first_idx = ithread*work + MOD((nr+1),nthreads) + 1
end if
last_idx = min(first_idx + work - 1,nr+1)
s = 0
if (first_idx<=last_idx) then
suma(ithread+2) = suma(ithread+2) + a%irp(first_idx)
do i=first_idx+1,last_idx
suma(ithread+2) = suma(ithread+2) + a%irp(i)
a%irp(i) = a%irp(i)+a%irp(i-1)
end do
end if
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
suma(i) = suma(i) + suma(i-1)
end do
!$OMP END SINGLE
!$OMP BARRIER
!$OMP DO SCHEDULE(STATIC)
do i=1,nr+1
a%irp(i) = suma(ithread+1) + a%irp(i) +1
end do
!$OMP END DO
!$OMP SINGLE
a%irp(1) = 1
!$OMP END SINGLE
!$OMP END PARALLEL
#else
a%irp(:) = 0
@ -3221,14 +3113,8 @@ subroutine psb_s_mv_csr_from_coo(a,b,info)
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
#endif
call psi_exscan(nr+1,a%irp,info,shift=ione,ibase=ione)
!write(0,*) name,' Check:',a%irp(nr+1),all(a%irp(1:nr) < a%irp(nr+1))
!write(0,*) name,a%irp(:)

@ -2189,6 +2189,9 @@ subroutine psb_z_mv_csc_from_coo(a,b,info)
use psb_error_mod
use psb_z_base_mat_mod
use psb_z_csc_mat_mod, psb_protect_name => psb_z_mv_csc_from_coo
#if defined(OPENMP)
use omp_lib
#endif
implicit none
class(psb_z_csc_sparse_mat), intent(inout) :: a
@ -2223,18 +2226,35 @@ subroutine psb_z_mv_csc_from_coo(a,b,info)
call psb_realloc(nc+1,a%icp,info)
call b%free()
#if defined(OPENMP)
!$OMP PARALLEL default(none) &
!$OMP shared(nr,a,itemp,nza) &
!$OMP private(i,info)
!$OMP WORKSHARE
a%icp(:) = 0
!$OMP END WORKSHARE
!$OMP DO schedule(STATIC) &
!$OMP private(k)
do k=1,nza
i = itemp(k)
a%icp(i) = a%icp(i) + 1
!$OMP ATOMIC UPDATE
a%icp(i+1) = a%icp(i+1) + 1
!$OMP END ATOMIC
end do
ip = 1
do i=1,nc
nrl = a%icp(i)
a%icp(i) = ip
ip = ip + nrl
!$OMP END DO
!$OMP END PARALLEL
#else
a%icp(:) = 0
do k=1,nza
i = itemp(k)
a%icp(i) = a%icp(i) + 1
end do
a%icp(nc+1) = ip
#endif
call psi_exscan(nc+1,a%icp,info,shift=ione,ibase=ione)
call a%set_host()

@ -2876,12 +2876,6 @@ subroutine psb_z_cp_csr_from_coo(a,b,info)
character(len=20) :: name='z_cp_csr_from_coo'
logical :: use_openmp = .false.
#if defined(OPENMP)
integer(psb_ipk_), allocatable :: suma(:)
integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j
integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads
#endif
info = psb_success_
debug_unit = psb_get_debug_unit()
debug_level = psb_get_debug_level()
@ -2927,8 +2921,8 @@ subroutine psb_z_cp_csr_from_coo(a,b,info)
#if defined(OPENMP)
!$OMP PARALLEL default(none) &
!$OMP shared(suma,nthreads,nr,a,itemp,nza) &
!$OMP private(ithread,work,i,first_idx,last_idx,s)
!$OMP shared(nr,a,itemp,nza) &
!$OMP private(i,info)
!$OMP WORKSHARE
a%irp(:) = 0
@ -2943,51 +2937,6 @@ subroutine psb_z_cp_csr_from_coo(a,b,info)
!$OMP END ATOMIC
end do
!$OMP END DO
!$OMP SINGLE
nthreads = omp_get_num_threads()
allocate(suma(nthreads+1))
suma(:) = 0
!suma(1) = 1
!$OMP END SINGLE
ithread = omp_get_thread_num()
work = (nr+1)/nthreads
if (ithread < MOD((nr+1),nthreads)) then
work = work + 1
first_idx = ithread*work + 1
else
first_idx = ithread*work + MOD((nr+1),nthreads) + 1
end if
last_idx = min(first_idx + work - 1,nr+1)
s = 0
if (first_idx<=last_idx) then
suma(ithread+2) = suma(ithread+2) + a%irp(first_idx)
do i=first_idx+1,last_idx
suma(ithread+2) = suma(ithread+2) + a%irp(i)
a%irp(i) = a%irp(i)+a%irp(i-1)
end do
end if
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
suma(i) = suma(i) + suma(i-1)
end do
!$OMP END SINGLE
!$OMP BARRIER
!$OMP DO SCHEDULE(STATIC)
do i=1,nr+1
a%irp(i) = suma(ithread+1) + a%irp(i) +1
end do
!$OMP END DO
!$OMP SINGLE
a%irp(1) = 1
!$OMP END SINGLE
!$OMP END PARALLEL
#else
a%irp(:) = 0
@ -2995,14 +2944,8 @@ subroutine psb_z_cp_csr_from_coo(a,b,info)
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
#endif
call psi_exscan(nr+1,a%irp,info,shift=ione,ibase=ione)
call a%set_host()
@ -3122,12 +3065,6 @@ subroutine psb_z_mv_csr_from_coo(a,b,info)
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='mv_from_coo'
#if defined(OPENMP)
integer(psb_ipk_), allocatable :: suma(:)
integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s
integer(psb_ipk_) :: nxt_val,old_val,saved_elem
#endif
info = psb_success_
debug_unit = psb_get_debug_unit()
debug_level = psb_get_debug_level()
@ -3153,8 +3090,8 @@ subroutine psb_z_mv_csr_from_coo(a,b,info)
#if defined(OPENMP)
!$OMP PARALLEL default(none) &
!$OMP shared(suma,nthreads,nr,a,itemp,nza) &
!$OMP private(ithread,work,i,first_idx,last_idx,s)
!$OMP shared(nr,a,itemp,nza) &
!$OMP private(i,info)
!$OMP WORKSHARE
a%irp(:) = 0
@ -3169,51 +3106,6 @@ subroutine psb_z_mv_csr_from_coo(a,b,info)
!$OMP END ATOMIC
end do
!$OMP END DO
!$OMP SINGLE
nthreads = omp_get_num_threads()
allocate(suma(nthreads+1))
suma(:) = 0
!suma(1) = 1
!$OMP END SINGLE
ithread = omp_get_thread_num()
work = (nr+1)/nthreads
if (ithread < MOD((nr+1),nthreads)) then
work = work + 1
first_idx = ithread*work + 1
else
first_idx = ithread*work + MOD((nr+1),nthreads) + 1
end if
last_idx = min(first_idx + work - 1,nr+1)
s = 0
if (first_idx<=last_idx) then
suma(ithread+2) = suma(ithread+2) + a%irp(first_idx)
do i=first_idx+1,last_idx
suma(ithread+2) = suma(ithread+2) + a%irp(i)
a%irp(i) = a%irp(i)+a%irp(i-1)
end do
end if
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
suma(i) = suma(i) + suma(i-1)
end do
!$OMP END SINGLE
!$OMP BARRIER
!$OMP DO SCHEDULE(STATIC)
do i=1,nr+1
a%irp(i) = suma(ithread+1) + a%irp(i) +1
end do
!$OMP END DO
!$OMP SINGLE
a%irp(1) = 1
!$OMP END SINGLE
!$OMP END PARALLEL
#else
a%irp(:) = 0
@ -3221,14 +3113,8 @@ subroutine psb_z_mv_csr_from_coo(a,b,info)
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
#endif
call psi_exscan(nr+1,a%irp,info,shift=ione,ibase=ione)
!write(0,*) name,' Check:',a%irp(nr+1),all(a%irp(1:nr) < a%irp(nr+1))
!write(0,*) name,a%irp(:)

@ -29,6 +29,97 @@
! POSSIBILITY OF SUCH DAMAGE.
!
!
subroutine psi_c_exscanv(n,x,info,shift,ibase)
use psi_c_serial_mod, psb_protect_name => psi_c_exscanv
use psb_const_mod
use psb_error_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none
integer(psb_ipk_), intent(in) :: n
complex(psb_spk_), intent (inout) :: x(:)
integer(psb_ipk_), intent(out) :: info
complex(psb_spk_), intent(in), optional :: shift
integer(psb_ipk_), intent(in), optional :: ibase
complex(psb_spk_) :: shift_, tp, ts
complex(psb_spk_), allocatable :: suma(:)
integer(psb_ipk_) :: i,ithread,nthreads,first_idx,last_idx,wrk, ibase_
if (present(shift)) then
shift_ = shift
else
shift_ = czero
end if
if (present(ibase)) then
ibase_ = ibase
else
ibase_ = ione
end if
#if defined(OPENMP)
!$OMP PARALLEL default(none) &
!$OMP shared(suma,nthreads,n,x,shift_,ibase_) &
!$OMP private(ithread,wrk,i,first_idx,last_idx)
!$OMP SINGLE
nthreads = omp_get_num_threads()
allocate(suma(nthreads+1))
suma(:) = 0
!suma(1) = 1
!$OMP END SINGLE
ithread = omp_get_thread_num()
wrk = (n)/nthreads
if (ithread < MOD((n),nthreads)) then
wrk = wrk + 1
first_idx = ithread*wrk + ibase_
else
first_idx = ithread*wrk + MOD((n),nthreads) + ibase_
end if
last_idx = min(first_idx + wrk - 1,n - (ibase_-ione))
if (first_idx<=last_idx) then
suma(ithread+2) = suma(ithread+2) + x(first_idx)
do i=first_idx+1,last_idx
suma(ithread+2) = suma(ithread+2) + x(i)
x(i) = x(i)+x(i-1)
end do
end if
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
suma(i) = suma(i) + suma(i-1)
end do
!$OMP END SINGLE
!$OMP BARRIER
!$OMP DO SCHEDULE(STATIC)
do i=1,n
x(i) = suma(ithread+1) + x(i) + shift_
end do
!$OMP END DO
!$OMP SINGLE
x(1) = shift_
!$OMP END SINGLE
!$OMP END PARALLEL
#else
tp = shift_
do i=1,n
ts = x(i)
x(i) = tp
tp = tp + ts
end do
#endif
end subroutine psi_c_exscanv
subroutine psb_m_cgelp(trans,iperm,x,info)
use psb_serial_mod, psb_protect_name => psb_m_cgelp
use psb_const_mod

@ -29,6 +29,97 @@
! POSSIBILITY OF SUCH DAMAGE.
!
!
subroutine psi_d_exscanv(n,x,info,shift,ibase)
use psi_d_serial_mod, psb_protect_name => psi_d_exscanv
use psb_const_mod
use psb_error_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none
integer(psb_ipk_), intent(in) :: n
real(psb_dpk_), intent (inout) :: x(:)
integer(psb_ipk_), intent(out) :: info
real(psb_dpk_), intent(in), optional :: shift
integer(psb_ipk_), intent(in), optional :: ibase
real(psb_dpk_) :: shift_, tp, ts
real(psb_dpk_), allocatable :: suma(:)
integer(psb_ipk_) :: i,ithread,nthreads,first_idx,last_idx,wrk, ibase_
if (present(shift)) then
shift_ = shift
else
shift_ = dzero
end if
if (present(ibase)) then
ibase_ = ibase
else
ibase_ = ione
end if
#if defined(OPENMP)
!$OMP PARALLEL default(none) &
!$OMP shared(suma,nthreads,n,x,shift_,ibase_) &
!$OMP private(ithread,wrk,i,first_idx,last_idx)
!$OMP SINGLE
nthreads = omp_get_num_threads()
allocate(suma(nthreads+1))
suma(:) = 0
!suma(1) = 1
!$OMP END SINGLE
ithread = omp_get_thread_num()
wrk = (n)/nthreads
if (ithread < MOD((n),nthreads)) then
wrk = wrk + 1
first_idx = ithread*wrk + ibase_
else
first_idx = ithread*wrk + MOD((n),nthreads) + ibase_
end if
last_idx = min(first_idx + wrk - 1,n - (ibase_-ione))
if (first_idx<=last_idx) then
suma(ithread+2) = suma(ithread+2) + x(first_idx)
do i=first_idx+1,last_idx
suma(ithread+2) = suma(ithread+2) + x(i)
x(i) = x(i)+x(i-1)
end do
end if
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
suma(i) = suma(i) + suma(i-1)
end do
!$OMP END SINGLE
!$OMP BARRIER
!$OMP DO SCHEDULE(STATIC)
do i=1,n
x(i) = suma(ithread+1) + x(i) + shift_
end do
!$OMP END DO
!$OMP SINGLE
x(1) = shift_
!$OMP END SINGLE
!$OMP END PARALLEL
#else
tp = shift_
do i=1,n
ts = x(i)
x(i) = tp
tp = tp + ts
end do
#endif
end subroutine psi_d_exscanv
subroutine psb_m_dgelp(trans,iperm,x,info)
use psb_serial_mod, psb_protect_name => psb_m_dgelp
use psb_const_mod

@ -29,6 +29,97 @@
! POSSIBILITY OF SUCH DAMAGE.
!
!
subroutine psi_e_exscanv(n,x,info,shift,ibase)
use psi_e_serial_mod, psb_protect_name => psi_e_exscanv
use psb_const_mod
use psb_error_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none
integer(psb_ipk_), intent(in) :: n
integer(psb_epk_), intent (inout) :: x(:)
integer(psb_ipk_), intent(out) :: info
integer(psb_epk_), intent(in), optional :: shift
integer(psb_ipk_), intent(in), optional :: ibase
integer(psb_epk_) :: shift_, tp, ts
integer(psb_epk_), allocatable :: suma(:)
integer(psb_ipk_) :: i,ithread,nthreads,first_idx,last_idx,wrk, ibase_
if (present(shift)) then
shift_ = shift
else
shift_ = ezero
end if
if (present(ibase)) then
ibase_ = ibase
else
ibase_ = ione
end if
#if defined(OPENMP)
!$OMP PARALLEL default(none) &
!$OMP shared(suma,nthreads,n,x,shift_,ibase_) &
!$OMP private(ithread,wrk,i,first_idx,last_idx)
!$OMP SINGLE
nthreads = omp_get_num_threads()
allocate(suma(nthreads+1))
suma(:) = 0
!suma(1) = 1
!$OMP END SINGLE
ithread = omp_get_thread_num()
wrk = (n)/nthreads
if (ithread < MOD((n),nthreads)) then
wrk = wrk + 1
first_idx = ithread*wrk + ibase_
else
first_idx = ithread*wrk + MOD((n),nthreads) + ibase_
end if
last_idx = min(first_idx + wrk - 1,n - (ibase_-ione))
if (first_idx<=last_idx) then
suma(ithread+2) = suma(ithread+2) + x(first_idx)
do i=first_idx+1,last_idx
suma(ithread+2) = suma(ithread+2) + x(i)
x(i) = x(i)+x(i-1)
end do
end if
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
suma(i) = suma(i) + suma(i-1)
end do
!$OMP END SINGLE
!$OMP BARRIER
!$OMP DO SCHEDULE(STATIC)
do i=1,n
x(i) = suma(ithread+1) + x(i) + shift_
end do
!$OMP END DO
!$OMP SINGLE
x(1) = shift_
!$OMP END SINGLE
!$OMP END PARALLEL
#else
tp = shift_
do i=1,n
ts = x(i)
x(i) = tp
tp = tp + ts
end do
#endif
end subroutine psi_e_exscanv
subroutine psb_m_egelp(trans,iperm,x,info)
use psb_serial_mod, psb_protect_name => psb_m_egelp
use psb_const_mod
@ -441,9 +532,9 @@ subroutine psi_eaxpby(m,n,alpha, x, beta, y, info)
integer(psb_epk_), intent (in) :: alpha, beta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: lx, ly
integer(psb_ipk_) :: lx, ly, i
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name, ch_err
character(len=20) :: name, ch_err
name='psb_geaxpby'
info=psb_success_
@ -502,7 +593,8 @@ subroutine psi_eaxpbyv(m,alpha, x, beta, y, info)
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: lx, ly
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name, ch_err
integer(psb_ipk_) :: i
character(len=20) :: name, ch_err
name='psb_geaxpby'
info=psb_success_
@ -532,7 +624,106 @@ subroutine psi_eaxpbyv(m,alpha, x, beta, y, info)
goto 9999
end if
if (m>0) call eaxpby(m,ione,alpha,x,lx,beta,y,ly,info)
! if (m>0) call eaxpby(m,ione,alpha,x,lx,beta,y,ly,info)
if (alpha.eq.ezero) then
if (beta.eq.ezero) then
!$omp parallel do private(i)
do i=1,m
y(i) = ezero
enddo
else if (beta.eq.eone) then
!
! Do nothing!
!
else if (beta.eq.-eone) then
!$omp parallel do private(i)
do i=1,m
y(i) = - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
enddo
endif
else if (alpha.eq.eone) then
if (beta.eq.ezero) then
!$omp parallel do private(i)
do i=1,m
y(i) = x(i)
enddo
else if (beta.eq.eone) then
!$omp parallel do private(i)
do i=1,m
y(i) = x(i) + y(i)
enddo
else if (beta.eq.-eone) then
!$omp parallel do private(i)
do i=1,m
y(i) = x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
y(i) = x(i) + beta*y(i)
enddo
endif
else if (alpha.eq.-eone) then
if (beta.eq.ezero) then
!$omp parallel do private(i)
do i=1,m
y(i) = -x(i)
enddo
else if (beta.eq.eone) then
!$omp parallel do private(i)
do i=1,m
y(i) = -x(i) + y(i)
enddo
else if (beta.eq.-eone) then
!$omp parallel do private(i)
do i=1,m
y(i) = -x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
y(i) = -x(i) + beta*y(i)
enddo
endif
else
if (beta.eq.ezero) then
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
enddo
else if (beta.eq.eone) then
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i) + y(i)
enddo
else if (beta.eq.-eone) then
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i) + beta*y(i)
enddo
endif
endif
call psb_erractionrestore(err_act)
return
@ -555,7 +746,7 @@ subroutine psi_eaxpbyv2(m,alpha, x, beta, y, z, info)
integer(psb_epk_), intent (in) :: alpha, beta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: lx, ly, lz
integer(psb_ipk_) :: lx, ly, lz, i
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name, ch_err
@ -594,7 +785,105 @@ subroutine psi_eaxpbyv2(m,alpha, x, beta, y, z, info)
goto 9999
end if
if (m>0) call eaxpbyv2(m,ione,alpha,x,lx,beta,y,ly,z,lz,info)
if (alpha.eq.ezero) then
if (beta.eq.ezero) then
!$omp parallel do private(i)
do i=1,m
Z(i) = ezero
enddo
else if (beta.eq.eone) then
!
! Do nothing!
!
else if (beta.eq.-eone) then
!$omp parallel do private(i)
do i=1,m
Z(i) = - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
Z(i) = beta*y(i)
enddo
endif
else if (alpha.eq.eone) then
if (beta.eq.ezero) then
!$omp parallel do private(i)
do i=1,m
Z(i) = x(i)
enddo
else if (beta.eq.eone) then
!$omp parallel do private(i)
do i=1,m
Z(i) = x(i) + y(i)
enddo
else if (beta.eq.-eone) then
!$omp parallel do private(i)
do i=1,m
Z(i) = x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
Z(i) = x(i) + beta*y(i)
enddo
endif
else if (alpha.eq.-eone) then
if (beta.eq.ezero) then
!$omp parallel do private(i)
do i=1,m
Z(i) = -x(i)
enddo
else if (beta.eq.eone) then
!$omp parallel do private(i)
do i=1,m
Z(i) = -x(i) + y(i)
enddo
else if (beta.eq.-eone) then
!$omp parallel do private(i)
do i=1,m
Z(i) = -x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
Z(i) = -x(i) + beta*y(i)
enddo
endif
else
if (beta.eq.ezero) then
!$omp parallel do private(i)
do i=1,m
Z(i) = alpha*x(i)
enddo
else if (beta.eq.eone) then
!$omp parallel do private(i)
do i=1,m
Z(i) = alpha*x(i) + y(i)
enddo
else if (beta.eq.-eone) then
!$omp parallel do private(i)
do i=1,m
Z(i) = alpha*x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
Z(i) = alpha*x(i) + beta*y(i)
enddo
endif
endif
call psb_erractionrestore(err_act)
return
@ -942,6 +1231,7 @@ subroutine eaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
if (alpha.eq.ezero) then
if (beta.eq.ezero) then
do j=1, n
!$omp parallel do private(i)
do i=1,m
y(i,j) = ezero
enddo
@ -953,12 +1243,14 @@ subroutine eaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
else if (beta.eq.-eone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = beta*y(i,j)
enddo
@ -969,12 +1261,14 @@ subroutine eaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
if (beta.eq.ezero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = x(i,j)
enddo
enddo
else if (beta.eq.eone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = x(i,j) + y(i,j)
enddo
@ -982,12 +1276,14 @@ subroutine eaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
else if (beta.eq.-eone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = x(i,j) + beta*y(i,j)
enddo
@ -998,12 +1294,14 @@ subroutine eaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
if (beta.eq.ezero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = -x(i,j)
enddo
enddo
else if (beta.eq.eone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = -x(i,j) + y(i,j)
enddo
@ -1011,12 +1309,14 @@ subroutine eaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
else if (beta.eq.-eone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = -x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = -x(i,j) + beta*y(i,j)
enddo
@ -1027,12 +1327,14 @@ subroutine eaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
if (beta.eq.ezero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = alpha*x(i,j)
enddo
enddo
else if (beta.eq.eone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = alpha*x(i,j) + y(i,j)
enddo
@ -1040,12 +1342,14 @@ subroutine eaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
else if (beta.eq.-eone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = alpha*x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = alpha*x(i,j) + beta*y(i,j)
enddo
@ -1131,12 +1435,14 @@ subroutine eaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
else if (beta.eq.-eone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = beta*y(i,j)
enddo
@ -1147,12 +1453,14 @@ subroutine eaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
if (beta.eq.ezero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = x(i,j)
enddo
enddo
else if (beta.eq.eone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = x(i,j) + y(i,j)
enddo
@ -1160,12 +1468,14 @@ subroutine eaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
else if (beta.eq.-eone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = x(i,j) + beta*y(i,j)
enddo
@ -1176,12 +1486,14 @@ subroutine eaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
if (beta.eq.ezero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = -x(i,j)
enddo
enddo
else if (beta.eq.eone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = -x(i,j) + y(i,j)
enddo
@ -1189,12 +1501,14 @@ subroutine eaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
else if (beta.eq.-eone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = -x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = -x(i,j) + beta*y(i,j)
enddo
@ -1205,12 +1519,14 @@ subroutine eaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
if (beta.eq.ezero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = alpha*x(i,j)
enddo
enddo
else if (beta.eq.eone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = alpha*x(i,j) + y(i,j)
enddo
@ -1218,12 +1534,14 @@ subroutine eaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
else if (beta.eq.-eone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = alpha*x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = alpha*x(i,j) + beta*y(i,j)
enddo

@ -29,6 +29,97 @@
! POSSIBILITY OF SUCH DAMAGE.
!
!
subroutine psi_i2_exscanv(n,x,info,shift,ibase)
use psi_i2_serial_mod, psb_protect_name => psi_i2_exscanv
use psb_const_mod
use psb_error_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none
integer(psb_ipk_), intent(in) :: n
integer(psb_i2pk_), intent (inout) :: x(:)
integer(psb_ipk_), intent(out) :: info
integer(psb_i2pk_), intent(in), optional :: shift
integer(psb_ipk_), intent(in), optional :: ibase
integer(psb_i2pk_) :: shift_, tp, ts
integer(psb_i2pk_), allocatable :: suma(:)
integer(psb_ipk_) :: i,ithread,nthreads,first_idx,last_idx,wrk, ibase_
if (present(shift)) then
shift_ = shift
else
shift_ = i2zero
end if
if (present(ibase)) then
ibase_ = ibase
else
ibase_ = ione
end if
#if defined(OPENMP)
!$OMP PARALLEL default(none) &
!$OMP shared(suma,nthreads,n,x,shift_,ibase_) &
!$OMP private(ithread,wrk,i,first_idx,last_idx)
!$OMP SINGLE
nthreads = omp_get_num_threads()
allocate(suma(nthreads+1))
suma(:) = 0
!suma(1) = 1
!$OMP END SINGLE
ithread = omp_get_thread_num()
wrk = (n)/nthreads
if (ithread < MOD((n),nthreads)) then
wrk = wrk + 1
first_idx = ithread*wrk + ibase_
else
first_idx = ithread*wrk + MOD((n),nthreads) + ibase_
end if
last_idx = min(first_idx + wrk - 1,n - (ibase_-ione))
if (first_idx<=last_idx) then
suma(ithread+2) = suma(ithread+2) + x(first_idx)
do i=first_idx+1,last_idx
suma(ithread+2) = suma(ithread+2) + x(i)
x(i) = x(i)+x(i-1)
end do
end if
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
suma(i) = suma(i) + suma(i-1)
end do
!$OMP END SINGLE
!$OMP BARRIER
!$OMP DO SCHEDULE(STATIC)
do i=1,n
x(i) = suma(ithread+1) + x(i) + shift_
end do
!$OMP END DO
!$OMP SINGLE
x(1) = shift_
!$OMP END SINGLE
!$OMP END PARALLEL
#else
tp = shift_
do i=1,n
ts = x(i)
x(i) = tp
tp = tp + ts
end do
#endif
end subroutine psi_i2_exscanv
subroutine psb_m_i2gelp(trans,iperm,x,info)
use psb_serial_mod, psb_protect_name => psb_m_i2gelp
use psb_const_mod
@ -441,9 +532,9 @@ subroutine psi_i2axpby(m,n,alpha, x, beta, y, info)
integer(psb_i2pk_), intent (in) :: alpha, beta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: lx, ly
integer(psb_ipk_) :: lx, ly, i
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name, ch_err
character(len=20) :: name, ch_err
name='psb_geaxpby'
info=psb_success_
@ -502,7 +593,8 @@ subroutine psi_i2axpbyv(m,alpha, x, beta, y, info)
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: lx, ly
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name, ch_err
integer(psb_ipk_) :: i
character(len=20) :: name, ch_err
name='psb_geaxpby'
info=psb_success_
@ -532,7 +624,106 @@ subroutine psi_i2axpbyv(m,alpha, x, beta, y, info)
goto 9999
end if
if (m>0) call i2axpby(m,ione,alpha,x,lx,beta,y,ly,info)
! if (m>0) call i2axpby(m,ione,alpha,x,lx,beta,y,ly,info)
if (alpha.eq.i2zero) then
if (beta.eq.i2zero) then
!$omp parallel do private(i)
do i=1,m
y(i) = i2zero
enddo
else if (beta.eq.i2one) then
!
! Do nothing!
!
else if (beta.eq.-i2one) then
!$omp parallel do private(i)
do i=1,m
y(i) = - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
enddo
endif
else if (alpha.eq.i2one) then
if (beta.eq.i2zero) then
!$omp parallel do private(i)
do i=1,m
y(i) = x(i)
enddo
else if (beta.eq.i2one) then
!$omp parallel do private(i)
do i=1,m
y(i) = x(i) + y(i)
enddo
else if (beta.eq.-i2one) then
!$omp parallel do private(i)
do i=1,m
y(i) = x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
y(i) = x(i) + beta*y(i)
enddo
endif
else if (alpha.eq.-i2one) then
if (beta.eq.i2zero) then
!$omp parallel do private(i)
do i=1,m
y(i) = -x(i)
enddo
else if (beta.eq.i2one) then
!$omp parallel do private(i)
do i=1,m
y(i) = -x(i) + y(i)
enddo
else if (beta.eq.-i2one) then
!$omp parallel do private(i)
do i=1,m
y(i) = -x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
y(i) = -x(i) + beta*y(i)
enddo
endif
else
if (beta.eq.i2zero) then
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
enddo
else if (beta.eq.i2one) then
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i) + y(i)
enddo
else if (beta.eq.-i2one) then
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i) + beta*y(i)
enddo
endif
endif
call psb_erractionrestore(err_act)
return
@ -555,7 +746,7 @@ subroutine psi_i2axpbyv2(m,alpha, x, beta, y, z, info)
integer(psb_i2pk_), intent (in) :: alpha, beta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: lx, ly, lz
integer(psb_ipk_) :: lx, ly, lz, i
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name, ch_err
@ -594,7 +785,105 @@ subroutine psi_i2axpbyv2(m,alpha, x, beta, y, z, info)
goto 9999
end if
if (m>0) call i2axpbyv2(m,ione,alpha,x,lx,beta,y,ly,z,lz,info)
if (alpha.eq.i2zero) then
if (beta.eq.i2zero) then
!$omp parallel do private(i)
do i=1,m
Z(i) = i2zero
enddo
else if (beta.eq.i2one) then
!
! Do nothing!
!
else if (beta.eq.-i2one) then
!$omp parallel do private(i)
do i=1,m
Z(i) = - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
Z(i) = beta*y(i)
enddo
endif
else if (alpha.eq.i2one) then
if (beta.eq.i2zero) then
!$omp parallel do private(i)
do i=1,m
Z(i) = x(i)
enddo
else if (beta.eq.i2one) then
!$omp parallel do private(i)
do i=1,m
Z(i) = x(i) + y(i)
enddo
else if (beta.eq.-i2one) then
!$omp parallel do private(i)
do i=1,m
Z(i) = x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
Z(i) = x(i) + beta*y(i)
enddo
endif
else if (alpha.eq.-i2one) then
if (beta.eq.i2zero) then
!$omp parallel do private(i)
do i=1,m
Z(i) = -x(i)
enddo
else if (beta.eq.i2one) then
!$omp parallel do private(i)
do i=1,m
Z(i) = -x(i) + y(i)
enddo
else if (beta.eq.-i2one) then
!$omp parallel do private(i)
do i=1,m
Z(i) = -x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
Z(i) = -x(i) + beta*y(i)
enddo
endif
else
if (beta.eq.i2zero) then
!$omp parallel do private(i)
do i=1,m
Z(i) = alpha*x(i)
enddo
else if (beta.eq.i2one) then
!$omp parallel do private(i)
do i=1,m
Z(i) = alpha*x(i) + y(i)
enddo
else if (beta.eq.-i2one) then
!$omp parallel do private(i)
do i=1,m
Z(i) = alpha*x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
Z(i) = alpha*x(i) + beta*y(i)
enddo
endif
endif
call psb_erractionrestore(err_act)
return
@ -942,6 +1231,7 @@ subroutine i2axpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
if (alpha.eq.i2zero) then
if (beta.eq.i2zero) then
do j=1, n
!$omp parallel do private(i)
do i=1,m
y(i,j) = i2zero
enddo
@ -953,12 +1243,14 @@ subroutine i2axpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
else if (beta.eq.-i2one) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = beta*y(i,j)
enddo
@ -969,12 +1261,14 @@ subroutine i2axpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
if (beta.eq.i2zero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = x(i,j)
enddo
enddo
else if (beta.eq.i2one) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = x(i,j) + y(i,j)
enddo
@ -982,12 +1276,14 @@ subroutine i2axpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
else if (beta.eq.-i2one) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = x(i,j) + beta*y(i,j)
enddo
@ -998,12 +1294,14 @@ subroutine i2axpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
if (beta.eq.i2zero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = -x(i,j)
enddo
enddo
else if (beta.eq.i2one) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = -x(i,j) + y(i,j)
enddo
@ -1011,12 +1309,14 @@ subroutine i2axpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
else if (beta.eq.-i2one) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = -x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = -x(i,j) + beta*y(i,j)
enddo
@ -1027,12 +1327,14 @@ subroutine i2axpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
if (beta.eq.i2zero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = alpha*x(i,j)
enddo
enddo
else if (beta.eq.i2one) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = alpha*x(i,j) + y(i,j)
enddo
@ -1040,12 +1342,14 @@ subroutine i2axpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
else if (beta.eq.-i2one) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = alpha*x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = alpha*x(i,j) + beta*y(i,j)
enddo
@ -1131,12 +1435,14 @@ subroutine i2axpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
else if (beta.eq.-i2one) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = beta*y(i,j)
enddo
@ -1147,12 +1453,14 @@ subroutine i2axpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
if (beta.eq.i2zero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = x(i,j)
enddo
enddo
else if (beta.eq.i2one) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = x(i,j) + y(i,j)
enddo
@ -1160,12 +1468,14 @@ subroutine i2axpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
else if (beta.eq.-i2one) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = x(i,j) + beta*y(i,j)
enddo
@ -1176,12 +1486,14 @@ subroutine i2axpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
if (beta.eq.i2zero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = -x(i,j)
enddo
enddo
else if (beta.eq.i2one) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = -x(i,j) + y(i,j)
enddo
@ -1189,12 +1501,14 @@ subroutine i2axpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
else if (beta.eq.-i2one) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = -x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = -x(i,j) + beta*y(i,j)
enddo
@ -1205,12 +1519,14 @@ subroutine i2axpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
if (beta.eq.i2zero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = alpha*x(i,j)
enddo
enddo
else if (beta.eq.i2one) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = alpha*x(i,j) + y(i,j)
enddo
@ -1218,12 +1534,14 @@ subroutine i2axpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
else if (beta.eq.-i2one) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = alpha*x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = alpha*x(i,j) + beta*y(i,j)
enddo

@ -29,6 +29,99 @@
! POSSIBILITY OF SUCH DAMAGE.
!
!
subroutine psi_m_exscanv(n,x,info,shift,ibase)
use psi_m_serial_mod, psb_protect_name => psi_m_exscanv
use psb_const_mod
use psb_error_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none
integer(psb_ipk_), intent(in) :: n
integer(psb_mpk_), intent (inout) :: x(:)
integer(psb_ipk_), intent(out) :: info
integer(psb_mpk_), intent(in), optional :: shift
integer(psb_ipk_), intent(in), optional :: ibase
integer(psb_mpk_) :: shift_, tp, ts
integer(psb_mpk_), allocatable :: suma(:)
integer(psb_ipk_) :: i,ithread,nthreads,first_idx,last_idx,wrk, ibase_
logical is_nested
if (present(shift)) then
shift_ = shift
else
shift_ = mzero
end if
if (present(ibase)) then
ibase_ = ibase
else
ibase_ = ione
end if
#if defined(OPENMP)
is_nested = omp_get_nested()
call omp_set_nested(.true.)
!$OMP PARALLEL default(none) &
!$OMP shared(suma,nthreads,n,x,shift_,ibase_) &
!$OMP private(ithread,wrk,i,first_idx,last_idx)
!$OMP SINGLE
nthreads = omp_get_num_threads()
allocate(suma(nthreads+1))
suma(:) = 0
!suma(1) = 1
!$OMP END SINGLE
ithread = omp_get_thread_num()
wrk = (n)/nthreads
if (ithread < MOD((n),nthreads)) then
wrk = wrk + 1
first_idx = ithread*wrk + ibase_
else
first_idx = ithread*wrk + MOD((n),nthreads) + ibase_
end if
last_idx = min(first_idx + wrk - 1,n - (ibase_-ione))
if (first_idx<=last_idx) then
suma(ithread+2) = suma(ithread+2) + x(first_idx)
do i=first_idx+1,last_idx
suma(ithread+2) = suma(ithread+2) + x(i)
x(i) = x(i)+x(i-1)
end do
end if
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
suma(i) = suma(i) + suma(i-1)
end do
!$OMP END SINGLE
!$OMP BARRIER
!$OMP DO SCHEDULE(STATIC)
do i=1,n
x(i) = suma(ithread+1) + x(i) + shift_
end do
!$OMP END DO
!$OMP SINGLE
x(1) = shift_
!$OMP END SINGLE
!$OMP END PARALLEL
call omp_set_nested(is_nested)
#else
tp = shift_
do i=1,n
ts = x(i)
x(i) = tp
tp = tp + ts
end do
#endif
end subroutine psi_m_exscanv
subroutine psb_m_mgelp(trans,iperm,x,info)
use psb_serial_mod, psb_protect_name => psb_m_mgelp
use psb_const_mod
@ -441,9 +534,9 @@ subroutine psi_maxpby(m,n,alpha, x, beta, y, info)
integer(psb_mpk_), intent (in) :: alpha, beta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: lx, ly
integer(psb_ipk_) :: lx, ly, i
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name, ch_err
character(len=20) :: name, ch_err
name='psb_geaxpby'
info=psb_success_
@ -502,7 +595,8 @@ subroutine psi_maxpbyv(m,alpha, x, beta, y, info)
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: lx, ly
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name, ch_err
integer(psb_ipk_) :: i
character(len=20) :: name, ch_err
name='psb_geaxpby'
info=psb_success_
@ -532,7 +626,106 @@ subroutine psi_maxpbyv(m,alpha, x, beta, y, info)
goto 9999
end if
if (m>0) call maxpby(m,ione,alpha,x,lx,beta,y,ly,info)
! if (m>0) call maxpby(m,ione,alpha,x,lx,beta,y,ly,info)
if (alpha.eq.mzero) then
if (beta.eq.mzero) then
!$omp parallel do private(i)
do i=1,m
y(i) = mzero
enddo
else if (beta.eq.mone) then
!
! Do nothing!
!
else if (beta.eq.-mone) then
!$omp parallel do private(i)
do i=1,m
y(i) = - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
enddo
endif
else if (alpha.eq.mone) then
if (beta.eq.mzero) then
!$omp parallel do private(i)
do i=1,m
y(i) = x(i)
enddo
else if (beta.eq.mone) then
!$omp parallel do private(i)
do i=1,m
y(i) = x(i) + y(i)
enddo
else if (beta.eq.-mone) then
!$omp parallel do private(i)
do i=1,m
y(i) = x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
y(i) = x(i) + beta*y(i)
enddo
endif
else if (alpha.eq.-mone) then
if (beta.eq.mzero) then
!$omp parallel do private(i)
do i=1,m
y(i) = -x(i)
enddo
else if (beta.eq.mone) then
!$omp parallel do private(i)
do i=1,m
y(i) = -x(i) + y(i)
enddo
else if (beta.eq.-mone) then
!$omp parallel do private(i)
do i=1,m
y(i) = -x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
y(i) = -x(i) + beta*y(i)
enddo
endif
else
if (beta.eq.mzero) then
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
enddo
else if (beta.eq.mone) then
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i) + y(i)
enddo
else if (beta.eq.-mone) then
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i) + beta*y(i)
enddo
endif
endif
call psb_erractionrestore(err_act)
return
@ -555,7 +748,7 @@ subroutine psi_maxpbyv2(m,alpha, x, beta, y, z, info)
integer(psb_mpk_), intent (in) :: alpha, beta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: lx, ly, lz
integer(psb_ipk_) :: lx, ly, lz, i
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name, ch_err
@ -594,7 +787,105 @@ subroutine psi_maxpbyv2(m,alpha, x, beta, y, z, info)
goto 9999
end if
if (m>0) call maxpbyv2(m,ione,alpha,x,lx,beta,y,ly,z,lz,info)
if (alpha.eq.mzero) then
if (beta.eq.mzero) then
!$omp parallel do private(i)
do i=1,m
Z(i) = mzero
enddo
else if (beta.eq.mone) then
!
! Do nothing!
!
else if (beta.eq.-mone) then
!$omp parallel do private(i)
do i=1,m
Z(i) = - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
Z(i) = beta*y(i)
enddo
endif
else if (alpha.eq.mone) then
if (beta.eq.mzero) then
!$omp parallel do private(i)
do i=1,m
Z(i) = x(i)
enddo
else if (beta.eq.mone) then
!$omp parallel do private(i)
do i=1,m
Z(i) = x(i) + y(i)
enddo
else if (beta.eq.-mone) then
!$omp parallel do private(i)
do i=1,m
Z(i) = x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
Z(i) = x(i) + beta*y(i)
enddo
endif
else if (alpha.eq.-mone) then
if (beta.eq.mzero) then
!$omp parallel do private(i)
do i=1,m
Z(i) = -x(i)
enddo
else if (beta.eq.mone) then
!$omp parallel do private(i)
do i=1,m
Z(i) = -x(i) + y(i)
enddo
else if (beta.eq.-mone) then
!$omp parallel do private(i)
do i=1,m
Z(i) = -x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
Z(i) = -x(i) + beta*y(i)
enddo
endif
else
if (beta.eq.mzero) then
!$omp parallel do private(i)
do i=1,m
Z(i) = alpha*x(i)
enddo
else if (beta.eq.mone) then
!$omp parallel do private(i)
do i=1,m
Z(i) = alpha*x(i) + y(i)
enddo
else if (beta.eq.-mone) then
!$omp parallel do private(i)
do i=1,m
Z(i) = alpha*x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
Z(i) = alpha*x(i) + beta*y(i)
enddo
endif
endif
call psb_erractionrestore(err_act)
return
@ -942,6 +1233,7 @@ subroutine maxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
if (alpha.eq.mzero) then
if (beta.eq.mzero) then
do j=1, n
!$omp parallel do private(i)
do i=1,m
y(i,j) = mzero
enddo
@ -953,12 +1245,14 @@ subroutine maxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
else if (beta.eq.-mone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = beta*y(i,j)
enddo
@ -969,12 +1263,14 @@ subroutine maxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
if (beta.eq.mzero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = x(i,j)
enddo
enddo
else if (beta.eq.mone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = x(i,j) + y(i,j)
enddo
@ -982,12 +1278,14 @@ subroutine maxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
else if (beta.eq.-mone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = x(i,j) + beta*y(i,j)
enddo
@ -998,12 +1296,14 @@ subroutine maxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
if (beta.eq.mzero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = -x(i,j)
enddo
enddo
else if (beta.eq.mone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = -x(i,j) + y(i,j)
enddo
@ -1011,12 +1311,14 @@ subroutine maxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
else if (beta.eq.-mone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = -x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = -x(i,j) + beta*y(i,j)
enddo
@ -1027,12 +1329,14 @@ subroutine maxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
if (beta.eq.mzero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = alpha*x(i,j)
enddo
enddo
else if (beta.eq.mone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = alpha*x(i,j) + y(i,j)
enddo
@ -1040,12 +1344,14 @@ subroutine maxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
else if (beta.eq.-mone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = alpha*x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = alpha*x(i,j) + beta*y(i,j)
enddo
@ -1131,12 +1437,14 @@ subroutine maxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
else if (beta.eq.-mone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = beta*y(i,j)
enddo
@ -1147,12 +1455,14 @@ subroutine maxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
if (beta.eq.mzero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = x(i,j)
enddo
enddo
else if (beta.eq.mone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = x(i,j) + y(i,j)
enddo
@ -1160,12 +1470,14 @@ subroutine maxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
else if (beta.eq.-mone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = x(i,j) + beta*y(i,j)
enddo
@ -1176,12 +1488,14 @@ subroutine maxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
if (beta.eq.mzero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = -x(i,j)
enddo
enddo
else if (beta.eq.mone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = -x(i,j) + y(i,j)
enddo
@ -1189,12 +1503,14 @@ subroutine maxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
else if (beta.eq.-mone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = -x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = -x(i,j) + beta*y(i,j)
enddo
@ -1205,12 +1521,14 @@ subroutine maxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
if (beta.eq.mzero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = alpha*x(i,j)
enddo
enddo
else if (beta.eq.mone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = alpha*x(i,j) + y(i,j)
enddo
@ -1218,12 +1536,14 @@ subroutine maxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
else if (beta.eq.-mone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = alpha*x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = alpha*x(i,j) + beta*y(i,j)
enddo

@ -29,6 +29,97 @@
! POSSIBILITY OF SUCH DAMAGE.
!
!
subroutine psi_s_exscanv(n,x,info,shift,ibase)
use psi_s_serial_mod, psb_protect_name => psi_s_exscanv
use psb_const_mod
use psb_error_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none
integer(psb_ipk_), intent(in) :: n
real(psb_spk_), intent (inout) :: x(:)
integer(psb_ipk_), intent(out) :: info
real(psb_spk_), intent(in), optional :: shift
integer(psb_ipk_), intent(in), optional :: ibase
real(psb_spk_) :: shift_, tp, ts
real(psb_spk_), allocatable :: suma(:)
integer(psb_ipk_) :: i,ithread,nthreads,first_idx,last_idx,wrk, ibase_
if (present(shift)) then
shift_ = shift
else
shift_ = szero
end if
if (present(ibase)) then
ibase_ = ibase
else
ibase_ = ione
end if
#if defined(OPENMP)
!$OMP PARALLEL default(none) &
!$OMP shared(suma,nthreads,n,x,shift_,ibase_) &
!$OMP private(ithread,wrk,i,first_idx,last_idx)
!$OMP SINGLE
nthreads = omp_get_num_threads()
allocate(suma(nthreads+1))
suma(:) = 0
!suma(1) = 1
!$OMP END SINGLE
ithread = omp_get_thread_num()
wrk = (n)/nthreads
if (ithread < MOD((n),nthreads)) then
wrk = wrk + 1
first_idx = ithread*wrk + ibase_
else
first_idx = ithread*wrk + MOD((n),nthreads) + ibase_
end if
last_idx = min(first_idx + wrk - 1,n - (ibase_-ione))
if (first_idx<=last_idx) then
suma(ithread+2) = suma(ithread+2) + x(first_idx)
do i=first_idx+1,last_idx
suma(ithread+2) = suma(ithread+2) + x(i)
x(i) = x(i)+x(i-1)
end do
end if
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
suma(i) = suma(i) + suma(i-1)
end do
!$OMP END SINGLE
!$OMP BARRIER
!$OMP DO SCHEDULE(STATIC)
do i=1,n
x(i) = suma(ithread+1) + x(i) + shift_
end do
!$OMP END DO
!$OMP SINGLE
x(1) = shift_
!$OMP END SINGLE
!$OMP END PARALLEL
#else
tp = shift_
do i=1,n
ts = x(i)
x(i) = tp
tp = tp + ts
end do
#endif
end subroutine psi_s_exscanv
subroutine psb_m_sgelp(trans,iperm,x,info)
use psb_serial_mod, psb_protect_name => psb_m_sgelp
use psb_const_mod

@ -29,6 +29,97 @@
! POSSIBILITY OF SUCH DAMAGE.
!
!
subroutine psi_z_exscanv(n,x,info,shift,ibase)
use psi_z_serial_mod, psb_protect_name => psi_z_exscanv
use psb_const_mod
use psb_error_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none
integer(psb_ipk_), intent(in) :: n
complex(psb_dpk_), intent (inout) :: x(:)
integer(psb_ipk_), intent(out) :: info
complex(psb_dpk_), intent(in), optional :: shift
integer(psb_ipk_), intent(in), optional :: ibase
complex(psb_dpk_) :: shift_, tp, ts
complex(psb_dpk_), allocatable :: suma(:)
integer(psb_ipk_) :: i,ithread,nthreads,first_idx,last_idx,wrk, ibase_
if (present(shift)) then
shift_ = shift
else
shift_ = zzero
end if
if (present(ibase)) then
ibase_ = ibase
else
ibase_ = ione
end if
#if defined(OPENMP)
!$OMP PARALLEL default(none) &
!$OMP shared(suma,nthreads,n,x,shift_,ibase_) &
!$OMP private(ithread,wrk,i,first_idx,last_idx)
!$OMP SINGLE
nthreads = omp_get_num_threads()
allocate(suma(nthreads+1))
suma(:) = 0
!suma(1) = 1
!$OMP END SINGLE
ithread = omp_get_thread_num()
wrk = (n)/nthreads
if (ithread < MOD((n),nthreads)) then
wrk = wrk + 1
first_idx = ithread*wrk + ibase_
else
first_idx = ithread*wrk + MOD((n),nthreads) + ibase_
end if
last_idx = min(first_idx + wrk - 1,n - (ibase_-ione))
if (first_idx<=last_idx) then
suma(ithread+2) = suma(ithread+2) + x(first_idx)
do i=first_idx+1,last_idx
suma(ithread+2) = suma(ithread+2) + x(i)
x(i) = x(i)+x(i-1)
end do
end if
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
suma(i) = suma(i) + suma(i-1)
end do
!$OMP END SINGLE
!$OMP BARRIER
!$OMP DO SCHEDULE(STATIC)
do i=1,n
x(i) = suma(ithread+1) + x(i) + shift_
end do
!$OMP END DO
!$OMP SINGLE
x(1) = shift_
!$OMP END SINGLE
!$OMP END PARALLEL
#else
tp = shift_
do i=1,n
ts = x(i)
x(i) = tp
tp = tp + ts
end do
#endif
end subroutine psi_z_exscanv
subroutine psb_m_zgelp(trans,iperm,x,info)
use psb_serial_mod, psb_protect_name => psb_m_zgelp
use psb_const_mod

Loading…
Cancel
Save