OpenMP for base_vect

new-context
Salvatore Filippone 4 years ago
parent ae870a7e7b
commit eb513e45c6

@ -266,14 +266,21 @@ contains
complex(psb_spk_), intent(in) :: this(:) complex(psb_spk_), intent(in) :: this(:)
class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: x
integer(psb_ipk_) :: info integer(psb_ipk_) :: info
integer(psb_ipk_) :: i
call psb_realloc(size(this),x%v,info) call psb_realloc(size(this),x%v,info)
if (info /= 0) then if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld') call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld')
return return
end if end if
#if defined (OPENMP)
!$omp parallel do private(i)
do i = 1, size(this)
x%v(i) = this(i)
end do
#else
x%v(:) = this(:) x%v(:) = this(:)
#endif
end subroutine c_base_bld_x end subroutine c_base_bld_x
! !
@ -403,7 +410,6 @@ contains
case(psb_dupl_ovwrt_) case(psb_dupl_ovwrt_)
do i = 1, n do i = 1, n
!loop over all val's rows !loop over all val's rows
! row actual block row ! row actual block row
if ((1 <= irl(i)).and.(irl(i) <= isz)) then if ((1 <= irl(i)).and.(irl(i) <= isz)) then
! this row belongs to me ! this row belongs to me
@ -783,7 +789,7 @@ contains
integer(psb_ipk_) :: info integer(psb_ipk_) :: info
integer(psb_ipk_), optional :: n integer(psb_ipk_), optional :: n
! Local variables ! Local variables
integer(psb_ipk_) :: isz integer(psb_ipk_) :: isz, i
if (.not.allocated(x%v)) return if (.not.allocated(x%v)) return
if (.not.x%is_host()) call x%sync() if (.not.x%is_host()) call x%sync()
@ -794,7 +800,15 @@ contains
call psb_errpush(psb_err_alloc_dealloc_,'base_get_vect') call psb_errpush(psb_err_alloc_dealloc_,'base_get_vect')
return return
end if end if
res(1:isz) = x%v(1:isz) if (.false.) then
res(1:isz) = x%v(1:isz)
else
!$omp parallel do private(i)
do i=1, isz
res(i) = x%v(i)
end do
end if
end function c_base_get_vect end function c_base_get_vect
! !
@ -812,7 +826,7 @@ contains
complex(psb_spk_), intent(in) :: val complex(psb_spk_), intent(in) :: val
integer(psb_ipk_), optional :: first, last integer(psb_ipk_), optional :: first, last
integer(psb_ipk_) :: first_, last_ integer(psb_ipk_) :: first_, last_, i
first_=1 first_=1
last_=size(x%v) last_=size(x%v)
@ -820,7 +834,14 @@ contains
if (present(last)) last_ = min(last,last_) if (present(last)) last_ = min(last,last_)
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
#if defined(OPENMP)
!$omp parallel do private(i)
do i = first_, last_
x%v(i) = val
end do
#else
x%v(first_:last_) = val x%v(first_:last_) = val
#endif
call x%set_host() call x%set_host()
end subroutine c_base_set_scal end subroutine c_base_set_scal
@ -838,19 +859,27 @@ contains
complex(psb_spk_), intent(in) :: val(:) complex(psb_spk_), intent(in) :: val(:)
integer(psb_ipk_), optional :: first, last integer(psb_ipk_), optional :: first, last
integer(psb_ipk_) :: first_, last_ integer(psb_ipk_) :: first_, last_, i, info
if (.not.allocated(x%v)) then
call psb_realloc(size(val),x%v,info)
end if
first_ = 1 first_ = 1
if (present(first)) first_ = max(1,first) if (present(first)) first_ = max(1,first)
last_ = min(psb_size(x%v),first_+size(val)-1) last_ = min(psb_size(x%v),first_+size(val)-1)
if (present(last)) last_ = min(last,last_) if (present(last)) last_ = min(last,last_)
if (allocated(x%v)) then if (x%is_dev()) call x%sync()
if (x%is_dev()) call x%sync()
#if defined(OPENMP)
!$omp parallel do private(i)
do i = first_, last_
x%v(i) = val(i-first_+1)
end do
#else
x%v(first_:last_) = val(1:last_-first_+1) x%v(first_:last_) = val(1:last_-first_+1)
else #endif
x%v = val
end if
call x%set_host() call x%set_host()
end subroutine c_base_set_vect end subroutine c_base_set_vect
@ -888,9 +917,18 @@ contains
implicit none implicit none
class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: x
integer(psb_ipk_) :: i
if (allocated(x%v)) then if (allocated(x%v)) then
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
#if defined(OPENMP)
!$omp parallel do private(i)
do i=1, size(x%v)
x%v(i) = abs(x%v(i))
end do
#else
x%v = abs(x%v) x%v = abs(x%v)
#endif
call x%set_host() call x%set_host()
end if end if
@ -1132,6 +1170,7 @@ contains
info = 0 info = 0
if (y%is_dev()) call y%sync() if (y%is_dev()) call y%sync()
n = min(size(y%v), size(x)) n = min(size(y%v), size(x))
!$omp parallel do private(i)
do i=1, n do i=1, n
y%v(i) = y%v(i)*x(i) y%v(i) = y%v(i)*x(i)
end do end do
@ -1169,6 +1208,7 @@ contains
if (beta == cone) then if (beta == cone) then
return return
else else
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) z%v(i) = beta*z%v(i)
end do end do
@ -1176,42 +1216,51 @@ contains
else else
if (alpha == cone) then if (alpha == cone) then
if (beta == czero) then if (beta == czero) then
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = y(i)*x(i) z%v(i) = y(i)*x(i)
end do end do
else if (beta == cone) then else if (beta == cone) then
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = z%v(i) + y(i)*x(i) z%v(i) = z%v(i) + y(i)*x(i)
end do end do
else else
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) + y(i)*x(i) z%v(i) = beta*z%v(i) + y(i)*x(i)
end do end do
end if end if
else if (alpha == -cone) then else if (alpha == -cone) then
if (beta == czero) then if (beta == czero) then
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = -y(i)*x(i) z%v(i) = -y(i)*x(i)
end do end do
else if (beta == cone) then else if (beta == cone) then
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = z%v(i) - y(i)*x(i) z%v(i) = z%v(i) - y(i)*x(i)
end do end do
else else
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) - y(i)*x(i) z%v(i) = beta*z%v(i) - y(i)*x(i)
end do end do
end if end if
else else
if (beta == czero) then if (beta == czero) then
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = alpha*y(i)*x(i) z%v(i) = alpha*y(i)*x(i)
end do end do
else if (beta == cone) then else if (beta == cone) then
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = z%v(i) + alpha*y(i)*x(i) z%v(i) = z%v(i) + alpha*y(i)*x(i)
end do end do
else else
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) + alpha*y(i)*x(i) z%v(i) = beta*z%v(i) + alpha*y(i)*x(i)
end do end do
@ -1314,7 +1363,6 @@ contains
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
call x%div(x%v,y%v,info) call x%div(x%v,y%v,info)
end subroutine c_base_div_v end subroutine c_base_div_v
! !
!> Function base_div_v2 !> Function base_div_v2
@ -1358,7 +1406,6 @@ contains
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
call x%div(x%v,y%v,info,flag) call x%div(x%v,y%v,info,flag)
end subroutine c_base_div_v_check end subroutine c_base_div_v_check
! !
!> Function base_div_v2_check !> Function base_div_v2_check
@ -1381,7 +1428,6 @@ contains
if (z%is_dev()) call z%sync() if (z%is_dev()) call z%sync()
call z%div(x%v,y%v,info,flag) call z%div(x%v,y%v,info,flag)
end subroutine c_base_div_v2_check end subroutine c_base_div_v2_check
! !
!> Function base_div_a2 !> Function base_div_a2
@ -1403,6 +1449,7 @@ contains
if (z%is_dev()) call z%sync() if (z%is_dev()) call z%sync()
n = min(size(y), size(x)) n = min(size(y), size(x))
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = x(i)/y(i) z%v(i) = x(i)/y(i)
end do end do
@ -1433,6 +1480,7 @@ contains
if (z%is_dev()) call z%sync() if (z%is_dev()) call z%sync()
n = min(size(y), size(x)) n = min(size(y), size(x))
! $omp parallel do private(i)
do i=1, n do i=1, n
if (y(i) /= 0) then if (y(i) /= 0) then
z%v(i) = x(i)/y(i) z%v(i) = x(i)/y(i)
@ -1443,7 +1491,6 @@ contains
end do end do
end if end if
end subroutine c_base_div_a2_check end subroutine c_base_div_a2_check
! !
!> Function base_inv_v !> Function base_inv_v
@ -1487,7 +1534,6 @@ contains
if (y%is_dev()) call y%sync() if (y%is_dev()) call y%sync()
call y%inv(x%v,info,flag) call y%inv(x%v,info,flag)
end subroutine c_base_inv_v_check end subroutine c_base_inv_v_check
! !
!> Function base_inv_a2 !> Function base_inv_a2
@ -1509,6 +1555,7 @@ contains
if (y%is_dev()) call y%sync() if (y%is_dev()) call y%sync()
n = size(x) n = size(x)
!$omp parallel do private(i)
do i=1, n do i=1, n
y%v(i) = 1_psb_spk_/x(i) y%v(i) = 1_psb_spk_/x(i)
end do end do
@ -1539,6 +1586,7 @@ contains
if (y%is_dev()) call y%sync() if (y%is_dev()) call y%sync()
n = size(x) n = size(x)
!$omp parallel do private(i)
do i=1, n do i=1, n
if (x(i) /= 0) then if (x(i) /= 0) then
y%v(i) = 1_psb_spk_/x(i) y%v(i) = 1_psb_spk_/x(i)
@ -1573,6 +1621,7 @@ contains
if (z%is_dev()) call z%sync() if (z%is_dev()) call z%sync()
n = size(x) n = size(x)
!$omp parallel do private(i)
do i = 1, n, 1 do i = 1, n, 1
if ( abs(x(i)).ge.c ) then if ( abs(x(i)).ge.c ) then
z%v(i) = 1_psb_spk_ z%v(i) = 1_psb_spk_
@ -1618,12 +1667,19 @@ contains
implicit none implicit none
class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: x
complex(psb_spk_), intent (in) :: alpha complex(psb_spk_), intent (in) :: alpha
integer(psb_ipk_) :: i
if (allocated(x%v)) then if (allocated(x%v)) then
#if defined(OPENMP)
!$omp parallel do private(i)
do i=1,size(x%v)
x%v(i) = alpha*x%v(i)
end do
#else
x%v = alpha*x%v x%v = alpha*x%v
call x%set_host() #endif
end if end if
call x%set_host()
end subroutine c_base_scal end subroutine c_base_scal
! !
@ -1655,10 +1711,18 @@ contains
class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(in) :: n
real(psb_spk_) :: res real(psb_spk_) :: res
integer(psb_ipk_) :: i
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
#if defined(OPENMP)
res = szero
!$omp parallel do private(i) reduction(max: res)
do i=1, n
res = max(res,abs(x%v(i)))
end do
#else
res = maxval(abs(x%v(1:n))) res = maxval(abs(x%v(1:n)))
#endif
end function c_base_amax end function c_base_amax
@ -1672,10 +1736,18 @@ contains
class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(in) :: n
real(psb_spk_) :: res real(psb_spk_) :: res
integer(psb_ipk_) :: i
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
#if defined(OPENMP)
res=szero
!$omp parallel do private(i) reduction(+: res)
do i= 1, size(x%v)
res = res + abs(x%v(i))
end do
#else
res = sum(abs(x%v(1:n))) res = sum(abs(x%v(1:n)))
#endif
end function c_base_asum end function c_base_asum
@ -1882,11 +1954,15 @@ contains
integer(psb_ipk_) :: i, n integer(psb_ipk_) :: i, n
if (z%is_dev()) call z%sync() if (z%is_dev()) call z%sync()
#if defined(OPENMP)
n = size(x) n = size(x)
do i = 1, n, 1 !$omp parallel do private(i)
z%v(i) = x(i) + b do i = 1, n
z%v(i) = x(i) + b
end do end do
#else
z%v = x + b
#endif
info = 0 info = 0
end subroutine c_base_addconst_a2 end subroutine c_base_addconst_a2
@ -1914,9 +1990,6 @@ contains
end module psb_c_base_vect_mod end module psb_c_base_vect_mod
module psb_c_base_multivect_mod module psb_c_base_multivect_mod
use psb_const_mod use psb_const_mod

@ -273,14 +273,21 @@ contains
real(psb_dpk_), intent(in) :: this(:) real(psb_dpk_), intent(in) :: this(:)
class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: x
integer(psb_ipk_) :: info integer(psb_ipk_) :: info
integer(psb_ipk_) :: i
call psb_realloc(size(this),x%v,info) call psb_realloc(size(this),x%v,info)
if (info /= 0) then if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld') call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld')
return return
end if end if
#if defined (OPENMP)
!$omp parallel do private(i)
do i = 1, size(this)
x%v(i) = this(i)
end do
#else
x%v(:) = this(:) x%v(:) = this(:)
#endif
end subroutine d_base_bld_x end subroutine d_base_bld_x
! !
@ -410,7 +417,6 @@ contains
case(psb_dupl_ovwrt_) case(psb_dupl_ovwrt_)
do i = 1, n do i = 1, n
!loop over all val's rows !loop over all val's rows
! row actual block row ! row actual block row
if ((1 <= irl(i)).and.(irl(i) <= isz)) then if ((1 <= irl(i)).and.(irl(i) <= isz)) then
! this row belongs to me ! this row belongs to me
@ -790,7 +796,7 @@ contains
integer(psb_ipk_) :: info integer(psb_ipk_) :: info
integer(psb_ipk_), optional :: n integer(psb_ipk_), optional :: n
! Local variables ! Local variables
integer(psb_ipk_) :: isz integer(psb_ipk_) :: isz, i
if (.not.allocated(x%v)) return if (.not.allocated(x%v)) return
if (.not.x%is_host()) call x%sync() if (.not.x%is_host()) call x%sync()
@ -801,7 +807,15 @@ contains
call psb_errpush(psb_err_alloc_dealloc_,'base_get_vect') call psb_errpush(psb_err_alloc_dealloc_,'base_get_vect')
return return
end if end if
res(1:isz) = x%v(1:isz) if (.false.) then
res(1:isz) = x%v(1:isz)
else
!$omp parallel do private(i)
do i=1, isz
res(i) = x%v(i)
end do
end if
end function d_base_get_vect end function d_base_get_vect
! !
@ -819,7 +833,7 @@ contains
real(psb_dpk_), intent(in) :: val real(psb_dpk_), intent(in) :: val
integer(psb_ipk_), optional :: first, last integer(psb_ipk_), optional :: first, last
integer(psb_ipk_) :: first_, last_ integer(psb_ipk_) :: first_, last_, i
first_=1 first_=1
last_=size(x%v) last_=size(x%v)
@ -827,7 +841,14 @@ contains
if (present(last)) last_ = min(last,last_) if (present(last)) last_ = min(last,last_)
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
#if defined(OPENMP)
!$omp parallel do private(i)
do i = first_, last_
x%v(i) = val
end do
#else
x%v(first_:last_) = val x%v(first_:last_) = val
#endif
call x%set_host() call x%set_host()
end subroutine d_base_set_scal end subroutine d_base_set_scal
@ -845,19 +866,27 @@ contains
real(psb_dpk_), intent(in) :: val(:) real(psb_dpk_), intent(in) :: val(:)
integer(psb_ipk_), optional :: first, last integer(psb_ipk_), optional :: first, last
integer(psb_ipk_) :: first_, last_ integer(psb_ipk_) :: first_, last_, i, info
if (.not.allocated(x%v)) then
call psb_realloc(size(val),x%v,info)
end if
first_ = 1 first_ = 1
if (present(first)) first_ = max(1,first) if (present(first)) first_ = max(1,first)
last_ = min(psb_size(x%v),first_+size(val)-1) last_ = min(psb_size(x%v),first_+size(val)-1)
if (present(last)) last_ = min(last,last_) if (present(last)) last_ = min(last,last_)
if (allocated(x%v)) then if (x%is_dev()) call x%sync()
if (x%is_dev()) call x%sync()
#if defined(OPENMP)
!$omp parallel do private(i)
do i = first_, last_
x%v(i) = val(i-first_+1)
end do
#else
x%v(first_:last_) = val(1:last_-first_+1) x%v(first_:last_) = val(1:last_-first_+1)
else #endif
x%v = val
end if
call x%set_host() call x%set_host()
end subroutine d_base_set_vect end subroutine d_base_set_vect
@ -895,9 +924,18 @@ contains
implicit none implicit none
class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: x
integer(psb_ipk_) :: i
if (allocated(x%v)) then if (allocated(x%v)) then
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
#if defined(OPENMP)
!$omp parallel do private(i)
do i=1, size(x%v)
x%v(i) = abs(x%v(i))
end do
#else
x%v = abs(x%v) x%v = abs(x%v)
#endif
call x%set_host() call x%set_host()
end if end if
@ -1139,6 +1177,7 @@ contains
info = 0 info = 0
if (y%is_dev()) call y%sync() if (y%is_dev()) call y%sync()
n = min(size(y%v), size(x)) n = min(size(y%v), size(x))
!$omp parallel do private(i)
do i=1, n do i=1, n
y%v(i) = y%v(i)*x(i) y%v(i) = y%v(i)*x(i)
end do end do
@ -1176,6 +1215,7 @@ contains
if (beta == done) then if (beta == done) then
return return
else else
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) z%v(i) = beta*z%v(i)
end do end do
@ -1183,42 +1223,51 @@ contains
else else
if (alpha == done) then if (alpha == done) then
if (beta == dzero) then if (beta == dzero) then
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = y(i)*x(i) z%v(i) = y(i)*x(i)
end do end do
else if (beta == done) then else if (beta == done) then
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = z%v(i) + y(i)*x(i) z%v(i) = z%v(i) + y(i)*x(i)
end do end do
else else
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) + y(i)*x(i) z%v(i) = beta*z%v(i) + y(i)*x(i)
end do end do
end if end if
else if (alpha == -done) then else if (alpha == -done) then
if (beta == dzero) then if (beta == dzero) then
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = -y(i)*x(i) z%v(i) = -y(i)*x(i)
end do end do
else if (beta == done) then else if (beta == done) then
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = z%v(i) - y(i)*x(i) z%v(i) = z%v(i) - y(i)*x(i)
end do end do
else else
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) - y(i)*x(i) z%v(i) = beta*z%v(i) - y(i)*x(i)
end do end do
end if end if
else else
if (beta == dzero) then if (beta == dzero) then
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = alpha*y(i)*x(i) z%v(i) = alpha*y(i)*x(i)
end do end do
else if (beta == done) then else if (beta == done) then
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = z%v(i) + alpha*y(i)*x(i) z%v(i) = z%v(i) + alpha*y(i)*x(i)
end do end do
else else
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) + alpha*y(i)*x(i) z%v(i) = beta*z%v(i) + alpha*y(i)*x(i)
end do end do
@ -1321,7 +1370,6 @@ contains
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
call x%div(x%v,y%v,info) call x%div(x%v,y%v,info)
end subroutine d_base_div_v end subroutine d_base_div_v
! !
!> Function base_div_v2 !> Function base_div_v2
@ -1365,7 +1413,6 @@ contains
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
call x%div(x%v,y%v,info,flag) call x%div(x%v,y%v,info,flag)
end subroutine d_base_div_v_check end subroutine d_base_div_v_check
! !
!> Function base_div_v2_check !> Function base_div_v2_check
@ -1388,7 +1435,6 @@ contains
if (z%is_dev()) call z%sync() if (z%is_dev()) call z%sync()
call z%div(x%v,y%v,info,flag) call z%div(x%v,y%v,info,flag)
end subroutine d_base_div_v2_check end subroutine d_base_div_v2_check
! !
!> Function base_div_a2 !> Function base_div_a2
@ -1410,6 +1456,7 @@ contains
if (z%is_dev()) call z%sync() if (z%is_dev()) call z%sync()
n = min(size(y), size(x)) n = min(size(y), size(x))
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = x(i)/y(i) z%v(i) = x(i)/y(i)
end do end do
@ -1440,6 +1487,7 @@ contains
if (z%is_dev()) call z%sync() if (z%is_dev()) call z%sync()
n = min(size(y), size(x)) n = min(size(y), size(x))
! $omp parallel do private(i)
do i=1, n do i=1, n
if (y(i) /= 0) then if (y(i) /= 0) then
z%v(i) = x(i)/y(i) z%v(i) = x(i)/y(i)
@ -1450,7 +1498,6 @@ contains
end do end do
end if end if
end subroutine d_base_div_a2_check end subroutine d_base_div_a2_check
! !
!> Function base_inv_v !> Function base_inv_v
@ -1494,7 +1541,6 @@ contains
if (y%is_dev()) call y%sync() if (y%is_dev()) call y%sync()
call y%inv(x%v,info,flag) call y%inv(x%v,info,flag)
end subroutine d_base_inv_v_check end subroutine d_base_inv_v_check
! !
!> Function base_inv_a2 !> Function base_inv_a2
@ -1516,6 +1562,7 @@ contains
if (y%is_dev()) call y%sync() if (y%is_dev()) call y%sync()
n = size(x) n = size(x)
!$omp parallel do private(i)
do i=1, n do i=1, n
y%v(i) = 1_psb_dpk_/x(i) y%v(i) = 1_psb_dpk_/x(i)
end do end do
@ -1546,6 +1593,7 @@ contains
if (y%is_dev()) call y%sync() if (y%is_dev()) call y%sync()
n = size(x) n = size(x)
!$omp parallel do private(i)
do i=1, n do i=1, n
if (x(i) /= 0) then if (x(i) /= 0) then
y%v(i) = 1_psb_dpk_/x(i) y%v(i) = 1_psb_dpk_/x(i)
@ -1580,6 +1628,7 @@ contains
if (z%is_dev()) call z%sync() if (z%is_dev()) call z%sync()
n = size(x) n = size(x)
!$omp parallel do private(i)
do i = 1, n, 1 do i = 1, n, 1
if ( abs(x(i)).ge.c ) then if ( abs(x(i)).ge.c ) then
z%v(i) = 1_psb_dpk_ z%v(i) = 1_psb_dpk_
@ -1625,12 +1674,19 @@ contains
implicit none implicit none
class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: x
real(psb_dpk_), intent (in) :: alpha real(psb_dpk_), intent (in) :: alpha
integer(psb_ipk_) :: i
if (allocated(x%v)) then if (allocated(x%v)) then
#if defined(OPENMP)
!$omp parallel do private(i)
do i=1,size(x%v)
x%v(i) = alpha*x%v(i)
end do
#else
x%v = alpha*x%v x%v = alpha*x%v
call x%set_host() #endif
end if end if
call x%set_host()
end subroutine d_base_scal end subroutine d_base_scal
! !
@ -1662,10 +1718,18 @@ contains
class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(in) :: n
real(psb_dpk_) :: res real(psb_dpk_) :: res
integer(psb_ipk_) :: i
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
#if defined(OPENMP)
res = dzero
!$omp parallel do private(i) reduction(max: res)
do i=1, n
res = max(res,abs(x%v(i)))
end do
#else
res = maxval(abs(x%v(1:n))) res = maxval(abs(x%v(1:n)))
#endif
end function d_base_amax end function d_base_amax
! !
@ -1678,10 +1742,18 @@ contains
class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(in) :: n
real(psb_dpk_) :: res real(psb_dpk_) :: res
integer(psb_ipk_) :: i
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
#if defined(OPENMP)
res = HUGE(done)
!$omp parallel do private(i) reduction(min: res)
do i=1, n
res = min(res,abs(x%v(i)))
end do
#else
res = minval(x%v(1:n)) res = minval(x%v(1:n))
#endif
end function d_base_min end function d_base_min
! !
@ -1730,10 +1802,11 @@ contains
z = huge(z) z = huge(z)
n = min(size(y), size(x%v)) n = min(size(y), size(x%v))
!$omp parallel do private(i,temp) reduction(min: z)
do i=1, n do i=1, n
if ( y(i) /= dzero ) then if ( y(i) /= dzero ) then
temp = x%v(i)/y(i) temp = x%v(i)/y(i)
if (temp <= z) z = temp z = min(z,temp)
end if end if
end do end do
@ -1750,10 +1823,18 @@ contains
class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(in) :: n
real(psb_dpk_) :: res real(psb_dpk_) :: res
integer(psb_ipk_) :: i
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
#if defined(OPENMP)
res=dzero
!$omp parallel do private(i) reduction(+: res)
do i= 1, size(x%v)
res = res + abs(x%v(i))
end do
#else
res = sum(abs(x%v(1:n))) res = sum(abs(x%v(1:n)))
#endif
end function d_base_asum end function d_base_asum
@ -2052,11 +2133,15 @@ contains
integer(psb_ipk_) :: i, n integer(psb_ipk_) :: i, n
if (z%is_dev()) call z%sync() if (z%is_dev()) call z%sync()
#if defined(OPENMP)
n = size(x) n = size(x)
do i = 1, n, 1 !$omp parallel do private(i)
z%v(i) = x(i) + b do i = 1, n
z%v(i) = x(i) + b
end do end do
#else
z%v = x + b
#endif
info = 0 info = 0
end subroutine d_base_addconst_a2 end subroutine d_base_addconst_a2
@ -2084,9 +2169,6 @@ contains
end module psb_d_base_vect_mod end module psb_d_base_vect_mod
module psb_d_base_multivect_mod module psb_d_base_multivect_mod
use psb_const_mod use psb_const_mod

@ -202,14 +202,21 @@ contains
integer(psb_ipk_), intent(in) :: this(:) integer(psb_ipk_), intent(in) :: this(:)
class(psb_i_base_vect_type), intent(inout) :: x class(psb_i_base_vect_type), intent(inout) :: x
integer(psb_ipk_) :: info integer(psb_ipk_) :: info
integer(psb_ipk_) :: i
call psb_realloc(size(this),x%v,info) call psb_realloc(size(this),x%v,info)
if (info /= 0) then if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld') call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld')
return return
end if end if
#if defined (OPENMP)
!$omp parallel do private(i)
do i = 1, size(this)
x%v(i) = this(i)
end do
#else
x%v(:) = this(:) x%v(:) = this(:)
#endif
end subroutine i_base_bld_x end subroutine i_base_bld_x
! !
@ -339,7 +346,6 @@ contains
case(psb_dupl_ovwrt_) case(psb_dupl_ovwrt_)
do i = 1, n do i = 1, n
!loop over all val's rows !loop over all val's rows
! row actual block row ! row actual block row
if ((1 <= irl(i)).and.(irl(i) <= isz)) then if ((1 <= irl(i)).and.(irl(i) <= isz)) then
! this row belongs to me ! this row belongs to me
@ -719,7 +725,7 @@ contains
integer(psb_ipk_) :: info integer(psb_ipk_) :: info
integer(psb_ipk_), optional :: n integer(psb_ipk_), optional :: n
! Local variables ! Local variables
integer(psb_ipk_) :: isz integer(psb_ipk_) :: isz, i
if (.not.allocated(x%v)) return if (.not.allocated(x%v)) return
if (.not.x%is_host()) call x%sync() if (.not.x%is_host()) call x%sync()
@ -730,7 +736,15 @@ contains
call psb_errpush(psb_err_alloc_dealloc_,'base_get_vect') call psb_errpush(psb_err_alloc_dealloc_,'base_get_vect')
return return
end if end if
res(1:isz) = x%v(1:isz) if (.false.) then
res(1:isz) = x%v(1:isz)
else
!$omp parallel do private(i)
do i=1, isz
res(i) = x%v(i)
end do
end if
end function i_base_get_vect end function i_base_get_vect
! !
@ -748,7 +762,7 @@ contains
integer(psb_ipk_), intent(in) :: val integer(psb_ipk_), intent(in) :: val
integer(psb_ipk_), optional :: first, last integer(psb_ipk_), optional :: first, last
integer(psb_ipk_) :: first_, last_ integer(psb_ipk_) :: first_, last_, i
first_=1 first_=1
last_=size(x%v) last_=size(x%v)
@ -756,7 +770,14 @@ contains
if (present(last)) last_ = min(last,last_) if (present(last)) last_ = min(last,last_)
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
#if defined(OPENMP)
!$omp parallel do private(i)
do i = first_, last_
x%v(i) = val
end do
#else
x%v(first_:last_) = val x%v(first_:last_) = val
#endif
call x%set_host() call x%set_host()
end subroutine i_base_set_scal end subroutine i_base_set_scal
@ -774,19 +795,27 @@ contains
integer(psb_ipk_), intent(in) :: val(:) integer(psb_ipk_), intent(in) :: val(:)
integer(psb_ipk_), optional :: first, last integer(psb_ipk_), optional :: first, last
integer(psb_ipk_) :: first_, last_ integer(psb_ipk_) :: first_, last_, i, info
if (.not.allocated(x%v)) then
call psb_realloc(size(val),x%v,info)
end if
first_ = 1 first_ = 1
if (present(first)) first_ = max(1,first) if (present(first)) first_ = max(1,first)
last_ = min(psb_size(x%v),first_+size(val)-1) last_ = min(psb_size(x%v),first_+size(val)-1)
if (present(last)) last_ = min(last,last_) if (present(last)) last_ = min(last,last_)
if (allocated(x%v)) then if (x%is_dev()) call x%sync()
if (x%is_dev()) call x%sync()
#if defined(OPENMP)
!$omp parallel do private(i)
do i = first_, last_
x%v(i) = val(i-first_+1)
end do
#else
x%v(first_:last_) = val(1:last_-first_+1) x%v(first_:last_) = val(1:last_-first_+1)
else #endif
x%v = val
end if
call x%set_host() call x%set_host()
end subroutine i_base_set_vect end subroutine i_base_set_vect
@ -980,9 +1009,6 @@ contains
end module psb_i_base_vect_mod end module psb_i_base_vect_mod
module psb_i_base_multivect_mod module psb_i_base_multivect_mod
use psb_const_mod use psb_const_mod

@ -203,14 +203,21 @@ contains
integer(psb_lpk_), intent(in) :: this(:) integer(psb_lpk_), intent(in) :: this(:)
class(psb_l_base_vect_type), intent(inout) :: x class(psb_l_base_vect_type), intent(inout) :: x
integer(psb_ipk_) :: info integer(psb_ipk_) :: info
integer(psb_ipk_) :: i
call psb_realloc(size(this),x%v,info) call psb_realloc(size(this),x%v,info)
if (info /= 0) then if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld') call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld')
return return
end if end if
#if defined (OPENMP)
!$omp parallel do private(i)
do i = 1, size(this)
x%v(i) = this(i)
end do
#else
x%v(:) = this(:) x%v(:) = this(:)
#endif
end subroutine l_base_bld_x end subroutine l_base_bld_x
! !
@ -340,7 +347,6 @@ contains
case(psb_dupl_ovwrt_) case(psb_dupl_ovwrt_)
do i = 1, n do i = 1, n
!loop over all val's rows !loop over all val's rows
! row actual block row ! row actual block row
if ((1 <= irl(i)).and.(irl(i) <= isz)) then if ((1 <= irl(i)).and.(irl(i) <= isz)) then
! this row belongs to me ! this row belongs to me
@ -720,7 +726,7 @@ contains
integer(psb_ipk_) :: info integer(psb_ipk_) :: info
integer(psb_ipk_), optional :: n integer(psb_ipk_), optional :: n
! Local variables ! Local variables
integer(psb_ipk_) :: isz integer(psb_ipk_) :: isz, i
if (.not.allocated(x%v)) return if (.not.allocated(x%v)) return
if (.not.x%is_host()) call x%sync() if (.not.x%is_host()) call x%sync()
@ -731,7 +737,15 @@ contains
call psb_errpush(psb_err_alloc_dealloc_,'base_get_vect') call psb_errpush(psb_err_alloc_dealloc_,'base_get_vect')
return return
end if end if
res(1:isz) = x%v(1:isz) if (.false.) then
res(1:isz) = x%v(1:isz)
else
!$omp parallel do private(i)
do i=1, isz
res(i) = x%v(i)
end do
end if
end function l_base_get_vect end function l_base_get_vect
! !
@ -749,7 +763,7 @@ contains
integer(psb_lpk_), intent(in) :: val integer(psb_lpk_), intent(in) :: val
integer(psb_ipk_), optional :: first, last integer(psb_ipk_), optional :: first, last
integer(psb_ipk_) :: first_, last_ integer(psb_ipk_) :: first_, last_, i
first_=1 first_=1
last_=size(x%v) last_=size(x%v)
@ -757,7 +771,14 @@ contains
if (present(last)) last_ = min(last,last_) if (present(last)) last_ = min(last,last_)
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
#if defined(OPENMP)
!$omp parallel do private(i)
do i = first_, last_
x%v(i) = val
end do
#else
x%v(first_:last_) = val x%v(first_:last_) = val
#endif
call x%set_host() call x%set_host()
end subroutine l_base_set_scal end subroutine l_base_set_scal
@ -775,19 +796,27 @@ contains
integer(psb_lpk_), intent(in) :: val(:) integer(psb_lpk_), intent(in) :: val(:)
integer(psb_ipk_), optional :: first, last integer(psb_ipk_), optional :: first, last
integer(psb_ipk_) :: first_, last_ integer(psb_ipk_) :: first_, last_, i, info
if (.not.allocated(x%v)) then
call psb_realloc(size(val),x%v,info)
end if
first_ = 1 first_ = 1
if (present(first)) first_ = max(1,first) if (present(first)) first_ = max(1,first)
last_ = min(psb_size(x%v),first_+size(val)-1) last_ = min(psb_size(x%v),first_+size(val)-1)
if (present(last)) last_ = min(last,last_) if (present(last)) last_ = min(last,last_)
if (allocated(x%v)) then if (x%is_dev()) call x%sync()
if (x%is_dev()) call x%sync()
#if defined(OPENMP)
!$omp parallel do private(i)
do i = first_, last_
x%v(i) = val(i-first_+1)
end do
#else
x%v(first_:last_) = val(1:last_-first_+1) x%v(first_:last_) = val(1:last_-first_+1)
else #endif
x%v = val
end if
call x%set_host() call x%set_host()
end subroutine l_base_set_vect end subroutine l_base_set_vect
@ -981,9 +1010,6 @@ contains
end module psb_l_base_vect_mod end module psb_l_base_vect_mod
module psb_l_base_multivect_mod module psb_l_base_multivect_mod
use psb_const_mod use psb_const_mod

@ -273,14 +273,21 @@ contains
real(psb_spk_), intent(in) :: this(:) real(psb_spk_), intent(in) :: this(:)
class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: x
integer(psb_ipk_) :: info integer(psb_ipk_) :: info
integer(psb_ipk_) :: i
call psb_realloc(size(this),x%v,info) call psb_realloc(size(this),x%v,info)
if (info /= 0) then if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld') call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld')
return return
end if end if
#if defined (OPENMP)
!$omp parallel do private(i)
do i = 1, size(this)
x%v(i) = this(i)
end do
#else
x%v(:) = this(:) x%v(:) = this(:)
#endif
end subroutine s_base_bld_x end subroutine s_base_bld_x
! !
@ -410,7 +417,6 @@ contains
case(psb_dupl_ovwrt_) case(psb_dupl_ovwrt_)
do i = 1, n do i = 1, n
!loop over all val's rows !loop over all val's rows
! row actual block row ! row actual block row
if ((1 <= irl(i)).and.(irl(i) <= isz)) then if ((1 <= irl(i)).and.(irl(i) <= isz)) then
! this row belongs to me ! this row belongs to me
@ -790,7 +796,7 @@ contains
integer(psb_ipk_) :: info integer(psb_ipk_) :: info
integer(psb_ipk_), optional :: n integer(psb_ipk_), optional :: n
! Local variables ! Local variables
integer(psb_ipk_) :: isz integer(psb_ipk_) :: isz, i
if (.not.allocated(x%v)) return if (.not.allocated(x%v)) return
if (.not.x%is_host()) call x%sync() if (.not.x%is_host()) call x%sync()
@ -801,7 +807,15 @@ contains
call psb_errpush(psb_err_alloc_dealloc_,'base_get_vect') call psb_errpush(psb_err_alloc_dealloc_,'base_get_vect')
return return
end if end if
res(1:isz) = x%v(1:isz) if (.false.) then
res(1:isz) = x%v(1:isz)
else
!$omp parallel do private(i)
do i=1, isz
res(i) = x%v(i)
end do
end if
end function s_base_get_vect end function s_base_get_vect
! !
@ -819,7 +833,7 @@ contains
real(psb_spk_), intent(in) :: val real(psb_spk_), intent(in) :: val
integer(psb_ipk_), optional :: first, last integer(psb_ipk_), optional :: first, last
integer(psb_ipk_) :: first_, last_ integer(psb_ipk_) :: first_, last_, i
first_=1 first_=1
last_=size(x%v) last_=size(x%v)
@ -827,7 +841,14 @@ contains
if (present(last)) last_ = min(last,last_) if (present(last)) last_ = min(last,last_)
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
#if defined(OPENMP)
!$omp parallel do private(i)
do i = first_, last_
x%v(i) = val
end do
#else
x%v(first_:last_) = val x%v(first_:last_) = val
#endif
call x%set_host() call x%set_host()
end subroutine s_base_set_scal end subroutine s_base_set_scal
@ -845,19 +866,27 @@ contains
real(psb_spk_), intent(in) :: val(:) real(psb_spk_), intent(in) :: val(:)
integer(psb_ipk_), optional :: first, last integer(psb_ipk_), optional :: first, last
integer(psb_ipk_) :: first_, last_ integer(psb_ipk_) :: first_, last_, i, info
if (.not.allocated(x%v)) then
call psb_realloc(size(val),x%v,info)
end if
first_ = 1 first_ = 1
if (present(first)) first_ = max(1,first) if (present(first)) first_ = max(1,first)
last_ = min(psb_size(x%v),first_+size(val)-1) last_ = min(psb_size(x%v),first_+size(val)-1)
if (present(last)) last_ = min(last,last_) if (present(last)) last_ = min(last,last_)
if (allocated(x%v)) then if (x%is_dev()) call x%sync()
if (x%is_dev()) call x%sync()
#if defined(OPENMP)
!$omp parallel do private(i)
do i = first_, last_
x%v(i) = val(i-first_+1)
end do
#else
x%v(first_:last_) = val(1:last_-first_+1) x%v(first_:last_) = val(1:last_-first_+1)
else #endif
x%v = val
end if
call x%set_host() call x%set_host()
end subroutine s_base_set_vect end subroutine s_base_set_vect
@ -895,9 +924,18 @@ contains
implicit none implicit none
class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: x
integer(psb_ipk_) :: i
if (allocated(x%v)) then if (allocated(x%v)) then
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
#if defined(OPENMP)
!$omp parallel do private(i)
do i=1, size(x%v)
x%v(i) = abs(x%v(i))
end do
#else
x%v = abs(x%v) x%v = abs(x%v)
#endif
call x%set_host() call x%set_host()
end if end if
@ -1139,6 +1177,7 @@ contains
info = 0 info = 0
if (y%is_dev()) call y%sync() if (y%is_dev()) call y%sync()
n = min(size(y%v), size(x)) n = min(size(y%v), size(x))
!$omp parallel do private(i)
do i=1, n do i=1, n
y%v(i) = y%v(i)*x(i) y%v(i) = y%v(i)*x(i)
end do end do
@ -1176,6 +1215,7 @@ contains
if (beta == sone) then if (beta == sone) then
return return
else else
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) z%v(i) = beta*z%v(i)
end do end do
@ -1183,42 +1223,51 @@ contains
else else
if (alpha == sone) then if (alpha == sone) then
if (beta == szero) then if (beta == szero) then
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = y(i)*x(i) z%v(i) = y(i)*x(i)
end do end do
else if (beta == sone) then else if (beta == sone) then
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = z%v(i) + y(i)*x(i) z%v(i) = z%v(i) + y(i)*x(i)
end do end do
else else
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) + y(i)*x(i) z%v(i) = beta*z%v(i) + y(i)*x(i)
end do end do
end if end if
else if (alpha == -sone) then else if (alpha == -sone) then
if (beta == szero) then if (beta == szero) then
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = -y(i)*x(i) z%v(i) = -y(i)*x(i)
end do end do
else if (beta == sone) then else if (beta == sone) then
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = z%v(i) - y(i)*x(i) z%v(i) = z%v(i) - y(i)*x(i)
end do end do
else else
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) - y(i)*x(i) z%v(i) = beta*z%v(i) - y(i)*x(i)
end do end do
end if end if
else else
if (beta == szero) then if (beta == szero) then
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = alpha*y(i)*x(i) z%v(i) = alpha*y(i)*x(i)
end do end do
else if (beta == sone) then else if (beta == sone) then
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = z%v(i) + alpha*y(i)*x(i) z%v(i) = z%v(i) + alpha*y(i)*x(i)
end do end do
else else
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) + alpha*y(i)*x(i) z%v(i) = beta*z%v(i) + alpha*y(i)*x(i)
end do end do
@ -1321,7 +1370,6 @@ contains
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
call x%div(x%v,y%v,info) call x%div(x%v,y%v,info)
end subroutine s_base_div_v end subroutine s_base_div_v
! !
!> Function base_div_v2 !> Function base_div_v2
@ -1365,7 +1413,6 @@ contains
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
call x%div(x%v,y%v,info,flag) call x%div(x%v,y%v,info,flag)
end subroutine s_base_div_v_check end subroutine s_base_div_v_check
! !
!> Function base_div_v2_check !> Function base_div_v2_check
@ -1388,7 +1435,6 @@ contains
if (z%is_dev()) call z%sync() if (z%is_dev()) call z%sync()
call z%div(x%v,y%v,info,flag) call z%div(x%v,y%v,info,flag)
end subroutine s_base_div_v2_check end subroutine s_base_div_v2_check
! !
!> Function base_div_a2 !> Function base_div_a2
@ -1410,6 +1456,7 @@ contains
if (z%is_dev()) call z%sync() if (z%is_dev()) call z%sync()
n = min(size(y), size(x)) n = min(size(y), size(x))
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = x(i)/y(i) z%v(i) = x(i)/y(i)
end do end do
@ -1440,6 +1487,7 @@ contains
if (z%is_dev()) call z%sync() if (z%is_dev()) call z%sync()
n = min(size(y), size(x)) n = min(size(y), size(x))
! $omp parallel do private(i)
do i=1, n do i=1, n
if (y(i) /= 0) then if (y(i) /= 0) then
z%v(i) = x(i)/y(i) z%v(i) = x(i)/y(i)
@ -1450,7 +1498,6 @@ contains
end do end do
end if end if
end subroutine s_base_div_a2_check end subroutine s_base_div_a2_check
! !
!> Function base_inv_v !> Function base_inv_v
@ -1494,7 +1541,6 @@ contains
if (y%is_dev()) call y%sync() if (y%is_dev()) call y%sync()
call y%inv(x%v,info,flag) call y%inv(x%v,info,flag)
end subroutine s_base_inv_v_check end subroutine s_base_inv_v_check
! !
!> Function base_inv_a2 !> Function base_inv_a2
@ -1516,6 +1562,7 @@ contains
if (y%is_dev()) call y%sync() if (y%is_dev()) call y%sync()
n = size(x) n = size(x)
!$omp parallel do private(i)
do i=1, n do i=1, n
y%v(i) = 1_psb_spk_/x(i) y%v(i) = 1_psb_spk_/x(i)
end do end do
@ -1546,6 +1593,7 @@ contains
if (y%is_dev()) call y%sync() if (y%is_dev()) call y%sync()
n = size(x) n = size(x)
!$omp parallel do private(i)
do i=1, n do i=1, n
if (x(i) /= 0) then if (x(i) /= 0) then
y%v(i) = 1_psb_spk_/x(i) y%v(i) = 1_psb_spk_/x(i)
@ -1580,6 +1628,7 @@ contains
if (z%is_dev()) call z%sync() if (z%is_dev()) call z%sync()
n = size(x) n = size(x)
!$omp parallel do private(i)
do i = 1, n, 1 do i = 1, n, 1
if ( abs(x(i)).ge.c ) then if ( abs(x(i)).ge.c ) then
z%v(i) = 1_psb_spk_ z%v(i) = 1_psb_spk_
@ -1625,12 +1674,19 @@ contains
implicit none implicit none
class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: x
real(psb_spk_), intent (in) :: alpha real(psb_spk_), intent (in) :: alpha
integer(psb_ipk_) :: i
if (allocated(x%v)) then if (allocated(x%v)) then
#if defined(OPENMP)
!$omp parallel do private(i)
do i=1,size(x%v)
x%v(i) = alpha*x%v(i)
end do
#else
x%v = alpha*x%v x%v = alpha*x%v
call x%set_host() #endif
end if end if
call x%set_host()
end subroutine s_base_scal end subroutine s_base_scal
! !
@ -1662,10 +1718,18 @@ contains
class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(in) :: n
real(psb_spk_) :: res real(psb_spk_) :: res
integer(psb_ipk_) :: i
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
#if defined(OPENMP)
res = szero
!$omp parallel do private(i) reduction(max: res)
do i=1, n
res = max(res,abs(x%v(i)))
end do
#else
res = maxval(abs(x%v(1:n))) res = maxval(abs(x%v(1:n)))
#endif
end function s_base_amax end function s_base_amax
! !
@ -1678,10 +1742,18 @@ contains
class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(in) :: n
real(psb_spk_) :: res real(psb_spk_) :: res
integer(psb_ipk_) :: i
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
#if defined(OPENMP)
res = HUGE(sone)
!$omp parallel do private(i) reduction(min: res)
do i=1, n
res = min(res,abs(x%v(i)))
end do
#else
res = minval(x%v(1:n)) res = minval(x%v(1:n))
#endif
end function s_base_min end function s_base_min
! !
@ -1730,10 +1802,11 @@ contains
z = huge(z) z = huge(z)
n = min(size(y), size(x%v)) n = min(size(y), size(x%v))
!$omp parallel do private(i,temp) reduction(min: z)
do i=1, n do i=1, n
if ( y(i) /= szero ) then if ( y(i) /= szero ) then
temp = x%v(i)/y(i) temp = x%v(i)/y(i)
if (temp <= z) z = temp z = min(z,temp)
end if end if
end do end do
@ -1750,10 +1823,18 @@ contains
class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(in) :: n
real(psb_spk_) :: res real(psb_spk_) :: res
integer(psb_ipk_) :: i
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
#if defined(OPENMP)
res=szero
!$omp parallel do private(i) reduction(+: res)
do i= 1, size(x%v)
res = res + abs(x%v(i))
end do
#else
res = sum(abs(x%v(1:n))) res = sum(abs(x%v(1:n)))
#endif
end function s_base_asum end function s_base_asum
@ -2052,11 +2133,15 @@ contains
integer(psb_ipk_) :: i, n integer(psb_ipk_) :: i, n
if (z%is_dev()) call z%sync() if (z%is_dev()) call z%sync()
#if defined(OPENMP)
n = size(x) n = size(x)
do i = 1, n, 1 !$omp parallel do private(i)
z%v(i) = x(i) + b do i = 1, n
z%v(i) = x(i) + b
end do end do
#else
z%v = x + b
#endif
info = 0 info = 0
end subroutine s_base_addconst_a2 end subroutine s_base_addconst_a2
@ -2084,9 +2169,6 @@ contains
end module psb_s_base_vect_mod end module psb_s_base_vect_mod
module psb_s_base_multivect_mod module psb_s_base_multivect_mod
use psb_const_mod use psb_const_mod

@ -266,14 +266,21 @@ contains
complex(psb_dpk_), intent(in) :: this(:) complex(psb_dpk_), intent(in) :: this(:)
class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: x
integer(psb_ipk_) :: info integer(psb_ipk_) :: info
integer(psb_ipk_) :: i
call psb_realloc(size(this),x%v,info) call psb_realloc(size(this),x%v,info)
if (info /= 0) then if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld') call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld')
return return
end if end if
#if defined (OPENMP)
!$omp parallel do private(i)
do i = 1, size(this)
x%v(i) = this(i)
end do
#else
x%v(:) = this(:) x%v(:) = this(:)
#endif
end subroutine z_base_bld_x end subroutine z_base_bld_x
! !
@ -403,7 +410,6 @@ contains
case(psb_dupl_ovwrt_) case(psb_dupl_ovwrt_)
do i = 1, n do i = 1, n
!loop over all val's rows !loop over all val's rows
! row actual block row ! row actual block row
if ((1 <= irl(i)).and.(irl(i) <= isz)) then if ((1 <= irl(i)).and.(irl(i) <= isz)) then
! this row belongs to me ! this row belongs to me
@ -783,7 +789,7 @@ contains
integer(psb_ipk_) :: info integer(psb_ipk_) :: info
integer(psb_ipk_), optional :: n integer(psb_ipk_), optional :: n
! Local variables ! Local variables
integer(psb_ipk_) :: isz integer(psb_ipk_) :: isz, i
if (.not.allocated(x%v)) return if (.not.allocated(x%v)) return
if (.not.x%is_host()) call x%sync() if (.not.x%is_host()) call x%sync()
@ -794,7 +800,15 @@ contains
call psb_errpush(psb_err_alloc_dealloc_,'base_get_vect') call psb_errpush(psb_err_alloc_dealloc_,'base_get_vect')
return return
end if end if
res(1:isz) = x%v(1:isz) if (.false.) then
res(1:isz) = x%v(1:isz)
else
!$omp parallel do private(i)
do i=1, isz
res(i) = x%v(i)
end do
end if
end function z_base_get_vect end function z_base_get_vect
! !
@ -812,7 +826,7 @@ contains
complex(psb_dpk_), intent(in) :: val complex(psb_dpk_), intent(in) :: val
integer(psb_ipk_), optional :: first, last integer(psb_ipk_), optional :: first, last
integer(psb_ipk_) :: first_, last_ integer(psb_ipk_) :: first_, last_, i
first_=1 first_=1
last_=size(x%v) last_=size(x%v)
@ -820,7 +834,14 @@ contains
if (present(last)) last_ = min(last,last_) if (present(last)) last_ = min(last,last_)
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
#if defined(OPENMP)
!$omp parallel do private(i)
do i = first_, last_
x%v(i) = val
end do
#else
x%v(first_:last_) = val x%v(first_:last_) = val
#endif
call x%set_host() call x%set_host()
end subroutine z_base_set_scal end subroutine z_base_set_scal
@ -838,19 +859,27 @@ contains
complex(psb_dpk_), intent(in) :: val(:) complex(psb_dpk_), intent(in) :: val(:)
integer(psb_ipk_), optional :: first, last integer(psb_ipk_), optional :: first, last
integer(psb_ipk_) :: first_, last_ integer(psb_ipk_) :: first_, last_, i, info
if (.not.allocated(x%v)) then
call psb_realloc(size(val),x%v,info)
end if
first_ = 1 first_ = 1
if (present(first)) first_ = max(1,first) if (present(first)) first_ = max(1,first)
last_ = min(psb_size(x%v),first_+size(val)-1) last_ = min(psb_size(x%v),first_+size(val)-1)
if (present(last)) last_ = min(last,last_) if (present(last)) last_ = min(last,last_)
if (allocated(x%v)) then if (x%is_dev()) call x%sync()
if (x%is_dev()) call x%sync()
#if defined(OPENMP)
!$omp parallel do private(i)
do i = first_, last_
x%v(i) = val(i-first_+1)
end do
#else
x%v(first_:last_) = val(1:last_-first_+1) x%v(first_:last_) = val(1:last_-first_+1)
else #endif
x%v = val
end if
call x%set_host() call x%set_host()
end subroutine z_base_set_vect end subroutine z_base_set_vect
@ -888,9 +917,18 @@ contains
implicit none implicit none
class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: x
integer(psb_ipk_) :: i
if (allocated(x%v)) then if (allocated(x%v)) then
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
#if defined(OPENMP)
!$omp parallel do private(i)
do i=1, size(x%v)
x%v(i) = abs(x%v(i))
end do
#else
x%v = abs(x%v) x%v = abs(x%v)
#endif
call x%set_host() call x%set_host()
end if end if
@ -1132,6 +1170,7 @@ contains
info = 0 info = 0
if (y%is_dev()) call y%sync() if (y%is_dev()) call y%sync()
n = min(size(y%v), size(x)) n = min(size(y%v), size(x))
!$omp parallel do private(i)
do i=1, n do i=1, n
y%v(i) = y%v(i)*x(i) y%v(i) = y%v(i)*x(i)
end do end do
@ -1169,6 +1208,7 @@ contains
if (beta == zone) then if (beta == zone) then
return return
else else
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) z%v(i) = beta*z%v(i)
end do end do
@ -1176,42 +1216,51 @@ contains
else else
if (alpha == zone) then if (alpha == zone) then
if (beta == zzero) then if (beta == zzero) then
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = y(i)*x(i) z%v(i) = y(i)*x(i)
end do end do
else if (beta == zone) then else if (beta == zone) then
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = z%v(i) + y(i)*x(i) z%v(i) = z%v(i) + y(i)*x(i)
end do end do
else else
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) + y(i)*x(i) z%v(i) = beta*z%v(i) + y(i)*x(i)
end do end do
end if end if
else if (alpha == -zone) then else if (alpha == -zone) then
if (beta == zzero) then if (beta == zzero) then
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = -y(i)*x(i) z%v(i) = -y(i)*x(i)
end do end do
else if (beta == zone) then else if (beta == zone) then
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = z%v(i) - y(i)*x(i) z%v(i) = z%v(i) - y(i)*x(i)
end do end do
else else
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) - y(i)*x(i) z%v(i) = beta*z%v(i) - y(i)*x(i)
end do end do
end if end if
else else
if (beta == zzero) then if (beta == zzero) then
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = alpha*y(i)*x(i) z%v(i) = alpha*y(i)*x(i)
end do end do
else if (beta == zone) then else if (beta == zone) then
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = z%v(i) + alpha*y(i)*x(i) z%v(i) = z%v(i) + alpha*y(i)*x(i)
end do end do
else else
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) + alpha*y(i)*x(i) z%v(i) = beta*z%v(i) + alpha*y(i)*x(i)
end do end do
@ -1314,7 +1363,6 @@ contains
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
call x%div(x%v,y%v,info) call x%div(x%v,y%v,info)
end subroutine z_base_div_v end subroutine z_base_div_v
! !
!> Function base_div_v2 !> Function base_div_v2
@ -1358,7 +1406,6 @@ contains
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
call x%div(x%v,y%v,info,flag) call x%div(x%v,y%v,info,flag)
end subroutine z_base_div_v_check end subroutine z_base_div_v_check
! !
!> Function base_div_v2_check !> Function base_div_v2_check
@ -1381,7 +1428,6 @@ contains
if (z%is_dev()) call z%sync() if (z%is_dev()) call z%sync()
call z%div(x%v,y%v,info,flag) call z%div(x%v,y%v,info,flag)
end subroutine z_base_div_v2_check end subroutine z_base_div_v2_check
! !
!> Function base_div_a2 !> Function base_div_a2
@ -1403,6 +1449,7 @@ contains
if (z%is_dev()) call z%sync() if (z%is_dev()) call z%sync()
n = min(size(y), size(x)) n = min(size(y), size(x))
!$omp parallel do private(i)
do i=1, n do i=1, n
z%v(i) = x(i)/y(i) z%v(i) = x(i)/y(i)
end do end do
@ -1433,6 +1480,7 @@ contains
if (z%is_dev()) call z%sync() if (z%is_dev()) call z%sync()
n = min(size(y), size(x)) n = min(size(y), size(x))
! $omp parallel do private(i)
do i=1, n do i=1, n
if (y(i) /= 0) then if (y(i) /= 0) then
z%v(i) = x(i)/y(i) z%v(i) = x(i)/y(i)
@ -1443,7 +1491,6 @@ contains
end do end do
end if end if
end subroutine z_base_div_a2_check end subroutine z_base_div_a2_check
! !
!> Function base_inv_v !> Function base_inv_v
@ -1487,7 +1534,6 @@ contains
if (y%is_dev()) call y%sync() if (y%is_dev()) call y%sync()
call y%inv(x%v,info,flag) call y%inv(x%v,info,flag)
end subroutine z_base_inv_v_check end subroutine z_base_inv_v_check
! !
!> Function base_inv_a2 !> Function base_inv_a2
@ -1509,6 +1555,7 @@ contains
if (y%is_dev()) call y%sync() if (y%is_dev()) call y%sync()
n = size(x) n = size(x)
!$omp parallel do private(i)
do i=1, n do i=1, n
y%v(i) = 1_psb_dpk_/x(i) y%v(i) = 1_psb_dpk_/x(i)
end do end do
@ -1539,6 +1586,7 @@ contains
if (y%is_dev()) call y%sync() if (y%is_dev()) call y%sync()
n = size(x) n = size(x)
!$omp parallel do private(i)
do i=1, n do i=1, n
if (x(i) /= 0) then if (x(i) /= 0) then
y%v(i) = 1_psb_dpk_/x(i) y%v(i) = 1_psb_dpk_/x(i)
@ -1573,6 +1621,7 @@ contains
if (z%is_dev()) call z%sync() if (z%is_dev()) call z%sync()
n = size(x) n = size(x)
!$omp parallel do private(i)
do i = 1, n, 1 do i = 1, n, 1
if ( abs(x(i)).ge.c ) then if ( abs(x(i)).ge.c ) then
z%v(i) = 1_psb_dpk_ z%v(i) = 1_psb_dpk_
@ -1618,12 +1667,19 @@ contains
implicit none implicit none
class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: x
complex(psb_dpk_), intent (in) :: alpha complex(psb_dpk_), intent (in) :: alpha
integer(psb_ipk_) :: i
if (allocated(x%v)) then if (allocated(x%v)) then
#if defined(OPENMP)
!$omp parallel do private(i)
do i=1,size(x%v)
x%v(i) = alpha*x%v(i)
end do
#else
x%v = alpha*x%v x%v = alpha*x%v
call x%set_host() #endif
end if end if
call x%set_host()
end subroutine z_base_scal end subroutine z_base_scal
! !
@ -1655,10 +1711,18 @@ contains
class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(in) :: n
real(psb_dpk_) :: res real(psb_dpk_) :: res
integer(psb_ipk_) :: i
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
#if defined(OPENMP)
res = dzero
!$omp parallel do private(i) reduction(max: res)
do i=1, n
res = max(res,abs(x%v(i)))
end do
#else
res = maxval(abs(x%v(1:n))) res = maxval(abs(x%v(1:n)))
#endif
end function z_base_amax end function z_base_amax
@ -1672,10 +1736,18 @@ contains
class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(in) :: n
real(psb_dpk_) :: res real(psb_dpk_) :: res
integer(psb_ipk_) :: i
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
#if defined(OPENMP)
res=dzero
!$omp parallel do private(i) reduction(+: res)
do i= 1, size(x%v)
res = res + abs(x%v(i))
end do
#else
res = sum(abs(x%v(1:n))) res = sum(abs(x%v(1:n)))
#endif
end function z_base_asum end function z_base_asum
@ -1882,11 +1954,15 @@ contains
integer(psb_ipk_) :: i, n integer(psb_ipk_) :: i, n
if (z%is_dev()) call z%sync() if (z%is_dev()) call z%sync()
#if defined(OPENMP)
n = size(x) n = size(x)
do i = 1, n, 1 !$omp parallel do private(i)
z%v(i) = x(i) + b do i = 1, n
z%v(i) = x(i) + b
end do end do
#else
z%v = x + b
#endif
info = 0 info = 0
end subroutine z_base_addconst_a2 end subroutine z_base_addconst_a2
@ -1914,9 +1990,6 @@ contains
end module psb_z_base_vect_mod end module psb_z_base_vect_mod
module psb_z_base_multivect_mod module psb_z_base_multivect_mod
use psb_const_mod use psb_const_mod

Loading…
Cancel
Save