From eb513e45c66a484851d8ed2d2ab2e4cfbe784ce9 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 3 Nov 2020 10:39:03 +0100 Subject: [PATCH] OpenMP for base_vect --- base/modules/serial/psb_c_base_vect_mod.F90 | 133 ++++++++++++++---- base/modules/serial/psb_d_base_vect_mod.F90 | 146 +++++++++++++++----- base/modules/serial/psb_i_base_vect_mod.F90 | 56 ++++++-- base/modules/serial/psb_l_base_vect_mod.F90 | 56 ++++++-- base/modules/serial/psb_s_base_vect_mod.F90 | 146 +++++++++++++++----- base/modules/serial/psb_z_base_vect_mod.F90 | 133 ++++++++++++++---- 6 files changed, 516 insertions(+), 154 deletions(-) diff --git a/base/modules/serial/psb_c_base_vect_mod.F90 b/base/modules/serial/psb_c_base_vect_mod.F90 index f59e238f..e68fef6c 100644 --- a/base/modules/serial/psb_c_base_vect_mod.F90 +++ b/base/modules/serial/psb_c_base_vect_mod.F90 @@ -266,14 +266,21 @@ contains complex(psb_spk_), intent(in) :: this(:) class(psb_c_base_vect_type), intent(inout) :: x integer(psb_ipk_) :: info - + integer(psb_ipk_) :: i + call psb_realloc(size(this),x%v,info) if (info /= 0) then call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld') return end if +#if defined (OPENMP) + !$omp parallel do private(i) + do i = 1, size(this) + x%v(i) = this(i) + end do +#else x%v(:) = this(:) - +#endif end subroutine c_base_bld_x ! @@ -403,7 +410,6 @@ contains case(psb_dupl_ovwrt_) do i = 1, n !loop over all val's rows - ! row actual block row if ((1 <= irl(i)).and.(irl(i) <= isz)) then ! this row belongs to me @@ -783,7 +789,7 @@ contains integer(psb_ipk_) :: info integer(psb_ipk_), optional :: n ! Local variables - integer(psb_ipk_) :: isz + integer(psb_ipk_) :: isz, i if (.not.allocated(x%v)) return if (.not.x%is_host()) call x%sync() @@ -794,7 +800,15 @@ contains call psb_errpush(psb_err_alloc_dealloc_,'base_get_vect') return end if - res(1:isz) = x%v(1:isz) + if (.false.) then + res(1:isz) = x%v(1:isz) + else + !$omp parallel do private(i) + do i=1, isz + res(i) = x%v(i) + end do + end if + end function c_base_get_vect ! @@ -812,7 +826,7 @@ contains complex(psb_spk_), intent(in) :: val integer(psb_ipk_), optional :: first, last - integer(psb_ipk_) :: first_, last_ + integer(psb_ipk_) :: first_, last_, i first_=1 last_=size(x%v) @@ -820,7 +834,14 @@ contains if (present(last)) last_ = min(last,last_) if (x%is_dev()) call x%sync() +#if defined(OPENMP) + !$omp parallel do private(i) + do i = first_, last_ + x%v(i) = val + end do +#else x%v(first_:last_) = val +#endif call x%set_host() end subroutine c_base_set_scal @@ -838,19 +859,27 @@ contains complex(psb_spk_), intent(in) :: val(:) integer(psb_ipk_), optional :: first, last - integer(psb_ipk_) :: first_, last_ + integer(psb_ipk_) :: first_, last_, i, info + if (.not.allocated(x%v)) then + call psb_realloc(size(val),x%v,info) + end if + first_ = 1 if (present(first)) first_ = max(1,first) last_ = min(psb_size(x%v),first_+size(val)-1) if (present(last)) last_ = min(last,last_) - if (allocated(x%v)) then - if (x%is_dev()) call x%sync() + if (x%is_dev()) call x%sync() + +#if defined(OPENMP) + !$omp parallel do private(i) + do i = first_, last_ + x%v(i) = val(i-first_+1) + end do +#else x%v(first_:last_) = val(1:last_-first_+1) - else - x%v = val - end if +#endif call x%set_host() end subroutine c_base_set_vect @@ -888,9 +917,18 @@ contains implicit none class(psb_c_base_vect_type), intent(inout) :: x + integer(psb_ipk_) :: i + if (allocated(x%v)) then if (x%is_dev()) call x%sync() +#if defined(OPENMP) + !$omp parallel do private(i) + do i=1, size(x%v) + x%v(i) = abs(x%v(i)) + end do +#else x%v = abs(x%v) +#endif call x%set_host() end if @@ -1132,6 +1170,7 @@ contains info = 0 if (y%is_dev()) call y%sync() n = min(size(y%v), size(x)) + !$omp parallel do private(i) do i=1, n y%v(i) = y%v(i)*x(i) end do @@ -1169,6 +1208,7 @@ contains if (beta == cone) then return else + !$omp parallel do private(i) do i=1, n z%v(i) = beta*z%v(i) end do @@ -1176,42 +1216,51 @@ contains else if (alpha == cone) then if (beta == czero) then + !$omp parallel do private(i) do i=1, n z%v(i) = y(i)*x(i) end do else if (beta == cone) then + !$omp parallel do private(i) do i=1, n z%v(i) = z%v(i) + y(i)*x(i) end do else + !$omp parallel do private(i) do i=1, n z%v(i) = beta*z%v(i) + y(i)*x(i) end do end if else if (alpha == -cone) then if (beta == czero) then + !$omp parallel do private(i) do i=1, n z%v(i) = -y(i)*x(i) end do else if (beta == cone) then + !$omp parallel do private(i) do i=1, n z%v(i) = z%v(i) - y(i)*x(i) end do else + !$omp parallel do private(i) do i=1, n z%v(i) = beta*z%v(i) - y(i)*x(i) end do end if else if (beta == czero) then + !$omp parallel do private(i) do i=1, n z%v(i) = alpha*y(i)*x(i) end do else if (beta == cone) then + !$omp parallel do private(i) do i=1, n z%v(i) = z%v(i) + alpha*y(i)*x(i) end do else + !$omp parallel do private(i) do i=1, n z%v(i) = beta*z%v(i) + alpha*y(i)*x(i) end do @@ -1314,7 +1363,6 @@ contains if (x%is_dev()) call x%sync() call x%div(x%v,y%v,info) - end subroutine c_base_div_v ! !> Function base_div_v2 @@ -1358,7 +1406,6 @@ contains if (x%is_dev()) call x%sync() call x%div(x%v,y%v,info,flag) - end subroutine c_base_div_v_check ! !> Function base_div_v2_check @@ -1381,7 +1428,6 @@ contains if (z%is_dev()) call z%sync() call z%div(x%v,y%v,info,flag) - end subroutine c_base_div_v2_check ! !> Function base_div_a2 @@ -1403,6 +1449,7 @@ contains if (z%is_dev()) call z%sync() n = min(size(y), size(x)) + !$omp parallel do private(i) do i=1, n z%v(i) = x(i)/y(i) end do @@ -1433,6 +1480,7 @@ contains if (z%is_dev()) call z%sync() n = min(size(y), size(x)) + ! $omp parallel do private(i) do i=1, n if (y(i) /= 0) then z%v(i) = x(i)/y(i) @@ -1443,7 +1491,6 @@ contains end do end if - end subroutine c_base_div_a2_check ! !> Function base_inv_v @@ -1487,7 +1534,6 @@ contains if (y%is_dev()) call y%sync() call y%inv(x%v,info,flag) - end subroutine c_base_inv_v_check ! !> Function base_inv_a2 @@ -1509,6 +1555,7 @@ contains if (y%is_dev()) call y%sync() n = size(x) + !$omp parallel do private(i) do i=1, n y%v(i) = 1_psb_spk_/x(i) end do @@ -1539,6 +1586,7 @@ contains if (y%is_dev()) call y%sync() n = size(x) + !$omp parallel do private(i) do i=1, n if (x(i) /= 0) then y%v(i) = 1_psb_spk_/x(i) @@ -1573,6 +1621,7 @@ contains if (z%is_dev()) call z%sync() n = size(x) + !$omp parallel do private(i) do i = 1, n, 1 if ( abs(x(i)).ge.c ) then z%v(i) = 1_psb_spk_ @@ -1618,14 +1667,21 @@ contains implicit none class(psb_c_base_vect_type), intent(inout) :: x complex(psb_spk_), intent (in) :: alpha + integer(psb_ipk_) :: i if (allocated(x%v)) then +#if defined(OPENMP) + !$omp parallel do private(i) + do i=1,size(x%v) + x%v(i) = alpha*x%v(i) + end do +#else x%v = alpha*x%v - call x%set_host() +#endif end if - + call x%set_host() end subroutine c_base_scal - + ! ! Norms 1, 2 and infinity ! @@ -1655,10 +1711,18 @@ contains class(psb_c_base_vect_type), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res + integer(psb_ipk_) :: i if (x%is_dev()) call x%sync() +#if defined(OPENMP) + res = szero + !$omp parallel do private(i) reduction(max: res) + do i=1, n + res = max(res,abs(x%v(i))) + end do +#else res = maxval(abs(x%v(1:n))) - +#endif end function c_base_amax @@ -1672,10 +1736,18 @@ contains class(psb_c_base_vect_type), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res - + integer(psb_ipk_) :: i + if (x%is_dev()) call x%sync() +#if defined(OPENMP) + res=szero + !$omp parallel do private(i) reduction(+: res) + do i= 1, size(x%v) + res = res + abs(x%v(i)) + end do +#else res = sum(abs(x%v(1:n))) - +#endif end function c_base_asum @@ -1882,13 +1954,17 @@ contains integer(psb_ipk_) :: i, n if (z%is_dev()) call z%sync() - +#if defined(OPENMP) n = size(x) - do i = 1, n, 1 - z%v(i) = x(i) + b + !$omp parallel do private(i) + do i = 1, n + z%v(i) = x(i) + b end do +#else + z%v = x + b +#endif info = 0 - + end subroutine c_base_addconst_a2 ! !> Function _base_addconst_v2 @@ -1914,9 +1990,6 @@ contains end module psb_c_base_vect_mod - - - module psb_c_base_multivect_mod use psb_const_mod diff --git a/base/modules/serial/psb_d_base_vect_mod.F90 b/base/modules/serial/psb_d_base_vect_mod.F90 index daf12cbf..09fd187b 100644 --- a/base/modules/serial/psb_d_base_vect_mod.F90 +++ b/base/modules/serial/psb_d_base_vect_mod.F90 @@ -273,14 +273,21 @@ contains real(psb_dpk_), intent(in) :: this(:) class(psb_d_base_vect_type), intent(inout) :: x integer(psb_ipk_) :: info - + integer(psb_ipk_) :: i + call psb_realloc(size(this),x%v,info) if (info /= 0) then call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld') return end if +#if defined (OPENMP) + !$omp parallel do private(i) + do i = 1, size(this) + x%v(i) = this(i) + end do +#else x%v(:) = this(:) - +#endif end subroutine d_base_bld_x ! @@ -410,7 +417,6 @@ contains case(psb_dupl_ovwrt_) do i = 1, n !loop over all val's rows - ! row actual block row if ((1 <= irl(i)).and.(irl(i) <= isz)) then ! this row belongs to me @@ -790,7 +796,7 @@ contains integer(psb_ipk_) :: info integer(psb_ipk_), optional :: n ! Local variables - integer(psb_ipk_) :: isz + integer(psb_ipk_) :: isz, i if (.not.allocated(x%v)) return if (.not.x%is_host()) call x%sync() @@ -801,7 +807,15 @@ contains call psb_errpush(psb_err_alloc_dealloc_,'base_get_vect') return end if - res(1:isz) = x%v(1:isz) + if (.false.) then + res(1:isz) = x%v(1:isz) + else + !$omp parallel do private(i) + do i=1, isz + res(i) = x%v(i) + end do + end if + end function d_base_get_vect ! @@ -819,7 +833,7 @@ contains real(psb_dpk_), intent(in) :: val integer(psb_ipk_), optional :: first, last - integer(psb_ipk_) :: first_, last_ + integer(psb_ipk_) :: first_, last_, i first_=1 last_=size(x%v) @@ -827,7 +841,14 @@ contains if (present(last)) last_ = min(last,last_) if (x%is_dev()) call x%sync() +#if defined(OPENMP) + !$omp parallel do private(i) + do i = first_, last_ + x%v(i) = val + end do +#else x%v(first_:last_) = val +#endif call x%set_host() end subroutine d_base_set_scal @@ -845,19 +866,27 @@ contains real(psb_dpk_), intent(in) :: val(:) integer(psb_ipk_), optional :: first, last - integer(psb_ipk_) :: first_, last_ + integer(psb_ipk_) :: first_, last_, i, info + if (.not.allocated(x%v)) then + call psb_realloc(size(val),x%v,info) + end if + first_ = 1 if (present(first)) first_ = max(1,first) last_ = min(psb_size(x%v),first_+size(val)-1) if (present(last)) last_ = min(last,last_) - if (allocated(x%v)) then - if (x%is_dev()) call x%sync() + if (x%is_dev()) call x%sync() + +#if defined(OPENMP) + !$omp parallel do private(i) + do i = first_, last_ + x%v(i) = val(i-first_+1) + end do +#else x%v(first_:last_) = val(1:last_-first_+1) - else - x%v = val - end if +#endif call x%set_host() end subroutine d_base_set_vect @@ -895,9 +924,18 @@ contains implicit none class(psb_d_base_vect_type), intent(inout) :: x + integer(psb_ipk_) :: i + if (allocated(x%v)) then if (x%is_dev()) call x%sync() +#if defined(OPENMP) + !$omp parallel do private(i) + do i=1, size(x%v) + x%v(i) = abs(x%v(i)) + end do +#else x%v = abs(x%v) +#endif call x%set_host() end if @@ -1139,6 +1177,7 @@ contains info = 0 if (y%is_dev()) call y%sync() n = min(size(y%v), size(x)) + !$omp parallel do private(i) do i=1, n y%v(i) = y%v(i)*x(i) end do @@ -1176,6 +1215,7 @@ contains if (beta == done) then return else + !$omp parallel do private(i) do i=1, n z%v(i) = beta*z%v(i) end do @@ -1183,42 +1223,51 @@ contains else if (alpha == done) then if (beta == dzero) then + !$omp parallel do private(i) do i=1, n z%v(i) = y(i)*x(i) end do else if (beta == done) then + !$omp parallel do private(i) do i=1, n z%v(i) = z%v(i) + y(i)*x(i) end do else + !$omp parallel do private(i) do i=1, n z%v(i) = beta*z%v(i) + y(i)*x(i) end do end if else if (alpha == -done) then if (beta == dzero) then + !$omp parallel do private(i) do i=1, n z%v(i) = -y(i)*x(i) end do else if (beta == done) then + !$omp parallel do private(i) do i=1, n z%v(i) = z%v(i) - y(i)*x(i) end do else + !$omp parallel do private(i) do i=1, n z%v(i) = beta*z%v(i) - y(i)*x(i) end do end if else if (beta == dzero) then + !$omp parallel do private(i) do i=1, n z%v(i) = alpha*y(i)*x(i) end do else if (beta == done) then + !$omp parallel do private(i) do i=1, n z%v(i) = z%v(i) + alpha*y(i)*x(i) end do else + !$omp parallel do private(i) do i=1, n z%v(i) = beta*z%v(i) + alpha*y(i)*x(i) end do @@ -1321,7 +1370,6 @@ contains if (x%is_dev()) call x%sync() call x%div(x%v,y%v,info) - end subroutine d_base_div_v ! !> Function base_div_v2 @@ -1365,7 +1413,6 @@ contains if (x%is_dev()) call x%sync() call x%div(x%v,y%v,info,flag) - end subroutine d_base_div_v_check ! !> Function base_div_v2_check @@ -1388,7 +1435,6 @@ contains if (z%is_dev()) call z%sync() call z%div(x%v,y%v,info,flag) - end subroutine d_base_div_v2_check ! !> Function base_div_a2 @@ -1410,6 +1456,7 @@ contains if (z%is_dev()) call z%sync() n = min(size(y), size(x)) + !$omp parallel do private(i) do i=1, n z%v(i) = x(i)/y(i) end do @@ -1440,6 +1487,7 @@ contains if (z%is_dev()) call z%sync() n = min(size(y), size(x)) + ! $omp parallel do private(i) do i=1, n if (y(i) /= 0) then z%v(i) = x(i)/y(i) @@ -1450,7 +1498,6 @@ contains end do end if - end subroutine d_base_div_a2_check ! !> Function base_inv_v @@ -1494,7 +1541,6 @@ contains if (y%is_dev()) call y%sync() call y%inv(x%v,info,flag) - end subroutine d_base_inv_v_check ! !> Function base_inv_a2 @@ -1516,6 +1562,7 @@ contains if (y%is_dev()) call y%sync() n = size(x) + !$omp parallel do private(i) do i=1, n y%v(i) = 1_psb_dpk_/x(i) end do @@ -1546,6 +1593,7 @@ contains if (y%is_dev()) call y%sync() n = size(x) + !$omp parallel do private(i) do i=1, n if (x(i) /= 0) then y%v(i) = 1_psb_dpk_/x(i) @@ -1580,6 +1628,7 @@ contains if (z%is_dev()) call z%sync() n = size(x) + !$omp parallel do private(i) do i = 1, n, 1 if ( abs(x(i)).ge.c ) then z%v(i) = 1_psb_dpk_ @@ -1625,14 +1674,21 @@ contains implicit none class(psb_d_base_vect_type), intent(inout) :: x real(psb_dpk_), intent (in) :: alpha + integer(psb_ipk_) :: i if (allocated(x%v)) then +#if defined(OPENMP) + !$omp parallel do private(i) + do i=1,size(x%v) + x%v(i) = alpha*x%v(i) + end do +#else x%v = alpha*x%v - call x%set_host() +#endif end if - + call x%set_host() end subroutine d_base_scal - + ! ! Norms 1, 2 and infinity ! @@ -1662,10 +1718,18 @@ contains class(psb_d_base_vect_type), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res + integer(psb_ipk_) :: i if (x%is_dev()) call x%sync() +#if defined(OPENMP) + res = dzero + !$omp parallel do private(i) reduction(max: res) + do i=1, n + res = max(res,abs(x%v(i))) + end do +#else res = maxval(abs(x%v(1:n))) - +#endif end function d_base_amax ! @@ -1678,10 +1742,18 @@ contains class(psb_d_base_vect_type), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res + integer(psb_ipk_) :: i if (x%is_dev()) call x%sync() +#if defined(OPENMP) + res = HUGE(done) + !$omp parallel do private(i) reduction(min: res) + do i=1, n + res = min(res,abs(x%v(i))) + end do +#else res = minval(x%v(1:n)) - +#endif end function d_base_min ! @@ -1730,10 +1802,11 @@ contains z = huge(z) n = min(size(y), size(x%v)) + !$omp parallel do private(i,temp) reduction(min: z) do i=1, n if ( y(i) /= dzero ) then temp = x%v(i)/y(i) - if (temp <= z) z = temp + z = min(z,temp) end if end do @@ -1750,10 +1823,18 @@ contains class(psb_d_base_vect_type), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res - + integer(psb_ipk_) :: i + if (x%is_dev()) call x%sync() +#if defined(OPENMP) + res=dzero + !$omp parallel do private(i) reduction(+: res) + do i= 1, size(x%v) + res = res + abs(x%v(i)) + end do +#else res = sum(abs(x%v(1:n))) - +#endif end function d_base_asum @@ -2052,13 +2133,17 @@ contains integer(psb_ipk_) :: i, n if (z%is_dev()) call z%sync() - +#if defined(OPENMP) n = size(x) - do i = 1, n, 1 - z%v(i) = x(i) + b + !$omp parallel do private(i) + do i = 1, n + z%v(i) = x(i) + b end do +#else + z%v = x + b +#endif info = 0 - + end subroutine d_base_addconst_a2 ! !> Function _base_addconst_v2 @@ -2084,9 +2169,6 @@ contains end module psb_d_base_vect_mod - - - module psb_d_base_multivect_mod use psb_const_mod diff --git a/base/modules/serial/psb_i_base_vect_mod.F90 b/base/modules/serial/psb_i_base_vect_mod.F90 index 55d7b47e..0289ecd0 100644 --- a/base/modules/serial/psb_i_base_vect_mod.F90 +++ b/base/modules/serial/psb_i_base_vect_mod.F90 @@ -202,14 +202,21 @@ contains integer(psb_ipk_), intent(in) :: this(:) class(psb_i_base_vect_type), intent(inout) :: x integer(psb_ipk_) :: info - + integer(psb_ipk_) :: i + call psb_realloc(size(this),x%v,info) if (info /= 0) then call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld') return end if +#if defined (OPENMP) + !$omp parallel do private(i) + do i = 1, size(this) + x%v(i) = this(i) + end do +#else x%v(:) = this(:) - +#endif end subroutine i_base_bld_x ! @@ -339,7 +346,6 @@ contains case(psb_dupl_ovwrt_) do i = 1, n !loop over all val's rows - ! row actual block row if ((1 <= irl(i)).and.(irl(i) <= isz)) then ! this row belongs to me @@ -719,7 +725,7 @@ contains integer(psb_ipk_) :: info integer(psb_ipk_), optional :: n ! Local variables - integer(psb_ipk_) :: isz + integer(psb_ipk_) :: isz, i if (.not.allocated(x%v)) return if (.not.x%is_host()) call x%sync() @@ -730,7 +736,15 @@ contains call psb_errpush(psb_err_alloc_dealloc_,'base_get_vect') return end if - res(1:isz) = x%v(1:isz) + if (.false.) then + res(1:isz) = x%v(1:isz) + else + !$omp parallel do private(i) + do i=1, isz + res(i) = x%v(i) + end do + end if + end function i_base_get_vect ! @@ -748,7 +762,7 @@ contains integer(psb_ipk_), intent(in) :: val integer(psb_ipk_), optional :: first, last - integer(psb_ipk_) :: first_, last_ + integer(psb_ipk_) :: first_, last_, i first_=1 last_=size(x%v) @@ -756,7 +770,14 @@ contains if (present(last)) last_ = min(last,last_) if (x%is_dev()) call x%sync() +#if defined(OPENMP) + !$omp parallel do private(i) + do i = first_, last_ + x%v(i) = val + end do +#else x%v(first_:last_) = val +#endif call x%set_host() end subroutine i_base_set_scal @@ -774,19 +795,27 @@ contains integer(psb_ipk_), intent(in) :: val(:) integer(psb_ipk_), optional :: first, last - integer(psb_ipk_) :: first_, last_ + integer(psb_ipk_) :: first_, last_, i, info + if (.not.allocated(x%v)) then + call psb_realloc(size(val),x%v,info) + end if + first_ = 1 if (present(first)) first_ = max(1,first) last_ = min(psb_size(x%v),first_+size(val)-1) if (present(last)) last_ = min(last,last_) - if (allocated(x%v)) then - if (x%is_dev()) call x%sync() + if (x%is_dev()) call x%sync() + +#if defined(OPENMP) + !$omp parallel do private(i) + do i = first_, last_ + x%v(i) = val(i-first_+1) + end do +#else x%v(first_:last_) = val(1:last_-first_+1) - else - x%v = val - end if +#endif call x%set_host() end subroutine i_base_set_vect @@ -980,9 +1009,6 @@ contains end module psb_i_base_vect_mod - - - module psb_i_base_multivect_mod use psb_const_mod diff --git a/base/modules/serial/psb_l_base_vect_mod.F90 b/base/modules/serial/psb_l_base_vect_mod.F90 index 53b45f2a..d8654f63 100644 --- a/base/modules/serial/psb_l_base_vect_mod.F90 +++ b/base/modules/serial/psb_l_base_vect_mod.F90 @@ -203,14 +203,21 @@ contains integer(psb_lpk_), intent(in) :: this(:) class(psb_l_base_vect_type), intent(inout) :: x integer(psb_ipk_) :: info - + integer(psb_ipk_) :: i + call psb_realloc(size(this),x%v,info) if (info /= 0) then call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld') return end if +#if defined (OPENMP) + !$omp parallel do private(i) + do i = 1, size(this) + x%v(i) = this(i) + end do +#else x%v(:) = this(:) - +#endif end subroutine l_base_bld_x ! @@ -340,7 +347,6 @@ contains case(psb_dupl_ovwrt_) do i = 1, n !loop over all val's rows - ! row actual block row if ((1 <= irl(i)).and.(irl(i) <= isz)) then ! this row belongs to me @@ -720,7 +726,7 @@ contains integer(psb_ipk_) :: info integer(psb_ipk_), optional :: n ! Local variables - integer(psb_ipk_) :: isz + integer(psb_ipk_) :: isz, i if (.not.allocated(x%v)) return if (.not.x%is_host()) call x%sync() @@ -731,7 +737,15 @@ contains call psb_errpush(psb_err_alloc_dealloc_,'base_get_vect') return end if - res(1:isz) = x%v(1:isz) + if (.false.) then + res(1:isz) = x%v(1:isz) + else + !$omp parallel do private(i) + do i=1, isz + res(i) = x%v(i) + end do + end if + end function l_base_get_vect ! @@ -749,7 +763,7 @@ contains integer(psb_lpk_), intent(in) :: val integer(psb_ipk_), optional :: first, last - integer(psb_ipk_) :: first_, last_ + integer(psb_ipk_) :: first_, last_, i first_=1 last_=size(x%v) @@ -757,7 +771,14 @@ contains if (present(last)) last_ = min(last,last_) if (x%is_dev()) call x%sync() +#if defined(OPENMP) + !$omp parallel do private(i) + do i = first_, last_ + x%v(i) = val + end do +#else x%v(first_:last_) = val +#endif call x%set_host() end subroutine l_base_set_scal @@ -775,19 +796,27 @@ contains integer(psb_lpk_), intent(in) :: val(:) integer(psb_ipk_), optional :: first, last - integer(psb_ipk_) :: first_, last_ + integer(psb_ipk_) :: first_, last_, i, info + if (.not.allocated(x%v)) then + call psb_realloc(size(val),x%v,info) + end if + first_ = 1 if (present(first)) first_ = max(1,first) last_ = min(psb_size(x%v),first_+size(val)-1) if (present(last)) last_ = min(last,last_) - if (allocated(x%v)) then - if (x%is_dev()) call x%sync() + if (x%is_dev()) call x%sync() + +#if defined(OPENMP) + !$omp parallel do private(i) + do i = first_, last_ + x%v(i) = val(i-first_+1) + end do +#else x%v(first_:last_) = val(1:last_-first_+1) - else - x%v = val - end if +#endif call x%set_host() end subroutine l_base_set_vect @@ -981,9 +1010,6 @@ contains end module psb_l_base_vect_mod - - - module psb_l_base_multivect_mod use psb_const_mod diff --git a/base/modules/serial/psb_s_base_vect_mod.F90 b/base/modules/serial/psb_s_base_vect_mod.F90 index c185e341..231b1dc7 100644 --- a/base/modules/serial/psb_s_base_vect_mod.F90 +++ b/base/modules/serial/psb_s_base_vect_mod.F90 @@ -273,14 +273,21 @@ contains real(psb_spk_), intent(in) :: this(:) class(psb_s_base_vect_type), intent(inout) :: x integer(psb_ipk_) :: info - + integer(psb_ipk_) :: i + call psb_realloc(size(this),x%v,info) if (info /= 0) then call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld') return end if +#if defined (OPENMP) + !$omp parallel do private(i) + do i = 1, size(this) + x%v(i) = this(i) + end do +#else x%v(:) = this(:) - +#endif end subroutine s_base_bld_x ! @@ -410,7 +417,6 @@ contains case(psb_dupl_ovwrt_) do i = 1, n !loop over all val's rows - ! row actual block row if ((1 <= irl(i)).and.(irl(i) <= isz)) then ! this row belongs to me @@ -790,7 +796,7 @@ contains integer(psb_ipk_) :: info integer(psb_ipk_), optional :: n ! Local variables - integer(psb_ipk_) :: isz + integer(psb_ipk_) :: isz, i if (.not.allocated(x%v)) return if (.not.x%is_host()) call x%sync() @@ -801,7 +807,15 @@ contains call psb_errpush(psb_err_alloc_dealloc_,'base_get_vect') return end if - res(1:isz) = x%v(1:isz) + if (.false.) then + res(1:isz) = x%v(1:isz) + else + !$omp parallel do private(i) + do i=1, isz + res(i) = x%v(i) + end do + end if + end function s_base_get_vect ! @@ -819,7 +833,7 @@ contains real(psb_spk_), intent(in) :: val integer(psb_ipk_), optional :: first, last - integer(psb_ipk_) :: first_, last_ + integer(psb_ipk_) :: first_, last_, i first_=1 last_=size(x%v) @@ -827,7 +841,14 @@ contains if (present(last)) last_ = min(last,last_) if (x%is_dev()) call x%sync() +#if defined(OPENMP) + !$omp parallel do private(i) + do i = first_, last_ + x%v(i) = val + end do +#else x%v(first_:last_) = val +#endif call x%set_host() end subroutine s_base_set_scal @@ -845,19 +866,27 @@ contains real(psb_spk_), intent(in) :: val(:) integer(psb_ipk_), optional :: first, last - integer(psb_ipk_) :: first_, last_ + integer(psb_ipk_) :: first_, last_, i, info + if (.not.allocated(x%v)) then + call psb_realloc(size(val),x%v,info) + end if + first_ = 1 if (present(first)) first_ = max(1,first) last_ = min(psb_size(x%v),first_+size(val)-1) if (present(last)) last_ = min(last,last_) - if (allocated(x%v)) then - if (x%is_dev()) call x%sync() + if (x%is_dev()) call x%sync() + +#if defined(OPENMP) + !$omp parallel do private(i) + do i = first_, last_ + x%v(i) = val(i-first_+1) + end do +#else x%v(first_:last_) = val(1:last_-first_+1) - else - x%v = val - end if +#endif call x%set_host() end subroutine s_base_set_vect @@ -895,9 +924,18 @@ contains implicit none class(psb_s_base_vect_type), intent(inout) :: x + integer(psb_ipk_) :: i + if (allocated(x%v)) then if (x%is_dev()) call x%sync() +#if defined(OPENMP) + !$omp parallel do private(i) + do i=1, size(x%v) + x%v(i) = abs(x%v(i)) + end do +#else x%v = abs(x%v) +#endif call x%set_host() end if @@ -1139,6 +1177,7 @@ contains info = 0 if (y%is_dev()) call y%sync() n = min(size(y%v), size(x)) + !$omp parallel do private(i) do i=1, n y%v(i) = y%v(i)*x(i) end do @@ -1176,6 +1215,7 @@ contains if (beta == sone) then return else + !$omp parallel do private(i) do i=1, n z%v(i) = beta*z%v(i) end do @@ -1183,42 +1223,51 @@ contains else if (alpha == sone) then if (beta == szero) then + !$omp parallel do private(i) do i=1, n z%v(i) = y(i)*x(i) end do else if (beta == sone) then + !$omp parallel do private(i) do i=1, n z%v(i) = z%v(i) + y(i)*x(i) end do else + !$omp parallel do private(i) do i=1, n z%v(i) = beta*z%v(i) + y(i)*x(i) end do end if else if (alpha == -sone) then if (beta == szero) then + !$omp parallel do private(i) do i=1, n z%v(i) = -y(i)*x(i) end do else if (beta == sone) then + !$omp parallel do private(i) do i=1, n z%v(i) = z%v(i) - y(i)*x(i) end do else + !$omp parallel do private(i) do i=1, n z%v(i) = beta*z%v(i) - y(i)*x(i) end do end if else if (beta == szero) then + !$omp parallel do private(i) do i=1, n z%v(i) = alpha*y(i)*x(i) end do else if (beta == sone) then + !$omp parallel do private(i) do i=1, n z%v(i) = z%v(i) + alpha*y(i)*x(i) end do else + !$omp parallel do private(i) do i=1, n z%v(i) = beta*z%v(i) + alpha*y(i)*x(i) end do @@ -1321,7 +1370,6 @@ contains if (x%is_dev()) call x%sync() call x%div(x%v,y%v,info) - end subroutine s_base_div_v ! !> Function base_div_v2 @@ -1365,7 +1413,6 @@ contains if (x%is_dev()) call x%sync() call x%div(x%v,y%v,info,flag) - end subroutine s_base_div_v_check ! !> Function base_div_v2_check @@ -1388,7 +1435,6 @@ contains if (z%is_dev()) call z%sync() call z%div(x%v,y%v,info,flag) - end subroutine s_base_div_v2_check ! !> Function base_div_a2 @@ -1410,6 +1456,7 @@ contains if (z%is_dev()) call z%sync() n = min(size(y), size(x)) + !$omp parallel do private(i) do i=1, n z%v(i) = x(i)/y(i) end do @@ -1440,6 +1487,7 @@ contains if (z%is_dev()) call z%sync() n = min(size(y), size(x)) + ! $omp parallel do private(i) do i=1, n if (y(i) /= 0) then z%v(i) = x(i)/y(i) @@ -1450,7 +1498,6 @@ contains end do end if - end subroutine s_base_div_a2_check ! !> Function base_inv_v @@ -1494,7 +1541,6 @@ contains if (y%is_dev()) call y%sync() call y%inv(x%v,info,flag) - end subroutine s_base_inv_v_check ! !> Function base_inv_a2 @@ -1516,6 +1562,7 @@ contains if (y%is_dev()) call y%sync() n = size(x) + !$omp parallel do private(i) do i=1, n y%v(i) = 1_psb_spk_/x(i) end do @@ -1546,6 +1593,7 @@ contains if (y%is_dev()) call y%sync() n = size(x) + !$omp parallel do private(i) do i=1, n if (x(i) /= 0) then y%v(i) = 1_psb_spk_/x(i) @@ -1580,6 +1628,7 @@ contains if (z%is_dev()) call z%sync() n = size(x) + !$omp parallel do private(i) do i = 1, n, 1 if ( abs(x(i)).ge.c ) then z%v(i) = 1_psb_spk_ @@ -1625,14 +1674,21 @@ contains implicit none class(psb_s_base_vect_type), intent(inout) :: x real(psb_spk_), intent (in) :: alpha + integer(psb_ipk_) :: i if (allocated(x%v)) then +#if defined(OPENMP) + !$omp parallel do private(i) + do i=1,size(x%v) + x%v(i) = alpha*x%v(i) + end do +#else x%v = alpha*x%v - call x%set_host() +#endif end if - + call x%set_host() end subroutine s_base_scal - + ! ! Norms 1, 2 and infinity ! @@ -1662,10 +1718,18 @@ contains class(psb_s_base_vect_type), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res + integer(psb_ipk_) :: i if (x%is_dev()) call x%sync() +#if defined(OPENMP) + res = szero + !$omp parallel do private(i) reduction(max: res) + do i=1, n + res = max(res,abs(x%v(i))) + end do +#else res = maxval(abs(x%v(1:n))) - +#endif end function s_base_amax ! @@ -1678,10 +1742,18 @@ contains class(psb_s_base_vect_type), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res + integer(psb_ipk_) :: i if (x%is_dev()) call x%sync() +#if defined(OPENMP) + res = HUGE(sone) + !$omp parallel do private(i) reduction(min: res) + do i=1, n + res = min(res,abs(x%v(i))) + end do +#else res = minval(x%v(1:n)) - +#endif end function s_base_min ! @@ -1730,10 +1802,11 @@ contains z = huge(z) n = min(size(y), size(x%v)) + !$omp parallel do private(i,temp) reduction(min: z) do i=1, n if ( y(i) /= szero ) then temp = x%v(i)/y(i) - if (temp <= z) z = temp + z = min(z,temp) end if end do @@ -1750,10 +1823,18 @@ contains class(psb_s_base_vect_type), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res - + integer(psb_ipk_) :: i + if (x%is_dev()) call x%sync() +#if defined(OPENMP) + res=szero + !$omp parallel do private(i) reduction(+: res) + do i= 1, size(x%v) + res = res + abs(x%v(i)) + end do +#else res = sum(abs(x%v(1:n))) - +#endif end function s_base_asum @@ -2052,13 +2133,17 @@ contains integer(psb_ipk_) :: i, n if (z%is_dev()) call z%sync() - +#if defined(OPENMP) n = size(x) - do i = 1, n, 1 - z%v(i) = x(i) + b + !$omp parallel do private(i) + do i = 1, n + z%v(i) = x(i) + b end do +#else + z%v = x + b +#endif info = 0 - + end subroutine s_base_addconst_a2 ! !> Function _base_addconst_v2 @@ -2084,9 +2169,6 @@ contains end module psb_s_base_vect_mod - - - module psb_s_base_multivect_mod use psb_const_mod diff --git a/base/modules/serial/psb_z_base_vect_mod.F90 b/base/modules/serial/psb_z_base_vect_mod.F90 index 1daed233..08cfb840 100644 --- a/base/modules/serial/psb_z_base_vect_mod.F90 +++ b/base/modules/serial/psb_z_base_vect_mod.F90 @@ -266,14 +266,21 @@ contains complex(psb_dpk_), intent(in) :: this(:) class(psb_z_base_vect_type), intent(inout) :: x integer(psb_ipk_) :: info - + integer(psb_ipk_) :: i + call psb_realloc(size(this),x%v,info) if (info /= 0) then call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld') return end if +#if defined (OPENMP) + !$omp parallel do private(i) + do i = 1, size(this) + x%v(i) = this(i) + end do +#else x%v(:) = this(:) - +#endif end subroutine z_base_bld_x ! @@ -403,7 +410,6 @@ contains case(psb_dupl_ovwrt_) do i = 1, n !loop over all val's rows - ! row actual block row if ((1 <= irl(i)).and.(irl(i) <= isz)) then ! this row belongs to me @@ -783,7 +789,7 @@ contains integer(psb_ipk_) :: info integer(psb_ipk_), optional :: n ! Local variables - integer(psb_ipk_) :: isz + integer(psb_ipk_) :: isz, i if (.not.allocated(x%v)) return if (.not.x%is_host()) call x%sync() @@ -794,7 +800,15 @@ contains call psb_errpush(psb_err_alloc_dealloc_,'base_get_vect') return end if - res(1:isz) = x%v(1:isz) + if (.false.) then + res(1:isz) = x%v(1:isz) + else + !$omp parallel do private(i) + do i=1, isz + res(i) = x%v(i) + end do + end if + end function z_base_get_vect ! @@ -812,7 +826,7 @@ contains complex(psb_dpk_), intent(in) :: val integer(psb_ipk_), optional :: first, last - integer(psb_ipk_) :: first_, last_ + integer(psb_ipk_) :: first_, last_, i first_=1 last_=size(x%v) @@ -820,7 +834,14 @@ contains if (present(last)) last_ = min(last,last_) if (x%is_dev()) call x%sync() +#if defined(OPENMP) + !$omp parallel do private(i) + do i = first_, last_ + x%v(i) = val + end do +#else x%v(first_:last_) = val +#endif call x%set_host() end subroutine z_base_set_scal @@ -838,19 +859,27 @@ contains complex(psb_dpk_), intent(in) :: val(:) integer(psb_ipk_), optional :: first, last - integer(psb_ipk_) :: first_, last_ + integer(psb_ipk_) :: first_, last_, i, info + if (.not.allocated(x%v)) then + call psb_realloc(size(val),x%v,info) + end if + first_ = 1 if (present(first)) first_ = max(1,first) last_ = min(psb_size(x%v),first_+size(val)-1) if (present(last)) last_ = min(last,last_) - if (allocated(x%v)) then - if (x%is_dev()) call x%sync() + if (x%is_dev()) call x%sync() + +#if defined(OPENMP) + !$omp parallel do private(i) + do i = first_, last_ + x%v(i) = val(i-first_+1) + end do +#else x%v(first_:last_) = val(1:last_-first_+1) - else - x%v = val - end if +#endif call x%set_host() end subroutine z_base_set_vect @@ -888,9 +917,18 @@ contains implicit none class(psb_z_base_vect_type), intent(inout) :: x + integer(psb_ipk_) :: i + if (allocated(x%v)) then if (x%is_dev()) call x%sync() +#if defined(OPENMP) + !$omp parallel do private(i) + do i=1, size(x%v) + x%v(i) = abs(x%v(i)) + end do +#else x%v = abs(x%v) +#endif call x%set_host() end if @@ -1132,6 +1170,7 @@ contains info = 0 if (y%is_dev()) call y%sync() n = min(size(y%v), size(x)) + !$omp parallel do private(i) do i=1, n y%v(i) = y%v(i)*x(i) end do @@ -1169,6 +1208,7 @@ contains if (beta == zone) then return else + !$omp parallel do private(i) do i=1, n z%v(i) = beta*z%v(i) end do @@ -1176,42 +1216,51 @@ contains else if (alpha == zone) then if (beta == zzero) then + !$omp parallel do private(i) do i=1, n z%v(i) = y(i)*x(i) end do else if (beta == zone) then + !$omp parallel do private(i) do i=1, n z%v(i) = z%v(i) + y(i)*x(i) end do else + !$omp parallel do private(i) do i=1, n z%v(i) = beta*z%v(i) + y(i)*x(i) end do end if else if (alpha == -zone) then if (beta == zzero) then + !$omp parallel do private(i) do i=1, n z%v(i) = -y(i)*x(i) end do else if (beta == zone) then + !$omp parallel do private(i) do i=1, n z%v(i) = z%v(i) - y(i)*x(i) end do else + !$omp parallel do private(i) do i=1, n z%v(i) = beta*z%v(i) - y(i)*x(i) end do end if else if (beta == zzero) then + !$omp parallel do private(i) do i=1, n z%v(i) = alpha*y(i)*x(i) end do else if (beta == zone) then + !$omp parallel do private(i) do i=1, n z%v(i) = z%v(i) + alpha*y(i)*x(i) end do else + !$omp parallel do private(i) do i=1, n z%v(i) = beta*z%v(i) + alpha*y(i)*x(i) end do @@ -1314,7 +1363,6 @@ contains if (x%is_dev()) call x%sync() call x%div(x%v,y%v,info) - end subroutine z_base_div_v ! !> Function base_div_v2 @@ -1358,7 +1406,6 @@ contains if (x%is_dev()) call x%sync() call x%div(x%v,y%v,info,flag) - end subroutine z_base_div_v_check ! !> Function base_div_v2_check @@ -1381,7 +1428,6 @@ contains if (z%is_dev()) call z%sync() call z%div(x%v,y%v,info,flag) - end subroutine z_base_div_v2_check ! !> Function base_div_a2 @@ -1403,6 +1449,7 @@ contains if (z%is_dev()) call z%sync() n = min(size(y), size(x)) + !$omp parallel do private(i) do i=1, n z%v(i) = x(i)/y(i) end do @@ -1433,6 +1480,7 @@ contains if (z%is_dev()) call z%sync() n = min(size(y), size(x)) + ! $omp parallel do private(i) do i=1, n if (y(i) /= 0) then z%v(i) = x(i)/y(i) @@ -1443,7 +1491,6 @@ contains end do end if - end subroutine z_base_div_a2_check ! !> Function base_inv_v @@ -1487,7 +1534,6 @@ contains if (y%is_dev()) call y%sync() call y%inv(x%v,info,flag) - end subroutine z_base_inv_v_check ! !> Function base_inv_a2 @@ -1509,6 +1555,7 @@ contains if (y%is_dev()) call y%sync() n = size(x) + !$omp parallel do private(i) do i=1, n y%v(i) = 1_psb_dpk_/x(i) end do @@ -1539,6 +1586,7 @@ contains if (y%is_dev()) call y%sync() n = size(x) + !$omp parallel do private(i) do i=1, n if (x(i) /= 0) then y%v(i) = 1_psb_dpk_/x(i) @@ -1573,6 +1621,7 @@ contains if (z%is_dev()) call z%sync() n = size(x) + !$omp parallel do private(i) do i = 1, n, 1 if ( abs(x(i)).ge.c ) then z%v(i) = 1_psb_dpk_ @@ -1618,14 +1667,21 @@ contains implicit none class(psb_z_base_vect_type), intent(inout) :: x complex(psb_dpk_), intent (in) :: alpha + integer(psb_ipk_) :: i if (allocated(x%v)) then +#if defined(OPENMP) + !$omp parallel do private(i) + do i=1,size(x%v) + x%v(i) = alpha*x%v(i) + end do +#else x%v = alpha*x%v - call x%set_host() +#endif end if - + call x%set_host() end subroutine z_base_scal - + ! ! Norms 1, 2 and infinity ! @@ -1655,10 +1711,18 @@ contains class(psb_z_base_vect_type), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res + integer(psb_ipk_) :: i if (x%is_dev()) call x%sync() +#if defined(OPENMP) + res = dzero + !$omp parallel do private(i) reduction(max: res) + do i=1, n + res = max(res,abs(x%v(i))) + end do +#else res = maxval(abs(x%v(1:n))) - +#endif end function z_base_amax @@ -1672,10 +1736,18 @@ contains class(psb_z_base_vect_type), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res - + integer(psb_ipk_) :: i + if (x%is_dev()) call x%sync() +#if defined(OPENMP) + res=dzero + !$omp parallel do private(i) reduction(+: res) + do i= 1, size(x%v) + res = res + abs(x%v(i)) + end do +#else res = sum(abs(x%v(1:n))) - +#endif end function z_base_asum @@ -1882,13 +1954,17 @@ contains integer(psb_ipk_) :: i, n if (z%is_dev()) call z%sync() - +#if defined(OPENMP) n = size(x) - do i = 1, n, 1 - z%v(i) = x(i) + b + !$omp parallel do private(i) + do i = 1, n + z%v(i) = x(i) + b end do +#else + z%v = x + b +#endif info = 0 - + end subroutine z_base_addconst_a2 ! !> Function _base_addconst_v2 @@ -1914,9 +1990,6 @@ contains end module psb_z_base_vect_mod - - - module psb_z_base_multivect_mod use psb_const_mod