From 8e7679dc025a67e7ed9a0c380944de18a7b04bcb Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 9 Jul 2019 13:36:38 +0100 Subject: [PATCH] Initial version with OpenMP directives. --- base/modules/serial/psb_c_base_vect_mod.f90 | 27 ++++++++++++++----- base/modules/serial/psb_d_base_vect_mod.f90 | 27 ++++++++++++++----- base/modules/serial/psb_s_base_vect_mod.f90 | 27 ++++++++++++++----- base/modules/serial/psb_z_base_vect_mod.f90 | 27 ++++++++++++++----- base/serial/impl/psb_c_csr_impl.f90 | 30 ++++++++++++++------- base/serial/impl/psb_d_csr_impl.f90 | 30 ++++++++++++++------- base/serial/impl/psb_s_csr_impl.f90 | 30 ++++++++++++++------- base/serial/impl/psb_z_csr_impl.f90 | 30 ++++++++++++++------- base/serial/psi_c_serial_impl.f90 | 15 +++++++++++ base/serial/psi_d_serial_impl.f90 | 15 +++++++++++ base/serial/psi_i_serial_impl.f90 | 15 +++++++++++ base/serial/psi_s_serial_impl.f90 | 15 +++++++++++ base/serial/psi_z_serial_impl.f90 | 15 +++++++++++ prec/impl/psb_c_diagprec_impl.f90 | 1 + prec/impl/psb_d_diagprec_impl.f90 | 1 + prec/impl/psb_s_diagprec_impl.f90 | 1 + prec/impl/psb_z_diagprec_impl.f90 | 1 + 17 files changed, 243 insertions(+), 64 deletions(-) diff --git a/base/modules/serial/psb_c_base_vect_mod.f90 b/base/modules/serial/psb_c_base_vect_mod.f90 index ec9d1d99..2571ce63 100644 --- a/base/modules/serial/psb_c_base_vect_mod.f90 +++ b/base/modules/serial/psb_c_base_vect_mod.f90 @@ -964,6 +964,7 @@ contains info = 0 if (y%is_dev()) call y%sync() n = min(size(y%v), size(x)) + !$omp parallel do private(i) do i=1, n y%v(i) = y%v(i)*x(i) end do @@ -1001,6 +1002,7 @@ contains if (beta == cone) then return else + !$omp parallel do private(i) shared(beta) do i=1, n z%v(i) = beta*z%v(i) end do @@ -1008,42 +1010,51 @@ contains else if (alpha == cone) then if (beta == czero) then + !$omp parallel do private(i) do i=1, n z%v(i) = y(i)*x(i) end do else if (beta == cone) then + !$omp parallel do private(i) do i=1, n z%v(i) = z%v(i) + y(i)*x(i) end do - else + else + !$omp parallel do private(i) shared(beta) do i=1, n z%v(i) = beta*z%v(i) + y(i)*x(i) end do end if else if (alpha == -cone) then if (beta == czero) then + !$omp parallel do private(i) do i=1, n z%v(i) = -y(i)*x(i) end do else if (beta == cone) then + !$omp parallel do private(i) do i=1, n z%v(i) = z%v(i) - y(i)*x(i) end do else + !$omp parallel do private(i) shared(beta) do i=1, n z%v(i) = beta*z%v(i) - y(i)*x(i) end do end if else if (beta == czero) then + !$omp parallel do private(i) shared(alpha) do i=1, n z%v(i) = alpha*y(i)*x(i) end do - else if (beta == cone) then + else if (beta == cone) then + !$omp parallel do private(i) shared(alpha) do i=1, n z%v(i) = z%v(i) + alpha*y(i)*x(i) end do - else + else + !$omp parallel do private(i) shared(alpha,beta) do i=1, n z%v(i) = beta*z%v(i) + alpha*y(i)*x(i) end do @@ -1143,8 +1154,10 @@ contains class(psb_c_base_vect_type), intent(inout) :: x complex(psb_spk_), intent (in) :: alpha - if (allocated(x%v)) then + if (allocated(x%v)) then + !$OMP parallel workshare x%v = alpha*x%v + !$OMP end parallel workshare call x%set_host() end if @@ -1181,8 +1194,9 @@ contains real(psb_spk_) :: res if (x%is_dev()) call x%sync() + !$OMP parallel workshare res = maxval(abs(x%v(1:n))) - + !$OMP end parallel workshare end function c_base_amax ! @@ -1197,8 +1211,9 @@ contains real(psb_spk_) :: res if (x%is_dev()) call x%sync() + !$OMP parallel workshare res = sum(abs(x%v(1:n))) - + !$OMP end parallel workshare end function c_base_asum diff --git a/base/modules/serial/psb_d_base_vect_mod.f90 b/base/modules/serial/psb_d_base_vect_mod.f90 index 3413a6da..1e724d13 100644 --- a/base/modules/serial/psb_d_base_vect_mod.f90 +++ b/base/modules/serial/psb_d_base_vect_mod.f90 @@ -964,6 +964,7 @@ contains info = 0 if (y%is_dev()) call y%sync() n = min(size(y%v), size(x)) + !$omp parallel do private(i) do i=1, n y%v(i) = y%v(i)*x(i) end do @@ -1001,6 +1002,7 @@ contains if (beta == done) then return else + !$omp parallel do private(i) shared(beta) do i=1, n z%v(i) = beta*z%v(i) end do @@ -1008,42 +1010,51 @@ contains else if (alpha == done) then if (beta == dzero) then + !$omp parallel do private(i) do i=1, n z%v(i) = y(i)*x(i) end do else if (beta == done) then + !$omp parallel do private(i) do i=1, n z%v(i) = z%v(i) + y(i)*x(i) end do - else + else + !$omp parallel do private(i) shared(beta) do i=1, n z%v(i) = beta*z%v(i) + y(i)*x(i) end do end if else if (alpha == -done) then if (beta == dzero) then + !$omp parallel do private(i) do i=1, n z%v(i) = -y(i)*x(i) end do else if (beta == done) then + !$omp parallel do private(i) do i=1, n z%v(i) = z%v(i) - y(i)*x(i) end do else + !$omp parallel do private(i) shared(beta) do i=1, n z%v(i) = beta*z%v(i) - y(i)*x(i) end do end if else if (beta == dzero) then + !$omp parallel do private(i) shared(alpha) do i=1, n z%v(i) = alpha*y(i)*x(i) end do - else if (beta == done) then + else if (beta == done) then + !$omp parallel do private(i) shared(alpha) do i=1, n z%v(i) = z%v(i) + alpha*y(i)*x(i) end do - else + else + !$omp parallel do private(i) shared(alpha,beta) do i=1, n z%v(i) = beta*z%v(i) + alpha*y(i)*x(i) end do @@ -1143,8 +1154,10 @@ contains class(psb_d_base_vect_type), intent(inout) :: x real(psb_dpk_), intent (in) :: alpha - if (allocated(x%v)) then + if (allocated(x%v)) then + !$OMP parallel workshare x%v = alpha*x%v + !$OMP end parallel workshare call x%set_host() end if @@ -1181,8 +1194,9 @@ contains real(psb_dpk_) :: res if (x%is_dev()) call x%sync() + !$OMP parallel workshare res = maxval(abs(x%v(1:n))) - + !$OMP end parallel workshare end function d_base_amax ! @@ -1197,8 +1211,9 @@ contains real(psb_dpk_) :: res if (x%is_dev()) call x%sync() + !$OMP parallel workshare res = sum(abs(x%v(1:n))) - + !$OMP end parallel workshare end function d_base_asum diff --git a/base/modules/serial/psb_s_base_vect_mod.f90 b/base/modules/serial/psb_s_base_vect_mod.f90 index 1bbc1750..26c8b7a6 100644 --- a/base/modules/serial/psb_s_base_vect_mod.f90 +++ b/base/modules/serial/psb_s_base_vect_mod.f90 @@ -964,6 +964,7 @@ contains info = 0 if (y%is_dev()) call y%sync() n = min(size(y%v), size(x)) + !$omp parallel do private(i) do i=1, n y%v(i) = y%v(i)*x(i) end do @@ -1001,6 +1002,7 @@ contains if (beta == sone) then return else + !$omp parallel do private(i) shared(beta) do i=1, n z%v(i) = beta*z%v(i) end do @@ -1008,42 +1010,51 @@ contains else if (alpha == sone) then if (beta == szero) then + !$omp parallel do private(i) do i=1, n z%v(i) = y(i)*x(i) end do else if (beta == sone) then + !$omp parallel do private(i) do i=1, n z%v(i) = z%v(i) + y(i)*x(i) end do - else + else + !$omp parallel do private(i) shared(beta) do i=1, n z%v(i) = beta*z%v(i) + y(i)*x(i) end do end if else if (alpha == -sone) then if (beta == szero) then + !$omp parallel do private(i) do i=1, n z%v(i) = -y(i)*x(i) end do else if (beta == sone) then + !$omp parallel do private(i) do i=1, n z%v(i) = z%v(i) - y(i)*x(i) end do else + !$omp parallel do private(i) shared(beta) do i=1, n z%v(i) = beta*z%v(i) - y(i)*x(i) end do end if else if (beta == szero) then + !$omp parallel do private(i) shared(alpha) do i=1, n z%v(i) = alpha*y(i)*x(i) end do - else if (beta == sone) then + else if (beta == sone) then + !$omp parallel do private(i) shared(alpha) do i=1, n z%v(i) = z%v(i) + alpha*y(i)*x(i) end do - else + else + !$omp parallel do private(i) shared(alpha,beta) do i=1, n z%v(i) = beta*z%v(i) + alpha*y(i)*x(i) end do @@ -1143,8 +1154,10 @@ contains class(psb_s_base_vect_type), intent(inout) :: x real(psb_spk_), intent (in) :: alpha - if (allocated(x%v)) then + if (allocated(x%v)) then + !$OMP parallel workshare x%v = alpha*x%v + !$OMP end parallel workshare call x%set_host() end if @@ -1181,8 +1194,9 @@ contains real(psb_spk_) :: res if (x%is_dev()) call x%sync() + !$OMP parallel workshare res = maxval(abs(x%v(1:n))) - + !$OMP end parallel workshare end function s_base_amax ! @@ -1197,8 +1211,9 @@ contains real(psb_spk_) :: res if (x%is_dev()) call x%sync() + !$OMP parallel workshare res = sum(abs(x%v(1:n))) - + !$OMP end parallel workshare end function s_base_asum diff --git a/base/modules/serial/psb_z_base_vect_mod.f90 b/base/modules/serial/psb_z_base_vect_mod.f90 index 479e7cd9..fbe6b6db 100644 --- a/base/modules/serial/psb_z_base_vect_mod.f90 +++ b/base/modules/serial/psb_z_base_vect_mod.f90 @@ -964,6 +964,7 @@ contains info = 0 if (y%is_dev()) call y%sync() n = min(size(y%v), size(x)) + !$omp parallel do private(i) do i=1, n y%v(i) = y%v(i)*x(i) end do @@ -1001,6 +1002,7 @@ contains if (beta == zone) then return else + !$omp parallel do private(i) shared(beta) do i=1, n z%v(i) = beta*z%v(i) end do @@ -1008,42 +1010,51 @@ contains else if (alpha == zone) then if (beta == zzero) then + !$omp parallel do private(i) do i=1, n z%v(i) = y(i)*x(i) end do else if (beta == zone) then + !$omp parallel do private(i) do i=1, n z%v(i) = z%v(i) + y(i)*x(i) end do - else + else + !$omp parallel do private(i) shared(beta) do i=1, n z%v(i) = beta*z%v(i) + y(i)*x(i) end do end if else if (alpha == -zone) then if (beta == zzero) then + !$omp parallel do private(i) do i=1, n z%v(i) = -y(i)*x(i) end do else if (beta == zone) then + !$omp parallel do private(i) do i=1, n z%v(i) = z%v(i) - y(i)*x(i) end do else + !$omp parallel do private(i) shared(beta) do i=1, n z%v(i) = beta*z%v(i) - y(i)*x(i) end do end if else if (beta == zzero) then + !$omp parallel do private(i) shared(alpha) do i=1, n z%v(i) = alpha*y(i)*x(i) end do - else if (beta == zone) then + else if (beta == zone) then + !$omp parallel do private(i) shared(alpha) do i=1, n z%v(i) = z%v(i) + alpha*y(i)*x(i) end do - else + else + !$omp parallel do private(i) shared(alpha,beta) do i=1, n z%v(i) = beta*z%v(i) + alpha*y(i)*x(i) end do @@ -1143,8 +1154,10 @@ contains class(psb_z_base_vect_type), intent(inout) :: x complex(psb_dpk_), intent (in) :: alpha - if (allocated(x%v)) then + if (allocated(x%v)) then + !$OMP parallel workshare x%v = alpha*x%v + !$OMP end parallel workshare call x%set_host() end if @@ -1181,8 +1194,9 @@ contains real(psb_dpk_) :: res if (x%is_dev()) call x%sync() + !$OMP parallel workshare res = maxval(abs(x%v(1:n))) - + !$OMP end parallel workshare end function z_base_amax ! @@ -1197,8 +1211,9 @@ contains real(psb_dpk_) :: res if (x%is_dev()) call x%sync() + !$OMP parallel workshare res = sum(abs(x%v(1:n))) - + !$OMP end parallel workshare end function z_base_asum diff --git a/base/serial/impl/psb_c_csr_impl.f90 b/base/serial/impl/psb_c_csr_impl.f90 index e248c5e1..4afe8289 100644 --- a/base/serial/impl/psb_c_csr_impl.f90 +++ b/base/serial/impl/psb_c_csr_impl.f90 @@ -131,10 +131,12 @@ contains if (alpha == czero) then if (beta == czero) then + !$omp parallel do private(i) do i = 1, m y(i) = czero enddo else + !$omp parallel do private(i) do i = 1, m y(i) = beta*y(i) end do @@ -148,6 +150,7 @@ contains if (beta == czero) then if (alpha == cone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = czero do j=irp(i), irp(i+1)-1 @@ -158,6 +161,7 @@ contains else if (alpha == -cone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = czero do j=irp(i), irp(i+1)-1 @@ -168,6 +172,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc = czero do j=irp(i), irp(i+1)-1 @@ -181,7 +186,8 @@ contains else if (beta == cone) then - if (alpha == cone) then + if (alpha == cone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = czero do j=irp(i), irp(i+1)-1 @@ -192,7 +198,8 @@ contains else if (alpha == -cone) then - do i=1,m + !$omp parallel do private(i,j,acc) + do i=1,m acc = czero do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) @@ -200,8 +207,9 @@ contains y(i) = y(i) -acc end do - else - + else + + !$omp parallel do private(i,j,acc) do i=1,m acc = czero do j=irp(i), irp(i+1)-1 @@ -214,7 +222,8 @@ contains else if (beta == -cone) then - if (alpha == cone) then + if (alpha == cone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = czero do j=irp(i), irp(i+1)-1 @@ -224,7 +233,7 @@ contains end do else if (alpha == -cone) then - + !$omp parallel do private(i,j,acc) do i=1,m acc = czero do j=irp(i), irp(i+1)-1 @@ -234,7 +243,7 @@ contains end do else - + !$omp parallel do private(i,j,acc) do i=1,m acc = czero do j=irp(i), irp(i+1)-1 @@ -247,7 +256,8 @@ contains else - if (alpha == cone) then + if (alpha == cone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = czero do j=irp(i), irp(i+1)-1 @@ -257,7 +267,7 @@ contains end do else if (alpha == -cone) then - + !$omp parallel do private(i,j,acc) do i=1,m acc = czero do j=irp(i), irp(i+1)-1 @@ -267,7 +277,7 @@ contains end do else - + !$omp parallel do private(i,j,acc) do i=1,m acc = czero do j=irp(i), irp(i+1)-1 diff --git a/base/serial/impl/psb_d_csr_impl.f90 b/base/serial/impl/psb_d_csr_impl.f90 index a6fb0b18..7e653352 100644 --- a/base/serial/impl/psb_d_csr_impl.f90 +++ b/base/serial/impl/psb_d_csr_impl.f90 @@ -131,10 +131,12 @@ contains if (alpha == dzero) then if (beta == dzero) then + !$omp parallel do private(i) do i = 1, m y(i) = dzero enddo else + !$omp parallel do private(i) do i = 1, m y(i) = beta*y(i) end do @@ -148,6 +150,7 @@ contains if (beta == dzero) then if (alpha == done) then + !$omp parallel do private(i,j,acc) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -158,6 +161,7 @@ contains else if (alpha == -done) then + !$omp parallel do private(i,j,acc) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -168,6 +172,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -181,7 +186,8 @@ contains else if (beta == done) then - if (alpha == done) then + if (alpha == done) then + !$omp parallel do private(i,j,acc) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -192,7 +198,8 @@ contains else if (alpha == -done) then - do i=1,m + !$omp parallel do private(i,j,acc) + do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) @@ -200,8 +207,9 @@ contains y(i) = y(i) -acc end do - else - + else + + !$omp parallel do private(i,j,acc) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -214,7 +222,8 @@ contains else if (beta == -done) then - if (alpha == done) then + if (alpha == done) then + !$omp parallel do private(i,j,acc) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -224,7 +233,7 @@ contains end do else if (alpha == -done) then - + !$omp parallel do private(i,j,acc) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -234,7 +243,7 @@ contains end do else - + !$omp parallel do private(i,j,acc) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -247,7 +256,8 @@ contains else - if (alpha == done) then + if (alpha == done) then + !$omp parallel do private(i,j,acc) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -257,7 +267,7 @@ contains end do else if (alpha == -done) then - + !$omp parallel do private(i,j,acc) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -267,7 +277,7 @@ contains end do else - + !$omp parallel do private(i,j,acc) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 diff --git a/base/serial/impl/psb_s_csr_impl.f90 b/base/serial/impl/psb_s_csr_impl.f90 index a5bca393..b583ac82 100644 --- a/base/serial/impl/psb_s_csr_impl.f90 +++ b/base/serial/impl/psb_s_csr_impl.f90 @@ -131,10 +131,12 @@ contains if (alpha == szero) then if (beta == szero) then + !$omp parallel do private(i) do i = 1, m y(i) = szero enddo else + !$omp parallel do private(i) do i = 1, m y(i) = beta*y(i) end do @@ -148,6 +150,7 @@ contains if (beta == szero) then if (alpha == sone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = szero do j=irp(i), irp(i+1)-1 @@ -158,6 +161,7 @@ contains else if (alpha == -sone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = szero do j=irp(i), irp(i+1)-1 @@ -168,6 +172,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc = szero do j=irp(i), irp(i+1)-1 @@ -181,7 +186,8 @@ contains else if (beta == sone) then - if (alpha == sone) then + if (alpha == sone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = szero do j=irp(i), irp(i+1)-1 @@ -192,7 +198,8 @@ contains else if (alpha == -sone) then - do i=1,m + !$omp parallel do private(i,j,acc) + do i=1,m acc = szero do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) @@ -200,8 +207,9 @@ contains y(i) = y(i) -acc end do - else - + else + + !$omp parallel do private(i,j,acc) do i=1,m acc = szero do j=irp(i), irp(i+1)-1 @@ -214,7 +222,8 @@ contains else if (beta == -sone) then - if (alpha == sone) then + if (alpha == sone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = szero do j=irp(i), irp(i+1)-1 @@ -224,7 +233,7 @@ contains end do else if (alpha == -sone) then - + !$omp parallel do private(i,j,acc) do i=1,m acc = szero do j=irp(i), irp(i+1)-1 @@ -234,7 +243,7 @@ contains end do else - + !$omp parallel do private(i,j,acc) do i=1,m acc = szero do j=irp(i), irp(i+1)-1 @@ -247,7 +256,8 @@ contains else - if (alpha == sone) then + if (alpha == sone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = szero do j=irp(i), irp(i+1)-1 @@ -257,7 +267,7 @@ contains end do else if (alpha == -sone) then - + !$omp parallel do private(i,j,acc) do i=1,m acc = szero do j=irp(i), irp(i+1)-1 @@ -267,7 +277,7 @@ contains end do else - + !$omp parallel do private(i,j,acc) do i=1,m acc = szero do j=irp(i), irp(i+1)-1 diff --git a/base/serial/impl/psb_z_csr_impl.f90 b/base/serial/impl/psb_z_csr_impl.f90 index b258eaac..820e1105 100644 --- a/base/serial/impl/psb_z_csr_impl.f90 +++ b/base/serial/impl/psb_z_csr_impl.f90 @@ -131,10 +131,12 @@ contains if (alpha == zzero) then if (beta == zzero) then + !$omp parallel do private(i) do i = 1, m y(i) = zzero enddo else + !$omp parallel do private(i) do i = 1, m y(i) = beta*y(i) end do @@ -148,6 +150,7 @@ contains if (beta == zzero) then if (alpha == zone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = zzero do j=irp(i), irp(i+1)-1 @@ -158,6 +161,7 @@ contains else if (alpha == -zone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = zzero do j=irp(i), irp(i+1)-1 @@ -168,6 +172,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc = zzero do j=irp(i), irp(i+1)-1 @@ -181,7 +186,8 @@ contains else if (beta == zone) then - if (alpha == zone) then + if (alpha == zone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = zzero do j=irp(i), irp(i+1)-1 @@ -192,7 +198,8 @@ contains else if (alpha == -zone) then - do i=1,m + !$omp parallel do private(i,j,acc) + do i=1,m acc = zzero do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) @@ -200,8 +207,9 @@ contains y(i) = y(i) -acc end do - else - + else + + !$omp parallel do private(i,j,acc) do i=1,m acc = zzero do j=irp(i), irp(i+1)-1 @@ -214,7 +222,8 @@ contains else if (beta == -zone) then - if (alpha == zone) then + if (alpha == zone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = zzero do j=irp(i), irp(i+1)-1 @@ -224,7 +233,7 @@ contains end do else if (alpha == -zone) then - + !$omp parallel do private(i,j,acc) do i=1,m acc = zzero do j=irp(i), irp(i+1)-1 @@ -234,7 +243,7 @@ contains end do else - + !$omp parallel do private(i,j,acc) do i=1,m acc = zzero do j=irp(i), irp(i+1)-1 @@ -247,7 +256,8 @@ contains else - if (alpha == zone) then + if (alpha == zone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = zzero do j=irp(i), irp(i+1)-1 @@ -257,7 +267,7 @@ contains end do else if (alpha == -zone) then - + !$omp parallel do private(i,j,acc) do i=1,m acc = zzero do j=irp(i), irp(i+1)-1 @@ -267,7 +277,7 @@ contains end do else - + !$omp parallel do private(i,j,acc) do i=1,m acc = zzero do j=irp(i), irp(i+1)-1 diff --git a/base/serial/psi_c_serial_impl.f90 b/base/serial/psi_c_serial_impl.f90 index 4794c38e..3aac47ab 100644 --- a/base/serial/psi_c_serial_impl.f90 +++ b/base/serial/psi_c_serial_impl.f90 @@ -476,6 +476,7 @@ subroutine caxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (alpha.eq.czero) then if (beta.eq.czero) then do j=1, n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = czero enddo @@ -487,12 +488,14 @@ subroutine caxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-cone) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = beta*y(i,j) enddo @@ -503,12 +506,14 @@ subroutine caxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (beta.eq.czero) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = x(i,j) enddo enddo else if (beta.eq.cone) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = x(i,j) + y(i,j) enddo @@ -516,12 +521,14 @@ subroutine caxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-cone) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = x(i,j) + beta*y(i,j) enddo @@ -532,12 +539,14 @@ subroutine caxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (beta.eq.czero) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = -x(i,j) enddo enddo else if (beta.eq.cone) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = -x(i,j) + y(i,j) enddo @@ -545,12 +554,14 @@ subroutine caxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-cone) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = -x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = -x(i,j) + beta*y(i,j) enddo @@ -561,12 +572,14 @@ subroutine caxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (beta.eq.czero) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = alpha*x(i,j) enddo enddo else if (beta.eq.cone) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = alpha*x(i,j) + y(i,j) enddo @@ -574,12 +587,14 @@ subroutine caxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-cone) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = alpha*x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = alpha*x(i,j) + beta*y(i,j) enddo diff --git a/base/serial/psi_d_serial_impl.f90 b/base/serial/psi_d_serial_impl.f90 index 71f62cd5..b3d99e9b 100644 --- a/base/serial/psi_d_serial_impl.f90 +++ b/base/serial/psi_d_serial_impl.f90 @@ -476,6 +476,7 @@ subroutine daxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (alpha.eq.dzero) then if (beta.eq.dzero) then do j=1, n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = dzero enddo @@ -487,12 +488,14 @@ subroutine daxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-done) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = beta*y(i,j) enddo @@ -503,12 +506,14 @@ subroutine daxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (beta.eq.dzero) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = x(i,j) enddo enddo else if (beta.eq.done) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = x(i,j) + y(i,j) enddo @@ -516,12 +521,14 @@ subroutine daxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-done) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = x(i,j) + beta*y(i,j) enddo @@ -532,12 +539,14 @@ subroutine daxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (beta.eq.dzero) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = -x(i,j) enddo enddo else if (beta.eq.done) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = -x(i,j) + y(i,j) enddo @@ -545,12 +554,14 @@ subroutine daxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-done) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = -x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = -x(i,j) + beta*y(i,j) enddo @@ -561,12 +572,14 @@ subroutine daxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (beta.eq.dzero) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = alpha*x(i,j) enddo enddo else if (beta.eq.done) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = alpha*x(i,j) + y(i,j) enddo @@ -574,12 +587,14 @@ subroutine daxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-done) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = alpha*x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = alpha*x(i,j) + beta*y(i,j) enddo diff --git a/base/serial/psi_i_serial_impl.f90 b/base/serial/psi_i_serial_impl.f90 index 90d2cdea..6b35466b 100644 --- a/base/serial/psi_i_serial_impl.f90 +++ b/base/serial/psi_i_serial_impl.f90 @@ -476,6 +476,7 @@ subroutine iaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (alpha.eq.izero) then if (beta.eq.izero) then do j=1, n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = izero enddo @@ -487,12 +488,14 @@ subroutine iaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-ione) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = beta*y(i,j) enddo @@ -503,12 +506,14 @@ subroutine iaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (beta.eq.izero) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = x(i,j) enddo enddo else if (beta.eq.ione) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = x(i,j) + y(i,j) enddo @@ -516,12 +521,14 @@ subroutine iaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-ione) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = x(i,j) + beta*y(i,j) enddo @@ -532,12 +539,14 @@ subroutine iaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (beta.eq.izero) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = -x(i,j) enddo enddo else if (beta.eq.ione) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = -x(i,j) + y(i,j) enddo @@ -545,12 +554,14 @@ subroutine iaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-ione) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = -x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = -x(i,j) + beta*y(i,j) enddo @@ -561,12 +572,14 @@ subroutine iaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (beta.eq.izero) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = alpha*x(i,j) enddo enddo else if (beta.eq.ione) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = alpha*x(i,j) + y(i,j) enddo @@ -574,12 +587,14 @@ subroutine iaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-ione) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = alpha*x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = alpha*x(i,j) + beta*y(i,j) enddo diff --git a/base/serial/psi_s_serial_impl.f90 b/base/serial/psi_s_serial_impl.f90 index cba56128..a8de2e11 100644 --- a/base/serial/psi_s_serial_impl.f90 +++ b/base/serial/psi_s_serial_impl.f90 @@ -476,6 +476,7 @@ subroutine saxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (alpha.eq.szero) then if (beta.eq.szero) then do j=1, n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = szero enddo @@ -487,12 +488,14 @@ subroutine saxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-sone) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = beta*y(i,j) enddo @@ -503,12 +506,14 @@ subroutine saxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (beta.eq.szero) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = x(i,j) enddo enddo else if (beta.eq.sone) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = x(i,j) + y(i,j) enddo @@ -516,12 +521,14 @@ subroutine saxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-sone) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = x(i,j) + beta*y(i,j) enddo @@ -532,12 +539,14 @@ subroutine saxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (beta.eq.szero) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = -x(i,j) enddo enddo else if (beta.eq.sone) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = -x(i,j) + y(i,j) enddo @@ -545,12 +554,14 @@ subroutine saxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-sone) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = -x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = -x(i,j) + beta*y(i,j) enddo @@ -561,12 +572,14 @@ subroutine saxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (beta.eq.szero) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = alpha*x(i,j) enddo enddo else if (beta.eq.sone) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = alpha*x(i,j) + y(i,j) enddo @@ -574,12 +587,14 @@ subroutine saxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-sone) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = alpha*x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = alpha*x(i,j) + beta*y(i,j) enddo diff --git a/base/serial/psi_z_serial_impl.f90 b/base/serial/psi_z_serial_impl.f90 index 9444f6c5..68bc8c6b 100644 --- a/base/serial/psi_z_serial_impl.f90 +++ b/base/serial/psi_z_serial_impl.f90 @@ -476,6 +476,7 @@ subroutine zaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (alpha.eq.zzero) then if (beta.eq.zzero) then do j=1, n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = zzero enddo @@ -487,12 +488,14 @@ subroutine zaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-zone) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = beta*y(i,j) enddo @@ -503,12 +506,14 @@ subroutine zaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (beta.eq.zzero) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = x(i,j) enddo enddo else if (beta.eq.zone) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = x(i,j) + y(i,j) enddo @@ -516,12 +521,14 @@ subroutine zaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-zone) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = x(i,j) + beta*y(i,j) enddo @@ -532,12 +539,14 @@ subroutine zaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (beta.eq.zzero) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = -x(i,j) enddo enddo else if (beta.eq.zone) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = -x(i,j) + y(i,j) enddo @@ -545,12 +554,14 @@ subroutine zaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-zone) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = -x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = -x(i,j) + beta*y(i,j) enddo @@ -561,12 +572,14 @@ subroutine zaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (beta.eq.zzero) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = alpha*x(i,j) enddo enddo else if (beta.eq.zone) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = alpha*x(i,j) + y(i,j) enddo @@ -574,12 +587,14 @@ subroutine zaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-zone) then do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = alpha*x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) shared(j) do i=1,m y(i,j) = alpha*x(i,j) + beta*y(i,j) enddo diff --git a/prec/impl/psb_c_diagprec_impl.f90 b/prec/impl/psb_c_diagprec_impl.f90 index f31aae1e..45ac7e69 100644 --- a/prec/impl/psb_c_diagprec_impl.f90 +++ b/prec/impl/psb_c_diagprec_impl.f90 @@ -255,6 +255,7 @@ subroutine psb_c_diag_precbld(a,desc_a,prec,info,amold,vmold,imold) end if call psb_realloc(desc_a%get_local_cols(),prec%d,info,pad=cone) + !$omp parallel do private(i) do i=1,nrow if (prec%d(i) == dzero) then prec%d(i) = cone diff --git a/prec/impl/psb_d_diagprec_impl.f90 b/prec/impl/psb_d_diagprec_impl.f90 index 5e0175a2..cc340f75 100644 --- a/prec/impl/psb_d_diagprec_impl.f90 +++ b/prec/impl/psb_d_diagprec_impl.f90 @@ -255,6 +255,7 @@ subroutine psb_d_diag_precbld(a,desc_a,prec,info,amold,vmold,imold) end if call psb_realloc(desc_a%get_local_cols(),prec%d,info,pad=done) + !$omp parallel do private(i) do i=1,nrow if (prec%d(i) == dzero) then prec%d(i) = done diff --git a/prec/impl/psb_s_diagprec_impl.f90 b/prec/impl/psb_s_diagprec_impl.f90 index 79ba27cc..928d33ab 100644 --- a/prec/impl/psb_s_diagprec_impl.f90 +++ b/prec/impl/psb_s_diagprec_impl.f90 @@ -255,6 +255,7 @@ subroutine psb_s_diag_precbld(a,desc_a,prec,info,amold,vmold,imold) end if call psb_realloc(desc_a%get_local_cols(),prec%d,info,pad=sone) + !$omp parallel do private(i) do i=1,nrow if (prec%d(i) == dzero) then prec%d(i) = sone diff --git a/prec/impl/psb_z_diagprec_impl.f90 b/prec/impl/psb_z_diagprec_impl.f90 index 24e288f5..1f54cf6a 100644 --- a/prec/impl/psb_z_diagprec_impl.f90 +++ b/prec/impl/psb_z_diagprec_impl.f90 @@ -255,6 +255,7 @@ subroutine psb_z_diag_precbld(a,desc_a,prec,info,amold,vmold,imold) end if call psb_realloc(desc_a%get_local_cols(),prec%d,info,pad=zone) + !$omp parallel do private(i) do i=1,nrow if (prec%d(i) == dzero) then prec%d(i) = zone