From 8459ea28f5359e55b0e51db837dd34c5fdee5873 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 14 Apr 2023 18:25:43 +0200 Subject: [PATCH] Modified matrix build procedures with OpenMP --- base/serial/impl/psb_c_coo_impl.F90 | 33 ++++++++++++++++-------- base/serial/impl/psb_c_csr_impl.F90 | 29 ++++++++++----------- base/serial/impl/psb_d_coo_impl.F90 | 13 +++++----- base/serial/impl/psb_d_csr_impl.F90 | 29 ++++++++++----------- base/serial/impl/psb_s_coo_impl.F90 | 33 ++++++++++++++++-------- base/serial/impl/psb_s_csr_impl.F90 | 29 ++++++++++----------- base/serial/impl/psb_z_coo_impl.F90 | 33 ++++++++++++++++-------- base/serial/impl/psb_z_csr_impl.F90 | 29 ++++++++++----------- base/tools/psb_cspins.F90 | 39 ++++++++++++++++++++++------- base/tools/psb_dspins.F90 | 18 ++++++------- base/tools/psb_sspins.F90 | 39 ++++++++++++++++++++++------- base/tools/psb_zspins.F90 | 39 ++++++++++++++++++++++------- test/omp/psb_tomp.F90 | 18 +++++++++---- test/pargen/psb_d_pde3d.F90 | 1 - 14 files changed, 236 insertions(+), 146 deletions(-) diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90 index 830b7400..c9be113e 100644 --- a/base/serial/impl/psb_c_coo_impl.F90 +++ b/base/serial/impl/psb_c_coo_impl.F90 @@ -2818,6 +2818,9 @@ subroutine psb_c_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_realloc_mod use psb_sort_mod use psb_c_base_mat_mod, psb_protect_name => psb_c_coo_csput_a +#if defined(OPENMP) + use omp_lib +#endif implicit none class(psb_c_coo_sparse_mat), intent(inout) :: a @@ -2829,7 +2832,7 @@ subroutine psb_c_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) integer(psb_ipk_) :: err_act character(len=20) :: name='c_coo_csput_a_impl' logical, parameter :: debug=.false. - integer(psb_ipk_) :: nza, i,j,k, nzl, isza, debug_level, debug_unit + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, nzaold, debug_level, debug_unit info = psb_success_ debug_unit = psb_get_debug_unit() @@ -2861,10 +2864,11 @@ subroutine psb_c_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) if (nz == 0) return - - nza = a%get_nzeros() - isza = a%get_size() if (a%is_bld()) then + + !$omp critical + nza = a%get_nzeros() + isza = a%get_size() ! Build phase. Must handle reallocations in a sensible way. if (isza < (nza+nz)) then call a%reallocate(max(nza+nz,int(1.5*isza))) @@ -2872,16 +2876,23 @@ subroutine psb_c_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) isza = a%get_size() if (isza < (nza+nz)) then info = psb_err_alloc_dealloc_; call psb_errpush(info,name) - goto 9999 + else + nzaold = nza + nza = nza + nz + call a%set_nzeros(nza) +#if defined(OPENMP) + !write(0,*) 'From thread ',omp_get_thread_num(),nzaold,nz,nza,a%get_nzeros() +#endif end if - - call psb_inner_ins(nz,ia,ja,val,nza,a%ia,a%ja,a%val,isza,& + !$omp end critical + if (info /= 0) goto 9999 + call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,& & imin,imax,jmin,jmax,info) - call a%set_nzeros(nza) call a%set_sorted(.false.) - - + else if (a%is_upd()) then + nza = a%get_nzeros() + isza = a%get_size() if (a%is_dev()) call a%sync() @@ -2951,7 +2962,7 @@ contains end do !$OMP END PARALLEL DO - nza = nza + nz + !nza = nza + nz #else do i=1, nz ir = ia(i) diff --git a/base/serial/impl/psb_c_csr_impl.F90 b/base/serial/impl/psb_c_csr_impl.F90 index a7869136..fc56e9d8 100644 --- a/base/serial/impl/psb_c_csr_impl.F90 +++ b/base/serial/impl/psb_c_csr_impl.F90 @@ -2997,22 +2997,21 @@ subroutine psb_c_cp_csr_from_coo(a,b,info) !$OMP END PARALLEL #else - - do k=1,nza - i = itemp(k) - a%irp(i) = a%irp(i) + 1 - end do - ip = 1 - do i=1,nr - ncl = a%irp(i) - a%irp(i) = ip - ip = ip + ncl - end do - a%irp(nr+1) = ip + + do k=1,nza + i = itemp(k) + a%irp(i) = a%irp(i) + 1 + end do + ip = 1 + do i=1,nr + ncl = a%irp(i) + a%irp(i) = ip + ip = ip + ncl + end do + a%irp(nr+1) = ip #endif call a%set_host() - - + end subroutine psb_c_cp_csr_from_coo @@ -3128,7 +3127,6 @@ subroutine psb_c_mv_csr_from_coo(a,b,info) integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name='mv_from_coo' - logical :: use_openmp = .false. #if defined(OPENMP) integer(psb_ipk_), allocatable :: sum(:) @@ -3229,7 +3227,6 @@ subroutine psb_c_mv_csr_from_coo(a,b,info) !$OMP END PARALLEL #else - do k=1,nza i = itemp(k) a%irp(i) = a%irp(i) + 1 diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90 index f4f4f0dc..bb845f4b 100644 --- a/base/serial/impl/psb_d_coo_impl.F90 +++ b/base/serial/impl/psb_d_coo_impl.F90 @@ -2832,7 +2832,7 @@ subroutine psb_d_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) integer(psb_ipk_) :: err_act character(len=20) :: name='d_coo_csput_a_impl' logical, parameter :: debug=.false. - integer(psb_ipk_) :: nza, i,j,k, nzl, isza, debug_level, debug_unit, nzaold + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, nzaold, debug_level, debug_unit info = psb_success_ debug_unit = psb_get_debug_unit() @@ -2864,7 +2864,6 @@ subroutine psb_d_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) if (nz == 0) return - if (a%is_bld()) then !$omp critical @@ -2890,7 +2889,7 @@ subroutine psb_d_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,& & imin,imax,jmin,jmax,info) call a%set_sorted(.false.) - + else if (a%is_upd()) then nza = a%get_nzeros() isza = a%get_size() @@ -2945,9 +2944,9 @@ contains ! the serial version: each element is stored in data ! structures but the invalid ones are stored as '-1' values. ! These values will be filtered in a future fixing process. - ! $ OMP PARALLEL DO default(none) schedule(STATIC) & - ! $ OMP shared(nz,imin,imax,jmin,jmax,ia,ja,val,ia1,ia2,aspk,nza) & - ! $ OMP private(ir,ic,i) + !$OMP PARALLEL DO default(none) schedule(STATIC) & + !$OMP shared(nz,imin,imax,jmin,jmax,ia,ja,val,ia1,ia2,aspk,nza) & + !$OMP private(ir,ic,i) do i=1,nz ir = ia(i) ic = ja(i) @@ -2961,7 +2960,7 @@ contains aspk(nza+i) = -1 end if end do - ! $OMP END PARALLEL DO + !$OMP END PARALLEL DO !nza = nza + nz #else diff --git a/base/serial/impl/psb_d_csr_impl.F90 b/base/serial/impl/psb_d_csr_impl.F90 index a2ddad30..1e579aaa 100644 --- a/base/serial/impl/psb_d_csr_impl.F90 +++ b/base/serial/impl/psb_d_csr_impl.F90 @@ -2997,22 +2997,21 @@ subroutine psb_d_cp_csr_from_coo(a,b,info) !$OMP END PARALLEL #else - - do k=1,nza - i = itemp(k) - a%irp(i) = a%irp(i) + 1 - end do - ip = 1 - do i=1,nr - ncl = a%irp(i) - a%irp(i) = ip - ip = ip + ncl - end do - a%irp(nr+1) = ip + + do k=1,nza + i = itemp(k) + a%irp(i) = a%irp(i) + 1 + end do + ip = 1 + do i=1,nr + ncl = a%irp(i) + a%irp(i) = ip + ip = ip + ncl + end do + a%irp(nr+1) = ip #endif call a%set_host() - - + end subroutine psb_d_cp_csr_from_coo @@ -3128,7 +3127,6 @@ subroutine psb_d_mv_csr_from_coo(a,b,info) integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name='mv_from_coo' - logical :: use_openmp = .false. #if defined(OPENMP) integer(psb_ipk_), allocatable :: sum(:) @@ -3229,7 +3227,6 @@ subroutine psb_d_mv_csr_from_coo(a,b,info) !$OMP END PARALLEL #else - do k=1,nza i = itemp(k) a%irp(i) = a%irp(i) + 1 diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90 index be4218de..0b201684 100644 --- a/base/serial/impl/psb_s_coo_impl.F90 +++ b/base/serial/impl/psb_s_coo_impl.F90 @@ -2818,6 +2818,9 @@ subroutine psb_s_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_realloc_mod use psb_sort_mod use psb_s_base_mat_mod, psb_protect_name => psb_s_coo_csput_a +#if defined(OPENMP) + use omp_lib +#endif implicit none class(psb_s_coo_sparse_mat), intent(inout) :: a @@ -2829,7 +2832,7 @@ subroutine psb_s_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) integer(psb_ipk_) :: err_act character(len=20) :: name='s_coo_csput_a_impl' logical, parameter :: debug=.false. - integer(psb_ipk_) :: nza, i,j,k, nzl, isza, debug_level, debug_unit + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, nzaold, debug_level, debug_unit info = psb_success_ debug_unit = psb_get_debug_unit() @@ -2861,10 +2864,11 @@ subroutine psb_s_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) if (nz == 0) return - - nza = a%get_nzeros() - isza = a%get_size() if (a%is_bld()) then + + !$omp critical + nza = a%get_nzeros() + isza = a%get_size() ! Build phase. Must handle reallocations in a sensible way. if (isza < (nza+nz)) then call a%reallocate(max(nza+nz,int(1.5*isza))) @@ -2872,16 +2876,23 @@ subroutine psb_s_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) isza = a%get_size() if (isza < (nza+nz)) then info = psb_err_alloc_dealloc_; call psb_errpush(info,name) - goto 9999 + else + nzaold = nza + nza = nza + nz + call a%set_nzeros(nza) +#if defined(OPENMP) + !write(0,*) 'From thread ',omp_get_thread_num(),nzaold,nz,nza,a%get_nzeros() +#endif end if - - call psb_inner_ins(nz,ia,ja,val,nza,a%ia,a%ja,a%val,isza,& + !$omp end critical + if (info /= 0) goto 9999 + call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,& & imin,imax,jmin,jmax,info) - call a%set_nzeros(nza) call a%set_sorted(.false.) - - + else if (a%is_upd()) then + nza = a%get_nzeros() + isza = a%get_size() if (a%is_dev()) call a%sync() @@ -2951,7 +2962,7 @@ contains end do !$OMP END PARALLEL DO - nza = nza + nz + !nza = nza + nz #else do i=1, nz ir = ia(i) diff --git a/base/serial/impl/psb_s_csr_impl.F90 b/base/serial/impl/psb_s_csr_impl.F90 index a8c67c10..4eeaaf5d 100644 --- a/base/serial/impl/psb_s_csr_impl.F90 +++ b/base/serial/impl/psb_s_csr_impl.F90 @@ -2997,22 +2997,21 @@ subroutine psb_s_cp_csr_from_coo(a,b,info) !$OMP END PARALLEL #else - - do k=1,nza - i = itemp(k) - a%irp(i) = a%irp(i) + 1 - end do - ip = 1 - do i=1,nr - ncl = a%irp(i) - a%irp(i) = ip - ip = ip + ncl - end do - a%irp(nr+1) = ip + + do k=1,nza + i = itemp(k) + a%irp(i) = a%irp(i) + 1 + end do + ip = 1 + do i=1,nr + ncl = a%irp(i) + a%irp(i) = ip + ip = ip + ncl + end do + a%irp(nr+1) = ip #endif call a%set_host() - - + end subroutine psb_s_cp_csr_from_coo @@ -3128,7 +3127,6 @@ subroutine psb_s_mv_csr_from_coo(a,b,info) integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name='mv_from_coo' - logical :: use_openmp = .false. #if defined(OPENMP) integer(psb_ipk_), allocatable :: sum(:) @@ -3229,7 +3227,6 @@ subroutine psb_s_mv_csr_from_coo(a,b,info) !$OMP END PARALLEL #else - do k=1,nza i = itemp(k) a%irp(i) = a%irp(i) + 1 diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90 index 4f99cb5c..14410f23 100644 --- a/base/serial/impl/psb_z_coo_impl.F90 +++ b/base/serial/impl/psb_z_coo_impl.F90 @@ -2818,6 +2818,9 @@ subroutine psb_z_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_realloc_mod use psb_sort_mod use psb_z_base_mat_mod, psb_protect_name => psb_z_coo_csput_a +#if defined(OPENMP) + use omp_lib +#endif implicit none class(psb_z_coo_sparse_mat), intent(inout) :: a @@ -2829,7 +2832,7 @@ subroutine psb_z_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) integer(psb_ipk_) :: err_act character(len=20) :: name='z_coo_csput_a_impl' logical, parameter :: debug=.false. - integer(psb_ipk_) :: nza, i,j,k, nzl, isza, debug_level, debug_unit + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, nzaold, debug_level, debug_unit info = psb_success_ debug_unit = psb_get_debug_unit() @@ -2861,10 +2864,11 @@ subroutine psb_z_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) if (nz == 0) return - - nza = a%get_nzeros() - isza = a%get_size() if (a%is_bld()) then + + !$omp critical + nza = a%get_nzeros() + isza = a%get_size() ! Build phase. Must handle reallocations in a sensible way. if (isza < (nza+nz)) then call a%reallocate(max(nza+nz,int(1.5*isza))) @@ -2872,16 +2876,23 @@ subroutine psb_z_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) isza = a%get_size() if (isza < (nza+nz)) then info = psb_err_alloc_dealloc_; call psb_errpush(info,name) - goto 9999 + else + nzaold = nza + nza = nza + nz + call a%set_nzeros(nza) +#if defined(OPENMP) + !write(0,*) 'From thread ',omp_get_thread_num(),nzaold,nz,nza,a%get_nzeros() +#endif end if - - call psb_inner_ins(nz,ia,ja,val,nza,a%ia,a%ja,a%val,isza,& + !$omp end critical + if (info /= 0) goto 9999 + call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,& & imin,imax,jmin,jmax,info) - call a%set_nzeros(nza) call a%set_sorted(.false.) - - + else if (a%is_upd()) then + nza = a%get_nzeros() + isza = a%get_size() if (a%is_dev()) call a%sync() @@ -2951,7 +2962,7 @@ contains end do !$OMP END PARALLEL DO - nza = nza + nz + !nza = nza + nz #else do i=1, nz ir = ia(i) diff --git a/base/serial/impl/psb_z_csr_impl.F90 b/base/serial/impl/psb_z_csr_impl.F90 index 6344b268..3e1dacb9 100644 --- a/base/serial/impl/psb_z_csr_impl.F90 +++ b/base/serial/impl/psb_z_csr_impl.F90 @@ -2997,22 +2997,21 @@ subroutine psb_z_cp_csr_from_coo(a,b,info) !$OMP END PARALLEL #else - - do k=1,nza - i = itemp(k) - a%irp(i) = a%irp(i) + 1 - end do - ip = 1 - do i=1,nr - ncl = a%irp(i) - a%irp(i) = ip - ip = ip + ncl - end do - a%irp(nr+1) = ip + + do k=1,nza + i = itemp(k) + a%irp(i) = a%irp(i) + 1 + end do + ip = 1 + do i=1,nr + ncl = a%irp(i) + a%irp(i) = ip + ip = ip + ncl + end do + a%irp(nr+1) = ip #endif call a%set_host() - - + end subroutine psb_z_cp_csr_from_coo @@ -3128,7 +3127,6 @@ subroutine psb_z_mv_csr_from_coo(a,b,info) integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name='mv_from_coo' - logical :: use_openmp = .false. #if defined(OPENMP) integer(psb_ipk_), allocatable :: sum(:) @@ -3229,7 +3227,6 @@ subroutine psb_z_mv_csr_from_coo(a,b,info) !$OMP END PARALLEL #else - do k=1,nza i = itemp(k) a%irp(i) = a%irp(i) + 1 diff --git a/base/tools/psb_cspins.F90 b/base/tools/psb_cspins.F90 index 27cfbd8e..0a65fb5c 100644 --- a/base/tools/psb_cspins.F90 +++ b/base/tools/psb_cspins.F90 @@ -51,6 +51,9 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) use psb_base_mod, psb_protect_name => psb_cspins use psi_mod +#if defined(OPENMP) + use omp_lib +#endif implicit none !....parameters... @@ -70,7 +73,7 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) integer(psb_ipk_), parameter :: relocsz=200 logical :: rebuild_, local_ integer(psb_ipk_), allocatable :: ila(:),jla(:) - integer(psb_ipk_) :: i,k + integer(psb_ipk_) :: i,k, ith, nth integer(psb_lpk_) :: nnl integer(psb_lpk_), allocatable :: lila(:),ljla(:) complex(psb_spk_), allocatable :: lval(:) @@ -82,7 +85,13 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) ctxt = desc_a%get_context() call psb_info(ctxt, me, np) - +#if defined(OPENMP) + nth = omp_get_num_threads() + ith = omp_get_thread_num() +#else + nth = 1 + ith = 0 +#endif if (nz < 0) then info = 1111 call psb_errpush(info,name) @@ -131,15 +140,23 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & a_err='allocate',i_err=(/info/)) goto 9999 end if - - call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) +#if defined(OPENMP) + !$omp parallel private(ila,jla,nrow,ncol) +#endif + call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) +#if defined(OPENMP) + !$omp critical(cSPINS) +#endif if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,& & mask=(ila(1:nz)>0)) - +#if defined(OPENMP) + !$omp end critical(cSPINS) +#endif + if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_ai_,name,& & a_err='psb_cdins',i_err=(/info/)) - goto 9999 + !goto 9999 end if nrow = desc_a%get_local_rows() ncol = desc_a%get_local_cols() @@ -149,13 +166,12 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) if (info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='a%csput') - goto 9999 + !goto 9999 end if if (a%is_remote_build()) then nnl = count(ila(1:nz)<0) if (nnl > 0) then - !write(0,*) 'Check on insert ',nnl allocate(lila(nnl),ljla(nnl),lval(nnl)) k = 0 do i=1,nz @@ -175,8 +191,13 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) else info = psb_err_invalid_a_and_cd_state_ call psb_errpush(info,name) - goto 9999 + !goto 9999 end if + +#if defined(OPENMP) + !$omp end parallel +#endif + if (info /= 0) goto 9999 endif else if (desc_a%is_asb()) then diff --git a/base/tools/psb_dspins.F90 b/base/tools/psb_dspins.F90 index 9c4f137f..6f700bcc 100644 --- a/base/tools/psb_dspins.F90 +++ b/base/tools/psb_dspins.F90 @@ -73,7 +73,7 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) integer(psb_ipk_), parameter :: relocsz=200 logical :: rebuild_, local_ integer(psb_ipk_), allocatable :: ila(:),jla(:) - integer(psb_ipk_) :: i,k + integer(psb_ipk_) :: i,k, ith, nth integer(psb_lpk_) :: nnl integer(psb_lpk_), allocatable :: lila(:),ljla(:) real(psb_dpk_), allocatable :: lval(:) @@ -86,7 +86,11 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) ctxt = desc_a%get_context() call psb_info(ctxt, me, np) #if defined(OPENMP) - !write(0,*) name,omp_get_num_threads(),omp_get_thread_num() + nth = omp_get_num_threads() + ith = omp_get_thread_num() +#else + nth = 1 + ith = 0 #endif if (nz < 0) then info = 1111 @@ -139,17 +143,14 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) #if defined(OPENMP) !$omp parallel private(ila,jla,nrow,ncol) #endif - call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) #if defined(OPENMP) - !$omp critical + !$omp critical(dSPINS) #endif if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,& & mask=(ila(1:nz)>0)) -!!$ write(0,*) omp_get_thread_num(),'Check g2l: ',ila(1:nz),':',& -!!$ & jla(1:nz),':',ja(1:nz) #if defined(OPENMP) - !$omp end critical + !$omp end critical(dSPINS) #endif if (info /= psb_success_) then @@ -171,7 +172,6 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) if (a%is_remote_build()) then nnl = count(ila(1:nz)<0) if (nnl > 0) then - !write(0,*) 'Check on insert ',nnl allocate(lila(nnl),ljla(nnl),lval(nnl)) k = 0 do i=1,nz @@ -197,7 +197,7 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) #if defined(OPENMP) !$omp end parallel #endif - + if (info /= 0) goto 9999 endif else if (desc_a%is_asb()) then diff --git a/base/tools/psb_sspins.F90 b/base/tools/psb_sspins.F90 index aee7a900..1e7f9037 100644 --- a/base/tools/psb_sspins.F90 +++ b/base/tools/psb_sspins.F90 @@ -51,6 +51,9 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) use psb_base_mod, psb_protect_name => psb_sspins use psi_mod +#if defined(OPENMP) + use omp_lib +#endif implicit none !....parameters... @@ -70,7 +73,7 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) integer(psb_ipk_), parameter :: relocsz=200 logical :: rebuild_, local_ integer(psb_ipk_), allocatable :: ila(:),jla(:) - integer(psb_ipk_) :: i,k + integer(psb_ipk_) :: i,k, ith, nth integer(psb_lpk_) :: nnl integer(psb_lpk_), allocatable :: lila(:),ljla(:) real(psb_spk_), allocatable :: lval(:) @@ -82,7 +85,13 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) ctxt = desc_a%get_context() call psb_info(ctxt, me, np) - +#if defined(OPENMP) + nth = omp_get_num_threads() + ith = omp_get_thread_num() +#else + nth = 1 + ith = 0 +#endif if (nz < 0) then info = 1111 call psb_errpush(info,name) @@ -131,15 +140,23 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & a_err='allocate',i_err=(/info/)) goto 9999 end if - - call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) +#if defined(OPENMP) + !$omp parallel private(ila,jla,nrow,ncol) +#endif + call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) +#if defined(OPENMP) + !$omp critical(sSPINS) +#endif if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,& & mask=(ila(1:nz)>0)) - +#if defined(OPENMP) + !$omp end critical(sSPINS) +#endif + if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_ai_,name,& & a_err='psb_cdins',i_err=(/info/)) - goto 9999 + !goto 9999 end if nrow = desc_a%get_local_rows() ncol = desc_a%get_local_cols() @@ -149,13 +166,12 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) if (info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='a%csput') - goto 9999 + !goto 9999 end if if (a%is_remote_build()) then nnl = count(ila(1:nz)<0) if (nnl > 0) then - !write(0,*) 'Check on insert ',nnl allocate(lila(nnl),ljla(nnl),lval(nnl)) k = 0 do i=1,nz @@ -175,8 +191,13 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) else info = psb_err_invalid_a_and_cd_state_ call psb_errpush(info,name) - goto 9999 + !goto 9999 end if + +#if defined(OPENMP) + !$omp end parallel +#endif + if (info /= 0) goto 9999 endif else if (desc_a%is_asb()) then diff --git a/base/tools/psb_zspins.F90 b/base/tools/psb_zspins.F90 index abe64251..40c0783b 100644 --- a/base/tools/psb_zspins.F90 +++ b/base/tools/psb_zspins.F90 @@ -51,6 +51,9 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) use psb_base_mod, psb_protect_name => psb_zspins use psi_mod +#if defined(OPENMP) + use omp_lib +#endif implicit none !....parameters... @@ -70,7 +73,7 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) integer(psb_ipk_), parameter :: relocsz=200 logical :: rebuild_, local_ integer(psb_ipk_), allocatable :: ila(:),jla(:) - integer(psb_ipk_) :: i,k + integer(psb_ipk_) :: i,k, ith, nth integer(psb_lpk_) :: nnl integer(psb_lpk_), allocatable :: lila(:),ljla(:) complex(psb_dpk_), allocatable :: lval(:) @@ -82,7 +85,13 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) ctxt = desc_a%get_context() call psb_info(ctxt, me, np) - +#if defined(OPENMP) + nth = omp_get_num_threads() + ith = omp_get_thread_num() +#else + nth = 1 + ith = 0 +#endif if (nz < 0) then info = 1111 call psb_errpush(info,name) @@ -131,15 +140,23 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & a_err='allocate',i_err=(/info/)) goto 9999 end if - - call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) +#if defined(OPENMP) + !$omp parallel private(ila,jla,nrow,ncol) +#endif + call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) +#if defined(OPENMP) + !$omp critical(zSPINS) +#endif if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,& & mask=(ila(1:nz)>0)) - +#if defined(OPENMP) + !$omp end critical(zSPINS) +#endif + if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_ai_,name,& & a_err='psb_cdins',i_err=(/info/)) - goto 9999 + !goto 9999 end if nrow = desc_a%get_local_rows() ncol = desc_a%get_local_cols() @@ -149,13 +166,12 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) if (info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='a%csput') - goto 9999 + !goto 9999 end if if (a%is_remote_build()) then nnl = count(ila(1:nz)<0) if (nnl > 0) then - !write(0,*) 'Check on insert ',nnl allocate(lila(nnl),ljla(nnl),lval(nnl)) k = 0 do i=1,nz @@ -175,8 +191,13 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) else info = psb_err_invalid_a_and_cd_state_ call psb_errpush(info,name) - goto 9999 + !goto 9999 end if + +#if defined(OPENMP) + !$omp end parallel +#endif + if (info /= 0) goto 9999 endif else if (desc_a%is_asb()) then diff --git a/test/omp/psb_tomp.F90 b/test/omp/psb_tomp.F90 index 8681c396..7965a5f4 100644 --- a/test/omp/psb_tomp.F90 +++ b/test/omp/psb_tomp.F90 @@ -451,16 +451,24 @@ contains call psb_barrier(ctxt) t1 = psb_wtime() - !$omp parallel private(i,ii,ib,icoeff,glob_row,x,y,z,zt,ix,iy,iz) - ! shared(deltah,myidx,a,desc_a,nb) + !$omp parallel shared(deltah,myidx,a,desc_a) ! we build an auxiliary matrix consisting of one row at a ! time; just a small matrix. might be extended to generate ! a bunch of rows per call. ! block + integer(psb_ipk_) :: i,j,ii,ib,icoeff, ix,iy,iz, ith,nth + integer(psb_lpk_) :: glob_row integer(psb_lpk_), allocatable :: irow(:),icol(:) real(psb_dpk_), allocatable :: val(:) - + real(psb_dpk_) :: x,y,z, zt(nb) +#if defined(OPENMP) + nth = omp_get_num_threads() + ith = omp_get_thread_num() +#else + nth = 1 + ith = 0 +#endif allocate(val(20*nb),irow(20*nb),& &icol(20*nb),stat=info) if (info /= psb_success_ ) then @@ -473,7 +481,7 @@ contains ! loop over rows belonging to current process in a block ! distribution. - !$omp do + !$omp do schedule(dynamic,4) ! do ii=1, nlr, nb if (info /= 0) cycle @@ -723,7 +731,7 @@ program psb_d_pde3d if(psb_errstatus_fatal()) goto 9999 name='pde3d90' call psb_set_errverbosity(itwo) - call psb_cd_set_large_threshold(2000_psb_ipk_) + !call psb_cd_set_large_threshold(2000_psb_ipk_) ! ! Hello world ! diff --git a/test/pargen/psb_d_pde3d.F90 b/test/pargen/psb_d_pde3d.F90 index 0f9df354..d4eeccf2 100644 --- a/test/pargen/psb_d_pde3d.F90 +++ b/test/pargen/psb_d_pde3d.F90 @@ -799,7 +799,6 @@ program psb_d_pde3d if(psb_errstatus_fatal()) goto 9999 name='pde3d90' call psb_set_errverbosity(itwo) - call psb_cd_set_large_threshold(2000_psb_ipk_) ! ! Hello world !