From 30810f804e631abca803b48af205287a20f65665 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Sun, 9 Feb 2025 14:04:34 +0100 Subject: [PATCH] Additional OpenMP tweaks, to be redesigned in the future. --- base/modules/desc/psb_hash_map_mod.F90 | 16 ++++---- base/serial/impl/psb_c_coo_impl.F90 | 10 +++-- base/serial/impl/psb_d_coo_impl.F90 | 4 +- base/serial/impl/psb_s_coo_impl.F90 | 10 +++-- base/serial/impl/psb_z_coo_impl.F90 | 10 +++-- base/tools/psb_cspins.F90 | 2 +- base/tools/psb_dspins.F90 | 54 +++----------------------- base/tools/psb_sspins.F90 | 2 +- base/tools/psb_zspins.F90 | 2 +- 9 files changed, 37 insertions(+), 73 deletions(-) diff --git a/base/modules/desc/psb_hash_map_mod.F90 b/base/modules/desc/psb_hash_map_mod.F90 index b16edaba..d3acc491 100644 --- a/base/modules/desc/psb_hash_map_mod.F90 +++ b/base/modules/desc/psb_hash_map_mod.F90 @@ -722,7 +722,7 @@ contains idx(i) = lip info = psb_success_ else - ! $ o m p critical(hash_g2l_ins) + !$omp critical(hash_g2l_ins) tlip = lip nxt = lidx(i) if (nxt <= nrow) then @@ -758,7 +758,7 @@ contains end if end if endif - ! $ o m p end critical(hash_g2l_ins) + !$omp end critical(hash_g2l_ins) end if else idx(i) = -1 @@ -785,7 +785,7 @@ contains idx(i) = lip info = psb_success_ else - ! $ o m p critical(hash_g2l_ins) + !$omp critical(hash_g2l_ins) tlip = lip nxt = lidx(i) if (nxt <= nrow) then @@ -821,7 +821,7 @@ contains end if end if endif - ! $ o m p end critical(hash_g2l_ins) + !$omp end critical(hash_g2l_ins) end if enddo ! $ o m p end parallel do @@ -848,7 +848,7 @@ contains idx(i) = lip info = psb_success_ else - ! $ o m p critical(hash_g2l_ins) + !$omp critical(hash_g2l_ins) ncol = idxmap%get_lc() nxt = ncol + 1 call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,& @@ -879,7 +879,7 @@ contains isLoopValid = .false. end if end if - ! $ o m p end critical(hash_g2l_ins) + !$omp end critical(hash_g2l_ins) end if else idx(i) = -1 @@ -906,7 +906,7 @@ contains idx(i) = lip info = psb_success_ else - ! $ o m p critical(hash_g2l_ins) + !$omp critical(hash_g2l_ins) ncol = idxmap%get_lc() nxt = ncol + 1 call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,& @@ -937,7 +937,7 @@ contains isLoopValid = .false. end if end if - ! $ o m p end critical(hash_g2l_ins) + !$omp end critical(hash_g2l_ins) end if enddo ! $ o m p end parallel do diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90 index 2ef9af89..15045178 100644 --- a/base/serial/impl/psb_c_coo_impl.F90 +++ b/base/serial/impl/psb_c_coo_impl.F90 @@ -2947,13 +2947,15 @@ contains info = psb_success_ #if defined(OPENMP) + ! Disabling OpenMP parallel do for the time being. + ! Will need to redesign the entire code stack ! The logic here is different from the one used for ! the serial version: each element is stored in data ! structures but the invalid ones are stored as '-1' values. ! These values will be filtered in a future fixing process. - !$OMP PARALLEL DO default(none) schedule(STATIC) & - !$OMP shared(nz,imin,imax,jmin,jmax,ia,ja,val,ia1,ia2,aspk,nza) & - !$OMP private(ir,ic,i) + ! $ O M P PARALLEL DO default(none) schedule(STATIC) & + ! $ O M P shared(nz,imin,imax,jmin,jmax,ia,ja,val,ia1,ia2,aspk,nza) & + ! $ O M P private(ir,ic,i) do i=1,nz ir = ia(i) ic = ja(i) @@ -2967,7 +2969,7 @@ contains aspk(nza+i) = -1 end if end do - !$OMP END PARALLEL DO + ! $ O M P END PARALLEL DO nza = nza + nz #else do i=1, nz diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90 index c1f6f692..81c99ba8 100644 --- a/base/serial/impl/psb_d_coo_impl.F90 +++ b/base/serial/impl/psb_d_coo_impl.F90 @@ -2892,10 +2892,8 @@ subroutine psb_d_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,& & imin,imax,jmin,jmax,info) #if !defined(OPENMP) - !$omp critical nza = nzaold call a%set_nzeros(nza) - !$omp end critical #endif call a%set_sorted(.false.) @@ -2949,6 +2947,8 @@ contains info = psb_success_ #if defined(OPENMP) + ! Disabling OpenMP parallel do for the time being. + ! Will need to redesign the entire code stack ! The logic here is different from the one used for ! the serial version: each element is stored in data ! structures but the invalid ones are stored as '-1' values. diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90 index 96424a98..50dcba19 100644 --- a/base/serial/impl/psb_s_coo_impl.F90 +++ b/base/serial/impl/psb_s_coo_impl.F90 @@ -2947,13 +2947,15 @@ contains info = psb_success_ #if defined(OPENMP) + ! Disabling OpenMP parallel do for the time being. + ! Will need to redesign the entire code stack ! The logic here is different from the one used for ! the serial version: each element is stored in data ! structures but the invalid ones are stored as '-1' values. ! These values will be filtered in a future fixing process. - !$OMP PARALLEL DO default(none) schedule(STATIC) & - !$OMP shared(nz,imin,imax,jmin,jmax,ia,ja,val,ia1,ia2,aspk,nza) & - !$OMP private(ir,ic,i) + ! $ O M P PARALLEL DO default(none) schedule(STATIC) & + ! $ O M P shared(nz,imin,imax,jmin,jmax,ia,ja,val,ia1,ia2,aspk,nza) & + ! $ O M P private(ir,ic,i) do i=1,nz ir = ia(i) ic = ja(i) @@ -2967,7 +2969,7 @@ contains aspk(nza+i) = -1 end if end do - !$OMP END PARALLEL DO + ! $ O M P END PARALLEL DO nza = nza + nz #else do i=1, nz diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90 index 0895c011..089eff97 100644 --- a/base/serial/impl/psb_z_coo_impl.F90 +++ b/base/serial/impl/psb_z_coo_impl.F90 @@ -2947,13 +2947,15 @@ contains info = psb_success_ #if defined(OPENMP) + ! Disabling OpenMP parallel do for the time being. + ! Will need to redesign the entire code stack ! The logic here is different from the one used for ! the serial version: each element is stored in data ! structures but the invalid ones are stored as '-1' values. ! These values will be filtered in a future fixing process. - !$OMP PARALLEL DO default(none) schedule(STATIC) & - !$OMP shared(nz,imin,imax,jmin,jmax,ia,ja,val,ia1,ia2,aspk,nza) & - !$OMP private(ir,ic,i) + ! $ O M P PARALLEL DO default(none) schedule(STATIC) & + ! $ O M P shared(nz,imin,imax,jmin,jmax,ia,ja,val,ia1,ia2,aspk,nza) & + ! $ O M P private(ir,ic,i) do i=1,nz ir = ia(i) ic = ja(i) @@ -2967,7 +2969,7 @@ contains aspk(nza+i) = -1 end if end do - !$OMP END PARALLEL DO + ! $ O M P END PARALLEL DO nza = nza + nz #else do i=1, nz diff --git a/base/tools/psb_cspins.F90 b/base/tools/psb_cspins.F90 index e5f2731d..c6442b6f 100644 --- a/base/tools/psb_cspins.F90 +++ b/base/tools/psb_cspins.F90 @@ -134,7 +134,7 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & a_err='allocate',i_err=(/info/)) goto 9999 end if -#if defined(OPENMP) +#if 0 && defined(OPENMP) block logical :: is_in_parallel is_in_parallel = omp_in_parallel() diff --git a/base/tools/psb_dspins.F90 b/base/tools/psb_dspins.F90 index a3215829..23cd9211 100644 --- a/base/tools/psb_dspins.F90 +++ b/base/tools/psb_dspins.F90 @@ -78,10 +78,7 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) integer(psb_lpk_), allocatable :: lila(:),ljla(:) real(psb_dpk_), allocatable :: lval(:) character(len=20) :: name - logical, parameter :: do_timings=.true. - integer(psb_ipk_), save :: bph1=-1, bph2=-1, bph3=-1 - integer(psb_ipk_), save :: bph11=-1, bph12=-1, bph13=-1 - + info = psb_success_ name = 'psb_dspins' call psb_erractionsave(err_act) @@ -123,18 +120,6 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) else local_ = .false. endif - if ((do_timings).and.(bph1==-1)) & - & bph1 = psb_get_timer_idx("SPI: g2l1 ") - if ((do_timings).and.(bph2==-1)) & - & bph2 = psb_get_timer_idx("SPI: g2li1") - if ((do_timings).and.(bph3==-1)) & - & bph3 = psb_get_timer_idx("SPI: cspu1") - if ((do_timings).and.(bph11==-1)) & - & bph11 = psb_get_timer_idx("SPI: g2l2 ") - if ((do_timings).and.(bph12==-1)) & - & bph12 = psb_get_timer_idx("SPI: g2li2") - if ((do_timings).and.(bph13==-1)) & - & bph13 = psb_get_timer_idx("SPI: cspu2") if (desc_a%is_bld()) then @@ -153,18 +138,9 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) block logical :: is_in_parallel is_in_parallel = omp_in_parallel() - !write(0,*) 'IN PARALLEL:',is_in_parallel - if (is_in_parallel) then - !$omp single - if (do_timings) call psb_tic(bph1) - !$omp end single - !$omp parallel private(ila,jla,nrow,ncol,nnl,k,lila,ljla,lval) + if (is_in_parallel) then + !$omp parallel private(ila,jla,nrow,ncol,nnl,k) call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) - !$omp barrier - !$omp single - if (do_timings) call psb_toc(bph1) - if (do_timings) call psb_tic(bph2) - !$omp end single !$omp critical(spins) if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,& & mask=(ila(1:nz)>0)) @@ -175,10 +151,6 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & a_err='psb_cdins',i_err=(/info/)) goto 9998 end if - !$omp single - if (do_timings) call psb_toc(bph2) - if (do_timings) call psb_tic(bph3) - !$omp end single nrow = desc_a%get_local_rows() ncol = desc_a%get_local_cols() !write(0,*) me,' Before csput',psb_errstatus_fatal() @@ -214,16 +186,10 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) call psb_errpush(info,name) end if 9998 continue - !$omp single - if (do_timings) call psb_toc(bph3) - !$omp end single !write(0,*) me,' after csput',psb_errstatus_fatal() !$omp end parallel - else if (.not.is_in_parallel) then - if (do_timings) call psb_tic(bph11) + else call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) - if (do_timings) call psb_toc(bph11) - if (do_timings) call psb_tic(bph12) !write(0,*) me,' Before g2l_ins ',psb_errstatus_fatal() if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,& & mask=(ila(1:nz)>0)) @@ -233,8 +199,6 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & a_err='psb_cdins',i_err=(/info/)) goto 9999 end if - if (do_timings) call psb_toc(bph12) - if (do_timings) call psb_tic(bph13) nrow = desc_a%get_local_rows() ncol = desc_a%get_local_cols() !write(0,*) me,' Before csput',psb_errstatus_fatal() @@ -270,22 +234,16 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) call psb_errpush(info,name) goto 9999 end if - if (do_timings) call psb_toc(bph13) end if end block #else !write(0,*) me,' Before g2l ',psb_errstatus_fatal() call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) -#if defined(OPENMP) - !$omp critical(g2lins) -#endif + if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,& & mask=(ila(1:nz)>0)) -#if defined(OPENMP) - !$omp endcritical(g2lins) -#endif - + !write(0,*) me,' after g2l_ins ',psb_errstatus_fatal(),info if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_ai_,name,& diff --git a/base/tools/psb_sspins.F90 b/base/tools/psb_sspins.F90 index 39e4ad79..696f756f 100644 --- a/base/tools/psb_sspins.F90 +++ b/base/tools/psb_sspins.F90 @@ -134,7 +134,7 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & a_err='allocate',i_err=(/info/)) goto 9999 end if -#if defined(OPENMP) +#if 0 && defined(OPENMP) block logical :: is_in_parallel is_in_parallel = omp_in_parallel() diff --git a/base/tools/psb_zspins.F90 b/base/tools/psb_zspins.F90 index 0c0ff91f..3b33d80f 100644 --- a/base/tools/psb_zspins.F90 +++ b/base/tools/psb_zspins.F90 @@ -134,7 +134,7 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & a_err='allocate',i_err=(/info/)) goto 9999 end if -#if defined(OPENMP) +#if 0 && defined(OPENMP) block logical :: is_in_parallel is_in_parallel = omp_in_parallel()