From 772533b6f1a64ebb84b36c8777227695564393e8 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 23 Jan 2025 15:38:05 +0100 Subject: [PATCH] Avoid excessive OpenMP overhead in SPINS. Will have to be redesigned. --- base/modules/desc/psb_gen_block_map_mod.F90 | 64 ++++----- base/modules/desc/psb_hash_map_mod.F90 | 136 ++++++++++---------- base/modules/desc/psb_list_map_mod.F90 | 56 ++++---- base/modules/desc/psb_repl_map_mod.F90 | 128 +++++++++--------- base/serial/impl/psb_d_coo_impl.F90 | 10 +- base/tools/psb_dspins.F90 | 56 +++++++- 6 files changed, 247 insertions(+), 203 deletions(-) diff --git a/base/modules/desc/psb_gen_block_map_mod.F90 b/base/modules/desc/psb_gen_block_map_mod.F90 index 82a4cc15..6fb86621 100644 --- a/base/modules/desc/psb_gen_block_map_mod.F90 +++ b/base/modules/desc/psb_gen_block_map_mod.F90 @@ -215,9 +215,9 @@ contains end if if (present(mask)) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,idx,idxmap,owned_,info) & - !$omp private(i) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,idx,idxmap,owned_,info) & + ! $ o m p private(i) do i=1, size(idx) if (mask(i)) then if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then @@ -231,11 +231,11 @@ contains end if end if end do - !$omp end parallel do + ! $ o m p end parallel do else if (.not.present(mask)) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(idx,idxmap,owned_,info) & - !$omp private(i) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(idx,idxmap,owned_,info) & + ! $ o m p private(i) do i=1, size(idx) if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then idx(i) = idxmap%min_glob_row + idx(i) - 1 @@ -247,7 +247,7 @@ contains info = -1 end if end do - !$omp end parallel do + ! $ o m p end parallel do end if end subroutine block_ll2gv1 @@ -281,9 +281,9 @@ contains end if if (present(mask)) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,idxin,idxout,idxmap,owned_,info,im) & - !$omp private(i) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,idxin,idxout,idxmap,owned_,info,im) & + ! $ o m p private(i) do i=1, im if (mask(i)) then if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then @@ -297,11 +297,11 @@ contains end if end if end do - !$omp end parallel do + ! $ o m p end parallel do else if (.not.present(mask)) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(idxin,idxout,idxmap,owned_,info,im) & - !$omp private(i) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(idxin,idxout,idxmap,owned_,info,im) & + ! $ o m p private(i) do i=1, im if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then idxout(i) = idxmap%min_glob_row + idxin(i) - 1 @@ -313,7 +313,7 @@ contains info = -1 end if end do - !$omp end parallel do + ! $ o m p end parallel do end if if (is > im) then @@ -400,9 +400,9 @@ contains if (present(mask)) then if (idxmap%is_asb()) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,is,idx,idxmap,owned_) & - !$omp private(i,nv,tidx) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,is,idx,idxmap,owned_) & + ! $ o m p private(i,nv,tidx) do i=1, is if (mask(i)) then if ((idxmap%min_glob_row <= idx(i)).and. & @@ -419,11 +419,11 @@ contains end if end if end do - !$omp end parallel do + ! $ o m p end parallel do else if (idxmap%is_valid()) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,is,idx,idxmap,owned_) & - !$omp private(i,ip,lip,tidx,info) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,is,idx,idxmap,owned_) & + ! $ o m p private(i,ip,lip,tidx,info) do i=1,is if (mask(i)) then if ((idxmap%min_glob_row <= idx(i)).and.& @@ -439,7 +439,7 @@ contains end if end if end do - !$omp end parallel do + ! $ o m p end parallel do else idx(1:is) = -1 info = -1 @@ -448,9 +448,9 @@ contains else if (.not.present(mask)) then if (idxmap%is_asb()) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(is,idx,idxmap,owned_) & - !$omp private(i,nv,tidx) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(is,idx,idxmap,owned_) & + ! $ o m p private(i,nv,tidx) do i=1, is if ((idxmap%min_glob_row <= idx(i)).and.& & (idx(i) <= idxmap%max_glob_row)) then @@ -465,11 +465,11 @@ contains idx(i) = -1 end if end do - !$omp end parallel do + ! $ o m p end parallel do else if (idxmap%is_valid()) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(is,idx,idxmap,owned_) & - !$omp private(i,ip,lip,tidx,info) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(is,idx,idxmap,owned_) & + ! $ o m p private(i,ip,lip,tidx,info) do i=1,is if ((idxmap%min_glob_row <= idx(i)).and.& & (idx(i) <= idxmap%max_glob_row)) then @@ -483,7 +483,7 @@ contains idx(i) = -1 end if end do - !$omp end parallel do + ! $ o m p end parallel do else idx(1:is) = -1 info = -1 diff --git a/base/modules/desc/psb_hash_map_mod.F90 b/base/modules/desc/psb_hash_map_mod.F90 index c3d833c6..b16edaba 100644 --- a/base/modules/desc/psb_hash_map_mod.F90 +++ b/base/modules/desc/psb_hash_map_mod.F90 @@ -221,9 +221,9 @@ contains if (present(mask)) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,idx,idxmap,owned_) & - !$omp private(i) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,idx,idxmap,owned_) & + ! $ o m p private(i) do i=1, size(idx) if (mask(i)) then if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then @@ -236,12 +236,12 @@ contains end if end if end do - !$omp end parallel do + ! $ o m p end parallel do else if (.not.present(mask)) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(idx,idxmap,owned_) & - !$omp private(i) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(idx,idxmap,owned_) & + ! $ o m p private(i) do i=1, size(idx) if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then idx(i) = idxmap%loc_to_glob(idx(i)) @@ -252,7 +252,7 @@ contains idx(i) = -1 end if end do - !$omp end parallel do + ! $ o m p end parallel do end if end subroutine hash_l2gv1 @@ -369,9 +369,9 @@ contains else if (idxmap%is_valid()) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,is,idx,mglob,idxmap,nrm,ncol,nrow,owned_) & - !$omp private(i,ip,lip,tlip,info) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,is,idx,mglob,idxmap,nrm,ncol,nrow,owned_) & + ! $ o m p private(i,ip,lip,tlip,info) do i = 1, is if (mask(i)) then ip = idx(i) @@ -397,7 +397,7 @@ contains endif end if enddo - !$omp end parallel do + ! $ o m p end parallel do else write(0,*) 'Hash status: invalid ',idxmap%get_state() idx(1:is) = -1 @@ -413,9 +413,9 @@ contains else if (idxmap%is_valid()) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(is,idx,mglob,idxmap,nrm,ncol,nrow,owned_) & - !$omp private(i,ip,lip,tlip,info) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(is,idx,mglob,idxmap,nrm,ncol,nrow,owned_) & + ! $ o m p private(i,ip,lip,tlip,info) do i = 1, is ip = idx(i) if ((ip < 1 ).or.(ip>mglob)) then @@ -439,7 +439,7 @@ contains idx(i) = lip endif enddo - !$omp end parallel do + ! $ o m p end parallel do else write(0,*) 'Hash status: invalid ',idxmap%get_state() idx(1:is) = -1 @@ -503,9 +503,9 @@ contains else if (idxmap%is_valid()) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,is,idxin,idxout,mglob,idxmap,nrm,ncol,nrow,owned_) & - !$omp private(i,ip,lip,tlip,info) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,is,idxin,idxout,mglob,idxmap,nrm,ncol,nrow,owned_) & + ! $ o m p private(i,ip,lip,tlip,info) do i = 1, is if (mask(i)) then ip = idxin(i) @@ -531,7 +531,7 @@ contains endif end if enddo - !$omp end parallel do + ! $ o m p end parallel do else write(0,*) 'Hash status: invalid ',idxmap%get_state() idxout(1:is) = -1 @@ -547,9 +547,9 @@ contains else if (idxmap%is_valid()) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(is,idxin,idxout,mglob,idxmap,nrm,ncol,nrow,owned_) & - !$omp private(i,ip,lip,tlip,info) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(is,idxin,idxout,mglob,idxmap,nrm,ncol,nrow,owned_) & + ! $ o m p private(i,ip,lip,tlip,info) do i = 1, is ip = idxin(i) if ((ip < 1 ).or.(ip>mglob)) then @@ -573,7 +573,7 @@ contains idxout(i) = lip endif enddo - !$omp end parallel do + ! $ o m p end parallel do else write(0,*) 'Hash status: invalid ',idxmap%get_state() idxout(1:is) = -1 @@ -704,10 +704,10 @@ contains if (present(lidx)) then if (present(mask)) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(lidx,mask,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & - !$omp private(i,ip,lip,tlip,nxt,info) & - !$omp reduction(.AND.:isLoopValid) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(lidx,mask,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & + ! $ o m p private(i,ip,lip,tlip,nxt,info) & + ! $ o m p reduction(.AND.:isLoopValid) do i = 1, is if (mask(i)) then ip = idx(i) @@ -722,7 +722,7 @@ contains idx(i) = lip info = psb_success_ else - !$omp critical(hash_g2l_ins) + ! $ o m p critical(hash_g2l_ins) tlip = lip nxt = lidx(i) if (nxt <= nrow) then @@ -758,20 +758,20 @@ contains end if end if endif - !$omp end critical(hash_g2l_ins) + ! $ o m p end critical(hash_g2l_ins) end if else idx(i) = -1 end if enddo - !$omp end parallel do + ! $ o m p end parallel do else if (.not.present(mask)) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(lidx,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & - !$omp private(i,ip,lip,tlip,nxt,info) & - !$omp reduction(.AND.:isLoopValid) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(lidx,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & + ! $ o m p private(i,ip,lip,tlip,nxt,info) & + ! $ o m p reduction(.AND.:isLoopValid) do i = 1, is ip = idx(i) if ((ip < 1 ).or.(ip>mglob) ) then @@ -785,7 +785,7 @@ contains idx(i) = lip info = psb_success_ else - !$omp critical(hash_g2l_ins) + ! $ o m p critical(hash_g2l_ins) tlip = lip nxt = lidx(i) if (nxt <= nrow) then @@ -821,19 +821,19 @@ contains end if end if endif - !$omp end critical(hash_g2l_ins) + ! $ o m p end critical(hash_g2l_ins) end if enddo - !$omp end parallel do + ! $ o m p end parallel do end if else if (.not.present(lidx)) then if (present(mask)) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & - !$omp private(i,ip,lip,tlip,nxt,info) & - !$omp reduction(.AND.:isLoopValid) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & + ! $ o m p private(i,ip,lip,tlip,nxt,info) & + ! $ o m p reduction(.AND.:isLoopValid) do i = 1, is if (mask(i)) then ip = idx(i) @@ -848,7 +848,7 @@ contains idx(i) = lip info = psb_success_ else - !$omp critical(hash_g2l_ins) + ! $ o m p critical(hash_g2l_ins) ncol = idxmap%get_lc() nxt = ncol + 1 call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,& @@ -879,20 +879,20 @@ contains isLoopValid = .false. end if end if - !$omp end critical(hash_g2l_ins) + ! $ o m p end critical(hash_g2l_ins) end if else idx(i) = -1 end if enddo - !$omp end parallel do + ! $ o m p end parallel do else if (.not.present(mask)) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & - !$omp private(i,ip,lip,tlip,nxt,info) & - !$omp reduction(.AND.:isLoopValid) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & + ! $ o m p private(i,ip,lip,tlip,nxt,info) & + ! $ o m p reduction(.AND.:isLoopValid) do i = 1, is ip = idx(i) if ((ip < 1 ).or.(ip>mglob)) then @@ -906,7 +906,7 @@ contains idx(i) = lip info = psb_success_ else - !$omp critical(hash_g2l_ins) + ! $ o m p critical(hash_g2l_ins) ncol = idxmap%get_lc() nxt = ncol + 1 call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,& @@ -937,10 +937,10 @@ contains isLoopValid = .false. end if end if - !$omp end critical(hash_g2l_ins) + ! $ o m p end critical(hash_g2l_ins) end if enddo - !$omp end parallel do + ! $ o m p end parallel do end if end if else @@ -1648,9 +1648,9 @@ contains ! for a width of psb_hash_bits ! if (present(mask)) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(n,hashv,hashmask,x,glb_lc,nrm,mask) & - !$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(n,hashv,hashmask,x,glb_lc,nrm,mask) & + ! $ o m p private(i,key,idx,ih,nh,tmp,lb,ub,lm) do i=1, n if (mask(i)) then key = x(i) @@ -1688,11 +1688,11 @@ contains end if end if end do - !$omp end parallel do + ! $ o m p end parallel do else - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(n,hashv,hashmask,x,glb_lc,nrm) & - !$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(n,hashv,hashmask,x,glb_lc,nrm) & + ! $ o m p private(i,key,idx,ih,nh,tmp,lb,ub,lm) do i=1, n key = x(i) ih = iand(key,hashmask) @@ -1728,7 +1728,7 @@ contains x(i) = tmp end if end do - !$omp end parallel do + ! $ o m p end parallel do end if end subroutine hash_inner_cnv1 @@ -1751,9 +1751,9 @@ contains ! for a width of psb_hash_bits ! if (present(mask)) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(n,hashv,hashmask,x,y,glb_lc,nrm,mask,psb_err_unit) & - !$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(n,hashv,hashmask,x,y,glb_lc,nrm,mask,psb_err_unit) & + ! $ o m p private(i,key,idx,ih,nh,tmp,lb,ub,lm) do i=1, n if (mask(i)) then key = x(i) @@ -1794,12 +1794,12 @@ contains end if end if end do - !$omp end parallel do + ! $ o m p end parallel do else - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(n,hashv,hashmask,x,y,glb_lc,nrm,psb_err_unit) & - !$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(n,hashv,hashmask,x,y,glb_lc,nrm,psb_err_unit) & + ! $ o m p private(i,key,idx,ih,nh,tmp,lb,ub,lm) do i=1, n key = x(i) ih = iand(key,hashmask) @@ -1838,7 +1838,7 @@ contains y(i) = tmp end if end do - !$omp end parallel do + ! $ o m p end parallel do end if end subroutine hash_inner_cnv2 diff --git a/base/modules/desc/psb_list_map_mod.F90 b/base/modules/desc/psb_list_map_mod.F90 index 913145da..446339bd 100644 --- a/base/modules/desc/psb_list_map_mod.F90 +++ b/base/modules/desc/psb_list_map_mod.F90 @@ -179,9 +179,9 @@ contains if (present(mask)) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,idx,idxmap,owned_) & - !$omp private(i) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,idx,idxmap,owned_) & + ! $ o m p private(i) do i=1, size(idx) if (mask(i)) then if ((1<=idx(i)).and.(idx(i) <= idxmap%get_lr())) then @@ -194,12 +194,12 @@ contains end if end if end do - !$omp end parallel do + ! $ o m p end parallel do else if (.not.present(mask)) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(idx,idxmap,owned_) & - !$omp private(i) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(idx,idxmap,owned_) & + ! $ o m p private(i) do i=1, size(idx) if ((1<=idx(i)).and.(idx(i) <= idxmap%get_lr())) then idx(i) = idxmap%loc_to_glob(idx(i)) @@ -210,7 +210,7 @@ contains idx(i) = -1 end if end do - !$omp end parallel do + ! $ o m p end parallel do end if @@ -305,9 +305,9 @@ contains if (present(mask)) then if (idxmap%is_valid()) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,is,idx,idxmap,owned_) & - !$omp private(i,ix) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,is,idx,idxmap,owned_) & + ! $ o m p private(i,ix) do i=1,is if (mask(i)) then if ((1 <= idx(i)).and.(idx(i) <= idxmap%global_rows)) then @@ -319,7 +319,7 @@ contains end if end if end do - !$omp end parallel do + ! $ o m p end parallel do else idx(1:is) = -1 info = -1 @@ -328,9 +328,9 @@ contains else if (.not.present(mask)) then if (idxmap%is_valid()) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(is,idx,idxmap,owned_) & - !$omp private(i,ix) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(is,idx,idxmap,owned_) & + ! $ o m p private(i,ix) do i=1, is if ((1 <= idx(i)).and.(idx(i) <= idxmap%global_rows)) then ix = idxmap%glob_to_loc(idx(i)) @@ -340,7 +340,7 @@ contains idx(i) = -1 end if end do - !$omp end parallel do + ! $ o m p end parallel do else idx(1:is) = -1 info = -1 @@ -380,9 +380,9 @@ contains if (present(mask)) then if (idxmap%is_valid()) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,is,idxin,idxout,idxmap,owned_) & - !$omp private(i,ix) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,is,idxin,idxout,idxmap,owned_) & + ! $ o m p private(i,ix) do i=1,is if (mask(i)) then if ((1 <= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then @@ -394,7 +394,7 @@ contains end if end if end do - !$omp end parallel do + ! $ o m p end parallel do else idxout(1:is) = -1 info = -1 @@ -403,9 +403,9 @@ contains else if (.not.present(mask)) then if (idxmap%is_valid()) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(is,idxin,idxout,idxmap,owned_) & - !$omp private(i,ix) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(is,idxin,idxout,idxmap,owned_) & + ! $ o m p private(i,ix) do i=1, is if ((1 <= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then ix = idxmap%glob_to_loc(idxin(i)) @@ -415,7 +415,7 @@ contains idxout(i) = -1 end if end do - !$omp end parallel do + ! $ o m p end parallel do else idxout(1:is) = -1 info = -1 @@ -564,9 +564,9 @@ contains else if (.not.present(mask)) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,is,idx,idxmap,laddsz,lidx) & - !$omp private(i,ix,info) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,is,idx,idxmap,laddsz,lidx) & + ! $ o m p private(i,ix,info) ! $ o m p reduction(.AND.:isLoopValid) do i=1, is if (info /= 0) cycle @@ -606,7 +606,7 @@ contains idx(i) = -1 end if end do - !$omp end parallel do + ! $ o m p end parallel do end if else if (.not.present(lidx)) then diff --git a/base/modules/desc/psb_repl_map_mod.F90 b/base/modules/desc/psb_repl_map_mod.F90 index f68ae3b8..eef1e5d1 100644 --- a/base/modules/desc/psb_repl_map_mod.F90 +++ b/base/modules/desc/psb_repl_map_mod.F90 @@ -332,9 +332,9 @@ contains if (present(mask)) then if (idxmap%is_asb()) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,idx,idxmap,is) & - !$omp private(i) + ! $ o m p parallel do default(none) schedule(static) & + ! $ o m p shared(mask,idx,idxmap,is) & + ! $ o m p private(i) do i=1, is if (mask(i)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then @@ -344,11 +344,11 @@ contains end if end if end do - !$omp end parallel do + ! $ o m p end parallel do else if (idxmap%is_valid()) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,idx,idxmap,is) & - !$omp private(i) + ! $ o m p parallel do default(none) schedule(static) & + ! $ o m p shared(mask,idx,idxmap,is) & + ! $ o m p private(i) do i=1,is if (mask(i)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then @@ -359,7 +359,7 @@ contains end if end if end do - !$omp end parallel do + ! $ o m p end parallel do else idx(1:is) = -1 info = -1 @@ -368,9 +368,9 @@ contains else if (.not.present(mask)) then if (idxmap%is_asb()) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(idx,idxmap,is) & - !$omp private(i) + ! $ o m p parallel do default(none) schedule(static) & + ! $ o m p shared(idx,idxmap,is) & + ! $ o m p private(i) do i=1, is if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then ! do nothing @@ -378,11 +378,11 @@ contains idx(i) = -1 end if end do - !$omp end parallel do + ! $ o m p end parallel do else if (idxmap%is_valid()) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(idx,idxmap,is) & - !$omp private(i) + ! $ o m p parallel do default(none) schedule(static) & + ! $ o m p shared(idx,idxmap,is) & + ! $ o m p private(i) do i=1,is if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then ! do nothing @@ -390,7 +390,7 @@ contains idx(i) = -1 end if end do - !$omp end parallel do + ! $ o m p end parallel do else idx(1:is) = -1 info = -1 @@ -433,9 +433,9 @@ contains if (present(mask)) then if (idxmap%is_asb()) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,idxin,idxout,idxmap,im) & - !$omp private(i) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,idxin,idxout,idxmap,im) & + ! $ o m p private(i) do i=1, im if (mask(i)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then @@ -445,11 +445,11 @@ contains end if end if end do - !$omp end parallel do + ! $ o m p end parallel do else if (idxmap%is_valid()) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,idxin,idxout,idxmap,im) & - !$omp private(i) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,idxin,idxout,idxmap,im) & + ! $ o m p private(i) do i=1,im if (mask(i)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then @@ -459,7 +459,7 @@ contains end if end if end do - !$omp end parallel do + ! $ o m p end parallel do else idxout(1:im) = -1 info = -1 @@ -468,9 +468,9 @@ contains else if (.not.present(mask)) then if (idxmap%is_asb()) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(idxin,idxout,idxmap,im) & - !$omp private(i) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(idxin,idxout,idxmap,im) & + ! $ o m p private(i) do i=1, im if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then idxout(i) = idxin(i) @@ -478,11 +478,11 @@ contains idxout(i) = -1 end if end do - !$omp end parallel do + ! $ o m p end parallel do else if (idxmap%is_valid()) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(idxin,idxout,idxmap,im) & - !$omp private(i) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(idxin,idxout,idxmap,im) & + ! $ o m p private(i) do i=1,im if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then idxout(i) = idxin(i) @@ -490,7 +490,7 @@ contains idxout(i) = -1 end if end do - !$omp end parallel do + ! $ o m p end parallel do else idxout(1:im) = -1 info = -1 @@ -597,9 +597,9 @@ contains else if (idxmap%is_valid()) then if (present(lidx)) then if (present(mask)) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,idx,lidx,is,idxmap) & - !$omp private(i) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,idx,lidx,is,idxmap) & + ! $ o m p private(i) do i=1, is if (mask(i)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then @@ -609,11 +609,11 @@ contains end if end if end do - !$omp end parallel do + ! $ o m p end parallel do else if (.not.present(mask)) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,idx,lidx,is,idxmap) & - !$omp private(i) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,idx,lidx,is,idxmap) & + ! $ o m p private(i) do i=1, is if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then ! do nothing @@ -621,13 +621,13 @@ contains idx(i) = -1 end if end do - !$omp end parallel do + ! $ o m p end parallel do end if else if (.not.present(lidx)) then if (present(mask)) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,idx,is,idxmap) & - !$omp private(i) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,idx,is,idxmap) & + ! $ o m p private(i) do i=1, is if (mask(i)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then @@ -637,11 +637,11 @@ contains end if end if end do - !$omp end parallel do + ! $ o m p end parallel do else if (.not.present(mask)) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(idx,is,idxmap) & - !$omp private(i) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(idx,is,idxmap) & + ! $ o m p private(i) do i=1, is if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then ! do nothing @@ -649,7 +649,7 @@ contains idx(i) = -1 end if end do - !$omp end parallel do + ! $ o m p end parallel do end if end if else @@ -697,9 +697,9 @@ contains else if (idxmap%is_valid()) then if (present(lidx)) then if (present(mask)) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,idxin,idxout,im,idxmap) & - !$omp private(i) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,idxin,idxout,im,idxmap) & + ! $ o m p private(i) do i=1, im if (mask(i)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then @@ -709,11 +709,11 @@ contains end if end if end do - !$omp end parallel do + ! $ o m p end parallel do else if (.not.present(mask)) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(idxin,idxout,im,idxmap) & - !$omp private(i) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(idxin,idxout,im,idxmap) & + ! $ o m p private(i) do i=1, im if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then idxout(i) = idxin(i) @@ -721,13 +721,13 @@ contains idxout(i) = -1 end if end do - !$omp end parallel do + ! $ o m p end parallel do end if else if (.not.present(lidx)) then if (present(mask)) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,idxin,idxout,im,idxmap) & - !$omp private(i) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,idxin,idxout,im,idxmap) & + ! $ o m p private(i) do i=1, im if (mask(i)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then @@ -737,11 +737,11 @@ contains end if end if end do - !$omp end parallel do + ! $ o m p end parallel do else if (.not.present(mask)) then - !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(idxin,idxout,im,idxmap) & - !$omp private(i) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(idxin,idxout,im,idxmap) & + ! $ o m p private(i) do i=1, im if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then idxout(i) = idxin(i) @@ -749,7 +749,7 @@ contains idxout(i) = -1 end if end do - !$omp end parallel do + ! $ o m p end parallel do end if end if else diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90 index 743ae3a3..c1f6f692 100644 --- a/base/serial/impl/psb_d_coo_impl.F90 +++ b/base/serial/impl/psb_d_coo_impl.F90 @@ -2892,8 +2892,10 @@ subroutine psb_d_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,& & imin,imax,jmin,jmax,info) #if !defined(OPENMP) + !$omp critical nza = nzaold call a%set_nzeros(nza) + !$omp end critical #endif call a%set_sorted(.false.) @@ -2951,9 +2953,9 @@ contains ! the serial version: each element is stored in data ! structures but the invalid ones are stored as '-1' values. ! These values will be filtered in a future fixing process. - !$OMP PARALLEL DO default(none) schedule(STATIC) & - !$OMP shared(nz,imin,imax,jmin,jmax,ia,ja,val,ia1,ia2,aspk,nza) & - !$OMP private(ir,ic,i) + ! $ O M P PARALLEL DO default(none) schedule(STATIC) & + ! $ O M P shared(nz,imin,imax,jmin,jmax,ia,ja,val,ia1,ia2,aspk,nza) & + ! $ O M P private(ir,ic,i) do i=1,nz ir = ia(i) ic = ja(i) @@ -2967,7 +2969,7 @@ contains aspk(nza+i) = -1 end if end do - !$OMP END PARALLEL DO + ! $ O M P END PARALLEL DO nza = nza + nz #else do i=1, nz diff --git a/base/tools/psb_dspins.F90 b/base/tools/psb_dspins.F90 index cdeaa931..a3215829 100644 --- a/base/tools/psb_dspins.F90 +++ b/base/tools/psb_dspins.F90 @@ -78,7 +78,10 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) integer(psb_lpk_), allocatable :: lila(:),ljla(:) real(psb_dpk_), allocatable :: lval(:) character(len=20) :: name - + logical, parameter :: do_timings=.true. + integer(psb_ipk_), save :: bph1=-1, bph2=-1, bph3=-1 + integer(psb_ipk_), save :: bph11=-1, bph12=-1, bph13=-1 + info = psb_success_ name = 'psb_dspins' call psb_erractionsave(err_act) @@ -120,6 +123,18 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) else local_ = .false. endif + if ((do_timings).and.(bph1==-1)) & + & bph1 = psb_get_timer_idx("SPI: g2l1 ") + if ((do_timings).and.(bph2==-1)) & + & bph2 = psb_get_timer_idx("SPI: g2li1") + if ((do_timings).and.(bph3==-1)) & + & bph3 = psb_get_timer_idx("SPI: cspu1") + if ((do_timings).and.(bph11==-1)) & + & bph11 = psb_get_timer_idx("SPI: g2l2 ") + if ((do_timings).and.(bph12==-1)) & + & bph12 = psb_get_timer_idx("SPI: g2li2") + if ((do_timings).and.(bph13==-1)) & + & bph13 = psb_get_timer_idx("SPI: cspu2") if (desc_a%is_bld()) then @@ -134,13 +149,22 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & a_err='allocate',i_err=(/info/)) goto 9999 end if -#if defined(OPENMP) +#if 0 && defined(OPENMP) block logical :: is_in_parallel is_in_parallel = omp_in_parallel() - if (is_in_parallel) then - !$omp parallel private(ila,jla,nrow,ncol,nnl,k) + !write(0,*) 'IN PARALLEL:',is_in_parallel + if (is_in_parallel) then + !$omp single + if (do_timings) call psb_tic(bph1) + !$omp end single + !$omp parallel private(ila,jla,nrow,ncol,nnl,k,lila,ljla,lval) call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) + !$omp barrier + !$omp single + if (do_timings) call psb_toc(bph1) + if (do_timings) call psb_tic(bph2) + !$omp end single !$omp critical(spins) if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,& & mask=(ila(1:nz)>0)) @@ -151,6 +175,10 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & a_err='psb_cdins',i_err=(/info/)) goto 9998 end if + !$omp single + if (do_timings) call psb_toc(bph2) + if (do_timings) call psb_tic(bph3) + !$omp end single nrow = desc_a%get_local_rows() ncol = desc_a%get_local_cols() !write(0,*) me,' Before csput',psb_errstatus_fatal() @@ -186,10 +214,16 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) call psb_errpush(info,name) end if 9998 continue + !$omp single + if (do_timings) call psb_toc(bph3) + !$omp end single !write(0,*) me,' after csput',psb_errstatus_fatal() !$omp end parallel - else + else if (.not.is_in_parallel) then + if (do_timings) call psb_tic(bph11) call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) + if (do_timings) call psb_toc(bph11) + if (do_timings) call psb_tic(bph12) !write(0,*) me,' Before g2l_ins ',psb_errstatus_fatal() if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,& & mask=(ila(1:nz)>0)) @@ -199,6 +233,8 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & a_err='psb_cdins',i_err=(/info/)) goto 9999 end if + if (do_timings) call psb_toc(bph12) + if (do_timings) call psb_tic(bph13) nrow = desc_a%get_local_rows() ncol = desc_a%get_local_cols() !write(0,*) me,' Before csput',psb_errstatus_fatal() @@ -234,16 +270,22 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) call psb_errpush(info,name) goto 9999 end if + if (do_timings) call psb_toc(bph13) end if end block #else !write(0,*) me,' Before g2l ',psb_errstatus_fatal() call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) - +#if defined(OPENMP) + !$omp critical(g2lins) +#endif if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,& & mask=(ila(1:nz)>0)) - +#if defined(OPENMP) + !$omp endcritical(g2lins) +#endif + !write(0,*) me,' after g2l_ins ',psb_errstatus_fatal(),info if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_ai_,name,&