Avoid excessive OpenMP overhead in SPINS. Will have to be redesigned.

repackage
sfilippone 1 month ago
parent 4e71fa971c
commit 772533b6f1

@ -332,7 +332,7 @@ contains
if (present(mask)) then if (present(mask)) then
if (idxmap%is_asb()) then if (idxmap%is_asb()) then
!$omp parallel do default(none) schedule(dynamic) & ! $ o m p parallel do default(none) schedule(static) &
! $ o m p shared(mask,idx,idxmap,is) & ! $ o m p shared(mask,idx,idxmap,is) &
! $ o m p private(i) ! $ o m p private(i)
do i=1, is do i=1, is
@ -346,7 +346,7 @@ contains
end do end do
! $ o m p end parallel do ! $ o m p end parallel do
else if (idxmap%is_valid()) then else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) & ! $ o m p parallel do default(none) schedule(static) &
! $ o m p shared(mask,idx,idxmap,is) & ! $ o m p shared(mask,idx,idxmap,is) &
! $ o m p private(i) ! $ o m p private(i)
do i=1,is do i=1,is
@ -368,7 +368,7 @@ contains
else if (.not.present(mask)) then else if (.not.present(mask)) then
if (idxmap%is_asb()) then if (idxmap%is_asb()) then
!$omp parallel do default(none) schedule(dynamic) & ! $ o m p parallel do default(none) schedule(static) &
! $ o m p shared(idx,idxmap,is) & ! $ o m p shared(idx,idxmap,is) &
! $ o m p private(i) ! $ o m p private(i)
do i=1, is do i=1, is
@ -380,7 +380,7 @@ contains
end do end do
! $ o m p end parallel do ! $ o m p end parallel do
else if (idxmap%is_valid()) then else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) & ! $ o m p parallel do default(none) schedule(static) &
! $ o m p shared(idx,idxmap,is) & ! $ o m p shared(idx,idxmap,is) &
! $ o m p private(i) ! $ o m p private(i)
do i=1,is do i=1,is

@ -2892,8 +2892,10 @@ subroutine psb_d_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,& call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,&
& imin,imax,jmin,jmax,info) & imin,imax,jmin,jmax,info)
#if !defined(OPENMP) #if !defined(OPENMP)
!$omp critical
nza = nzaold nza = nzaold
call a%set_nzeros(nza) call a%set_nzeros(nza)
!$omp end critical
#endif #endif
call a%set_sorted(.false.) call a%set_sorted(.false.)

@ -78,6 +78,9 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
integer(psb_lpk_), allocatable :: lila(:),ljla(:) integer(psb_lpk_), allocatable :: lila(:),ljla(:)
real(psb_dpk_), allocatable :: lval(:) real(psb_dpk_), allocatable :: lval(:)
character(len=20) :: name character(len=20) :: name
logical, parameter :: do_timings=.true.
integer(psb_ipk_), save :: bph1=-1, bph2=-1, bph3=-1
integer(psb_ipk_), save :: bph11=-1, bph12=-1, bph13=-1
info = psb_success_ info = psb_success_
name = 'psb_dspins' name = 'psb_dspins'
@ -120,6 +123,18 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
else else
local_ = .false. local_ = .false.
endif endif
if ((do_timings).and.(bph1==-1)) &
& bph1 = psb_get_timer_idx("SPI: g2l1 ")
if ((do_timings).and.(bph2==-1)) &
& bph2 = psb_get_timer_idx("SPI: g2li1")
if ((do_timings).and.(bph3==-1)) &
& bph3 = psb_get_timer_idx("SPI: cspu1")
if ((do_timings).and.(bph11==-1)) &
& bph11 = psb_get_timer_idx("SPI: g2l2 ")
if ((do_timings).and.(bph12==-1)) &
& bph12 = psb_get_timer_idx("SPI: g2li2")
if ((do_timings).and.(bph13==-1)) &
& bph13 = psb_get_timer_idx("SPI: cspu2")
if (desc_a%is_bld()) then if (desc_a%is_bld()) then
@ -134,13 +149,22 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
& a_err='allocate',i_err=(/info/)) & a_err='allocate',i_err=(/info/))
goto 9999 goto 9999
end if end if
#if defined(OPENMP) #if 0 && defined(OPENMP)
block block
logical :: is_in_parallel logical :: is_in_parallel
is_in_parallel = omp_in_parallel() is_in_parallel = omp_in_parallel()
!write(0,*) 'IN PARALLEL:',is_in_parallel
if (is_in_parallel) then if (is_in_parallel) then
!$omp parallel private(ila,jla,nrow,ncol,nnl,k) !$omp single
if (do_timings) call psb_tic(bph1)
!$omp end single
!$omp parallel private(ila,jla,nrow,ncol,nnl,k,lila,ljla,lval)
call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.)
!$omp barrier
!$omp single
if (do_timings) call psb_toc(bph1)
if (do_timings) call psb_tic(bph2)
!$omp end single
!$omp critical(spins) !$omp critical(spins)
if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,& if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,&
& mask=(ila(1:nz)>0)) & mask=(ila(1:nz)>0))
@ -151,6 +175,10 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
& a_err='psb_cdins',i_err=(/info/)) & a_err='psb_cdins',i_err=(/info/))
goto 9998 goto 9998
end if end if
!$omp single
if (do_timings) call psb_toc(bph2)
if (do_timings) call psb_tic(bph3)
!$omp end single
nrow = desc_a%get_local_rows() nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols() ncol = desc_a%get_local_cols()
!write(0,*) me,' Before csput',psb_errstatus_fatal() !write(0,*) me,' Before csput',psb_errstatus_fatal()
@ -186,10 +214,16 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
call psb_errpush(info,name) call psb_errpush(info,name)
end if end if
9998 continue 9998 continue
!$omp single
if (do_timings) call psb_toc(bph3)
!$omp end single
!write(0,*) me,' after csput',psb_errstatus_fatal() !write(0,*) me,' after csput',psb_errstatus_fatal()
!$omp end parallel !$omp end parallel
else else if (.not.is_in_parallel) then
if (do_timings) call psb_tic(bph11)
call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.)
if (do_timings) call psb_toc(bph11)
if (do_timings) call psb_tic(bph12)
!write(0,*) me,' Before g2l_ins ',psb_errstatus_fatal() !write(0,*) me,' Before g2l_ins ',psb_errstatus_fatal()
if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,& if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,&
& mask=(ila(1:nz)>0)) & mask=(ila(1:nz)>0))
@ -199,6 +233,8 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
& a_err='psb_cdins',i_err=(/info/)) & a_err='psb_cdins',i_err=(/info/))
goto 9999 goto 9999
end if end if
if (do_timings) call psb_toc(bph12)
if (do_timings) call psb_tic(bph13)
nrow = desc_a%get_local_rows() nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols() ncol = desc_a%get_local_cols()
!write(0,*) me,' Before csput',psb_errstatus_fatal() !write(0,*) me,' Before csput',psb_errstatus_fatal()
@ -234,15 +270,21 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 goto 9999
end if end if
if (do_timings) call psb_toc(bph13)
end if end if
end block end block
#else #else
!write(0,*) me,' Before g2l ',psb_errstatus_fatal() !write(0,*) me,' Before g2l ',psb_errstatus_fatal()
call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.)
#if defined(OPENMP)
!$omp critical(g2lins)
#endif
if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,& if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,&
& mask=(ila(1:nz)>0)) & mask=(ila(1:nz)>0))
#if defined(OPENMP)
!$omp endcritical(g2lins)
#endif
!write(0,*) me,' after g2l_ins ',psb_errstatus_fatal(),info !write(0,*) me,' after g2l_ins ',psb_errstatus_fatal(),info
if (info /= psb_success_) then if (info /= psb_success_) then

Loading…
Cancel
Save