Final OMP version of SOC1.

dev-openmp
sfilippone 1 year ago
parent c1ecb4ebec
commit 9e82d2e311

@ -187,38 +187,40 @@ subroutine amg_c_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
! been set because it is strongly connected to an entry J belonging to a ! been set because it is strongly connected to an entry J belonging to a
! different thread. ! different thread.
info = 0 !$omp parallel shared(bnds,idxs,locnaggr,ilaggr,nr,naggr,diag,theta,nths,info) &
!$omp parallel shared(bnds,locnaggr,ilaggr,nr,naggr,diag,theta,nths) &
!$omp private(icol,val,myth,kk) !$omp private(icol,val,myth,kk)
block block
integer(psb_ipk_) :: ii,nlp,k,kp,n,ia,isz, nc, i,j,m, nz, ilg, ip, rsz integer(psb_ipk_) :: ii,nlp,k,kp,n,ia,isz, nc, i,j,m, nz, ilg, ip, rsz
integer(psb_lpk_) :: itmp integer(psb_lpk_) :: itmp
nths = omp_get_num_threads()
myth = omp_get_thread_num()
rsz = nr/nths
if (myth < mod(nr,nths)) rsz = rsz + 1
!$omp master !$omp master
nths = omp_get_num_threads()
allocate(bnds(0:nths),locnaggr(0:nths+1)) allocate(bnds(0:nths),locnaggr(0:nths+1))
locnaggr(:) = 0 locnaggr(:) = 0
bnds(0) = 1 bnds(0) = 1
!$omp end master !$omp end master
!$omp barrier !$omp barrier
myth = omp_get_thread_num()
rsz = nr/nths
if (myth < mod(nr,nths)) rsz = rsz + 1
bnds(myth+1) = rsz bnds(myth+1) = rsz
!$omp barrier !$omp barrier
!$omp master !$omp master
do i=1,nths do i=1,nths
bnds(i) = bnds(i) + bnds(i-1) bnds(i) = bnds(i) + bnds(i-1)
end do end do
info = 0
!$omp end master !$omp end master
!$omp barrier !$omp barrier
!$omp do schedule(static) private(disjoint) reduction(max: info) !$omp do schedule(static) private(disjoint)
do kk=0, nths-1 do kk=0, nths-1
step1: do ii=bnds(kk), bnds(kk+1)-1 step1: do ii=bnds(kk), bnds(kk+1)-1
if (info /= 0) cycle
i = idxs(ii) i = idxs(ii)
if (info /= 0) cycle step1
if ((i<1).or.(i>nr)) then if ((i<1).or.(i>nr)) then
!$omp atomic write
info=psb_err_internal_error_ info=psb_err_internal_error_
!$omp end atomic
call psb_errpush(info,name) call psb_errpush(info,name)
cycle step1 cycle step1
!goto 9999 !goto 9999
@ -227,7 +229,9 @@ subroutine amg_c_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
if (ilaggr(i) == -(nr+1)) then if (ilaggr(i) == -(nr+1)) then
nz = (acsr%irp(i+1)-acsr%irp(i)) nz = (acsr%irp(i+1)-acsr%irp(i))
if ((nz<0).or.(nz>size(icol))) then if ((nz<0).or.(nz>size(icol))) then
!$omp atomic write
info=psb_err_internal_error_ info=psb_err_internal_error_
!$omp end atomic
call psb_errpush(info,name) call psb_errpush(info,name)
cycle step1 cycle step1
!goto 9999 !goto 9999
@ -263,13 +267,18 @@ subroutine amg_c_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
! However, a certain unrepeatability is already present ! However, a certain unrepeatability is already present
! because the sequence of aggregates is computed with a ! because the sequence of aggregates is computed with a
! different order than in serial mode. ! different order than in serial mode.
! In any case, even if the enteries of ILAGGR may be
! overwritten, the important thing is that each entry is
! consistent and they generate a correct aggregation map.
! !
disjoint = all(ilaggr(icol(1:ip)) == -(nr+1)).or.(ip==0) disjoint = all(ilaggr(icol(1:ip)) == -(nr+1)).or.(ip==0)
if (disjoint) then if (disjoint) then
locnaggr(kk) = locnaggr(kk) + 1 locnaggr(kk) = locnaggr(kk) + 1
itmp = (bnds(kk)-1+locnaggr(kk))*nths+kk itmp = (bnds(kk)-1+locnaggr(kk))*nths+kk
if (itmp < (bnds(kk)-1+locnaggr(kk))) then if (itmp < (bnds(kk)-1+locnaggr(kk))) then
info = 12345678 !$omp atomic update
info = max(12345678,info)
!$omp end atomic
cycle step1 cycle step1
end if end if
!$omp atomic write !$omp atomic write

@ -187,38 +187,40 @@ subroutine amg_d_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
! been set because it is strongly connected to an entry J belonging to a ! been set because it is strongly connected to an entry J belonging to a
! different thread. ! different thread.
info = 0 !$omp parallel shared(bnds,idxs,locnaggr,ilaggr,nr,naggr,diag,theta,nths,info) &
!$omp parallel shared(bnds,locnaggr,ilaggr,nr,naggr,diag,theta,nths) &
!$omp private(icol,val,myth,kk) !$omp private(icol,val,myth,kk)
block block
integer(psb_ipk_) :: ii,nlp,k,kp,n,ia,isz, nc, i,j,m, nz, ilg, ip, rsz integer(psb_ipk_) :: ii,nlp,k,kp,n,ia,isz, nc, i,j,m, nz, ilg, ip, rsz
integer(psb_lpk_) :: itmp integer(psb_lpk_) :: itmp
nths = omp_get_num_threads()
myth = omp_get_thread_num()
rsz = nr/nths
if (myth < mod(nr,nths)) rsz = rsz + 1
!$omp master !$omp master
nths = omp_get_num_threads()
allocate(bnds(0:nths),locnaggr(0:nths+1)) allocate(bnds(0:nths),locnaggr(0:nths+1))
locnaggr(:) = 0 locnaggr(:) = 0
bnds(0) = 1 bnds(0) = 1
!$omp end master !$omp end master
!$omp barrier !$omp barrier
myth = omp_get_thread_num()
rsz = nr/nths
if (myth < mod(nr,nths)) rsz = rsz + 1
bnds(myth+1) = rsz bnds(myth+1) = rsz
!$omp barrier !$omp barrier
!$omp master !$omp master
do i=1,nths do i=1,nths
bnds(i) = bnds(i) + bnds(i-1) bnds(i) = bnds(i) + bnds(i-1)
end do end do
info = 0
!$omp end master !$omp end master
!$omp barrier !$omp barrier
!$omp do schedule(static) private(disjoint) reduction(max: info) !$omp do schedule(static) private(disjoint)
do kk=0, nths-1 do kk=0, nths-1
step1: do ii=bnds(kk), bnds(kk+1)-1 step1: do ii=bnds(kk), bnds(kk+1)-1
if (info /= 0) cycle
i = idxs(ii) i = idxs(ii)
if (info /= 0) cycle step1
if ((i<1).or.(i>nr)) then if ((i<1).or.(i>nr)) then
!$omp atomic write
info=psb_err_internal_error_ info=psb_err_internal_error_
!$omp end atomic
call psb_errpush(info,name) call psb_errpush(info,name)
cycle step1 cycle step1
!goto 9999 !goto 9999
@ -227,7 +229,9 @@ subroutine amg_d_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
if (ilaggr(i) == -(nr+1)) then if (ilaggr(i) == -(nr+1)) then
nz = (acsr%irp(i+1)-acsr%irp(i)) nz = (acsr%irp(i+1)-acsr%irp(i))
if ((nz<0).or.(nz>size(icol))) then if ((nz<0).or.(nz>size(icol))) then
!$omp atomic write
info=psb_err_internal_error_ info=psb_err_internal_error_
!$omp end atomic
call psb_errpush(info,name) call psb_errpush(info,name)
cycle step1 cycle step1
!goto 9999 !goto 9999
@ -263,13 +267,18 @@ subroutine amg_d_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
! However, a certain unrepeatability is already present ! However, a certain unrepeatability is already present
! because the sequence of aggregates is computed with a ! because the sequence of aggregates is computed with a
! different order than in serial mode. ! different order than in serial mode.
! In any case, even if the enteries of ILAGGR may be
! overwritten, the important thing is that each entry is
! consistent and they generate a correct aggregation map.
! !
disjoint = all(ilaggr(icol(1:ip)) == -(nr+1)).or.(ip==0) disjoint = all(ilaggr(icol(1:ip)) == -(nr+1)).or.(ip==0)
if (disjoint) then if (disjoint) then
locnaggr(kk) = locnaggr(kk) + 1 locnaggr(kk) = locnaggr(kk) + 1
itmp = (bnds(kk)-1+locnaggr(kk))*nths+kk itmp = (bnds(kk)-1+locnaggr(kk))*nths+kk
if (itmp < (bnds(kk)-1+locnaggr(kk))) then if (itmp < (bnds(kk)-1+locnaggr(kk))) then
info = 12345678 !$omp atomic update
info = max(12345678,info)
!$omp end atomic
cycle step1 cycle step1
end if end if
!$omp atomic write !$omp atomic write

@ -187,38 +187,40 @@ subroutine amg_s_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
! been set because it is strongly connected to an entry J belonging to a ! been set because it is strongly connected to an entry J belonging to a
! different thread. ! different thread.
info = 0 !$omp parallel shared(bnds,idxs,locnaggr,ilaggr,nr,naggr,diag,theta,nths,info) &
!$omp parallel shared(bnds,locnaggr,ilaggr,nr,naggr,diag,theta,nths) &
!$omp private(icol,val,myth,kk) !$omp private(icol,val,myth,kk)
block block
integer(psb_ipk_) :: ii,nlp,k,kp,n,ia,isz, nc, i,j,m, nz, ilg, ip, rsz integer(psb_ipk_) :: ii,nlp,k,kp,n,ia,isz, nc, i,j,m, nz, ilg, ip, rsz
integer(psb_lpk_) :: itmp integer(psb_lpk_) :: itmp
nths = omp_get_num_threads()
myth = omp_get_thread_num()
rsz = nr/nths
if (myth < mod(nr,nths)) rsz = rsz + 1
!$omp master !$omp master
nths = omp_get_num_threads()
allocate(bnds(0:nths),locnaggr(0:nths+1)) allocate(bnds(0:nths),locnaggr(0:nths+1))
locnaggr(:) = 0 locnaggr(:) = 0
bnds(0) = 1 bnds(0) = 1
!$omp end master !$omp end master
!$omp barrier !$omp barrier
myth = omp_get_thread_num()
rsz = nr/nths
if (myth < mod(nr,nths)) rsz = rsz + 1
bnds(myth+1) = rsz bnds(myth+1) = rsz
!$omp barrier !$omp barrier
!$omp master !$omp master
do i=1,nths do i=1,nths
bnds(i) = bnds(i) + bnds(i-1) bnds(i) = bnds(i) + bnds(i-1)
end do end do
info = 0
!$omp end master !$omp end master
!$omp barrier !$omp barrier
!$omp do schedule(static) private(disjoint) reduction(max: info) !$omp do schedule(static) private(disjoint)
do kk=0, nths-1 do kk=0, nths-1
step1: do ii=bnds(kk), bnds(kk+1)-1 step1: do ii=bnds(kk), bnds(kk+1)-1
if (info /= 0) cycle
i = idxs(ii) i = idxs(ii)
if (info /= 0) cycle step1
if ((i<1).or.(i>nr)) then if ((i<1).or.(i>nr)) then
!$omp atomic write
info=psb_err_internal_error_ info=psb_err_internal_error_
!$omp end atomic
call psb_errpush(info,name) call psb_errpush(info,name)
cycle step1 cycle step1
!goto 9999 !goto 9999
@ -227,7 +229,9 @@ subroutine amg_s_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
if (ilaggr(i) == -(nr+1)) then if (ilaggr(i) == -(nr+1)) then
nz = (acsr%irp(i+1)-acsr%irp(i)) nz = (acsr%irp(i+1)-acsr%irp(i))
if ((nz<0).or.(nz>size(icol))) then if ((nz<0).or.(nz>size(icol))) then
!$omp atomic write
info=psb_err_internal_error_ info=psb_err_internal_error_
!$omp end atomic
call psb_errpush(info,name) call psb_errpush(info,name)
cycle step1 cycle step1
!goto 9999 !goto 9999
@ -263,13 +267,18 @@ subroutine amg_s_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
! However, a certain unrepeatability is already present ! However, a certain unrepeatability is already present
! because the sequence of aggregates is computed with a ! because the sequence of aggregates is computed with a
! different order than in serial mode. ! different order than in serial mode.
! In any case, even if the enteries of ILAGGR may be
! overwritten, the important thing is that each entry is
! consistent and they generate a correct aggregation map.
! !
disjoint = all(ilaggr(icol(1:ip)) == -(nr+1)).or.(ip==0) disjoint = all(ilaggr(icol(1:ip)) == -(nr+1)).or.(ip==0)
if (disjoint) then if (disjoint) then
locnaggr(kk) = locnaggr(kk) + 1 locnaggr(kk) = locnaggr(kk) + 1
itmp = (bnds(kk)-1+locnaggr(kk))*nths+kk itmp = (bnds(kk)-1+locnaggr(kk))*nths+kk
if (itmp < (bnds(kk)-1+locnaggr(kk))) then if (itmp < (bnds(kk)-1+locnaggr(kk))) then
info = 12345678 !$omp atomic update
info = max(12345678,info)
!$omp end atomic
cycle step1 cycle step1
end if end if
!$omp atomic write !$omp atomic write

@ -187,38 +187,40 @@ subroutine amg_z_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
! been set because it is strongly connected to an entry J belonging to a ! been set because it is strongly connected to an entry J belonging to a
! different thread. ! different thread.
info = 0 !$omp parallel shared(bnds,idxs,locnaggr,ilaggr,nr,naggr,diag,theta,nths,info) &
!$omp parallel shared(bnds,locnaggr,ilaggr,nr,naggr,diag,theta,nths) &
!$omp private(icol,val,myth,kk) !$omp private(icol,val,myth,kk)
block block
integer(psb_ipk_) :: ii,nlp,k,kp,n,ia,isz, nc, i,j,m, nz, ilg, ip, rsz integer(psb_ipk_) :: ii,nlp,k,kp,n,ia,isz, nc, i,j,m, nz, ilg, ip, rsz
integer(psb_lpk_) :: itmp integer(psb_lpk_) :: itmp
nths = omp_get_num_threads()
myth = omp_get_thread_num()
rsz = nr/nths
if (myth < mod(nr,nths)) rsz = rsz + 1
!$omp master !$omp master
nths = omp_get_num_threads()
allocate(bnds(0:nths),locnaggr(0:nths+1)) allocate(bnds(0:nths),locnaggr(0:nths+1))
locnaggr(:) = 0 locnaggr(:) = 0
bnds(0) = 1 bnds(0) = 1
!$omp end master !$omp end master
!$omp barrier !$omp barrier
myth = omp_get_thread_num()
rsz = nr/nths
if (myth < mod(nr,nths)) rsz = rsz + 1
bnds(myth+1) = rsz bnds(myth+1) = rsz
!$omp barrier !$omp barrier
!$omp master !$omp master
do i=1,nths do i=1,nths
bnds(i) = bnds(i) + bnds(i-1) bnds(i) = bnds(i) + bnds(i-1)
end do end do
info = 0
!$omp end master !$omp end master
!$omp barrier !$omp barrier
!$omp do schedule(static) private(disjoint) reduction(max: info) !$omp do schedule(static) private(disjoint)
do kk=0, nths-1 do kk=0, nths-1
step1: do ii=bnds(kk), bnds(kk+1)-1 step1: do ii=bnds(kk), bnds(kk+1)-1
if (info /= 0) cycle
i = idxs(ii) i = idxs(ii)
if (info /= 0) cycle step1
if ((i<1).or.(i>nr)) then if ((i<1).or.(i>nr)) then
!$omp atomic write
info=psb_err_internal_error_ info=psb_err_internal_error_
!$omp end atomic
call psb_errpush(info,name) call psb_errpush(info,name)
cycle step1 cycle step1
!goto 9999 !goto 9999
@ -227,7 +229,9 @@ subroutine amg_z_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
if (ilaggr(i) == -(nr+1)) then if (ilaggr(i) == -(nr+1)) then
nz = (acsr%irp(i+1)-acsr%irp(i)) nz = (acsr%irp(i+1)-acsr%irp(i))
if ((nz<0).or.(nz>size(icol))) then if ((nz<0).or.(nz>size(icol))) then
!$omp atomic write
info=psb_err_internal_error_ info=psb_err_internal_error_
!$omp end atomic
call psb_errpush(info,name) call psb_errpush(info,name)
cycle step1 cycle step1
!goto 9999 !goto 9999
@ -263,13 +267,18 @@ subroutine amg_z_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
! However, a certain unrepeatability is already present ! However, a certain unrepeatability is already present
! because the sequence of aggregates is computed with a ! because the sequence of aggregates is computed with a
! different order than in serial mode. ! different order than in serial mode.
! In any case, even if the enteries of ILAGGR may be
! overwritten, the important thing is that each entry is
! consistent and they generate a correct aggregation map.
! !
disjoint = all(ilaggr(icol(1:ip)) == -(nr+1)).or.(ip==0) disjoint = all(ilaggr(icol(1:ip)) == -(nr+1)).or.(ip==0)
if (disjoint) then if (disjoint) then
locnaggr(kk) = locnaggr(kk) + 1 locnaggr(kk) = locnaggr(kk) + 1
itmp = (bnds(kk)-1+locnaggr(kk))*nths+kk itmp = (bnds(kk)-1+locnaggr(kk))*nths+kk
if (itmp < (bnds(kk)-1+locnaggr(kk))) then if (itmp < (bnds(kk)-1+locnaggr(kk))) then
info = 12345678 !$omp atomic update
info = max(12345678,info)
!$omp end atomic
cycle step1 cycle step1
end if end if
!$omp atomic write !$omp atomic write

Loading…
Cancel
Save