Updates and measurements for OpenMP build

dev-openmp
sfilippone 2 years ago
parent 3a5e73e4c8
commit 2fd718be6f

@ -76,7 +76,7 @@ subroutine amg_c_ptap_bld(a_csr,desc_a,nlaggr,parms,ac,&
integer(psb_ipk_) :: nrow, ncol, nrl, nzl, ip, nzt, i, k
integer(psb_lpk_) :: nrsave, ncsave, nzsave, nza
logical, parameter :: do_timings=.false., oldstyle=.false., debug=.false.
integer(psb_ipk_), save :: idx_spspmm=-1
integer(psb_ipk_), save :: idx_spspmm=-1, idx_cpytrans1=-1, idx_cpytrans2=-1
name='amg_ptap_bld'
if(psb_get_errstatus().ne.0) return
@ -93,7 +93,11 @@ subroutine amg_c_ptap_bld(a_csr,desc_a,nlaggr,parms,ac,&
ncol = desc_a%get_local_cols()
if ((do_timings).and.(idx_spspmm==-1)) &
& idx_spspmm = psb_get_timer_idx("SPMM_BLD: par_spspmm")
& idx_spspmm = psb_get_timer_idx("PTAP_BLD: par_spspmm")
if ((do_timings).and.(idx_cpytrans1==-1)) &
& idx_cpytrans1 = psb_get_timer_idx("PTAP_BLD: cpy&trans1")
if ((do_timings).and.(idx_cpytrans2==-1)) &
& idx_cpytrans2 = psb_get_timer_idx("PTAP_BLD: cpy&trans2")
naggr = nlaggr(me+1)
ntaggr = sum(nlaggr)
@ -128,6 +132,7 @@ subroutine amg_c_ptap_bld(a_csr,desc_a,nlaggr,parms,ac,&
! Ok first product done.
if (present(desc_ax)) then
if (do_timings) call psb_tic(idx_cpytrans1)
block
call coo_prol%cp_to_coo(coo_restr,info)
call coo_restr%set_ncols(desc_ac%get_local_cols())
@ -137,7 +142,7 @@ subroutine amg_c_ptap_bld(a_csr,desc_a,nlaggr,parms,ac,&
call coo_restr%set_ncols(desc_ax%get_local_cols())
end block
call csr_restr%cp_from_coo(coo_restr,info)
if (do_timings) call psb_toc(idx_cpytrans1)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='spcnv coo_restr')
goto 9999
@ -167,27 +172,28 @@ subroutine amg_c_ptap_bld(a_csr,desc_a,nlaggr,parms,ac,&
call coo_restr%transp()
nzl = coo_restr%get_nzeros()
nrl = desc_ac%get_local_rows()
i=0
nrl = desc_ac%get_local_rows()
call coo_restr%fix(info)
i=coo_restr%get_nzeros()
!
! Only keep local rows
!
do k=1, nzl
if ((1 <= coo_restr%ia(k)) .and.(coo_restr%ia(k) <= nrl)) then
i = i+1
coo_restr%val(i) = coo_restr%val(k)
coo_restr%ia(i) = coo_restr%ia(k)
coo_restr%ja(i) = coo_restr%ja(k)
search: do k=i,1,-1
if (coo_restr%ia(k) <= nrl) then
call coo_restr%set_nzeros(k)
exit search
end if
end do
call coo_restr%set_nzeros(i)
call coo_restr%fix(info)
end do search
nzl = coo_restr%get_nzeros()
call coo_restr%set_nrows(desc_ac%get_local_rows())
call coo_restr%set_ncols(desc_a%get_local_cols())
if (debug) call check_coo(me,trim(name)//' Check 2 on coo_restr:',coo_restr)
if (do_timings) call psb_tic(idx_cpytrans2)
call csr_restr%cp_from_coo(coo_restr,info)
if (do_timings) call psb_toc(idx_cpytrans2)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='spcnv coo_restr')
goto 9999

@ -72,7 +72,9 @@ subroutine amg_c_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
use psb_base_mod
use amg_base_prec_type
use amg_c_inner_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none
! Arguments
@ -99,6 +101,9 @@ subroutine amg_c_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
integer(psb_ipk_) :: nrow, ncol, n_ne
integer(psb_lpk_) :: nrglob
character(len=20) :: name, ch_err
integer(psb_ipk_), save :: idx_soc1_p1=-1, idx_soc1_p2=-1, idx_soc1_p3=-1
integer(psb_ipk_), save :: idx_soc1_p0=-1
logical, parameter :: do_timings=.true.
info=psb_success_
name = 'amg_soc1_map_bld'
@ -114,6 +119,14 @@ subroutine amg_c_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols()
nrglob = desc_a%get_global_rows()
if ((do_timings).and.(idx_soc1_p0==-1)) &
& idx_soc1_p0 = psb_get_timer_idx("SOC1_MAP: phase0")
if ((do_timings).and.(idx_soc1_p1==-1)) &
& idx_soc1_p1 = psb_get_timer_idx("SOC1_MAP: phase1")
if ((do_timings).and.(idx_soc1_p2==-1)) &
& idx_soc1_p2 = psb_get_timer_idx("SOC1_MAP: phase2")
if ((do_timings).and.(idx_soc1_p3==-1)) &
& idx_soc1_p3 = psb_get_timer_idx("SOC1_MAP: phase3")
nr = a%get_nrows()
nc = a%get_ncols()
@ -133,41 +146,194 @@ subroutine amg_c_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
goto 9999
end if
if (do_timings) call psb_tic(idx_soc1_p0)
call a%cp_to(acsr)
if (do_timings) call psb_toc(idx_soc1_p0)
if (clean_zeros) call acsr%clean_zeros(info)
if (iorder == amg_aggr_ord_nat_) then
!$omp parallel do private(i)
do i=1, nr
ilaggr(i) = -(nr+1)
idxs(i) = i
end do
else
!$omp end parallel do
else
!$omp parallel do private(i)
do i=1, nr
ilaggr(i) = -(nr+1)
ideg(i) = acsr%irp(i+1) - acsr%irp(i)
end do
!$omp end parallel do
call psb_msort(ideg,ix=idxs,dir=psb_sort_down_)
end if
if (do_timings) call psb_tic(idx_soc1_p1)
!
! Phase one: Start with disjoint groups.
!
naggr = 0
icnt = 0
#if 0&&defined(OPENMP)
block
integer(psb_ipk_), allocatable :: bnds(:), locnaggr(:)
integer(psb_ipk_) :: myth,nths, kk
!$omp parallel shared(bnds,locnaggr,ilaggr,nr,naggr,diag,theta,nths) private(icol,val,myth,kk)
block
integer(psb_ipk_) :: ii,nlp,k,n,ia,isz, nc, i,j,m, nz, ilg, ip, rsz, minip
nths = omp_get_num_threads()
myth = omp_get_thread_num()
rsz = nr/nths
if (myth < mod(nr,nths)) rsz = rsz + 1
!!$ write(0,*) 'From thread : rsz ',myth,rsz
!$omp master
allocate(bnds(0:nths),locnaggr(0:nths))
locnaggr(:) = 0
bnds(0) = 1
!$omp end master
!$omp barrier
bnds(myth+1) = rsz
!$omp master
!!$ write(0,*) 'From master 1: ',bnds
do i=1,nths
bnds(i) = bnds(i) + bnds(i-1)
end do
!!$ write(0,*) 'From master 2: ',bnds
!$omp end master
!$omp barrier
!$omp do schedule(static)
do kk=0, nths-1
!!$ write(0,*) 'From thread ',myth,kk,bnds(kk),bnds(kk+1)-1
step1: do ii=bnds(kk), bnds(kk+1)-1
if (info /= 0) cycle
i = idxs(ii)
if ((i<1).or.(i>nr)) then
info=psb_err_internal_error_
call psb_errpush(info,name)
cycle step1
!goto 9999
end if
if (ilaggr(i) == -(nr+1)) then
nz = (acsr%irp(i+1)-acsr%irp(i))
if ((nz<0).or.(nz>size(icol))) then
info=psb_err_internal_error_
call psb_errpush(info,name)
cycle step1
!goto 9999
end if
icol(1:nz) = acsr%ja(acsr%irp(i):acsr%irp(i+1)-1)
val(1:nz) = acsr%val(acsr%irp(i):acsr%irp(i+1)-1)
!
! Build the set of all strongly coupled nodes
!
if (.false.) then
ip = 0
do k=1, nz
j = icol(k)
if ((bnds(myth)<=j).and.(j<=(bnds(myth+1)-1))) then
if (abs(val(k)) > theta*sqrt(abs(diag(i)*diag(j)))) then
ip = ip + 1
icol(ip) = icol(k)
end if
end if
enddo
!
! If the whole strongly coupled neighborhood of I is
! as yet unconnected, turn it into the next aggregate.
! Same if ip==0 (in which case, neighborhood only
! contains I even if it does not look like it from matrix)
!
disjoint = all(ilaggr(icol(1:ip)) == -(nr+1)).or.(ip==0)
if (disjoint) then
locnaggr(kk) = locnaggr(kk) + 1
do k=1, ip
ilaggr(icol(k)) = locnaggr(kk)
end do
ilaggr(i) = locnaggr(kk)
end if
else
ip = 0
minip = nr +1
do k=1, nz
j = icol(k)
if (abs(val(k)) > theta*sqrt(abs(diag(i)*diag(j)))) then
ip = ip + 1
icol(ip) = icol(k)
minip = min(icol(ip),minip)
end if
enddo
if (bnds(myth)<=minip) then
!
! If the whole strongly coupled neighborhood of I is
! as yet unconnected, turn it into the next aggregate.
! Same if ip==0 (in which case, neighborhood only
! contains I even if it does not look like it from matrix)
!
disjoint = all(ilaggr(icol(1:ip)) == -(nr+1)).or.(ip==0)
if (disjoint) then
!$omp critical(update_ilaggr)
disjoint = all(ilaggr(icol(1:ip)) == -(nr+1)).or.(ip==0)
if (disjoint) then
locnaggr(kk) = locnaggr(kk) + 1
do k=1, ip
ilaggr(icol(k)) = locnaggr(kk)
end do
ilaggr(i) = locnaggr(kk)
end if
!$omp end critical(update_ilaggr)
end if
endif
end if
end if
enddo step1
end do
!$omp end do
!$omp barrier
!$omp master
naggr = sum(locnaggr(0:nths-1))
!!$ write(0,*) 'NAGGR ',naggr, 'locnaggr ',locnaggr(0:nths-1)
do i=1,nths
locnaggr(i) = locnaggr(i) + locnaggr(i-1)
end do
do i=nths,1,-1
locnaggr(i) = locnaggr(i-1)
end do
locnaggr(0) = 0
!$omp end master
!$omp barrier
!$omp do schedule(static)
do kk=0, nths-1
do ii=bnds(kk), bnds(kk+1)-1
if (ilaggr(ii) > 0) ilaggr(ii) = ilaggr(ii) + locnaggr(kk)
end do
end do
!$omp end do
end block
!$omp end parallel
end block
!!$ write(0,*) 'Out of parallel looop NAGGR ',naggr
#else
step1: do ii=1, nr
if (info /= 0) cycle
i = idxs(ii)
if ((i<1).or.(i>nr)) then
info=psb_err_internal_error_
call psb_errpush(info,name)
goto 9999
cycle step1
!goto 9999
end if
if (ilaggr(i) == -(nr+1)) then
nz = (acsr%irp(i+1)-acsr%irp(i))
if ((nz<0).or.(nz>size(icol))) then
info=psb_err_internal_error_
call psb_errpush(info,name)
goto 9999
cycle step1
!goto 9999
end if
icol(1:nz) = acsr%ja(acsr%irp(i):acsr%irp(i+1)-1)
@ -176,7 +342,7 @@ subroutine amg_c_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
!
! Build the set of all strongly coupled nodes
!
ip = 0
ip = 0
do k=1, nz
j = icol(k)
if ((1<=j).and.(j<=nr)) then
@ -194,8 +360,7 @@ subroutine amg_c_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
! contains I even if it does not look like it from matrix)
!
disjoint = all(ilaggr(icol(1:ip)) == -(nr+1)).or.(ip==0)
if (disjoint) then
icnt = icnt + 1
if (disjoint) then
naggr = naggr + 1
do k=1, ip
ilaggr(icol(k)) = naggr
@ -204,16 +369,22 @@ subroutine amg_c_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
end if
endif
enddo step1
!!$ write(0,*) 'NAGGR ',naggr
#endif
if (debug_level >= psb_debug_outer_) then
write(debug_unit,*) me,' ',trim(name),&
& ' Check 1:',count(ilaggr == -(nr+1))
& ' Check 1:',naggr,count(ilaggr(1:nr) == -(nr+1)), count(ilaggr(1:nr)>0),&
& count(ilaggr(1:nr) == -(nr+1))+count(ilaggr(1:nr)>0),nr
end if
if (do_timings) call psb_toc(idx_soc1_p1)
if (do_timings) call psb_tic(idx_soc1_p2)
!
! Phase two: join the neighbours
!
! $ omp workshare
tmpaggr = ilaggr
! $ omp end workshare
! $ omp parallel do schedule(static) shared(tmpaggr,ilaggr,nr,naggr,diag,theta) private(ii,i,j,k,nz,icol,val,ip)
step2: do ii=1,nr
i = idxs(ii)
@ -244,8 +415,15 @@ subroutine amg_c_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
end if
end if
end do step2
! $ omp end parallel do
if (do_timings) call psb_toc(idx_soc1_p2)
if (debug_level >= psb_debug_outer_) then
write(debug_unit,*) me,' ',trim(name),&
& ' Check 1.5:',naggr,count(ilaggr(1:nr) == -(nr+1)), count(ilaggr(1:nr)>0),&
& count(ilaggr(1:nr) == -(nr+1))+count(ilaggr(1:nr)>0),nr
end if
if (do_timings) call psb_tic(idx_soc1_p3)
!
! Phase three: sweep over leftovers, if any
!
@ -274,7 +452,6 @@ subroutine amg_c_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
end if
enddo
if (ip > 0) then
icnt = icnt + 1
naggr = naggr + 1
ilaggr(i) = naggr
do k=1, ip
@ -309,7 +486,7 @@ subroutine amg_c_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
endif
end if
end do
if (do_timings) call psb_toc(idx_soc1_p3)
if (naggr > ncol) then
!write(0,*) name,'Error : naggr > ncol',naggr,ncol
info=psb_err_internal_error_
@ -336,9 +513,14 @@ subroutine amg_c_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
nlaggr(:) = 0
nlaggr(me+1) = naggr
call psb_sum(ctxt,nlaggr(1:np))
if (debug_level >= psb_debug_outer_) then
write(debug_unit,*) me,' ',trim(name),&
& ' Check 2:',naggr,count(ilaggr(1:nr) == -(nr+1)), count(ilaggr(1:nr)>0),&
& count(ilaggr(1:nr) == -(nr+1))+count(ilaggr(1:nr)>0),nr
end if
!!$ write(0,*) nlaggr(1:np),'ILAGGR : ',ilaggr(1:nr)
call acsr%free()
call psb_erractionrestore(err_act)
return

@ -76,7 +76,7 @@ subroutine amg_d_ptap_bld(a_csr,desc_a,nlaggr,parms,ac,&
integer(psb_ipk_) :: nrow, ncol, nrl, nzl, ip, nzt, i, k
integer(psb_lpk_) :: nrsave, ncsave, nzsave, nza
logical, parameter :: do_timings=.false., oldstyle=.false., debug=.false.
integer(psb_ipk_), save :: idx_spspmm=-1
integer(psb_ipk_), save :: idx_spspmm=-1, idx_cpytrans1=-1, idx_cpytrans2=-1
name='amg_ptap_bld'
if(psb_get_errstatus().ne.0) return
@ -93,7 +93,11 @@ subroutine amg_d_ptap_bld(a_csr,desc_a,nlaggr,parms,ac,&
ncol = desc_a%get_local_cols()
if ((do_timings).and.(idx_spspmm==-1)) &
& idx_spspmm = psb_get_timer_idx("SPMM_BLD: par_spspmm")
& idx_spspmm = psb_get_timer_idx("PTAP_BLD: par_spspmm")
if ((do_timings).and.(idx_cpytrans1==-1)) &
& idx_cpytrans1 = psb_get_timer_idx("PTAP_BLD: cpy&trans1")
if ((do_timings).and.(idx_cpytrans2==-1)) &
& idx_cpytrans2 = psb_get_timer_idx("PTAP_BLD: cpy&trans2")
naggr = nlaggr(me+1)
ntaggr = sum(nlaggr)
@ -128,6 +132,7 @@ subroutine amg_d_ptap_bld(a_csr,desc_a,nlaggr,parms,ac,&
! Ok first product done.
if (present(desc_ax)) then
if (do_timings) call psb_tic(idx_cpytrans1)
block
call coo_prol%cp_to_coo(coo_restr,info)
call coo_restr%set_ncols(desc_ac%get_local_cols())
@ -137,7 +142,7 @@ subroutine amg_d_ptap_bld(a_csr,desc_a,nlaggr,parms,ac,&
call coo_restr%set_ncols(desc_ax%get_local_cols())
end block
call csr_restr%cp_from_coo(coo_restr,info)
if (do_timings) call psb_toc(idx_cpytrans1)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='spcnv coo_restr')
goto 9999
@ -167,27 +172,28 @@ subroutine amg_d_ptap_bld(a_csr,desc_a,nlaggr,parms,ac,&
call coo_restr%transp()
nzl = coo_restr%get_nzeros()
nrl = desc_ac%get_local_rows()
i=0
nrl = desc_ac%get_local_rows()
call coo_restr%fix(info)
i=coo_restr%get_nzeros()
!
! Only keep local rows
!
do k=1, nzl
if ((1 <= coo_restr%ia(k)) .and.(coo_restr%ia(k) <= nrl)) then
i = i+1
coo_restr%val(i) = coo_restr%val(k)
coo_restr%ia(i) = coo_restr%ia(k)
coo_restr%ja(i) = coo_restr%ja(k)
search: do k=i,1,-1
if (coo_restr%ia(k) <= nrl) then
call coo_restr%set_nzeros(k)
exit search
end if
end do
call coo_restr%set_nzeros(i)
call coo_restr%fix(info)
end do search
nzl = coo_restr%get_nzeros()
call coo_restr%set_nrows(desc_ac%get_local_rows())
call coo_restr%set_ncols(desc_a%get_local_cols())
if (debug) call check_coo(me,trim(name)//' Check 2 on coo_restr:',coo_restr)
if (do_timings) call psb_tic(idx_cpytrans2)
call csr_restr%cp_from_coo(coo_restr,info)
if (do_timings) call psb_toc(idx_cpytrans2)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='spcnv coo_restr')
goto 9999

@ -72,7 +72,9 @@ subroutine amg_d_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
use psb_base_mod
use amg_base_prec_type
use amg_d_inner_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none
! Arguments
@ -99,6 +101,9 @@ subroutine amg_d_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
integer(psb_ipk_) :: nrow, ncol, n_ne
integer(psb_lpk_) :: nrglob
character(len=20) :: name, ch_err
integer(psb_ipk_), save :: idx_soc1_p1=-1, idx_soc1_p2=-1, idx_soc1_p3=-1
integer(psb_ipk_), save :: idx_soc1_p0=-1
logical, parameter :: do_timings=.true.
info=psb_success_
name = 'amg_soc1_map_bld'
@ -114,6 +119,14 @@ subroutine amg_d_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols()
nrglob = desc_a%get_global_rows()
if ((do_timings).and.(idx_soc1_p0==-1)) &
& idx_soc1_p0 = psb_get_timer_idx("SOC1_MAP: phase0")
if ((do_timings).and.(idx_soc1_p1==-1)) &
& idx_soc1_p1 = psb_get_timer_idx("SOC1_MAP: phase1")
if ((do_timings).and.(idx_soc1_p2==-1)) &
& idx_soc1_p2 = psb_get_timer_idx("SOC1_MAP: phase2")
if ((do_timings).and.(idx_soc1_p3==-1)) &
& idx_soc1_p3 = psb_get_timer_idx("SOC1_MAP: phase3")
nr = a%get_nrows()
nc = a%get_ncols()
@ -133,41 +146,194 @@ subroutine amg_d_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
goto 9999
end if
if (do_timings) call psb_tic(idx_soc1_p0)
call a%cp_to(acsr)
if (do_timings) call psb_toc(idx_soc1_p0)
if (clean_zeros) call acsr%clean_zeros(info)
if (iorder == amg_aggr_ord_nat_) then
!$omp parallel do private(i)
do i=1, nr
ilaggr(i) = -(nr+1)
idxs(i) = i
end do
else
!$omp end parallel do
else
!$omp parallel do private(i)
do i=1, nr
ilaggr(i) = -(nr+1)
ideg(i) = acsr%irp(i+1) - acsr%irp(i)
end do
!$omp end parallel do
call psb_msort(ideg,ix=idxs,dir=psb_sort_down_)
end if
if (do_timings) call psb_tic(idx_soc1_p1)
!
! Phase one: Start with disjoint groups.
!
naggr = 0
icnt = 0
#if 0&&defined(OPENMP)
block
integer(psb_ipk_), allocatable :: bnds(:), locnaggr(:)
integer(psb_ipk_) :: myth,nths, kk
!$omp parallel shared(bnds,locnaggr,ilaggr,nr,naggr,diag,theta,nths) private(icol,val,myth,kk)
block
integer(psb_ipk_) :: ii,nlp,k,n,ia,isz, nc, i,j,m, nz, ilg, ip, rsz, minip
nths = omp_get_num_threads()
myth = omp_get_thread_num()
rsz = nr/nths
if (myth < mod(nr,nths)) rsz = rsz + 1
!!$ write(0,*) 'From thread : rsz ',myth,rsz
!$omp master
allocate(bnds(0:nths),locnaggr(0:nths))
locnaggr(:) = 0
bnds(0) = 1
!$omp end master
!$omp barrier
bnds(myth+1) = rsz
!$omp master
!!$ write(0,*) 'From master 1: ',bnds
do i=1,nths
bnds(i) = bnds(i) + bnds(i-1)
end do
!!$ write(0,*) 'From master 2: ',bnds
!$omp end master
!$omp barrier
!$omp do schedule(static)
do kk=0, nths-1
!!$ write(0,*) 'From thread ',myth,kk,bnds(kk),bnds(kk+1)-1
step1: do ii=bnds(kk), bnds(kk+1)-1
if (info /= 0) cycle
i = idxs(ii)
if ((i<1).or.(i>nr)) then
info=psb_err_internal_error_
call psb_errpush(info,name)
cycle step1
!goto 9999
end if
if (ilaggr(i) == -(nr+1)) then
nz = (acsr%irp(i+1)-acsr%irp(i))
if ((nz<0).or.(nz>size(icol))) then
info=psb_err_internal_error_
call psb_errpush(info,name)
cycle step1
!goto 9999
end if
icol(1:nz) = acsr%ja(acsr%irp(i):acsr%irp(i+1)-1)
val(1:nz) = acsr%val(acsr%irp(i):acsr%irp(i+1)-1)
!
! Build the set of all strongly coupled nodes
!
if (.false.) then
ip = 0
do k=1, nz
j = icol(k)
if ((bnds(myth)<=j).and.(j<=(bnds(myth+1)-1))) then
if (abs(val(k)) > theta*sqrt(abs(diag(i)*diag(j)))) then
ip = ip + 1
icol(ip) = icol(k)
end if
end if
enddo
!
! If the whole strongly coupled neighborhood of I is
! as yet unconnected, turn it into the next aggregate.
! Same if ip==0 (in which case, neighborhood only
! contains I even if it does not look like it from matrix)
!
disjoint = all(ilaggr(icol(1:ip)) == -(nr+1)).or.(ip==0)
if (disjoint) then
locnaggr(kk) = locnaggr(kk) + 1
do k=1, ip
ilaggr(icol(k)) = locnaggr(kk)
end do
ilaggr(i) = locnaggr(kk)
end if
else
ip = 0
minip = nr +1
do k=1, nz
j = icol(k)
if (abs(val(k)) > theta*sqrt(abs(diag(i)*diag(j)))) then
ip = ip + 1
icol(ip) = icol(k)
minip = min(icol(ip),minip)
end if
enddo
if (bnds(myth)<=minip) then
!
! If the whole strongly coupled neighborhood of I is
! as yet unconnected, turn it into the next aggregate.
! Same if ip==0 (in which case, neighborhood only
! contains I even if it does not look like it from matrix)
!
disjoint = all(ilaggr(icol(1:ip)) == -(nr+1)).or.(ip==0)
if (disjoint) then
!$omp critical(update_ilaggr)
disjoint = all(ilaggr(icol(1:ip)) == -(nr+1)).or.(ip==0)
if (disjoint) then
locnaggr(kk) = locnaggr(kk) + 1
do k=1, ip
ilaggr(icol(k)) = locnaggr(kk)
end do
ilaggr(i) = locnaggr(kk)
end if
!$omp end critical(update_ilaggr)
end if
endif
end if
end if
enddo step1
end do
!$omp end do
!$omp barrier
!$omp master
naggr = sum(locnaggr(0:nths-1))
!!$ write(0,*) 'NAGGR ',naggr, 'locnaggr ',locnaggr(0:nths-1)
do i=1,nths
locnaggr(i) = locnaggr(i) + locnaggr(i-1)
end do
do i=nths,1,-1
locnaggr(i) = locnaggr(i-1)
end do
locnaggr(0) = 0
!$omp end master
!$omp barrier
!$omp do schedule(static)
do kk=0, nths-1
do ii=bnds(kk), bnds(kk+1)-1
if (ilaggr(ii) > 0) ilaggr(ii) = ilaggr(ii) + locnaggr(kk)
end do
end do
!$omp end do
end block
!$omp end parallel
end block
!!$ write(0,*) 'Out of parallel looop NAGGR ',naggr
#else
step1: do ii=1, nr
if (info /= 0) cycle
i = idxs(ii)
if ((i<1).or.(i>nr)) then
info=psb_err_internal_error_
call psb_errpush(info,name)
goto 9999
cycle step1
!goto 9999
end if
if (ilaggr(i) == -(nr+1)) then
nz = (acsr%irp(i+1)-acsr%irp(i))
if ((nz<0).or.(nz>size(icol))) then
info=psb_err_internal_error_
call psb_errpush(info,name)
goto 9999
cycle step1
!goto 9999
end if
icol(1:nz) = acsr%ja(acsr%irp(i):acsr%irp(i+1)-1)
@ -176,7 +342,7 @@ subroutine amg_d_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
!
! Build the set of all strongly coupled nodes
!
ip = 0
ip = 0
do k=1, nz
j = icol(k)
if ((1<=j).and.(j<=nr)) then
@ -194,8 +360,7 @@ subroutine amg_d_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
! contains I even if it does not look like it from matrix)
!
disjoint = all(ilaggr(icol(1:ip)) == -(nr+1)).or.(ip==0)
if (disjoint) then
icnt = icnt + 1
if (disjoint) then
naggr = naggr + 1
do k=1, ip
ilaggr(icol(k)) = naggr
@ -204,16 +369,22 @@ subroutine amg_d_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
end if
endif
enddo step1
!!$ write(0,*) 'NAGGR ',naggr
#endif
if (debug_level >= psb_debug_outer_) then
write(debug_unit,*) me,' ',trim(name),&
& ' Check 1:',count(ilaggr == -(nr+1))
& ' Check 1:',naggr,count(ilaggr(1:nr) == -(nr+1)), count(ilaggr(1:nr)>0),&
& count(ilaggr(1:nr) == -(nr+1))+count(ilaggr(1:nr)>0),nr
end if
if (do_timings) call psb_toc(idx_soc1_p1)
if (do_timings) call psb_tic(idx_soc1_p2)
!
! Phase two: join the neighbours
!
! $ omp workshare
tmpaggr = ilaggr
! $ omp end workshare
! $ omp parallel do schedule(static) shared(tmpaggr,ilaggr,nr,naggr,diag,theta) private(ii,i,j,k,nz,icol,val,ip)
step2: do ii=1,nr
i = idxs(ii)
@ -244,8 +415,15 @@ subroutine amg_d_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
end if
end if
end do step2
! $ omp end parallel do
if (do_timings) call psb_toc(idx_soc1_p2)
if (debug_level >= psb_debug_outer_) then
write(debug_unit,*) me,' ',trim(name),&
& ' Check 1.5:',naggr,count(ilaggr(1:nr) == -(nr+1)), count(ilaggr(1:nr)>0),&
& count(ilaggr(1:nr) == -(nr+1))+count(ilaggr(1:nr)>0),nr
end if
if (do_timings) call psb_tic(idx_soc1_p3)
!
! Phase three: sweep over leftovers, if any
!
@ -274,7 +452,6 @@ subroutine amg_d_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
end if
enddo
if (ip > 0) then
icnt = icnt + 1
naggr = naggr + 1
ilaggr(i) = naggr
do k=1, ip
@ -309,7 +486,7 @@ subroutine amg_d_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
endif
end if
end do
if (do_timings) call psb_toc(idx_soc1_p3)
if (naggr > ncol) then
!write(0,*) name,'Error : naggr > ncol',naggr,ncol
info=psb_err_internal_error_
@ -336,9 +513,14 @@ subroutine amg_d_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
nlaggr(:) = 0
nlaggr(me+1) = naggr
call psb_sum(ctxt,nlaggr(1:np))
if (debug_level >= psb_debug_outer_) then
write(debug_unit,*) me,' ',trim(name),&
& ' Check 2:',naggr,count(ilaggr(1:nr) == -(nr+1)), count(ilaggr(1:nr)>0),&
& count(ilaggr(1:nr) == -(nr+1))+count(ilaggr(1:nr)>0),nr
end if
!!$ write(0,*) nlaggr(1:np),'ILAGGR : ',ilaggr(1:nr)
call acsr%free()
call psb_erractionrestore(err_act)
return

@ -76,7 +76,7 @@ subroutine amg_s_ptap_bld(a_csr,desc_a,nlaggr,parms,ac,&
integer(psb_ipk_) :: nrow, ncol, nrl, nzl, ip, nzt, i, k
integer(psb_lpk_) :: nrsave, ncsave, nzsave, nza
logical, parameter :: do_timings=.false., oldstyle=.false., debug=.false.
integer(psb_ipk_), save :: idx_spspmm=-1
integer(psb_ipk_), save :: idx_spspmm=-1, idx_cpytrans1=-1, idx_cpytrans2=-1
name='amg_ptap_bld'
if(psb_get_errstatus().ne.0) return
@ -93,7 +93,11 @@ subroutine amg_s_ptap_bld(a_csr,desc_a,nlaggr,parms,ac,&
ncol = desc_a%get_local_cols()
if ((do_timings).and.(idx_spspmm==-1)) &
& idx_spspmm = psb_get_timer_idx("SPMM_BLD: par_spspmm")
& idx_spspmm = psb_get_timer_idx("PTAP_BLD: par_spspmm")
if ((do_timings).and.(idx_cpytrans1==-1)) &
& idx_cpytrans1 = psb_get_timer_idx("PTAP_BLD: cpy&trans1")
if ((do_timings).and.(idx_cpytrans2==-1)) &
& idx_cpytrans2 = psb_get_timer_idx("PTAP_BLD: cpy&trans2")
naggr = nlaggr(me+1)
ntaggr = sum(nlaggr)
@ -128,6 +132,7 @@ subroutine amg_s_ptap_bld(a_csr,desc_a,nlaggr,parms,ac,&
! Ok first product done.
if (present(desc_ax)) then
if (do_timings) call psb_tic(idx_cpytrans1)
block
call coo_prol%cp_to_coo(coo_restr,info)
call coo_restr%set_ncols(desc_ac%get_local_cols())
@ -137,7 +142,7 @@ subroutine amg_s_ptap_bld(a_csr,desc_a,nlaggr,parms,ac,&
call coo_restr%set_ncols(desc_ax%get_local_cols())
end block
call csr_restr%cp_from_coo(coo_restr,info)
if (do_timings) call psb_toc(idx_cpytrans1)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='spcnv coo_restr')
goto 9999
@ -167,27 +172,28 @@ subroutine amg_s_ptap_bld(a_csr,desc_a,nlaggr,parms,ac,&
call coo_restr%transp()
nzl = coo_restr%get_nzeros()
nrl = desc_ac%get_local_rows()
i=0
nrl = desc_ac%get_local_rows()
call coo_restr%fix(info)
i=coo_restr%get_nzeros()
!
! Only keep local rows
!
do k=1, nzl
if ((1 <= coo_restr%ia(k)) .and.(coo_restr%ia(k) <= nrl)) then
i = i+1
coo_restr%val(i) = coo_restr%val(k)
coo_restr%ia(i) = coo_restr%ia(k)
coo_restr%ja(i) = coo_restr%ja(k)
search: do k=i,1,-1
if (coo_restr%ia(k) <= nrl) then
call coo_restr%set_nzeros(k)
exit search
end if
end do
call coo_restr%set_nzeros(i)
call coo_restr%fix(info)
end do search
nzl = coo_restr%get_nzeros()
call coo_restr%set_nrows(desc_ac%get_local_rows())
call coo_restr%set_ncols(desc_a%get_local_cols())
if (debug) call check_coo(me,trim(name)//' Check 2 on coo_restr:',coo_restr)
if (do_timings) call psb_tic(idx_cpytrans2)
call csr_restr%cp_from_coo(coo_restr,info)
if (do_timings) call psb_toc(idx_cpytrans2)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='spcnv coo_restr')
goto 9999

@ -72,7 +72,9 @@ subroutine amg_s_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
use psb_base_mod
use amg_base_prec_type
use amg_s_inner_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none
! Arguments
@ -99,6 +101,9 @@ subroutine amg_s_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
integer(psb_ipk_) :: nrow, ncol, n_ne
integer(psb_lpk_) :: nrglob
character(len=20) :: name, ch_err
integer(psb_ipk_), save :: idx_soc1_p1=-1, idx_soc1_p2=-1, idx_soc1_p3=-1
integer(psb_ipk_), save :: idx_soc1_p0=-1
logical, parameter :: do_timings=.true.
info=psb_success_
name = 'amg_soc1_map_bld'
@ -114,6 +119,14 @@ subroutine amg_s_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols()
nrglob = desc_a%get_global_rows()
if ((do_timings).and.(idx_soc1_p0==-1)) &
& idx_soc1_p0 = psb_get_timer_idx("SOC1_MAP: phase0")
if ((do_timings).and.(idx_soc1_p1==-1)) &
& idx_soc1_p1 = psb_get_timer_idx("SOC1_MAP: phase1")
if ((do_timings).and.(idx_soc1_p2==-1)) &
& idx_soc1_p2 = psb_get_timer_idx("SOC1_MAP: phase2")
if ((do_timings).and.(idx_soc1_p3==-1)) &
& idx_soc1_p3 = psb_get_timer_idx("SOC1_MAP: phase3")
nr = a%get_nrows()
nc = a%get_ncols()
@ -133,41 +146,194 @@ subroutine amg_s_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
goto 9999
end if
if (do_timings) call psb_tic(idx_soc1_p0)
call a%cp_to(acsr)
if (do_timings) call psb_toc(idx_soc1_p0)
if (clean_zeros) call acsr%clean_zeros(info)
if (iorder == amg_aggr_ord_nat_) then
!$omp parallel do private(i)
do i=1, nr
ilaggr(i) = -(nr+1)
idxs(i) = i
end do
else
!$omp end parallel do
else
!$omp parallel do private(i)
do i=1, nr
ilaggr(i) = -(nr+1)
ideg(i) = acsr%irp(i+1) - acsr%irp(i)
end do
!$omp end parallel do
call psb_msort(ideg,ix=idxs,dir=psb_sort_down_)
end if
if (do_timings) call psb_tic(idx_soc1_p1)
!
! Phase one: Start with disjoint groups.
!
naggr = 0
icnt = 0
#if 0&&defined(OPENMP)
block
integer(psb_ipk_), allocatable :: bnds(:), locnaggr(:)
integer(psb_ipk_) :: myth,nths, kk
!$omp parallel shared(bnds,locnaggr,ilaggr,nr,naggr,diag,theta,nths) private(icol,val,myth,kk)
block
integer(psb_ipk_) :: ii,nlp,k,n,ia,isz, nc, i,j,m, nz, ilg, ip, rsz, minip
nths = omp_get_num_threads()
myth = omp_get_thread_num()
rsz = nr/nths
if (myth < mod(nr,nths)) rsz = rsz + 1
!!$ write(0,*) 'From thread : rsz ',myth,rsz
!$omp master
allocate(bnds(0:nths),locnaggr(0:nths))
locnaggr(:) = 0
bnds(0) = 1
!$omp end master
!$omp barrier
bnds(myth+1) = rsz
!$omp master
!!$ write(0,*) 'From master 1: ',bnds
do i=1,nths
bnds(i) = bnds(i) + bnds(i-1)
end do
!!$ write(0,*) 'From master 2: ',bnds
!$omp end master
!$omp barrier
!$omp do schedule(static)
do kk=0, nths-1
!!$ write(0,*) 'From thread ',myth,kk,bnds(kk),bnds(kk+1)-1
step1: do ii=bnds(kk), bnds(kk+1)-1
if (info /= 0) cycle
i = idxs(ii)
if ((i<1).or.(i>nr)) then
info=psb_err_internal_error_
call psb_errpush(info,name)
cycle step1
!goto 9999
end if
if (ilaggr(i) == -(nr+1)) then
nz = (acsr%irp(i+1)-acsr%irp(i))
if ((nz<0).or.(nz>size(icol))) then
info=psb_err_internal_error_
call psb_errpush(info,name)
cycle step1
!goto 9999
end if
icol(1:nz) = acsr%ja(acsr%irp(i):acsr%irp(i+1)-1)
val(1:nz) = acsr%val(acsr%irp(i):acsr%irp(i+1)-1)
!
! Build the set of all strongly coupled nodes
!
if (.false.) then
ip = 0
do k=1, nz
j = icol(k)
if ((bnds(myth)<=j).and.(j<=(bnds(myth+1)-1))) then
if (abs(val(k)) > theta*sqrt(abs(diag(i)*diag(j)))) then
ip = ip + 1
icol(ip) = icol(k)
end if
end if
enddo
!
! If the whole strongly coupled neighborhood of I is
! as yet unconnected, turn it into the next aggregate.
! Same if ip==0 (in which case, neighborhood only
! contains I even if it does not look like it from matrix)
!
disjoint = all(ilaggr(icol(1:ip)) == -(nr+1)).or.(ip==0)
if (disjoint) then
locnaggr(kk) = locnaggr(kk) + 1
do k=1, ip
ilaggr(icol(k)) = locnaggr(kk)
end do
ilaggr(i) = locnaggr(kk)
end if
else
ip = 0
minip = nr +1
do k=1, nz
j = icol(k)
if (abs(val(k)) > theta*sqrt(abs(diag(i)*diag(j)))) then
ip = ip + 1
icol(ip) = icol(k)
minip = min(icol(ip),minip)
end if
enddo
if (bnds(myth)<=minip) then
!
! If the whole strongly coupled neighborhood of I is
! as yet unconnected, turn it into the next aggregate.
! Same if ip==0 (in which case, neighborhood only
! contains I even if it does not look like it from matrix)
!
disjoint = all(ilaggr(icol(1:ip)) == -(nr+1)).or.(ip==0)
if (disjoint) then
!$omp critical(update_ilaggr)
disjoint = all(ilaggr(icol(1:ip)) == -(nr+1)).or.(ip==0)
if (disjoint) then
locnaggr(kk) = locnaggr(kk) + 1
do k=1, ip
ilaggr(icol(k)) = locnaggr(kk)
end do
ilaggr(i) = locnaggr(kk)
end if
!$omp end critical(update_ilaggr)
end if
endif
end if
end if
enddo step1
end do
!$omp end do
!$omp barrier
!$omp master
naggr = sum(locnaggr(0:nths-1))
!!$ write(0,*) 'NAGGR ',naggr, 'locnaggr ',locnaggr(0:nths-1)
do i=1,nths
locnaggr(i) = locnaggr(i) + locnaggr(i-1)
end do
do i=nths,1,-1
locnaggr(i) = locnaggr(i-1)
end do
locnaggr(0) = 0
!$omp end master
!$omp barrier
!$omp do schedule(static)
do kk=0, nths-1
do ii=bnds(kk), bnds(kk+1)-1
if (ilaggr(ii) > 0) ilaggr(ii) = ilaggr(ii) + locnaggr(kk)
end do
end do
!$omp end do
end block
!$omp end parallel
end block
!!$ write(0,*) 'Out of parallel looop NAGGR ',naggr
#else
step1: do ii=1, nr
if (info /= 0) cycle
i = idxs(ii)
if ((i<1).or.(i>nr)) then
info=psb_err_internal_error_
call psb_errpush(info,name)
goto 9999
cycle step1
!goto 9999
end if
if (ilaggr(i) == -(nr+1)) then
nz = (acsr%irp(i+1)-acsr%irp(i))
if ((nz<0).or.(nz>size(icol))) then
info=psb_err_internal_error_
call psb_errpush(info,name)
goto 9999
cycle step1
!goto 9999
end if
icol(1:nz) = acsr%ja(acsr%irp(i):acsr%irp(i+1)-1)
@ -176,7 +342,7 @@ subroutine amg_s_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
!
! Build the set of all strongly coupled nodes
!
ip = 0
ip = 0
do k=1, nz
j = icol(k)
if ((1<=j).and.(j<=nr)) then
@ -194,8 +360,7 @@ subroutine amg_s_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
! contains I even if it does not look like it from matrix)
!
disjoint = all(ilaggr(icol(1:ip)) == -(nr+1)).or.(ip==0)
if (disjoint) then
icnt = icnt + 1
if (disjoint) then
naggr = naggr + 1
do k=1, ip
ilaggr(icol(k)) = naggr
@ -204,16 +369,22 @@ subroutine amg_s_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
end if
endif
enddo step1
!!$ write(0,*) 'NAGGR ',naggr
#endif
if (debug_level >= psb_debug_outer_) then
write(debug_unit,*) me,' ',trim(name),&
& ' Check 1:',count(ilaggr == -(nr+1))
& ' Check 1:',naggr,count(ilaggr(1:nr) == -(nr+1)), count(ilaggr(1:nr)>0),&
& count(ilaggr(1:nr) == -(nr+1))+count(ilaggr(1:nr)>0),nr
end if
if (do_timings) call psb_toc(idx_soc1_p1)
if (do_timings) call psb_tic(idx_soc1_p2)
!
! Phase two: join the neighbours
!
! $ omp workshare
tmpaggr = ilaggr
! $ omp end workshare
! $ omp parallel do schedule(static) shared(tmpaggr,ilaggr,nr,naggr,diag,theta) private(ii,i,j,k,nz,icol,val,ip)
step2: do ii=1,nr
i = idxs(ii)
@ -244,8 +415,15 @@ subroutine amg_s_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
end if
end if
end do step2
! $ omp end parallel do
if (do_timings) call psb_toc(idx_soc1_p2)
if (debug_level >= psb_debug_outer_) then
write(debug_unit,*) me,' ',trim(name),&
& ' Check 1.5:',naggr,count(ilaggr(1:nr) == -(nr+1)), count(ilaggr(1:nr)>0),&
& count(ilaggr(1:nr) == -(nr+1))+count(ilaggr(1:nr)>0),nr
end if
if (do_timings) call psb_tic(idx_soc1_p3)
!
! Phase three: sweep over leftovers, if any
!
@ -274,7 +452,6 @@ subroutine amg_s_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
end if
enddo
if (ip > 0) then
icnt = icnt + 1
naggr = naggr + 1
ilaggr(i) = naggr
do k=1, ip
@ -309,7 +486,7 @@ subroutine amg_s_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
endif
end if
end do
if (do_timings) call psb_toc(idx_soc1_p3)
if (naggr > ncol) then
!write(0,*) name,'Error : naggr > ncol',naggr,ncol
info=psb_err_internal_error_
@ -336,9 +513,14 @@ subroutine amg_s_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
nlaggr(:) = 0
nlaggr(me+1) = naggr
call psb_sum(ctxt,nlaggr(1:np))
if (debug_level >= psb_debug_outer_) then
write(debug_unit,*) me,' ',trim(name),&
& ' Check 2:',naggr,count(ilaggr(1:nr) == -(nr+1)), count(ilaggr(1:nr)>0),&
& count(ilaggr(1:nr) == -(nr+1))+count(ilaggr(1:nr)>0),nr
end if
!!$ write(0,*) nlaggr(1:np),'ILAGGR : ',ilaggr(1:nr)
call acsr%free()
call psb_erractionrestore(err_act)
return

@ -76,7 +76,7 @@ subroutine amg_z_ptap_bld(a_csr,desc_a,nlaggr,parms,ac,&
integer(psb_ipk_) :: nrow, ncol, nrl, nzl, ip, nzt, i, k
integer(psb_lpk_) :: nrsave, ncsave, nzsave, nza
logical, parameter :: do_timings=.false., oldstyle=.false., debug=.false.
integer(psb_ipk_), save :: idx_spspmm=-1
integer(psb_ipk_), save :: idx_spspmm=-1, idx_cpytrans1=-1, idx_cpytrans2=-1
name='amg_ptap_bld'
if(psb_get_errstatus().ne.0) return
@ -93,7 +93,11 @@ subroutine amg_z_ptap_bld(a_csr,desc_a,nlaggr,parms,ac,&
ncol = desc_a%get_local_cols()
if ((do_timings).and.(idx_spspmm==-1)) &
& idx_spspmm = psb_get_timer_idx("SPMM_BLD: par_spspmm")
& idx_spspmm = psb_get_timer_idx("PTAP_BLD: par_spspmm")
if ((do_timings).and.(idx_cpytrans1==-1)) &
& idx_cpytrans1 = psb_get_timer_idx("PTAP_BLD: cpy&trans1")
if ((do_timings).and.(idx_cpytrans2==-1)) &
& idx_cpytrans2 = psb_get_timer_idx("PTAP_BLD: cpy&trans2")
naggr = nlaggr(me+1)
ntaggr = sum(nlaggr)
@ -128,6 +132,7 @@ subroutine amg_z_ptap_bld(a_csr,desc_a,nlaggr,parms,ac,&
! Ok first product done.
if (present(desc_ax)) then
if (do_timings) call psb_tic(idx_cpytrans1)
block
call coo_prol%cp_to_coo(coo_restr,info)
call coo_restr%set_ncols(desc_ac%get_local_cols())
@ -137,7 +142,7 @@ subroutine amg_z_ptap_bld(a_csr,desc_a,nlaggr,parms,ac,&
call coo_restr%set_ncols(desc_ax%get_local_cols())
end block
call csr_restr%cp_from_coo(coo_restr,info)
if (do_timings) call psb_toc(idx_cpytrans1)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='spcnv coo_restr')
goto 9999
@ -167,27 +172,28 @@ subroutine amg_z_ptap_bld(a_csr,desc_a,nlaggr,parms,ac,&
call coo_restr%transp()
nzl = coo_restr%get_nzeros()
nrl = desc_ac%get_local_rows()
i=0
nrl = desc_ac%get_local_rows()
call coo_restr%fix(info)
i=coo_restr%get_nzeros()
!
! Only keep local rows
!
do k=1, nzl
if ((1 <= coo_restr%ia(k)) .and.(coo_restr%ia(k) <= nrl)) then
i = i+1
coo_restr%val(i) = coo_restr%val(k)
coo_restr%ia(i) = coo_restr%ia(k)
coo_restr%ja(i) = coo_restr%ja(k)
search: do k=i,1,-1
if (coo_restr%ia(k) <= nrl) then
call coo_restr%set_nzeros(k)
exit search
end if
end do
call coo_restr%set_nzeros(i)
call coo_restr%fix(info)
end do search
nzl = coo_restr%get_nzeros()
call coo_restr%set_nrows(desc_ac%get_local_rows())
call coo_restr%set_ncols(desc_a%get_local_cols())
if (debug) call check_coo(me,trim(name)//' Check 2 on coo_restr:',coo_restr)
if (do_timings) call psb_tic(idx_cpytrans2)
call csr_restr%cp_from_coo(coo_restr,info)
if (do_timings) call psb_toc(idx_cpytrans2)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='spcnv coo_restr')
goto 9999

@ -72,7 +72,9 @@ subroutine amg_z_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
use psb_base_mod
use amg_base_prec_type
use amg_z_inner_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none
! Arguments
@ -99,6 +101,9 @@ subroutine amg_z_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
integer(psb_ipk_) :: nrow, ncol, n_ne
integer(psb_lpk_) :: nrglob
character(len=20) :: name, ch_err
integer(psb_ipk_), save :: idx_soc1_p1=-1, idx_soc1_p2=-1, idx_soc1_p3=-1
integer(psb_ipk_), save :: idx_soc1_p0=-1
logical, parameter :: do_timings=.true.
info=psb_success_
name = 'amg_soc1_map_bld'
@ -114,6 +119,14 @@ subroutine amg_z_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols()
nrglob = desc_a%get_global_rows()
if ((do_timings).and.(idx_soc1_p0==-1)) &
& idx_soc1_p0 = psb_get_timer_idx("SOC1_MAP: phase0")
if ((do_timings).and.(idx_soc1_p1==-1)) &
& idx_soc1_p1 = psb_get_timer_idx("SOC1_MAP: phase1")
if ((do_timings).and.(idx_soc1_p2==-1)) &
& idx_soc1_p2 = psb_get_timer_idx("SOC1_MAP: phase2")
if ((do_timings).and.(idx_soc1_p3==-1)) &
& idx_soc1_p3 = psb_get_timer_idx("SOC1_MAP: phase3")
nr = a%get_nrows()
nc = a%get_ncols()
@ -133,41 +146,194 @@ subroutine amg_z_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
goto 9999
end if
if (do_timings) call psb_tic(idx_soc1_p0)
call a%cp_to(acsr)
if (do_timings) call psb_toc(idx_soc1_p0)
if (clean_zeros) call acsr%clean_zeros(info)
if (iorder == amg_aggr_ord_nat_) then
!$omp parallel do private(i)
do i=1, nr
ilaggr(i) = -(nr+1)
idxs(i) = i
end do
else
!$omp end parallel do
else
!$omp parallel do private(i)
do i=1, nr
ilaggr(i) = -(nr+1)
ideg(i) = acsr%irp(i+1) - acsr%irp(i)
end do
!$omp end parallel do
call psb_msort(ideg,ix=idxs,dir=psb_sort_down_)
end if
if (do_timings) call psb_tic(idx_soc1_p1)
!
! Phase one: Start with disjoint groups.
!
naggr = 0
icnt = 0
#if 0&&defined(OPENMP)
!
! Thread-parallel variant of phase one (building disjoint aggregates).
! The index range 1..nr is cut into one contiguous chunk per thread;
! each thread numbers the aggregates it creates with a thread-local
! counter, and a second pass shifts those local numbers by an offset
! computed from the per-thread counts.
!
! bnds(0:nths)     : chunk kk covers positions bnds(kk)..bnds(kk+1)-1
! locnaggr(0:nths) : first the number of aggregates created in chunk kk,
!                    later overwritten with the numbering offset of chunk kk
!
block
integer(psb_ipk_), allocatable :: bnds(:), locnaggr(:)
integer(psb_ipk_) :: myth,nths, kk
! NOTE(review): info and disjoint appear in no data-sharing clause and are
! not declared in the inner block, so they are presumably shared by default
! while being written by several threads -- confirm before enabling.
!$omp parallel shared(bnds,locnaggr,ilaggr,nr,naggr,diag,theta,nths) private(icol,val,myth,kk)
block
! Variables declared in this inner block are local to each thread.
integer(psb_ipk_) :: ii,nlp,k,n,ia,isz, nc, i,j,m, nz, ilg, ip, rsz, minip
nths = omp_get_num_threads()
myth = omp_get_thread_num()
! Chunk size for this thread: nr/nths, plus one for the first
! mod(nr,nths) threads so that the sizes add up to nr.
rsz = nr/nths
if (myth < mod(nr,nths)) rsz = rsz + 1
!!$ write(0,*) 'From thread : rsz ',myth,rsz
!$omp master
allocate(bnds(0:nths),locnaggr(0:nths))
locnaggr(:) = 0
bnds(0) = 1
!$omp end master
!$omp barrier
! Every thread stores its own chunk size; the master then turns the
! sizes into chunk start positions with a running sum.
! NOTE(review): there is no barrier between this store and the master
! section that reads all of bnds -- looks like the running sum can start
! before every thread has written its entry; confirm.
bnds(myth+1) = rsz
!$omp master
!!$ write(0,*) 'From master 1: ',bnds
do i=1,nths
bnds(i) = bnds(i) + bnds(i-1)
end do
!!$ write(0,*) 'From master 2: ',bnds
!$omp end master
!$omp barrier
! One outer iteration per chunk.
! NOTE(review): the body below tests bnds(myth) rather than bnds(kk), so it
! assumes iteration kk is executed by thread kk -- confirm that the static
! schedule guarantees this mapping.
!$omp do schedule(static)
do kk=0, nths-1
!!$ write(0,*) 'From thread ',myth,kk,bnds(kk),bnds(kk+1)-1
step1: do ii=bnds(kk), bnds(kk+1)-1
! Once an error has been recorded the remaining iterations are skipped.
if (info /= 0) cycle
i = idxs(ii)
if ((i<1).or.(i>nr)) then
info=psb_err_internal_error_
call psb_errpush(info,name)
cycle step1
!goto 9999
end if
! -(nr+1) is the marker written at initialisation: node not yet aggregated.
if (ilaggr(i) == -(nr+1)) then
nz = (acsr%irp(i+1)-acsr%irp(i))
if ((nz<0).or.(nz>size(icol))) then
info=psb_err_internal_error_
call psb_errpush(info,name)
cycle step1
!goto 9999
end if
! Copy row i (column indices and values) into the per-thread work arrays.
icol(1:nz) = acsr%ja(acsr%irp(i):acsr%irp(i+1)-1)
val(1:nz) = acsr%val(acsr%irp(i):acsr%irp(i+1)-1)
!
! Build the set of all strongly coupled nodes
!
! The first branch is switched off by the literal .false.; it keeps only
! neighbours inside the thread's own chunk and updates ilaggr without a
! critical section.
if (.false.) then
ip = 0
do k=1, nz
j = icol(k)
if ((bnds(myth)<=j).and.(j<=(bnds(myth+1)-1))) then
if (abs(val(k)) > theta*sqrt(abs(diag(i)*diag(j)))) then
ip = ip + 1
icol(ip) = icol(k)
end if
end if
enddo
!
! If the whole strongly coupled neighborhood of I is
! as yet unconnected, turn it into the next aggregate.
! Same if ip==0 (in which case, neighborhood only
! contains I even if it does not look like it from matrix)
!
disjoint = all(ilaggr(icol(1:ip)) == -(nr+1)).or.(ip==0)
if (disjoint) then
locnaggr(kk) = locnaggr(kk) + 1
do k=1, ip
ilaggr(icol(k)) = locnaggr(kk)
end do
ilaggr(i) = locnaggr(kk)
end if
else
! Active branch: collect every strongly coupled neighbour and remember
! the smallest column index among them in minip.
! NOTE(review): unlike the serial loop, j is not checked against 1..nr
! before diag(j) and ilaggr(icol(...)) are accessed -- confirm this is
! safe for the sizes of diag and ilaggr.
ip = 0
minip = nr +1
do k=1, nz
j = icol(k)
if (abs(val(k)) > theta*sqrt(abs(diag(i)*diag(j)))) then
ip = ip + 1
icol(ip) = icol(k)
minip = min(icol(ip),minip)
end if
enddo
! Only proceed when no strong neighbour lies below the start of this
! thread's chunk; neighbours above the end of the chunk are still allowed.
if (bnds(myth)<=minip) then
!
! If the whole strongly coupled neighborhood of I is
! as yet unconnected, turn it into the next aggregate.
! Same if ip==0 (in which case, neighborhood only
! contains I even if it does not look like it from matrix)
!
! The test is done once outside the critical section and repeated inside
! it, so the aggregate is created only if the neighbourhood is still
! unassigned at the time of the update.
disjoint = all(ilaggr(icol(1:ip)) == -(nr+1)).or.(ip==0)
if (disjoint) then
!$omp critical(update_ilaggr)
disjoint = all(ilaggr(icol(1:ip)) == -(nr+1)).or.(ip==0)
if (disjoint) then
locnaggr(kk) = locnaggr(kk) + 1
do k=1, ip
ilaggr(icol(k)) = locnaggr(kk)
end do
ilaggr(i) = locnaggr(kk)
end if
!$omp end critical(update_ilaggr)
end if
endif
end if
end if
enddo step1
end do
!$omp end do
!$omp barrier
!$omp master
! Total number of aggregates, then convert the per-chunk counts into
! offsets: running sum followed by a shift of one position, with
! offset 0 for chunk 0.
naggr = sum(locnaggr(0:nths-1))
!!$ write(0,*) 'NAGGR ',naggr, 'locnaggr ',locnaggr(0:nths-1)
do i=1,nths
locnaggr(i) = locnaggr(i) + locnaggr(i-1)
end do
do i=nths,1,-1
locnaggr(i) = locnaggr(i-1)
end do
locnaggr(0) = 0
!$omp end master
!$omp barrier
! Shift the thread-local aggregate numbers by the offset of the chunk.
! NOTE(review): the offset is selected by the position ii, while the local
! number was written by whichever thread created the aggregate (possibly
! onto a neighbour in a higher chunk, or onto idxs(ii) /= ii) -- the two
! may not match; confirm before enabling this code path.
!$omp do schedule(static)
do kk=0, nths-1
do ii=bnds(kk), bnds(kk+1)-1
if (ilaggr(ii) > 0) ilaggr(ii) = ilaggr(ii) + locnaggr(kk)
end do
end do
!$omp end do
end block
!$omp end parallel
end block
!!$ write(0,*) 'Out of parallel loop NAGGR ',naggr
#else
step1: do ii=1, nr
if (info /= 0) cycle
i = idxs(ii)
if ((i<1).or.(i>nr)) then
info=psb_err_internal_error_
call psb_errpush(info,name)
goto 9999
cycle step1
!goto 9999
end if
if (ilaggr(i) == -(nr+1)) then
nz = (acsr%irp(i+1)-acsr%irp(i))
if ((nz<0).or.(nz>size(icol))) then
info=psb_err_internal_error_
call psb_errpush(info,name)
goto 9999
cycle step1
!goto 9999
end if
icol(1:nz) = acsr%ja(acsr%irp(i):acsr%irp(i+1)-1)
@ -176,7 +342,7 @@ subroutine amg_z_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
!
! Build the set of all strongly coupled nodes
!
ip = 0
ip = 0
do k=1, nz
j = icol(k)
if ((1<=j).and.(j<=nr)) then
@ -194,8 +360,7 @@ subroutine amg_z_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
! contains I even if it does not look like it from matrix)
!
disjoint = all(ilaggr(icol(1:ip)) == -(nr+1)).or.(ip==0)
if (disjoint) then
icnt = icnt + 1
if (disjoint) then
naggr = naggr + 1
do k=1, ip
ilaggr(icol(k)) = naggr
@ -204,16 +369,22 @@ subroutine amg_z_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
end if
endif
enddo step1
!!$ write(0,*) 'NAGGR ',naggr
#endif
if (debug_level >= psb_debug_outer_) then
write(debug_unit,*) me,' ',trim(name),&
& ' Check 1:',count(ilaggr == -(nr+1))
& ' Check 1:',naggr,count(ilaggr(1:nr) == -(nr+1)), count(ilaggr(1:nr)>0),&
& count(ilaggr(1:nr) == -(nr+1))+count(ilaggr(1:nr)>0),nr
end if
if (do_timings) call psb_toc(idx_soc1_p1)
if (do_timings) call psb_tic(idx_soc1_p2)
!
! Phase two: join the neighbours
!
! $ omp workshare
tmpaggr = ilaggr
! $ omp end workshare
! $ omp parallel do schedule(static) shared(tmpaggr,ilaggr,nr,naggr,diag,theta) private(ii,i,j,k,nz,icol,val,ip)
step2: do ii=1,nr
i = idxs(ii)
@ -244,8 +415,15 @@ subroutine amg_z_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
end if
end if
end do step2
! $ omp end parallel do
if (do_timings) call psb_toc(idx_soc1_p2)
if (debug_level >= psb_debug_outer_) then
write(debug_unit,*) me,' ',trim(name),&
& ' Check 1.5:',naggr,count(ilaggr(1:nr) == -(nr+1)), count(ilaggr(1:nr)>0),&
& count(ilaggr(1:nr) == -(nr+1))+count(ilaggr(1:nr)>0),nr
end if
if (do_timings) call psb_tic(idx_soc1_p3)
!
! Phase three: sweep over leftovers, if any
!
@ -274,7 +452,6 @@ subroutine amg_z_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
end if
enddo
if (ip > 0) then
icnt = icnt + 1
naggr = naggr + 1
ilaggr(i) = naggr
do k=1, ip
@ -309,7 +486,7 @@ subroutine amg_z_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
endif
end if
end do
if (do_timings) call psb_toc(idx_soc1_p3)
if (naggr > ncol) then
!write(0,*) name,'Error : naggr > ncol',naggr,ncol
info=psb_err_internal_error_
@ -336,9 +513,14 @@ subroutine amg_z_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
nlaggr(:) = 0
nlaggr(me+1) = naggr
call psb_sum(ctxt,nlaggr(1:np))
if (debug_level >= psb_debug_outer_) then
write(debug_unit,*) me,' ',trim(name),&
& ' Check 2:',naggr,count(ilaggr(1:nr) == -(nr+1)), count(ilaggr(1:nr)>0),&
& count(ilaggr(1:nr) == -(nr+1))+count(ilaggr(1:nr)>0),nr
end if
!!$ write(0,*) nlaggr(1:np),'ILAGGR : ',ilaggr(1:nr)
call acsr%free()
call psb_erractionrestore(err_act)
return

@ -56,6 +56,8 @@ subroutine amg_c_bwgs_solver_bld(a,desc_a,sv,info,b,amold,vmold,imold)
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np, me, i, err_act, debug_unit, debug_level
! Routine name printed in the debug trace; it must carry the 'c' prefix
! of this variant (it previously read 'd_bwgs_solver_bld').
character(len=20) :: name='c_bwgs_solver_bld', ch_err
! Timer handle kept across calls; -1 means it has not been requested yet.
integer(psb_ipk_), save :: idx_tril=-1
! Compile-time switch for the tic/toc instrumentation of this routine.
logical, parameter :: do_timings=.true.
info=psb_success_
call psb_erractionsave(err_act)
@ -65,6 +67,8 @@ subroutine amg_c_bwgs_solver_bld(a,desc_a,sv,info,b,amold,vmold,imold)
call psb_info(ctxt, me, np)
if (debug_level >= psb_debug_outer_) &
& write(debug_unit,*) me,' ',trim(name),' start'
if ((do_timings).and.(idx_tril==-1)) &
& idx_tril = psb_get_timer_idx("BWGS_BLD: tril")
n_row = desc_a%get_local_rows()
@ -77,7 +81,10 @@ subroutine amg_c_bwgs_solver_bld(a,desc_a,sv,info,b,amold,vmold,imold)
! This cuts out the off-diagonal part, because it's supposed to
! be handled by the outer Jacobi smoother.
!
!write(0,*) 'Calling A%TRIL in bwgs_solver_bld'
if (do_timings) call psb_tic(idx_tril)
call a%tril(sv%l,info,diag=-ione,jmax=nrow_a,u=sv%u)
if (do_timings) call psb_toc(idx_tril)
else

@ -56,6 +56,8 @@ subroutine amg_c_gs_solver_bld(a,desc_a,sv,info,b,amold,vmold,imold)
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np, me, i, err_act, debug_unit, debug_level
character(len=20) :: name='c_gs_solver_bld', ch_err
integer(psb_ipk_), save :: idx_tril=-1
logical, parameter :: do_timings=.true.
info=psb_success_
call psb_erractionsave(err_act)
@ -65,6 +67,8 @@ subroutine amg_c_gs_solver_bld(a,desc_a,sv,info,b,amold,vmold,imold)
call psb_info(ctxt, me, np)
if (debug_level >= psb_debug_outer_) &
& write(debug_unit,*) me,' ',trim(name),' start'
if ((do_timings).and.(idx_tril==-1)) &
& idx_tril = psb_get_timer_idx("GS_BLD: tril")
n_row = desc_a%get_local_rows()
@ -76,9 +80,12 @@ subroutine amg_c_gs_solver_bld(a,desc_a,sv,info,b,amold,vmold,imold)
!
! This cuts out the off-diagonal part, because it's supposed to
! be handled by the outer Jacobi smoother.
!
!
!write(0,*) 'Calling A%TRIL in gs_solver_bld'
if (do_timings) call psb_tic(idx_tril)
call a%tril(sv%l,info,diag=izero,jmax=nrow_a,u=sv%u)
if (do_timings) call psb_toc(idx_tril)
!write(0,*) 'From A%TRIL in gs_solver_bld',a%get_nzeros(),sv%l%get_nzeros(),sv%u%get_nzeros()
else
info = psb_err_missing_override_method_

@ -56,6 +56,8 @@ subroutine amg_d_bwgs_solver_bld(a,desc_a,sv,info,b,amold,vmold,imold)
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np, me, i, err_act, debug_unit, debug_level
character(len=20) :: name='d_bwgs_solver_bld', ch_err
integer(psb_ipk_), save :: idx_tril=-1
logical, parameter :: do_timings=.true.
info=psb_success_
call psb_erractionsave(err_act)
@ -65,6 +67,8 @@ subroutine amg_d_bwgs_solver_bld(a,desc_a,sv,info,b,amold,vmold,imold)
call psb_info(ctxt, me, np)
if (debug_level >= psb_debug_outer_) &
& write(debug_unit,*) me,' ',trim(name),' start'
if ((do_timings).and.(idx_tril==-1)) &
& idx_tril = psb_get_timer_idx("BWGS_BLD: tril")
n_row = desc_a%get_local_rows()
@ -77,7 +81,10 @@ subroutine amg_d_bwgs_solver_bld(a,desc_a,sv,info,b,amold,vmold,imold)
! This cuts out the off-diagonal part, because it's supposed to
! be handled by the outer Jacobi smoother.
!
!write(0,*) 'Calling A%TRIL in bwgs_solver_bld'
if (do_timings) call psb_tic(idx_tril)
call a%tril(sv%l,info,diag=-ione,jmax=nrow_a,u=sv%u)
if (do_timings) call psb_toc(idx_tril)
else

@ -56,6 +56,8 @@ subroutine amg_d_gs_solver_bld(a,desc_a,sv,info,b,amold,vmold,imold)
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np, me, i, err_act, debug_unit, debug_level
character(len=20) :: name='d_gs_solver_bld', ch_err
integer(psb_ipk_), save :: idx_tril=-1
logical, parameter :: do_timings=.true.
info=psb_success_
call psb_erractionsave(err_act)
@ -65,6 +67,8 @@ subroutine amg_d_gs_solver_bld(a,desc_a,sv,info,b,amold,vmold,imold)
call psb_info(ctxt, me, np)
if (debug_level >= psb_debug_outer_) &
& write(debug_unit,*) me,' ',trim(name),' start'
if ((do_timings).and.(idx_tril==-1)) &
& idx_tril = psb_get_timer_idx("GS_BLD: tril")
n_row = desc_a%get_local_rows()
@ -76,9 +80,12 @@ subroutine amg_d_gs_solver_bld(a,desc_a,sv,info,b,amold,vmold,imold)
!
! This cuts out the off-diagonal part, because it's supposed to
! be handled by the outer Jacobi smoother.
!
!
!write(0,*) 'Calling A%TRIL in gs_solver_bld'
if (do_timings) call psb_tic(idx_tril)
call a%tril(sv%l,info,diag=izero,jmax=nrow_a,u=sv%u)
if (do_timings) call psb_toc(idx_tril)
!write(0,*) 'From A%TRIL in gs_solver_bld',a%get_nzeros(),sv%l%get_nzeros(),sv%u%get_nzeros()
else
info = psb_err_missing_override_method_

@ -56,6 +56,8 @@ subroutine amg_s_bwgs_solver_bld(a,desc_a,sv,info,b,amold,vmold,imold)
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np, me, i, err_act, debug_unit, debug_level
! Routine name printed in the debug trace; it must carry the 's' prefix
! of this variant (it previously read 'd_bwgs_solver_bld').
character(len=20) :: name='s_bwgs_solver_bld', ch_err
! Timer handle kept across calls; -1 means it has not been requested yet.
integer(psb_ipk_), save :: idx_tril=-1
! Compile-time switch for the tic/toc instrumentation of this routine.
logical, parameter :: do_timings=.true.
info=psb_success_
call psb_erractionsave(err_act)
@ -65,6 +67,8 @@ subroutine amg_s_bwgs_solver_bld(a,desc_a,sv,info,b,amold,vmold,imold)
call psb_info(ctxt, me, np)
if (debug_level >= psb_debug_outer_) &
& write(debug_unit,*) me,' ',trim(name),' start'
if ((do_timings).and.(idx_tril==-1)) &
& idx_tril = psb_get_timer_idx("BWGS_BLD: tril")
n_row = desc_a%get_local_rows()
@ -77,7 +81,10 @@ subroutine amg_s_bwgs_solver_bld(a,desc_a,sv,info,b,amold,vmold,imold)
! This cuts out the off-diagonal part, because it's supposed to
! be handled by the outer Jacobi smoother.
!
!write(0,*) 'Calling A%TRIL in bwgs_solver_bld'
if (do_timings) call psb_tic(idx_tril)
call a%tril(sv%l,info,diag=-ione,jmax=nrow_a,u=sv%u)
if (do_timings) call psb_toc(idx_tril)
else

@ -56,6 +56,8 @@ subroutine amg_s_gs_solver_bld(a,desc_a,sv,info,b,amold,vmold,imold)
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np, me, i, err_act, debug_unit, debug_level
character(len=20) :: name='s_gs_solver_bld', ch_err
integer(psb_ipk_), save :: idx_tril=-1
logical, parameter :: do_timings=.true.
info=psb_success_
call psb_erractionsave(err_act)
@ -65,6 +67,8 @@ subroutine amg_s_gs_solver_bld(a,desc_a,sv,info,b,amold,vmold,imold)
call psb_info(ctxt, me, np)
if (debug_level >= psb_debug_outer_) &
& write(debug_unit,*) me,' ',trim(name),' start'
if ((do_timings).and.(idx_tril==-1)) &
& idx_tril = psb_get_timer_idx("GS_BLD: tril")
n_row = desc_a%get_local_rows()
@ -76,9 +80,12 @@ subroutine amg_s_gs_solver_bld(a,desc_a,sv,info,b,amold,vmold,imold)
!
! This cuts out the off-diagonal part, because it's supposed to
! be handled by the outer Jacobi smoother.
!
!
!write(0,*) 'Calling A%TRIL in gs_solver_bld'
if (do_timings) call psb_tic(idx_tril)
call a%tril(sv%l,info,diag=izero,jmax=nrow_a,u=sv%u)
if (do_timings) call psb_toc(idx_tril)
!write(0,*) 'From A%TRIL in gs_solver_bld',a%get_nzeros(),sv%l%get_nzeros(),sv%u%get_nzeros()
else
info = psb_err_missing_override_method_

@ -56,6 +56,8 @@ subroutine amg_z_bwgs_solver_bld(a,desc_a,sv,info,b,amold,vmold,imold)
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np, me, i, err_act, debug_unit, debug_level
! Routine name printed in the debug trace; it must carry the 'z' prefix
! of this variant (it previously read 'd_bwgs_solver_bld').
character(len=20) :: name='z_bwgs_solver_bld', ch_err
! Timer handle kept across calls; -1 means it has not been requested yet.
integer(psb_ipk_), save :: idx_tril=-1
! Compile-time switch for the tic/toc instrumentation of this routine.
logical, parameter :: do_timings=.true.
info=psb_success_
call psb_erractionsave(err_act)
@ -65,6 +67,8 @@ subroutine amg_z_bwgs_solver_bld(a,desc_a,sv,info,b,amold,vmold,imold)
call psb_info(ctxt, me, np)
if (debug_level >= psb_debug_outer_) &
& write(debug_unit,*) me,' ',trim(name),' start'
if ((do_timings).and.(idx_tril==-1)) &
& idx_tril = psb_get_timer_idx("BWGS_BLD: tril")
n_row = desc_a%get_local_rows()
@ -77,7 +81,10 @@ subroutine amg_z_bwgs_solver_bld(a,desc_a,sv,info,b,amold,vmold,imold)
! This cuts out the off-diagonal part, because it's supposed to
! be handled by the outer Jacobi smoother.
!
!write(0,*) 'Calling A%TRIL in bwgs_solver_bld'
if (do_timings) call psb_tic(idx_tril)
call a%tril(sv%l,info,diag=-ione,jmax=nrow_a,u=sv%u)
if (do_timings) call psb_toc(idx_tril)
else

@ -56,6 +56,8 @@ subroutine amg_z_gs_solver_bld(a,desc_a,sv,info,b,amold,vmold,imold)
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np, me, i, err_act, debug_unit, debug_level
character(len=20) :: name='z_gs_solver_bld', ch_err
integer(psb_ipk_), save :: idx_tril=-1
logical, parameter :: do_timings=.true.
info=psb_success_
call psb_erractionsave(err_act)
@ -65,6 +67,8 @@ subroutine amg_z_gs_solver_bld(a,desc_a,sv,info,b,amold,vmold,imold)
call psb_info(ctxt, me, np)
if (debug_level >= psb_debug_outer_) &
& write(debug_unit,*) me,' ',trim(name),' start'
if ((do_timings).and.(idx_tril==-1)) &
& idx_tril = psb_get_timer_idx("GS_BLD: tril")
n_row = desc_a%get_local_rows()
@ -76,9 +80,12 @@ subroutine amg_z_gs_solver_bld(a,desc_a,sv,info,b,amold,vmold,imold)
!
! This cuts out the off-diagonal part, because it's supposed to
! be handled by the outer Jacobi smoother.
!
!
!write(0,*) 'Calling A%TRIL in gs_solver_bld'
if (do_timings) call psb_tic(idx_tril)
call a%tril(sv%l,info,diag=izero,jmax=nrow_a,u=sv%u)
if (do_timings) call psb_toc(idx_tril)
!write(0,*) 'From A%TRIL in gs_solver_bld',a%get_nzeros(),sv%l%get_nzeros(),sv%u%get_nzeros()
else
info = psb_err_missing_override_method_

@ -195,7 +195,7 @@ program amg_d_pde3d
! other variables
integer(psb_ipk_) :: info, i, k
character(len=20) :: name,ch_err
type(psb_d_csr_sparse_mat) :: amold
info=psb_success_
@ -402,7 +402,7 @@ program amg_d_pde3d
end if
call psb_barrier(ctxt)
t1 = psb_wtime()
call prec%smoothers_build(a,desc_a,info)
call prec%smoothers_build(a,desc_a,info,amold=amold)
tprec = psb_wtime()-t1
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='amg_smoothers_bld')
@ -492,7 +492,7 @@ program amg_d_pde3d
write(psb_out_unit,'("Storage format for DESC_A : ",a )') desc_a%get_fmt()
end if
call psb_print_timers(ctxt)
!
! cleanup storage and exit
!

@ -195,7 +195,7 @@ program amg_s_pde3d
! other variables
integer(psb_ipk_) :: info, i, k
character(len=20) :: name,ch_err
type(psb_s_csr_sparse_mat) :: amold
info=psb_success_
@ -402,7 +402,7 @@ program amg_s_pde3d
end if
call psb_barrier(ctxt)
t1 = psb_wtime()
call prec%smoothers_build(a,desc_a,info)
call prec%smoothers_build(a,desc_a,info,amold=amold)
tprec = psb_wtime()-t1
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='amg_smoothers_bld')
@ -492,7 +492,7 @@ program amg_s_pde3d
write(psb_out_unit,'("Storage format for DESC_A : ",a )') desc_a%get_fmt()
end if
call psb_print_timers(ctxt)
!
! cleanup storage and exit
!

@ -1,6 +1,6 @@
%%%%%%%%%%% General arguments % Lines starting with % are ignored.
CSR ! Storage format CSR COO JAD
0200 ! IDIM; domain size. Linear system size is IDIM**3
0200 ! IDIM; domain size. Linear system size is IDIM**3
CONST ! PDECOEFF: CONST, EXP, GAUSS Coefficients of the PDE
BICGSTAB ! Iterative method: BiCGSTAB BiCGSTABL BiCG CG CGS FCG GCR RGMRES
2 ! ISTOPC
@ -9,7 +9,7 @@ BICGSTAB ! Iterative method: BiCGSTAB BiCGSTABL BiCG CG CGS F
30 ! IRST (restart for RGMRES and BiCGSTABL)
1.d-6 ! EPS
%%%%%%%%%%% Main preconditioner choices %%%%%%%%%%%%%%%%
ML-VCYCLE-BJAC-D-BJAC ! Longer descriptive name for preconditioner (up to 20 chars)
ML-VBM-VCYCLE-FBGS-D-BJAC ! Longer descriptive name for preconditioner (up to 20 chars)
ML ! Preconditioner type: NONE JACOBI GS FBGS BJAC AS ML
%%%%%%%%%%% First smoother (for all levels but coarsest) %%%%%%%%%%%%%%%%
FBGS ! Smoother type JACOBI FBGS GS BWGS BJAC AS. For 1-level, repeats previous.
@ -39,8 +39,8 @@ VCYCLE ! Type of multilevel CYCLE: VCYCLE WCYCLE KCYCLE MUL
-3 ! Max Number of levels in a multilevel preconditioner; if <0, lib default
-3 ! Target coarse matrix size per process; if <0, lib default
SMOOTHED ! Type of aggregation: SMOOTHED UNSMOOTHED
COUPLED ! Parallel aggregation: DEC, SYMDEC, COUPLED
MATCHBOXP ! aggregation measure SOC1, MATCHBOXP
DEC ! Parallel aggregation: DEC, SYMDEC, COUPLED
SOC1 ! aggregation measure SOC1, MATCHBOXP
8 ! Requested size of the aggregates for MATCHBOXP
NATURAL ! Ordering of aggregation NATURAL DEGREE
-1.5 ! Coarsening ratio, if < 0 use library default

Loading…
Cancel
Save