Fix performance of owner search in descriptor assembly

newG2L
Salvatore Filippone 4 years ago
parent a9d2c73783
commit 321814d247

@ -79,7 +79,7 @@ subroutine psi_a2a_fnd_owner(idx,iprc,idxmap,info,samesize)
integer(psb_lpk_) :: mglob, ih integer(psb_lpk_) :: mglob, ih
type(psb_ctxt_type) :: ctxt type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np,me, nresp integer(psb_ipk_) :: np,me, nresp
logical, parameter :: use_psi_adj=.true. logical, parameter :: use_psi_adj=.false.
real(psb_dpk_) :: t0, t1, t2, t3, t4, tamx, tidx real(psb_dpk_) :: t0, t1, t2, t3, t4, tamx, tidx
character(len=20) :: name character(len=20) :: name
logical :: samesize_ logical :: samesize_
@ -113,7 +113,7 @@ subroutine psi_a2a_fnd_owner(idx,iprc,idxmap,info,samesize)
samesize_ = .false. samesize_ = .false.
end if end if
nv = size(idx) nv = size(idx)
! write(0,*) me,name,' :',use_psi_adj,samesize_,nv !if (me == 0) write(0,*) me,name,' :',use_psi_adj,samesize_,nv
if (use_psi_adj) then if (use_psi_adj) then
! !
! Reuse the adjcncy version by tricking it with an adjcncy list ! Reuse the adjcncy version by tricking it with an adjcncy list
@ -152,6 +152,7 @@ subroutine psi_a2a_fnd_owner(idx,iprc,idxmap,info,samesize)
call mpi_reduce_scatter_block(lclidx,iprc,nv,psb_mpi_ipk_,mpi_max,icomm,minfo) call mpi_reduce_scatter_block(lclidx,iprc,nv,psb_mpi_ipk_,mpi_max,icomm,minfo)
else else
! if (me == 0) write(0,*) 'a2a_fnd_owner : version 3'
! !
! 1. allgetherv ! 1. allgetherv
! 2. local conversion ! 2. local conversion

@ -144,8 +144,8 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info)
goto 9999 goto 9999
end if end if
iprc = -1 iprc = -1
! write(0,*) me,name,' Going through ',nidx,nadj
xchg_alg = psi_get_adj_alg() xchg_alg = psi_get_adj_alg()
!if (me == 0) write(0,*) me,'adj_fnd_owner alg: ',xchg_alg,' Going through ',nidx,nadj
select case(xchg_alg) select case(xchg_alg)
case(psi_adj_fnd_a2av_) case(psi_adj_fnd_a2av_)
if (do_timings) call psb_tic(idx_phase1) if (do_timings) call psb_tic(idx_phase1)

@ -39,8 +39,8 @@
! Arguments: ! Arguments:
! idx(:) - integer Required indices on the calling process. ! idx(:) - integer Required indices on the calling process.
! !
! iprc(:) - integer(psb_ipk_), allocatable Output: process identifiers for the corresponding ! iprc(:) - integer(psb_ipk_), allocatable Output: process identifiers
! indices ! for the corresponding indices
! idxmap - class(psb_indx_map). The index map ! idxmap - class(psb_indx_map). The index map
! info - integer. return code. ! info - integer. return code.
! !
@ -102,7 +102,7 @@ subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info)
integer(psb_mpk_) :: icomm, minfo integer(psb_mpk_) :: icomm, minfo
integer(psb_ipk_) :: i,n_row,n_col,err_act,ip,j,ipnt, nsampl_out,& integer(psb_ipk_) :: i,n_row,n_col,err_act,ip,j,ipnt, nsampl_out,&
& nv, n_answers, nqries, nsampl_in, locr_max, & & nv, n_answers, nqries, nsampl_in, locr_max, &
& nqries_max, nadj, maxspace, mxnsin & nqries_max, nadj, maxspace, mxnsin, mnnsin
integer(psb_lpk_) :: mglob, ih integer(psb_lpk_) :: mglob, ih
type(psb_ctxt_type) :: ctxt type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np,me, nresp integer(psb_ipk_) :: np,me, nresp
@ -186,15 +186,15 @@ subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info)
maxspace = nt*locr_max maxspace = nt*locr_max
if (tmpv(4) > 0) maxspace = min(maxspace,tmpv(4)) if (tmpv(4) > 0) maxspace = min(maxspace,tmpv(4))
maxspace = max(maxspace,np) maxspace = max(maxspace,np)
if (trace.and.(me == 0)) write(0,*) ' Through graph_fnd_owner with maxspace:',maxspace if (trace.and.(me == 0)) write(0,*) ' Through graph_fnd_owner with maxspace:',maxspace,maxspace/np
if (do_timings) call psb_tic(idx_sweep0) if (do_timings) call psb_tic(idx_sweep0)
if ((tmpv(1) > 0).and.(tmpv(2) >0)) then if ((tmpv(1) > 0).and.(tmpv(2) >0)) then
! !
! Do a preliminary run on the user-defined adjacency lists ! Do a preliminary run on the user-defined adjacency lists
! !
if (trace.and.(me == 0)) write(0,*) ' Initial sweep on user-defined topology'
if (debugsz) write(0,*) me,' Initial sweep on user-defined topology',nqries if (debugsz) write(0,*) me,' Initial sweep on user-defined topology',nqries
nsampl_in = min(nqries,max(1,(maxspace+max(1,nadj)-1))/(max(1,nadj))) nsampl_in = min(nqries,max(1,(maxspace+max(1,nadj)-1))/(max(1,nadj)))
if (trace.and.(me == 0)) write(0,*) ' Initial sweep on user-defined topology',nsampl_in
call psi_adj_fnd_sweep(idx,iprc,ladj,idxmap,nsampl_in,n_answers) call psi_adj_fnd_sweep(idx,iprc,ladj,idxmap,nsampl_in,n_answers)
call idxmap%xtnd_p_adjcncy(ladj) call idxmap%xtnd_p_adjcncy(ladj)
nqries = nv - n_answers nqries = nv - n_answers
@ -216,7 +216,7 @@ subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info)
! 1. Select a sample such that the total size is <= maxspace ! 1. Select a sample such that the total size is <= maxspace
! sample query is then sent to all processes ! sample query is then sent to all processes
! !
! if (trace.and.(me == 0)) write(0,*) 'Looping in graph_fnd_owner: ', nqries_max if (trace.and.(me == 0)) write(0,*) 'Looping in graph_fnd_owner: ', nqries_max
nsampl_in = nqries nsampl_in = nqries
nsampl_in = min(max(1,(maxspace+np-1)/np),nsampl_in) nsampl_in = min(max(1,(maxspace+np-1)/np),nsampl_in)
! !
@ -231,7 +231,7 @@ subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info)
! 2. Do a search on all processes; this is supposed to find ! 2. Do a search on all processes; this is supposed to find
! the owning process for all inputs; ! the owning process for all inputs;
! !
call psi_a2a_fnd_owner(tidx(1:nsampl_in),tprc,idxmap,info, samesize=.true.) call psi_a2a_fnd_owner(tidx(1:nsampl_in),tprc,idxmap,info)
if (debugsz) write(0,*) me,' From a2a_fnd_owner ',info if (debugsz) write(0,*) me,' From a2a_fnd_owner ',info
! !
! We might have padded when looking for owners, so the actual samples ! We might have padded when looking for owners, so the actual samples
@ -260,12 +260,17 @@ subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info)
! Need to set up a proper loop here to have a complete ! Need to set up a proper loop here to have a complete
! sweep over the input vector. Done inside adj_fnd_sweep. ! sweep over the input vector. Done inside adj_fnd_sweep.
! !
!!$ write(0,*) me,' After a2a ',nqries !write(0,*) me,' After a2a ',nqries
nsampl_in = min(nqries,max(1,(maxspace+max(1,nadj)-1))/(max(1,nadj))) nsampl_in = min(nqries,max(1,(maxspace+max(1,nadj)-1))/(max(1,nadj)))
mxnsin = nsampl_in mxnsin = nsampl_in
call psb_max(ctxt,mxnsin) call psb_max(ctxt,mxnsin)
!!$ write(0,*) me, ' mxnsin ',mxnsin ! mnnsin = nsampl_in
if (mxnsin>0) call psi_adj_fnd_sweep(idx,iprc,ladj,idxmap,nsampl_in,n_answers) ! if (mnnsin==0) mnnsin=HUGE(mnnsin)
! call psb_min(ctxt,mnnsin)
! write(0,*) me, ' mxnsin ',mxnsin
if (trace.and.(me == 0)) write(0,*) ' Further sweep',nsampl_in, mxnsin, mnnsin
if (mxnsin>0) call psi_adj_fnd_sweep(idx(n_answers+1:),iprc(n_answers+1:),ladj,&
& idxmap,nsampl_in,n_answers)
call idxmap%xtnd_p_adjcncy(ladj) call idxmap%xtnd_p_adjcncy(ladj)
nqries = nv - n_answers nqries = nv - n_answers
@ -293,7 +298,7 @@ contains
integer(psb_ipk_), intent(out) :: tsmpl(:), ns_out integer(psb_ipk_), intent(out) :: tsmpl(:), ns_out
logical, intent(in), optional :: pad logical, intent(in), optional :: pad
! !
integer(psb_ipk_) :: nv, ns, k integer(psb_ipk_) :: nv, ns, k, ipnt
logical :: pad_ logical :: pad_
if (present(pad)) then if (present(pad)) then
@ -312,7 +317,7 @@ contains
! Make sure we sweep through the entire vector immediately. ! Make sure we sweep through the entire vector immediately.
! But also make sure we do not overrun tsmpl ! But also make sure we do not overrun tsmpl
! !
if (ns == 0) ns = nv !if (ns == 0) ns = nv
ns = min(ns,size(tsmpl)) ns = min(ns,size(tsmpl))
ns_out = 0 ns_out = 0
@ -362,7 +367,7 @@ contains
class(psb_indx_map), intent(inout) :: idxmap class(psb_indx_map), intent(inout) :: idxmap
! !
type(psb_ctxt_type) :: ctxt type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: ipnt, ns_in, ns_out, n_rem, me, np, isw integer(psb_ipk_) :: ipnt, ns_in, ns_out, n_rem, me, np, isw, n_reml
integer(psb_lpk_), allocatable :: tidx(:) integer(psb_lpk_), allocatable :: tidx(:)
integer(psb_ipk_), allocatable :: tsmpl(:) integer(psb_ipk_), allocatable :: tsmpl(:)
@ -374,20 +379,21 @@ contains
isw = 1 isw = 1
do do
!write(0,*) me,' Into sampling ',n_samples !write(0,*) me,' Into sampling ',n_samples
call psi_get_sample(ipnt, idx,iprc,tidx,tsmpl,n_samples,ns_out) call psi_get_sample(ipnt, idx,iprc,tidx,tsmpl,n_samples,ns_out,pad=.true.)
ns_in = min(n_samples,ns_out) ns_in = min(n_samples,ns_out)
!write(0,*) me,' From second sampling ',ns_out !write(0,*) me,' From second sampling ',ns_out, ns_in
call psi_adjcncy_fnd_owner(tidx(1:ns_in),tprc,ladj,idxmap,info) call psi_adjcncy_fnd_owner(tidx(1:ns_in),tprc,ladj,idxmap,info)
call psi_cpy_out(iprc,tprc,tsmpl,ns_in,ns_out) call psi_cpy_out(iprc,tprc,tsmpl,ns_in,ns_out)
!write(0,*) me,' Sweep ',isw,' answers:',ns_out !write(0,*) me,' Sweep ',isw,' answers:',ns_out
n_answers = n_answers + ns_out n_answers = n_answers + ns_out
n_rem = size(idx) - ipnt n_rem = size(idx) - ipnt
n_reml = n_rem
call psb_max(ctxt,n_rem) call psb_max(ctxt,n_rem)
!write(0,*) me,' Sweep ',isw,n_rem, ipnt, n_samples !if (me == 0) write(0,*) me,' fnd_sweep Sweep ',isw,n_rem, ipnt, n_samples, n_reml
if (n_rem <= 0) exit
isw = isw + 1 isw = isw + 1
if (n_rem <= 0) exit
end do end do
! if (me == 0) write(0,*)'adj_fnd_sweep: sweeps: ',isw
end subroutine psi_adj_fnd_sweep end subroutine psi_adj_fnd_sweep

Loading…
Cancel
Save