New structure for A2A and graph_fnd_owner

New implementation for A2A: use mpi_allgather and mpi_reduce_scatter,
with two variants: one for variable inputs, one for fixed size
inputs.
Modified graph_fnd_owner to pass padded and uniform sized input to
A2A.
fnd_owner
Salvatore Filippone 5 years ago
parent 7c908c0849
commit 3cfdc033fd

@ -47,7 +47,7 @@
! so it goes for an all-to-all by building an auxiliary neighbours list and ! so it goes for an all-to-all by building an auxiliary neighbours list and
! reusing the neighbour version. ! reusing the neighbour version.
! !
subroutine psi_a2a_fnd_owner(idx,iprc,idxmap,info) subroutine psi_a2a_fnd_owner(idx,iprc,idxmap,info,samesize)
use psb_serial_mod use psb_serial_mod
use psb_const_mod use psb_const_mod
use psb_error_mod use psb_error_mod
@ -66,16 +66,22 @@ subroutine psi_a2a_fnd_owner(idx,iprc,idxmap,info)
integer(psb_ipk_), allocatable, intent(out) :: iprc(:) integer(psb_ipk_), allocatable, intent(out) :: iprc(:)
class(psb_indx_map), intent(in) :: idxmap class(psb_indx_map), intent(in) :: idxmap
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info
logical, intent(in), optional :: samesize
integer(psb_ipk_), allocatable :: tmpadj(:) integer(psb_ipk_), allocatable :: tmpadj(:)
integer(psb_mpk_) :: icomm, minfo, iictxt integer(psb_lpk_), allocatable :: rmtidx(:)
integer(psb_ipk_) :: i,n_row,n_col,err_act,nv integer(psb_ipk_), allocatable :: tproc(:), lclidx(:)
integer(psb_mpk_), allocatable :: hsz(:),hidx(:), sdidx(:), rvidx(:),&
& sdsz(:), rvsz(:), sdhd(:), rvhd(:), p2pstat(:,:)
integer(psb_mpk_) :: icomm, minfo, iictxt,nv
integer(psb_ipk_) :: i,n_row,n_col,err_act,gsz
integer(psb_lpk_) :: mglob, ih integer(psb_lpk_) :: mglob, ih
integer(psb_ipk_) :: ictxt,np,me, nresp integer(psb_ipk_) :: ictxt,np,me, nresp
logical, parameter :: use_psi_adj=.true. logical, parameter :: use_psi_adj=.true.
real(psb_dpk_) :: t0, t1, t2, t3, t4, tamx, tidx real(psb_dpk_) :: t0, t1, t2, t3, t4, tamx, tidx
character(len=20) :: name character(len=20) :: name
logical :: samesize_
info = psb_success_ info = psb_success_
name = 'psi_a2a_fnd_owner' name = 'psi_a2a_fnd_owner'
@ -101,22 +107,94 @@ subroutine psi_a2a_fnd_owner(idx,iprc,idxmap,info)
goto 9999 goto 9999
end if end if
if (present(samesize)) then
samesize_ = samesize
else
samesize_ = .false.
end if
nv = size(idx)
! write(0,*) me,name,' :',use_psi_adj,samesize_,nv
if (use_psi_adj) then if (use_psi_adj) then
! !
! Reuse the adjcncy version by tricking it with an adjcncy list ! Reuse the adjcncy version by tricking it with an adjcncy list
! that contains everybody but ME. ! that contains everybody but ME.
! !
nv = size(idx)
call psb_realloc(np-1,tmpadj,info) call psb_realloc(np-1,tmpadj,info)
tmpadj(1:me) = [(i,i=0,me-1)] tmpadj(1:me) = [(i,i=0,me-1)]
tmpadj(me+1:np-1) = [(i,i=me+1,np-1)] tmpadj(me+1:np-1) = [(i,i=me+1,np-1)]
call psi_adjcncy_fnd_owner(idx,iprc,tmpadj,idxmap,info) call psi_adjcncy_fnd_owner(idx,iprc,tmpadj,idxmap,info)
else else
! if (samesize_) then
! 1. allgetherv !
! 2. local conversion ! Variant when IDX is guaranteed to have the same size on all
! 3. reduce_scatter ! processes. To be tested for performance: is it worth it?
! ! Probably yes.
!
gsz = nv*np
Allocate(rmtidx(gsz),lclidx(gsz),iprc(nv),stat=info)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
goto 9999
end if
call mpi_allgather(idx,nv,psb_mpi_lpk_,rmtidx,nv,psb_mpi_lpk_,icomm,minfo)
call idxmap%g2l(rmtidx(1:gsz),lclidx(1:gsz),info,owned=.true.)
!
! Reuse lclidx to encode owning process
!
do i=1, gsz
if ((1<=lclidx(i)).and.(lclidx(i)<=n_row)) then
lclidx(i) = me
else
lclidx(i) = -1
end if
end do
call mpi_reduce_scatter_block(lclidx,iprc,nv,psb_mpi_ipk_,mpi_max,icomm,minfo)
else
!
! 1. allgetherv
! 2. local conversion
! 3. reduce_scatter
!
!
! The basic idea is very simple.
! First we collect (to all) all the requests.
Allocate(hidx(np+1),hsz(np),stat=info)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
goto 9999
end if
call mpi_allgather(nv,1,psb_mpi_mpk_,hsz,1,psb_mpi_mpk_,icomm,minfo)
hidx(1) = 0
do i=1, np
hidx(i+1) = hidx(i) + hsz(i)
end do
gsz = hidx(np+1)
Allocate(rmtidx(gsz),lclidx(gsz),iprc(nv),stat=info)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
goto 9999
end if
call mpi_allgatherv(idx,hsz(me+1),psb_mpi_lpk_,&
& rmtidx,hsz,hidx,psb_mpi_lpk_,&
& icomm,minfo)
call idxmap%g2l(rmtidx(1:gsz),lclidx(1:gsz),info,owned=.true.)
!
! Reuse lclidx to encode owning process
!
do i=1, gsz
if ((1<=lclidx(i)).and.(lclidx(i)<=n_row)) then
lclidx(i) = me
else
lclidx(i) = -1
end if
end do
call mpi_reduce_scatter(lclidx,iprc,hsz,psb_mpi_ipk_,mpi_max,icomm,minfo)
end if
end if end if
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)

@ -101,8 +101,8 @@ subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info)
integer(psb_ipk_), allocatable :: tprc(:), tsmpl(:), ladj(:) integer(psb_ipk_), allocatable :: tprc(:), tsmpl(:), ladj(:)
integer(psb_mpk_) :: icomm, minfo, iictxt integer(psb_mpk_) :: icomm, minfo, iictxt
integer(psb_ipk_) :: i,n_row,n_col,err_act,ip,j,ipnt, nsampl_out,& integer(psb_ipk_) :: i,n_row,n_col,err_act,ip,j,ipnt, nsampl_out,&
& nv, n_answers, n_rest, nsampl_in, locr_max, & & nv, n_answers, nreqst, nsampl_in, locr_max, &
& nrest_max, nadj, maxspace, mxnsin & nreqst_max, nadj, maxspace, mxnsin
integer(psb_lpk_) :: mglob, ih integer(psb_lpk_) :: mglob, ih
integer(psb_ipk_) :: ictxt,np,me, nresp integer(psb_ipk_) :: ictxt,np,me, nresp
integer(psb_ipk_), parameter :: nt=4 integer(psb_ipk_), parameter :: nt=4
@ -164,15 +164,19 @@ subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info)
nadj = psb_size(ladj) nadj = psb_size(ladj)
! This makes ladj allocated with size 0 just in case ! This makes ladj allocated with size 0 just in case
call psb_realloc(nadj,ladj,info) call psb_realloc(nadj,ladj,info)
n_rest = nv - n_answers !
nrest_max = n_rest ! Throughout the subroutine, nreqst is the number of local inquiries
! that have not been answered yet
!
nreqst = nv - n_answers
nreqst_max = nreqst
! !
! Choice of maxspace should be adjusted to account for a default ! Choice of maxspace should be adjusted to account for a default
! "sensible" size and/or a user-specified value ! "sensible" size and/or a user-specified value
! !
tmpv(1) = nadj tmpv(1) = nadj
tmpv(2) = nrest_max tmpv(2) = nreqst_max
tmpv(3) = n_row tmpv(3) = n_row
tmpv(4) = psb_cd_get_maxspace() tmpv(4) = psb_cd_get_maxspace()
call psb_max(ictxt,tmpv) call psb_max(ictxt,tmpv)
@ -187,12 +191,12 @@ subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info)
! Do a preliminary run on the user-defined adjacency lists ! Do a preliminary run on the user-defined adjacency lists
! !
if (trace.and.(me == 0)) write(0,*) ' Initial sweep on user-defined topology' if (trace.and.(me == 0)) write(0,*) ' Initial sweep on user-defined topology'
nsampl_in = min(n_rest,max(1,(maxspace+max(1,nadj)-1))/(max(1,nadj))) nsampl_in = min(nreqst,max(1,(maxspace+max(1,nadj)-1))/(max(1,nadj)))
call psi_adj_fnd_sweep(idx,iprc,ladj,idxmap,nsampl_in,n_answers) call psi_adj_fnd_sweep(idx,iprc,ladj,idxmap,nsampl_in,n_answers)
call idxmap%xtnd_p_adjcncy(ladj) call idxmap%xtnd_p_adjcncy(ladj)
n_rest = nv - n_answers nreqst = nv - n_answers
nrest_max = n_rest nreqst_max = nreqst
call psb_max(ictxt,nrest_max) call psb_max(ictxt,nreqst_max)
if (trace.and.(me == 0)) write(0,*) ' After initial sweep:',nrest_max if (trace.and.(me == 0)) write(0,*) ' After initial sweep:',nrest_max
end if end if
if (do_timings) call psb_toc(idx_sweep0) if (do_timings) call psb_toc(idx_sweep0)
@ -208,28 +212,32 @@ subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info)
! sample query is then sent to all processes ! sample query is then sent to all processes
! !
! if (trace.and.(me == 0)) write(0,*) 'Looping in graph_fnd_owner: ', nrest_max ! if (trace.and.(me == 0)) write(0,*) 'Looping in graph_fnd_owner: ', nrest_max
nsampl_in = min(n_rest,max(1,(maxspace+np-1)/np)) nsampl_in = psb_cd_get_samplesize()
!nsampl_in = min(n_rest,32) nsampl_in = min(max(1,(maxspace+np-1)/np),nsampl_in)
! !
! Choose a sample, should it be done in this simplistic way? ! Choose a sample, should it be done in this simplistic way?
! Note: nsampl_in is a hint, not an absolute, hence nsampl_out ! Note: nsampl_in is a hint, not an absolute, hence nsampl_out
! !
ipnt = 1 ipnt = 1
!!$ write(0,*) me,' Into first sampling ',nsampl_in call psi_get_sample(ipnt, idx,iprc,tidx,tsmpl,nsampl_in,nsampl_out, pad=.true.)
call psi_get_sample(ipnt, idx,iprc,tidx,tsmpl,nsampl_in,nsampl_out)
nsampl_in = min(nsampl_out,nsampl_in) nsampl_in = min(nsampl_out,nsampl_in)
!!$ write(0,*) me,' From first sampling ',nsampl_in !!$ write(0,*) me,' From first sampling ',nsampl_in
! !
! 2. Do a search on all processes; this is supposed to find ! 2. Do a search on all processes; this is supposed to find
! the owning process for all inputs; ! the owning process for all inputs;
! !
call psi_a2a_fnd_owner(tidx(1:nsampl_in),tprc,idxmap,info) call psi_a2a_fnd_owner(tidx(1:nsampl_in),tprc,idxmap,info, samesize=.true.)
!
! We might have padded when looking for owners, so the actual samples
! could be less than they appear. Should be explained better.
!
nsampl_in = min(nreqst,nsampl_in)
call psi_cpy_out(iprc,tprc,tsmpl,nsampl_in,nsampl_out) call psi_cpy_out(iprc,tprc,tsmpl,nsampl_in,nsampl_out)
if (nsampl_out /= nsampl_in) then if (nsampl_out /= nsampl_in) then
write(0,*) me,'Warning: indices not found by a2a_fnd_owner ',nsampl_out,nsampl_in write(0,*) me,'Warning: indices not found by a2a_fnd_owner ',nsampl_out,nsampl_in
end if end if
n_answers = n_answers + nsampl_out n_answers = n_answers + nsampl_out
n_rest = nv - n_answers nreqst = nv - n_answers
! !
! 3. Extract the resulting adjacency list and add it to the ! 3. Extract the resulting adjacency list and add it to the
! indxmap; ! indxmap;
@ -246,16 +254,16 @@ subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info)
! Need to set up a proper loop here to have a complete ! Need to set up a proper loop here to have a complete
! sweep over the input vector. Done inside adj_fnd_sweep. ! sweep over the input vector. Done inside adj_fnd_sweep.
! !
!!$ write(0,*) me,' After a2a ',n_rest !!$ write(0,*) me,' After a2a ',nreqst
nsampl_in = min(n_rest,max(1,(maxspace+max(1,nadj)-1))/(max(1,nadj))) nsampl_in = min(nreqst,max(1,(maxspace+max(1,nadj)-1))/(max(1,nadj)))
mxnsin = nsampl_in mxnsin = nsampl_in
call psb_max(ictxt,mxnsin) call psb_max(ictxt,mxnsin)
!!$ write(0,*) me, ' mxnsin ',mxnsin !!$ write(0,*) me, ' mxnsin ',mxnsin
if (mxnsin>0) call psi_adj_fnd_sweep(idx,iprc,ladj,idxmap,nsampl_in,n_answers) if (mxnsin>0) call psi_adj_fnd_sweep(idx,iprc,ladj,idxmap,nsampl_in,n_answers)
call idxmap%xtnd_p_adjcncy(ladj) call idxmap%xtnd_p_adjcncy(ladj)
n_rest = nv - n_answers nreqst = nv - n_answers
nrest_max = n_rest nrest_max = nreqst
call psb_max(ictxt,nrest_max) call psb_max(ictxt,nrest_max)
if (trace.and.(me == 0)) write(0,*) ' fnd_owner_loop remaining:',nrest_max if (trace.and.(me == 0)) write(0,*) ' fnd_owner_loop remaining:',nrest_max
if (do_timings) call psb_toc(idx_loop_neigh) if (do_timings) call psb_toc(idx_loop_neigh)
@ -270,16 +278,23 @@ subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info)
contains contains
subroutine psi_get_sample(ipntidx,idx,iprc,tidx,tsmpl,ns_in,ns_out) subroutine psi_get_sample(ipntidx,idx,iprc,tidx,tsmpl,ns_in,ns_out,pad)
implicit none implicit none
integer(psb_ipk_), intent(inout) :: ipntidx integer(psb_ipk_), intent(inout) :: ipntidx
integer(psb_lpk_), intent(in) :: idx(:) integer(psb_lpk_), intent(in) :: idx(:)
integer(psb_ipk_), intent(in) :: ns_in, iprc(:) integer(psb_ipk_), intent(in) :: ns_in, iprc(:)
integer(psb_lpk_), intent(out) :: tidx(:) integer(psb_lpk_), intent(out) :: tidx(:)
integer(psb_ipk_), intent(out) :: tsmpl(:), ns_out integer(psb_ipk_), intent(out) :: tsmpl(:), ns_out
logical, intent(in), optional :: pad
! !
integer(psb_ipk_) :: nv, ns integer(psb_ipk_) :: nv, ns, k
logical :: pad_
if (present(pad)) then
pad_ = pad
else
pad_ = .false.
end if
nv = size(idx) nv = size(idx)
! !
! Choose a sample, should it be done in this simplistic way? ! Choose a sample, should it be done in this simplistic way?
@ -302,6 +317,13 @@ contains
ipntidx = ipntidx + 1 ipntidx = ipntidx + 1
if (ns_out >= ns) exit if (ns_out >= ns) exit
end do end do
if (pad_) then
do k = ns_out+1, ns
tsmpl(k) = -1
tidx(k) = -1
end do
ns_out = ns
end if
end subroutine psi_get_sample end subroutine psi_get_sample

@ -285,14 +285,6 @@ module psb_desc_mod
module procedure psb_cdfree module procedure psb_cdfree
end interface psb_free end interface psb_free
interface psb_cd_set_maxspace
module procedure psb_cd_set_maxspace
end interface psb_cd_set_maxspace
interface psb_cd_get_maxspace
module procedure psb_cd_get_maxspace
end interface psb_cd_get_maxspace
interface psb_cd_set_large_threshold interface psb_cd_set_large_threshold
module procedure psb_i_cd_set_large_threshold module procedure psb_i_cd_set_large_threshold
end interface psb_cd_set_large_threshold end interface psb_cd_set_large_threshold
@ -310,7 +302,6 @@ module psb_desc_mod
integer(psb_lpk_), private, save :: cd_large_threshold = psb_default_large_threshold integer(psb_lpk_), private, save :: cd_large_threshold = psb_default_large_threshold
integer(psb_ipk_), private, save :: cd_maxspace = -1
contains contains
@ -363,21 +354,6 @@ contains
val = cd_large_threshold val = cd_large_threshold
end function psb_cd_get_large_threshold end function psb_cd_get_large_threshold
subroutine psb_cd_set_maxspace(ith)
implicit none
integer(psb_ipk_), intent(in) :: ith
if (ith > 0) then
cd_maxspace = ith
end if
end subroutine psb_cd_set_maxspace
function psb_cd_get_maxspace() result(val)
implicit none
integer(psb_ipk_) :: val
val = cd_maxspace
end function psb_cd_get_maxspace
function psb_cd_is_large_size(m) result(val) function psb_cd_is_large_size(m) result(val)
use psb_penv_mod use psb_penv_mod

@ -29,7 +29,7 @@
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE. ! POSSIBILITY OF SUCH DAMAGE.
! !
!!$ !
! !
! !
! package: psb_indx_map_mod ! package: psb_indx_map_mod
@ -278,13 +278,14 @@ module psb_indx_map_mod
end interface end interface
interface interface
subroutine psi_a2a_fnd_owner(idx,iprc,idxmap,info) subroutine psi_a2a_fnd_owner(idx,iprc,idxmap,info,samesize)
import :: psb_indx_map, psb_ipk_, psb_lpk_ import :: psb_indx_map, psb_ipk_, psb_lpk_
implicit none implicit none
integer(psb_lpk_), intent(in) :: idx(:) integer(psb_lpk_), intent(in) :: idx(:)
integer(psb_ipk_), allocatable, intent(out) :: iprc(:) integer(psb_ipk_), allocatable, intent(out) :: iprc(:)
class(psb_indx_map), intent(in) :: idxmap class(psb_indx_map), intent(in) :: idxmap
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info
logical, intent(in), optional :: samesize
end subroutine psi_a2a_fnd_owner end subroutine psi_a2a_fnd_owner
end interface end interface
@ -311,6 +312,25 @@ module psb_indx_map_mod
end subroutine psi_graph_fnd_owner end subroutine psi_graph_fnd_owner
end interface end interface
interface psb_cd_set_maxspace
module procedure psb_cd_set_maxspace
end interface psb_cd_set_maxspace
interface psb_cd_get_maxspace
module procedure psb_cd_get_maxspace
end interface psb_cd_get_maxspace
interface psb_cd_set_samplesize
module procedure psb_cd_set_samplesize
end interface psb_cd_set_samplesize
interface psb_cd_get_samplesize
module procedure psb_cd_get_samplesize
end interface psb_cd_get_samplesize
integer(psb_ipk_), private, save :: cd_maxspace = -1
integer(psb_ipk_), private, save :: samplesize = 32
integer, parameter :: psi_symm_flag_norv_ = 0 integer, parameter :: psi_symm_flag_norv_ = 0
integer, parameter :: psi_symm_flag_inrv_ = 1 integer, parameter :: psi_symm_flag_inrv_ = 1
interface psi_symm_dep_list interface psi_symm_dep_list
@ -364,6 +384,35 @@ contains
end select end select
end function psi_get_adj_alg_fmt end function psi_get_adj_alg_fmt
subroutine psb_cd_set_maxspace(ith)
implicit none
integer(psb_ipk_), intent(in) :: ith
if (ith > 0) then
cd_maxspace = ith
end if
end subroutine psb_cd_set_maxspace
function psb_cd_get_maxspace() result(val)
implicit none
integer(psb_ipk_) :: val
val = cd_maxspace
end function psb_cd_get_maxspace
subroutine psb_cd_set_samplesize(ith)
implicit none
integer(psb_ipk_), intent(in) :: ith
if (ith > 0) then
samplesize = ith
end if
end subroutine psb_cd_set_samplesize
function psb_cd_get_samplesize() result(val)
implicit none
integer(psb_ipk_) :: val
val = samplesize
end function psb_cd_get_samplesize
!> !>
!! \memberof psb_indx_map !! \memberof psb_indx_map

Loading…
Cancel
Save