Internal docs for fnd_owner variants.

merge-paraggr
Salvatore Filippone 5 years ago
parent e60e3c9d91
commit c57aa2ac5c

@ -30,20 +30,22 @@
!
!
!
! File: psi_fnd_owner.f90
! File: psi_a2a_fnd_owner.f90
!
! Subroutine: psi_fnd_owner
! Subroutine: psi_a2a_fnd_owner
! Figure out who owns global indices.
!
! Arguments:
! nv - integer Number of indices required on the calling
! process
! idx(:) - integer Required indices on the calling process.
! Note: the indices should be unique!
! iprc(:) - integer(psb_ipk_), allocatable Output: process identifiers for the corresponding
! indices
! desc_a - type(psb_desc_type). The communication descriptor.
! idxmap - class(psb_indx_map). The index map
! info - integer. return code.
!
! This version does not assume any prior knowledge about the process topology,
! so it goes for an all-to-all by building an auxiliary neighbours list and
! reusing the neighbour version.
!
subroutine psi_a2a_fnd_owner(idx,iprc,idxmap,info)
use psb_serial_mod
@ -99,7 +101,7 @@ subroutine psi_a2a_fnd_owner(idx,iprc,idxmap,info)
end if
!
! Reuse the other version by tricking it with an adjcncy list
! Reuse the adjcncy version by tricking it with an adjcncy list
! that contains everybody but ME.
!
nv = size(idx)

@ -29,13 +29,28 @@
! POSSIBILITY OF SUCH DAMAGE.
!
!
!
!
! File: psi_fnd_owner.f90
! File: psi_adjcncy_fnd_owner.f90
!
! Subroutine: psi_fnd_owner
! Subroutine: psi_adjcncy_fnd_owner
! Figure out who owns global indices.
!
! Arguments:
! idx(:) - integer Required indices on the calling process.
! Note: the indices should be unique!
! iprc(:) - integer(psb_ipk_), allocatable Output: process identifiers for the corresponding
! indices
! adj(:) - integer(psb_ipk_) Input: list of topological neighbours for current process.
!
! idxmap - class(psb_indx_map). The index map
! info - integer. return code.
!
! This version takes on input a list of processes that are assumed to
! be topological neighbours of the current one. Each process will send to all
! of its neighbours the list of indices for which it is trying to find the
! owner, prepare its own answers, and collect answers from others.
! There are
!
subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info)
use psb_serial_mod
@ -69,8 +84,10 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info)
& last_ih, last_j, nidx, nrecv, nadj
integer(psb_lpk_) :: mglob, ih
integer(psb_ipk_) :: ictxt,np,me
logical, parameter :: gettime=.false., new_impl=.true.
logical, parameter :: a2av_impl=.true., debug=.false.
logical, parameter :: gettime=.false., debug=.false.
logical, parameter :: a2av_impl=.true.
logical, parameter :: mpi_irecv_impl=.false.
logical, parameter :: psb_rcv_impl=.false.
real(psb_dpk_) :: t0, t1, t2, t3, t4, tamx, tidx
character(len=20) :: name
@ -175,6 +192,10 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info)
call mpi_alltoallv(tproc,rvsz,rvidx,psb_mpi_ipk_,&
& lclidx,sdsz,sdidx,psb_mpi_ipk_,icomm,iret)
!
! Because IPRC has been initialized to -1, the MAX operation selects
! the answers.
!
do i=0, np-1
if (sdsz(i)>0) then
! Must be nidx == sdsz(i)
@ -183,186 +204,188 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info)
end do
if (debug) write(0,*) me,' End of adjcncy_fnd ',iprc(1:nidx)
else
if (new_impl) then
!
! First simple minded version with auxiliary arrays
! dimensioned on NP.
! Could it be improved with a loop based on the maximum length
! of adj(:) ???
!
Allocate(hidx(0:np),hsz(np),sdsz(0:np-1),rvsz(0:np-1),&
& sdhd(0:np-1), rvhd(0:np-1), p2pstat(mpi_status_size,0:np-1),&
& stat=info)
sdhd(:) = mpi_request_null
rvhd(:) = mpi_request_null
!
! First, send sizes according to adjcncy list
!
sdsz = 0
do j=1, nadj
sdsz(adj(j)) = nidx
end do
!write(0,*)me,' Check on sizes into a2a:',adj(:),nadj,':',sdsz(:)
call mpi_alltoall(sdsz,1,psb_mpi_mpk_,&
& rvsz,1,psb_mpi_mpk_,icomm,minfo)
hidx(0) = 0
do i=0, np-1
hidx(i+1) = hidx(i) + rvsz(i)
end do
hsize = hidx(np)
! write(0,*)me,' Check on sizes from a2a:',hsize,rvsz(:)
!
! Second, allocate buffers and exchange data
!
Allocate(rmtidx(hsize),lclidx(max(hsize,nidx*nadj)),tproc(max(hsize,nidx)),stat=info)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
goto 9999
else if (mpi_irecv_impl) then
!
! First simple minded version with auxiliary arrays
! dimensioned on NP.
! Could it be improved with a loop based on the maximum length
! of adj(:) ???
!
Allocate(hidx(0:np),hsz(np),sdsz(0:np-1),rvsz(0:np-1),&
& sdhd(0:np-1), rvhd(0:np-1), p2pstat(mpi_status_size,0:np-1),&
& stat=info)
sdhd(:) = mpi_request_null
rvhd(:) = mpi_request_null
!
! First, send sizes according to adjcncy list
!
sdsz = 0
do j=1, nadj
sdsz(adj(j)) = nidx
end do
!write(0,*)me,' Check on sizes into a2a:',adj(:),nadj,':',sdsz(:)
call mpi_alltoall(sdsz,1,psb_mpi_mpk_,&
& rvsz,1,psb_mpi_mpk_,icomm,minfo)
hidx(0) = 0
do i=0, np-1
hidx(i+1) = hidx(i) + rvsz(i)
end do
hsize = hidx(np)
! write(0,*)me,' Check on sizes from a2a:',hsize,rvsz(:)
!
! Second, allocate buffers and exchange data
!
Allocate(rmtidx(hsize),lclidx(max(hsize,nidx*nadj)),tproc(max(hsize,nidx)),stat=info)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
goto 9999
end if
do i = 0, np-1
if (rvsz(i)>0) then
! write(0,*) me, ' First receive from ',i,rvsz(i)
prc = psb_get_mpi_rank(ictxt,i)
p2ptag = psb_long_swap_tag
!write(0,*) me, ' Posting first receive from ',i,rvsz(i),prc
call mpi_irecv(rmtidx(hidx(i)+1),rvsz(i),&
& psb_mpi_lpk_,prc,&
& p2ptag, icomm,rvhd(i),iret)
end if
end do
do j=1, nadj
if (nidx > 0) then
prc = psb_get_mpi_rank(ictxt,adj(j))
p2ptag = psb_long_swap_tag
!write(0,*) me, ' First send to ',adj(j),nidx, prc
call mpi_send(idx,nidx,&
& psb_mpi_lpk_,prc,&
& p2ptag, icomm,iret)
end if
end do
call mpi_waitall(np,rvhd,p2pstat,iret)
!
! Third, compute local answers
!
call idxmap%g2l(rmtidx(1:hsize),lclidx(1:hsize),info,owned=.true.)
do i=1, hsize
tproc(i) = -1
if ((0 < lclidx(i)).and. (lclidx(i) <= n_row)) tproc(i) = me
end do
!
! At this point we can reuse lclidx to receive messages
!
rvhd(:) = mpi_request_null
do j=1, nadj
!write(0,*) me, ' First send to ',adj(j),nidx
if (nidx > 0) then
prc = psb_get_mpi_rank(ictxt,adj(j))
p2ptag = psb_int_swap_tag
!write(0,*) me, ' Posting second receive from ',adj(j),nidx, prc
call mpi_irecv(lclidx((j-1)*nidx+1),nidx, &
& psb_mpi_ipk_,prc,&
& p2ptag, icomm,rvhd(j),iret)
end if
do i = 0, np-1
if (rvsz(i)>0) then
! write(0,*) me, ' First receive from ',i,rvsz(i)
prc = psb_get_mpi_rank(ictxt,i)
p2ptag = psb_long_swap_tag
!write(0,*) me, ' Posting first receive from ',i,rvsz(i),prc
call mpi_irecv(rmtidx(hidx(i)+1),rvsz(i),&
& psb_mpi_lpk_,prc,&
& p2ptag, icomm,rvhd(i),iret)
end if
end do
do j=1, nadj
if (nidx > 0) then
prc = psb_get_mpi_rank(ictxt,adj(j))
p2ptag = psb_long_swap_tag
!write(0,*) me, ' First send to ',adj(j),nidx, prc
call mpi_send(idx,nidx,&
& psb_mpi_lpk_,prc,&
& p2ptag, icomm,iret)
end if
end do
call mpi_waitall(np,rvhd,p2pstat,iret)
!
! Third, compute local answers
!
call idxmap%g2l(rmtidx(1:hsize),lclidx(1:hsize),info,owned=.true.)
do i=1, hsize
tproc(i) = -1
if ((0 < lclidx(i)).and. (lclidx(i) <= n_row)) tproc(i) = me
end do
!
! At this point we can reuse lclidx to receive messages
!
rvhd(:) = mpi_request_null
do j=1, nadj
!write(0,*) me, ' First send to ',adj(j),nidx
if (nidx > 0) then
prc = psb_get_mpi_rank(ictxt,adj(j))
p2ptag = psb_int_swap_tag
!write(0,*) me, ' Posting second receive from ',adj(j),nidx, prc
call mpi_irecv(lclidx((j-1)*nidx+1),nidx, &
& psb_mpi_ipk_,prc,&
& p2ptag, icomm,rvhd(j),iret)
end if
end do
!
! Fourth, send data back;
!
do i = 0, np-1
if (rvsz(i)>0) then
prc = psb_get_mpi_rank(ictxt,i)
p2ptag = psb_int_swap_tag
!write(0,*) me, ' Second send to ',i,rvsz(i), prc
call mpi_send(tproc(hidx(i)+1),rvsz(i),&
& psb_mpi_ipk_,prc,&
& p2ptag, icomm,iret)
end if
end do
!
! Fifth: receive and combine. MAX works because default
! answer is -1.
!
call mpi_waitall(np,rvhd,p2pstat,iret)
do j = 1, nadj
iprc(1:nidx) = max(iprc(1:nidx), lclidx((j-1)*nidx+1:(j-1)*nidx+nidx))
end do
if (debug) write(0,*) me,' End of adjcncy_fnd ',iprc(1:nidx)
else
Allocate(hidx(0:np),hsz(np),&
& sdsz(0:np-1),rvsz(0:np-1),stat=info)
!
! First, send sizes according to adjcncy list
!
sdsz = 0
do j=1, nadj
sdsz(adj(j)) = nidx
end do
!write(0,*)me,' Check on sizes into a2a:',adj(:),nadj,':',sdsz(:)
call mpi_alltoall(sdsz,1,psb_mpi_mpk_,&
& rvsz,1,psb_mpi_mpk_,icomm,minfo)
hidx(0) = 0
do i=0, np-1
hidx(i+1) = hidx(i) + rvsz(i)
end do
hsize = hidx(np)
! write(0,*)me,' Check on sizes from a2a:',hsize,rvsz(:)
!
! Second, allocate buffers and exchange data
!
Allocate(rmtidx(hsize),lclidx(hsize),tproc(max(hsize,nidx)),stat=info)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
goto 9999
end do
!
! Fourth, send data back;
!
do i = 0, np-1
if (rvsz(i)>0) then
prc = psb_get_mpi_rank(ictxt,i)
p2ptag = psb_int_swap_tag
!write(0,*) me, ' Second send to ',i,rvsz(i), prc
call mpi_send(tproc(hidx(i)+1),rvsz(i),&
& psb_mpi_ipk_,prc,&
& p2ptag, icomm,iret)
end if
do j=1, nadj
!write(0,*) me, ' First send to ',adj(j),nidx
if (nidx > 0) call psb_snd(ictxt,idx(1:nidx),adj(j))
end do
do i = 0, np-1
if (rvsz(i)>0) then
! write(0,*) me, ' First receive from ',i,rvsz(i)
call psb_rcv(ictxt,rmtidx(hidx(i)+1:hidx(i)+rvsz(i)),i)
end if
end do
!
! Third, compute local answers
!
call idxmap%g2l(rmtidx(1:hsize),lclidx(1:hsize),info,owned=.true.)
do i=1, hsize
tproc(i) = -1
if ((0 < lclidx(i)).and. (lclidx(i) <= n_row)) tproc(i) = me
end do
!
! Fourth, send data back;
!
do i = 0, np-1
if (rvsz(i)>0) then
!write(0,*) me, ' Second send to ',i,rvsz(i)
call psb_snd(ictxt,tproc(hidx(i)+1:hidx(i)+rvsz(i)),i)
end if
end do
!
! Fifth: receive and combine. MAX works because default
! answer is -1. Reuse tproc
!
do j = 1, nadj
!write(0,*) me, ' Second receive from ',adj(j), nidx
if (nidx > 0) call psb_rcv(ictxt,tproc(1:nidx),adj(j))
iprc(1:nidx) = max(iprc(1:nidx), tproc(1:nidx))
end do
end do
!
! Fifth: receive and combine. MAX works because default
! answer is -1.
!
call mpi_waitall(np,rvhd,p2pstat,iret)
do j = 1, nadj
iprc(1:nidx) = max(iprc(1:nidx), lclidx((j-1)*nidx+1:(j-1)*nidx+nidx))
end do
if (debug) write(0,*) me,' End of adjcncy_fnd ',iprc(1:nidx)
else if (psb_rcv_impl) then
Allocate(hidx(0:np),hsz(np),&
& sdsz(0:np-1),rvsz(0:np-1),stat=info)
!
! First, send sizes according to adjcncy list
!
sdsz = 0
do j=1, nadj
sdsz(adj(j)) = nidx
end do
!write(0,*)me,' Check on sizes into a2a:',adj(:),nadj,':',sdsz(:)
call mpi_alltoall(sdsz,1,psb_mpi_mpk_,&
& rvsz,1,psb_mpi_mpk_,icomm,minfo)
hidx(0) = 0
do i=0, np-1
hidx(i+1) = hidx(i) + rvsz(i)
end do
hsize = hidx(np)
! write(0,*)me,' Check on sizes from a2a:',hsize,rvsz(:)
!
! Second, allocate buffers and exchange data
!
Allocate(rmtidx(hsize),lclidx(hsize),tproc(max(hsize,nidx)),stat=info)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
goto 9999
end if
do j=1, nadj
!write(0,*) me, ' First send to ',adj(j),nidx
if (nidx > 0) call psb_snd(ictxt,idx(1:nidx),adj(j))
end do
do i = 0, np-1
if (rvsz(i)>0) then
! write(0,*) me, ' First receive from ',i,rvsz(i)
call psb_rcv(ictxt,rmtidx(hidx(i)+1:hidx(i)+rvsz(i)),i)
end if
end do
!
! Third, compute local answers
!
call idxmap%g2l(rmtidx(1:hsize),lclidx(1:hsize),info,owned=.true.)
do i=1, hsize
tproc(i) = -1
if ((0 < lclidx(i)).and. (lclidx(i) <= n_row)) tproc(i) = me
end do
!
! Fourth, send data back;
!
do i = 0, np-1
if (rvsz(i)>0) then
!write(0,*) me, ' Second send to ',i,rvsz(i)
call psb_snd(ictxt,tproc(hidx(i)+1:hidx(i)+rvsz(i)),i)
end if
end do
!
! Fifth: receive and combine. MAX works because default
! answer is -1. Reuse tproc
!
do j = 1, nadj
!write(0,*) me, ' Second receive from ',adj(j), nidx
if (nidx > 0) call psb_rcv(ictxt,tproc(1:nidx),adj(j))
iprc(1:nidx) = max(iprc(1:nidx), tproc(1:nidx))
end do
else
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='invalid exchange alg choice')
goto 9999
end if
call psb_erractionrestore(err_act)
return

@ -30,20 +30,50 @@
!
!
!
! File: psi_fnd_owner.f90
!
! Subroutine: psi_fnd_owner
! File: psi_graph_fnd_owner.f90
!
! Subroutine: psi_graph_fnd_owner
! Figure out who owns global indices.
!
! Arguments:
! nv - integer Number of indices required on the calling
! process
! idx(:) - integer Required indices on the calling process.
! Note: the indices should be unique!
!
! iprc(:) - integer(psb_ipk_), allocatable Output: process identifiers for the corresponding
! indices
! desc_a - type(psb_desc_type). The communication descriptor.
! idxmap - class(psb_indx_map). The index map
! info - integer. return code.
!
! This is the method to find out who owns a set of indices.
! In principle we could do the following:
! 1. Do an allgatherv of IDX
! 2. For each of the collected indices figure if current proces owns it
! 3. Scatter the results
! 4. Loop through the answers
! This method is guaranteed to find the owner, unless an input index has
! an invalid value, however it could easily require too much additional space
! because each block of indices is replicated to all processes.
! Therefore the current routine takes a different approach:
! -1. Figure out a maximum size for a buffer to collect the IDX; the buffer
! should allow for at least one index from each process (i.e. min size NP); also
! check if we have an adjacency list of processes on input;
! 0. If the initial adjacency list is not empty, use psi_adj_fnd_sweep to go
! through all indices and use multiple calls to psi_adjcncy_fnd_owner
! (up to the buffer size) to see if the owning processes are in the
! initial neighbours list;
! 1. Extract a sample from IDX, up to the buffer size, and do a call
! to psi_a2a_fnd_owner. This is guaranteed to find the owners of all indices
! in the sample;
! 2. Build the list of processes that own the sample indices; these are
! (a subset of) the topological neighbours, and store the list in IDXMAP
! 3. Use psi_adj_fnd_sweep to go through all remaining indices and use
! multiple calls to psi_adjcncy_fnd_owner (up to the buffer size)
! to see if the owning processes are in the current neighbours list;
! 4. If the input indices IDX have not been exhausted, cycle to 1.
!
! Thus, we are alternating between asking all processes for a subset of indices, and
! asking a subset of processes for all the indices, thereby limiting the memory footprint to
! a predefined maximum (that the user can force with psb_cd_set_maxspace()).
!
subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info)
use psb_serial_mod
@ -76,7 +106,7 @@ subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info)
integer(psb_lpk_) :: mglob, ih
integer(psb_ipk_) :: ictxt,np,me, nresp
integer(psb_ipk_), parameter :: nt=4
integer(psb_ipk_) :: tmpv(2)
integer(psb_ipk_) :: tmpv(4)
logical, parameter :: do_timings=.false., trace=.false.
integer(psb_ipk_), save :: idx_sweep0=-1, idx_loop_a2a=-1, idx_loop_neigh=-1
real(psb_dpk_) :: t0, t1, t2, t3, t4
@ -113,17 +143,6 @@ subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info)
goto 9999
end if
!
! Choice of maxspace should be adjusted to account for a default
! "sensible" size and/or a user-specified value
!
tmpv(1) = n_row
tmpv(2) = psb_cd_get_maxspace()
call psb_max(ictxt,tmpv)
locr_max = tmpv(1)
maxspace = min(nt*locr_max,tmpv(2))
maxspace = max(maxspace,np)
if (trace.and.(me == 0)) write(0,*) ' Through graph_fnd_owner with maxspace:',maxspace
!
!
!
@ -148,9 +167,20 @@ subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info)
n_rest = nv - n_answers
nrest_max = n_rest
!
! Choice of maxspace should be adjusted to account for a default
! "sensible" size and/or a user-specified value
!
tmpv(1) = nadj
tmpv(2) = nrest_max
tmpv(3) = n_row
tmpv(4) = psb_cd_get_maxspace()
call psb_max(ictxt,tmpv)
locr_max = tmpv(3)
maxspace = nt*locr_max
if (tmpv(4) > 0) maxspace = min(maxspace,tmpv(4))
maxspace = max(maxspace,np)
if (trace.and.(me == 0)) write(0,*) ' Through graph_fnd_owner with maxspace:',maxspace
if (do_timings) call psb_tic(idx_sweep0)
if ((tmpv(1) > 0).and.(tmpv(2) >0)) then
!

@ -30,20 +30,26 @@
!
!
!
! File: psi_fnd_owner.f90
! File: psi_indx_map_fnd_owner.f90
!
! Subroutine: psi_fnd_owner
! Subroutine: psi_indx_map_fnd_owner
! Figure out who owns global indices.
!
! Arguments:
! nv - integer Number of indices required on the calling
! process
! idx(:) - integer Required indices on the calling process.
! Note: the indices should be unique!
! iprc(:) - integer(psb_ipk_), allocatable Output: process identifiers for the corresponding
! indices
! desc_a - type(psb_desc_type). The communication descriptor.
! idxmap - class(psb_indx_map). The index map
! info - integer. return code.
!
! This is the default implementation of the FND_OWNER method.
! If a particular index map class has additional information, it can override it
! (see e.g. the GEN_BLOCK_MAP class).
!
! 1. Check if IDXM%PARTS is available, and use it; or
! 2. Check if TEMPVG(:) is allocated, and use it; or
! 3. Call the general method PSI_GRAPH_FND_OWNER.
!
subroutine psi_indx_map_fnd_owner(idx,iprc,idxmap,info)
use psb_serial_mod

Loading…
Cancel
Save