diff --git a/base/internals/psb_indx_map_fnd_owner.F90 b/base/internals/psb_indx_map_fnd_owner.F90 index 3e2cb576..1b1924d9 100644 --- a/base/internals/psb_indx_map_fnd_owner.F90 +++ b/base/internals/psb_indx_map_fnd_owner.F90 @@ -30,20 +30,27 @@ ! ! ! -! File: psi_fnd_owner.f90 +! File: psb_fnd_owner.f90 ! -! Subroutine: psi_fnd_owner +! Subroutine: psb_fnd_owner ! Figure out who owns global indices. ! ! Arguments: -! nv - integer Number of indices required on the calling -! process ! idx(:) - integer Required indices on the calling process. ! Note: the indices should be unique! ! iprc(:) - integer(psb_ipk_), allocatable Output: process identifiers for the corresponding ! indices -! desc_a - type(psb_desc_type). The communication descriptor. +! idxmap - class(psb_indx_map). The index map ! info - integer. return code. +! +! This is the default implementation of the FND_OWNER method. +! If a particular index map class has additional information, it can override it +! (see e.g. the GEN_BLOCK_MAP class). +! +! 1. Check if IDXM%PARTS is available, and use it; or +! 2. Check if TEMPVG(:) is allocated, and use it; or +! 3. Call the general method PSI_GRAPH_FND_OWNER. +! ! subroutine psb_indx_map_fnd_owner(idx,iprc,idxmap,info) use psb_serial_mod diff --git a/base/internals/psi_a2a_fnd_owner.F90 b/base/internals/psi_a2a_fnd_owner.F90 index e755bf82..93a097e0 100644 --- a/base/internals/psi_a2a_fnd_owner.F90 +++ b/base/internals/psi_a2a_fnd_owner.F90 @@ -28,22 +28,24 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! -! ! -! File: psi_fnd_owner.f90 +! File: psi_a2a_fnd_owner.f90 ! -! Subroutine: psi_fnd_owner +! Subroutine: psi_a2a_fnd_owner ! Figure out who owns global indices. ! ! Arguments: -! nv - integer Number of indices required on the calling -! process ! idx(:) - integer Required indices on the calling process. ! Note: the indices should be unique! ! iprc(:) - integer(psb_ipk_), allocatable Output: process identifiers for the corresponding ! indices -! desc_a - type(psb_desc_type). The communication descriptor. +! idxmap - class(psb_indx_map). The index map ! info - integer. return code. +! +! This version does not assume any prior knowledge about the process topology, +! so it goes for an all-to-all by building an auxiliary neighbours list and +! reusing the neighbour version. +! ! subroutine psi_a2a_fnd_owner(idx,iprc,idxmap,info) use psb_serial_mod diff --git a/base/internals/psi_adjcncy_fnd_owner.F90 b/base/internals/psi_adjcncy_fnd_owner.F90 index ad66904a..b0fbce93 100644 --- a/base/internals/psi_adjcncy_fnd_owner.F90 +++ b/base/internals/psi_adjcncy_fnd_owner.F90 @@ -29,13 +29,29 @@ ! POSSIBILITY OF SUCH DAMAGE. ! ! +! ! -! File: psi_fnd_owner.f90 +! File: psi_adjcncy_fnd_owner.f90 ! -! Subroutine: psi_fnd_owner +! Subroutine: psi_adjcncy_fnd_owner ! Figure out who owns global indices. ! ! Arguments: +! idx(:) - integer Required indices on the calling process. +! Note: the indices should be unique! +! iprc(:) - integer(psb_ipk_), allocatable Output: process identifiers for the corresponding +! indices +! adj(:) - integer(psb_ipk_) Input: list of topological neighbours for current process. +! +! idxmap - class(psb_indx_map). The index map +! info - integer. return code. +! +! This version takes on input a list of processes that are assumed to +! be topological neighbours of the current one. Each process will send to all +! of its neighbours the list of indices for which it is trying to find the +! owner, prepare its own answers, and collect answers from others. +! There are three possibile implementations: using mpi_alltoallv, using mpi_isend/irecv, +! using psb_snd/psb_rcv. The default is mpi_alltoallv. ! subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) use psb_serial_mod @@ -69,8 +85,10 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) & last_ih, last_j, nidx, nrecv, nadj integer(psb_ipk_) :: mglob, ih integer(psb_ipk_) :: ictxt,np,me - logical, parameter :: gettime=.false., new_impl=.true. - logical, parameter :: a2av_impl=.true., debug=.false. + logical, parameter :: gettime=.false., debug=.false. + logical, parameter :: a2av_impl=.true. + logical, parameter :: mpi_irecv_impl=.false. + logical, parameter :: psb_rcv_impl=.false. real(psb_dpk_) :: t0, t1, t2, t3, t4, tamx, tidx character(len=20) :: name @@ -175,6 +193,10 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) call mpi_alltoallv(tproc,rvsz,rvidx,psb_mpi_ipk_integer,& & lclidx,sdsz,sdidx,psb_mpi_ipk_integer,icomm,iret) + ! + ! Because IPRC has been initialized to -1, the MAX operation selects + ! the answers. + ! do i=0, np-1 if (sdsz(i)>0) then ! Must be nidx == sdsz(i) @@ -183,197 +205,199 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) end do if (debug) write(0,*) me,' End of adjcncy_fnd ',iprc(1:nidx) - else - if (new_impl) then - ! - ! First simple minded version with auxiliary arrays - ! dimensioned on NP. - ! Could it be improved with a loop based on the maximum length - ! of adj(:) ??? - ! - Allocate(hidx(0:np),hsz(np),sdsz(0:np-1),rvsz(0:np-1),& - & sdhd(0:np-1), rvhd(0:np-1), p2pstat(mpi_status_size,0:np-1),& - & stat=info) - sdhd(:) = mpi_request_null - rvhd(:) = mpi_request_null - ! - ! First, send sizes according to adjcncy list - ! - sdsz = 0 - do j=1, nadj - sdsz(adj(j)) = nidx - end do - !write(0,*)me,' Check on sizes into a2a:',adj(:),nadj,':',sdsz(:) - - call mpi_alltoall(sdsz,1,psb_mpi_def_integer,& - & rvsz,1,psb_mpi_def_integer,icomm,minfo) - hidx(0) = 0 - do i=0, np-1 - hidx(i+1) = hidx(i) + rvsz(i) - end do - hsize = hidx(np) - ! write(0,*)me,' Check on sizes from a2a:',hsize,rvsz(:) - ! - ! Second, allocate buffers and exchange data - ! - Allocate(rmtidx(hsize),lclidx(max(hsize,nidx*nadj)),tproc(max(hsize,nidx)),stat=info) - - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 + else if (mpi_irecv_impl) then + ! + ! First simple minded version with auxiliary arrays + ! dimensioned on NP. + ! Could it be improved with a loop based on the maximum length + ! of adj(:) ??? + ! + Allocate(hidx(0:np),hsz(np),sdsz(0:np-1),rvsz(0:np-1),& + & sdhd(0:np-1), rvhd(0:np-1), p2pstat(mpi_status_size,0:np-1),& + & stat=info) + sdhd(:) = mpi_request_null + rvhd(:) = mpi_request_null + ! + ! First, send sizes according to adjcncy list + ! + sdsz = 0 + do j=1, nadj + sdsz(adj(j)) = nidx + end do + !write(0,*)me,' Check on sizes into a2a:',adj(:),nadj,':',sdsz(:) + + call mpi_alltoall(sdsz,1,psb_mpi_def_integer,& + & rvsz,1,psb_mpi_def_integer,icomm,minfo) + hidx(0) = 0 + do i=0, np-1 + hidx(i+1) = hidx(i) + rvsz(i) + end do + hsize = hidx(np) + ! write(0,*)me,' Check on sizes from a2a:',hsize,rvsz(:) + ! + ! Second, allocate buffers and exchange data + ! + Allocate(rmtidx(hsize),lclidx(max(hsize,nidx*nadj)),tproc(max(hsize,nidx)),stat=info) + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if + do i = 0, np-1 + if (rvsz(i)>0) then + ! write(0,*) me, ' First receive from ',i,rvsz(i) + call psb_get_rank(prc,ictxt,i) + p2ptag = psb_int_swap_tag + !write(0,*) me, ' Posting first receive from ',i,rvsz(i),prc + call mpi_irecv(rmtidx(hidx(i)+1),rvsz(i),& + & psb_mpi_ipk_integer,prc,& + & p2ptag, icomm,rvhd(i),iret) end if - do i = 0, np-1 - if (rvsz(i)>0) then - ! write(0,*) me, ' First receive from ',i,rvsz(i) - call psb_get_rank(prc,ictxt,i) - p2ptag = psb_int_swap_tag - !write(0,*) me, ' Posting first receive from ',i,rvsz(i),prc - call mpi_irecv(rmtidx(hidx(i)+1),rvsz(i),& - & psb_mpi_ipk_integer,prc,& - & p2ptag, icomm,rvhd(i),iret) - end if - end do - do j=1, nadj - if (nidx > 0) then - !call psb_snd(ictxt,idx(1:nidx),adj(j)) - call psb_get_rank(prc ,ictxt,adj(j)) - p2ptag = psb_int_swap_tag - !write(0,*) me, ' First send to ',adj(j),nidx, prc - call mpi_send(idx,nidx,& - & psb_mpi_ipk_integer,prc,& - & p2ptag, icomm,iret) - end if - end do + end do + do j=1, nadj + if (nidx > 0) then + !call psb_snd(ictxt,idx(1:nidx),adj(j)) + call psb_get_rank(prc ,ictxt,adj(j)) + p2ptag = psb_int_swap_tag + !write(0,*) me, ' First send to ',adj(j),nidx, prc + call mpi_send(idx,nidx,& + & psb_mpi_ipk_integer,prc,& + & p2ptag, icomm,iret) + end if + end do !!$ do i = 0, np-1 !!$ if (rvsz(i)>0) then !!$ ! write(0,*) me, ' First receive from ',i,rvsz(i) !!$ call psb_rcv(ictxt,rmtidx(hidx(i)+1:hidx(i)+rvsz(i)),i) !!$ end if !!$ end do - call mpi_waitall(np,rvhd,p2pstat,iret) - - ! - ! Third, compute local answers - ! - call idxmap%g2l(rmtidx(1:hsize),lclidx(1:hsize),info,owned=.true.) - do i=1, hsize - tproc(i) = -1 - if ((0 < lclidx(i)).and. (lclidx(i) <= n_row)) tproc(i) = me - end do - ! - ! At this point we can reuse lclidx to receive messages - ! - rvhd(:) = mpi_request_null - do j=1, nadj - !write(0,*) me, ' First send to ',adj(j),nidx - if (nidx > 0) then - !call psb_snd(ictxt,idx(1:nidx),adj(j)) - call psb_get_rank(prc,ictxt,adj(j)) - p2ptag = psb_int_swap_tag - !write(0,*) me, ' Posting second receive from ',adj(j),nidx, prc - call mpi_irecv(lclidx((j-1)*nidx+1),nidx, & - & psb_mpi_ipk_integer,prc,& - & p2ptag, icomm,rvhd(j),iret) - end if - end do - - ! - ! Fourth, send data back; - ! - do i = 0, np-1 - if (rvsz(i)>0) then - !call psb_snd(ictxt,tproc(hidx(i)+1:hidx(i)+rvsz(i)),i) - call psb_get_rank(prc,ictxt,i) - p2ptag = psb_int_swap_tag - !write(0,*) me, ' Second send to ',i,rvsz(i), prc - call mpi_send(tproc(hidx(i)+1),rvsz(i),& - & psb_mpi_ipk_integer,prc,& - & p2ptag, icomm,iret) - end if - end do - ! - ! Fifth: receive and combine. MAX works because default - ! answer is -1. - ! - call mpi_waitall(np,rvhd,p2pstat,iret) - do j = 1, nadj - !write(0,*) me, ' Second receive from ',adj(j), nidx - !if (nidx > 0) call psb_rcv(ictxt,tproc(1:nidx),adj(j)) - iprc(1:nidx) = max(iprc(1:nidx), lclidx((j-1)*nidx+1:(j-1)*nidx+nidx)) - end do - if (debug) write(0,*) me,' End of adjcncy_fnd ',iprc(1:nidx) - - else - - Allocate(hidx(0:np),hsz(np),& - & sdsz(0:np-1),rvsz(0:np-1),stat=info) - ! - ! First, send sizes according to adjcncy list - ! - sdsz = 0 - do j=1, nadj - sdsz(adj(j)) = nidx - end do - !write(0,*)me,' Check on sizes into a2a:',adj(:),nadj,':',sdsz(:) - - call mpi_alltoall(sdsz,1,psb_mpi_def_integer,& - & rvsz,1,psb_mpi_def_integer,icomm,minfo) - hidx(0) = 0 - do i=0, np-1 - hidx(i+1) = hidx(i) + rvsz(i) - end do - hsize = hidx(np) - ! write(0,*)me,' Check on sizes from a2a:',hsize,rvsz(:) - ! - ! Second, allocate buffers and exchange data - ! - Allocate(rmtidx(hsize),lclidx(hsize),tproc(max(hsize,nidx)),stat=info) - - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 + call mpi_waitall(np,rvhd,p2pstat,iret) + + ! + ! Third, compute local answers + ! + call idxmap%g2l(rmtidx(1:hsize),lclidx(1:hsize),info,owned=.true.) + do i=1, hsize + tproc(i) = -1 + if ((0 < lclidx(i)).and. (lclidx(i) <= n_row)) tproc(i) = me + end do + ! + ! At this point we can reuse lclidx to receive messages + ! + rvhd(:) = mpi_request_null + do j=1, nadj + !write(0,*) me, ' First send to ',adj(j),nidx + if (nidx > 0) then + !call psb_snd(ictxt,idx(1:nidx),adj(j)) + call psb_get_rank(prc,ictxt,adj(j)) + p2ptag = psb_int_swap_tag + !write(0,*) me, ' Posting second receive from ',adj(j),nidx, prc + call mpi_irecv(lclidx((j-1)*nidx+1),nidx, & + & psb_mpi_ipk_integer,prc,& + & p2ptag, icomm,rvhd(j),iret) + end if + end do + + ! + ! Fourth, send data back; + ! + do i = 0, np-1 + if (rvsz(i)>0) then + !call psb_snd(ictxt,tproc(hidx(i)+1:hidx(i)+rvsz(i)),i) + call psb_get_rank(prc,ictxt,i) + p2ptag = psb_int_swap_tag + !write(0,*) me, ' Second send to ',i,rvsz(i), prc + call mpi_send(tproc(hidx(i)+1),rvsz(i),& + & psb_mpi_ipk_integer,prc,& + & p2ptag, icomm,iret) end if - do j=1, nadj - !write(0,*) me, ' First send to ',adj(j),nidx - if (nidx > 0) call psb_snd(ictxt,idx(1:nidx),adj(j)) - end do - do i = 0, np-1 - if (rvsz(i)>0) then - ! write(0,*) me, ' First receive from ',i,rvsz(i) - call psb_rcv(ictxt,rmtidx(hidx(i)+1:hidx(i)+rvsz(i)),i) - end if - end do - - ! - ! Third, compute local answers - ! - call idxmap%g2l(rmtidx(1:hsize),lclidx(1:hsize),info,owned=.true.) - do i=1, hsize - tproc(i) = -1 - if ((0 < lclidx(i)).and. (lclidx(i) <= n_row)) tproc(i) = me - end do - - ! - ! Fourth, send data back; - ! - do i = 0, np-1 - if (rvsz(i)>0) then - !write(0,*) me, ' Second send to ',i,rvsz(i) - call psb_snd(ictxt,tproc(hidx(i)+1:hidx(i)+rvsz(i)),i) - end if - end do - ! - ! Fifth: receive and combine. MAX works because default - ! answer is -1. Reuse tproc - ! - do j = 1, nadj - !write(0,*) me, ' Second receive from ',adj(j), nidx - if (nidx > 0) call psb_rcv(ictxt,tproc(1:nidx),adj(j)) - iprc(1:nidx) = max(iprc(1:nidx), tproc(1:nidx)) - end do + end do + ! + ! Fifth: receive and combine. MAX works because default + ! answer is -1. + ! + call mpi_waitall(np,rvhd,p2pstat,iret) + do j = 1, nadj + !write(0,*) me, ' Second receive from ',adj(j), nidx + !if (nidx > 0) call psb_rcv(ictxt,tproc(1:nidx),adj(j)) + iprc(1:nidx) = max(iprc(1:nidx), lclidx((j-1)*nidx+1:(j-1)*nidx+nidx)) + end do + if (debug) write(0,*) me,' End of adjcncy_fnd ',iprc(1:nidx) + + else if (psb_rcv_impl) then + + Allocate(hidx(0:np),hsz(np),& + & sdsz(0:np-1),rvsz(0:np-1),stat=info) + ! + ! First, send sizes according to adjcncy list + ! + sdsz = 0 + do j=1, nadj + sdsz(adj(j)) = nidx + end do + !write(0,*)me,' Check on sizes into a2a:',adj(:),nadj,':',sdsz(:) + + call mpi_alltoall(sdsz,1,psb_mpi_def_integer,& + & rvsz,1,psb_mpi_def_integer,icomm,minfo) + hidx(0) = 0 + do i=0, np-1 + hidx(i+1) = hidx(i) + rvsz(i) + end do + hsize = hidx(np) + ! write(0,*)me,' Check on sizes from a2a:',hsize,rvsz(:) + ! + ! Second, allocate buffers and exchange data + ! + Allocate(rmtidx(hsize),lclidx(hsize),tproc(max(hsize,nidx)),stat=info) + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 end if + do j=1, nadj + !write(0,*) me, ' First send to ',adj(j),nidx + if (nidx > 0) call psb_snd(ictxt,idx(1:nidx),adj(j)) + end do + do i = 0, np-1 + if (rvsz(i)>0) then + ! write(0,*) me, ' First receive from ',i,rvsz(i) + call psb_rcv(ictxt,rmtidx(hidx(i)+1:hidx(i)+rvsz(i)),i) + end if + end do + + ! + ! Third, compute local answers + ! + call idxmap%g2l(rmtidx(1:hsize),lclidx(1:hsize),info,owned=.true.) + do i=1, hsize + tproc(i) = -1 + if ((0 < lclidx(i)).and. (lclidx(i) <= n_row)) tproc(i) = me + end do + + ! + ! Fourth, send data back; + ! + do i = 0, np-1 + if (rvsz(i)>0) then + !write(0,*) me, ' Second send to ',i,rvsz(i) + call psb_snd(ictxt,tproc(hidx(i)+1:hidx(i)+rvsz(i)),i) + end if + end do + ! + ! Fifth: receive and combine. MAX works because default + ! answer is -1. Reuse tproc + ! + do j = 1, nadj + !write(0,*) me, ' Second receive from ',adj(j), nidx + if (nidx > 0) call psb_rcv(ictxt,tproc(1:nidx),adj(j)) + iprc(1:nidx) = max(iprc(1:nidx), tproc(1:nidx)) + end do + else + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='invalid exchange alg choice') + goto 9999 end if - + call psb_erractionrestore(err_act) return