From caec98e94262e6479e0e0e94d987c81823704903 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Thu, 21 Nov 2019 14:28:40 +0000 Subject: [PATCH] New adjcncy and a2a fnd_owner Reimplement adjcncy_fnd_owner to use alltoallv. Version with sends/irecv still available under compile time constant. Reimplement a2a_fnd_owner to use adjcncy_fnd_owner. Older version still available under compile time constant. --- base/internals/psi_a2a_fnd_owner.F90 | 344 +++++++++++----------- base/internals/psi_adjcncy_fnd_owner.F90 | 352 ++++++++++++++--------- 2 files changed, 392 insertions(+), 304 deletions(-) diff --git a/base/internals/psi_a2a_fnd_owner.F90 b/base/internals/psi_a2a_fnd_owner.F90 index 003ae1bf..231bc87b 100644 --- a/base/internals/psi_a2a_fnd_owner.F90 +++ b/base/internals/psi_a2a_fnd_owner.F90 @@ -67,7 +67,7 @@ subroutine psi_a2a_fnd_owner(idx,iprc,idxmap,info) integer(psb_lpk_), allocatable :: answers(:,:), idxsrch(:,:), hproc(:) - integer(psb_ipk_), allocatable :: helem(:), hhidx(:) + integer(psb_ipk_), allocatable :: helem(:), hhidx(:), tmpadj(:) integer(psb_mpk_), allocatable :: hsz(:),hidx(:), & & sdsz(:),sdidx(:), rvsz(:), rvidx(:) integer(psb_mpk_) :: icomm, minfo, iictxt @@ -75,7 +75,7 @@ subroutine psi_a2a_fnd_owner(idx,iprc,idxmap,info) & last_ih, last_j, nv integer(psb_lpk_) :: mglob, ih integer(psb_ipk_) :: ictxt,np,me, nresp - logical, parameter :: gettime=.false. + logical, parameter :: gettime=.false., use_adj=.true. real(psb_dpk_) :: t0, t1, t2, t3, t4, tamx, tidx character(len=20) :: name @@ -103,190 +103,204 @@ subroutine psi_a2a_fnd_owner(idx,iprc,idxmap,info) goto 9999 end if - if (gettime) then - t0 = psb_wtime() - end if - nv = size(idx) - call psb_realloc(nv,iprc,info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='psb_realloc') - goto 9999 - end if + if (use_adj) then + ! + ! Reuse the other version by tricking it with an adjcncy list + ! that contains everybody but ME. + ! + nv = size(idx) + call psb_realloc(np-1,tmpadj,info) + tmpadj(1:me) = [(i,i=0,me-1)] + tmpadj(me+1:np-1) = [(i,i=me+1,np-1)] + call psi_adjcncy_fnd_owner(idx,iprc,tmpadj,idxmap,info) + + else + if (gettime) then + t0 = psb_wtime() + end if + nv = size(idx) + call psb_realloc(nv,iprc,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='psb_realloc') + goto 9999 + end if - ! - ! The basic idea is very simple. - ! First we collect (to all) all the requests. - Allocate(hidx(np+1),hsz(np),& - & sdsz(0:np-1),sdidx(0:np-1),& - & rvsz(0:np-1),rvidx(0:np-1),& - & stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if + ! + ! The basic idea is very simple. + ! First we collect (to all) all the requests. + Allocate(hidx(np+1),hsz(np),& + & sdsz(0:np-1),sdidx(0:np-1),& + & rvsz(0:np-1),rvidx(0:np-1),& + & stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if - hsz = 0 - hsz(me+1) = nv - call psb_amx(iictxt,hsz) - hidx(1) = 0 - do i=1, np - hidx(i+1) = hidx(i) + hsz(i) - end do - hsize = hidx(np+1) - Allocate(helem(hsize),hproc(hsize),stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if + hsz = 0 + hsz(me+1) = nv + call psb_amx(iictxt,hsz) + hidx(1) = 0 + do i=1, np + hidx(i+1) = hidx(i) + hsz(i) + end do + hsize = hidx(np+1) + Allocate(helem(hsize),hproc(hsize),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if - if (gettime) then - t3 = psb_wtime() - end if + if (gettime) then + t3 = psb_wtime() + end if - call mpi_allgatherv(idx,hsz(me+1),psb_mpi_lpk_,& - & hproc,hsz,hidx,psb_mpi_lpk_,& - & icomm,minfo) - if (gettime) then - tamx = psb_wtime() - t3 - end if + call mpi_allgatherv(idx,hsz(me+1),psb_mpi_lpk_,& + & hproc,hsz,hidx,psb_mpi_lpk_,& + & icomm,minfo) + if (gettime) then + tamx = psb_wtime() - t3 + end if - ! Second, we figure out locally whether we own the indices (whoever is - ! asking for them). - if (gettime) then - t3 = psb_wtime() - end if + ! Second, we figure out locally whether we own the indices (whoever is + ! asking for them). + if (gettime) then + t3 = psb_wtime() + end if - call idxmap%g2l(hproc(1:hsize),helem(1:hsize),info,owned=.true.) - if (gettime) then - tidx = psb_wtime()-t3 - end if - if (info == psb_err_iarray_outside_bounds_) info = psb_success_ - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='psi_idx_cnv') - goto 9999 - end if + call idxmap%g2l(hproc(1:hsize),helem(1:hsize),info,owned=.true.) + if (gettime) then + tidx = psb_wtime()-t3 + end if + if (info == psb_err_iarray_outside_bounds_) info = psb_success_ + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='psi_idx_cnv') + goto 9999 + end if - ! Third: we build the answers for those indices we own, - ! with a section for each process asking. - hidx = hidx +1 - j = 0 - do ip = 0, np-1 - sdidx(ip) = j - sdsz(ip) = 0 - do i=hidx(ip+1), hidx(ip+1+1)-1 - if ((0 < helem(i)).and. (helem(i) <= n_row)) then - j = j + 1 - hproc(j) = hproc(i) - sdsz(ip) = sdsz(ip) + 1 - end if + ! Third: we build the answers for those indices we own, + ! with a section for each process asking. + hidx = hidx +1 + j = 0 + do ip = 0, np-1 + sdidx(ip) = j + sdsz(ip) = 0 + do i=hidx(ip+1), hidx(ip+1+1)-1 + if ((0 < helem(i)).and. (helem(i) <= n_row)) then + j = j + 1 + hproc(j) = hproc(i) + sdsz(ip) = sdsz(ip) + 1 + end if + end do end do - end do - if (gettime) then - t3 = psb_wtime() - end if + if (gettime) then + t3 = psb_wtime() + end if - ! Collect all the answers with alltoallv (need sizes) - call mpi_alltoall(sdsz,1,psb_mpi_mpk_,& - & rvsz,1,psb_mpi_mpk_,icomm,minfo) + ! Collect all the answers with alltoallv (need sizes) + call mpi_alltoall(sdsz,1,psb_mpi_mpk_,& + & rvsz,1,psb_mpi_mpk_,icomm,minfo) - isz = sum(rvsz) + isz = sum(rvsz) - allocate(answers(isz,2),idxsrch(nv,2),stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - j = 0 - do ip=0, np-1 - rvidx(ip) = j - j = j + rvsz(ip) - end do - call mpi_alltoallv(hproc,sdsz,sdidx,psb_mpi_lpk_,& - & answers(:,1),rvsz,rvidx,psb_mpi_lpk_,& - & icomm,minfo) - if (gettime) then - tamx = psb_wtime() - t3 + tamx - end if - j = 1 - do ip = 0,np-1 - do k=1,rvsz(ip) - answers(j,2) = ip - j = j + 1 + allocate(answers(isz,2),idxsrch(nv,2),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if + j = 0 + do ip=0, np-1 + rvidx(ip) = j + j = j + rvsz(ip) + end do + call mpi_alltoallv(hproc,sdsz,sdidx,psb_mpi_lpk_,& + & answers(:,1),rvsz,rvidx,psb_mpi_lpk_,& + & icomm,minfo) + if (gettime) then + tamx = psb_wtime() - t3 + tamx + end if + j = 1 + do ip = 0,np-1 + do k=1,rvsz(ip) + answers(j,2) = ip + j = j + 1 + end do end do - end do - ! Sort the answers and the requests, so we can - ! match them efficiently - call psb_msort(answers(:,1),ix=answers(:,2),& - & flag=psb_sort_keep_idx_) - idxsrch(1:nv,1) = idx(1:nv) - call psb_msort(idxsrch(1:nv,1),ix=idxsrch(1:nv,2)) + ! Sort the answers and the requests, so we can + ! match them efficiently + call psb_msort(answers(:,1),ix=answers(:,2),& + & flag=psb_sort_keep_idx_) + idxsrch(1:nv,1) = idx(1:nv) + call psb_msort(idxsrch(1:nv,1),ix=idxsrch(1:nv,2)) - ! Now extract the answers for our local query - last_ih = -1 - last_j = -1 - j = 1 - do i=1, nv - ih = idxsrch(i,1) - if (ih == last_ih) then - iprc(idxsrch(i,2)) = answers(last_j,2) - else + ! Now extract the answers for our local query + last_ih = -1 + last_j = -1 + j = 1 + do i=1, nv + ih = idxsrch(i,1) + if (ih == last_ih) then + iprc(idxsrch(i,2)) = answers(last_j,2) + else - do - if (j > size(answers,1)) then - ! Last resort attempt. - j = psb_bsrch(ih,size(answers,1,kind=psb_ipk_),answers(:,1)) - if (j == -1) then - write(psb_err_unit,*) me,'psi_fnd_owner: searching for ',ih, & - & 'not found : ',size(answers,1),':',answers(:,1) - info = psb_err_internal_error_ - call psb_errpush(psb_err_internal_error_,& - & name,a_err='out bounds srch ih') - goto 9999 + do + if (j > size(answers,1)) then + ! Last resort attempt. + j = psb_bsrch(ih,size(answers,1,kind=psb_ipk_),answers(:,1)) + if (j == -1) then + write(psb_err_unit,*) me,'psi_fnd_owner: searching for ',ih, & + & 'not found : ',size(answers,1),':',answers(:,1) + info = psb_err_internal_error_ + call psb_errpush(psb_err_internal_error_,& + & name,a_err='out bounds srch ih') + goto 9999 + end if end if - end if - if (answers(j,1) == ih) exit - if (answers(j,1) > ih) then - k = j - j = psb_bsrch(ih,k,answers(1:k,1)) - if (j == -1) then - write(psb_err_unit,*) me,'psi_fnd_owner: searching for ',ih, & - & 'not found : ',size(answers,1),':',answers(:,1) - info = psb_err_internal_error_ - call psb_errpush(psb_err_internal_error_,name,a_err='out bounds srch ih') - goto 9999 + if (answers(j,1) == ih) exit + if (answers(j,1) > ih) then + k = j + j = psb_bsrch(ih,k,answers(1:k,1)) + if (j == -1) then + write(psb_err_unit,*) me,'psi_fnd_owner: searching for ',ih, & + & 'not found : ',size(answers,1),':',answers(:,1) + info = psb_err_internal_error_ + call psb_errpush(psb_err_internal_error_,name,a_err='out bounds srch ih') + goto 9999 + end if end if - end if - j = j + 1 - end do - ! Note that the answers here are given in order - ! of sending process, so we are implicitly getting - ! the max process index in case of overlap. - last_ih = ih - do - last_j = j - iprc(idxsrch(i,2)) = answers(j,2) - j = j + 1 - if (j > size(answers,1)) exit - if (answers(j,1) /= ih) exit - end do + j = j + 1 + end do + ! Note that the answers here are given in order + ! of sending process, so we are implicitly getting + ! the max process index in case of overlap. + last_ih = ih + do + last_j = j + iprc(idxsrch(i,2)) = answers(j,2) + j = j + 1 + if (j > size(answers,1)) exit + if (answers(j,1) /= ih) exit + end do + end if + end do + + if (gettime) then + call psb_barrier(ictxt) + t1 = psb_wtime() + t1 = t1 -t0 - tamx - tidx + call psb_amx(ictxt,tamx) + call psb_amx(ictxt,tidx) + call psb_amx(ictxt,t1) + if (me == psb_root_) then + write(psb_out_unit,'(" a2a_owner idx time : ",es10.4)') tidx + write(psb_out_unit,'(" a2a_owner amx time : ",es10.4)') tamx + write(psb_out_unit,'(" a2a_owner remainedr : ",es10.4)') t1 + endif end if - end do - if (gettime) then - call psb_barrier(ictxt) - t1 = psb_wtime() - t1 = t1 -t0 - tamx - tidx - call psb_amx(ictxt,tamx) - call psb_amx(ictxt,tidx) - call psb_amx(ictxt,t1) - if (me == psb_root_) then - write(psb_out_unit,'(" a2a_owner idx time : ",es10.4)') tidx - write(psb_out_unit,'(" a2a_owner amx time : ",es10.4)') tamx - write(psb_out_unit,'(" a2a_owner remainedr : ",es10.4)') t1 - endif end if call psb_erractionrestore(err_act) diff --git a/base/internals/psi_adjcncy_fnd_owner.F90 b/base/internals/psi_adjcncy_fnd_owner.F90 index c13e7d8b..7ce89661 100644 --- a/base/internals/psi_adjcncy_fnd_owner.F90 +++ b/base/internals/psi_adjcncy_fnd_owner.F90 @@ -61,7 +61,7 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) integer(psb_lpk_), allocatable :: rmtidx(:) integer(psb_ipk_), allocatable :: tproc(:), lclidx(:) - integer(psb_mpk_), allocatable :: hsz(:),hidx(:), & + integer(psb_mpk_), allocatable :: hsz(:),hidx(:), sdidx(:), rvidx(:),& & sdsz(:), rvsz(:), sdhd(:), rvhd(:), p2pstat(:,:) integer(psb_mpk_) :: prc, p2ptag, iret integer(psb_mpk_) :: icomm, minfo, iictxt @@ -69,7 +69,7 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) & last_ih, last_j, nidx, nrecv, nadj integer(psb_lpk_) :: mglob, ih integer(psb_ipk_) :: ictxt,np,me - logical, parameter :: gettime=.false., new_impl=.true. + logical, parameter :: gettime=.false., new_impl=.true., a2av_impl=.true., debug=.false. real(psb_dpk_) :: t0, t1, t2, t3, t4, tamx, tidx character(len=20) :: name @@ -111,18 +111,19 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) iprc = -1 ! write(0,*) me,name,' Going through ',nidx,nadj - if (new_impl) then + if (a2av_impl) then ! ! First simple minded version with auxiliary arrays ! dimensioned on NP. - ! Could it be improved with a loop based on the maximum length - ! of adj(:) ??? + ! Do the exchange with an alltoallv + ! ! - Allocate(hidx(0:np),hsz(np),sdsz(0:np-1),rvsz(0:np-1),& - & sdhd(0:np-1), rvhd(0:np-1), p2pstat(mpi_status_size,0:np-1),& - & stat=info) - sdhd(:) = mpi_request_null - rvhd(:) = mpi_request_null + Allocate(hidx(0:np),hsz(np),sdsz(0:np-1),rvsz(0:np-1), & + & sdidx(0:np),rvidx(0:np),stat=info) + ! + ! Same send buffer for everybody + ! + sdidx(:) = 0 ! ! First, send sizes according to adjcncy list ! @@ -134,11 +135,11 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) call mpi_alltoall(sdsz,1,psb_mpi_mpk_,& & rvsz,1,psb_mpi_mpk_,icomm,minfo) - hidx(0) = 0 + rvidx(0) = 0 do i=0, np-1 - hidx(i+1) = hidx(i) + rvsz(i) + rvidx(i+1) = rvidx(i) + rvsz(i) end do - hsize = hidx(np) + hsize = rvidx(np) ! write(0,*)me,' Check on sizes from a2a:',hsize,rvsz(:) ! ! Second, allocate buffers and exchange data @@ -149,36 +150,10 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') goto 9999 end if - do i = 0, np-1 - if (rvsz(i)>0) then - ! write(0,*) me, ' First receive from ',i,rvsz(i) - prc = psb_get_rank(ictxt,i) - p2ptag = psb_long_swap_tag - !write(0,*) me, ' Posting first receive from ',i,rvsz(i),prc - call mpi_irecv(rmtidx(hidx(i)+1),rvsz(i),& - & psb_mpi_lpk_,prc,& - & p2ptag, icomm,rvhd(i),iret) - end if - end do - do j=1, nadj - if (nidx > 0) then - !call psb_snd(ictxt,idx(1:nidx),adj(j)) - prc = psb_get_rank(ictxt,adj(j)) - p2ptag = psb_long_swap_tag - !write(0,*) me, ' First send to ',adj(j),nidx, prc - call mpi_send(idx,nidx,& - & psb_mpi_lpk_,prc,& - & p2ptag, icomm,iret) - end if - end do -!!$ do i = 0, np-1 -!!$ if (rvsz(i)>0) then -!!$ ! write(0,*) me, ' First receive from ',i,rvsz(i) -!!$ call psb_rcv(ictxt,rmtidx(hidx(i)+1:hidx(i)+rvsz(i)),i) -!!$ end if -!!$ end do - call mpi_waitall(np,rvhd,p2pstat,iret) - + + call mpi_alltoallv(idx,sdsz,sdidx,psb_mpi_lpk_,& + & rmtidx,rvsz,rvidx,psb_mpi_lpk_,icomm,iret) + ! ! Third, compute local answers ! @@ -187,116 +162,215 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) tproc(i) = -1 if ((0 < lclidx(i)).and. (lclidx(i) <= n_row)) tproc(i) = me end do + ! - ! At this point we can reuse lclidx to receive messages + ! Fourth, exchange the answers ! - rvhd(:) = mpi_request_null - do j=1, nadj - !write(0,*) me, ' First send to ',adj(j),nidx - if (nidx > 0) then - !call psb_snd(ictxt,idx(1:nidx),adj(j)) - prc = psb_get_rank(ictxt,adj(j)) - p2ptag = psb_int_swap_tag - !write(0,*) me, ' Posting second receive from ',adj(j),nidx, prc - call mpi_irecv(lclidx((j-1)*nidx+1),nidx, & - & psb_mpi_ipk_,prc,& - & p2ptag, icomm,rvhd(j),iret) - end if + ! Adjust sdidx for receive in lclidx array (reused) + do i=0,np-1 + sdidx(i+1) = sdidx(i) + sdsz(i) end do - - ! - ! Fourth, send data back; - ! - do i = 0, np-1 - if (rvsz(i)>0) then - !call psb_snd(ictxt,tproc(hidx(i)+1:hidx(i)+rvsz(i)),i) - prc = psb_get_rank(ictxt,i) - p2ptag = psb_int_swap_tag - !write(0,*) me, ' Second send to ',i,rvsz(i), prc - call mpi_send(tproc(hidx(i)+1),rvsz(i),& - & psb_mpi_ipk_,prc,& - & p2ptag, icomm,iret) + call mpi_alltoallv(tproc,rvsz,rvidx,psb_mpi_ipk_,& + & lclidx,sdsz,sdidx,psb_mpi_ipk_,icomm,iret) + + do i=0, np-1 + if (sdsz(i)>0) then + iprc(1:nidx) = max(iprc(1:nidx), lclidx(sdidx(i)+1:sdidx(i)+sdsz(i))) end if end do - ! - ! Fifth: receive and combine. MAX works because default - ! answer is -1. - ! - call mpi_waitall(np,rvhd,p2pstat,iret) - do j = 1, nadj - !write(0,*) me, ' Second receive from ',adj(j), nidx - !if (nidx > 0) call psb_rcv(ictxt,tproc(1:nidx),adj(j)) - iprc(1:nidx) = max(iprc(1:nidx), lclidx((j-1)*nidx+1:(j-1)*nidx+nidx)) - end do + if (debug) write(0,*) me,' End of adjcncy_fnd ',iprc(1:nidx) else - Allocate(hidx(0:np),hsz(np),& - & sdsz(0:np-1),rvsz(0:np-1),stat=info) - ! - ! First, send sizes according to adjcncy list - ! - sdsz = 0 - do j=1, nadj - sdsz(adj(j)) = nidx - end do - !write(0,*)me,' Check on sizes into a2a:',adj(:),nadj,':',sdsz(:) + if (new_impl) then + ! + ! First simple minded version with auxiliary arrays + ! dimensioned on NP. + ! Could it be improved with a loop based on the maximum length + ! of adj(:) ??? + ! + Allocate(hidx(0:np),hsz(np),sdsz(0:np-1),rvsz(0:np-1),& + & sdhd(0:np-1), rvhd(0:np-1), p2pstat(mpi_status_size,0:np-1),& + & stat=info) + sdhd(:) = mpi_request_null + rvhd(:) = mpi_request_null + ! + ! First, send sizes according to adjcncy list + ! + sdsz = 0 + do j=1, nadj + sdsz(adj(j)) = nidx + end do + !write(0,*)me,' Check on sizes into a2a:',adj(:),nadj,':',sdsz(:) - call mpi_alltoall(sdsz,1,psb_mpi_mpk_,& - & rvsz,1,psb_mpi_mpk_,icomm,minfo) - hidx(0) = 0 - do i=0, np-1 - hidx(i+1) = hidx(i) + rvsz(i) - end do - hsize = hidx(np) - ! write(0,*)me,' Check on sizes from a2a:',hsize,rvsz(:) - ! - ! Second, allocate buffers and exchange data - ! - Allocate(rmtidx(hsize),lclidx(hsize),tproc(max(hsize,nidx)),stat=info) + call mpi_alltoall(sdsz,1,psb_mpi_mpk_,& + & rvsz,1,psb_mpi_mpk_,icomm,minfo) + hidx(0) = 0 + do i=0, np-1 + hidx(i+1) = hidx(i) + rvsz(i) + end do + hsize = hidx(np) + ! write(0,*)me,' Check on sizes from a2a:',hsize,rvsz(:) + ! + ! Second, allocate buffers and exchange data + ! + Allocate(rmtidx(hsize),lclidx(max(hsize,nidx*nadj)),tproc(max(hsize,nidx)),stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - do j=1, nadj - !write(0,*) me, ' First send to ',adj(j),nidx - if (nidx > 0) call psb_snd(ictxt,idx(1:nidx),adj(j)) - end do - do i = 0, np-1 - if (rvsz(i)>0) then - ! write(0,*) me, ' First receive from ',i,rvsz(i) - call psb_rcv(ictxt,rmtidx(hidx(i)+1:hidx(i)+rvsz(i)),i) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 end if - end do + do i = 0, np-1 + if (rvsz(i)>0) then + ! write(0,*) me, ' First receive from ',i,rvsz(i) + prc = psb_get_rank(ictxt,i) + p2ptag = psb_long_swap_tag + !write(0,*) me, ' Posting first receive from ',i,rvsz(i),prc + call mpi_irecv(rmtidx(hidx(i)+1),rvsz(i),& + & psb_mpi_lpk_,prc,& + & p2ptag, icomm,rvhd(i),iret) + end if + end do + do j=1, nadj + if (nidx > 0) then + !call psb_snd(ictxt,idx(1:nidx),adj(j)) + prc = psb_get_rank(ictxt,adj(j)) + p2ptag = psb_long_swap_tag + !write(0,*) me, ' First send to ',adj(j),nidx, prc + call mpi_send(idx,nidx,& + & psb_mpi_lpk_,prc,& + & p2ptag, icomm,iret) + end if + end do +!!$ do i = 0, np-1 +!!$ if (rvsz(i)>0) then +!!$ ! write(0,*) me, ' First receive from ',i,rvsz(i) +!!$ call psb_rcv(ictxt,rmtidx(hidx(i)+1:hidx(i)+rvsz(i)),i) +!!$ end if +!!$ end do + call mpi_waitall(np,rvhd,p2pstat,iret) - ! - ! Third, compute local answers - ! - call idxmap%g2l(rmtidx(1:hsize),lclidx(1:hsize),info,owned=.true.) - do i=1, hsize - tproc(i) = -1 - if ((0 < lclidx(i)).and. (lclidx(i) <= n_row)) tproc(i) = me - end do + ! + ! Third, compute local answers + ! + call idxmap%g2l(rmtidx(1:hsize),lclidx(1:hsize),info,owned=.true.) + do i=1, hsize + tproc(i) = -1 + if ((0 < lclidx(i)).and. (lclidx(i) <= n_row)) tproc(i) = me + end do + ! + ! At this point we can reuse lclidx to receive messages + ! + rvhd(:) = mpi_request_null + do j=1, nadj + !write(0,*) me, ' First send to ',adj(j),nidx + if (nidx > 0) then + !call psb_snd(ictxt,idx(1:nidx),adj(j)) + prc = psb_get_rank(ictxt,adj(j)) + p2ptag = psb_int_swap_tag + !write(0,*) me, ' Posting second receive from ',adj(j),nidx, prc + call mpi_irecv(lclidx((j-1)*nidx+1),nidx, & + & psb_mpi_ipk_,prc,& + & p2ptag, icomm,rvhd(j),iret) + end if + end do - ! - ! Fourth, send data back; - ! - do i = 0, np-1 - if (rvsz(i)>0) then - !write(0,*) me, ' Second send to ',i,rvsz(i) - call psb_snd(ictxt,tproc(hidx(i)+1:hidx(i)+rvsz(i)),i) + ! + ! Fourth, send data back; + ! + do i = 0, np-1 + if (rvsz(i)>0) then + !call psb_snd(ictxt,tproc(hidx(i)+1:hidx(i)+rvsz(i)),i) + prc = psb_get_rank(ictxt,i) + p2ptag = psb_int_swap_tag + !write(0,*) me, ' Second send to ',i,rvsz(i), prc + call mpi_send(tproc(hidx(i)+1),rvsz(i),& + & psb_mpi_ipk_,prc,& + & p2ptag, icomm,iret) + end if + end do + ! + ! Fifth: receive and combine. MAX works because default + ! answer is -1. + ! + call mpi_waitall(np,rvhd,p2pstat,iret) + do j = 1, nadj + !write(0,*) me, ' Second receive from ',adj(j), nidx + !if (nidx > 0) call psb_rcv(ictxt,tproc(1:nidx),adj(j)) + iprc(1:nidx) = max(iprc(1:nidx), lclidx((j-1)*nidx+1:(j-1)*nidx+nidx)) + end do + if (debug) write(0,*) me,' End of adjcncy_fnd ',iprc(1:nidx) + + else + + Allocate(hidx(0:np),hsz(np),& + & sdsz(0:np-1),rvsz(0:np-1),stat=info) + ! + ! First, send sizes according to adjcncy list + ! + sdsz = 0 + do j=1, nadj + sdsz(adj(j)) = nidx + end do + !write(0,*)me,' Check on sizes into a2a:',adj(:),nadj,':',sdsz(:) + + call mpi_alltoall(sdsz,1,psb_mpi_mpk_,& + & rvsz,1,psb_mpi_mpk_,icomm,minfo) + hidx(0) = 0 + do i=0, np-1 + hidx(i+1) = hidx(i) + rvsz(i) + end do + hsize = hidx(np) + ! write(0,*)me,' Check on sizes from a2a:',hsize,rvsz(:) + ! + ! Second, allocate buffers and exchange data + ! + Allocate(rmtidx(hsize),lclidx(hsize),tproc(max(hsize,nidx)),stat=info) + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 end if - end do - ! - ! Fifth: receive and combine. MAX works because default - ! answer is -1. Reuse tproc - ! - do j = 1, nadj - !write(0,*) me, ' Second receive from ',adj(j), nidx - if (nidx > 0) call psb_rcv(ictxt,tproc(1:nidx),adj(j)) - iprc(1:nidx) = max(iprc(1:nidx), tproc(1:nidx)) - end do + do j=1, nadj + !write(0,*) me, ' First send to ',adj(j),nidx + if (nidx > 0) call psb_snd(ictxt,idx(1:nidx),adj(j)) + end do + do i = 0, np-1 + if (rvsz(i)>0) then + ! write(0,*) me, ' First receive from ',i,rvsz(i) + call psb_rcv(ictxt,rmtidx(hidx(i)+1:hidx(i)+rvsz(i)),i) + end if + end do + + ! + ! Third, compute local answers + ! + call idxmap%g2l(rmtidx(1:hsize),lclidx(1:hsize),info,owned=.true.) + do i=1, hsize + tproc(i) = -1 + if ((0 < lclidx(i)).and. (lclidx(i) <= n_row)) tproc(i) = me + end do + + ! + ! Fourth, send data back; + ! + do i = 0, np-1 + if (rvsz(i)>0) then + !write(0,*) me, ' Second send to ',i,rvsz(i) + call psb_snd(ictxt,tproc(hidx(i)+1:hidx(i)+rvsz(i)),i) + end if + end do + ! + ! Fifth: receive and combine. MAX works because default + ! answer is -1. Reuse tproc + ! + do j = 1, nadj + !write(0,*) me, ' Second receive from ',adj(j), nidx + if (nidx > 0) call psb_rcv(ictxt,tproc(1:nidx),adj(j)) + iprc(1:nidx) = max(iprc(1:nidx), tproc(1:nidx)) + end do + end if end if + call psb_erractionrestore(err_act) return