diff --git a/base/internals/psi_adjcncy_fnd_owner.F90 b/base/internals/psi_adjcncy_fnd_owner.F90 index 42fc2a8c..0a1cbef7 100644 --- a/base/internals/psi_adjcncy_fnd_owner.F90 +++ b/base/internals/psi_adjcncy_fnd_owner.F90 @@ -62,13 +62,14 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) integer(psb_lpk_), allocatable :: rmtidx(:) integer(psb_ipk_), allocatable :: tproc(:), lclidx(:) integer(psb_mpk_), allocatable :: hsz(:),hidx(:), & - & sdsz(:), rvsz(:) + & sdsz(:), rvsz(:), sdhd(:), rvhd(:), p2pstat(:,:) + integer(psb_mpk_) :: prc, p2ptag, iret integer(psb_mpk_) :: icomm, minfo, iictxt integer(psb_ipk_) :: i,n_row,n_col,err_act,hsize,ip,isz,j, k,& & last_ih, last_j, nidx, nrecv, nadj integer(psb_lpk_) :: mglob, ih integer(psb_ipk_) :: ictxt,np,me - logical, parameter :: gettime=.false. + logical, parameter :: gettime=.false., new_impl=.true. real(psb_dpk_) :: t0, t1, t2, t3, t4, tamx, tidx character(len=20) :: name @@ -110,73 +111,192 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) iprc = -1 ! write(0,*) me,name,' Going through ',nidx,nadj - Allocate(hidx(0:np),hsz(np),& - & sdsz(0:np-1),rvsz(0:np-1),stat=info) - ! - ! First, send sizes according to adjcncy list - ! - sdsz = 0 - do j=1, nadj - sdsz(adj(j)) = nidx - end do - !write(0,*)me,' Check on sizes into a2a:',adj(:),nadj,':',sdsz(:) - - call mpi_alltoall(sdsz,1,psb_mpi_mpk_,& - & rvsz,1,psb_mpi_mpk_,icomm,minfo) - hidx(0) = 0 - do i=0, np-1 - hidx(i+1) = hidx(i) + rvsz(i) - end do - hsize = hidx(np) - ! write(0,*)me,' Check on sizes from a2a:',hsize,rvsz(:) - ! - ! Second, allocate buffers and exchange data - ! - Allocate(rmtidx(hsize),lclidx(hsize),tproc(max(hsize,nidx)),stat=info) - - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - do j=1, nadj - !write(0,*) me, ' First send to ',adj(j),nidx - if (nidx > 0) call psb_snd(ictxt,idx(1:nidx),adj(j)) - end do - do i = 0, np-1 - if (rvsz(i)>0) then - ! write(0,*) me, ' First receive from ',i,rvsz(i) - call psb_rcv(ictxt,rmtidx(hidx(i)+1:hidx(i)+rvsz(i)),i) + if (new_impl) then + ! + ! First simple minded version with auxiliary arrays + ! dimensioned on NP. + ! Could it be improved with a loop based on the maximum length + ! of adj(:) ??? + ! + Allocate(hidx(0:np),hsz(np),sdsz(0:np-1),rvsz(0:np-1),& + & sdhd(0:np-1), rvhd(0:np-1), p2pstat(mpi_status_size,0:np-1),& + & stat=info) + sdhd(:) = mpi_request_null + rvhd(:) = mpi_request_null + ! + ! First, send sizes according to adjcncy list + ! + sdsz = 0 + do j=1, nadj + sdsz(adj(j)) = nidx + end do + !write(0,*)me,' Check on sizes into a2a:',adj(:),nadj,':',sdsz(:) + + call mpi_alltoall(sdsz,1,psb_mpi_mpk_,& + & rvsz,1,psb_mpi_mpk_,icomm,minfo) + hidx(0) = 0 + do i=0, np-1 + hidx(i+1) = hidx(i) + rvsz(i) + end do + hsize = hidx(np) + ! write(0,*)me,' Check on sizes from a2a:',hsize,rvsz(:) + ! + ! Second, allocate buffers and exchange data + ! + Allocate(rmtidx(hsize),lclidx(hsize),tproc(max(hsize,nidx)),stat=info) + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 end if - end do - - ! - ! Third, compute local answers - ! - call idxmap%g2l(rmtidx(1:hsize),lclidx(1:hsize),info,owned=.true.) - do i=1, hsize - tproc(i) = -1 - if ((0 < lclidx(i)).and. (lclidx(i) <= n_row)) tproc(i) = me - end do - - ! - ! Fourth, send data back; - ! - do i = 0, np-1 - if (rvsz(i)>0) then - !write(0,*) me, ' Second send to ',i,rvsz(i) - call psb_snd(ictxt,tproc(hidx(i)+1:hidx(i)+rvsz(i)),i) + do i = 0, np-1 + if (rvsz(i)>0) then + ! write(0,*) me, ' First receive from ',i,rvsz(i) + call psb_get_rank(prc,ictxt,i) + p2ptag = psb_long_swap_tag + write(0,*) me, ' Posting first receive from ',i,rvsz(i),prc + call mpi_irecv(rmtidx(hidx(i)+1),rvsz(i),& + & psb_mpi_lpk_,prc,& + & p2ptag, icomm,rvhd(i),iret) + end if + end do + do j=1, nadj + if (nidx > 0) then + !call psb_snd(ictxt,idx(1:nidx),adj(j)) + call psb_get_rank(prc,ictxt,adj(j)) + p2ptag = psb_long_swap_tag + write(0,*) me, ' First send to ',adj(j),nidx, prc + call mpi_send(idx,nidx,& + & psb_mpi_lpk_,prc,& + & p2ptag, icomm,iret) + end if + end do +!!$ do i = 0, np-1 +!!$ if (rvsz(i)>0) then +!!$ ! write(0,*) me, ' First receive from ',i,rvsz(i) +!!$ call psb_rcv(ictxt,rmtidx(hidx(i)+1:hidx(i)+rvsz(i)),i) +!!$ end if +!!$ end do + call mpi_waitall(np,rvhd,p2pstat,iret) + + ! + ! Third, compute local answers + ! + call idxmap%g2l(rmtidx(1:hsize),lclidx(1:hsize),info,owned=.true.) + do i=1, hsize + tproc(i) = -1 + if ((0 < lclidx(i)).and. (lclidx(i) <= n_row)) tproc(i) = me + end do + ! + ! At this point we can reuse lclidx to receive messages + ! + rvhd(:) = mpi_request_null + do j=1, nadj + !write(0,*) me, ' First send to ',adj(j),nidx + if (nidx > 0) then + !call psb_snd(ictxt,idx(1:nidx),adj(j)) + call psb_get_rank(prc,ictxt,adj(j)) + p2ptag = psb_int_swap_tag + write(0,*) me, ' Posting second receive from ',adj(j),nidx, prc + call mpi_irecv(lclidx((j-1)*nidx+1),nidx, & + & psb_mpi_ipk_,prc,& + & p2ptag, icomm,rvhd(j),iret) + end if + end do + + ! + ! Fourth, send data back; + ! + do i = 0, np-1 + if (rvsz(i)>0) then + !call psb_snd(ictxt,tproc(hidx(i)+1:hidx(i)+rvsz(i)),i) + call psb_get_rank(prc,ictxt,i) + p2ptag = psb_int_swap_tag + write(0,*) me, ' Second send to ',i,rvsz(i), prc + call mpi_send(tproc(hidx(i)+1),rvsz(i),& + & psb_mpi_ipk_,prc,& + & p2ptag, icomm,iret) + end if + end do + ! + ! Fifth: receive and combine. MAX works because default + ! answer is -1. + ! + call mpi_waitall(np,rvhd,p2pstat,iret) + do j = 1, nadj + !write(0,*) me, ' Second receive from ',adj(j), nidx + !if (nidx > 0) call psb_rcv(ictxt,tproc(1:nidx),adj(j)) + iprc(1:nidx) = max(iprc(1:nidx), lclidx((j-1)*nidx+1:(j-1)*nidx+nidx)) + end do + + else + Allocate(hidx(0:np),hsz(np),& + & sdsz(0:np-1),rvsz(0:np-1),stat=info) + ! + ! First, send sizes according to adjcncy list + ! + sdsz = 0 + do j=1, nadj + sdsz(adj(j)) = nidx + end do + !write(0,*)me,' Check on sizes into a2a:',adj(:),nadj,':',sdsz(:) + + call mpi_alltoall(sdsz,1,psb_mpi_mpk_,& + & rvsz,1,psb_mpi_mpk_,icomm,minfo) + hidx(0) = 0 + do i=0, np-1 + hidx(i+1) = hidx(i) + rvsz(i) + end do + hsize = hidx(np) + ! write(0,*)me,' Check on sizes from a2a:',hsize,rvsz(:) + ! + ! Second, allocate buffers and exchange data + ! + Allocate(rmtidx(hsize),lclidx(hsize),tproc(max(hsize,nidx)),stat=info) + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 end if - end do - ! - ! Fifth: receive and combine. MAX works because default - ! answer is -1. Reuse tproc - ! - do j = 1, nadj - !write(0,*) me, ' Second receive from ',adj(j), nidx - if (nidx > 0) call psb_rcv(ictxt,tproc(1:nidx),adj(j)) - iprc(1:nidx) = max(iprc(1:nidx), tproc(1:nidx)) - end do - + do j=1, nadj + !write(0,*) me, ' First send to ',adj(j),nidx + if (nidx > 0) call psb_snd(ictxt,idx(1:nidx),adj(j)) + end do + do i = 0, np-1 + if (rvsz(i)>0) then + ! write(0,*) me, ' First receive from ',i,rvsz(i) + call psb_rcv(ictxt,rmtidx(hidx(i)+1:hidx(i)+rvsz(i)),i) + end if + end do + + ! + ! Third, compute local answers + ! + call idxmap%g2l(rmtidx(1:hsize),lclidx(1:hsize),info,owned=.true.) + do i=1, hsize + tproc(i) = -1 + if ((0 < lclidx(i)).and. (lclidx(i) <= n_row)) tproc(i) = me + end do + + ! + ! Fourth, send data back; + ! + do i = 0, np-1 + if (rvsz(i)>0) then + !write(0,*) me, ' Second send to ',i,rvsz(i) + call psb_snd(ictxt,tproc(hidx(i)+1:hidx(i)+rvsz(i)),i) + end if + end do + ! + ! Fifth: receive and combine. MAX works because default + ! answer is -1. Reuse tproc + ! + do j = 1, nadj + !write(0,*) me, ' Second receive from ',adj(j), nidx + if (nidx > 0) call psb_rcv(ictxt,tproc(1:nidx),adj(j)) + iprc(1:nidx) = max(iprc(1:nidx), tproc(1:nidx)) + end do + end if call psb_erractionrestore(err_act) return