New adjcncy and a2a fnd_owner

Reimplement adjcncy_fnd_owner to use alltoallv. Version with
sends/irecv still available under compile time constant.

Reimplement a2a_fnd_owner to use adjcncy_fnd_owner. Older version
still available under compile time constant.
fnd_owner
Salvatore Filippone 5 years ago
parent cf3fce32c3
commit caec98e942

@ -67,7 +67,7 @@ subroutine psi_a2a_fnd_owner(idx,iprc,idxmap,info)
integer(psb_lpk_), allocatable :: answers(:,:), idxsrch(:,:), hproc(:) integer(psb_lpk_), allocatable :: answers(:,:), idxsrch(:,:), hproc(:)
integer(psb_ipk_), allocatable :: helem(:), hhidx(:) integer(psb_ipk_), allocatable :: helem(:), hhidx(:), tmpadj(:)
integer(psb_mpk_), allocatable :: hsz(:),hidx(:), & integer(psb_mpk_), allocatable :: hsz(:),hidx(:), &
& sdsz(:),sdidx(:), rvsz(:), rvidx(:) & sdsz(:),sdidx(:), rvsz(:), rvidx(:)
integer(psb_mpk_) :: icomm, minfo, iictxt integer(psb_mpk_) :: icomm, minfo, iictxt
@ -75,7 +75,7 @@ subroutine psi_a2a_fnd_owner(idx,iprc,idxmap,info)
& last_ih, last_j, nv & last_ih, last_j, nv
integer(psb_lpk_) :: mglob, ih integer(psb_lpk_) :: mglob, ih
integer(psb_ipk_) :: ictxt,np,me, nresp integer(psb_ipk_) :: ictxt,np,me, nresp
logical, parameter :: gettime=.false. logical, parameter :: gettime=.false., use_adj=.true.
real(psb_dpk_) :: t0, t1, t2, t3, t4, tamx, tidx real(psb_dpk_) :: t0, t1, t2, t3, t4, tamx, tidx
character(len=20) :: name character(len=20) :: name
@ -103,190 +103,204 @@ subroutine psi_a2a_fnd_owner(idx,iprc,idxmap,info)
goto 9999 goto 9999
end if end if
if (gettime) then
t0 = psb_wtime()
end if
nv = size(idx) if (use_adj) then
call psb_realloc(nv,iprc,info) !
if (info /= psb_success_) then ! Reuse the other version by tricking it with an adjcncy list
call psb_errpush(psb_err_from_subroutine_,name,a_err='psb_realloc') ! that contains everybody but ME.
goto 9999 !
end if nv = size(idx)
call psb_realloc(np-1,tmpadj,info)
tmpadj(1:me) = [(i,i=0,me-1)]
tmpadj(me+1:np-1) = [(i,i=me+1,np-1)]
call psi_adjcncy_fnd_owner(idx,iprc,tmpadj,idxmap,info)
else
if (gettime) then
t0 = psb_wtime()
end if
nv = size(idx)
call psb_realloc(nv,iprc,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='psb_realloc')
goto 9999
end if
! !
! The basic idea is very simple. ! The basic idea is very simple.
! First we collect (to all) all the requests. ! First we collect (to all) all the requests.
Allocate(hidx(np+1),hsz(np),& Allocate(hidx(np+1),hsz(np),&
& sdsz(0:np-1),sdidx(0:np-1),& & sdsz(0:np-1),sdidx(0:np-1),&
& rvsz(0:np-1),rvidx(0:np-1),& & rvsz(0:np-1),rvidx(0:np-1),&
& stat=info) & stat=info)
if (info /= psb_success_) then if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
goto 9999 goto 9999
end if end if
hsz = 0 hsz = 0
hsz(me+1) = nv hsz(me+1) = nv
call psb_amx(iictxt,hsz) call psb_amx(iictxt,hsz)
hidx(1) = 0 hidx(1) = 0
do i=1, np do i=1, np
hidx(i+1) = hidx(i) + hsz(i) hidx(i+1) = hidx(i) + hsz(i)
end do end do
hsize = hidx(np+1) hsize = hidx(np+1)
Allocate(helem(hsize),hproc(hsize),stat=info) Allocate(helem(hsize),hproc(hsize),stat=info)
if (info /= psb_success_) then if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
goto 9999 goto 9999
end if end if
if (gettime) then if (gettime) then
t3 = psb_wtime() t3 = psb_wtime()
end if end if
call mpi_allgatherv(idx,hsz(me+1),psb_mpi_lpk_,& call mpi_allgatherv(idx,hsz(me+1),psb_mpi_lpk_,&
& hproc,hsz,hidx,psb_mpi_lpk_,& & hproc,hsz,hidx,psb_mpi_lpk_,&
& icomm,minfo) & icomm,minfo)
if (gettime) then if (gettime) then
tamx = psb_wtime() - t3 tamx = psb_wtime() - t3
end if end if
! Second, we figure out locally whether we own the indices (whoever is ! Second, we figure out locally whether we own the indices (whoever is
! asking for them). ! asking for them).
if (gettime) then if (gettime) then
t3 = psb_wtime() t3 = psb_wtime()
end if end if
call idxmap%g2l(hproc(1:hsize),helem(1:hsize),info,owned=.true.) call idxmap%g2l(hproc(1:hsize),helem(1:hsize),info,owned=.true.)
if (gettime) then if (gettime) then
tidx = psb_wtime()-t3 tidx = psb_wtime()-t3
end if end if
if (info == psb_err_iarray_outside_bounds_) info = psb_success_ if (info == psb_err_iarray_outside_bounds_) info = psb_success_
if (info /= psb_success_) then if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='psi_idx_cnv') call psb_errpush(psb_err_from_subroutine_,name,a_err='psi_idx_cnv')
goto 9999 goto 9999
end if end if
! Third: we build the answers for those indices we own, ! Third: we build the answers for those indices we own,
! with a section for each process asking. ! with a section for each process asking.
hidx = hidx +1 hidx = hidx +1
j = 0 j = 0
do ip = 0, np-1 do ip = 0, np-1
sdidx(ip) = j sdidx(ip) = j
sdsz(ip) = 0 sdsz(ip) = 0
do i=hidx(ip+1), hidx(ip+1+1)-1 do i=hidx(ip+1), hidx(ip+1+1)-1
if ((0 < helem(i)).and. (helem(i) <= n_row)) then if ((0 < helem(i)).and. (helem(i) <= n_row)) then
j = j + 1 j = j + 1
hproc(j) = hproc(i) hproc(j) = hproc(i)
sdsz(ip) = sdsz(ip) + 1 sdsz(ip) = sdsz(ip) + 1
end if end if
end do
end do end do
end do
if (gettime) then if (gettime) then
t3 = psb_wtime() t3 = psb_wtime()
end if end if
! Collect all the answers with alltoallv (need sizes) ! Collect all the answers with alltoallv (need sizes)
call mpi_alltoall(sdsz,1,psb_mpi_mpk_,& call mpi_alltoall(sdsz,1,psb_mpi_mpk_,&
& rvsz,1,psb_mpi_mpk_,icomm,minfo) & rvsz,1,psb_mpi_mpk_,icomm,minfo)
isz = sum(rvsz) isz = sum(rvsz)
allocate(answers(isz,2),idxsrch(nv,2),stat=info) allocate(answers(isz,2),idxsrch(nv,2),stat=info)
if (info /= psb_success_) then if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
goto 9999 goto 9999
end if end if
j = 0 j = 0
do ip=0, np-1 do ip=0, np-1
rvidx(ip) = j rvidx(ip) = j
j = j + rvsz(ip) j = j + rvsz(ip)
end do end do
call mpi_alltoallv(hproc,sdsz,sdidx,psb_mpi_lpk_,& call mpi_alltoallv(hproc,sdsz,sdidx,psb_mpi_lpk_,&
& answers(:,1),rvsz,rvidx,psb_mpi_lpk_,& & answers(:,1),rvsz,rvidx,psb_mpi_lpk_,&
& icomm,minfo) & icomm,minfo)
if (gettime) then if (gettime) then
tamx = psb_wtime() - t3 + tamx tamx = psb_wtime() - t3 + tamx
end if end if
j = 1 j = 1
do ip = 0,np-1 do ip = 0,np-1
do k=1,rvsz(ip) do k=1,rvsz(ip)
answers(j,2) = ip answers(j,2) = ip
j = j + 1 j = j + 1
end do
end do end do
end do ! Sort the answers and the requests, so we can
! Sort the answers and the requests, so we can ! match them efficiently
! match them efficiently call psb_msort(answers(:,1),ix=answers(:,2),&
call psb_msort(answers(:,1),ix=answers(:,2),& & flag=psb_sort_keep_idx_)
& flag=psb_sort_keep_idx_) idxsrch(1:nv,1) = idx(1:nv)
idxsrch(1:nv,1) = idx(1:nv) call psb_msort(idxsrch(1:nv,1),ix=idxsrch(1:nv,2))
call psb_msort(idxsrch(1:nv,1),ix=idxsrch(1:nv,2))
! Now extract the answers for our local query ! Now extract the answers for our local query
last_ih = -1 last_ih = -1
last_j = -1 last_j = -1
j = 1 j = 1
do i=1, nv do i=1, nv
ih = idxsrch(i,1) ih = idxsrch(i,1)
if (ih == last_ih) then if (ih == last_ih) then
iprc(idxsrch(i,2)) = answers(last_j,2) iprc(idxsrch(i,2)) = answers(last_j,2)
else else
do do
if (j > size(answers,1)) then if (j > size(answers,1)) then
! Last resort attempt. ! Last resort attempt.
j = psb_bsrch(ih,size(answers,1,kind=psb_ipk_),answers(:,1)) j = psb_bsrch(ih,size(answers,1,kind=psb_ipk_),answers(:,1))
if (j == -1) then if (j == -1) then
write(psb_err_unit,*) me,'psi_fnd_owner: searching for ',ih, & write(psb_err_unit,*) me,'psi_fnd_owner: searching for ',ih, &
& 'not found : ',size(answers,1),':',answers(:,1) & 'not found : ',size(answers,1),':',answers(:,1)
info = psb_err_internal_error_ info = psb_err_internal_error_
call psb_errpush(psb_err_internal_error_,& call psb_errpush(psb_err_internal_error_,&
& name,a_err='out bounds srch ih') & name,a_err='out bounds srch ih')
goto 9999 goto 9999
end if
end if end if
end if if (answers(j,1) == ih) exit
if (answers(j,1) == ih) exit if (answers(j,1) > ih) then
if (answers(j,1) > ih) then k = j
k = j j = psb_bsrch(ih,k,answers(1:k,1))
j = psb_bsrch(ih,k,answers(1:k,1)) if (j == -1) then
if (j == -1) then write(psb_err_unit,*) me,'psi_fnd_owner: searching for ',ih, &
write(psb_err_unit,*) me,'psi_fnd_owner: searching for ',ih, & & 'not found : ',size(answers,1),':',answers(:,1)
& 'not found : ',size(answers,1),':',answers(:,1) info = psb_err_internal_error_
info = psb_err_internal_error_ call psb_errpush(psb_err_internal_error_,name,a_err='out bounds srch ih')
call psb_errpush(psb_err_internal_error_,name,a_err='out bounds srch ih') goto 9999
goto 9999 end if
end if end if
end if
j = j + 1 j = j + 1
end do end do
! Note that the answers here are given in order ! Note that the answers here are given in order
! of sending process, so we are implicitly getting ! of sending process, so we are implicitly getting
! the max process index in case of overlap. ! the max process index in case of overlap.
last_ih = ih last_ih = ih
do do
last_j = j last_j = j
iprc(idxsrch(i,2)) = answers(j,2) iprc(idxsrch(i,2)) = answers(j,2)
j = j + 1 j = j + 1
if (j > size(answers,1)) exit if (j > size(answers,1)) exit
if (answers(j,1) /= ih) exit if (answers(j,1) /= ih) exit
end do end do
end if
end do
if (gettime) then
call psb_barrier(ictxt)
t1 = psb_wtime()
t1 = t1 -t0 - tamx - tidx
call psb_amx(ictxt,tamx)
call psb_amx(ictxt,tidx)
call psb_amx(ictxt,t1)
if (me == psb_root_) then
write(psb_out_unit,'(" a2a_owner idx time : ",es10.4)') tidx
write(psb_out_unit,'(" a2a_owner amx time : ",es10.4)') tamx
write(psb_out_unit,'(" a2a_owner remainedr : ",es10.4)') t1
endif
end if end if
end do
if (gettime) then
call psb_barrier(ictxt)
t1 = psb_wtime()
t1 = t1 -t0 - tamx - tidx
call psb_amx(ictxt,tamx)
call psb_amx(ictxt,tidx)
call psb_amx(ictxt,t1)
if (me == psb_root_) then
write(psb_out_unit,'(" a2a_owner idx time : ",es10.4)') tidx
write(psb_out_unit,'(" a2a_owner amx time : ",es10.4)') tamx
write(psb_out_unit,'(" a2a_owner remainedr : ",es10.4)') t1
endif
end if end if
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)

@ -61,7 +61,7 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info)
integer(psb_lpk_), allocatable :: rmtidx(:) integer(psb_lpk_), allocatable :: rmtidx(:)
integer(psb_ipk_), allocatable :: tproc(:), lclidx(:) integer(psb_ipk_), allocatable :: tproc(:), lclidx(:)
integer(psb_mpk_), allocatable :: hsz(:),hidx(:), & integer(psb_mpk_), allocatable :: hsz(:),hidx(:), sdidx(:), rvidx(:),&
& sdsz(:), rvsz(:), sdhd(:), rvhd(:), p2pstat(:,:) & sdsz(:), rvsz(:), sdhd(:), rvhd(:), p2pstat(:,:)
integer(psb_mpk_) :: prc, p2ptag, iret integer(psb_mpk_) :: prc, p2ptag, iret
integer(psb_mpk_) :: icomm, minfo, iictxt integer(psb_mpk_) :: icomm, minfo, iictxt
@ -69,7 +69,7 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info)
& last_ih, last_j, nidx, nrecv, nadj & last_ih, last_j, nidx, nrecv, nadj
integer(psb_lpk_) :: mglob, ih integer(psb_lpk_) :: mglob, ih
integer(psb_ipk_) :: ictxt,np,me integer(psb_ipk_) :: ictxt,np,me
logical, parameter :: gettime=.false., new_impl=.true. logical, parameter :: gettime=.false., new_impl=.true., a2av_impl=.true., debug=.false.
real(psb_dpk_) :: t0, t1, t2, t3, t4, tamx, tidx real(psb_dpk_) :: t0, t1, t2, t3, t4, tamx, tidx
character(len=20) :: name character(len=20) :: name
@ -111,18 +111,19 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info)
iprc = -1 iprc = -1
! write(0,*) me,name,' Going through ',nidx,nadj ! write(0,*) me,name,' Going through ',nidx,nadj
if (new_impl) then if (a2av_impl) then
! !
! First simple minded version with auxiliary arrays ! First simple minded version with auxiliary arrays
! dimensioned on NP. ! dimensioned on NP.
! Could it be improved with a loop based on the maximum length ! Do the exchange with an alltoallv
! of adj(:) ??? !
! !
Allocate(hidx(0:np),hsz(np),sdsz(0:np-1),rvsz(0:np-1),& Allocate(hidx(0:np),hsz(np),sdsz(0:np-1),rvsz(0:np-1), &
& sdhd(0:np-1), rvhd(0:np-1), p2pstat(mpi_status_size,0:np-1),& & sdidx(0:np),rvidx(0:np),stat=info)
& stat=info) !
sdhd(:) = mpi_request_null ! Same send buffer for everybody
rvhd(:) = mpi_request_null !
sdidx(:) = 0
! !
! First, send sizes according to adjcncy list ! First, send sizes according to adjcncy list
! !
@ -134,11 +135,11 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info)
call mpi_alltoall(sdsz,1,psb_mpi_mpk_,& call mpi_alltoall(sdsz,1,psb_mpi_mpk_,&
& rvsz,1,psb_mpi_mpk_,icomm,minfo) & rvsz,1,psb_mpi_mpk_,icomm,minfo)
hidx(0) = 0 rvidx(0) = 0
do i=0, np-1 do i=0, np-1
hidx(i+1) = hidx(i) + rvsz(i) rvidx(i+1) = rvidx(i) + rvsz(i)
end do end do
hsize = hidx(np) hsize = rvidx(np)
! write(0,*)me,' Check on sizes from a2a:',hsize,rvsz(:) ! write(0,*)me,' Check on sizes from a2a:',hsize,rvsz(:)
! !
! Second, allocate buffers and exchange data ! Second, allocate buffers and exchange data
@ -149,36 +150,10 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info)
call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
goto 9999 goto 9999
end if end if
do i = 0, np-1
if (rvsz(i)>0) then call mpi_alltoallv(idx,sdsz,sdidx,psb_mpi_lpk_,&
! write(0,*) me, ' First receive from ',i,rvsz(i) & rmtidx,rvsz,rvidx,psb_mpi_lpk_,icomm,iret)
prc = psb_get_rank(ictxt,i)
p2ptag = psb_long_swap_tag
!write(0,*) me, ' Posting first receive from ',i,rvsz(i),prc
call mpi_irecv(rmtidx(hidx(i)+1),rvsz(i),&
& psb_mpi_lpk_,prc,&
& p2ptag, icomm,rvhd(i),iret)
end if
end do
do j=1, nadj
if (nidx > 0) then
!call psb_snd(ictxt,idx(1:nidx),adj(j))
prc = psb_get_rank(ictxt,adj(j))
p2ptag = psb_long_swap_tag
!write(0,*) me, ' First send to ',adj(j),nidx, prc
call mpi_send(idx,nidx,&
& psb_mpi_lpk_,prc,&
& p2ptag, icomm,iret)
end if
end do
!!$ do i = 0, np-1
!!$ if (rvsz(i)>0) then
!!$ ! write(0,*) me, ' First receive from ',i,rvsz(i)
!!$ call psb_rcv(ictxt,rmtidx(hidx(i)+1:hidx(i)+rvsz(i)),i)
!!$ end if
!!$ end do
call mpi_waitall(np,rvhd,p2pstat,iret)
! !
! Third, compute local answers ! Third, compute local answers
! !
@ -187,116 +162,215 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info)
tproc(i) = -1 tproc(i) = -1
if ((0 < lclidx(i)).and. (lclidx(i) <= n_row)) tproc(i) = me if ((0 < lclidx(i)).and. (lclidx(i) <= n_row)) tproc(i) = me
end do end do
! !
! At this point we can reuse lclidx to receive messages ! Fourth, exchange the answers
! !
rvhd(:) = mpi_request_null ! Adjust sdidx for receive in lclidx array (reused)
do j=1, nadj do i=0,np-1
!write(0,*) me, ' First send to ',adj(j),nidx sdidx(i+1) = sdidx(i) + sdsz(i)
if (nidx > 0) then
!call psb_snd(ictxt,idx(1:nidx),adj(j))
prc = psb_get_rank(ictxt,adj(j))
p2ptag = psb_int_swap_tag
!write(0,*) me, ' Posting second receive from ',adj(j),nidx, prc
call mpi_irecv(lclidx((j-1)*nidx+1),nidx, &
& psb_mpi_ipk_,prc,&
& p2ptag, icomm,rvhd(j),iret)
end if
end do end do
call mpi_alltoallv(tproc,rvsz,rvidx,psb_mpi_ipk_,&
! & lclidx,sdsz,sdidx,psb_mpi_ipk_,icomm,iret)
! Fourth, send data back;
! do i=0, np-1
do i = 0, np-1 if (sdsz(i)>0) then
if (rvsz(i)>0) then iprc(1:nidx) = max(iprc(1:nidx), lclidx(sdidx(i)+1:sdidx(i)+sdsz(i)))
!call psb_snd(ictxt,tproc(hidx(i)+1:hidx(i)+rvsz(i)),i)
prc = psb_get_rank(ictxt,i)
p2ptag = psb_int_swap_tag
!write(0,*) me, ' Second send to ',i,rvsz(i), prc
call mpi_send(tproc(hidx(i)+1),rvsz(i),&
& psb_mpi_ipk_,prc,&
& p2ptag, icomm,iret)
end if end if
end do end do
!
! Fifth: receive and combine. MAX works because default
! answer is -1.
!
call mpi_waitall(np,rvhd,p2pstat,iret)
do j = 1, nadj
!write(0,*) me, ' Second receive from ',adj(j), nidx
!if (nidx > 0) call psb_rcv(ictxt,tproc(1:nidx),adj(j))
iprc(1:nidx) = max(iprc(1:nidx), lclidx((j-1)*nidx+1:(j-1)*nidx+nidx))
end do
if (debug) write(0,*) me,' End of adjcncy_fnd ',iprc(1:nidx)
else else
Allocate(hidx(0:np),hsz(np),& if (new_impl) then
& sdsz(0:np-1),rvsz(0:np-1),stat=info) !
! ! First simple minded version with auxiliary arrays
! First, send sizes according to adjcncy list ! dimensioned on NP.
! ! Could it be improved with a loop based on the maximum length
sdsz = 0 ! of adj(:) ???
do j=1, nadj !
sdsz(adj(j)) = nidx Allocate(hidx(0:np),hsz(np),sdsz(0:np-1),rvsz(0:np-1),&
end do & sdhd(0:np-1), rvhd(0:np-1), p2pstat(mpi_status_size,0:np-1),&
!write(0,*)me,' Check on sizes into a2a:',adj(:),nadj,':',sdsz(:) & stat=info)
sdhd(:) = mpi_request_null
rvhd(:) = mpi_request_null
!
! First, send sizes according to adjcncy list
!
sdsz = 0
do j=1, nadj
sdsz(adj(j)) = nidx
end do
!write(0,*)me,' Check on sizes into a2a:',adj(:),nadj,':',sdsz(:)
call mpi_alltoall(sdsz,1,psb_mpi_mpk_,& call mpi_alltoall(sdsz,1,psb_mpi_mpk_,&
& rvsz,1,psb_mpi_mpk_,icomm,minfo) & rvsz,1,psb_mpi_mpk_,icomm,minfo)
hidx(0) = 0 hidx(0) = 0
do i=0, np-1 do i=0, np-1
hidx(i+1) = hidx(i) + rvsz(i) hidx(i+1) = hidx(i) + rvsz(i)
end do end do
hsize = hidx(np) hsize = hidx(np)
! write(0,*)me,' Check on sizes from a2a:',hsize,rvsz(:) ! write(0,*)me,' Check on sizes from a2a:',hsize,rvsz(:)
! !
! Second, allocate buffers and exchange data ! Second, allocate buffers and exchange data
! !
Allocate(rmtidx(hsize),lclidx(hsize),tproc(max(hsize,nidx)),stat=info) Allocate(rmtidx(hsize),lclidx(max(hsize,nidx*nadj)),tproc(max(hsize,nidx)),stat=info)
if (info /= psb_success_) then if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
goto 9999 goto 9999
end if
do j=1, nadj
!write(0,*) me, ' First send to ',adj(j),nidx
if (nidx > 0) call psb_snd(ictxt,idx(1:nidx),adj(j))
end do
do i = 0, np-1
if (rvsz(i)>0) then
! write(0,*) me, ' First receive from ',i,rvsz(i)
call psb_rcv(ictxt,rmtidx(hidx(i)+1:hidx(i)+rvsz(i)),i)
end if end if
end do do i = 0, np-1
if (rvsz(i)>0) then
! write(0,*) me, ' First receive from ',i,rvsz(i)
prc = psb_get_rank(ictxt,i)
p2ptag = psb_long_swap_tag
!write(0,*) me, ' Posting first receive from ',i,rvsz(i),prc
call mpi_irecv(rmtidx(hidx(i)+1),rvsz(i),&
& psb_mpi_lpk_,prc,&
& p2ptag, icomm,rvhd(i),iret)
end if
end do
do j=1, nadj
if (nidx > 0) then
!call psb_snd(ictxt,idx(1:nidx),adj(j))
prc = psb_get_rank(ictxt,adj(j))
p2ptag = psb_long_swap_tag
!write(0,*) me, ' First send to ',adj(j),nidx, prc
call mpi_send(idx,nidx,&
& psb_mpi_lpk_,prc,&
& p2ptag, icomm,iret)
end if
end do
!!$ do i = 0, np-1
!!$ if (rvsz(i)>0) then
!!$ ! write(0,*) me, ' First receive from ',i,rvsz(i)
!!$ call psb_rcv(ictxt,rmtidx(hidx(i)+1:hidx(i)+rvsz(i)),i)
!!$ end if
!!$ end do
call mpi_waitall(np,rvhd,p2pstat,iret)
! !
! Third, compute local answers ! Third, compute local answers
! !
call idxmap%g2l(rmtidx(1:hsize),lclidx(1:hsize),info,owned=.true.) call idxmap%g2l(rmtidx(1:hsize),lclidx(1:hsize),info,owned=.true.)
do i=1, hsize do i=1, hsize
tproc(i) = -1 tproc(i) = -1
if ((0 < lclidx(i)).and. (lclidx(i) <= n_row)) tproc(i) = me if ((0 < lclidx(i)).and. (lclidx(i) <= n_row)) tproc(i) = me
end do end do
!
! At this point we can reuse lclidx to receive messages
!
rvhd(:) = mpi_request_null
do j=1, nadj
!write(0,*) me, ' First send to ',adj(j),nidx
if (nidx > 0) then
!call psb_snd(ictxt,idx(1:nidx),adj(j))
prc = psb_get_rank(ictxt,adj(j))
p2ptag = psb_int_swap_tag
!write(0,*) me, ' Posting second receive from ',adj(j),nidx, prc
call mpi_irecv(lclidx((j-1)*nidx+1),nidx, &
& psb_mpi_ipk_,prc,&
& p2ptag, icomm,rvhd(j),iret)
end if
end do
! !
! Fourth, send data back; ! Fourth, send data back;
! !
do i = 0, np-1 do i = 0, np-1
if (rvsz(i)>0) then if (rvsz(i)>0) then
!write(0,*) me, ' Second send to ',i,rvsz(i) !call psb_snd(ictxt,tproc(hidx(i)+1:hidx(i)+rvsz(i)),i)
call psb_snd(ictxt,tproc(hidx(i)+1:hidx(i)+rvsz(i)),i) prc = psb_get_rank(ictxt,i)
p2ptag = psb_int_swap_tag
!write(0,*) me, ' Second send to ',i,rvsz(i), prc
call mpi_send(tproc(hidx(i)+1),rvsz(i),&
& psb_mpi_ipk_,prc,&
& p2ptag, icomm,iret)
end if
end do
!
! Fifth: receive and combine. MAX works because default
! answer is -1.
!
call mpi_waitall(np,rvhd,p2pstat,iret)
do j = 1, nadj
!write(0,*) me, ' Second receive from ',adj(j), nidx
!if (nidx > 0) call psb_rcv(ictxt,tproc(1:nidx),adj(j))
iprc(1:nidx) = max(iprc(1:nidx), lclidx((j-1)*nidx+1:(j-1)*nidx+nidx))
end do
if (debug) write(0,*) me,' End of adjcncy_fnd ',iprc(1:nidx)
else
Allocate(hidx(0:np),hsz(np),&
& sdsz(0:np-1),rvsz(0:np-1),stat=info)
!
! First, send sizes according to adjcncy list
!
sdsz = 0
do j=1, nadj
sdsz(adj(j)) = nidx
end do
!write(0,*)me,' Check on sizes into a2a:',adj(:),nadj,':',sdsz(:)
call mpi_alltoall(sdsz,1,psb_mpi_mpk_,&
& rvsz,1,psb_mpi_mpk_,icomm,minfo)
hidx(0) = 0
do i=0, np-1
hidx(i+1) = hidx(i) + rvsz(i)
end do
hsize = hidx(np)
! write(0,*)me,' Check on sizes from a2a:',hsize,rvsz(:)
!
! Second, allocate buffers and exchange data
!
Allocate(rmtidx(hsize),lclidx(hsize),tproc(max(hsize,nidx)),stat=info)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
goto 9999
end if end if
end do do j=1, nadj
! !write(0,*) me, ' First send to ',adj(j),nidx
! Fifth: receive and combine. MAX works because default if (nidx > 0) call psb_snd(ictxt,idx(1:nidx),adj(j))
! answer is -1. Reuse tproc end do
! do i = 0, np-1
do j = 1, nadj if (rvsz(i)>0) then
!write(0,*) me, ' Second receive from ',adj(j), nidx ! write(0,*) me, ' First receive from ',i,rvsz(i)
if (nidx > 0) call psb_rcv(ictxt,tproc(1:nidx),adj(j)) call psb_rcv(ictxt,rmtidx(hidx(i)+1:hidx(i)+rvsz(i)),i)
iprc(1:nidx) = max(iprc(1:nidx), tproc(1:nidx)) end if
end do end do
!
! Third, compute local answers
!
call idxmap%g2l(rmtidx(1:hsize),lclidx(1:hsize),info,owned=.true.)
do i=1, hsize
tproc(i) = -1
if ((0 < lclidx(i)).and. (lclidx(i) <= n_row)) tproc(i) = me
end do
!
! Fourth, send data back;
!
do i = 0, np-1
if (rvsz(i)>0) then
!write(0,*) me, ' Second send to ',i,rvsz(i)
call psb_snd(ictxt,tproc(hidx(i)+1:hidx(i)+rvsz(i)),i)
end if
end do
!
! Fifth: receive and combine. MAX works because default
! answer is -1. Reuse tproc
!
do j = 1, nadj
!write(0,*) me, ' Second receive from ',adj(j), nidx
if (nidx > 0) call psb_rcv(ictxt,tproc(1:nidx),adj(j))
iprc(1:nidx) = max(iprc(1:nidx), tproc(1:nidx))
end do
end if
end if end if
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return

Loading…
Cancel
Save