Merge branch 'fnd_owner' of https://github.com/sfilippone/psblas3 into fnd_owner

fnd_owner
Salvatore Filippone 5 years ago
commit c845a7881e

@ -66,16 +66,11 @@ subroutine psi_a2a_fnd_owner(idx,iprc,idxmap,info)
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info
integer(psb_lpk_), allocatable :: answers(:,:), idxsrch(:,:), hproc(:) integer(psb_ipk_), allocatable :: tmpadj(:)
integer(psb_ipk_), allocatable :: helem(:), hhidx(:), tmpadj(:)
integer(psb_mpk_), allocatable :: hsz(:),hidx(:), &
& sdsz(:),sdidx(:), rvsz(:), rvidx(:)
integer(psb_mpk_) :: icomm, minfo, iictxt integer(psb_mpk_) :: icomm, minfo, iictxt
integer(psb_ipk_) :: i,n_row,n_col,err_act,hsize,ip,isz,j, k,& integer(psb_ipk_) :: i,n_row,n_col,err_act,nv
& last_ih, last_j, nv
integer(psb_lpk_) :: mglob, ih integer(psb_lpk_) :: mglob, ih
integer(psb_ipk_) :: ictxt,np,me, nresp integer(psb_ipk_) :: ictxt,np,me, nresp
logical, parameter :: gettime=.false., use_adj=.true.
real(psb_dpk_) :: t0, t1, t2, t3, t4, tamx, tidx real(psb_dpk_) :: t0, t1, t2, t3, t4, tamx, tidx
character(len=20) :: name character(len=20) :: name
@ -103,8 +98,6 @@ subroutine psi_a2a_fnd_owner(idx,iprc,idxmap,info)
goto 9999 goto 9999
end if end if
if (use_adj) then
! !
! Reuse the other version by tricking it with an adjcncy list ! Reuse the other version by tricking it with an adjcncy list
! that contains everybody but ME. ! that contains everybody but ME.
@ -115,194 +108,6 @@ subroutine psi_a2a_fnd_owner(idx,iprc,idxmap,info)
tmpadj(me+1:np-1) = [(i,i=me+1,np-1)] tmpadj(me+1:np-1) = [(i,i=me+1,np-1)]
call psi_adjcncy_fnd_owner(idx,iprc,tmpadj,idxmap,info) call psi_adjcncy_fnd_owner(idx,iprc,tmpadj,idxmap,info)
else
if (gettime) then
t0 = psb_wtime()
end if
nv = size(idx)
call psb_realloc(nv,iprc,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='psb_realloc')
goto 9999
end if
!
! The basic idea is very simple.
! First we collect (to all) all the requests.
Allocate(hidx(np+1),hsz(np),&
& sdsz(0:np-1),sdidx(0:np-1),&
& rvsz(0:np-1),rvidx(0:np-1),&
& stat=info)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
goto 9999
end if
hsz = 0
hsz(me+1) = nv
call psb_amx(iictxt,hsz)
hidx(1) = 0
do i=1, np
hidx(i+1) = hidx(i) + hsz(i)
end do
hsize = hidx(np+1)
Allocate(helem(hsize),hproc(hsize),stat=info)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
goto 9999
end if
if (gettime) then
t3 = psb_wtime()
end if
call mpi_allgatherv(idx,hsz(me+1),psb_mpi_lpk_,&
& hproc,hsz,hidx,psb_mpi_lpk_,&
& icomm,minfo)
if (gettime) then
tamx = psb_wtime() - t3
end if
! Second, we figure out locally whether we own the indices (whoever is
! asking for them).
if (gettime) then
t3 = psb_wtime()
end if
call idxmap%g2l(hproc(1:hsize),helem(1:hsize),info,owned=.true.)
if (gettime) then
tidx = psb_wtime()-t3
end if
if (info == psb_err_iarray_outside_bounds_) info = psb_success_
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='psi_idx_cnv')
goto 9999
end if
! Third: we build the answers for those indices we own,
! with a section for each process asking.
hidx = hidx +1
j = 0
do ip = 0, np-1
sdidx(ip) = j
sdsz(ip) = 0
do i=hidx(ip+1), hidx(ip+1+1)-1
if ((0 < helem(i)).and. (helem(i) <= n_row)) then
j = j + 1
hproc(j) = hproc(i)
sdsz(ip) = sdsz(ip) + 1
end if
end do
end do
if (gettime) then
t3 = psb_wtime()
end if
! Collect all the answers with alltoallv (need sizes)
call mpi_alltoall(sdsz,1,psb_mpi_mpk_,&
& rvsz,1,psb_mpi_mpk_,icomm,minfo)
isz = sum(rvsz)
allocate(answers(isz,2),idxsrch(nv,2),stat=info)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
goto 9999
end if
j = 0
do ip=0, np-1
rvidx(ip) = j
j = j + rvsz(ip)
end do
call mpi_alltoallv(hproc,sdsz,sdidx,psb_mpi_lpk_,&
& answers(:,1),rvsz,rvidx,psb_mpi_lpk_,&
& icomm,minfo)
if (gettime) then
tamx = psb_wtime() - t3 + tamx
end if
j = 1
do ip = 0,np-1
do k=1,rvsz(ip)
answers(j,2) = ip
j = j + 1
end do
end do
! Sort the answers and the requests, so we can
! match them efficiently
call psb_msort(answers(:,1),ix=answers(:,2),&
& flag=psb_sort_keep_idx_)
idxsrch(1:nv,1) = idx(1:nv)
call psb_msort(idxsrch(1:nv,1),ix=idxsrch(1:nv,2))
! Now extract the answers for our local query
last_ih = -1
last_j = -1
j = 1
do i=1, nv
ih = idxsrch(i,1)
if (ih == last_ih) then
iprc(idxsrch(i,2)) = answers(last_j,2)
else
do
if (j > size(answers,1)) then
! Last resort attempt.
j = psb_bsrch(ih,size(answers,1,kind=psb_ipk_),answers(:,1))
if (j == -1) then
write(psb_err_unit,*) me,'psi_fnd_owner: searching for ',ih, &
& 'not found : ',size(answers,1),':',answers(:,1)
info = psb_err_internal_error_
call psb_errpush(psb_err_internal_error_,&
& name,a_err='out bounds srch ih')
goto 9999
end if
end if
if (answers(j,1) == ih) exit
if (answers(j,1) > ih) then
k = j
j = psb_bsrch(ih,k,answers(1:k,1))
if (j == -1) then
write(psb_err_unit,*) me,'psi_fnd_owner: searching for ',ih, &
& 'not found : ',size(answers,1),':',answers(:,1)
info = psb_err_internal_error_
call psb_errpush(psb_err_internal_error_,name,a_err='out bounds srch ih')
goto 9999
end if
end if
j = j + 1
end do
! Note that the answers here are given in order
! of sending process, so we are implicitly getting
! the max process index in case of overlap.
last_ih = ih
do
last_j = j
iprc(idxsrch(i,2)) = answers(j,2)
j = j + 1
if (j > size(answers,1)) exit
if (answers(j,1) /= ih) exit
end do
end if
end do
if (gettime) then
call psb_barrier(ictxt)
t1 = psb_wtime()
t1 = t1 -t0 - tamx - tidx
call psb_amx(ictxt,tamx)
call psb_amx(ictxt,tidx)
call psb_amx(ictxt,t1)
if (me == psb_root_) then
write(psb_out_unit,'(" a2a_owner idx time : ",es10.4)') tidx
write(psb_out_unit,'(" a2a_owner amx time : ",es10.4)') tamx
write(psb_out_unit,'(" a2a_owner remainedr : ",es10.4)') t1
endif
end if
end if
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return

@ -235,7 +235,6 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info)
end do end do
do j=1, nadj do j=1, nadj
if (nidx > 0) then if (nidx > 0) then
!call psb_snd(ictxt,idx(1:nidx),adj(j))
prc = psb_get_rank(ictxt,adj(j)) prc = psb_get_rank(ictxt,adj(j))
p2ptag = psb_long_swap_tag p2ptag = psb_long_swap_tag
!write(0,*) me, ' First send to ',adj(j),nidx, prc !write(0,*) me, ' First send to ',adj(j),nidx, prc
@ -244,12 +243,6 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info)
& p2ptag, icomm,iret) & p2ptag, icomm,iret)
end if end if
end do end do
!!$ do i = 0, np-1
!!$ if (rvsz(i)>0) then
!!$ ! write(0,*) me, ' First receive from ',i,rvsz(i)
!!$ call psb_rcv(ictxt,rmtidx(hidx(i)+1:hidx(i)+rvsz(i)),i)
!!$ end if
!!$ end do
call mpi_waitall(np,rvhd,p2pstat,iret) call mpi_waitall(np,rvhd,p2pstat,iret)
! !
@ -267,7 +260,6 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info)
do j=1, nadj do j=1, nadj
!write(0,*) me, ' First send to ',adj(j),nidx !write(0,*) me, ' First send to ',adj(j),nidx
if (nidx > 0) then if (nidx > 0) then
!call psb_snd(ictxt,idx(1:nidx),adj(j))
prc = psb_get_rank(ictxt,adj(j)) prc = psb_get_rank(ictxt,adj(j))
p2ptag = psb_int_swap_tag p2ptag = psb_int_swap_tag
!write(0,*) me, ' Posting second receive from ',adj(j),nidx, prc !write(0,*) me, ' Posting second receive from ',adj(j),nidx, prc
@ -282,7 +274,6 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info)
! !
do i = 0, np-1 do i = 0, np-1
if (rvsz(i)>0) then if (rvsz(i)>0) then
!call psb_snd(ictxt,tproc(hidx(i)+1:hidx(i)+rvsz(i)),i)
prc = psb_get_rank(ictxt,i) prc = psb_get_rank(ictxt,i)
p2ptag = psb_int_swap_tag p2ptag = psb_int_swap_tag
!write(0,*) me, ' Second send to ',i,rvsz(i), prc !write(0,*) me, ' Second send to ',i,rvsz(i), prc
@ -297,8 +288,6 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info)
! !
call mpi_waitall(np,rvhd,p2pstat,iret) call mpi_waitall(np,rvhd,p2pstat,iret)
do j = 1, nadj do j = 1, nadj
!write(0,*) me, ' Second receive from ',adj(j), nidx
!if (nidx > 0) call psb_rcv(ictxt,tproc(1:nidx),adj(j))
iprc(1:nidx) = max(iprc(1:nidx), lclidx((j-1)*nidx+1:(j-1)*nidx+nidx)) iprc(1:nidx) = max(iprc(1:nidx), lclidx((j-1)*nidx+1:(j-1)*nidx+nidx))
end do end do
if (debug) write(0,*) me,' End of adjcncy_fnd ',iprc(1:nidx) if (debug) write(0,*) me,' End of adjcncy_fnd ',iprc(1:nidx)

@ -69,8 +69,8 @@ subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info)
integer(psb_lpk_), allocatable :: tidx(:) integer(psb_lpk_), allocatable :: tidx(:)
integer(psb_ipk_), allocatable :: tprc(:), tsmpl(:), ladj(:) integer(psb_ipk_), allocatable :: tprc(:), tsmpl(:), ladj(:)
integer(psb_mpk_) :: icomm, minfo, iictxt integer(psb_mpk_) :: icomm, minfo, iictxt
integer(psb_ipk_) :: i,n_row,n_col,err_act,hsize,ip,isz,j,ipnt, nsampl_out,& integer(psb_ipk_) :: i,n_row,n_col,err_act,ip,j,ipnt, nsampl_out,&
& last_ih, last_j, nv, n_answers, n_rest, nsampl_in, locr_max, & & nv, n_answers, n_rest, nsampl_in, locr_max, &
& nrest_max, nadj, maxspace, mxnsin & nrest_max, nadj, maxspace, mxnsin
integer(psb_lpk_) :: mglob, ih integer(psb_lpk_) :: mglob, ih
integer(psb_ipk_) :: ictxt,np,me, nresp integer(psb_ipk_) :: ictxt,np,me, nresp

Loading…
Cancel
Save