diff --git a/base/internals/psi_a2a_fnd_owner.F90 b/base/internals/psi_a2a_fnd_owner.F90 index 231bc87b..3ee6ef9a 100644 --- a/base/internals/psi_a2a_fnd_owner.F90 +++ b/base/internals/psi_a2a_fnd_owner.F90 @@ -66,16 +66,11 @@ subroutine psi_a2a_fnd_owner(idx,iprc,idxmap,info) integer(psb_ipk_), intent(out) :: info - integer(psb_lpk_), allocatable :: answers(:,:), idxsrch(:,:), hproc(:) - integer(psb_ipk_), allocatable :: helem(:), hhidx(:), tmpadj(:) - integer(psb_mpk_), allocatable :: hsz(:),hidx(:), & - & sdsz(:),sdidx(:), rvsz(:), rvidx(:) + integer(psb_ipk_), allocatable :: tmpadj(:) integer(psb_mpk_) :: icomm, minfo, iictxt - integer(psb_ipk_) :: i,n_row,n_col,err_act,hsize,ip,isz,j, k,& - & last_ih, last_j, nv + integer(psb_ipk_) :: i,n_row,n_col,err_act,nv integer(psb_lpk_) :: mglob, ih integer(psb_ipk_) :: ictxt,np,me, nresp - logical, parameter :: gettime=.false., use_adj=.true. real(psb_dpk_) :: t0, t1, t2, t3, t4, tamx, tidx character(len=20) :: name @@ -103,205 +98,15 @@ subroutine psi_a2a_fnd_owner(idx,iprc,idxmap,info) goto 9999 end if - - if (use_adj) then - ! - ! Reuse the other version by tricking it with an adjcncy list - ! that contains everybody but ME. - ! - nv = size(idx) - call psb_realloc(np-1,tmpadj,info) - tmpadj(1:me) = [(i,i=0,me-1)] - tmpadj(me+1:np-1) = [(i,i=me+1,np-1)] - call psi_adjcncy_fnd_owner(idx,iprc,tmpadj,idxmap,info) - - else - if (gettime) then - t0 = psb_wtime() - end if - nv = size(idx) - call psb_realloc(nv,iprc,info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='psb_realloc') - goto 9999 - end if - - - ! - ! The basic idea is very simple. - ! First we collect (to all) all the requests. - Allocate(hidx(np+1),hsz(np),& - & sdsz(0:np-1),sdidx(0:np-1),& - & rvsz(0:np-1),rvidx(0:np-1),& - & stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - - hsz = 0 - hsz(me+1) = nv - call psb_amx(iictxt,hsz) - hidx(1) = 0 - do i=1, np - hidx(i+1) = hidx(i) + hsz(i) - end do - hsize = hidx(np+1) - Allocate(helem(hsize),hproc(hsize),stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - - if (gettime) then - t3 = psb_wtime() - end if - - call mpi_allgatherv(idx,hsz(me+1),psb_mpi_lpk_,& - & hproc,hsz,hidx,psb_mpi_lpk_,& - & icomm,minfo) - if (gettime) then - tamx = psb_wtime() - t3 - end if - - ! Second, we figure out locally whether we own the indices (whoever is - ! asking for them). - if (gettime) then - t3 = psb_wtime() - end if - - call idxmap%g2l(hproc(1:hsize),helem(1:hsize),info,owned=.true.) - if (gettime) then - tidx = psb_wtime()-t3 - end if - if (info == psb_err_iarray_outside_bounds_) info = psb_success_ - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='psi_idx_cnv') - goto 9999 - end if - - ! Third: we build the answers for those indices we own, - ! with a section for each process asking. - hidx = hidx +1 - j = 0 - do ip = 0, np-1 - sdidx(ip) = j - sdsz(ip) = 0 - do i=hidx(ip+1), hidx(ip+1+1)-1 - if ((0 < helem(i)).and. (helem(i) <= n_row)) then - j = j + 1 - hproc(j) = hproc(i) - sdsz(ip) = sdsz(ip) + 1 - end if - end do - end do - - if (gettime) then - t3 = psb_wtime() - end if - - ! Collect all the answers with alltoallv (need sizes) - call mpi_alltoall(sdsz,1,psb_mpi_mpk_,& - & rvsz,1,psb_mpi_mpk_,icomm,minfo) - - isz = sum(rvsz) - - allocate(answers(isz,2),idxsrch(nv,2),stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - j = 0 - do ip=0, np-1 - rvidx(ip) = j - j = j + rvsz(ip) - end do - call mpi_alltoallv(hproc,sdsz,sdidx,psb_mpi_lpk_,& - & answers(:,1),rvsz,rvidx,psb_mpi_lpk_,& - & icomm,minfo) - if (gettime) then - tamx = psb_wtime() - t3 + tamx - end if - j = 1 - do ip = 0,np-1 - do k=1,rvsz(ip) - answers(j,2) = ip - j = j + 1 - end do - end do - ! Sort the answers and the requests, so we can - ! match them efficiently - call psb_msort(answers(:,1),ix=answers(:,2),& - & flag=psb_sort_keep_idx_) - idxsrch(1:nv,1) = idx(1:nv) - call psb_msort(idxsrch(1:nv,1),ix=idxsrch(1:nv,2)) - - ! Now extract the answers for our local query - last_ih = -1 - last_j = -1 - j = 1 - do i=1, nv - ih = idxsrch(i,1) - if (ih == last_ih) then - iprc(idxsrch(i,2)) = answers(last_j,2) - else - - do - if (j > size(answers,1)) then - ! Last resort attempt. - j = psb_bsrch(ih,size(answers,1,kind=psb_ipk_),answers(:,1)) - if (j == -1) then - write(psb_err_unit,*) me,'psi_fnd_owner: searching for ',ih, & - & 'not found : ',size(answers,1),':',answers(:,1) - info = psb_err_internal_error_ - call psb_errpush(psb_err_internal_error_,& - & name,a_err='out bounds srch ih') - goto 9999 - end if - end if - if (answers(j,1) == ih) exit - if (answers(j,1) > ih) then - k = j - j = psb_bsrch(ih,k,answers(1:k,1)) - if (j == -1) then - write(psb_err_unit,*) me,'psi_fnd_owner: searching for ',ih, & - & 'not found : ',size(answers,1),':',answers(:,1) - info = psb_err_internal_error_ - call psb_errpush(psb_err_internal_error_,name,a_err='out bounds srch ih') - goto 9999 - end if - end if - - j = j + 1 - end do - ! Note that the answers here are given in order - ! of sending process, so we are implicitly getting - ! the max process index in case of overlap. - last_ih = ih - do - last_j = j - iprc(idxsrch(i,2)) = answers(j,2) - j = j + 1 - if (j > size(answers,1)) exit - if (answers(j,1) /= ih) exit - end do - end if - end do - - if (gettime) then - call psb_barrier(ictxt) - t1 = psb_wtime() - t1 = t1 -t0 - tamx - tidx - call psb_amx(ictxt,tamx) - call psb_amx(ictxt,tidx) - call psb_amx(ictxt,t1) - if (me == psb_root_) then - write(psb_out_unit,'(" a2a_owner idx time : ",es10.4)') tidx - write(psb_out_unit,'(" a2a_owner amx time : ",es10.4)') tamx - write(psb_out_unit,'(" a2a_owner remainedr : ",es10.4)') t1 - endif - end if - end if + ! + ! Reuse the other version by tricking it with an adjcncy list + ! that contains everybody but ME. + ! + nv = size(idx) + call psb_realloc(np-1,tmpadj,info) + tmpadj(1:me) = [(i,i=0,me-1)] + tmpadj(me+1:np-1) = [(i,i=me+1,np-1)] + call psi_adjcncy_fnd_owner(idx,iprc,tmpadj,idxmap,info) call psb_erractionrestore(err_act) return diff --git a/base/internals/psi_adjcncy_fnd_owner.F90 b/base/internals/psi_adjcncy_fnd_owner.F90 index 36f8ff20..fd1422f1 100644 --- a/base/internals/psi_adjcncy_fnd_owner.F90 +++ b/base/internals/psi_adjcncy_fnd_owner.F90 @@ -235,7 +235,6 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) end do do j=1, nadj if (nidx > 0) then - !call psb_snd(ictxt,idx(1:nidx),adj(j)) prc = psb_get_rank(ictxt,adj(j)) p2ptag = psb_long_swap_tag !write(0,*) me, ' First send to ',adj(j),nidx, prc @@ -244,12 +243,6 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) & p2ptag, icomm,iret) end if end do -!!$ do i = 0, np-1 -!!$ if (rvsz(i)>0) then -!!$ ! write(0,*) me, ' First receive from ',i,rvsz(i) -!!$ call psb_rcv(ictxt,rmtidx(hidx(i)+1:hidx(i)+rvsz(i)),i) -!!$ end if -!!$ end do call mpi_waitall(np,rvhd,p2pstat,iret) ! @@ -267,7 +260,6 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) do j=1, nadj !write(0,*) me, ' First send to ',adj(j),nidx if (nidx > 0) then - !call psb_snd(ictxt,idx(1:nidx),adj(j)) prc = psb_get_rank(ictxt,adj(j)) p2ptag = psb_int_swap_tag !write(0,*) me, ' Posting second receive from ',adj(j),nidx, prc @@ -282,7 +274,6 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) ! do i = 0, np-1 if (rvsz(i)>0) then - !call psb_snd(ictxt,tproc(hidx(i)+1:hidx(i)+rvsz(i)),i) prc = psb_get_rank(ictxt,i) p2ptag = psb_int_swap_tag !write(0,*) me, ' Second send to ',i,rvsz(i), prc @@ -297,8 +288,6 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) ! call mpi_waitall(np,rvhd,p2pstat,iret) do j = 1, nadj - !write(0,*) me, ' Second receive from ',adj(j), nidx - !if (nidx > 0) call psb_rcv(ictxt,tproc(1:nidx),adj(j)) iprc(1:nidx) = max(iprc(1:nidx), lclidx((j-1)*nidx+1:(j-1)*nidx+nidx)) end do if (debug) write(0,*) me,' End of adjcncy_fnd ',iprc(1:nidx) diff --git a/base/internals/psi_graph_fnd_owner.F90 b/base/internals/psi_graph_fnd_owner.F90 index 971fd1be..7800e3de 100644 --- a/base/internals/psi_graph_fnd_owner.F90 +++ b/base/internals/psi_graph_fnd_owner.F90 @@ -66,13 +66,12 @@ subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info) integer(psb_ipk_), intent(out) :: info - integer(psb_lpk_), allocatable :: answers(:,:), idxsrch(:,:), hproc(:), tidx(:) - integer(psb_ipk_), allocatable :: helem(:), hhidx(:), tprc(:), tsmpl(:), ladj(:) - integer(psb_mpk_), allocatable :: hsz(:),hidx(:), & - & sdsz(:),sdidx(:), rvsz(:), rvidx(:) + integer(psb_lpk_), allocatable :: tidx(:) + integer(psb_ipk_), allocatable :: tprc(:), tsmpl(:), ladj(:) + integer(psb_mpk_), allocatable :: hsz(:),hidx(:) integer(psb_mpk_) :: icomm, minfo, iictxt - integer(psb_ipk_) :: i,n_row,n_col,err_act,hsize,ip,isz,j,ipnt, nsampl_out,& - & last_ih, last_j, nv, n_answers, n_rest, nsampl_in, locr_max, & + integer(psb_ipk_) :: i,n_row,n_col,err_act,ip,j,ipnt, nsampl_out,& + & nv, n_answers, n_rest, nsampl_in, locr_max, & & nrest_max, nadj, maxspace, mxnsin integer(psb_lpk_) :: mglob, ih integer(psb_ipk_) :: ictxt,np,me, nresp