diff --git a/base/internals/psi_adjcncy_fnd_owner.F90 b/base/internals/psi_adjcncy_fnd_owner.F90 index 1995253d..a599cc05 100644 --- a/base/internals/psi_adjcncy_fnd_owner.F90 +++ b/base/internals/psi_adjcncy_fnd_owner.F90 @@ -36,16 +36,8 @@ ! Figure out who owns global indices. ! ! Arguments: -! nv - integer Number of indices required on the calling -! process -! idx(:) - integer Required indices on the calling process. -! Note: the indices should be unique! -! iprc(:) - integer(psb_ipk_), allocatable Output: process identifiers for the corresponding -! indices -! desc_a - type(psb_desc_type). The communication descriptor. -! info - integer. return code. ! -subroutine psi_adjcncy_fnd_owner(idx,iprc,ladj,idxmap,info) +subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) use psb_serial_mod use psb_const_mod use psb_error_mod @@ -61,21 +53,21 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,ladj,idxmap,info) include 'mpif.h' #endif integer(psb_lpk_), intent(in) :: idx(:) - integer(psb_ipk_), allocatable, intent(out) :: iprc(:) - integer(psb_ipk_), intent(in) :: ladj(:) + integer(psb_ipk_), allocatable, intent(out) :: iprc(:) + integer(psb_ipk_), allocatable, intent(inout) :: adj(:) class(psb_indx_map), intent(in) :: idxmap integer(psb_ipk_), intent(out) :: info integer(psb_lpk_), allocatable :: answers(:,:), idxsrch(:,:), rmtidx(:) - integer(psb_ipk_), allocatable :: tproc(:), lclidx(:) + integer(psb_ipk_), allocatable :: tproc(:), lclidx(:), ladj(:) integer(psb_mpk_), allocatable :: hsz(:),hidx(:), & & sdsz(:),sdidx(:), rvsz(:), rvidx(:) integer(psb_mpk_) :: icomm, minfo, iictxt integer(psb_ipk_) :: i,n_row,n_col,err_act,hsize,ip,isz,j, k,& - & last_ih, last_j, nv + & last_ih, last_j, nidx, nrecv, nadj integer(psb_lpk_) :: mglob, ih - integer(psb_ipk_) :: ictxt,np,me, nresp + integer(psb_ipk_) :: ictxt,np,me logical, parameter :: gettime=.false. real(psb_dpk_) :: t0, t1, t2, t3, t4, tamx, tidx character(len=20) :: name @@ -108,14 +100,15 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,ladj,idxmap,info) t0 = psb_wtime() end if - nv = size(idx) - call psb_realloc(nv,iprc,info) + nadj = size(adj) + nidx = size(idx) + call psb_realloc(nidx,iprc,info) if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='psb_realloc') goto 9999 end if iprc = -1 - ! write(0,*) me,name,' Going through ',nv,size(ladj) + ! write(0,*) me,name,' Going through ',nidx,nadj Allocate(hidx(0:np),hsz(np),& & sdsz(0:np-1),sdidx(0:np-1),& @@ -125,35 +118,37 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,ladj,idxmap,info) ! First, send sizes according to adjcncy list ! sdsz = 0 - do j=1, size(ladj) - sdsz(ladj(j)) = nv + do j=1, nadj + sdsz(adj(j)) = nidx end do - !write(0,*)me,' Check on sizes into a2a:',ladj(:),size(ladj),':',sdsz(:) + !write(0,*)me,' Check on sizes into a2a:',adj(:),nadj,':',sdsz(:) call mpi_alltoall(sdsz,1,psb_mpi_mpk_,& & rvsz,1,psb_mpi_mpk_,icomm,minfo) + nrecv = 0 hidx(0) = 0 do i=0, np-1 + if (rvsz(i)>0) nrecv = nrecv + 1 hidx(i+1) = hidx(i) + rvsz(i) end do hsize = hidx(np) - write(0,*)me,' Check on sizes from a2a:',hsize,rvsz(:) + ! write(0,*)me,' Check on sizes from a2a:',hsize,rvsz(:) ! ! Second, allocate buffers and exchange data ! - Allocate(rmtidx(hsize),lclidx(hsize),tproc(max(hsize,nv)),stat=info) + Allocate(rmtidx(hsize),lclidx(hsize),tproc(max(hsize,nidx)),stat=info) if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') goto 9999 end if - do j=1, size(ladj) - write(0,*) me, ' First send to ',ladj(j),nv - if (nv > 0) call psb_snd(ictxt,idx(1:nv),ladj(j)) + do j=1, nadj + !write(0,*) me, ' First send to ',adj(j),nidx + if (nidx > 0) call psb_snd(ictxt,idx(1:nidx),adj(j)) end do do i = 0, np-1 if (rvsz(i)>0) then - write(0,*) me, ' First receive from ',i,rvsz(i) + ! write(0,*) me, ' First receive from ',i,rvsz(i) call psb_rcv(ictxt,rmtidx(hidx(i)+1:hidx(i)+rvsz(i)),i) end if end do @@ -172,7 +167,7 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,ladj,idxmap,info) ! do i = 0, np-1 if (rvsz(i)>0) then - write(0,*) me, ' Second send to ',i,rvsz(i) + !write(0,*) me, ' Second send to ',i,rvsz(i) call psb_snd(ictxt,tproc(hidx(i)+1:hidx(i)+rvsz(i)),i) end if end do @@ -180,11 +175,26 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,ladj,idxmap,info) ! Fifth: receive and combine. MAX works because default ! answer is -1. Reuse tproc ! - do j = 1, size(ladj) - write(0,*) me, ' Second receive from ',ladj(j), nv - if (nv > 0) call psb_rcv(ictxt,tproc(1:nv),ladj(j)) - iprc(1:nv) = max(iprc(1:nv), tproc(1:nv)) + do j = 1, nadj + !write(0,*) me, ' Second receive from ',adj(j), nidx + if (nidx > 0) call psb_rcv(ictxt,tproc(1:nidx),adj(j)) + iprc(1:nidx) = max(iprc(1:nidx), tproc(1:nidx)) end do + + ! + ! Now fix adj to be symmetric + ! + call psb_realloc(nadj+nrecv,ladj,info) + ladj(1:nadj) = adj(1:nadj) + do i=0, np-1 + if (rvsz(i)>0) then + nadj = nadj + 1 + ladj(nadj+1:nadj+nrecv) = i + end if + end do + call psb_msort_unique(ladj,nadj) + call psb_realloc(nadj,adj,info) + adj(1:nadj) = ladj(1:nadj) call psb_erractionrestore(err_act) return diff --git a/base/internals/psi_graph_fnd_owner.F90 b/base/internals/psi_graph_fnd_owner.F90 index 99f90fab..e5265f1f 100644 --- a/base/internals/psi_graph_fnd_owner.F90 +++ b/base/internals/psi_graph_fnd_owner.F90 @@ -71,8 +71,8 @@ subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info) integer(psb_mpk_), allocatable :: hsz(:),hidx(:), & & sdsz(:),sdidx(:), rvsz(:), rvidx(:) integer(psb_mpk_) :: icomm, minfo, iictxt - integer(psb_ipk_) :: i,n_row,n_col,err_act,hsize,ip,isz,j, k,& - & last_ih, last_j, nv, n_answers, n_rest, n_samples, locr_max, nrest_max, nadj, maxspace + integer(psb_ipk_) :: i,n_row,n_col,err_act,hsize,ip,isz,j, nsampl_out,& + & last_ih, last_j, nv, n_answers, n_rest, nsampl_in, locr_max, nrest_max, nadj, maxspace integer(psb_lpk_) :: mglob, ih integer(psb_ipk_) :: ictxt,np,me, nresp logical, parameter :: gettime=.false. @@ -136,53 +136,59 @@ subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info) ! searching through all processes and searching ! in the neighbourood. ! - ! 1. Select a sample to be sent to all processes; sample - ! size is such that the total size is <= maxspace + ! 1. Select a sample such that the total size is <= maxspace + ! sample query is then sent to all processes ! if (me == 0) write(0,*) 'Looping in graph_fnd_owner: ', nrest_max - n_samples = min(n_rest,max(1,(maxspace+np-1)/np)) + nsampl_in = min(n_rest,max(1,(maxspace+np-1)/np)) ! - ! Choose a sample, should it be done in this simplistic way? + ! Choose a sample, should it be done in this simplistic way? + ! Note: nsampl_in is a hint, not an absolute, hence nsampl_out ! - write(0,*) me,' Into first sampling ',n_samples - call psi_get_sample(idx,iprc,tidx,tsmpl,n_samples,k) - n_samples = min(k,n_samples) - write(0,*) me,' From first sampling ',n_samples + write(0,*) me,' Into first sampling ',nsampl_in + call psi_get_sample(idx,iprc,tidx,tsmpl,nsampl_in,nsampl_out) + nsampl_in = min(nsampl_out,nsampl_in) + write(0,*) me,' From first sampling ',nsampl_in ! ! 2. Do a search on all processes; this is supposed to find ! the owning process for all inputs; ! - call psi_a2a_fnd_owner(tidx(1:n_samples),tprc,idxmap,info) - call psi_cpy_out(iprc,tprc,tsmpl,n_samples,k) - if (k /= n_samples) then - write(0,*) me,'Warning: indices not found by a2a_fnd_owner ',k,n_samples + call psi_a2a_fnd_owner(tidx(1:nsampl_in),tprc,idxmap,info) + call psi_cpy_out(iprc,tprc,tsmpl,nsampl_in,nsampl_out) + if (nsampl_out /= nsampl_in) then + write(0,*) me,'Warning: indices not found by a2a_fnd_owner ',nsampl_out,nsampl_in end if - n_answers = n_answers + k + n_answers = n_answers + nsampl_out n_rest = nv - n_answers ! ! 3. Extract the resulting adjacency list and add it to the ! indxmap; ! - ladj = tprc(1:n_samples) + ladj = tprc(1:nsampl_in) call psb_msort_unique(ladj,nadj) + call psb_realloc(nadj,ladj,info) + ! - ! NOTE: should symmetrize the list... + ! 4. Extract again a sample and do a neighbourhood search + ! so that the total size is <= maxspace + ! (will not be exact since nadj varies with process) ! - call idxmap%xtnd_p_adjcncy(ladj(1:nadj)) - + nsampl_in = min(n_rest,max(1,(maxspace+max(1,nadj)-1))/(max(1,nadj))) + write(0,*) me,' Into second sampling ',nsampl_in + call psi_get_sample(idx,iprc,tidx,tsmpl,nsampl_in,nsampl_out) + nsampl_in = min(nsampl_out,nsampl_in) + write(0,*) me,' From second sampling ',nsampl_in ! - ! 4. Extract a sample and do a neighbourhood search so that the total - ! size is <= maxspace - ! (will not be exact since nadj varies with process) + ! NOTE: the obvious place to symmetrize ladj is inside + ! adjcncy_fnd_owner since there we have the + ! data exchange. + ! Hence, the call to idxmap%xtnd only after this one. ! - n_samples = min(n_rest,max(1,(maxspace+max(1,nadj)-1))/(max(1,nadj))) - write(0,*) me,' Into second sampling ',n_samples - call psi_get_sample(idx,iprc,tidx,tsmpl,n_samples,k) - n_samples = min(k,n_samples) - write(0,*) me,' From second sampling ',n_samples - call psi_adjcncy_fnd_owner(tidx(1:n_samples),tprc,ladj(1:nadj),idxmap,info) - call psi_cpy_out(iprc,tprc,tsmpl,n_samples,k) - n_answers = n_answers + k + call psi_adjcncy_fnd_owner(tidx(1:nsampl_in),tprc,ladj,idxmap,info) + call psi_cpy_out(iprc,tprc,tsmpl,nsampl_in,nsampl_out) + call idxmap%xtnd_p_adjcncy(ladj) + + n_answers = n_answers + nsampl_out n_rest = nv - n_answers nrest_max = n_rest call psb_max(ictxt,nrest_max) diff --git a/base/modules/desc/psb_indx_map_mod.f90 b/base/modules/desc/psb_indx_map_mod.f90 index 318929db..dfb660fc 100644 --- a/base/modules/desc/psb_indx_map_mod.f90 +++ b/base/modules/desc/psb_indx_map_mod.f90 @@ -289,12 +289,12 @@ module psb_indx_map_mod end interface interface - subroutine psi_adjcncy_fnd_owner(idx,iprc,ladj,idxmap,info) + subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) import :: psb_indx_map, psb_ipk_, psb_lpk_ implicit none integer(psb_lpk_), intent(in) :: idx(:) - integer(psb_ipk_), allocatable, intent(out) :: iprc(:) - integer(psb_ipk_), intent(in) :: ladj(:) + integer(psb_ipk_), allocatable, intent(out) :: iprc(:) + integer(psb_ipk_), allocatable, intent(inout) :: adj(:) class(psb_indx_map), intent(in) :: idxmap integer(psb_ipk_), intent(out) :: info end subroutine psi_adjcncy_fnd_owner