diff --git a/base/internals/psi_adjcncy_fnd_owner.F90 b/base/internals/psi_adjcncy_fnd_owner.F90 index 56f208d8..42fc2a8c 100644 --- a/base/internals/psi_adjcncy_fnd_owner.F90 +++ b/base/internals/psi_adjcncy_fnd_owner.F90 @@ -53,16 +53,16 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) include 'mpif.h' #endif integer(psb_lpk_), intent(in) :: idx(:) - integer(psb_ipk_), allocatable, intent(out) :: iprc(:) - integer(psb_ipk_), allocatable, intent(inout) :: adj(:) + integer(psb_ipk_), allocatable, intent(out) :: iprc(:) + integer(psb_ipk_), intent(in) :: adj(:) class(psb_indx_map), intent(in) :: idxmap integer(psb_ipk_), intent(out) :: info - integer(psb_lpk_), allocatable :: answers(:,:), idxsrch(:,:), rmtidx(:) - integer(psb_ipk_), allocatable :: tproc(:), lclidx(:), ladj(:) + integer(psb_lpk_), allocatable :: rmtidx(:) + integer(psb_ipk_), allocatable :: tproc(:), lclidx(:) integer(psb_mpk_), allocatable :: hsz(:),hidx(:), & - & sdsz(:),sdidx(:), rvsz(:), rvidx(:) + & sdsz(:), rvsz(:) integer(psb_mpk_) :: icomm, minfo, iictxt integer(psb_ipk_) :: i,n_row,n_col,err_act,hsize,ip,isz,j, k,& & last_ih, last_j, nidx, nrecv, nadj @@ -111,9 +111,7 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) ! write(0,*) me,name,' Going through ',nidx,nadj Allocate(hidx(0:np),hsz(np),& - & sdsz(0:np-1),sdidx(0:np-1),& - & rvsz(0:np-1),rvidx(0:np-1),& - & stat=info) + & sdsz(0:np-1),rvsz(0:np-1),stat=info) ! ! First, send sizes according to adjcncy list ! @@ -125,7 +123,7 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) call mpi_alltoall(sdsz,1,psb_mpi_mpk_,& & rvsz,1,psb_mpi_mpk_,icomm,minfo) - hidx(0) = 0 + hidx(0) = 0 do i=0, np-1 hidx(i+1) = hidx(i) + rvsz(i) end do @@ -178,12 +176,6 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) if (nidx > 0) call psb_rcv(ictxt,tproc(1:nidx),adj(j)) iprc(1:nidx) = max(iprc(1:nidx), tproc(1:nidx)) end do - - ! - ! Now fix adj to be symmetric - ! - call psi_symm_dep_list(rvsz,adj,idxmap,info,flag=psi_symm_flag_inrv_) - if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/base/internals/psi_graph_fnd_owner.F90 b/base/internals/psi_graph_fnd_owner.F90 index e5265f1f..db22f2f8 100644 --- a/base/internals/psi_graph_fnd_owner.F90 +++ b/base/internals/psi_graph_fnd_owner.F90 @@ -71,8 +71,9 @@ subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info) integer(psb_mpk_), allocatable :: hsz(:),hidx(:), & & sdsz(:),sdidx(:), rvsz(:), rvidx(:) integer(psb_mpk_) :: icomm, minfo, iictxt - integer(psb_ipk_) :: i,n_row,n_col,err_act,hsize,ip,isz,j, nsampl_out,& - & last_ih, last_j, nv, n_answers, n_rest, nsampl_in, locr_max, nrest_max, nadj, maxspace + integer(psb_ipk_) :: i,n_row,n_col,err_act,hsize,ip,isz,j,ipnt, nsampl_out,& + & last_ih, last_j, nv, n_answers, n_rest, nsampl_in, locr_max, & + & nrest_max, nadj, maxspace, mxnsin integer(psb_lpk_) :: mglob, ih integer(psb_ipk_) :: ictxt,np,me, nresp logical, parameter :: gettime=.false. @@ -103,12 +104,20 @@ subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info) goto 9999 end if - locr_max = n_row - call psb_max(ictxt,locr_max) ! ! Choice of maxspace should be adjusted to account for a default - ! "sensible" size and/or a user-specified value - maxspace = 2*locr_max + ! "sensible" size and/or a user-specified value + ! + block + integer(psb_ipk_), parameter :: nt=4 + integer(psb_ipk_) :: v(2) + v(1) = n_row + v(2) = psb_cd_get_maxspace() + call psb_max(ictxt,v) + locr_max = v(1) + maxspace = min(nt*locr_max,v(2)) + maxspace = max(maxspace,np) + end block nv = size(idx) call psb_realloc(nv,iprc,info) @@ -139,16 +148,17 @@ subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info) ! 1. Select a sample such that the total size is <= maxspace ! sample query is then sent to all processes ! - if (me == 0) write(0,*) 'Looping in graph_fnd_owner: ', nrest_max + ! if (me == 0) write(0,*) 'Looping in graph_fnd_owner: ', nrest_max nsampl_in = min(n_rest,max(1,(maxspace+np-1)/np)) ! ! Choose a sample, should it be done in this simplistic way? ! Note: nsampl_in is a hint, not an absolute, hence nsampl_out ! - write(0,*) me,' Into first sampling ',nsampl_in - call psi_get_sample(idx,iprc,tidx,tsmpl,nsampl_in,nsampl_out) + ipnt = 1 +!!$ write(0,*) me,' Into first sampling ',nsampl_in + call psi_get_sample(ipnt, idx,iprc,tidx,tsmpl,nsampl_in,nsampl_out) nsampl_in = min(nsampl_out,nsampl_in) - write(0,*) me,' From first sampling ',nsampl_in +!!$ write(0,*) me,' From first sampling ',nsampl_in ! ! 2. Do a search on all processes; this is supposed to find ! the owning process for all inputs; @@ -172,23 +182,27 @@ subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info) ! 4. Extract again a sample and do a neighbourhood search ! so that the total size is <= maxspace ! (will not be exact since nadj varies with process) - ! - nsampl_in = min(n_rest,max(1,(maxspace+max(1,nadj)-1))/(max(1,nadj))) - write(0,*) me,' Into second sampling ',nsampl_in - call psi_get_sample(idx,iprc,tidx,tsmpl,nsampl_in,nsampl_out) - nsampl_in = min(nsampl_out,nsampl_in) - write(0,*) me,' From second sampling ',nsampl_in + ! Need to set up a proper loop here to have a complete + ! sweep over the input vector. ! - ! NOTE: the obvious place to symmetrize ladj is inside - ! adjcncy_fnd_owner since there we have the - ! data exchange. - ! Hence, the call to idxmap%xtnd only after this one. - ! - call psi_adjcncy_fnd_owner(tidx(1:nsampl_in),tprc,ladj,idxmap,info) - call psi_cpy_out(iprc,tprc,tsmpl,nsampl_in,nsampl_out) +!!$ write(0,*) me,' After a2a ',n_rest + nsampl_in = min(n_rest,max(1,(maxspace+max(1,nadj)-1))/(max(1,nadj))) + mxnsin = nsampl_in + call psb_max(ictxt,mxnsin) +!!$ write(0,*) me, ' mxnsin ',mxnsin + if (.false.) then + write(0,*) me,' Into second sampling ',nsampl_in + call psi_get_sample(ipnt, idx,iprc,tidx,tsmpl,nsampl_in,nsampl_out) + nsampl_in = min(nsampl_out,nsampl_in) + write(0,*) me,' From second sampling ',nsampl_in + call psi_adjcncy_fnd_owner(tidx(1:nsampl_in),tprc,ladj,idxmap,info) + call psi_cpy_out(iprc,tprc,tsmpl,nsampl_in,nsampl_out) + n_answers = n_answers + nsampl_out + else + if (mxnsin>0) call psi_adj_fnd_sweep(idx,iprc,ladj,idxmap,nsampl_in,n_answers) + end if call idxmap%xtnd_p_adjcncy(ladj) - n_answers = n_answers + nsampl_out n_rest = nv - n_answers nrest_max = n_rest call psb_max(ictxt,nrest_max) @@ -205,28 +219,40 @@ subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info) return contains - subroutine psi_get_sample(idx,iprc,tidx,tsmpl,ns_in,ns_out) + + subroutine psi_get_sample(ipntidx,idx,iprc,tidx,tsmpl,ns_in,ns_out) implicit none - integer(psb_lpk_), intent(in) :: idx(:) - integer(psb_ipk_), intent(in) :: ns_in, iprc(:) - integer(psb_lpk_), intent(out) :: tidx(:) - integer(psb_ipk_), intent(out) :: tsmpl(:), ns_out + integer(psb_ipk_), intent(inout) :: ipntidx + integer(psb_lpk_), intent(in) :: idx(:) + integer(psb_ipk_), intent(in) :: ns_in, iprc(:) + integer(psb_lpk_), intent(out) :: tidx(:) + integer(psb_ipk_), intent(out) :: tsmpl(:), ns_out ! - integer(psb_ipk_) :: j, nv + integer(psb_ipk_) :: nv, ns nv = size(idx) ! ! Choose a sample, should it be done in this simplistic way? ! + ns = ns_in + ! + ! ns_in == 0 means that on the outside we figure there's + ! nothing left, but we are here because we have to synchronize. + ! Make sure we sweep through the entire vector immediately + ! + if (ns == 0) ns = nv ns_out = 0 - do j=1, nv - if (iprc(j) == -1) then - ns_out = ns_out + 1 - tsmpl(ns_out) = j - tidx(ns_out) = idx(j) + + do while (ipntidx<= nv) + if (iprc(ipntidx) == -1) then + ns_out = ns_out + 1 + tsmpl(ns_out) = ipntidx + tidx(ns_out) = idx(ipntidx) end if - if (ns_out >= ns_in) exit + ipntidx = ipntidx + 1 + if (ns_out >= ns) exit end do + end subroutine psi_get_sample subroutine psi_cpy_out(iprc,tprc,tsmpl,ns_in,ns_out) @@ -246,5 +272,42 @@ contains end if end do end subroutine psi_cpy_out + + subroutine psi_adj_fnd_sweep(idx,iprc,adj,idxmap,n_samples,n_answers) + implicit none + integer(psb_lpk_), intent(in) :: idx(:) + integer(psb_ipk_), intent(in) :: n_samples + integer(psb_ipk_), intent(inout) :: iprc(:), n_answers + integer(psb_ipk_), intent(in) :: adj(:) + class(psb_indx_map), intent(inout) :: idxmap + ! + integer(psb_ipk_) :: ipnt, ns_in, ns_out, n_rem, ictxt, me, np, isw + integer(psb_lpk_), allocatable :: tidx(:) + integer(psb_ipk_), allocatable :: tsmpl(:) + + ictxt = idxmap%get_ctxt() + call psb_info(ictxt,me,np) + call psb_realloc(n_samples,tidx,info) + call psb_realloc(n_samples,tsmpl,info) + ipnt = 1 + isw = 1 + do + !write(0,*) me,' Into sampling ',n_samples + call psi_get_sample(ipnt, idx,iprc,tidx,tsmpl,n_samples,ns_out) + ns_in = min(n_samples,ns_out) + !write(0,*) me,' From second sampling ',ns_out + call psi_adjcncy_fnd_owner(tidx(1:ns_in),tprc,ladj,idxmap,info) + call psi_cpy_out(iprc,tprc,tsmpl,ns_in,ns_out) + write(0,*) me,' Sweep ',isw,' answers:',ns_out + n_answers = n_answers + ns_out + n_rem = size(idx)-ipnt + call psb_max(ictxt,n_rem) + write(0,*) me,' Sweep ',isw,n_rem, ipnt, n_samples + if (n_rem <= 0) exit + isw = isw + 1 + end do + + + end subroutine psi_adj_fnd_sweep end subroutine psi_graph_fnd_owner diff --git a/base/modules/desc/psb_desc_mod.F90 b/base/modules/desc/psb_desc_mod.F90 index f4984cd5..9728e970 100644 --- a/base/modules/desc/psb_desc_mod.F90 +++ b/base/modules/desc/psb_desc_mod.F90 @@ -285,6 +285,14 @@ module psb_desc_mod module procedure psb_cdfree end interface psb_free + interface psb_cd_set_maxspace + module procedure psb_cd_set_maxspace + end interface psb_cd_set_maxspace + + interface psb_cd_get_maxspace + module procedure psb_cd_get_maxspace + end interface psb_cd_get_maxspace + interface psb_cd_set_large_threshold module procedure psb_i_cd_set_large_threshold end interface psb_cd_set_large_threshold @@ -301,7 +309,8 @@ module psb_desc_mod & cd_g2ls2_ins, cd_g2lv1_ins, cd_g2lv2_ins, cd_fnd_owner - integer(psb_lpk_), private, save :: cd_large_threshold=psb_default_large_threshold + integer(psb_lpk_), private, save :: cd_large_threshold = psb_default_large_threshold + integer(psb_ipk_), private, save :: cd_maxspace = 1000*1000 contains @@ -350,10 +359,25 @@ contains function psb_cd_get_large_threshold() result(val) implicit none - integer(psb_ipk_) :: val + integer(psb_lpk_) :: val val = cd_large_threshold end function psb_cd_get_large_threshold + + subroutine psb_cd_set_maxspace(ith) + implicit none + integer(psb_ipk_), intent(in) :: ith + if (ith > 0) then + cd_maxspace = ith + end if + end subroutine psb_cd_set_maxspace + + function psb_cd_get_maxspace() result(val) + implicit none + integer(psb_ipk_) :: val + val = cd_maxspace + end function psb_cd_get_maxspace + function psb_cd_is_large_size(m) result(val) use psb_penv_mod diff --git a/base/modules/desc/psb_indx_map_mod.f90 b/base/modules/desc/psb_indx_map_mod.f90 index 35cca97a..eed22819 100644 --- a/base/modules/desc/psb_indx_map_mod.f90 +++ b/base/modules/desc/psb_indx_map_mod.f90 @@ -317,10 +317,10 @@ module psb_indx_map_mod subroutine psi_symm_dep_list(rvsz,adj,idxmap,info,flag) import :: psb_indx_map, psb_ipk_, psb_lpk_, psb_mpk_ implicit none - integer(psb_mpk_), intent(inout) :: rvsz(:) - integer(psb_ipk_), allocatable, intent(inout) :: adj(:) - class(psb_indx_map), intent(in) :: idxmap - integer(psb_ipk_), intent(out) :: info + integer(psb_mpk_), intent(inout) :: rvsz(:) + integer(psb_ipk_), intent(in) :: adj(:) + class(psb_indx_map), intent(in) :: idxmap + integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: flag end subroutine psi_symm_dep_list end interface diff --git a/test/pargen/psb_d_pde3d.f90 b/test/pargen/psb_d_pde3d.f90 index 74508a87..ec7e1f1b 100644 --- a/test/pargen/psb_d_pde3d.f90 +++ b/test/pargen/psb_d_pde3d.f90 @@ -236,6 +236,7 @@ contains call psb_info(ictxt, iam, np) call psb_cd_set_large_threshold(1000) + call psb_cd_set_maxspace(-1) if (present(f)) then f_ => f