diff --git a/base/internals/Makefile b/base/internals/Makefile index 471fc383..1ebdbcc0 100644 --- a/base/internals/Makefile +++ b/base/internals/Makefile @@ -3,10 +3,11 @@ include ../../Make.inc FOBJS = psi_compute_size.o psi_crea_bnd_elem.o psi_crea_index.o \ psi_crea_ovr_elem.o psi_bld_tmpovrl.o psi_dl_check.o \ psi_bld_tmphalo.o psi_sort_dl.o \ - psi_desc_impl.o psi_list_search.o psi_srtlist.o + psi_indx_map_fnd_owner.o \ + psi_desc_impl.o psi_hash_impl.o psi_list_search.o psi_srtlist.o -MPFOBJS = psi_desc_index.o psi_extrct_dl.o \ - psi_fnd_owner.o psb_indx_map_fnd_owner.o +MPFOBJS = psi_desc_index.o psi_extrct_dl.o psi_fnd_owner.o psi_a2a_fnd_owner.o \ + psi_graph_fnd_owner.o psi_adjcncy_fnd_owner.o psi_symm_dep_list.o LIBDIR=.. INCDIR=.. diff --git a/base/internals/psb_indx_map_fnd_owner.F90 b/base/internals/psb_indx_map_fnd_owner.F90 deleted file mode 100644 index b6b870fa..00000000 --- a/base/internals/psb_indx_map_fnd_owner.F90 +++ /dev/null @@ -1,325 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -! -! File: psi_fnd_owner.f90 -! -! Subroutine: psi_fnd_owner -! Figure out who owns global indices. -! -! Arguments: -! nv - integer Number of indices required on the calling -! process -! idx(:) - integer Required indices on the calling process. -! Note: the indices should be unique! -! iprc(:) - integer(psb_ipk_), allocatable Output: process identifiers for the corresponding -! indices -! desc_a - type(psb_desc_type). The communication descriptor. -! info - integer. return code. -! -subroutine psb_indx_map_fnd_owner(idx,iprc,idxmap,info) - use psb_serial_mod - use psb_const_mod - use psb_error_mod - use psb_penv_mod - use psb_realloc_mod - use psb_indx_map_mod, psb_protect_name => psb_indx_map_fnd_owner -#ifdef MPI_MOD - use mpi -#endif - - implicit none -#ifdef MPI_H - include 'mpif.h' -#endif - integer(psb_lpk_), intent(in) :: idx(:) - integer(psb_ipk_), allocatable, intent(out) :: iprc(:) - class(psb_indx_map), intent(in) :: idxmap - integer(psb_ipk_), intent(out) :: info - - - integer(psb_lpk_), allocatable :: answers(:,:), idxsrch(:,:), hproc(:) - integer(psb_ipk_), allocatable :: helem(:), hhidx(:) - integer(psb_mpk_), allocatable :: hsz(:),hidx(:), & - & sdsz(:),sdidx(:), rvsz(:), rvidx(:) - integer(psb_mpk_) :: icomm, minfo, iictxt - integer(psb_ipk_) :: i,n_row,n_col,err_act,hsize,ip,isz,j, k,& - & last_ih, last_j, nv - integer(psb_lpk_) :: mglob, ih - integer(psb_ipk_) :: ictxt,np,me, nresp - logical, parameter :: gettime=.false. - real(psb_dpk_) :: t0, t1, t2, t3, t4, tamx, tidx - character(len=20) :: name - - info = psb_success_ - name = 'psb_indx_map_fnd_owner' - call psb_erractionsave(err_act) - - ictxt = idxmap%get_ctxt() - icomm = idxmap%get_mpic() - mglob = idxmap%get_gr() - n_row = idxmap%get_lr() - n_col = idxmap%get_lc() - iictxt = ictxt - - call psb_info(ictxt, me, np) - - if (np == -1) then - info = psb_err_context_error_ - call psb_errpush(info,name) - goto 9999 - endif - - if (.not.(idxmap%is_valid())) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='invalid idxmap') - goto 9999 - end if - - if (gettime) then - t0 = psb_wtime() - end if - - nv = size(idx) - call psb_realloc(nv,iprc,info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='psb_realloc') - goto 9999 - end if - - if (associated(idxmap%parts)) then - ! Use function shortcut -!!$ write(0,*) me,trim(name),' indxmap%parts shortcut' - Allocate(hhidx(np), stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - do i=1, nv - call idxmap%parts(idx(i),mglob,np,hhidx,nresp) - if (nresp > 0) then - iprc(i) = hhidx(1) - else - iprc(i) = -1 - end if - end do - - else if (allocated(idxmap%tempvg)) then -!!$ write(0,*) me,trim(name),' indxmap%tempvg shortcut' - ! Use temporary vector - do i=1, nv - iprc(i) = idxmap%tempvg(idx(i)) - end do - - else - - ! - ! The basic idea is very simple. - ! First we collect (to all) all the requests. - Allocate(hidx(np+1),hsz(np),& - & sdsz(0:np-1),sdidx(0:np-1),& - & rvsz(0:np-1),rvidx(0:np-1),& - & stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - - hsz = 0 - hsz(me+1) = nv - call psb_amx(iictxt,hsz) - hidx(1) = 0 - do i=1, np - hidx(i+1) = hidx(i) + hsz(i) - end do - hsize = hidx(np+1) - Allocate(helem(hsize),hproc(hsize),stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - - if (gettime) then - t3 = psb_wtime() - end if - - call mpi_allgatherv(idx,hsz(me+1),psb_mpi_lpk_,& - & hproc,hsz,hidx,psb_mpi_lpk_,& - & icomm,minfo) - if (gettime) then - tamx = psb_wtime() - t3 - end if - - ! Second, we figure out locally whether we own the indices (whoever is - ! asking for them). - if (gettime) then - t3 = psb_wtime() - end if - - call idxmap%g2l(hproc(1:hsize),helem(1:hsize),info,owned=.true.) - if (gettime) then - tidx = psb_wtime()-t3 - end if - if (info == psb_err_iarray_outside_bounds_) info = psb_success_ - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='psi_idx_cnv') - goto 9999 - end if - - ! Third: we build the answers for those indices we own, - ! with a section for each process asking. - hidx = hidx +1 - j = 0 - do ip = 0, np-1 - sdidx(ip) = j - sdsz(ip) = 0 - do i=hidx(ip+1), hidx(ip+1+1)-1 - if ((0 < helem(i)).and. (helem(i) <= n_row)) then - j = j + 1 - hproc(j) = hproc(i) - sdsz(ip) = sdsz(ip) + 1 - end if - end do - end do - - if (gettime) then - t3 = psb_wtime() - end if - - ! Collect all the answers with alltoallv (need sizes) - call mpi_alltoall(sdsz,1,psb_mpi_mpk_,& - & rvsz,1,psb_mpi_mpk_,icomm,minfo) - - isz = sum(rvsz) - - allocate(answers(isz,2),idxsrch(nv,2),stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - j = 0 - do ip=0, np-1 - rvidx(ip) = j - j = j + rvsz(ip) - end do - call mpi_alltoallv(hproc,sdsz,sdidx,psb_mpi_lpk_,& - & answers(:,1),rvsz,rvidx,psb_mpi_lpk_,& - & icomm,minfo) - if (gettime) then - tamx = psb_wtime() - t3 + tamx - end if - j = 1 - do ip = 0,np-1 - do k=1,rvsz(ip) - answers(j,2) = ip - j = j + 1 - end do - end do - ! Sort the answers and the requests, so we can - ! match them efficiently - call psb_msort(answers(:,1),ix=answers(:,2),& - & flag=psb_sort_keep_idx_) - idxsrch(1:nv,1) = idx(1:nv) - call psb_msort(idxsrch(1:nv,1),ix=idxsrch(1:nv,2)) - - ! Now extract the answers for our local query - last_ih = -1 - last_j = -1 - j = 1 - do i=1, nv - ih = idxsrch(i,1) - if (ih == last_ih) then - iprc(idxsrch(i,2)) = answers(last_j,2) - else - - do - if (j > size(answers,1)) then - ! Last resort attempt. - j = psb_bsrch(ih,size(answers,1,kind=psb_ipk_),answers(:,1)) - if (j == -1) then - write(psb_err_unit,*) me,'psi_fnd_owner: searching for ',ih, & - & 'not found : ',size(answers,1),':',answers(:,1) - info = psb_err_internal_error_ - call psb_errpush(psb_err_internal_error_,& - & name,a_err='out bounds srch ih') - goto 9999 - end if - end if - if (answers(j,1) == ih) exit - if (answers(j,1) > ih) then - k = j - j = psb_bsrch(ih,k,answers(1:k,1)) - if (j == -1) then - write(psb_err_unit,*) me,'psi_fnd_owner: searching for ',ih, & - & 'not found : ',size(answers,1),':',answers(:,1) - info = psb_err_internal_error_ - call psb_errpush(psb_err_internal_error_,name,a_err='out bounds srch ih') - goto 9999 - end if - end if - - j = j + 1 - end do - ! Note that the answers here are given in order - ! of sending process, so we are implicitly getting - ! the max process index in case of overlap. - last_ih = ih - do - last_j = j - iprc(idxsrch(i,2)) = answers(j,2) - j = j + 1 - if (j > size(answers,1)) exit - if (answers(j,1) /= ih) exit - end do - end if - end do - end if - if (gettime) then - call psb_barrier(ictxt) - t1 = psb_wtime() - t1 = t1 -t0 - tamx - tidx - call psb_amx(ictxt,tamx) - call psb_amx(ictxt,tidx) - call psb_amx(ictxt,t1) - if (me == psb_root_) then - write(psb_out_unit,'(" fnd_owner idx time : ",es10.4)') tidx - write(psb_out_unit,'(" fnd_owner amx time : ",es10.4)') tamx - write(psb_out_unit,'(" fnd_owner remainedr : ",es10.4)') t1 - endif - end if - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(ictxt,err_act) - - return - -end subroutine psb_indx_map_fnd_owner diff --git a/base/internals/psi_a2a_fnd_owner.F90 b/base/internals/psi_a2a_fnd_owner.F90 new file mode 100644 index 00000000..3ee6ef9a --- /dev/null +++ b/base/internals/psi_a2a_fnd_owner.F90 @@ -0,0 +1,118 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! +! File: psi_fnd_owner.f90 +! +! Subroutine: psi_fnd_owner +! Figure out who owns global indices. +! +! Arguments: +! nv - integer Number of indices required on the calling +! process +! idx(:) - integer Required indices on the calling process. +! Note: the indices should be unique! +! iprc(:) - integer(psb_ipk_), allocatable Output: process identifiers for the corresponding +! indices +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. return code. +! +subroutine psi_a2a_fnd_owner(idx,iprc,idxmap,info) + use psb_serial_mod + use psb_const_mod + use psb_error_mod + use psb_penv_mod + use psb_realloc_mod + use psb_indx_map_mod, psb_protect_name => psi_a2a_fnd_owner +#ifdef MPI_MOD + use mpi +#endif + + implicit none +#ifdef MPI_H + include 'mpif.h' +#endif + integer(psb_lpk_), intent(in) :: idx(:) + integer(psb_ipk_), allocatable, intent(out) :: iprc(:) + class(psb_indx_map), intent(in) :: idxmap + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_), allocatable :: tmpadj(:) + integer(psb_mpk_) :: icomm, minfo, iictxt + integer(psb_ipk_) :: i,n_row,n_col,err_act,nv + integer(psb_lpk_) :: mglob, ih + integer(psb_ipk_) :: ictxt,np,me, nresp + real(psb_dpk_) :: t0, t1, t2, t3, t4, tamx, tidx + character(len=20) :: name + + info = psb_success_ + name = 'psi_a2a_fnd_owner' + call psb_erractionsave(err_act) + + ictxt = idxmap%get_ctxt() + icomm = idxmap%get_mpic() + mglob = idxmap%get_gr() + n_row = idxmap%get_lr() + n_col = idxmap%get_lc() + iictxt = ictxt + + call psb_info(ictxt, me, np) + + if (np == -1) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + + if (.not.(idxmap%is_valid())) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='invalid idxmap') + goto 9999 + end if + + ! + ! Reuse the other version by tricking it with an adjcncy list + ! that contains everybody but ME. + ! + nv = size(idx) + call psb_realloc(np-1,tmpadj,info) + tmpadj(1:me) = [(i,i=0,me-1)] + tmpadj(me+1:np-1) = [(i,i=me+1,np-1)] + call psi_adjcncy_fnd_owner(idx,iprc,tmpadj,idxmap,info) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ictxt,err_act) + + return + +end subroutine psi_a2a_fnd_owner diff --git a/base/internals/psi_adjcncy_fnd_owner.F90 b/base/internals/psi_adjcncy_fnd_owner.F90 new file mode 100644 index 00000000..fd1422f1 --- /dev/null +++ b/base/internals/psi_adjcncy_fnd_owner.F90 @@ -0,0 +1,373 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! +! File: psi_fnd_owner.f90 +! +! Subroutine: psi_fnd_owner +! Figure out who owns global indices. +! +! Arguments: +! +subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) + use psb_serial_mod + use psb_const_mod + use psb_error_mod + use psb_penv_mod + use psb_realloc_mod + use psb_indx_map_mod, psb_protect_name => psi_adjcncy_fnd_owner +#ifdef MPI_MOD + use mpi +#endif + + implicit none +#ifdef MPI_H + include 'mpif.h' +#endif + integer(psb_lpk_), intent(in) :: idx(:) + integer(psb_ipk_), allocatable, intent(out) :: iprc(:) + integer(psb_ipk_), intent(in) :: adj(:) + class(psb_indx_map), intent(in) :: idxmap + integer(psb_ipk_), intent(out) :: info + + + integer(psb_lpk_), allocatable :: rmtidx(:) + integer(psb_ipk_), allocatable :: tproc(:), lclidx(:) + integer(psb_mpk_), allocatable :: hsz(:),hidx(:), sdidx(:), rvidx(:),& + & sdsz(:), rvsz(:), sdhd(:), rvhd(:), p2pstat(:,:) + integer(psb_mpk_) :: prc, p2ptag, iret + integer(psb_mpk_) :: icomm, minfo, iictxt + integer(psb_ipk_) :: i,n_row,n_col,err_act,hsize,ip,isz,j, k,& + & last_ih, last_j, nidx, nrecv, nadj + integer(psb_lpk_) :: mglob, ih + integer(psb_ipk_) :: ictxt,np,me + logical, parameter :: gettime=.false., new_impl=.true. + logical, parameter :: a2av_impl=.true., debug=.false. + real(psb_dpk_) :: t0, t1, t2, t3, t4, tamx, tidx + character(len=20) :: name + + info = psb_success_ + name = 'psi_adjcncy_fnd_owner' + call psb_erractionsave(err_act) + + ictxt = idxmap%get_ctxt() + icomm = idxmap%get_mpic() + mglob = idxmap%get_gr() + n_row = idxmap%get_lr() + n_col = idxmap%get_lc() + iictxt = ictxt + + call psb_info(ictxt, me, np) + + if (np == -1) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + + if (.not.(idxmap%is_valid())) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='invalid idxmap') + goto 9999 + end if + + if (gettime) then + t0 = psb_wtime() + end if + + nadj = size(adj) + nidx = size(idx) + call psb_realloc(nidx,iprc,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='psb_realloc') + goto 9999 + end if + iprc = -1 + ! write(0,*) me,name,' Going through ',nidx,nadj + + if (a2av_impl) then + ! + ! First simple minded version with auxiliary arrays + ! dimensioned on NP. + ! Do the exchange with an alltoallv + ! + ! + Allocate(hidx(0:np),hsz(np),sdsz(0:np-1),rvsz(0:np-1), & + & sdidx(0:np),rvidx(0:np),stat=info) + ! + ! Same send buffer for everybody + ! + sdidx(:) = 0 + ! + ! First, send sizes according to adjcncy list + ! + sdsz = 0 + do j=1, nadj + sdsz(adj(j)) = nidx + end do + !write(0,*)me,' Check on sizes into a2a:',adj(:),nadj,':',sdsz(:) + + call mpi_alltoall(sdsz,1,psb_mpi_mpk_,& + & rvsz,1,psb_mpi_mpk_,icomm,minfo) + rvidx(0) = 0 + do i=0, np-1 + rvidx(i+1) = rvidx(i) + rvsz(i) + end do + hsize = rvidx(np) + ! write(0,*)me,' Check on sizes from a2a:',hsize,rvsz(:) + ! + ! Second, allocate buffers and exchange data + ! + Allocate(rmtidx(hsize),lclidx(max(hsize,nidx*nadj)),& + & tproc(max(hsize,nidx)),stat=info) + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if + + call mpi_alltoallv(idx,sdsz,sdidx,psb_mpi_lpk_,& + & rmtidx,rvsz,rvidx,psb_mpi_lpk_,icomm,iret) + + ! + ! Third, compute local answers + ! + call idxmap%g2l(rmtidx(1:hsize),lclidx(1:hsize),info,owned=.true.) + do i=1, hsize + tproc(i) = -1 + if ((0 < lclidx(i)).and. (lclidx(i) <= n_row)) tproc(i) = me + end do + + ! + ! Fourth, exchange the answers + ! + ! Adjust sdidx for reuse in receiving lclidx array + do i=0,np-1 + sdidx(i+1) = sdidx(i) + sdsz(i) + end do + call mpi_alltoallv(tproc,rvsz,rvidx,psb_mpi_ipk_,& + & lclidx,sdsz,sdidx,psb_mpi_ipk_,icomm,iret) + + do i=0, np-1 + if (sdsz(i)>0) then + ! Must be nidx == sdsz(i) + iprc(1:nidx) = max(iprc(1:nidx), lclidx(sdidx(i)+1:sdidx(i)+sdsz(i))) + end if + end do + + if (debug) write(0,*) me,' End of adjcncy_fnd ',iprc(1:nidx) + else + if (new_impl) then + ! + ! First simple minded version with auxiliary arrays + ! dimensioned on NP. + ! Could it be improved with a loop based on the maximum length + ! of adj(:) ??? + ! + Allocate(hidx(0:np),hsz(np),sdsz(0:np-1),rvsz(0:np-1),& + & sdhd(0:np-1), rvhd(0:np-1), p2pstat(mpi_status_size,0:np-1),& + & stat=info) + sdhd(:) = mpi_request_null + rvhd(:) = mpi_request_null + ! + ! First, send sizes according to adjcncy list + ! + sdsz = 0 + do j=1, nadj + sdsz(adj(j)) = nidx + end do + !write(0,*)me,' Check on sizes into a2a:',adj(:),nadj,':',sdsz(:) + + call mpi_alltoall(sdsz,1,psb_mpi_mpk_,& + & rvsz,1,psb_mpi_mpk_,icomm,minfo) + hidx(0) = 0 + do i=0, np-1 + hidx(i+1) = hidx(i) + rvsz(i) + end do + hsize = hidx(np) + ! write(0,*)me,' Check on sizes from a2a:',hsize,rvsz(:) + ! + ! Second, allocate buffers and exchange data + ! + Allocate(rmtidx(hsize),lclidx(max(hsize,nidx*nadj)),tproc(max(hsize,nidx)),stat=info) + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if + do i = 0, np-1 + if (rvsz(i)>0) then + ! write(0,*) me, ' First receive from ',i,rvsz(i) + prc = psb_get_rank(ictxt,i) + p2ptag = psb_long_swap_tag + !write(0,*) me, ' Posting first receive from ',i,rvsz(i),prc + call mpi_irecv(rmtidx(hidx(i)+1),rvsz(i),& + & psb_mpi_lpk_,prc,& + & p2ptag, icomm,rvhd(i),iret) + end if + end do + do j=1, nadj + if (nidx > 0) then + prc = psb_get_rank(ictxt,adj(j)) + p2ptag = psb_long_swap_tag + !write(0,*) me, ' First send to ',adj(j),nidx, prc + call mpi_send(idx,nidx,& + & psb_mpi_lpk_,prc,& + & p2ptag, icomm,iret) + end if + end do + call mpi_waitall(np,rvhd,p2pstat,iret) + + ! + ! Third, compute local answers + ! + call idxmap%g2l(rmtidx(1:hsize),lclidx(1:hsize),info,owned=.true.) + do i=1, hsize + tproc(i) = -1 + if ((0 < lclidx(i)).and. (lclidx(i) <= n_row)) tproc(i) = me + end do + ! + ! At this point we can reuse lclidx to receive messages + ! + rvhd(:) = mpi_request_null + do j=1, nadj + !write(0,*) me, ' First send to ',adj(j),nidx + if (nidx > 0) then + prc = psb_get_rank(ictxt,adj(j)) + p2ptag = psb_int_swap_tag + !write(0,*) me, ' Posting second receive from ',adj(j),nidx, prc + call mpi_irecv(lclidx((j-1)*nidx+1),nidx, & + & psb_mpi_ipk_,prc,& + & p2ptag, icomm,rvhd(j),iret) + end if + end do + + ! + ! Fourth, send data back; + ! + do i = 0, np-1 + if (rvsz(i)>0) then + prc = psb_get_rank(ictxt,i) + p2ptag = psb_int_swap_tag + !write(0,*) me, ' Second send to ',i,rvsz(i), prc + call mpi_send(tproc(hidx(i)+1),rvsz(i),& + & psb_mpi_ipk_,prc,& + & p2ptag, icomm,iret) + end if + end do + ! + ! Fifth: receive and combine. MAX works because default + ! answer is -1. + ! + call mpi_waitall(np,rvhd,p2pstat,iret) + do j = 1, nadj + iprc(1:nidx) = max(iprc(1:nidx), lclidx((j-1)*nidx+1:(j-1)*nidx+nidx)) + end do + if (debug) write(0,*) me,' End of adjcncy_fnd ',iprc(1:nidx) + + else + + Allocate(hidx(0:np),hsz(np),& + & sdsz(0:np-1),rvsz(0:np-1),stat=info) + ! + ! First, send sizes according to adjcncy list + ! + sdsz = 0 + do j=1, nadj + sdsz(adj(j)) = nidx + end do + !write(0,*)me,' Check on sizes into a2a:',adj(:),nadj,':',sdsz(:) + + call mpi_alltoall(sdsz,1,psb_mpi_mpk_,& + & rvsz,1,psb_mpi_mpk_,icomm,minfo) + hidx(0) = 0 + do i=0, np-1 + hidx(i+1) = hidx(i) + rvsz(i) + end do + hsize = hidx(np) + ! write(0,*)me,' Check on sizes from a2a:',hsize,rvsz(:) + ! + ! Second, allocate buffers and exchange data + ! + Allocate(rmtidx(hsize),lclidx(hsize),tproc(max(hsize,nidx)),stat=info) + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if + do j=1, nadj + !write(0,*) me, ' First send to ',adj(j),nidx + if (nidx > 0) call psb_snd(ictxt,idx(1:nidx),adj(j)) + end do + do i = 0, np-1 + if (rvsz(i)>0) then + ! write(0,*) me, ' First receive from ',i,rvsz(i) + call psb_rcv(ictxt,rmtidx(hidx(i)+1:hidx(i)+rvsz(i)),i) + end if + end do + + ! + ! Third, compute local answers + ! + call idxmap%g2l(rmtidx(1:hsize),lclidx(1:hsize),info,owned=.true.) + do i=1, hsize + tproc(i) = -1 + if ((0 < lclidx(i)).and. (lclidx(i) <= n_row)) tproc(i) = me + end do + + ! + ! Fourth, send data back; + ! + do i = 0, np-1 + if (rvsz(i)>0) then + !write(0,*) me, ' Second send to ',i,rvsz(i) + call psb_snd(ictxt,tproc(hidx(i)+1:hidx(i)+rvsz(i)),i) + end if + end do + ! + ! Fifth: receive and combine. MAX works because default + ! answer is -1. Reuse tproc + ! + do j = 1, nadj + !write(0,*) me, ' Second receive from ',adj(j), nidx + if (nidx > 0) call psb_rcv(ictxt,tproc(1:nidx),adj(j)) + iprc(1:nidx) = max(iprc(1:nidx), tproc(1:nidx)) + end do + end if + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ictxt,err_act) + + return + +end subroutine psi_adjcncy_fnd_owner diff --git a/base/internals/psi_crea_index.f90 b/base/internals/psi_crea_index.f90 index 56be421e..ec783899 100644 --- a/base/internals/psi_crea_index.f90 +++ b/base/internals/psi_crea_index.f90 @@ -85,17 +85,6 @@ subroutine psi_i_crea_index(desc_a,index_in,index_out,nxch,nsnd,nrcv,info) goto 9999 endif - ! allocate dependency list - ! This should be computed more efficiently to save space when - ! the number of processors becomes very high - dl_lda=np+1 - - allocate(dep_list(max(1,dl_lda),0:np),length_dl(0:np),stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - ! ...extract dependence list (ordered list of identifer process ! which every process must communcate with... if (debug_level >= psb_debug_inner_) & @@ -104,7 +93,7 @@ subroutine psi_i_crea_index(desc_a,index_in,index_out,nxch,nsnd,nrcv,info) call psi_extract_dep_list(ictxt,& & desc_a%is_bld(), desc_a%is_upd(),& - & index_in, dep_list,length_dl,np,max(1,dl_lda),mode,info) + & index_in, dep_list,length_dl,dl_lda,mode,info) if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='extrct_dl') goto 9999 @@ -117,7 +106,7 @@ subroutine psi_i_crea_index(desc_a,index_in,index_out,nxch,nsnd,nrcv,info) if (debug_level >= psb_debug_inner_) & & write(debug_unit,*) me,' ',trim(name),': root sorting dep list' - call psi_dl_check(dep_list,max(1,dl_lda),np,length_dl) + call psi_dl_check(dep_list,dl_lda,np,length_dl) ! ....now i can sort dependency lists. call psi_sort_dl(dep_list,length_dl,np,info) diff --git a/base/internals/psi_desc_impl.f90 b/base/internals/psi_desc_impl.f90 index 6cd294f8..b552a8ad 100644 --- a/base/internals/psi_desc_impl.f90 +++ b/base/internals/psi_desc_impl.f90 @@ -187,267 +187,6 @@ subroutine psi_i_cnv_dsc(halo_in,ovrlap_in,ext_in,cdesc, info, mold) end subroutine psi_i_cnv_dsc - -subroutine psi_i_inner_cnvs(x,hashmask,hashv,glb_lc) - use psi_mod, psi_protect_name => psi_i_inner_cnvs - - integer(psb_ipk_), intent(in) :: hashmask,hashv(0:),glb_lc(:,:) - integer(psb_ipk_), intent(inout) :: x - - integer(psb_ipk_) :: i, ih, key, idx,nh,tmp,lb,ub,lm - ! - ! When a large descriptor is assembled the indices - ! are kept in a (hashed) list of ordered lists. - ! Thus we first hash the index, then we do a binary search on the - ! ordered sublist. The hashing is based on the low-order bits - ! for a width of psb_hash_bits - ! - - key = x - ih = iand(key,hashmask) - idx = hashv(ih) - nh = hashv(ih+1) - hashv(ih) - if (nh > 0) then - tmp = -1 - lb = idx - ub = idx+nh-1 - do - if (lb>ub) exit - lm = (lb+ub)/2 - if (key == glb_lc(lm,1)) then - tmp = lm - exit - else if (key 0) then - x = glb_lc(tmp,2) - else - x = tmp - end if -end subroutine psi_i_inner_cnvs - -subroutine psi_i_inner_cnvs2(x,y,hashmask,hashv,glb_lc) - use psi_mod, psi_protect_name => psi_i_inner_cnvs2 - integer(psb_ipk_), intent(in) :: hashmask,hashv(0:),glb_lc(:,:) - integer(psb_ipk_), intent(in) :: x - integer(psb_ipk_), intent(out) :: y - - integer(psb_ipk_) :: i, ih, key, idx,nh,tmp,lb,ub,lm - ! - ! When a large descriptor is assembled the indices - ! are kept in a (hashed) list of ordered lists. - ! Thus we first hash the index, then we do a binary search on the - ! ordered sublist. The hashing is based on the low-order bits - ! for a width of psb_hash_bits - ! - - key = x - ih = iand(key,hashmask) - idx = hashv(ih) - nh = hashv(ih+1) - hashv(ih) - if (nh > 0) then - tmp = -1 - lb = idx - ub = idx+nh-1 - do - if (lb>ub) exit - lm = (lb+ub)/2 - if (key == glb_lc(lm,1)) then - tmp = lm - exit - else if (key 0) then - y = glb_lc(tmp,2) - else - y = tmp - end if -end subroutine psi_i_inner_cnvs2 - - -subroutine psi_i_inner_cnv1(n,x,hashmask,hashv,glb_lc,mask) - use psi_mod, psi_protect_name => psi_i_inner_cnv1 - integer(psb_ipk_), intent(in) :: n,hashmask,hashv(0:),glb_lc(:,:) - logical, intent(in), optional :: mask(:) - integer(psb_ipk_), intent(inout) :: x(:) - - integer(psb_ipk_) :: i, ih, key, idx,nh,tmp,lb,ub,lm - ! - ! When a large descriptor is assembled the indices - ! are kept in a (hashed) list of ordered lists. - ! Thus we first hash the index, then we do a binary search on the - ! ordered sublist. The hashing is based on the low-order bits - ! for a width of psb_hash_bits - ! - if (present(mask)) then - do i=1, n - if (mask(i)) then - key = x(i) - ih = iand(key,hashmask) - idx = hashv(ih) - nh = hashv(ih+1) - hashv(ih) - if (nh > 0) then - tmp = -1 - lb = idx - ub = idx+nh-1 - do - if (lb>ub) exit - lm = (lb+ub)/2 - if (key == glb_lc(lm,1)) then - tmp = lm - exit - else if (key 0) then - x(i) = glb_lc(tmp,2) - else - x(i) = tmp - end if - end if - end do - else - do i=1, n - key = x(i) - ih = iand(key,hashmask) - idx = hashv(ih) - nh = hashv(ih+1) - hashv(ih) - if (nh > 0) then - tmp = -1 - lb = idx - ub = idx+nh-1 - do - if (lb>ub) exit - lm = (lb+ub)/2 - if (key == glb_lc(lm,1)) then - tmp = lm - exit - else if (key 0) then - x(i) = glb_lc(tmp,2) - else - x(i) = tmp - end if - end do - end if -end subroutine psi_i_inner_cnv1 - -subroutine psi_i_inner_cnv2(n,x,y,hashmask,hashv,glb_lc,mask) - use psi_mod, psi_protect_name => psi_i_inner_cnv2 - integer(psb_ipk_), intent(in) :: n, hashmask,hashv(0:),glb_lc(:,:) - logical, intent(in),optional :: mask(:) - integer(psb_ipk_), intent(in) :: x(:) - integer(psb_ipk_), intent(out) :: y(:) - - integer(psb_ipk_) :: i, ih, key, idx,nh,tmp,lb,ub,lm - ! - ! When a large descriptor is assembled the indices - ! are kept in a (hashed) list of ordered lists. - ! Thus we first hash the index, then we do a binary search on the - ! ordered sublist. The hashing is based on the low-order bits - ! for a width of psb_hash_bits - ! - if (present(mask)) then - do i=1, n - if (mask(i)) then - key = x(i) - ih = iand(key,hashmask) - if (ih > ubound(hashv,1) ) then - write(psb_err_unit,*) ' In inner cnv: ',ih,ubound(hashv) - end if - idx = hashv(ih) - nh = hashv(ih+1) - hashv(ih) - if (nh > 0) then - tmp = -1 - lb = idx - ub = idx+nh-1 - do - if (lb>ub) exit - lm = (lb+ub)/2 - if (key == glb_lc(lm,1)) then - tmp = lm - exit - else if (key 0) then - y(i) = glb_lc(tmp,2) - else - y(i) = tmp - end if - end if - end do - else - do i=1, n - key = x(i) - ih = iand(key,hashmask) - if (ih > ubound(hashv,1) ) then - write(psb_err_unit,*) ' In inner cnv: ',ih,ubound(hashv) - end if - idx = hashv(ih) - nh = hashv(ih+1) - hashv(ih) - if (nh > 0) then - tmp = -1 - lb = idx - ub = idx+nh-1 - do - if (lb>ub) exit - lm = (lb+ub)/2 - if (key == glb_lc(lm,1)) then - tmp = lm - exit - else if (key 0) then - y(i) = glb_lc(tmp,2) - else - y(i) = tmp - end if - end do - end if -end subroutine psi_i_inner_cnv2 - subroutine psi_i_bld_ovr_mst(me,ovrlap_elem,mst_idx,info) use psi_mod, psi_protect_name => psi_i_bld_ovr_mst diff --git a/base/internals/psi_extrct_dl.F90 b/base/internals/psi_extrct_dl.F90 index 3f32d9af..ef004dfc 100644 --- a/base/internals/psi_extrct_dl.F90 +++ b/base/internals/psi_extrct_dl.F90 @@ -30,7 +30,7 @@ ! ! subroutine psi_i_extract_dep_list(ictxt,is_bld,is_upd,desc_str,dep_list,& - & length_dl,np,dl_lda,mode,info) + & length_dl,dl_lda,mode,info) ! internal routine ! == = ============= @@ -131,22 +131,21 @@ subroutine psi_i_extract_dep_list(ictxt,is_bld,is_upd,desc_str,dep_list,& include 'mpif.h' #endif ! ....scalar parameters... - logical :: is_bld, is_upd - integer(psb_ipk_) :: ictxt - integer(psb_ipk_) :: np,dl_lda,mode, info - - ! ....array parameters.... - integer(psb_ipk_) :: desc_str(*) - integer(psb_ipk_) :: dep_list(dl_lda,0:np),length_dl(0:np) - integer(psb_ipk_), allocatable :: itmp(:) + logical, intent(in) :: is_bld, is_upd + integer(psb_ipk_), intent(in) :: ictxt,mode + integer(psb_ipk_), intent(out) :: dl_lda + integer(psb_ipk_), intent(in) :: desc_str(*) + integer(psb_ipk_), allocatable, intent(out) :: dep_list(:,:),length_dl(:) + integer(psb_ipk_), intent(out) :: info ! .....local arrays.... integer(psb_ipk_) :: int_err(5) + integer(psb_ipk_), allocatable :: itmp(:) ! .....local scalars... integer(psb_ipk_) :: i,pointer_dep_list,proc,j,err_act integer(psb_ipk_) :: err integer(psb_ipk_) :: debug_level, debug_unit - integer(psb_mpk_) :: iictxt, icomm, me, npr, dl_mpi, minfo + integer(psb_mpk_) :: iictxt, icomm, me, np, dl_mpi, minfo character name*20 name='psi_extrct_dl' @@ -156,7 +155,12 @@ subroutine psi_i_extract_dep_list(ictxt,is_bld,is_upd,desc_str,dep_list,& iictxt = ictxt info = psb_success_ - call psb_info(iictxt,me,npr) + call psb_info(iictxt,me,np) + allocate(itmp(np+1),length_dl(0:np),stat=info) + if (info /= psb_success_) then + info=psb_err_alloc_dealloc_ + goto 9999 + end if do i=0,np length_dl(i) = 0 enddo @@ -176,41 +180,39 @@ subroutine psi_i_extract_dep_list(ictxt,is_bld,is_upd,desc_str,dep_list,& if ((desc_str(i+1) /= 0).or.(desc_str(i+2) /= 0)) then ! ..if number of element to be exchanged !=0 proc=desc_str(i) - if ((proc < 0).or.(proc >= npr)) then + if ((proc < 0).or.(proc >= np)) then if (debug_level >= psb_debug_inner_)& & write(debug_unit,*) me,' ',trim(name),': error ',i,desc_str(i) info = 9999 int_err(1) = i int_err(2) = desc_str(i) - goto 998 + goto 9999 endif ! if((me == 1).and.(proc == 3))write(psb_err_unit,*)'found 3' if (mode == 1) then ! ...search if already exist proc - ! in dep_list(*,me)... + ! in itmp(*)... j=1 do while ((j < pointer_dep_list).and.& - & (dep_list(j,me) /= proc)) + & (itmp(j) /= proc)) j=j+1 enddo if (j == pointer_dep_list) then ! ...if not found..... - dep_list(pointer_dep_list,me)=proc + itmp(pointer_dep_list)=proc pointer_dep_list=pointer_dep_list+1 endif else if (mode == 0) then - if (pointer_dep_list > dl_lda) then - info = psb_err_alloc_dealloc_ - goto 998 - endif - dep_list(pointer_dep_list,me)=proc + itmp(pointer_dep_list)=proc pointer_dep_list=pointer_dep_list+1 endif endif i=i+desc_str(i+1)+2 enddo + else if (is_upd) then + do while (desc_str(i) /= -1) if (debug_level >= psb_debug_inner_) & & write(debug_unit,*) me,' ',trim(name),': looping ',i,desc_str(i) @@ -226,24 +228,16 @@ subroutine psi_i_extract_dep_list(ictxt,is_bld,is_upd,desc_str,dep_list,& ! ...search if already exist proc.... j=1 do while ((j < pointer_dep_list).and.& - & (dep_list(j,me) /= proc)) + & (itmp(j) /= proc)) j=j+1 enddo if (j == pointer_dep_list) then ! ...if not found..... - if (pointer_dep_list > dl_lda) then - info = psb_err_alloc_dealloc_ - goto 998 - endif - dep_list(pointer_dep_list,me)=proc + itmp(pointer_dep_list)=proc pointer_dep_list=pointer_dep_list+1 endif else if (mode == 0) then - if (pointer_dep_list > dl_lda) then - info = psb_err_alloc_dealloc_ - goto 998 - endif - dep_list(pointer_dep_list,me)=proc + itmp(pointer_dep_list)=proc pointer_dep_list=pointer_dep_list+1 endif endif @@ -255,26 +249,26 @@ subroutine psi_i_extract_dep_list(ictxt,is_bld,is_upd,desc_str,dep_list,& endif length_dl(me)=pointer_dep_list-1 - - ! ... check for errors... -998 continue - if (debug_level >= psb_debug_inner_)& - & write(debug_unit,*) me,' ',trim(name),': info ',info - err = info - - if (err /= 0) goto 9999 + dl_lda = max(length_dl(me),1) + call psb_max(iictxt, dl_lda) + ! + ! This doubling of DL_LDA is not 100% safe, + ! but should work most of the time. + ! Will need to be improved later, perhaps move + ! from a 2D allocation (ellpack style) to + ! a 1D allocation (csr like). + ! + dl_lda = min(2*dl_lda,np+1) + allocate(dep_list(dl_lda,0:np),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if call psb_sum(iictxt,length_dl(0:np)) - icomm = psb_get_mpicomm(iictxt) - allocate(itmp(dl_lda),stat=info) - if (info /= psb_success_) then - info=psb_err_alloc_dealloc_ - goto 9999 - endif - itmp(1:dl_lda) = dep_list(1:dl_lda,me) - dl_mpi = dl_lda - call mpi_allgather(itmp,dl_mpi,psb_mpi_ipk_,& - & dep_list,dl_mpi,psb_mpi_ipk_,icomm,minfo) + icomm = psb_get_mpicomm(iictxt) + call mpi_allgather(itmp,dl_lda,psb_mpi_ipk_,& + & dep_list,dl_lda,psb_mpi_ipk_,icomm,minfo) info = minfo if (info == 0) deallocate(itmp,stat=info) if (info /= psb_success_) then diff --git a/base/internals/psi_fnd_owner.F90 b/base/internals/psi_fnd_owner.F90 index 456dd4d4..f2a22e0e 100644 --- a/base/internals/psi_fnd_owner.F90 +++ b/base/internals/psi_fnd_owner.F90 @@ -62,11 +62,11 @@ subroutine psi_fnd_owner(nv,idx,iprc,desc,info) #ifdef MPI_H include 'mpif.h' #endif - integer(psb_ipk_), intent(in) :: nv - integer(psb_lpk_), intent(in) :: idx(:) + integer(psb_ipk_), intent(in) :: nv + integer(psb_lpk_), intent(in) :: idx(:) integer(psb_ipk_), allocatable, intent(out) :: iprc(:) - type(psb_desc_type), intent(in) :: desc - integer(psb_ipk_), intent(out) :: info + type(psb_desc_type), intent(inout) :: desc + integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), allocatable :: hsz(:),hidx(:),helem(:),hproc(:),& diff --git a/base/internals/psi_graph_fnd_owner.F90 b/base/internals/psi_graph_fnd_owner.F90 new file mode 100644 index 00000000..fa7fe47b --- /dev/null +++ b/base/internals/psi_graph_fnd_owner.F90 @@ -0,0 +1,319 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! +! File: psi_fnd_owner.f90 +! +! Subroutine: psi_fnd_owner +! Figure out who owns global indices. +! +! Arguments: +! nv - integer Number of indices required on the calling +! process +! idx(:) - integer Required indices on the calling process. +! Note: the indices should be unique! +! iprc(:) - integer(psb_ipk_), allocatable Output: process identifiers for the corresponding +! indices +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. return code. +! +subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info) + use psb_serial_mod + use psb_const_mod + use psb_error_mod + use psb_penv_mod + use psb_realloc_mod + use psb_desc_mod, psb_protect_name => psi_graph_fnd_owner +#ifdef MPI_MOD + use mpi +#endif + + implicit none +#ifdef MPI_H + include 'mpif.h' +#endif + integer(psb_lpk_), intent(in) :: idx(:) + integer(psb_ipk_), allocatable, intent(out) :: iprc(:) + class(psb_indx_map), intent(inout) :: idxmap + integer(psb_ipk_), intent(out) :: info + + + integer(psb_lpk_), allocatable :: tidx(:) + integer(psb_ipk_), allocatable :: tprc(:), tsmpl(:), ladj(:) + integer(psb_mpk_) :: icomm, minfo, iictxt + integer(psb_ipk_) :: i,n_row,n_col,err_act,ip,j,ipnt, nsampl_out,& + & nv, n_answers, n_rest, nsampl_in, locr_max, & + & nrest_max, nadj, maxspace, mxnsin + integer(psb_lpk_) :: mglob, ih + integer(psb_ipk_) :: ictxt,np,me, nresp + integer(psb_ipk_), parameter :: nt=4 + integer(psb_ipk_) :: tmpv(2) + logical, parameter :: gettime=.false., trace=.false. + real(psb_dpk_) :: t0, t1, t2, t3, t4 + character(len=20) :: name + + info = psb_success_ + name = 'psi_graph_fnd_owner' + call psb_erractionsave(err_act) + + ictxt = idxmap%get_ctxt() + icomm = idxmap%get_mpic() + mglob = idxmap%get_gr() + n_row = idxmap%get_lr() + n_col = idxmap%get_lc() + iictxt = ictxt + + call psb_info(ictxt, me, np) + + if (np == -1) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + + if (.not.(idxmap%is_valid())) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='invalid idxmap') + goto 9999 + end if + + ! + ! Choice of maxspace should be adjusted to account for a default + ! "sensible" size and/or a user-specified value + ! + tmpv(1) = n_row + tmpv(2) = psb_cd_get_maxspace() + call psb_max(ictxt,tmpv) + locr_max = tmpv(1) + maxspace = min(nt*locr_max,tmpv(2)) + maxspace = max(maxspace,np) + if (trace.and.(me == 0)) write(0,*) ' Through graph_fnd_owner with maxspace:',maxspace + ! + ! + ! + nv = size(idx) + call psb_realloc(nv,iprc,info) + if (info == psb_success_) call psb_realloc(nv,tidx,info) + if (info == psb_success_) call psb_realloc(nv,tprc,info) + if (info == psb_success_) call psb_realloc(nv,tsmpl,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='psb_realloc') + goto 9999 + end if + iprc(:) = -1 + n_answers = 0 + ! + ! Start from the adjacncy list + ! + call psb_safe_ab_cpy(idxmap%p_adjcncy,ladj,info) + nadj = psb_size(ladj) + ! This makes ladj allocated with size 0 just in case + call psb_realloc(nadj,ladj,info) + n_rest = nv - n_answers + nrest_max = n_rest + + tmpv(1) = nadj + tmpv(2) = nrest_max + call psb_max(ictxt,tmpv) + if ((tmpv(1) > 0).and.(tmpv(2) >0)) then + ! + ! Do a preliminary run on the user-defined adjacency lists + ! + if (trace.and.(me == 0)) write(0,*) ' Initial sweep on user-defined topology' + nsampl_in = min(n_rest,max(1,(maxspace+max(1,nadj)-1))/(max(1,nadj))) + call psi_adj_fnd_sweep(idx,iprc,ladj,idxmap,nsampl_in,n_answers) + call idxmap%xtnd_p_adjcncy(ladj) + n_rest = nv - n_answers + nrest_max = n_rest + call psb_max(ictxt,nrest_max) + if (trace.and.(me == 0)) write(0,*) ' After initial sweep:',nrest_max + end if + + + fnd_owner_loop: do while (nrest_max>0) + ! + ! The basic idea of this loop is to alternate between + ! searching through all processes and searching + ! in the neighbourood. + ! + ! 1. Select a sample such that the total size is <= maxspace + ! sample query is then sent to all processes + ! + ! if (trace.and.(me == 0)) write(0,*) 'Looping in graph_fnd_owner: ', nrest_max + nsampl_in = min(n_rest,max(1,(maxspace+np-1)/np)) + ! + ! Choose a sample, should it be done in this simplistic way? + ! Note: nsampl_in is a hint, not an absolute, hence nsampl_out + ! + ipnt = 1 +!!$ write(0,*) me,' Into first sampling ',nsampl_in + call psi_get_sample(ipnt, idx,iprc,tidx,tsmpl,nsampl_in,nsampl_out) + nsampl_in = min(nsampl_out,nsampl_in) +!!$ write(0,*) me,' From first sampling ',nsampl_in + ! + ! 2. Do a search on all processes; this is supposed to find + ! the owning process for all inputs; + ! + call psi_a2a_fnd_owner(tidx(1:nsampl_in),tprc,idxmap,info) + call psi_cpy_out(iprc,tprc,tsmpl,nsampl_in,nsampl_out) + if (nsampl_out /= nsampl_in) then + write(0,*) me,'Warning: indices not found by a2a_fnd_owner ',nsampl_out,nsampl_in + end if + n_answers = n_answers + nsampl_out + n_rest = nv - n_answers + ! + ! 3. Extract the resulting adjacency list and add it to the + ! indxmap; + ! + ladj = tprc(1:nsampl_in) + call psb_msort_unique(ladj,nadj) + call psb_realloc(nadj,ladj,info) + + ! + ! 4. Extract again a sample and do a neighbourhood search + ! so that the total size is <= maxspace + ! (will not be exact since nadj varies with process) + ! Need to set up a proper loop here to have a complete + ! sweep over the input vector. Done inside adj_fnd_sweep. + ! +!!$ write(0,*) me,' After a2a ',n_rest + nsampl_in = min(n_rest,max(1,(maxspace+max(1,nadj)-1))/(max(1,nadj))) + mxnsin = nsampl_in + call psb_max(ictxt,mxnsin) +!!$ write(0,*) me, ' mxnsin ',mxnsin + if (mxnsin>0) call psi_adj_fnd_sweep(idx,iprc,ladj,idxmap,nsampl_in,n_answers) + call idxmap%xtnd_p_adjcncy(ladj) + + n_rest = nv - n_answers + nrest_max = n_rest + call psb_max(ictxt,nrest_max) + if (trace.and.(me == 0)) write(0,*) ' fnd_owner_loop remaining:',nrest_max + end do fnd_owner_loop + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ictxt,err_act) + + return + +contains + + subroutine psi_get_sample(ipntidx,idx,iprc,tidx,tsmpl,ns_in,ns_out) + implicit none + integer(psb_ipk_), intent(inout) :: ipntidx + integer(psb_lpk_), intent(in) :: idx(:) + integer(psb_ipk_), intent(in) :: ns_in, iprc(:) + integer(psb_lpk_), intent(out) :: tidx(:) + integer(psb_ipk_), intent(out) :: tsmpl(:), ns_out + ! + integer(psb_ipk_) :: nv, ns + + nv = size(idx) + ! + ! Choose a sample, should it be done in this simplistic way? + ! + ns = ns_in + ! + ! ns_in == 0 means that on the outside we figure there's + ! nothing left, but we are here because we have to synchronize. + ! Make sure we sweep through the entire vector immediately + ! + if (ns == 0) ns = nv + ns_out = 0 + + do while (ipntidx<= nv) + if (iprc(ipntidx) == -1) then + ns_out = ns_out + 1 + tsmpl(ns_out) = ipntidx + tidx(ns_out) = idx(ipntidx) + end if + ipntidx = ipntidx + 1 + if (ns_out >= ns) exit + end do + + end subroutine psi_get_sample + + subroutine psi_cpy_out(iprc,tprc,tsmpl,ns_in,ns_out) + implicit none + integer(psb_ipk_), intent(out) :: iprc(:) + integer(psb_ipk_), intent(in) :: ns_in + integer(psb_ipk_), intent(in) :: tprc(:), tsmpl(:) + integer(psb_ipk_), intent(out) :: ns_out + + integer(psb_ipk_) :: j + + ns_out = 0 + do j=1, ns_in + if (tprc(j) /= -1) then + ns_out = ns_out + 1 + iprc(tsmpl(j)) = tprc(j) + end if + end do + end subroutine psi_cpy_out + + subroutine psi_adj_fnd_sweep(idx,iprc,adj,idxmap,n_samples,n_answers) + implicit none + integer(psb_lpk_), intent(in) :: idx(:) + integer(psb_ipk_), intent(in) :: n_samples + integer(psb_ipk_), intent(inout) :: iprc(:), n_answers + integer(psb_ipk_), intent(in) :: adj(:) + class(psb_indx_map), intent(inout) :: idxmap + ! + integer(psb_ipk_) :: ipnt, ns_in, ns_out, n_rem, ictxt, me, np, isw + integer(psb_lpk_), allocatable :: tidx(:) + integer(psb_ipk_), allocatable :: tsmpl(:) + + ictxt = idxmap%get_ctxt() + call psb_info(ictxt,me,np) + call psb_realloc(n_samples,tidx,info) + call psb_realloc(n_samples,tsmpl,info) + ipnt = 1 + isw = 1 + do + !write(0,*) me,' Into sampling ',n_samples + call psi_get_sample(ipnt, idx,iprc,tidx,tsmpl,n_samples,ns_out) + ns_in = min(n_samples,ns_out) + !write(0,*) me,' From second sampling ',ns_out + call psi_adjcncy_fnd_owner(tidx(1:ns_in),tprc,ladj,idxmap,info) + call psi_cpy_out(iprc,tprc,tsmpl,ns_in,ns_out) + !write(0,*) me,' Sweep ',isw,' answers:',ns_out + n_answers = n_answers + ns_out + n_rem = size(idx)-ipnt + call psb_max(ictxt,n_rem) + !write(0,*) me,' Sweep ',isw,n_rem, ipnt, n_samples + if (n_rem <= 0) exit + isw = isw + 1 + end do + + + end subroutine psi_adj_fnd_sweep + +end subroutine psi_graph_fnd_owner diff --git a/base/internals/psi_hash_impl.f90 b/base/internals/psi_hash_impl.f90 new file mode 100644 index 00000000..ed650e62 --- /dev/null +++ b/base/internals/psi_hash_impl.f90 @@ -0,0 +1,291 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! + +subroutine psi_i_inner_cnvs(x,hashmask,hashv,glb_lc) + use psi_mod, psi_protect_name => psi_i_inner_cnvs + + integer(psb_ipk_), intent(in) :: hashmask,hashv(0:),glb_lc(:,:) + integer(psb_ipk_), intent(inout) :: x + + integer(psb_ipk_) :: i, ih, key, idx,nh,tmp,lb,ub,lm + ! + ! When a large descriptor is assembled the indices + ! are kept in a (hashed) list of ordered lists. + ! Thus we first hash the index, then we do a binary search on the + ! ordered sublist. The hashing is based on the low-order bits + ! for a width of psb_hash_bits + ! + + key = x + ih = iand(key,hashmask) + idx = hashv(ih) + nh = hashv(ih+1) - hashv(ih) + if (nh > 0) then + tmp = -1 + lb = idx + ub = idx+nh-1 + do + if (lb>ub) exit + lm = (lb+ub)/2 + if (key == glb_lc(lm,1)) then + tmp = lm + exit + else if (key 0) then + x = glb_lc(tmp,2) + else + x = tmp + end if +end subroutine psi_i_inner_cnvs + +subroutine psi_i_inner_cnvs2(x,y,hashmask,hashv,glb_lc) + use psi_mod, psi_protect_name => psi_i_inner_cnvs2 + integer(psb_ipk_), intent(in) :: hashmask,hashv(0:),glb_lc(:,:) + integer(psb_ipk_), intent(in) :: x + integer(psb_ipk_), intent(out) :: y + + integer(psb_ipk_) :: i, ih, key, idx,nh,tmp,lb,ub,lm + ! + ! When a large descriptor is assembled the indices + ! are kept in a (hashed) list of ordered lists. + ! Thus we first hash the index, then we do a binary search on the + ! ordered sublist. The hashing is based on the low-order bits + ! for a width of psb_hash_bits + ! + + key = x + ih = iand(key,hashmask) + idx = hashv(ih) + nh = hashv(ih+1) - hashv(ih) + if (nh > 0) then + tmp = -1 + lb = idx + ub = idx+nh-1 + do + if (lb>ub) exit + lm = (lb+ub)/2 + if (key == glb_lc(lm,1)) then + tmp = lm + exit + else if (key 0) then + y = glb_lc(tmp,2) + else + y = tmp + end if +end subroutine psi_i_inner_cnvs2 + + +subroutine psi_i_inner_cnv1(n,x,hashmask,hashv,glb_lc,mask) + use psi_mod, psi_protect_name => psi_i_inner_cnv1 + integer(psb_ipk_), intent(in) :: n,hashmask,hashv(0:),glb_lc(:,:) + logical, intent(in), optional :: mask(:) + integer(psb_ipk_), intent(inout) :: x(:) + + integer(psb_ipk_) :: i, ih, key, idx,nh,tmp,lb,ub,lm + ! + ! When a large descriptor is assembled the indices + ! are kept in a (hashed) list of ordered lists. + ! Thus we first hash the index, then we do a binary search on the + ! ordered sublist. The hashing is based on the low-order bits + ! for a width of psb_hash_bits + ! + if (present(mask)) then + do i=1, n + if (mask(i)) then + key = x(i) + ih = iand(key,hashmask) + idx = hashv(ih) + nh = hashv(ih+1) - hashv(ih) + if (nh > 0) then + tmp = -1 + lb = idx + ub = idx+nh-1 + do + if (lb>ub) exit + lm = (lb+ub)/2 + if (key == glb_lc(lm,1)) then + tmp = lm + exit + else if (key 0) then + x(i) = glb_lc(tmp,2) + else + x(i) = tmp + end if + end if + end do + else + do i=1, n + key = x(i) + ih = iand(key,hashmask) + idx = hashv(ih) + nh = hashv(ih+1) - hashv(ih) + if (nh > 0) then + tmp = -1 + lb = idx + ub = idx+nh-1 + do + if (lb>ub) exit + lm = (lb+ub)/2 + if (key == glb_lc(lm,1)) then + tmp = lm + exit + else if (key 0) then + x(i) = glb_lc(tmp,2) + else + x(i) = tmp + end if + end do + end if +end subroutine psi_i_inner_cnv1 + +subroutine psi_i_inner_cnv2(n,x,y,hashmask,hashv,glb_lc,mask) + use psi_mod, psi_protect_name => psi_i_inner_cnv2 + integer(psb_ipk_), intent(in) :: n, hashmask,hashv(0:),glb_lc(:,:) + logical, intent(in),optional :: mask(:) + integer(psb_ipk_), intent(in) :: x(:) + integer(psb_ipk_), intent(out) :: y(:) + + integer(psb_ipk_) :: i, ih, key, idx,nh,tmp,lb,ub,lm + ! + ! When a large descriptor is assembled the indices + ! are kept in a (hashed) list of ordered lists. + ! Thus we first hash the index, then we do a binary search on the + ! ordered sublist. The hashing is based on the low-order bits + ! for a width of psb_hash_bits + ! + if (present(mask)) then + do i=1, n + if (mask(i)) then + key = x(i) + ih = iand(key,hashmask) + if (ih > ubound(hashv,1) ) then + write(psb_err_unit,*) ' In inner cnv: ',ih,ubound(hashv) + end if + idx = hashv(ih) + nh = hashv(ih+1) - hashv(ih) + if (nh > 0) then + tmp = -1 + lb = idx + ub = idx+nh-1 + do + if (lb>ub) exit + lm = (lb+ub)/2 + if (key == glb_lc(lm,1)) then + tmp = lm + exit + else if (key 0) then + y(i) = glb_lc(tmp,2) + else + y(i) = tmp + end if + end if + end do + else + do i=1, n + key = x(i) + ih = iand(key,hashmask) + if (ih > ubound(hashv,1) ) then + write(psb_err_unit,*) ' In inner cnv: ',ih,ubound(hashv) + end if + idx = hashv(ih) + nh = hashv(ih+1) - hashv(ih) + if (nh > 0) then + tmp = -1 + lb = idx + ub = idx+nh-1 + do + if (lb>ub) exit + lm = (lb+ub)/2 + if (key == glb_lc(lm,1)) then + tmp = lm + exit + else if (key 0) then + y(i) = glb_lc(tmp,2) + else + y(i) = tmp + end if + end do + end if +end subroutine psi_i_inner_cnv2 diff --git a/base/internals/psi_indx_map_fnd_owner.F90 b/base/internals/psi_indx_map_fnd_owner.F90 new file mode 100644 index 00000000..014306ff --- /dev/null +++ b/base/internals/psi_indx_map_fnd_owner.F90 @@ -0,0 +1,162 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! +! File: psi_fnd_owner.f90 +! +! Subroutine: psi_fnd_owner +! Figure out who owns global indices. +! +! Arguments: +! nv - integer Number of indices required on the calling +! process +! idx(:) - integer Required indices on the calling process. +! Note: the indices should be unique! +! iprc(:) - integer(psb_ipk_), allocatable Output: process identifiers for the corresponding +! indices +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. return code. +! +subroutine psi_indx_map_fnd_owner(idx,iprc,idxmap,info) + use psb_serial_mod + use psb_const_mod + use psb_error_mod + use psb_penv_mod + use psb_realloc_mod + use psb_indx_map_mod, psb_protect_name => psi_indx_map_fnd_owner +#ifdef MPI_MOD + use mpi +#endif + + implicit none +#ifdef MPI_H + include 'mpif.h' +#endif + integer(psb_lpk_), intent(in) :: idx(:) + integer(psb_ipk_), allocatable, intent(out) :: iprc(:) + class(psb_indx_map), intent(inout) :: idxmap + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_), allocatable :: hhidx(:) + integer(psb_mpk_) :: icomm, minfo, iictxt + integer(psb_ipk_) :: i, err_act, hsize, nv + integer(psb_lpk_) :: mglob + integer(psb_ipk_) :: ictxt,np,me, nresp + logical, parameter :: gettime=.false. + real(psb_dpk_) :: t0, t1, t2, t3, t4, tamx, tidx + character(len=20) :: name + + info = psb_success_ + name = 'psb_indx_map_fnd_owner' + call psb_erractionsave(err_act) + + ictxt = idxmap%get_ctxt() + icomm = idxmap%get_mpic() + mglob = idxmap%get_gr() + iictxt = ictxt + + call psb_info(ictxt, me, np) + + if (np == -1) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + + if (.not.(idxmap%is_valid())) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='invalid idxmap') + goto 9999 + end if + + if (gettime) then + t0 = psb_wtime() + end if + + nv = size(idx) + call psb_realloc(nv,iprc,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='psb_realloc') + goto 9999 + end if + + if (associated(idxmap%parts)) then + ! Use function shortcut +!!$ write(0,*) me,trim(name),' indxmap%parts shortcut' + Allocate(hhidx(np), stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if + do i=1, nv + call idxmap%parts(idx(i),mglob,np,hhidx,nresp) + if (nresp > 0) then + iprc(i) = hhidx(1) + else + iprc(i) = -1 + end if + end do + + else if (allocated(idxmap%tempvg)) then +!!$ write(0,*) me,trim(name),' indxmap%tempvg shortcut' + ! Use temporary vector + do i=1, nv + iprc(i) = idxmap%tempvg(idx(i)) + end do + + else + + call psi_graph_fnd_owner(idx,iprc,idxmap,info) + + end if + + if (gettime) then + call psb_barrier(ictxt) + t1 = psb_wtime() + t1 = t1 -t0 - tamx - tidx + call psb_amx(ictxt,tamx) + call psb_amx(ictxt,tidx) + call psb_amx(ictxt,t1) + if (me == psb_root_) then + write(psb_out_unit,'(" fnd_owner idx time : ",es10.4)') tidx + write(psb_out_unit,'(" fnd_owner amx time : ",es10.4)') tamx + write(psb_out_unit,'(" fnd_owner remainedr : ",es10.4)') t1 + endif + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ictxt,err_act) + + return + +end subroutine psi_indx_map_fnd_owner diff --git a/base/internals/psi_symm_dep_list.F90 b/base/internals/psi_symm_dep_list.F90 new file mode 100644 index 00000000..c25f61e5 --- /dev/null +++ b/base/internals/psi_symm_dep_list.F90 @@ -0,0 +1,162 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! +! File: psi_fnd_owner.f90 +! +! Subroutine: psi_fnd_owner +! Figure out who owns global indices. +! +! Arguments: +! +subroutine psi_symm_dep_list(rvsz,adj,idxmap,info,flag) + use psb_serial_mod + use psb_const_mod + use psb_error_mod + use psb_penv_mod + use psb_realloc_mod + use psb_indx_map_mod, psb_protect_name => psi_symm_dep_list +#ifdef MPI_MOD + use mpi +#endif + + implicit none +#ifdef MPI_H + include 'mpif.h' +#endif + integer(psb_mpk_), intent(inout) :: rvsz(0:) + integer(psb_ipk_), allocatable, intent(inout) :: adj(:) + class(psb_indx_map), intent(in) :: idxmap + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: flag + + ! + integer(psb_ipk_), allocatable :: ladj(:) + integer(psb_mpk_), allocatable :: sdsz(:) + integer(psb_mpk_) :: icomm, minfo, iictxt + integer(psb_ipk_) :: i,n_row,n_col,err_act,hsize,ip,& + & last_ih, last_j, nidx, nrecv, nadj, flag_ + integer(psb_lpk_) :: mglob, ih + integer(psb_ipk_) :: ictxt,np,me + character(len=20) :: name + + info = psb_success_ + name = 'psi_symm_dep_list' + call psb_erractionsave(err_act) + + ictxt = idxmap%get_ctxt() + icomm = idxmap%get_mpic() + mglob = idxmap%get_gr() + n_row = idxmap%get_lr() + n_col = idxmap%get_lc() + iictxt = ictxt + + call psb_info(ictxt, me, np) + + if (np == -1) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + + if (.not.(idxmap%is_valid())) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='invalid idxmap') + goto 9999 + end if + + + nadj = size(adj) + if (size(rvsz) 0) nrecv = nrecv + 1 + end do + + ! + ! Now fix adj to be symmetric + ! + call psb_realloc(nadj+nrecv,ladj,info) + ladj(1:nadj) = adj(1:nadj) + do ip=0, np-1 + if (rvsz(ip)>0) then + nadj = nadj + 1 + ladj(nadj+1:nadj+nrecv) = ip + end if + end do + call psb_msort_unique(ladj,nadj) + call psb_realloc(nadj,adj,info) + adj(1:nadj) = ladj(1:nadj) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ictxt,err_act) + + return + +end subroutine psi_symm_dep_list diff --git a/base/modules/Makefile b/base/modules/Makefile index b6ab52ea..21525a3c 100644 --- a/base/modules/Makefile +++ b/base/modules/Makefile @@ -261,7 +261,7 @@ psi_z_mod.o: desc/psb_desc_mod.o serial/psb_z_vect_mod.o comm/psi_z_comm_a_mod.o psi_mod.o: psb_penv_mod.o desc/psb_desc_mod.o auxil/psi_serial_mod.o serial/psb_serial_mod.o\ psi_i_mod.o psi_l_mod.o psi_s_mod.o psi_d_mod.o psi_c_mod.o psi_z_mod.o -desc/psb_indx_map_mod.o: desc/psb_desc_const_mod.o psb_error_mod.o psb_penv_mod.o psb_realloc_mod.o +desc/psb_indx_map_mod.o: desc/psb_desc_const_mod.o psb_error_mod.o psb_penv_mod.o psb_realloc_mod.o auxil/psb_sort_mod.o desc/psb_hash_map_mod.o desc/psb_list_map_mod.o desc/psb_repl_map_mod.o desc/psb_gen_block_map_mod.o:\ desc/psb_indx_map_mod.o desc/psb_desc_const_mod.o \ auxil/psb_sort_mod.o psb_penv_mod.o diff --git a/base/modules/desc/psb_desc_mod.F90 b/base/modules/desc/psb_desc_mod.F90 index 7170af96..9728e970 100644 --- a/base/modules/desc/psb_desc_mod.F90 +++ b/base/modules/desc/psb_desc_mod.F90 @@ -233,6 +233,9 @@ module psb_desc_mod procedure, pass(desc) :: get_global_rows => psb_cd_get_global_rows procedure, pass(desc) :: get_global_cols => psb_cd_get_global_cols procedure, pass(desc) :: get_global_indices => psb_cd_get_global_indices + procedure, pass(desc) :: get_p_adjcncy => cd_get_p_adjcncy + procedure, pass(desc) :: set_p_adjcncy => cd_set_p_adjcncy + procedure, pass(desc) :: xtnd_p_adjcncy => cd_xtnd_p_adjcncy procedure, pass(desc) :: a_get_list => psb_cd_get_list procedure, pass(desc) :: v_get_list => psb_cd_v_get_list generic, public :: get_list => a_get_list, v_get_list @@ -282,6 +285,14 @@ module psb_desc_mod module procedure psb_cdfree end interface psb_free + interface psb_cd_set_maxspace + module procedure psb_cd_set_maxspace + end interface psb_cd_set_maxspace + + interface psb_cd_get_maxspace + module procedure psb_cd_get_maxspace + end interface psb_cd_get_maxspace + interface psb_cd_set_large_threshold module procedure psb_i_cd_set_large_threshold end interface psb_cd_set_large_threshold @@ -298,7 +309,8 @@ module psb_desc_mod & cd_g2ls2_ins, cd_g2lv1_ins, cd_g2lv2_ins, cd_fnd_owner - integer(psb_lpk_), private, save :: cd_large_threshold=psb_default_large_threshold + integer(psb_lpk_), private, save :: cd_large_threshold = psb_default_large_threshold + integer(psb_ipk_), private, save :: cd_maxspace = 1000*1000 contains @@ -347,10 +359,25 @@ contains function psb_cd_get_large_threshold() result(val) implicit none - integer(psb_ipk_) :: val + integer(psb_lpk_) :: val val = cd_large_threshold end function psb_cd_get_large_threshold + + subroutine psb_cd_set_maxspace(ith) + implicit none + integer(psb_ipk_), intent(in) :: ith + if (ith > 0) then + cd_maxspace = ith + end if + end subroutine psb_cd_set_maxspace + + function psb_cd_get_maxspace() result(val) + implicit none + integer(psb_ipk_) :: val + val = cd_maxspace + end function psb_cd_get_maxspace + function psb_cd_is_large_size(m) result(val) use psb_penv_mod @@ -557,9 +584,7 @@ contains end function psb_cd_get_global_indices - - - + function cd_get_fmt(desc) result(val) implicit none character(len=5) :: val @@ -620,6 +645,35 @@ contains end function psb_cd_get_mpic + function cd_get_p_adjcncy(desc) result(val) + implicit none + integer(psb_ipk_), allocatable :: val(:) + class(psb_desc_type), intent(in) :: desc + + if (allocated(desc%indxmap)) then + val = desc%indxmap%get_p_adjcncy() + endif + + end function cd_get_p_adjcncy + + subroutine cd_set_p_adjcncy(desc,val) + implicit none + class(psb_desc_type), intent(inout) :: desc + integer(psb_ipk_), intent(in) :: val(:) + if (allocated(desc%indxmap)) then + call desc%indxmap%xtnd_p_adjcncy(val) + endif + end subroutine cd_set_p_adjcncy + + subroutine cd_xtnd_p_adjcncy(desc,val) + implicit none + class(psb_desc_type), intent(inout) :: desc + integer(psb_ipk_), intent(in) :: val(:) + if (allocated(desc%indxmap)) then + call desc%indxmap%xtnd_p_adjcncy(val) + endif + end subroutine cd_xtnd_p_adjcncy + subroutine psb_cd_set_ovl_asb(desc,info) ! ! Change state of a descriptor into ovl_build. @@ -1716,10 +1770,10 @@ contains subroutine cd_fnd_owner(idx,iprc,desc,info) use psb_error_mod implicit none - integer(psb_lpk_), intent(in) :: idx(:) + integer(psb_lpk_), intent(in) :: idx(:) integer(psb_ipk_), allocatable, intent(out) :: iprc(:) - class(psb_desc_type), intent(in) :: desc - integer(psb_ipk_), intent(out) :: info + class(psb_desc_type), intent(inout) :: desc + integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act character(len=20) :: name='cd_fnd_owner' logical, parameter :: debug=.false. diff --git a/base/modules/desc/psb_gen_block_map_mod.F90 b/base/modules/desc/psb_gen_block_map_mod.F90 index 39be9e64..258a69a6 100644 --- a/base/modules/desc/psb_gen_block_map_mod.F90 +++ b/base/modules/desc/psb_gen_block_map_mod.F90 @@ -1934,10 +1934,10 @@ contains subroutine block_fnd_owner(idx,iprc,idxmap,info) use psb_penv_mod implicit none - integer(psb_lpk_), intent(in) :: idx(:) + integer(psb_lpk_), intent(in) :: idx(:) integer(psb_ipk_), allocatable, intent(out) :: iprc(:) - class(psb_gen_block_map), intent(in) :: idxmap - integer(psb_ipk_), intent(out) :: info + class(psb_gen_block_map), intent(inout) :: idxmap + integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: ictxt, iam, np, nv, ip, i integer(psb_lpk_) :: tidx diff --git a/base/modules/desc/psb_glist_map_mod.f90 b/base/modules/desc/psb_glist_map_mod.f90 index 36f0c030..e894d898 100644 --- a/base/modules/desc/psb_glist_map_mod.f90 +++ b/base/modules/desc/psb_glist_map_mod.f90 @@ -154,10 +154,10 @@ contains use psb_penv_mod use psb_sort_mod implicit none - integer(psb_lpk_), intent(in) :: idx(:) + integer(psb_lpk_), intent(in) :: idx(:) integer(psb_ipk_), allocatable, intent(out) :: iprc(:) - class(psb_glist_map), intent(in) :: idxmap - integer(psb_ipk_), intent(out) :: info + class(psb_glist_map), intent(inout) :: idxmap + integer(psb_ipk_), intent(out) :: info integer(psb_mpk_) :: ictxt, iam, np integer(psb_lpk_) :: nv, i, ngp diff --git a/base/modules/desc/psb_indx_map_mod.f90 b/base/modules/desc/psb_indx_map_mod.f90 index 12c8d242..eed22819 100644 --- a/base/modules/desc/psb_indx_map_mod.f90 +++ b/base/modules/desc/psb_indx_map_mod.f90 @@ -127,7 +127,9 @@ module psb_indx_map_mod integer(psb_ipk_), allocatable :: oracle(:,:) !> Halo owners integer(psb_ipk_), allocatable :: halo_owner(:) - + !> Adjacency list for processes + integer(psb_ipk_), allocatable :: p_adjcncy(:) + contains procedure, pass(idxmap) :: get_state => base_get_state @@ -144,6 +146,8 @@ module psb_indx_map_mod procedure, pass(idxmap) :: get_lr => base_get_lr procedure, pass(idxmap) :: get_lc => base_get_lc + procedure, pass(idxmap) :: get_p_adjcncy => base_get_p_adjcncy + procedure, pass(idxmap) :: set_gri => base_set_gri procedure, pass(idxmap) :: set_gci => base_set_gci @@ -155,6 +159,9 @@ module psb_indx_map_mod procedure, pass(idxmap) :: set_lr => base_set_lr procedure, pass(idxmap) :: set_lc => base_set_lc + procedure, pass(idxmap) :: set_p_adjcncy => base_set_p_adjcncy + procedure, pass(idxmap) :: xtnd_p_adjcncy => base_xtnd_p_adjcncy + procedure, pass(idxmap) :: set_ctxt => base_set_ctxt procedure, pass(idxmap) :: set_mpic => base_set_mpic procedure, pass(idxmap) :: get_ctxt => base_get_ctxt @@ -216,7 +223,7 @@ module psb_indx_map_mod procedure, pass(idxmap) :: fnd_halo_owner_v => base_fnd_halo_owner_v generic, public :: fnd_halo_owner => fnd_halo_owner_s, fnd_halo_owner_v - procedure, pass(idxmap) :: fnd_owner => psb_indx_map_fnd_owner + procedure, pass(idxmap) :: fnd_owner => psi_indx_map_fnd_owner procedure, pass(idxmap) :: init_vl => base_init_vl generic, public :: init => init_vl @@ -238,9 +245,10 @@ module psb_indx_map_mod & base_lg2lv2_ins, base_init_vl, base_is_null,& & base_row_extendable, base_clone, base_cpy, base_reinit, & & base_set_halo_owner, base_get_halo_owner, & - & base_fnd_halo_owner_s, base_fnd_halo_owner_v + & base_fnd_halo_owner_s, base_fnd_halo_owner_v,& + & base_get_p_adjcncy, base_set_p_adjcncy, base_xtnd_p_adjcncy - !> Function: psb_indx_map_fnd_owner + !> Function: psi_indx_map_fnd_owner !! \memberof psb_indx_map !! \brief Find the process owning indices !! @@ -259,14 +267,62 @@ module psb_indx_map_mod !! interface - subroutine psb_indx_map_fnd_owner(idx,iprc,idxmap,info) + subroutine psi_indx_map_fnd_owner(idx,iprc,idxmap,info) import :: psb_indx_map, psb_ipk_, psb_lpk_ implicit none - integer(psb_lpk_), intent(in) :: idx(:) + integer(psb_lpk_), intent(in) :: idx(:) integer(psb_ipk_), allocatable, intent(out) :: iprc(:) + class(psb_indx_map), intent(inout) :: idxmap + integer(psb_ipk_), intent(out) :: info + end subroutine psi_indx_map_fnd_owner + end interface + + interface + subroutine psi_a2a_fnd_owner(idx,iprc,idxmap,info) + import :: psb_indx_map, psb_ipk_, psb_lpk_ + implicit none + integer(psb_lpk_), intent(in) :: idx(:) + integer(psb_ipk_), allocatable, intent(out) :: iprc(:) + class(psb_indx_map), intent(in) :: idxmap + integer(psb_ipk_), intent(out) :: info + end subroutine psi_a2a_fnd_owner + end interface + + interface + subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) + import :: psb_indx_map, psb_ipk_, psb_lpk_ + implicit none + integer(psb_lpk_), intent(in) :: idx(:) + integer(psb_ipk_), allocatable, intent(out) :: iprc(:) + integer(psb_ipk_), allocatable, intent(inout) :: adj(:) class(psb_indx_map), intent(in) :: idxmap - integer(psb_ipk_), intent(out) :: info - end subroutine psb_indx_map_fnd_owner + integer(psb_ipk_), intent(out) :: info + end subroutine psi_adjcncy_fnd_owner + end interface + + interface + subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info) + import :: psb_indx_map, psb_ipk_, psb_lpk_ + implicit none + integer(psb_lpk_), intent(in) :: idx(:) + integer(psb_ipk_), allocatable, intent(out) :: iprc(:) + class(psb_indx_map), intent(inout) :: idxmap + integer(psb_ipk_), intent(out) :: info + end subroutine psi_graph_fnd_owner + end interface + + integer, parameter :: psi_symm_flag_norv_ = 0 + integer, parameter :: psi_symm_flag_inrv_ = 1 + interface + subroutine psi_symm_dep_list(rvsz,adj,idxmap,info,flag) + import :: psb_indx_map, psb_ipk_, psb_lpk_, psb_mpk_ + implicit none + integer(psb_mpk_), intent(inout) :: rvsz(:) + integer(psb_ipk_), intent(in) :: adj(:) + class(psb_indx_map), intent(in) :: idxmap + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: flag + end subroutine psi_symm_dep_list end interface contains @@ -331,6 +387,16 @@ contains end function base_get_lc + function base_get_p_adjcncy(idxmap) result(val) + use psb_realloc_mod + implicit none + class(psb_indx_map), intent(in) :: idxmap + integer(psb_ipk_), allocatable :: val(:) + integer(psb_ipk_) :: info + + call psb_safe_ab_cpy(idxmap%p_adjcncy,val,info) + + end function base_get_p_adjcncy function base_get_ctxt(idxmap) result(val) implicit none @@ -416,6 +482,35 @@ contains idxmap%local_cols = val end subroutine base_set_lc + subroutine base_set_p_adjcncy(idxmap,val) + use psb_realloc_mod + use psb_sort_mod + implicit none + class(psb_indx_map), intent(inout) :: idxmap + integer(psb_ipk_), intent(in) :: val(:) + + call idxmap%xtnd_p_adjcncy(val) + + end subroutine base_set_p_adjcncy + + subroutine base_xtnd_p_adjcncy(idxmap,val) + use psb_realloc_mod + use psb_sort_mod + implicit none + class(psb_indx_map), intent(inout) :: idxmap + integer(psb_ipk_), intent(in) :: val(:) + integer(psb_ipk_) :: info, nv, nx + + nv = size(val) + nx = psb_size(idxmap%p_adjcncy) + call psb_realloc(nv+nx,idxmap%p_adjcncy,info) + idxmap%p_adjcncy(nx+1:nx+nv) = val(1:nv) + nx = size(idxmap%p_adjcncy) + call psb_msort_unique(idxmap%p_adjcncy,nx,dir=psb_sort_up_) + call psb_realloc(nx,idxmap%p_adjcncy,info) + + end subroutine base_xtnd_p_adjcncy + subroutine base_set_mpic(idxmap,val) implicit none class(psb_indx_map), intent(inout) :: idxmap @@ -1244,6 +1339,8 @@ contains & call psb_safe_ab_cpy(idxmap%oracle,outmap%oracle,info) if (info == psb_success_)& & call psb_safe_ab_cpy(idxmap%halo_owner,outmap%halo_owner,info) + if (info == psb_success_)& + & call psb_safe_ab_cpy(idxmap%p_adjcncy,outmap%p_adjcncy,info) if (info /= 0) goto 9999 diff --git a/base/modules/desc/psb_repl_map_mod.f90 b/base/modules/desc/psb_repl_map_mod.f90 index e3003547..29a1eb4d 100644 --- a/base/modules/desc/psb_repl_map_mod.f90 +++ b/base/modules/desc/psb_repl_map_mod.f90 @@ -699,10 +699,10 @@ contains subroutine repl_fnd_owner(idx,iprc,idxmap,info) use psb_penv_mod implicit none - integer(psb_lpk_), intent(in) :: idx(:) + integer(psb_lpk_), intent(in) :: idx(:) integer(psb_ipk_), allocatable, intent(out) :: iprc(:) - class(psb_repl_map), intent(in) :: idxmap - integer(psb_ipk_), intent(out) :: info + class(psb_repl_map), intent(inout) :: idxmap + integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nv integer(psb_mpk_) :: ictxt, iam, np diff --git a/base/modules/psi_i_mod.F90 b/base/modules/psi_i_mod.F90 index b41f20b5..e8222753 100644 --- a/base/modules/psi_i_mod.F90 +++ b/base/modules/psi_i_mod.F90 @@ -115,15 +115,15 @@ module psi_i_mod interface psi_extract_dep_list subroutine psi_i_extract_dep_list(ictxt,is_bld,is_upd,desc_str,dep_list,& - & length_dl,np,dl_lda,mode,info) + & length_dl,dl_lda,mode,info) import implicit none - logical :: is_bld, is_upd - integer(psb_ipk_) :: ictxt - integer(psb_ipk_) :: dl_lda,mode - integer(psb_ipk_) :: desc_str(*),dep_list(dl_lda,0:np),length_dl(0:np) - integer(psb_ipk_) :: np - integer(psb_ipk_) :: info + logical, intent(in) :: is_bld, is_upd + integer(psb_ipk_), intent(in) :: ictxt,mode + integer(psb_ipk_), intent(out) :: dl_lda + integer(psb_ipk_), intent(in) :: desc_str(*) + integer(psb_ipk_), allocatable, intent(out) :: dep_list(:,:),length_dl(:) + integer(psb_ipk_), intent(out) :: info end subroutine psi_i_extract_dep_list end interface @@ -131,11 +131,11 @@ module psi_i_mod subroutine psi_i_fnd_owner(nv,idx,iprc,desc,info) import implicit none - integer(psb_ipk_), intent(in) :: nv - integer(psb_ipk_), intent(in) :: idx(:) + integer(psb_ipk_), intent(in) :: nv + integer(psb_ipk_), intent(in) :: idx(:) integer(psb_ipk_), allocatable, intent(out) :: iprc(:) - type(psb_desc_type), intent(in) :: desc - integer(psb_ipk_), intent(out) :: info + type(psb_desc_type), intent(inout) :: desc + integer(psb_ipk_), intent(out) :: info end subroutine psi_i_fnd_owner end interface psi_fnd_owner diff --git a/test/cdasb/Makefile b/test/cdasb/Makefile new file mode 100644 index 00000000..a4a677f5 --- /dev/null +++ b/test/cdasb/Makefile @@ -0,0 +1,38 @@ +INSTALLDIR=../.. +INCDIR=$(INSTALLDIR)/include +MODDIR=$(INSTALLDIR)/modules/ +include $(INCDIR)/Make.inc.psblas +# +# Libraries used +LIBDIR=$(INSTALLDIR)/lib +PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base +LDLIBS=$(PSBLDLIBS) +# +# Compilers and such +# +CCOPT= -g +FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG). + + +EXEDIR=./runs + +all: exed psb_d_pde3d + +exed: + (if test ! -d $(EXEDIR) ; then mkdir $(EXEDIR); fi) + +psb_d_pde3d: psb_d_pde3d.o + $(FLINK) psb_d_pde3d.o -o psb_d_pde3d $(PSBLAS_LIB) $(LDLIBS) + /bin/mv psb_d_pde3d $(EXEDIR) + + + +clean: + /bin/rm -f psb_d_pde3d.o *$(.mod) $(EXEDIR)/psb_d_pde3d +verycleanlib: + (cd ../..; make veryclean) +lib: + (cd ../../; make library) + + + diff --git a/test/cdasb/psb_d_pde3d.f90 b/test/cdasb/psb_d_pde3d.f90 new file mode 100644 index 00000000..c14f8446 --- /dev/null +++ b/test/cdasb/psb_d_pde3d.f90 @@ -0,0 +1,848 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: psb_d_pde3d.f90 +! +! Program: psb_d_pde3d +! This sample program solves a linear system obtained by discretizing a +! PDE with Dirichlet BCs. +! +! +! The PDE is a general second order equation in 3d +! +! a1 dd(u) a2 dd(u) a3 dd(u) b1 d(u) b2 d(u) b3 d(u) +! - ------ - ------ - ------ + ----- + ------ + ------ + c u = f +! dxdx dydy dzdz dx dy dz +! +! with Dirichlet boundary conditions +! u = g +! +! on the unit cube 0<=x,y,z<=1. +! +! +! Note that if b1=b2=b3=c=0., the PDE is the Laplace equation. +! +! There are three choices available for data distribution: +! 1. A simple BLOCK distribution +! 2. A ditribution based on arbitrary assignment of indices to processes, +! typically from a graph partitioner +! 3. A 3D distribution in which the unit cube is partitioned +! into subcubes, each one assigned to a process. +! +! +module psb_d_pde3d_mod + + + use psb_base_mod, only : psb_dpk_, psb_ipk_, psb_lpk_, psb_desc_type,& + & psb_dspmat_type, psb_d_vect_type, dzero,& + & psb_d_base_sparse_mat, psb_d_base_vect_type, & + & psb_i_base_vect_type, psb_l_base_vect_type + + interface + function d_func_3d(x,y,z) result(val) + import :: psb_dpk_ + real(psb_dpk_), intent(in) :: x,y,z + real(psb_dpk_) :: val + end function d_func_3d + end interface + + interface psb_gen_pde3d + module procedure psb_d_gen_pde3d + end interface psb_gen_pde3d + +contains + + function d_null_func_3d(x,y,z) result(val) + + real(psb_dpk_), intent(in) :: x,y,z + real(psb_dpk_) :: val + + val = dzero + + end function d_null_func_3d + ! + ! functions parametrizing the differential equation + ! + + ! + ! Note: b1, b2 and b3 are the coefficients of the first + ! derivative of the unknown function. The default + ! we apply here is to have them zero, so that the resulting + ! matrix is symmetric/hermitian and suitable for + ! testing with CG and FCG. + ! When testing methods for non-hermitian matrices you can + ! change the B1/B2/B3 functions to e.g. done/sqrt((3*done)) + ! + function b1(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: b1 + real(psb_dpk_), intent(in) :: x,y,z + b1=dzero + end function b1 + function b2(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: b2 + real(psb_dpk_), intent(in) :: x,y,z + b2=dzero + end function b2 + function b3(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: b3 + real(psb_dpk_), intent(in) :: x,y,z + b3=dzero + end function b3 + function c(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: c + real(psb_dpk_), intent(in) :: x,y,z + c=dzero + end function c + function a1(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: a1 + real(psb_dpk_), intent(in) :: x,y,z + a1=done/80 + end function a1 + function a2(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: a2 + real(psb_dpk_), intent(in) :: x,y,z + a2=done/80 + end function a2 + function a3(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: a3 + real(psb_dpk_), intent(in) :: x,y,z + a3=done/80 + end function a3 + function g(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: g + real(psb_dpk_), intent(in) :: x,y,z + g = dzero + if (x == done) then + g = done + else if (x == dzero) then + g = exp(y**2-z**2) + end if + end function g + + + ! + ! subroutine to allocate and fill in the coefficient matrix and + ! the rhs. + ! + subroutine psb_d_gen_pde3d(ictxt,idim,a,bv,xv,desc_a,afmt,info,& + & f,amold,vmold,imold,partition,nrl,iv) + use psb_base_mod + use psb_util_mod + ! + ! Discretizes the partial differential equation + ! + ! a1 dd(u) a2 dd(u) a3 dd(u) b1 d(u) b2 d(u) b3 d(u) + ! - ------ - ------ - ------ + ----- + ------ + ------ + c u = f + ! dxdx dydy dzdz dx dy dz + ! + ! with Dirichlet boundary conditions + ! u = g + ! + ! on the unit cube 0<=x,y,z<=1. + ! + ! + ! Note that if b1=b2=b3=c=0., the PDE is the Laplace equation. + ! + implicit none + integer(psb_ipk_) :: idim + type(psb_dspmat_type) :: a + type(psb_d_vect_type) :: xv,bv + type(psb_desc_type) :: desc_a + integer(psb_ipk_) :: ictxt, info + character(len=*) :: afmt + procedure(d_func_3d), optional :: f + class(psb_d_base_sparse_mat), optional :: amold + class(psb_d_base_vect_type), optional :: vmold + class(psb_i_base_vect_type), optional :: imold + integer(psb_ipk_), optional :: partition, nrl,iv(:) + + ! Local variables. + + integer(psb_ipk_), parameter :: nb=20 + type(psb_d_csc_sparse_mat) :: acsc + type(psb_d_coo_sparse_mat) :: acoo + type(psb_d_csr_sparse_mat) :: acsr + real(psb_dpk_) :: zt(nb),x,y,z + integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_ + integer(psb_lpk_) :: m,n,glob_row,nt + integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner + ! For 3D partition + ! Note: integer control variables going directly into an MPI call + ! must be 4 bytes, i.e. psb_mpk_ + integer(psb_mpk_) :: npdims(3), npp, minfo + integer(psb_mpk_) :: npx,npy,npz, iamx,iamy,iamz + integer(psb_ipk_) :: mynx,myny,mynz + integer(psb_ipk_), allocatable :: bndx(:),bndy(:),bndz(:) + ! Process grid + integer(psb_ipk_) :: np, iam + integer(psb_ipk_) :: icoeff + integer(psb_lpk_), allocatable :: irow(:),icol(:),myidx(:) + real(psb_dpk_), allocatable :: val(:) + ! deltah dimension of each grid cell + ! deltat discretization time + real(psb_dpk_) :: deltah, sqdeltah, deltah2 + real(psb_dpk_), parameter :: rhs=dzero,one=done,zero=dzero + real(psb_dpk_) :: t0, t1, t2, t3, tasb, talc, ttot, tgen, tcdasb + integer(psb_ipk_) :: err_act + procedure(d_func_3d), pointer :: f_ + character(len=20) :: name, ch_err,tmpfmt + + info = psb_success_ + name = 'create_matrix' + call psb_erractionsave(err_act) + + call psb_info(ictxt, iam, np) + call psb_cd_set_large_threshold(1000) + call psb_cd_set_maxspace(10000) + + if (present(f)) then + f_ => f + else + f_ => d_null_func_3d + end if + + deltah = done/(idim+1) + sqdeltah = deltah*deltah + deltah2 = (2*done)* deltah + + if (present(partition)) then + if ((1<= partition).and.(partition <= 3)) then + partition_ = partition + else + write(*,*) 'Invalid partition choice ',partition,' defaulting to 3' + partition_ = 3 + end if + else + partition_ = 3 + end if + + ! initialize array descriptor and sparse matrix storage. provide an + ! estimate of the number of non zeroes + + m = (1_psb_lpk_*idim)*idim*idim + n = m + nnz = ((n*7)/(np)) + if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n + t0 = psb_wtime() + select case(partition_) + case(1) + ! A BLOCK partition + if (present(nrl)) then + nr = nrl + else + ! + ! Using a simple BLOCK distribution. + ! + nt = (m+np-1)/np + nr = max(0,min(nt,m-(iam*nt))) + end if + + nt = nr + call psb_sum(ictxt,nt) + if (nt /= m) then + write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m + info = -1 + call psb_barrier(ictxt) + call psb_abort(ictxt) + return + end if + + ! + ! First example of use of CDALL: specify for each process a number of + ! contiguous rows + ! + call psb_cdall(ictxt,desc_a,info,nl=nr) + myidx = desc_a%get_global_indices() + nlr = size(myidx) + + case(2) + ! A partition defined by the user through IV + + if (present(iv)) then + if (size(iv) /= m) then + write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m + info = -1 + call psb_barrier(ictxt) + call psb_abort(ictxt) + return + end if + else + write(psb_err_unit,*) iam, 'Initialization error: IV not present' + info = -1 + call psb_barrier(ictxt) + call psb_abort(ictxt) + return + end if + + ! + ! Second example of use of CDALL: specify for each row the + ! process that owns it + ! + call psb_cdall(ictxt,desc_a,info,vg=iv) + myidx = desc_a%get_global_indices() + nlr = size(myidx) + + case(3) + ! A 3-dimensional partition + + ! A nifty MPI function will split the process list + npdims = 0 + call mpi_dims_create(np,3,npdims,info) + npx = npdims(1) + npy = npdims(2) + npz = npdims(3) + + allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz)) + ! We can reuse idx2ijk for process indices as well. + call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0) + ! Now let's split the 3D cube in hexahedra + call dist1Didx(bndx,idim,npx) + mynx = bndx(iamx+1)-bndx(iamx) + call dist1Didx(bndy,idim,npy) + myny = bndy(iamy+1)-bndy(iamy) + call dist1Didx(bndz,idim,npz) + mynz = bndz(iamz+1)-bndz(iamz) + + ! How many indices do I own? + nlr = mynx*myny*mynz + allocate(myidx(nlr)) + ! Now, let's generate the list of indices I own + nr = 0 + do i=bndx(iamx),bndx(iamx+1)-1 + do j=bndy(iamy),bndy(iamy+1)-1 + do k=bndz(iamz),bndz(iamz+1)-1 + nr = nr + 1 + call ijk2idx(myidx(nr),i,j,k,idim,idim,idim) + end do + end do + end do + if (nr /= nlr) then + write(psb_err_unit,*) iam,iamx,iamy,iamz, 'Initialization error: NR vs NLR ',& + & nr,nlr,mynx,myny,mynz + info = -1 + call psb_barrier(ictxt) + call psb_abort(ictxt) + end if + + ! + ! Third example of use of CDALL: specify for each process + ! the set of global indices it owns. + ! + call psb_cdall(ictxt,desc_a,info,vl=myidx) + + + block + ! + ! Test adjcncy methods + ! + integer(psb_mpk_), allocatable :: neighbours(:) + integer(psb_mpk_) :: cnt + logical, parameter :: debug_adj=.false. + if (debug_adj.and.(np > 1)) then + cnt = 0 + allocate(neighbours(np)) + if (iamx < npx-1) then + cnt = cnt + 1 + call ijk2idx(neighbours(cnt),iamx+1,iamy,iamz,npx,npy,npz,base=0) + end if + if (iamy < npy-1) then + cnt = cnt + 1 + call ijk2idx(neighbours(cnt),iamx,iamy+1,iamz,npx,npy,npz,base=0) + end if + if (iamz < npz-1) then + cnt = cnt + 1 + call ijk2idx(neighbours(cnt),iamx,iamy,iamz+1,npx,npy,npz,base=0) + end if + if (iamx >0) then + cnt = cnt + 1 + call ijk2idx(neighbours(cnt),iamx-1,iamy,iamz,npx,npy,npz,base=0) + end if + if (iamy >0) then + cnt = cnt + 1 + call ijk2idx(neighbours(cnt),iamx,iamy-1,iamz,npx,npy,npz,base=0) + end if + if (iamz >0) then + cnt = cnt + 1 + call ijk2idx(neighbours(cnt),iamx,iamy,iamz-1,npx,npy,npz,base=0) + end if + call psb_realloc(cnt, neighbours,info) + call desc_a%set_p_adjcncy(neighbours) + write(0,*) iam,' Check on neighbours: ',desc_a%get_p_adjcncy() + end if + end block + + + case default + write(psb_err_unit,*) iam, 'Initialization error: should not get here' + info = -1 + call psb_barrier(ictxt) + call psb_abort(ictxt) + return + end select + + + if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz) + ! define rhs from boundary conditions; also build initial guess + if (info == psb_success_) call psb_geall(xv,desc_a,info) + if (info == psb_success_) call psb_geall(bv,desc_a,info) + + call psb_barrier(ictxt) + talc = psb_wtime()-t0 + + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='allocation rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + ! we build an auxiliary matrix consisting of one row at a + ! time; just a small matrix. might be extended to generate + ! a bunch of rows per call. + ! + allocate(val(20*nb),irow(20*nb),& + &icol(20*nb),stat=info) + if (info /= psb_success_ ) then + info=psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + endif + + + ! loop over rows belonging to current process in a block + ! distribution. + + call psb_barrier(ictxt) + t1 = psb_wtime() + do ii=1, nlr,nb + ib = min(nb,nlr-ii+1) + icoeff = 1 + do k=1,ib + i=ii+k-1 + ! local matrix pointer + glob_row=myidx(i) + ! compute gridpoint coordinates + call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim) + ! x, y, z coordinates + x = (ix-1)*deltah + y = (iy-1)*deltah + z = (iz-1)*deltah + zt(k) = f_(x,y,z) + ! internal point: build discretization + ! + ! term depending on (x-1,y,z) + ! + val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2 + if (ix == 1) then + zt(k) = g(dzero,y,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x,y-1,z) + val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2 + if (iy == 1) then + zt(k) = g(x,dzero,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x,y,z-1) + val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2 + if (iz == 1) then + zt(k) = g(x,y,dzero)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + + ! term depending on (x,y,z) + val(icoeff)=(2*done)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah & + & + c(x,y,z) + call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + ! term depending on (x,y,z+1) + val(icoeff)=-a3(x,y,z)/sqdeltah+b3(x,y,z)/deltah2 + if (iz == idim) then + zt(k) = g(x,y,done)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x,y+1,z) + val(icoeff)=-a2(x,y,z)/sqdeltah+b2(x,y,z)/deltah2 + if (iy == idim) then + zt(k) = g(x,done,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x+1,y,z) + val(icoeff)=-a1(x,y,z)/sqdeltah+b1(x,y,z)/deltah2 + if (ix==idim) then + zt(k) = g(done,y,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + + end do + call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info) + if(info /= psb_success_) exit + call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info) + if(info /= psb_success_) exit + zt(:)=dzero + call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info) + if(info /= psb_success_) exit + end do + + tgen = psb_wtime()-t1 + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='insert rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + deallocate(val,irow,icol) + + call psb_barrier(ictxt) + t1 = psb_wtime() + call psb_cdasb(desc_a,info,mold=imold) + tcdasb = psb_wtime()-t1 + call psb_barrier(ictxt) + t1 = psb_wtime() + if (info == psb_success_) then + if (present(amold)) then + call psb_spasb(a,desc_a,info,dupl=psb_dupl_err_,mold=amold) + else + call psb_spasb(a,desc_a,info,dupl=psb_dupl_err_,afmt=afmt) + end if + end if + call psb_barrier(ictxt) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='asb rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (info == psb_success_) call psb_geasb(xv,desc_a,info,mold=vmold) + if (info == psb_success_) call psb_geasb(bv,desc_a,info,mold=vmold) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='asb rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + tasb = psb_wtime()-t1 + call psb_barrier(ictxt) + ttot = psb_wtime() - t0 + + call psb_amx(ictxt,talc) + call psb_amx(ictxt,tgen) + call psb_amx(ictxt,tasb) + call psb_amx(ictxt,ttot) + if(iam == psb_root_) then + tmpfmt = a%get_fmt() + write(psb_out_unit,'("The matrix has been generated and assembled in ",a3," format.")')& + & tmpfmt + write(psb_out_unit,'("-allocation time : ",es12.5)') talc + write(psb_out_unit,'("-coeff. gen. time : ",es12.5)') tgen + write(psb_out_unit,'("-desc asbly time : ",es12.5)') tcdasb + write(psb_out_unit,'("- mat asbly time : ",es12.5)') tasb + write(psb_out_unit,'("-total time : ",es12.5)') ttot + + end if + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ictxt,err_act) + + return + end subroutine psb_d_gen_pde3d + + +end module psb_d_pde3d_mod + +program psb_d_pde3d + use psb_base_mod + use psb_prec_mod + use psb_krylov_mod + use psb_util_mod + use psb_d_pde3d_mod + implicit none + + ! input parameters + character(len=20) :: kmethd, ptype + character(len=5) :: afmt + integer(psb_ipk_) :: idim + integer(psb_epk_) :: system_size + + ! miscellaneous + real(psb_dpk_), parameter :: one = done + real(psb_dpk_) :: t1, t2, tprec + + ! sparse matrix and preconditioner + type(psb_dspmat_type) :: a + type(psb_dprec_type) :: prec + ! descriptor + type(psb_desc_type) :: desc_a + ! dense vectors + type(psb_d_vect_type) :: xxv,bv + ! parallel environment + integer(psb_ipk_) :: ictxt, iam, np + + ! solver parameters + integer(psb_ipk_) :: iter, itmax,itrace, istopc, irst, ipart + integer(psb_epk_) :: amatsize, precsize, descsize, d2size + real(psb_dpk_) :: err, eps + + ! other variables + integer(psb_ipk_) :: info, i + character(len=20) :: name,ch_err + character(len=40) :: fname + + info=psb_success_ + + + call psb_init(ictxt) + call psb_info(ictxt,iam,np) + + if (iam < 0) then + ! This should not happen, but just in case + call psb_exit(ictxt) + stop + endif + if(psb_errstatus_fatal()) goto 9999 + name='pde3d90' + call psb_set_errverbosity(itwo) + ! + ! Hello world + ! + if (iam == psb_root_) then + write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ + write(*,*) 'This is the ',trim(name),' sample program' + end if + ! + ! get parameters + ! + call get_parms(ictxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart) + + ! + ! allocate and fill in the coefficient matrix, rhs and initial guess + ! + call psb_barrier(ictxt) + t1 = psb_wtime() + call psb_gen_pde3d(ictxt,idim,a,bv,xxv,desc_a,afmt,info,partition=ipart) + call psb_barrier(ictxt) + t2 = psb_wtime() - t1 + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_gen_pde3d' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (iam == psb_root_) write(psb_out_unit,'("Overall matrix creation time : ",es12.5)')t2 + if (iam == psb_root_) write(psb_out_unit,'(" ")') + + ! + ! cleanup storage and exit + ! + call psb_gefree(bv,desc_a,info) + call psb_gefree(xxv,desc_a,info) + call psb_spfree(a,desc_a,info) + call psb_cdfree(desc_a,info) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='free routine' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + call psb_exit(ictxt) + stop + +9999 call psb_error(ictxt) + + stop + +contains + ! + ! get iteration parameters from standard input + ! + subroutine get_parms(ictxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart) + integer(psb_ipk_) :: ictxt + character(len=*) :: kmethd, ptype, afmt + integer(psb_ipk_) :: idim, istopc,itmax,itrace,irst,ipart + integer(psb_ipk_) :: np, iam + integer(psb_ipk_) :: ip, inp_unit + character(len=1024) :: filename + + call psb_info(ictxt, iam, np) + + if (iam == 0) then + if (command_argument_count()>0) then + call get_command_argument(1,filename) + inp_unit = 30 + open(inp_unit,file=filename,action='read',iostat=info) + if (info /= 0) then + write(psb_err_unit,*) 'Could not open file ',filename,' for input' + call psb_abort(ictxt) + stop + else + write(psb_err_unit,*) 'Opened file ',trim(filename),' for input' + end if + else + inp_unit=psb_inp_unit + end if + read(inp_unit,*) ip + if (ip >= 3) then + read(inp_unit,*) kmethd + read(inp_unit,*) ptype + read(inp_unit,*) afmt + + read(inp_unit,*) idim + if (ip >= 4) then + read(inp_unit,*) ipart + else + ipart = 3 + endif + if (ip >= 5) then + read(inp_unit,*) istopc + else + istopc=1 + endif + if (ip >= 6) then + read(inp_unit,*) itmax + else + itmax=500 + endif + if (ip >= 7) then + read(inp_unit,*) itrace + else + itrace=-1 + endif + if (ip >= 8) then + read(inp_unit,*) irst + else + irst=1 + endif + + write(psb_out_unit,'("Solving matrix : ell1")') + write(psb_out_unit,& + & '("Grid dimensions : ",i4," x ",i4," x ",i4)') & + & idim,idim,idim + write(psb_out_unit,'("Number of processors : ",i0)')np + select case(ipart) + case(1) + write(psb_out_unit,'("Data distribution : BLOCK")') + case(3) + write(psb_out_unit,'("Data distribution : 3D")') + case default + ipart = 3 + write(psb_out_unit,'("Unknown data distrbution, defaulting to 3D")') + end select + write(psb_out_unit,'("Preconditioner : ",a)') ptype + write(psb_out_unit,'("Iterative method : ",a)') kmethd + write(psb_out_unit,'(" ")') + else + ! wrong number of parameter, print an error message and exit + call pr_usage(izero) + call psb_abort(ictxt) + stop 1 + endif + if (inp_unit /= psb_inp_unit) then + close(inp_unit) + end if + + end if + ! broadcast parameters to all processors + call psb_bcast(ictxt,kmethd) + call psb_bcast(ictxt,afmt) + call psb_bcast(ictxt,ptype) + call psb_bcast(ictxt,idim) + call psb_bcast(ictxt,ipart) + call psb_bcast(ictxt,istopc) + call psb_bcast(ictxt,itmax) + call psb_bcast(ictxt,itrace) + call psb_bcast(ictxt,irst) + + return + + end subroutine get_parms + ! + ! print an error message + ! + subroutine pr_usage(iout) + integer(psb_ipk_) :: iout + write(iout,*)'incorrect parameter(s) found' + write(iout,*)' usage: pde3d90 methd prec dim & + &[istop itmax itrace]' + write(iout,*)' where:' + write(iout,*)' methd: cgstab cgs rgmres bicgstabl' + write(iout,*)' prec : bjac diag none' + write(iout,*)' dim number of points along each axis' + write(iout,*)' the size of the resulting linear ' + write(iout,*)' system is dim**3' + write(iout,*)' ipart data partition 1 3 ' + write(iout,*)' istop stopping criterion 1, 2 ' + write(iout,*)' itmax maximum number of iterations [500] ' + write(iout,*)' itrace <=0 (no tracing, default) or ' + write(iout,*)' >= 1 do tracing every itrace' + write(iout,*)' iterations ' + end subroutine pr_usage + +end program psb_d_pde3d + + diff --git a/test/cdasb/runs/tcd.inp b/test/cdasb/runs/tcd.inp new file mode 100644 index 00000000..ae5c50cf --- /dev/null +++ b/test/cdasb/runs/tcd.inp @@ -0,0 +1,12 @@ +8 Number of entries below this +BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES FCG CGR +BJAC Preconditioner NONE DIAG BJAC +CSR Storage format for matrix A: CSR COO +100 Domain size (acutal system is this**3 (pde3d) or **2 (pde2d) ) +3 Partition: 1 BLOCK 3 3D +2 Stopping criterion 1 2 +0100 MAXIT +-1 ITRACE +002 IRST restart for RGMRES and BiCGSTABL + + diff --git a/test/pargen/psb_d_pde3d.f90 b/test/pargen/psb_d_pde3d.f90 index 429e9a0e..60cb9e9e 100644 --- a/test/pargen/psb_d_pde3d.f90 +++ b/test/pargen/psb_d_pde3d.f90 @@ -214,7 +214,8 @@ contains ! Note: integer control variables going directly into an MPI call ! must be 4 bytes, i.e. psb_mpk_ integer(psb_mpk_) :: npdims(3), npp, minfo - integer(psb_ipk_) :: npx,npy,npz, iamx,iamy,iamz,mynx,myny,mynz + integer(psb_mpk_) :: npx,npy,npz, iamx,iamy,iamz + integer(psb_ipk_) :: mynx,myny,mynz integer(psb_ipk_), allocatable :: bndx(:),bndy(:),bndz(:) ! Process grid integer(psb_ipk_) :: np, iam @@ -235,8 +236,9 @@ contains call psb_erractionsave(err_act) call psb_info(ictxt, iam, np) - - + call psb_cd_set_large_threshold(1000) + call psb_cd_set_maxspace(-1) + if (present(f)) then f_ => f else @@ -371,7 +373,49 @@ contains ! the set of global indices it owns. ! call psb_cdall(ictxt,desc_a,info,vl=myidx) + + block + ! + ! Test adjcncy methods + ! + integer(psb_mpk_), allocatable :: neighbours(:) + integer(psb_mpk_) :: cnt + logical, parameter :: debug_adj=.false. + if (debug_adj.and.(np > 1)) then + cnt = 0 + allocate(neighbours(np)) + if (iamx < npx-1) then + cnt = cnt + 1 + call ijk2idx(neighbours(cnt),iamx+1,iamy,iamz,npx,npy,npz,base=0) + end if + if (iamy < npy-1) then + cnt = cnt + 1 + call ijk2idx(neighbours(cnt),iamx,iamy+1,iamz,npx,npy,npz,base=0) + end if + if (iamz < npz-1) then + cnt = cnt + 1 + call ijk2idx(neighbours(cnt),iamx,iamy,iamz+1,npx,npy,npz,base=0) + end if + if (iamx >0) then + cnt = cnt + 1 + call ijk2idx(neighbours(cnt),iamx-1,iamy,iamz,npx,npy,npz,base=0) + end if + if (iamy >0) then + cnt = cnt + 1 + call ijk2idx(neighbours(cnt),iamx,iamy-1,iamz,npx,npy,npz,base=0) + end if + if (iamz >0) then + cnt = cnt + 1 + call ijk2idx(neighbours(cnt),iamx,iamy,iamz-1,npx,npy,npz,base=0) + end if + call psb_realloc(cnt, neighbours,info) + call desc_a%set_p_adjcncy(neighbours) + write(0,*) iam,' Check on neighbours: ',desc_a%get_p_adjcncy() + end if + end block + + case default write(psb_err_unit,*) iam, 'Initialization error: should not get here' info = -1