|
|
|
!
|
|
|
|
! Parallel Sparse BLAS version 3.5
|
|
|
|
! (C) Copyright 2006-2018
|
|
|
|
! Salvatore Filippone
|
|
|
|
! Alfredo Buttari
|
|
|
|
!
|
|
|
|
! Redistribution and use in source and binary forms, with or without
|
|
|
|
! modification, are permitted provided that the following conditions
|
|
|
|
! are met:
|
|
|
|
! 1. Redistributions of source code must retain the above copyright
|
|
|
|
! notice, this list of conditions and the following disclaimer.
|
|
|
|
! 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
! notice, this list of conditions, and the following disclaimer in the
|
|
|
|
! documentation and/or other materials provided with the distribution.
|
|
|
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
|
|
|
! not be used to endorse or promote products derived from this
|
|
|
|
! software without specific written permission.
|
|
|
|
!
|
|
|
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
|
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
|
|
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
|
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
|
|
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
|
|
! POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
!
|
|
|
|
!
|
|
|
|
!
|
|
|
|
! File: psi_a2a_fnd_owner.f90
|
|
|
|
!
|
|
|
|
! Subroutine: psi_a2a_fnd_owner
|
|
|
|
! Figure out who owns global indices.
|
|
|
|
!
|
|
|
|
! Arguments:
|
|
|
|
! idx(:) - integer Required indices on the calling process.
|
|
|
|
! Note: the indices should be unique!
|
|
|
|
! iprc(:) - integer(psb_ipk_), allocatable Output: process identifiers for the corresponding
|
|
|
|
! indices
|
|
|
|
! idxmap - class(psb_indx_map). The index map
|
|
|
|
! info - integer. return code.
|
|
|
|
!
|
|
|
|
! This version does not assume any prior knowledge about the process topology,
|
|
|
|
! so it goes for an all-to-all exchange involving every process.
|
|
|
|
! There is a choice between building an auxiliary neighbours list and
|
|
|
|
! reusing the neighbour version, and going for a straight MPI alltoall (default).
|
|
|
|
!
|
|
|
|
subroutine psi_a2a_fnd_owner(idx,iprc,idxmap,info,samesize)
  !
  ! Find, for every global index in idx(:), the process that owns it,
  ! without assuming anything about the process topology.
  !
  ! Arguments:
  !    idx(:)    - integer(psb_lpk_), in.   Global indices required by the
  !                                         calling process.
  !    iprc(:)   - integer(psb_ipk_), allocatable, out.
  !                                         iprc(i) is the owner of idx(i);
  !                                         in the all-to-all variants an
  !                                         entry is -1 when no process
  !                                         claimed the index.
  !    idxmap    - class(psb_indx_map), in. The index map.
  !    info      - integer(psb_ipk_), out.  Return code.
  !    samesize  - logical, optional, in.   When .true. the caller guarantees
  !                                         that size(idx) is the same on all
  !                                         processes, so the fixed-size
  !                                         collectives are used.
  !                                         Default: .false.
  !
  ! Strategy (all-to-all variants):
  !    1. allgather(v)  : every process receives every request;
  !    2. g2l           : each process marks the requests it owns;
  !    3. reduce_scatter: a MAX reduction hands each requester the rank
  !                       that claimed each of its indices.
  !
  use psb_serial_mod
  use psb_const_mod
  use psb_error_mod
  use psb_penv_mod
  use psb_realloc_mod
  use psb_indx_map_mod, psb_protect_name => psi_a2a_fnd_owner
#ifdef MPI_MOD
  use mpi
#endif

  implicit none
#ifdef MPI_H
  include 'mpif.h'
#endif
  ! Arguments
  integer(psb_lpk_), intent(in)               :: idx(:)
  integer(psb_ipk_), allocatable, intent(out) :: iprc(:)
  class(psb_indx_map), intent(in)             :: idxmap
  integer(psb_ipk_), intent(out)              :: info
  logical, intent(in), optional               :: samesize

  ! Local variables
  integer(psb_ipk_), allocatable :: tmpadj(:), lclidx(:)
  integer(psb_lpk_), allocatable :: rmtidx(:)
  integer(psb_mpk_), allocatable :: hsz(:), hidx(:)
  integer(psb_mpk_)   :: icomm, minfo, nv
  integer(psb_ipk_)   :: i, n_row, err_act, gsz
  integer(psb_ipk_)   :: np, me
  type(psb_ctxt_type) :: ctxt
  logical, parameter  :: use_psi_adj=.false.
  character(len=20)   :: name
  logical             :: samesize_

  info = psb_success_
  name = 'psi_a2a_fnd_owner'
  call psb_erractionsave(err_act)

  ctxt  = idxmap%get_ctxt()
  icomm = idxmap%get_mpic()
  n_row = idxmap%get_lr()

  call psb_info(ctxt, me, np)

  if (np == -1) then
    info = psb_err_context_error_
    call psb_errpush(info,name)
    goto 9999
  endif

  if (.not.(idxmap%is_valid())) then
    ! Set info as well: with a "return" error action the caller must not
    ! see psb_success_ together with an unallocated iprc.
    info = psb_err_from_subroutine_
    call psb_errpush(info,name,a_err='invalid idxmap')
    goto 9999
  end if

  samesize_ = .false.
  if (present(samesize)) samesize_ = samesize

  nv = size(idx)
  !if (me == 0) write(0,*) me,name,' :',use_psi_adj,samesize_,nv

  if (use_psi_adj) then
    !
    ! Reuse the adjcncy version by tricking it with an adjcncy list
    ! that contains everybody but ME.
    !
    call psb_realloc(np-1,tmpadj,info)
    if (info /= psb_success_) then
      call psb_errpush(psb_err_from_subroutine_,name,a_err='psb_realloc')
      goto 9999
    end if
    tmpadj(1:me)      = [(i,i=0,me-1)]
    tmpadj(me+1:np-1) = [(i,i=me+1,np-1)]
    call psi_adjcncy_fnd_owner(idx,iprc,tmpadj,idxmap,info)
    if (info /= psb_success_) then
      call psb_errpush(psb_err_from_subroutine_,name,&
           & a_err='psi_adjcncy_fnd_owner')
      goto 9999
    end if

  else
    !
    ! Step 0: total number of requests across all processes.
    !
    if (samesize_) then
      !
      ! Variant when IDX is guaranteed to have the same size on all
      ! processes: no need to exchange the sizes.
      !
      gsz = nv*np
    else
      allocate(hidx(np+1),hsz(np),stat=info)
      if (info /= psb_success_) then
        call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
        goto 9999
      end if

      call mpi_allgather(nv,1,psb_mpi_mpk_,hsz,1,psb_mpi_mpk_,icomm,minfo)
      if (minfo /= mpi_success) then
        info = psb_err_from_subroutine_
        call psb_errpush(info,name,a_err='mpi_allgather')
        goto 9999
      end if
      ! hidx holds the zero-based displacement of each process' requests
      hidx(1) = 0
      do i=1, np
        hidx(i+1) = hidx(i) + hsz(i)
      end do
      gsz = hidx(np+1)
    end if

    allocate(rmtidx(gsz),lclidx(gsz),iprc(nv),stat=info)
    if (info /= psb_success_) then
      call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
      goto 9999
    end if

    !
    ! Step 1: collect (to all) all the requests.
    !
    if (samesize_) then
      call mpi_allgather(idx,nv,psb_mpi_lpk_,rmtidx,nv,psb_mpi_lpk_,icomm,minfo)
    else
      call mpi_allgatherv(idx,hsz(me+1),psb_mpi_lpk_,&
           & rmtidx,hsz,hidx,psb_mpi_lpk_,&
           & icomm,minfo)
    end if
    if (minfo /= mpi_success) then
      info = psb_err_from_subroutine_
      call psb_errpush(info,name,a_err='mpi_allgather(v)')
      goto 9999
    end if

    !
    ! Step 2: local conversion; the status is examined only after the
    ! reduction below, so that a local failure does not leave the other
    ! processes blocked inside the collective.
    !
    call idxmap%g2l(rmtidx(1:gsz),lclidx(1:gsz),info,owned=.true.)
    !
    ! Reuse lclidx to encode owning process
    !
    where ((1<=lclidx).and.(lclidx<=n_row))
      lclidx = me
    elsewhere
      lclidx = -1
    end where

    !
    ! Step 3: the MAX reduction keeps the rank of whoever claimed the index.
    !
    if (samesize_) then
      call mpi_reduce_scatter_block(lclidx,iprc,nv,psb_mpi_ipk_,mpi_max,icomm,minfo)
    else
      call mpi_reduce_scatter(lclidx,iprc,hsz,psb_mpi_ipk_,mpi_max,icomm,minfo)
    end if
    if (minfo /= mpi_success) then
      info = psb_err_from_subroutine_
      call psb_errpush(info,name,a_err='mpi_reduce_scatter')
      goto 9999
    end if
    if (info /= psb_success_) then
      ! Deferred check of the g2l status
      call psb_errpush(psb_err_from_subroutine_,name,a_err='g2l')
      goto 9999
    end if

    if (.not.samesize_) then
      if (any(iprc(1:hsz(me+1))<0)) then
        write(0,*) me,' a2a_fnd: missing answers',count(iprc(1:hsz(me+1))<0),&
             & gsz,hsz(me+1)
      end if
    end if
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(ctxt,err_act)

  return

end subroutine psi_a2a_fnd_owner
|