You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
442 lines
14 KiB
Fortran
442 lines
14 KiB
Fortran
!
|
|
! Parallel Sparse BLAS version 3.5
|
|
! (C) Copyright 2006-2018
|
|
! Salvatore Filippone
|
|
! Alfredo Buttari
|
|
!
|
|
! Redistribution and use in source and binary forms, with or without
|
|
! modification, are permitted provided that the following conditions
|
|
! are met:
|
|
! 1. Redistributions of source code must retain the above copyright
|
|
! notice, this list of conditions and the following disclaimer.
|
|
! 2. Redistributions in binary form must reproduce the above copyright
|
|
! notice, this list of conditions, and the following disclaimer in the
|
|
! documentation and/or other materials provided with the distribution.
|
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
|
! not be used to endorse or promote products derived from this
|
|
! software without specific written permission.
|
|
!
|
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
! POSSIBILITY OF SUCH DAMAGE.
|
|
!
|
|
!
|
|
!
|
|
!
|
|
! File: psi_adjcncy_fnd_owner.f90
|
|
!
|
|
! Subroutine: psi_adjcncy_fnd_owner
|
|
! Figure out who owns global indices.
|
|
!
|
|
! Arguments:
|
|
! idx(:) - integer Required indices on the calling process.
|
|
! Note: the indices should be unique!
|
|
! iprc(:) - integer(psb_ipk_), allocatable Output: process identifiers for the corresponding
|
|
! indices
|
|
! adj(:) - integer(psb_ipk_) Input: list of topological neighbours for current process.
|
|
!
|
|
! idxmap - class(psb_indx_map). The index map
|
|
! info - integer. return code.
|
|
!
|
|
! This version takes on input a list of processes that are assumed to
|
|
! be topological neighbours of the current one. Each process will send to all
|
|
! of its neighbours the list of indices for which it is trying to find the
|
|
! owner, prepare its own answers, and collect answers from others.
|
|
! There are three possibile implementations: using mpi_alltoallv, using mpi_isend/irecv,
|
|
! using psb_snd/psb_rcv. The default is mpi_alltoallv.
|
|
!
|
|
subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info)
|
|
use psb_serial_mod
|
|
use psb_const_mod
|
|
use psb_error_mod
|
|
use psb_penv_mod
|
|
use psb_realloc_mod
|
|
use psb_timers_mod
|
|
use psb_indx_map_mod, psb_protect_name => psi_adjcncy_fnd_owner
|
|
#ifdef MPI_MOD
|
|
use mpi
|
|
#endif
|
|
|
|
implicit none
|
|
#ifdef MPI_H
|
|
include 'mpif.h'
|
|
#endif
|
|
integer(psb_lpk_), intent(in) :: idx(:)
|
|
integer(psb_ipk_), allocatable, intent(out) :: iprc(:)
|
|
integer(psb_ipk_), intent(in) :: adj(:)
|
|
class(psb_indx_map), intent(in) :: idxmap
|
|
integer(psb_ipk_), intent(out) :: info
|
|
|
|
|
|
integer(psb_lpk_), allocatable :: rmtidx(:)
|
|
integer(psb_ipk_), allocatable :: tproc(:), lclidx(:)
|
|
integer(psb_mpk_), allocatable :: hsz(:),hidx(:), sdidx(:), rvidx(:),&
|
|
& sdsz(:), rvsz(:), sdhd(:), rvhd(:), p2pstat(:,:)
|
|
integer(psb_mpk_) :: prc, p2ptag, iret
|
|
integer(psb_mpk_) :: icomm, minfo
|
|
integer(psb_ipk_) :: i,n_row,n_col,err_act,hsize,ip,isz,j, k,&
|
|
& last_ih, last_j, nidx, nrecv, nadj
|
|
integer(psb_lpk_) :: mglob, ih
|
|
type(psb_ctxt_type) :: ctxt
|
|
integer(psb_ipk_) :: np,me
|
|
logical, parameter :: gettime=.true., debug=.false.
|
|
integer(psb_mpk_) :: xchg_alg
|
|
logical, parameter :: do_timings=.false.
|
|
integer(psb_ipk_), save :: idx_phase1=-1, idx_phase2=-1, idx_phase3=-1
|
|
integer(psb_ipk_), save :: idx_phase11=-1, idx_phase12=-1, idx_phase13=-1
|
|
real(psb_dpk_) :: t0, t1, t2, t3, t4, tamx, tidx
|
|
character(len=20) :: name
|
|
|
|
info = psb_success_
|
|
name = 'psi_adjcncy_fnd_owner'
|
|
call psb_erractionsave(err_act)
|
|
|
|
ctxt = idxmap%get_ctxt()
|
|
icomm = idxmap%get_mpic()
|
|
mglob = idxmap%get_gr()
|
|
n_row = idxmap%get_lr()
|
|
n_col = idxmap%get_lc()
|
|
|
|
if ((do_timings).and.(idx_phase1==-1)) &
|
|
& idx_phase1 = psb_get_timer_idx("ADJ_FND_OWN: phase1 ")
|
|
if ((do_timings).and.(idx_phase2==-1)) &
|
|
& idx_phase2 = psb_get_timer_idx("ADJ_FND_OWN: phase2")
|
|
if ((do_timings).and.(idx_phase3==-1)) &
|
|
& idx_phase3 = psb_get_timer_idx("ADJ_FND_OWN: phase3")
|
|
if ((do_timings).and.(idx_phase11==-1)) &
|
|
& idx_phase11 = psb_get_timer_idx("ADJ_FND_OWN: phase11 ")
|
|
if ((do_timings).and.(idx_phase12==-1)) &
|
|
& idx_phase12 = psb_get_timer_idx("ADJ_FND_OWN: phase12")
|
|
if ((do_timings).and.(idx_phase13==-1)) &
|
|
& idx_phase13 = psb_get_timer_idx("ADJ_FND_OWN: phase13")
|
|
|
|
|
|
call psb_info(ctxt, me, np)
|
|
|
|
if (np == -1) then
|
|
info = psb_err_context_error_
|
|
call psb_errpush(info,name)
|
|
goto 9999
|
|
endif
|
|
|
|
if (.not.(idxmap%is_valid())) then
|
|
call psb_errpush(psb_err_from_subroutine_,name,a_err='invalid idxmap')
|
|
goto 9999
|
|
end if
|
|
|
|
if (gettime) then
|
|
t0 = psb_wtime()
|
|
end if
|
|
|
|
nadj = size(adj)
|
|
nidx = size(idx)
|
|
call psb_realloc(nidx,iprc,info)
|
|
if (info /= psb_success_) then
|
|
call psb_errpush(psb_err_from_subroutine_,name,a_err='psb_realloc')
|
|
goto 9999
|
|
end if
|
|
iprc = -1
|
|
xchg_alg = psi_get_adj_alg()
|
|
!if (me == 0) write(0,*) me,'adj_fnd_owner alg: ',xchg_alg,' Going through ',nidx,nadj
|
|
select case(xchg_alg)
|
|
case(psi_adj_fnd_a2av_)
|
|
if (do_timings) call psb_tic(idx_phase1)
|
|
|
|
!
|
|
! First simple minded version with auxiliary arrays
|
|
! dimensioned on NP.
|
|
! Do the exchange with an alltoallv
|
|
!
|
|
!
|
|
Allocate(hidx(0:np),hsz(np),sdsz(0:np-1),rvsz(0:np-1), &
|
|
& sdidx(0:np),rvidx(0:np),stat=info)
|
|
!
|
|
! Same send buffer for everybody
|
|
!
|
|
sdidx(:) = 0
|
|
!
|
|
! First, send sizes according to adjcncy list
|
|
!
|
|
if (do_timings) call psb_tic(idx_phase11)
|
|
sdsz = 0
|
|
do j=1, nadj
|
|
sdsz(adj(j)) = nidx
|
|
end do
|
|
!write(0,*)me,' Check on sizes into a2a:',adj(:),nadj,':',sdsz(:)
|
|
|
|
call mpi_alltoall(sdsz,1,psb_mpi_mpk_,&
|
|
& rvsz,1,psb_mpi_mpk_,icomm,minfo)
|
|
if (do_timings) call psb_toc(idx_phase11)
|
|
if (do_timings) call psb_tic(idx_phase12)
|
|
rvidx(0) = 0
|
|
do i=0, np-1
|
|
rvidx(i+1) = rvidx(i) + rvsz(i)
|
|
end do
|
|
hsize = rvidx(np)
|
|
|
|
! write(0,*)me,' Check on sizes from a2a:',hsize,rvsz(:)
|
|
!
|
|
! Second, allocate buffers and exchange data
|
|
!
|
|
if (do_timings) call psb_toc(idx_phase12)
|
|
if (do_timings) call psb_tic(idx_phase13)
|
|
Allocate(rmtidx(hsize),lclidx(max(hsize,nidx*nadj)),&
|
|
& tproc(max(hsize,nidx)),stat=info)
|
|
|
|
if (info /= psb_success_) then
|
|
call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
|
|
goto 9999
|
|
end if
|
|
|
|
call mpi_alltoallv(idx,sdsz,sdidx,psb_mpi_lpk_,&
|
|
& rmtidx,rvsz,rvidx,psb_mpi_lpk_,icomm,iret)
|
|
if (do_timings) call psb_toc(idx_phase13)
|
|
if (do_timings) call psb_toc(idx_phase1)
|
|
if (do_timings) call psb_tic(idx_phase2)
|
|
!
|
|
! Third, compute local answers
|
|
!
|
|
call idxmap%g2l(rmtidx(1:hsize),lclidx(1:hsize),info,owned=.true.)
|
|
do i=1, hsize
|
|
tproc(i) = -1
|
|
if ((0 < lclidx(i)).and. (lclidx(i) <= n_row)) tproc(i) = me
|
|
end do
|
|
if (do_timings) call psb_toc(idx_phase2)
|
|
if (do_timings) call psb_tic(idx_phase3)
|
|
|
|
!
|
|
! Fourth, exchange the answers
|
|
!
|
|
! Adjust sdidx for reuse in receiving lclidx array
|
|
do i=0,np-1
|
|
sdidx(i+1) = sdidx(i) + sdsz(i)
|
|
end do
|
|
call mpi_alltoallv(tproc,rvsz,rvidx,psb_mpi_ipk_,&
|
|
& lclidx,sdsz,sdidx,psb_mpi_ipk_,icomm,iret)
|
|
|
|
!
|
|
! Because IPRC has been initialized to -1, the MAX operation selects
|
|
! the answers.
|
|
!
|
|
do i=0, np-1
|
|
if (sdsz(i)>0) then
|
|
! Must be nidx == sdsz(i)
|
|
iprc(1:nidx) = max(iprc(1:nidx), lclidx(sdidx(i)+1:sdidx(i)+sdsz(i)))
|
|
end if
|
|
end do
|
|
if (do_timings) call psb_toc(idx_phase3)
|
|
|
|
if (debug) write(0,*) me,' End of adjcncy_fnd ',iprc(1:nidx)
|
|
|
|
case(psi_adj_fnd_irecv_)
|
|
|
|
if (do_timings) call psb_tic(idx_phase1)
|
|
!
|
|
! First simple minded version with auxiliary arrays
|
|
! dimensioned on NP.
|
|
! Could it be improved with a loop based on the maximum length
|
|
! of adj(:) ???
|
|
!
|
|
Allocate(hidx(0:np),hsz(np),sdsz(0:np-1),rvsz(0:np-1),&
|
|
& sdhd(0:np-1), rvhd(0:np-1), p2pstat(mpi_status_size,0:np-1),&
|
|
& stat=info)
|
|
if (do_timings) call psb_tic(idx_phase11)
|
|
sdhd(:) = mpi_request_null
|
|
rvhd(:) = mpi_request_null
|
|
!
|
|
! First, send sizes according to adjcncy list
|
|
!
|
|
sdsz = 0
|
|
do j=1, nadj
|
|
sdsz(adj(j)) = nidx
|
|
end do
|
|
!write(0,*)me,' Check on sizes into a2a:',adj(:),nadj,':',sdsz(:)
|
|
|
|
call mpi_alltoall(sdsz,1,psb_mpi_mpk_,&
|
|
& rvsz,1,psb_mpi_mpk_,icomm,minfo)
|
|
hidx(0) = 0
|
|
do i=0, np-1
|
|
hidx(i+1) = hidx(i) + rvsz(i)
|
|
end do
|
|
hsize = hidx(np)
|
|
! write(0,*)me,' Check on sizes from a2a:',hsize,rvsz(:)
|
|
!
|
|
! Second, allocate buffers and exchange data
|
|
!
|
|
Allocate(rmtidx(hsize),lclidx(max(hsize,nidx*nadj)),tproc(max(hsize,nidx)),stat=info)
|
|
|
|
if (info /= psb_success_) then
|
|
call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
|
|
goto 9999
|
|
end if
|
|
do i = 0, np-1
|
|
if (rvsz(i)>0) then
|
|
! write(0,*) me, ' First receive from ',i,rvsz(i)
|
|
prc = psb_get_mpi_rank(ctxt,i)
|
|
p2ptag = psb_long_swap_tag
|
|
!write(0,*) me, ' Posting first receive from ',i,rvsz(i),prc
|
|
call mpi_irecv(rmtidx(hidx(i)+1),rvsz(i),&
|
|
& psb_mpi_lpk_,prc,&
|
|
& p2ptag, icomm,rvhd(i),iret)
|
|
end if
|
|
end do
|
|
if (do_timings) call psb_toc(idx_phase11)
|
|
if (do_timings) call psb_tic(idx_phase12)
|
|
do j=1, nadj
|
|
if (nidx > 0) then
|
|
prc = psb_get_mpi_rank(ctxt,adj(j))
|
|
p2ptag = psb_long_swap_tag
|
|
!write(0,*) me, ' First send to ',adj(j),nidx, prc
|
|
call mpi_send(idx,nidx,&
|
|
& psb_mpi_lpk_,prc,&
|
|
& p2ptag, icomm,iret)
|
|
end if
|
|
end do
|
|
if (do_timings) call psb_toc(idx_phase12)
|
|
if (do_timings) call psb_tic(idx_phase13)
|
|
call mpi_waitall(np,rvhd,p2pstat,iret)
|
|
if (do_timings) call psb_toc(idx_phase13)
|
|
if (do_timings) call psb_toc(idx_phase1)
|
|
if (do_timings) call psb_tic(idx_phase2)
|
|
|
|
!
|
|
! Third, compute local answers
|
|
!
|
|
call idxmap%g2l(rmtidx(1:hsize),lclidx(1:hsize),info,owned=.true.)
|
|
do i=1, hsize
|
|
tproc(i) = -1
|
|
if ((0 < lclidx(i)).and. (lclidx(i) <= n_row)) tproc(i) = me
|
|
end do
|
|
if (do_timings) call psb_toc(idx_phase2)
|
|
if (do_timings) call psb_tic(idx_phase3)
|
|
!
|
|
! At this point we can reuse lclidx to receive messages
|
|
!
|
|
rvhd(:) = mpi_request_null
|
|
do j=1, nadj
|
|
!write(0,*) me, ' First send to ',adj(j),nidx
|
|
if (nidx > 0) then
|
|
prc = psb_get_mpi_rank(ctxt,adj(j))
|
|
p2ptag = psb_int_swap_tag
|
|
!write(0,*) me, ' Posting second receive from ',adj(j),nidx, prc
|
|
call mpi_irecv(lclidx((j-1)*nidx+1),nidx, &
|
|
& psb_mpi_ipk_,prc,&
|
|
& p2ptag, icomm,rvhd(j),iret)
|
|
end if
|
|
end do
|
|
|
|
!
|
|
! Fourth, send data back;
|
|
!
|
|
do i = 0, np-1
|
|
if (rvsz(i)>0) then
|
|
prc = psb_get_mpi_rank(ctxt,i)
|
|
p2ptag = psb_int_swap_tag
|
|
!write(0,*) me, ' Second send to ',i,rvsz(i), prc
|
|
call mpi_send(tproc(hidx(i)+1),rvsz(i),&
|
|
& psb_mpi_ipk_,prc,&
|
|
& p2ptag, icomm,iret)
|
|
end if
|
|
end do
|
|
!
|
|
! Fifth: receive and combine. MAX works because default
|
|
! answer is -1.
|
|
!
|
|
call mpi_waitall(np,rvhd,p2pstat,iret)
|
|
do j = 1, nadj
|
|
iprc(1:nidx) = max(iprc(1:nidx), lclidx((j-1)*nidx+1:(j-1)*nidx+nidx))
|
|
end do
|
|
if (do_timings) call psb_toc(idx_phase3)
|
|
if (debug) write(0,*) me,' End of adjcncy_fnd ',iprc(1:nidx)
|
|
|
|
case(psi_adj_fnd_pbrcv_)
|
|
|
|
Allocate(hidx(0:np),hsz(np),&
|
|
& sdsz(0:np-1),rvsz(0:np-1),stat=info)
|
|
!
|
|
! First, send sizes according to adjcncy list
|
|
!
|
|
sdsz = 0
|
|
do j=1, nadj
|
|
sdsz(adj(j)) = nidx
|
|
end do
|
|
!write(0,*)me,' Check on sizes into a2a:',adj(:),nadj,':',sdsz(:)
|
|
|
|
call mpi_alltoall(sdsz,1,psb_mpi_mpk_,&
|
|
& rvsz,1,psb_mpi_mpk_,icomm,minfo)
|
|
hidx(0) = 0
|
|
do i=0, np-1
|
|
hidx(i+1) = hidx(i) + rvsz(i)
|
|
end do
|
|
hsize = hidx(np)
|
|
! write(0,*)me,' Check on sizes from a2a:',hsize,rvsz(:)
|
|
!
|
|
! Second, allocate buffers and exchange data
|
|
!
|
|
Allocate(rmtidx(hsize),lclidx(hsize),tproc(max(hsize,nidx)),stat=info)
|
|
|
|
if (info /= psb_success_) then
|
|
call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
|
|
goto 9999
|
|
end if
|
|
do j=1, nadj
|
|
!write(0,*) me, ' First send to ',adj(j),nidx
|
|
if (nidx > 0) call psb_snd(ctxt,idx(1:nidx),adj(j))
|
|
end do
|
|
do i = 0, np-1
|
|
if (rvsz(i)>0) then
|
|
! write(0,*) me, ' First receive from ',i,rvsz(i)
|
|
call psb_rcv(ctxt,rmtidx(hidx(i)+1:hidx(i)+rvsz(i)),i)
|
|
end if
|
|
end do
|
|
|
|
!
|
|
! Third, compute local answers
|
|
!
|
|
call idxmap%g2l(rmtidx(1:hsize),lclidx(1:hsize),info,owned=.true.)
|
|
do i=1, hsize
|
|
tproc(i) = -1
|
|
if ((0 < lclidx(i)).and. (lclidx(i) <= n_row)) tproc(i) = me
|
|
end do
|
|
|
|
!
|
|
! Fourth, send data back;
|
|
!
|
|
do i = 0, np-1
|
|
if (rvsz(i)>0) then
|
|
!write(0,*) me, ' Second send to ',i,rvsz(i)
|
|
call psb_snd(ctxt,tproc(hidx(i)+1:hidx(i)+rvsz(i)),i)
|
|
end if
|
|
end do
|
|
!
|
|
! Fifth: receive and combine. MAX works because default
|
|
! answer is -1. Reuse tproc
|
|
!
|
|
do j = 1, nadj
|
|
!write(0,*) me, ' Second receive from ',adj(j), nidx
|
|
if (nidx > 0) call psb_rcv(ctxt,tproc(1:nidx),adj(j))
|
|
iprc(1:nidx) = max(iprc(1:nidx), tproc(1:nidx))
|
|
end do
|
|
case default
|
|
info = psb_err_internal_error_
|
|
call psb_errpush(info,name,a_err='invalid exchange alg choice')
|
|
goto 9999
|
|
end select
|
|
|
|
call psb_erractionrestore(err_act)
|
|
return
|
|
|
|
9999 call psb_error_handler(ctxt,err_act)
|
|
|
|
return
|
|
|
|
end subroutine psi_adjcncy_fnd_owner
|