You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
psblas3/base/modules/comm/psb_neighbor_topology_mod.F90

423 lines
17 KiB
Fortran

!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! Module: psb_neighbor_topology_mod
! Provides a type to hold pre-built MPI neighborhood topology
! information for persistent/repeated halo exchanges via
! MPI_Neighbor_alltoallv (MPI >= 3.0).
!
! The topology is stored inside the vector type (psb_d_base_vect_type)
! and lazily created on the first psi_swapdata call with the
! neighbor-alltoallv communication mode. Once built it is reused
! for every subsequent halo exchange, avoiding the per-call overhead
! of re-scanning the index list and allocating temporary arrays.
!
! The graph communicator and per-neighbor counts/displacements
! are built once and reused.
!
! The gather/scatter index arrays (send_indexes, recv_indexes) record
! which local vector positions must be packed / unpacked.
!
module psb_neighbor_topology_mod
use psb_const_mod
use psb_desc_const_mod
use psb_error_mod
!
! Only import mpi_comm_null here (needed for type default initializer).
! Full MPI access is done inside each contained subroutine so that
! MPI symbols do NOT leak into modules that use psb_neighbor_topology_mod.
!
#ifdef PSB_MPI_MOD
use mpi, only: mpi_comm_null
#endif
implicit none
#ifdef PSB_MPI_H
include 'mpif.h'
#endif
type :: psb_neighbor_topology_type
!
! MPI dist-graph communicator (only communicating neighbors).
!
integer(psb_mpk_) :: graph_comm = mpi_comm_null
!
! Number of neighbors (processes I exchange with, excluding self).
!
integer(psb_ipk_) :: num_neighbors = 0
!
! Per-neighbor send/recv counts and displacements (units of
! single elements; for n-column multivectors multiply by n).
! send_counts(i) = number of elements sent to i-th neighbor
! recv_counts(i) = number of elements received from i-th neighbor
! send_displs(i) = displacement into contiguous send buffer
! recv_displs(i) = displacement into contiguous recv buffer
!
integer(psb_mpk_), allocatable :: send_counts(:), recv_counts(:)
integer(psb_mpk_), allocatable :: send_displs(:), recv_displs(:)
!
! Gather indexes: the k-th element of the send buffer is
! y%v( send_indexes(k) )
! Scatter indexes: the k-th element of the recv buffer goes to
! y%v( recv_indexes(k) )
!
integer(psb_ipk_), allocatable :: send_indexes(:)
integer(psb_ipk_), allocatable :: recv_indexes(:)
!
! Total number of elements to send / receive (per single column),
! excluding self-exchange.
!
integer(psb_ipk_) :: total_send = 0
integer(psb_ipk_) :: total_recv = 0
!
! Initialization flag.
!
logical :: is_initialized = .false.
contains
procedure, pass(topology) :: init => neighbor_topology_init
procedure, pass(topology) :: free => neighbor_topology_free
procedure, pass(topology) :: sizeof => neighbor_topology_sizeof
end type psb_neighbor_topology_type
contains
! ---------------------------------------------------------------
! neighbor_topology_init
!
! Parse the halo index list (obtained via desc_a%get_list_p)
! and build:
! - MPI dist-graph communicator with only the true neighbors
! - per-neighbor send/recv counts and displacements
! - contiguous gather/scatter index arrays
!
! The topology is stored inside the vector and lazily built
! on the first psi_swapdata call that uses the neighbor-alltoallv
! communication mode.
!
! Arguments:
! topology - the persistent state (output, intent inout)
! halo_index - halo_index array (from get_list_p, intent in)
! num_neighbors - number of exchanges (from get_list_p)
! total_send_elems - total send count (from get_list_p)
! total_recv_elems - total recv count (from get_list_p)
! ctxt - PSBLAS context
! icomm - MPI communicator
! info - error code (output)
! ---------------------------------------------------------------
subroutine neighbor_topology_init(topology, halo_index, num_neighbors, &
& total_send_elems, total_recv_elems, ctxt, icomm, info)
#ifdef PSB_MPI_MOD
use mpi
#endif
implicit none
#ifdef PSB_MPI_H
include 'mpif.h'
#endif
class(psb_neighbor_topology_type), intent(inout) :: topology
integer(psb_ipk_), intent(in) :: halo_index(:)
integer(psb_ipk_), intent(in) :: num_neighbors, total_send_elems, total_recv_elems
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_mpk_), intent(in) :: icomm
integer(psb_ipk_), intent(out) :: info
! locals
integer(psb_mpk_) :: iret
integer(psb_ipk_) :: i, k, idx_ptr, num_elem_recv, num_elem_send, partner_proc
integer(psb_ipk_) :: nbr_count, send_offset, recv_offset
integer(psb_mpk_), allocatable :: source_ranks(:), dest_ranks(:)
integer(psb_mpk_), allocatable :: source_weights(:), dest_weights(:)
integer(psb_mpk_) :: in_degree, out_degree
character(len=40) :: name
integer(psb_ipk_) :: proc_id
integer(psb_ipk_) :: position
integer(psb_ipk_) :: err_act
info = psb_success_
name = 'neighbor_topology_init'
call psb_erractionsave(err_act)
! Clean up any previous state
call topology%free(info)
! ----------------------------------------------------------
! First pass: count neighbors (excluding self) and totals
! ----------------------------------------------------------
topology%num_neighbors = 0
topology%total_send = 0
topology%total_recv = 0
if(size(halo_index) < 1) then
call psb_errpush(psb_err_topology_invalid_args_,name)
goto 9999
end if
allocate(source_ranks(num_neighbors), stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name, a_err='Source ranks allocation failed')
goto 9999
end if
allocate(dest_ranks(num_neighbors), stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name, a_err='Destination ranks allocation failed')
goto 9999
end if
allocate(source_weights(num_neighbors), stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name, a_err='Source weights allocation failed')
goto 9999
end if
allocate(dest_weights(num_neighbors), stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name, a_err='Destination weights allocation failed')
goto 9999
end if
allocate(topology%send_counts(num_neighbors), stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name, a_err='Send counts allocation failed')
goto 9999
end if
allocate(topology%recv_counts(num_neighbors), stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name, a_err='Receive counts allocation failed')
goto 9999
end if
allocate(topology%send_displs(num_neighbors), stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name, a_err='Send displacements allocation failed')
goto 9999
end if
allocate(topology%recv_displs(num_neighbors), stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name, a_err='Receive displacements allocation failed')
goto 9999
end if
! -----------------------------------------------------------
! Allocate the gather/scatter index arrays
! -----------------------------------------------------------
allocate(topology%send_indexes(total_send_elems), stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name, a_err='Send indexes allocation failed')
goto 9999
end if
allocate(topology%recv_indexes(total_recv_elems), stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name, a_err='Recv indexes allocation failed')
goto 9999
end if
! -----------------------------------------------------------
! Fill neighbor ranks, weights, counts, displacements,
! and gather/scatter index arrays.
!
! The halo_index layout per neighbor (starting at position):
! position + 0 : process id
! position + 1 : nerv (num recv elements)
! position + 2 .. +1+nerv : recv element indexes
! position + 2+nerv : nesd (num send elements)
! position + 3+nerv .. +2+nerv+nesd : send element indexes
! Total stride per neighbor: nerv + nesd + 3
! -----------------------------------------------------------
send_offset = 0
recv_offset = 0
position = 1
do i = 1, num_neighbors
proc_id = halo_index(position)
num_elem_recv = halo_index(position + 1)
num_elem_send = halo_index(position + num_elem_recv + 2)
! Fill source/destination ranks and weights (weights are all 1 for now)
source_ranks(i) = int(proc_id, psb_mpk_)
dest_ranks(i) = int(proc_id, psb_mpk_)
source_weights(i) = 1
dest_weights(i) = 1
! Counts and displacements (displs set BEFORE accumulating offset)
topology%send_counts(i) = int(num_elem_send, psb_mpk_)
topology%recv_counts(i) = int(num_elem_recv, psb_mpk_)
topology%send_displs(i) = int(send_offset, psb_mpk_)
topology%recv_displs(i) = int(recv_offset, psb_mpk_)
! Fill recv_indexes from halo_index(position+2 .. position+1+nerv)
do k = 1, num_elem_recv
topology%recv_indexes(recv_offset + k) = halo_index(position + psb_elem_recv_ + k - 1)
end do
! Fill send_indexes from halo_index(position+3+nerv .. position+2+nerv+nesd)
do k = 1, num_elem_send
topology%send_indexes(send_offset + k) = halo_index(position + num_elem_recv + psb_elem_send_ + k - 1)
end do
send_offset = send_offset + num_elem_send
recv_offset = recv_offset + num_elem_recv
topology%num_neighbors = topology%num_neighbors + 1
topology%total_send = topology%total_send + num_elem_send
topology%total_recv = topology%total_recv + num_elem_recv
position = position + num_elem_recv + num_elem_send + 3
end do
! ----------------------------------------------------------
! Sanity check: the totals computed from the neighbor list
! should match the totals returned by get_list_p.
! ----------------------------------------------------------
if (topology%total_send /= total_send_elems) then
info = psb_err_topology_args_mismatch_
call psb_errpush(info, name, a_err='Send elements mismatch')
goto 9999
end if
if (topology%total_recv /= total_recv_elems) then
info = psb_err_topology_args_mismatch_
call psb_errpush(info, name, a_err='Receive elements mismatch')
goto 9999
end if
if(topology%num_neighbors /= num_neighbors) then
info = psb_err_topology_args_mismatch_
call psb_errpush(info, name, a_err='Number of neighbors mismatch')
goto 9999
end if
! ----------------------------------------------------------
! Build the dist-graph communicator
! ----------------------------------------------------------
in_degree = topology%num_neighbors !! Just for clarity
out_degree = topology%num_neighbors !! Just for clarity
call mpi_dist_graph_create_adjacent(icomm, &
& in_degree, source_ranks, source_weights, &
& out_degree, dest_ranks, dest_weights, &
& mpi_info_null, .false., & ! Check this line for optimizations
& topology%graph_comm, info)
if (info /= mpi_success) then
info = psb_err_topology_error_
call psb_errpush(info, name)
goto 9999
end if
topology%is_initialized = .true.
! TODO: Is it safe to deallocate these temporary arrays here, or do we need them for the gather/scatter indexes?
! deallocate(source_ranks, dest_ranks, source_weights, dest_weights)
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine neighbor_topology_init
! ---------------------------------------------------------------
! neighbor_topology_free
! Release all resources held by the persistent state.
! ---------------------------------------------------------------
subroutine neighbor_topology_free(topology, info)
#ifdef PSB_MPI_MOD
use mpi
#endif
implicit none
#ifdef PSB_MPI_H
include 'mpif.h'
#endif
class(psb_neighbor_topology_type), intent(inout) :: topology
integer(psb_ipk_), intent(out) :: info
integer(psb_mpk_) :: iret
info = psb_success_
if (topology%graph_comm /= mpi_comm_null) then
call mpi_comm_free(topology%graph_comm, iret)
topology%graph_comm = mpi_comm_null
end if
if (allocated(topology%send_counts)) deallocate(topology%send_counts)
if (allocated(topology%recv_counts)) deallocate(topology%recv_counts)
if (allocated(topology%send_displs)) deallocate(topology%send_displs)
if (allocated(topology%recv_displs)) deallocate(topology%recv_displs)
if (allocated(topology%send_indexes)) deallocate(topology%send_indexes)
if (allocated(topology%recv_indexes)) deallocate(topology%recv_indexes)
topology%num_neighbors = 0
topology%total_send = 0
topology%total_recv = 0
topology%is_initialized = .false.
end subroutine neighbor_topology_free
! ---------------------------------------------------------------
! neighbor_topology_sizeof
! Return approximate memory footprint in bytes.
! ---------------------------------------------------------------
function neighbor_topology_sizeof(topology) result(val)
implicit none
class(psb_neighbor_topology_type), intent(in) :: topology
integer(psb_epk_) :: val
val = 0
val = val + psb_sizeof_ip * 6 ! scalar integers + logicals
if (allocated(topology%send_counts)) val = val + psb_sizeof_ip * size(topology%send_counts)
if (allocated(topology%recv_counts)) val = val + psb_sizeof_ip * size(topology%recv_counts)
if (allocated(topology%send_displs)) val = val + psb_sizeof_ip * size(topology%send_displs)
if (allocated(topology%recv_displs)) val = val + psb_sizeof_ip * size(topology%recv_displs)
if (allocated(topology%send_indexes)) val = val + psb_sizeof_ip * size(topology%send_indexes)
if (allocated(topology%recv_indexes)) val = val + psb_sizeof_ip * size(topology%recv_indexes)
end function neighbor_topology_sizeof
end module psb_neighbor_topology_mod