[FIX] Fixed compilation for psi_dswapdata routine
parent
4ee8b847e0
commit
e7e8a69373
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,422 @@
|
||||
!
|
||||
! Parallel Sparse BLAS version 3.5
|
||||
! (C) Copyright 2006-2018
|
||||
! Salvatore Filippone
|
||||
! Alfredo Buttari
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
!
|
||||
! Module: psb_neighbor_topology_mod
|
||||
! Provides a type to hold pre-built MPI neighborhood topology
|
||||
! information for persistent/repeated halo exchanges via
|
||||
! MPI_Neighbor_alltoallv (MPI >= 3.0).
|
||||
!
|
||||
! The topology is stored inside the vector type (psb_d_base_vect_type)
|
||||
! and lazily created on the first psi_swapdata call with the
|
||||
! neighbor-alltoallv communication mode. Once built it is reused
|
||||
! for every subsequent halo exchange, avoiding the per-call overhead
|
||||
! of re-scanning the index list and allocating temporary arrays.
|
||||
!
|
||||
! The graph communicator and per-neighbor counts/displacements
|
||||
! are built once and reused.
|
||||
!
|
||||
! The gather/scatter index arrays (send_indexes, recv_indexes) record
|
||||
! which local vector positions must be packed / unpacked.
|
||||
!
|
||||
module psb_neighbor_topology_mod
|
||||
use psb_const_mod
|
||||
use psb_desc_const_mod
|
||||
use psb_error_mod
|
||||
!
|
||||
! Only import mpi_comm_null here (needed for type default initializer).
|
||||
! Full MPI access is done inside each contained subroutine so that
|
||||
! MPI symbols do NOT leak into modules that use psb_neighbor_topology_mod.
|
||||
!
|
||||
#ifdef PSB_MPI_MOD
|
||||
use mpi, only: mpi_comm_null
|
||||
#endif
|
||||
implicit none
|
||||
#ifdef PSB_MPI_H
|
||||
include 'mpif.h'
|
||||
#endif
|
||||
|
||||
type :: psb_neighbor_topology_type
|
||||
!
|
||||
! MPI dist-graph communicator (only communicating neighbors).
|
||||
!
|
||||
integer(psb_mpk_) :: graph_comm = mpi_comm_null
|
||||
!
|
||||
! Number of neighbors (processes I exchange with, excluding self).
|
||||
!
|
||||
integer(psb_ipk_) :: num_neighbors = 0
|
||||
!
|
||||
! Per-neighbor send/recv counts and displacements (units of
|
||||
! single elements; for n-column multivectors multiply by n).
|
||||
! send_counts(i) = number of elements sent to i-th neighbor
|
||||
! recv_counts(i) = number of elements received from i-th neighbor
|
||||
! send_displs(i) = displacement into contiguous send buffer
|
||||
! recv_displs(i) = displacement into contiguous recv buffer
|
||||
!
|
||||
integer(psb_mpk_), allocatable :: send_counts(:), recv_counts(:)
|
||||
integer(psb_mpk_), allocatable :: send_displs(:), recv_displs(:)
|
||||
!
|
||||
! Gather indexes: the k-th element of the send buffer is
|
||||
! y%v( send_indexes(k) )
|
||||
! Scatter indexes: the k-th element of the recv buffer goes to
|
||||
! y%v( recv_indexes(k) )
|
||||
!
|
||||
integer(psb_ipk_), allocatable :: send_indexes(:)
|
||||
integer(psb_ipk_), allocatable :: recv_indexes(:)
|
||||
!
|
||||
! Total number of elements to send / receive (per single column),
|
||||
! excluding self-exchange.
|
||||
!
|
||||
integer(psb_ipk_) :: total_send = 0
|
||||
integer(psb_ipk_) :: total_recv = 0
|
||||
!
|
||||
|
||||
! Initialization flag.
|
||||
!
|
||||
logical :: is_initialized = .false.
|
||||
contains
|
||||
procedure, pass(topology) :: init => neighbor_topology_init
|
||||
procedure, pass(topology) :: free => neighbor_topology_free
|
||||
procedure, pass(topology) :: sizeof => neighbor_topology_sizeof
|
||||
end type psb_neighbor_topology_type
|
||||
|
||||
contains
|
||||
|
||||
! ---------------------------------------------------------------
|
||||
! neighbor_topology_init
|
||||
!
|
||||
! Parse the halo index list (obtained via desc_a%get_list_p)
|
||||
! and build:
|
||||
! - MPI dist-graph communicator with only the true neighbors
|
||||
! - per-neighbor send/recv counts and displacements
|
||||
! - contiguous gather/scatter index arrays
|
||||
!
|
||||
! The topology is stored inside the vector and lazily built
|
||||
! on the first psi_swapdata call that uses the neighbor-alltoallv
|
||||
! communication mode.
|
||||
!
|
||||
! Arguments:
|
||||
! topology - the persistent state (output, intent inout)
|
||||
! halo_index - halo_index array (from get_list_p, intent in)
|
||||
! num_neighbors - number of exchanges (from get_list_p)
|
||||
! total_send_elems - total send count (from get_list_p)
|
||||
! total_recv_elems - total recv count (from get_list_p)
|
||||
! ctxt - PSBLAS context
|
||||
! icomm - MPI communicator
|
||||
! info - error code (output)
|
||||
! ---------------------------------------------------------------
|
||||
subroutine neighbor_topology_init(topology, halo_index, num_neighbors, &
|
||||
& total_send_elems, total_recv_elems, ctxt, icomm, info)
|
||||
#ifdef PSB_MPI_MOD
|
||||
use mpi
|
||||
#endif
|
||||
implicit none
|
||||
#ifdef PSB_MPI_H
|
||||
include 'mpif.h'
|
||||
#endif
|
||||
|
||||
class(psb_neighbor_topology_type), intent(inout) :: topology
|
||||
integer(psb_ipk_), intent(in) :: halo_index(:)
|
||||
integer(psb_ipk_), intent(in) :: num_neighbors, total_send_elems, total_recv_elems
|
||||
type(psb_ctxt_type), intent(in) :: ctxt
|
||||
integer(psb_mpk_), intent(in) :: icomm
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
! locals
|
||||
integer(psb_mpk_) :: iret
|
||||
integer(psb_ipk_) :: i, k, idx_ptr, num_elem_recv, num_elem_send, partner_proc
|
||||
integer(psb_ipk_) :: neighbor_count, send_offset, recv_offset
|
||||
integer(psb_mpk_), allocatable :: source_ranks(:), dest_ranks(:)
|
||||
integer(psb_mpk_), allocatable :: source_weights(:), dest_weights(:)
|
||||
integer(psb_mpk_) :: in_degree, out_degree
|
||||
character(len=40) :: name
|
||||
integer(psb_ipk_) :: proc_id
|
||||
integer(psb_ipk_) :: position
|
||||
integer(psb_ipk_) :: err_act
|
||||
|
||||
info = psb_success_
|
||||
name = 'neighbor_topology_init'
|
||||
call psb_erractionsave(err_act)
|
||||
|
||||
! Clean up any previous state
|
||||
call topology%free(info)
|
||||
|
||||
! ----------------------------------------------------------
|
||||
! First pass: count neighbors (excluding self) and totals
|
||||
! ----------------------------------------------------------
|
||||
topology%num_neighbors = 0
|
||||
topology%total_send = 0
|
||||
topology%total_recv = 0
|
||||
|
||||
if(size(halo_index) < 1) then
|
||||
call psb_errpush(psb_err_topology_invalid_args_,name)
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
allocate(source_ranks(num_neighbors), stat=info)
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_alloc_dealloc_
|
||||
call psb_errpush(info, name, a_err='Source ranks allocation failed')
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
allocate(dest_ranks(num_neighbors), stat=info)
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_alloc_dealloc_
|
||||
call psb_errpush(info, name, a_err='Destination ranks allocation failed')
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
allocate(source_weights(num_neighbors), stat=info)
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_alloc_dealloc_
|
||||
call psb_errpush(info, name, a_err='Source weights allocation failed')
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
allocate(dest_weights(num_neighbors), stat=info)
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_alloc_dealloc_
|
||||
call psb_errpush(info, name, a_err='Destination weights allocation failed')
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
allocate(topology%send_counts(num_neighbors), stat=info)
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_alloc_dealloc_
|
||||
call psb_errpush(info, name, a_err='Send counts allocation failed')
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
allocate(topology%recv_counts(num_neighbors), stat=info)
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_alloc_dealloc_
|
||||
call psb_errpush(info, name, a_err='Receive counts allocation failed')
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
allocate(topology%send_displs(num_neighbors), stat=info)
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_alloc_dealloc_
|
||||
call psb_errpush(info, name, a_err='Send displacements allocation failed')
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
allocate(topology%recv_displs(num_neighbors), stat=info)
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_alloc_dealloc_
|
||||
call psb_errpush(info, name, a_err='Receive displacements allocation failed')
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
|
||||
! -----------------------------------------------------------
|
||||
! Allocate the gather/scatter index arrays
|
||||
! -----------------------------------------------------------
|
||||
allocate(topology%send_indexes(total_send_elems), stat=info)
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_alloc_dealloc_
|
||||
call psb_errpush(info, name, a_err='Send indexes allocation failed')
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
allocate(topology%recv_indexes(total_recv_elems), stat=info)
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_alloc_dealloc_
|
||||
call psb_errpush(info, name, a_err='Recv indexes allocation failed')
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
! -----------------------------------------------------------
|
||||
! Fill neighbor ranks, weights, counts, displacements,
|
||||
! and gather/scatter index arrays.
|
||||
!
|
||||
! The halo_index layout per neighbor (starting at position):
|
||||
! position + 0 : process id
|
||||
! position + 1 : nerv (num recv elements)
|
||||
! position + 2 .. +1+nerv : recv element indexes
|
||||
! position + 2+nerv : nesd (num send elements)
|
||||
! position + 3+nerv .. +2+nerv+nesd : send element indexes
|
||||
! Total stride per neighbor: nerv + nesd + 3
|
||||
! -----------------------------------------------------------
|
||||
send_offset = 0
|
||||
recv_offset = 0
|
||||
position = 1
|
||||
|
||||
do i = 1, num_neighbors
|
||||
proc_id = halo_index(position)
|
||||
num_elem_recv = halo_index(position + 1)
|
||||
num_elem_send = halo_index(position + num_elem_recv + 2)
|
||||
|
||||
! Fill source/destination ranks and weights (weights are all 1 for now)
|
||||
source_ranks(i) = int(proc_id, psb_mpk_)
|
||||
dest_ranks(i) = int(proc_id, psb_mpk_)
|
||||
source_weights(i) = 1
|
||||
dest_weights(i) = 1
|
||||
|
||||
! Counts and displacements (displs set BEFORE accumulating offset)
|
||||
topology%send_counts(i) = int(num_elem_send, psb_mpk_)
|
||||
topology%recv_counts(i) = int(num_elem_recv, psb_mpk_)
|
||||
topology%send_displs(i) = int(send_offset, psb_mpk_)
|
||||
topology%recv_displs(i) = int(recv_offset, psb_mpk_)
|
||||
|
||||
! Fill recv_indexes from halo_index(position+2 .. position+1+nerv)
|
||||
do k = 1, num_elem_recv
|
||||
topology%recv_indexes(recv_offset + k) = halo_index(position + psb_elem_recv_ + k - 1)
|
||||
end do
|
||||
|
||||
! Fill send_indexes from halo_index(position+3+nerv .. position+2+nerv+nesd)
|
||||
do k = 1, num_elem_send
|
||||
topology%send_indexes(send_offset + k) = halo_index(position + num_elem_recv + psb_elem_send_ + k - 1)
|
||||
end do
|
||||
|
||||
send_offset = send_offset + num_elem_send
|
||||
recv_offset = recv_offset + num_elem_recv
|
||||
|
||||
topology%num_neighbors = topology%num_neighbors + 1
|
||||
topology%total_send = topology%total_send + num_elem_send
|
||||
topology%total_recv = topology%total_recv + num_elem_recv
|
||||
|
||||
position = position + num_elem_recv + num_elem_send + 3
|
||||
end do
|
||||
|
||||
! ----------------------------------------------------------
|
||||
! Sanity check: the totals computed from the neighbor list
|
||||
! should match the totals returned by get_list_p.
|
||||
! ----------------------------------------------------------
|
||||
if (topology%total_send /= total_send_elems) then
|
||||
info = psb_err_topology_args_mismatch_
|
||||
call psb_errpush(info, name, a_err='Send elements mismatch')
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
if (topology%total_recv /= total_recv_elems) then
|
||||
info = psb_err_topology_args_mismatch_
|
||||
call psb_errpush(info, name, a_err='Receive elements mismatch')
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
if(topology%num_neighbors /= num_neighbors) then
|
||||
info = psb_err_topology_args_mismatch_
|
||||
call psb_errpush(info, name, a_err='Number of neighbors mismatch')
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
|
||||
! ----------------------------------------------------------
|
||||
! Build the dist-graph communicator
|
||||
! ----------------------------------------------------------
|
||||
in_degree = topology%num_neighbors !! Just for clarity
|
||||
out_degree = topology%num_neighbors !! Just for clarity
|
||||
|
||||
call mpi_dist_graph_create_adjacent(icomm, &
|
||||
& in_degree, source_ranks, source_weights, &
|
||||
& out_degree, dest_ranks, dest_weights, &
|
||||
& mpi_info_null, .false., & ! Check this line for optimizations
|
||||
& topology%graph_comm, info)
|
||||
if (info /= mpi_success) then
|
||||
info = psb_err_topology_error_
|
||||
call psb_errpush(info, name)
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
topology%is_initialized = .true.
|
||||
|
||||
! TODO: Is it safe to deallocate these temporary arrays here, or do we need them for the gather/scatter indexes?
|
||||
! deallocate(source_ranks, dest_ranks, source_weights, dest_weights)
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(ctxt,err_act)
|
||||
|
||||
return
|
||||
end subroutine neighbor_topology_init
|
||||
|
||||
|
||||
! ---------------------------------------------------------------
|
||||
! neighbor_topology_free
|
||||
! Release all resources held by the persistent state.
|
||||
! ---------------------------------------------------------------
|
||||
subroutine neighbor_topology_free(topology, info)
|
||||
#ifdef PSB_MPI_MOD
|
||||
use mpi
|
||||
#endif
|
||||
implicit none
|
||||
#ifdef PSB_MPI_H
|
||||
include 'mpif.h'
|
||||
#endif
|
||||
class(psb_neighbor_topology_type), intent(inout) :: topology
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
integer(psb_mpk_) :: iret
|
||||
|
||||
info = psb_success_
|
||||
|
||||
if (topology%graph_comm /= mpi_comm_null) then
|
||||
call mpi_comm_free(topology%graph_comm, iret)
|
||||
topology%graph_comm = mpi_comm_null
|
||||
end if
|
||||
|
||||
if (allocated(topology%send_counts)) deallocate(topology%send_counts)
|
||||
if (allocated(topology%recv_counts)) deallocate(topology%recv_counts)
|
||||
if (allocated(topology%send_displs)) deallocate(topology%send_displs)
|
||||
if (allocated(topology%recv_displs)) deallocate(topology%recv_displs)
|
||||
if (allocated(topology%send_indexes)) deallocate(topology%send_indexes)
|
||||
if (allocated(topology%recv_indexes)) deallocate(topology%recv_indexes)
|
||||
|
||||
topology%num_neighbors = 0
|
||||
topology%total_send = 0
|
||||
topology%total_recv = 0
|
||||
topology%is_initialized = .false.
|
||||
|
||||
end subroutine neighbor_topology_free
|
||||
|
||||
|
||||
! ---------------------------------------------------------------
|
||||
! neighbor_topology_sizeof
|
||||
! Return approximate memory footprint in bytes.
|
||||
! ---------------------------------------------------------------
|
||||
function neighbor_topology_sizeof(topology) result(val)
|
||||
implicit none
|
||||
class(psb_neighbor_topology_type), intent(in) :: topology
|
||||
integer(psb_epk_) :: val
|
||||
|
||||
val = 0
|
||||
val = val + psb_sizeof_ip * 6 ! scalar integers + logicals
|
||||
if (allocated(topology%send_counts)) val = val + psb_sizeof_ip * size(topology%send_counts)
|
||||
if (allocated(topology%recv_counts)) val = val + psb_sizeof_ip * size(topology%recv_counts)
|
||||
if (allocated(topology%send_displs)) val = val + psb_sizeof_ip * size(topology%send_displs)
|
||||
if (allocated(topology%recv_displs)) val = val + psb_sizeof_ip * size(topology%recv_displs)
|
||||
if (allocated(topology%send_indexes)) val = val + psb_sizeof_ip * size(topology%send_indexes)
|
||||
if (allocated(topology%recv_indexes)) val = val + psb_sizeof_ip * size(topology%recv_indexes)
|
||||
|
||||
|
||||
end function neighbor_topology_sizeof
|
||||
|
||||
end module psb_neighbor_topology_mod
|
||||
Loading…
Reference in New Issue