[ADD] Added first version of MPI_Ineighbor_alltoallv call. For now it's just for double precision routines

communication
Stack-1 2 months ago
parent 3d92668973
commit a3ebde071d

@ -68,26 +68,32 @@ set(PSB_base_source_files
comm/internals/psi_cswaptran_a.F90
comm/internals/psi_sovrl_restr.f90
comm/psb_dhalo.f90
comm/psb_dhalo_new.f90
comm/psb_zgather_a.f90
comm/psb_zovrl.f90
comm/psb_mhalo_a.f90
comm/psb_zscatter_a.F90
comm/psb_chalo.f90
comm/psb_chalo_new.f90
comm/psb_zscatter.F90
comm/psb_cscatter_a.F90
comm/psb_cspgather.F90
comm/psb_cscatter.F90
comm/psb_shalo_a.f90
comm/psb_shalo_a_new.f90
comm/psb_cgather.f90
comm/psb_zhalo.f90
comm/psb_zhalo_new.f90
comm/psb_movrl_a.f90
comm/psb_chalo_a.f90
comm/psb_chalo_a_new.f90
# comm/psb_i2scatter_a.F90
comm/psb_sgather_a.f90
# comm/psb_i2ovrl_a.f90
comm/psb_zovrl_a.f90
comm/psb_covrl.f90
comm/psb_shalo.f90
comm/psb_shalo_new.f90
comm/psb_dscatter_a.F90
comm/psb_lgather.f90
comm/psb_iscatter.F90
@ -98,6 +104,7 @@ set(PSB_base_source_files
## comm/psb_lspgather.F90
## comm/psb_ispgather.F90
comm/psb_zhalo_a.f90
comm/psb_zhalo_a_new.f90
comm/psb_sscatter_a.F90
comm/psb_lscatter.F90
# comm/psb_i2gather_a.f90
@ -110,6 +117,7 @@ set(PSB_base_source_files
comm/psb_covrl_a.f90
comm/psb_sgather.f90
comm/psb_dhalo_a.f90
comm/psb_dhalo_a_new.f90
comm/psb_zgather.f90
comm/psb_igather.f90
comm/psb_sovrl.f90
@ -431,6 +439,7 @@ set(PSB_base_source_files
psblas/psb_zasum.f90
modules/comm/psi_z_comm_v_mod.f90
# modules/comm/psb_i2_comm_a_mod.f90
modules/comm/psb_neighbor_topology_mod.F90
modules/comm/psb_m_comm_a_mod.f90
modules/comm/psb_z_linmap_mod.f90
modules/comm/psi_s_comm_a_mod.f90

@ -1,12 +1,12 @@
include ../../Make.inc
OBJS = psb_dgather.o psb_dhalo.o psb_dovrl.o \
OBJS = psb_dgather.o psb_dhalo.o psb_dhalo_new.o psb_dovrl.o \
psb_sgather.o psb_shalo.o psb_sovrl.o \
psb_igather.o psb_ihalo.o psb_iovrl.o \
psb_lgather.o psb_lhalo.o psb_lovrl.o \
psb_cgather.o psb_chalo.o psb_covrl.o \
psb_zgather.o psb_zhalo.o psb_zovrl.o \
psb_dgather_a.o psb_dhalo_a.o psb_dovrl_a.o \
psb_dgather_a.o psb_dhalo_a.o psb_dhalo_a_new.o psb_dovrl_a.o \
psb_sgather_a.o psb_shalo_a.o psb_sovrl_a.o \
psb_mgather_a.o psb_mhalo_a.o psb_movrl_a.o \
psb_egather_a.o psb_ehalo_a.o psb_eovrl_a.o \

@ -74,10 +74,10 @@
!
!
! n - integer Number of columns in Y
! beta - real Choose overwrite or sum.
! beta - real Choose overwrite or sum.
! y - type(psb_@x@_vect_type) The data area
! desc_a - type(psb_desc_type). The communication descriptor.
! work(:) - real Buffer space. If not sufficient, will do
! desc_a - type(psb_desc_type). The communication descriptor.
! work(:) - real Buffer space. If not sufficient, will do
! our own internal allocation.
! info - integer. return code.
! data - integer which list is to be used to exchange data
@ -106,21 +106,21 @@ subroutine psi_dswapdata_vect(flag,beta,y,desc_a,work,info,data)
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_d_base_vect_type) :: y
real(psb_dpk_) :: beta
real(psb_dpk_), target :: work(:)
type(psb_desc_type), target :: desc_a
class(psb_d_base_vect_type) :: y
real(psb_dpk_), intent(in) :: beta
real(psb_dpk_), target :: work(:)
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, idxs, idxr, totxch, data_, err_act
class(psb_i_base_vect_type), pointer :: d_vidx
character(len=20) :: name
info=psb_success_
name='psi_swap_datav'
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, idxs, idxr, totxch, data_, err_act
class(psb_i_base_vect_type), pointer :: d_vidx
character(len=20) :: name
info = psb_success_
name = 'psi_swap_datav'
call psb_erractionsave(err_act)
ctxt = desc_a%get_context()
@ -159,9 +159,11 @@ subroutine psi_dswapdata_vect(flag,beta,y,desc_a,work,info,data)
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psi_dswapdata_vect
!
!
! Subroutine: psi_dswap_vidx_vect
@ -173,17 +175,20 @@ end subroutine psi_dswapdata_vect
! The real workhorse: the outer routine will only choose the index list
! this one takes the index list and does the actual exchange.
!
! This is a wrapper function that calls different communication schemes depending
! on the flag variable.
!
!
subroutine psi_dswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, &
& totxch,totsnd,totrcv,work,info)
subroutine psi_dswap_vidx_vect(ctxt,icomm,flag,beta,y,idx,&
& totxch,totsnd,totrcv,work,info)
use psi_mod, psb_protect_name => psi_dswap_vidx_vect
use psb_error_mod
use psb_realloc_mod
use psb_desc_mod
use psb_penv_mod
use psb_d_base_vect_mod
use psb_neighbor_topology_mod
#ifdef PSB_MPI_MOD
use mpi
#endif
@ -192,34 +197,126 @@ subroutine psi_dswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, &
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_mpk_), intent(in) :: icomm
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_d_base_vect_type) :: y
real(psb_dpk_) :: beta
real(psb_dpk_), target :: work(:)
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_mpk_), intent(in) :: icomm
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_d_base_vect_type) :: y
real(psb_dpk_), intent(in) :: beta
real(psb_dpk_), target :: work(:)
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
! local variables used to detect the communication scheme
logical :: swap_mpi, swap_sync, swap_send, swap_recv, swap_start, swap_wait
logical :: baseline, neighbor_a2av
! error handling variables
integer(psb_ipk_) :: err_act
integer(psb_mpk_) :: me, np
character(len=20) :: name
info=psb_success_
name='psi_dswap_vidx_vect'
call psb_erractionsave(err_act)
call psb_info(ctxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
swap_start = iand(flag,psb_swap_start_) /= 0
swap_wait = iand(flag,psb_swap_wait_) /= 0
baseline = swap_mpi .or. swap_send .or. swap_recv .or. swap_sync
neighbor_a2av = swap_start .or. swap_wait
if( (baseline.eqv..true.).and.(neighbor_a2av.eqv..true.) ) then
info=psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: both baseline and neighbor_a2av are true')
goto 9999
end if
if (baseline) then
call psi_dswap_baseline_vect(ctxt,icomm,flag,beta,y,idx,totxch,totsnd,totrcv,work,info)
if (info /= psb_success_) then
call psb_errpush(info,name,a_err='baseline swap')
goto 9999
end if
else if (neighbor_a2av) then
call psi_dswap_neighbor_topology_vect(ctxt,icomm,flag,beta,y,idx,totxch,totsnd,totrcv,work,info)
if (info /= psb_success_) then
call psb_errpush(info,name,a_err='neighbor a2av swap')
goto 9999
end if
else
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: neither baseline nor neighbor_a2av is true')
goto 9999
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psi_dswap_vidx_vect
subroutine psi_dswap_baseline_vect(ctxt,icomm,flag,beta,y,idx, &
& totxch,totsnd,totrcv,work,info)
use psi_mod, psb_protect_name => psi_dswap_baseline_vect
use psb_d_base_vect_mod
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
#ifdef PSB_MPI_MOD
use mpi
#endif
implicit none
#ifdef PSB_MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_mpk_), intent(in) :: icomm
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_d_base_vect_type) :: y
real(psb_dpk_), intent(in) :: beta
real(psb_dpk_), target :: work(:)
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
! locals
integer(psb_mpk_) :: np, me
integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size),&
& iret, nesd, nerv
integer(psb_mpk_) :: np, me
integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size),&
& iret, nesd, nerv
integer(psb_mpk_), allocatable :: prcid(:)
integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,&
& snd_pt, rcv_pt, pnti, n
& snd_pt, rcv_pt, pnti, n
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
& albf,do_send,do_recv
logical, parameter :: usersend=.false., debug=.false.
character(len=20) :: name
info=psb_success_
name='psi_swap_datav'
info = psb_success_
name = 'psi_dswap_baseline_vect'
call psb_erractionsave(err_act)
call psb_info(ctxt,me,np)
if (np == -1) then
info=psb_err_context_error_
info = psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
@ -229,6 +326,7 @@ subroutine psi_dswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, &
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
@ -244,7 +342,7 @@ subroutine psi_dswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, &
! Unfinished communication? Something is wrong....
!
info=psb_err_mpi_error_
call psb_errpush(info,name,m_err=(/-2/))
call psb_errpush(info,name,a_err='Unfinished communication? Something is wrong....')
goto 9999
end if
end if
@ -266,8 +364,8 @@ subroutine psi_dswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, &
if (debug) write(*,*) me,'Posting receive from',prcid(i),rcv_pt
p2ptag = psb_double_swap_tag
call mpi_irecv(y%combuf(rcv_pt),nerv,&
& psb_mpi_r_dpk_,prcid(i),&
& p2ptag, icomm,y%comid(i,2),iret)
& psb_mpi_r_dpk_,prcid(i),&
& p2ptag, icomm,y%comid(i,2),iret)
end if
pnti = pnti + nerv + nesd + 3
end do
@ -309,8 +407,8 @@ subroutine psi_dswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, &
if ((nesd>0).and.(proc_to_comm /= me)) then
call mpi_isend(y%combuf(snd_pt),nesd,&
& psb_mpi_r_dpk_,prcid(i),&
& p2ptag,icomm,y%comid(i,1),iret)
& psb_mpi_r_dpk_,prcid(i),&
& p2ptag,icomm,y%comid(i,1),iret)
end if
if(iret /= mpi_success) then
@ -365,8 +463,8 @@ subroutine psi_dswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, &
else if (proc_to_comm == me) then
if (nesd /= nerv) then
write(psb_err_unit,*) &
& 'Fatal error in swapdata: mismatch on self send',&
& nerv,nesd
& 'Fatal error in swapdata: mismatch on self send',&
& nerv,nesd
end if
y%combuf(rcv_pt:rcv_pt+nerv-1) = y%combuf(snd_pt:snd_pt+nesd-1)
end if
@ -386,7 +484,7 @@ subroutine psi_dswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, &
rcv_pt = 1+pnti+psb_n_elem_recv_
if (debug) write(0,*)me,' Received from: ',prcid(i),&
& y%combuf(rcv_pt:rcv_pt+nerv-1)
& y%combuf(rcv_pt:rcv_pt+nerv-1)
call y%sct(rcv_pt,nerv,idx,beta)
pnti = pnti + nerv + nesd + 3
end do
@ -417,7 +515,173 @@ subroutine psi_dswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, &
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psi_dswap_vidx_vect
end subroutine psi_dswap_baseline_vect
subroutine psi_dswap_neighbor_topology_vect(ctxt,icomm,flag,beta,y,idx, &
& totxch,totsnd,totrcv,work,info)
use psi_mod, psb_protect_name => psi_dswap_neighbor_topology_vect
use psb_d_base_vect_mod
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
use psb_neighbor_topology_mod
#ifdef PSB_MPI_MOD
use mpi
#endif
implicit none
#ifdef PSB_MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_mpk_), intent(in) :: icomm
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_d_base_vect_type) :: y
real(psb_dpk_), intent(in) :: beta
real(psb_dpk_), target :: work(:)
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
! locals
integer(psb_mpk_) :: np, me
integer(psb_mpk_) :: iret, p2pstat(mpi_status_size)
integer(psb_ipk_) :: err_act, topology_total_send, topology_total_recv, buffer_size
logical :: do_start, do_wait
logical, parameter :: debug = .false.
character(len=30) :: name
info = psb_success_
name = 'psi_dswap_nbr_vect'
call psb_erractionsave(err_act)
call psb_info(ctxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
do_start = iand(flag,psb_swap_start_) /= 0
do_wait = iand(flag,psb_swap_wait_) /= 0
call idx%sync()
! ---------------------------------------------------------
! START phase: build topology (if needed), gather, post MPI
! ---------------------------------------------------------
if (do_start) then
if(debug) write(*,*) me,' nbr_vect: starting data exchange'
! Lazy initialization: build the topology on first call
if (.not. y%neighbor_topology%is_initialized) then
if (debug) write(*,*) me,' nbr_vect: building topology'
call y%neighbor_topology%init(idx%v, totxch, totsnd, totrcv, &
& ctxt, icomm, info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_, name, &
& a_err='neighbor_topology_init')
goto 9999
end if
end if
topology_total_send = y%neighbor_topology%total_send
topology_total_recv = y%neighbor_topology%total_recv
! Buffer layout:
! combuf(1 : total_send) = send area
! combuf(total_send+1 : total_send+total_recv) = recv area
buffer_size = topology_total_send + topology_total_recv
call y%new_buffer(buffer_size, info)
if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_, name)
goto 9999
end if
y%communication_handle = mpi_request_null
! Gather send data into contiguous send buffer (polymorphic for GPU)
if (debug) write(*,*) me,' nbr_vect: gathering send data,', topology_total_send,' elems'
call y%gth(int(topology_total_send,psb_mpk_), &
& y%neighbor_topology%send_indexes, &
& y%combuf(1:topology_total_send))
! Wait for device (important for GPU subclasses)
call y%device_wait()
! Post non-blocking neighborhood alltoallv
if (debug) write(*,*) me,' nbr_vect: posting MPI_Ineighbor_alltoallv'
call mpi_ineighbor_alltoallv( &
& y%combuf(1), & ! send buffer
& y%neighbor_topology%send_counts, &
& y%neighbor_topology%send_displs, &
& psb_mpi_r_dpk_, &
& y%combuf(topology_total_send + 1), & ! recv buffer
& y%neighbor_topology%recv_counts, &
& y%neighbor_topology%recv_displs, &
& psb_mpi_r_dpk_, &
& y%neighbor_topology%graph_comm, &
& y%communication_handle, iret)
if (iret /= mpi_success) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
end if ! do_start
! ---------------------------------------------------------
! WAIT phase: complete MPI, scatter received data
! ---------------------------------------------------------
if (do_wait) then
if (y%communication_handle == mpi_request_null) then
! No matching start? Something is wrong
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/-2/))
goto 9999
end if
topology_total_send = y%neighbor_topology%total_send
topology_total_recv = y%neighbor_topology%total_recv
! Wait for the non-blocking collective to complete
if (debug) write(*,*) me,' nbr_vect: waiting on MPI request'
call mpi_wait(y%communication_handle, p2pstat, iret)
if (iret /= mpi_success) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
! Scatter received data to local vector positions (polymorphic for GPU)
if (debug) write(*,*) me,' nbr_vect: scattering recv data,', topology_total_recv,' elems'
call y%sct(int(topology_total_recv,psb_mpk_), &
& y%neighbor_topology%recv_indexes, &
& y%combuf(topology_total_send+1:topology_total_send+topology_total_recv), &
& beta)
! Clean up
y%communication_handle = mpi_request_null
call y%device_wait()
call y%maybe_free_buffer(info)
if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_, name)
goto 9999
end if
if (debug) write(*,*) me,' nbr_vect: done'
end if ! do_wait
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psi_dswap_neighbor_topology_vect
!
!
@ -444,10 +708,10 @@ subroutine psi_dswapdata_multivect(flag,beta,y,desc_a,work,info,data)
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_d_base_multivect_type) :: y
real(psb_dpk_) :: beta
real(psb_dpk_), target :: work(:)
type(psb_desc_type), target :: desc_a
class(psb_d_base_multivect_type) :: y
real(psb_dpk_), intent(in) :: beta
real(psb_dpk_), target :: work(:)
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
! locals
@ -535,7 +799,7 @@ subroutine psi_dswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx, &
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_d_base_multivect_type) :: y
real(psb_dpk_) :: beta
real(psb_dpk_), intent(in) :: beta
real(psb_dpk_), target :: work(:)
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
@ -762,3 +1026,5 @@ subroutine psi_dswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx, &
return
end subroutine psi_dswap_vidx_multivect

@ -0,0 +1,390 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! File: psb_dhalo_a_new.f90
!
! Subroutine: psb_dhalom_new
! This subroutine performs the exchange of the halo elements in a
! distributed dense matrix between all the processes.
! The comm_type argument selects the communication scheme:
! psb_comm_type_isend_ (0) = classic irecv/send (default)
! psb_comm_type_neigh_a2av_ (1) = MPI_Alltoallv collective
!
! Arguments:
! x - real,dimension(:,:). The local part of the dense matrix.
! desc_a - type(psb_desc_type). The communication descriptor.
! info - integer. Return code
! comm_type - integer. Communication scheme selector
! jx - integer(optional). The starting column of the global matrix.
! ik - integer(optional). The number of columns to gather.
! work - real(optional). Work area.
! tran - character(optional). Transpose exchange.
! mode - integer(optional). Communication mode (see Swapdata)
! data - integer(optional). Which index list in desc_a should be used.
!
subroutine psb_dhalom_new(x,desc_a,info,comm_type,jx,ik,work,tran,mode,data)
use psb_base_mod, psb_protect_name => psb_dhalom_new
use psi_mod
implicit none
real(psb_dpk_), intent(inout), target :: x(:,:)
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
real(psb_dpk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,jx,ik,data
character, intent(in), optional :: tran
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: np, me, k
integer(psb_ipk_) :: err_act, iix, jjx, maxk, nrow, imode, i,&
& liwork, data_, ldx
integer(psb_lpk_) :: m, n, ix, ijx
real(psb_dpk_), pointer :: iwork(:), xp(:,:)
character :: tran_
character(len=20) :: name, ch_err
logical :: aliw
name='psb_dhalom_new'
info=psb_success_
call psb_erractionsave(err_act)
if (psb_errstatus_fatal()) then
info = psb_err_internal_error_ ; goto 9999
end if
ctxt=desc_a%get_context()
! check on blacs grid
call psb_info(ctxt, me, np)
if (np == -1) then
info = psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
ix = 1
if (present(jx)) then
ijx = jx
else
ijx = 1
endif
m = desc_a%get_global_rows()
n = desc_a%get_global_cols()
nrow = desc_a%get_local_rows()
maxk=size(x,2)-ijx+1
if(present(ik)) then
if(ik > maxk) then
k=maxk
else
k=ik
end if
else
k = maxk
end if
if (present(tran)) then
tran_ = psb_toupper(tran)
else
tran_ = 'N'
endif
if (present(data)) then
data_ = data
else
data_ = psb_comm_halo_
endif
!
! Select the communication mode based on comm_type
!
select case(comm_type)
case(0)
!
! Classic irecv/send scheme (default)
!
if (present(mode)) then
imode = mode
else
imode = IOR(psb_swap_send_,psb_swap_recv_)
endif
case(1)
if(present(mode)) then
imode = mode
else
imode = IOR(psb_swap_start_,psb_swap_wait_)
endif
case default
info = psb_err_input_value_invalid_i_
call psb_errpush(info,name,i_err=(/5_psb_ipk_,comm_type,izero,izero,izero/))
goto 9999
end select
ldx = size(x,1)
! check vector correctness
call psb_chkvect(m,lone,ldx,ix,ijx,desc_a,info,iix,jjx,check_halo=.true.)
if(info /= psb_success_) then
info=psb_err_from_subroutine_ ; ch_err='psb_chkvect'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
liwork=nrow
if (present(work)) then
if(size(work) >= liwork) then
aliw=.false.
iwork => work
else
aliw=.true.
allocate(iwork(liwork),stat=info)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_realloc'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
end if
else
aliw=.true.
allocate(iwork(liwork),stat=info)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_realloc'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
end if
! exchange halo elements
xp => x(iix:size(x,1),jjx:jjx+k-1)
if(tran_ == 'N') then
call psi_swapdata(imode,k,dzero,xp,&
& desc_a,iwork,info,data=data_)
else if((tran_ == 'T').or.(tran_ == 'C')) then
call psi_swaptran(imode,k,done,xp,&
&desc_a,iwork,info)
else
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='invalid tran')
goto 9999
end if
if(info /= psb_success_) then
ch_err='PSI_swapdata'
call psb_errpush(psb_err_from_subroutine_,name,a_err=ch_err)
goto 9999
end if
if (aliw) deallocate(iwork)
nullify(iwork)
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psb_dhalom_new
!
! Subroutine: psb_dhalov_new
! This subroutine performs the exchange of the halo elements in a
! distributed dense vector between all the processes.
! The comm_type argument selects the communication scheme:
! psb_comm_type_isend_ (0) = classic irecv/send (default)
! psb_comm_type_neigh_a2av_ (1) = MPI_Alltoallv collective
!
! Arguments:
! x - real,dimension(:). The local part of the dense vector.
! desc_a - type(psb_desc_type). The communication descriptor.
! info - integer. Return code
! comm_type - integer. Communication scheme selector
! work - real(optional). Work area.
! tran - character(optional). Transpose exchange.
! mode - integer(optional). Communication mode (see Swapdata)
! data - integer(optional). Which index list in desc_a should be used.
!
subroutine psb_dhalov_new(x,desc_a,info,comm_type,work,tran,mode,data)
use psb_base_mod, psb_protect_name => psb_dhalov_new
use psi_mod
implicit none
real(psb_dpk_), intent(inout) :: x(:)
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
real(psb_dpk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,data
character, intent(in), optional :: tran
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: np, me
integer(psb_ipk_) :: err_act, ldx, iix, jjx, nrow, imode, liwork, data_
integer(psb_lpk_) :: m, n, ix, ijx
real(psb_dpk_), pointer :: iwork(:)
character :: tran_
character(len=20) :: name, ch_err
logical :: aliw
name='psb_dhalov_new'
info=psb_success_
call psb_erractionsave(err_act)
if (psb_errstatus_fatal()) then
info = psb_err_internal_error_ ; goto 9999
end if
ctxt=desc_a%get_context()
! check on blacs grid
call psb_info(ctxt, me, np)
if (np == -1) then
info = psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
ix = 1
ijx = 1
m = desc_a%get_global_rows()
n = desc_a%get_global_cols()
nrow = desc_a%get_local_rows()
if (present(tran)) then
tran_ = psb_toupper(tran)
else
tran_ = 'N'
endif
if (present(data)) then
data_ = data
else
data_ = psb_comm_halo_
endif
!
! Select the communication mode based on comm_type
!
select case(comm_type)
case(0)
!
! Classic irecv/send scheme (default)
!
if (present(mode)) then
imode = mode
else
imode = IOR(psb_swap_send_,psb_swap_recv_)
endif
case(1)
if(present(mode)) then
imode = mode
else
imode = IOR(psb_swap_start_,psb_swap_wait_)
endif
case default
info = psb_err_input_value_invalid_i_
call psb_errpush(info,name,i_err=(/4_psb_ipk_,comm_type,izero,izero,izero/))
goto 9999
end select
ldx = size(x,1)
! check vector correctness
call psb_chkvect(m,lone,ldx,ix,ijx,desc_a,info,iix,jjx,check_halo=.true.)
if(info /= psb_success_) then
info=psb_err_from_subroutine_ ; ch_err='psb_chkvect'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
liwork=nrow
if (present(work)) then
if(size(work) >= liwork) then
aliw=.false.
iwork => work
else
aliw=.true.
allocate(iwork(liwork),stat=info)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_realloc'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
end if
else
aliw=.true.
allocate(iwork(liwork),stat=info)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_realloc'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
end if
! exchange halo elements
if(tran_ == 'N') then
call psi_swapdata(imode,dzero,x(iix:size(x)),&
& desc_a,iwork,info,data=data_)
else if((tran_ == 'T').or.(tran_ == 'C')) then
call psi_swaptran(imode,done,x(iix:size(x)),&
& desc_a,iwork,info)
else
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='invalid tran')
goto 9999
end if
if(info /= psb_success_) then
ch_err='PSI_swapdata'
call psb_errpush(psb_err_from_subroutine_,name,a_err=ch_err)
goto 9999
end if
if (aliw) deallocate(iwork)
nullify(iwork)
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psb_dhalov_new

@ -0,0 +1,298 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! File: psb_dhalo_new.f90
!
! Subroutine: psb_dhalo_vect_new
! Halo exchange for a distributed vector.
! comm_type selects the communication scheme:
! psb_comm_type_isend_ (0) : classic isend/irecv (delegates to psi_swapdata)
! psb_comm_type_neigh_a2av_ (1) : MPI_Neighbor_alltoallv via pre-built topology
!
subroutine psb_dhalo_vect_new(x,desc_a,info,comm_type,work,tran,mode,data)
use psb_base_mod, psb_protect_name => psb_dhalo_vect_new
use psi_mod
implicit none
type(psb_d_vect_type), intent(inout) :: x
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
real(psb_dpk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,data
character, intent(in), optional :: tran
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np, me, err_act, &
& nrow, ncol, lldx, imode, liwork, data_
real(psb_dpk_),pointer :: iwork(:)
real(psb_dpk_), allocatable :: sndbuf(:), rcvbuf(:)
character :: tran_
character(len=40) :: name, ch_err
logical :: aliw
integer(psb_mpk_) :: iret
integer(psb_ipk_) :: k
name='psb_dhalo_vect_new'
info=psb_success_
call psb_erractionsave(err_act)
if (psb_errstatus_fatal()) then
info = psb_err_internal_error_ ; goto 9999
end if
ctxt=desc_a%get_context()
call psb_info(ctxt, me, np)
if (np == -1) then
info = psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(x%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols()
lldx = x%get_nrows()
if (present(tran)) then
tran_ = psb_toupper(tran)
else
tran_ = 'N'
endif
if (present(data)) then
data_ = data
else
data_ = psb_comm_halo_
endif
if ((info == 0).and.(lldx < ncol)) call x%reall(ncol,info)
if(info /= psb_success_) then
info=psb_err_from_subroutine_ ; ch_err='reall'
call psb_errpush(info,name,a_err=ch_err); goto 9999
end if
select case(comm_type)
case(0) ! psb_comm_type_isend_
! ---- Classic isend/irecv: delegate to psi_swapdata ----
if (present(mode)) then
imode = mode
else
imode = IOR(psb_swap_send_,psb_swap_recv_)
end if
liwork=nrow
if (present(work)) then
if(size(work) >= liwork) then
iwork => work; aliw=.false.
else
aliw=.true.; allocate(iwork(liwork),stat=info)
end if
else
aliw=.true.; allocate(iwork(liwork),stat=info)
end if
if(info /= psb_success_) then
ch_err='psb_realloc'
call psb_errpush(psb_err_from_subroutine_,name,a_err=ch_err)
goto 9999
end if
if(tran_ == 'N') then
call psi_swapdata(imode,dzero,x%v,desc_a,iwork,info,data=data_)
else if((tran_ == 'T').or.(tran_ == 'C')) then
call psi_swaptran(imode,done,x%v,desc_a,iwork,info)
else
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='invalid tran'); goto 9999
end if
if (info /= psb_success_) then
ch_err='PSI_swapdata'
call psb_errpush(psb_err_from_subroutine_,name,a_err=ch_err)
goto 9999
end if
if (aliw) deallocate(iwork)
nullify(iwork)
case(1) ! psb_comm_type_neigh_a2av_
! TODO
case default
info = psb_err_input_value_invalid_i_
call psb_errpush(info,name,i_err=(/5_psb_ipk_,comm_type,izero,izero,izero/))
goto 9999
end select
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psb_dhalo_vect_new
!
! Subroutine: psb_dhalo_multivect_new
! Halo exchange for a distributed multivector.
! comm_type selects the communication scheme:
! psb_comm_type_isend_ (0) : classic isend/irecv
! psb_comm_type_neigh_a2av_ (1) : MPI_Neighbor_alltoallv via pre-built topology
!
subroutine psb_dhalo_multivect_new(x,desc_a,info,comm_type,work,tran,mode,data)
use psb_base_mod, psb_protect_name => psb_dhalo_multivect_new
use psi_mod
implicit none
type(psb_d_multivect_type), intent(inout) :: x
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
real(psb_dpk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,data
character, intent(in), optional :: tran
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np, me, err_act, &
& nrow, ncol, lldx, imode, liwork, data_, nc
real(psb_dpk_),pointer :: iwork(:)
real(psb_dpk_), allocatable :: sndbuf(:), rcvbuf(:)
integer(psb_mpk_), allocatable :: mv_sndcnts(:), mv_rcvcnts(:), &
& mv_snddispls(:), mv_rcvdispls(:)
character :: tran_
character(len=40) :: name, ch_err
logical :: aliw
integer(psb_mpk_) :: iret
integer(psb_ipk_) :: k, j, bp, nn
name='psb_dhalo_multivect_new'
info=psb_success_
call psb_erractionsave(err_act)
if (psb_errstatus_fatal()) then
info = psb_err_internal_error_ ; goto 9999
end if
ctxt=desc_a%get_context()
call psb_info(ctxt, me, np)
if (np == -1) then
info = psb_err_context_error_
call psb_errpush(info,name); goto 9999
endif
if (.not.allocated(x%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name); goto 9999
endif
nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols()
lldx = x%get_nrows()
nc = x%get_ncols()
if (present(tran)) then
tran_ = psb_toupper(tran)
else
tran_ = 'N'
endif
if (present(data)) then
data_ = data
else
data_ = psb_comm_halo_
endif
if (lldx < ncol) call x%reall(ncol,nc,info)
if(info /= psb_success_) then
ch_err='psb_reall'
call psb_errpush(psb_err_from_subroutine_,name,a_err=ch_err)
goto 9999
end if
select case(comm_type)
case(0)
! ---- Classic isend/irecv ----
if (present(mode)) then
imode = mode
else
imode = IOR(psb_swap_send_,psb_swap_recv_)
end if
liwork=nrow
if (present(work)) then
if(size(work) >= liwork) then
iwork => work; aliw=.false.
else
aliw=.true.; allocate(iwork(liwork),stat=info)
end if
else
aliw=.true.; allocate(iwork(liwork),stat=info)
end if
if(info /= psb_success_) then
ch_err='psb_realloc'
call psb_errpush(psb_err_from_subroutine_,name,a_err=ch_err)
goto 9999
end if
if(tran_ == 'N') then
call psi_swapdata(imode,dzero,x%v,desc_a,iwork,info,data=data_)
else if((tran_ == 'T').or.(tran_ == 'C')) then
call psi_swaptran(imode,done,x%v,desc_a,iwork,info)
else
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='invalid tran'); goto 9999
end if
if (info /= psb_success_) then
ch_err='PSI_swapdata'
call psb_errpush(psb_err_from_subroutine_,name,a_err=ch_err)
goto 9999
end if
if (aliw) deallocate(iwork)
nullify(iwork)
case(1)
! TODO
case default
info = psb_err_input_value_invalid_i_
call psb_errpush(info,name,i_err=(/5_psb_ipk_,comm_type,izero,izero,izero/))
goto 9999
end select
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psb_dhalo_multivect_new

@ -28,6 +28,7 @@ COMMINT= penv/psi_penv_mod.o \
SERIAL_MODS=serial/psb_s_serial_mod.o serial/psb_d_serial_mod.o \
serial/psb_c_serial_mod.o serial/psb_z_serial_mod.o \
serial/psb_serial_mod.o \
comm/psb_neighbor_topology_mod.o \
serial/psb_i_base_vect_mod.o serial/psb_i_vect_mod.o\
serial/psb_l_base_vect_mod.o serial/psb_l_vect_mod.o\
serial/psb_d_base_vect_mod.o serial/psb_d_vect_mod.o\
@ -181,6 +182,7 @@ auxil/psb_string_mod.o auxil/psb_m_realloc_mod.o auxil/psb_e_realloc_mod.o auxil
auxil/psb_d_realloc_mod.o auxil/psb_c_realloc_mod.o auxil/psb_z_realloc_mod.o \
desc/psb_desc_const_mod.o psi_penv_mod.o: psb_const_mod.o
comm/psb_neighbor_topology_mod.o: psb_const_mod.o desc/psb_desc_const_mod.o
desc/psb_indx_map_mod.o desc/psb_hash_mod.o: psb_realloc_mod.o psb_const_mod.o desc/psb_desc_const_mod.o
auxil/psb_i_sort_mod.o auxil/psb_s_sort_mod.o auxil/psb_d_sort_mod.o auxil/psb_c_sort_mod.o auxil/psb_z_sort_mod.o \
@ -261,7 +263,7 @@ serial/psb_d_base_mat_mod.o: serial/psb_d_base_vect_mod.o
serial/psb_c_base_mat_mod.o: serial/psb_c_base_vect_mod.o
serial/psb_z_base_mat_mod.o: serial/psb_z_base_vect_mod.o
serial/psb_l_base_vect_mod.o: serial/psb_i_base_vect_mod.o
serial/psb_c_base_vect_mod.o serial/psb_s_base_vect_mod.o serial/psb_d_base_vect_mod.o serial/psb_z_base_vect_mod.o: serial/psb_i_base_vect_mod.o serial/psb_l_base_vect_mod.o
serial/psb_c_base_vect_mod.o serial/psb_s_base_vect_mod.o serial/psb_d_base_vect_mod.o serial/psb_z_base_vect_mod.o: serial/psb_i_base_vect_mod.o serial/psb_l_base_vect_mod.o comm/psb_neighbor_topology_mod.o
serial/psb_i_base_vect_mod.o serial/psb_l_base_vect_mod.o serial/psb_c_base_vect_mod.o serial/psb_s_base_vect_mod.o serial/psb_d_base_vect_mod.o serial/psb_z_base_vect_mod.o: auxil/psi_serial_mod.o psb_realloc_mod.o
serial/psb_s_mat_mod.o: serial/psb_s_base_mat_mod.o serial/psb_s_csr_mat_mod.o serial/psb_s_csc_mat_mod.o serial/psb_s_vect_mod.o \
serial/psb_i_vect_mod.o serial/psb_l_vect_mod.o

@ -76,6 +76,31 @@ module psb_c_comm_a_mod
end subroutine psb_chalov
end interface psb_halo
interface psb_halo_new
subroutine psb_chalom_new(x,desc_a,info,comm_type,jx,ik,work,tran,mode,data)
import
implicit none
complex(psb_spk_), intent(inout), target :: x(:,:)
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
complex(psb_spk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,jx,ik,data
character, intent(in), optional :: tran
end subroutine psb_chalom_new
subroutine psb_chalov_new(x,desc_a,info,comm_type,work,tran,mode,data)
import
implicit none
complex(psb_spk_), intent(inout) :: x(:)
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
complex(psb_spk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,data
character, intent(in), optional :: tran
end subroutine psb_chalov_new
end interface psb_halo_new
interface psb_scatter
subroutine psb_cscatterm(globx, locx, desc_a, info, root)

@ -57,6 +57,31 @@ module psb_c_comm_mod
end subroutine psb_covrl_multivect
end interface psb_ovrl
interface psb_halo_new
subroutine psb_chalo_vect_new(x,desc_a,info,comm_type,work,tran,mode,data)
import
implicit none
type(psb_c_vect_type), intent(inout) :: x
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
complex(psb_spk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,data
character, intent(in), optional :: tran
end subroutine psb_chalo_vect_new
subroutine psb_chalo_multivect_new(x,desc_a,info,comm_type,work,tran,mode,data)
import
implicit none
type(psb_c_multivect_type), intent(inout) :: x
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
complex(psb_spk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,data
character, intent(in), optional :: tran
end subroutine psb_chalo_multivect_new
end interface psb_halo_new
interface psb_halo
subroutine psb_chalo_vect(x,desc_a,info,work,tran,mode,data)
import

@ -53,6 +53,31 @@ module psb_d_comm_a_mod
end subroutine psb_dovrlv
end interface psb_ovrl
interface psb_halo_new
subroutine psb_dhalom_new(x,desc_a,info,comm_type,jx,ik,work,tran,mode,data)
import
implicit none
real(psb_dpk_), intent(inout), target :: x(:,:)
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
real(psb_dpk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,jx,ik,data
character, intent(in), optional :: tran
end subroutine psb_dhalom_new
subroutine psb_dhalov_new(x,desc_a,info,comm_type,work,tran,mode,data)
import
implicit none
real(psb_dpk_), intent(inout) :: x(:)
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
real(psb_dpk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,data
character, intent(in), optional :: tran
end subroutine psb_dhalov_new
end interface psb_halo_new
interface psb_halo
subroutine psb_dhalom(x,desc_a,info,jx,ik,work,tran,mode,data)
import

@ -57,6 +57,32 @@ module psb_d_comm_mod
end subroutine psb_dovrl_multivect
end interface psb_ovrl
interface psb_halo_new
subroutine psb_dhalo_vect_new(x,desc_a,info,comm_type,work,tran,mode,data)
import
implicit none
type(psb_d_vect_type), intent(inout) :: x
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
real(psb_dpk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,data
character, intent(in), optional :: tran
end subroutine psb_dhalo_vect_new
subroutine psb_dhalo_multivect_new(x,desc_a,info,comm_type,work,tran,mode,data)
import
implicit none
type(psb_d_multivect_type), intent(inout) :: x
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
real(psb_dpk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,data
character, intent(in), optional :: tran
end subroutine psb_dhalo_multivect_new
end interface psb_halo_new
interface psb_halo
subroutine psb_dhalo_vect(x,desc_a,info,work,tran,mode,data)
import

@ -0,0 +1,422 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! Module: psb_neighbor_topology_mod
! Provides a type to hold pre-built MPI neighborhood topology
! information for persistent/repeated halo exchanges via
! MPI_Neighbor_alltoallv (MPI >= 3.0).
!
! The topology is stored inside the vector type (psb_d_base_vect_type)
! and lazily created on the first psi_swapdata call with the
! neighbor-alltoallv communication mode. Once built it is reused
! for every subsequent halo exchange, avoiding the per-call overhead
! of re-scanning the index list and allocating temporary arrays.
!
! The graph communicator and per-neighbor counts/displacements
! are built once and reused.
!
! The gather/scatter index arrays (send_indexes, recv_indexes) record
! which local vector positions must be packed / unpacked.
!
module psb_neighbor_topology_mod
use psb_const_mod
use psb_desc_const_mod
use psb_error_mod
!
! Only import mpi_comm_null here (needed for type default initializer).
! Full MPI access is done inside each contained subroutine so that
! MPI symbols do NOT leak into modules that use psb_neighbor_topology_mod.
!
#ifdef PSB_MPI_MOD
use mpi, only: mpi_comm_null
#endif
implicit none
#ifdef PSB_MPI_H
include 'mpif.h'
#endif
type :: psb_neighbor_topology_type
!
! MPI dist-graph communicator (only communicating neighbors).
!
integer(psb_mpk_) :: graph_comm = mpi_comm_null
!
! Number of neighbors (processes I exchange with, excluding self).
!
integer(psb_ipk_) :: num_neighbors = 0
!
! Per-neighbor send/recv counts and displacements (units of
! single elements; for n-column multivectors multiply by n).
! send_counts(i) = number of elements sent to i-th neighbor
! recv_counts(i) = number of elements received from i-th neighbor
! send_displs(i) = displacement into contiguous send buffer
! recv_displs(i) = displacement into contiguous recv buffer
!
integer(psb_mpk_), allocatable :: send_counts(:), recv_counts(:)
integer(psb_mpk_), allocatable :: send_displs(:), recv_displs(:)
!
! Gather indexes: the k-th element of the send buffer is
! y%v( send_indexes(k) )
! Scatter indexes: the k-th element of the recv buffer goes to
! y%v( recv_indexes(k) )
!
integer(psb_ipk_), allocatable :: send_indexes(:)
integer(psb_ipk_), allocatable :: recv_indexes(:)
!
! Total number of elements to send / receive (per single column),
! excluding self-exchange.
!
integer(psb_ipk_) :: total_send = 0
integer(psb_ipk_) :: total_recv = 0
!
! Initialization flag.
!
logical :: is_initialized = .false.
contains
procedure, pass(topology) :: init => neighbor_topology_init
procedure, pass(topology) :: free => neighbor_topology_free
procedure, pass(topology) :: sizeof => neighbor_topology_sizeof
end type psb_neighbor_topology_type
contains
! ---------------------------------------------------------------
! neighbor_topology_init
!
! Parse the halo index list (obtained via desc_a%get_list_p)
! and build:
! - MPI dist-graph communicator with only the true neighbors
! - per-neighbor send/recv counts and displacements
! - contiguous gather/scatter index arrays
!
! The topology is stored inside the vector and lazily built
! on the first psi_swapdata call that uses the neighbor-alltoallv
! communication mode.
!
! Arguments:
! topology - the persistent state (output, intent inout)
! halo_index - halo_index array (from get_list_p, intent in)
! num_neighbors - number of exchanges (from get_list_p)
! total_send_elems - total send count (from get_list_p)
! total_recv_elems - total recv count (from get_list_p)
! ctxt - PSBLAS context
! icomm - MPI communicator
! info - error code (output)
! ---------------------------------------------------------------
subroutine neighbor_topology_init(topology, halo_index, num_neighbors, &
& total_send_elems, total_recv_elems, ctxt, icomm, info)
#ifdef PSB_MPI_MOD
use mpi
#endif
implicit none
#ifdef PSB_MPI_H
include 'mpif.h'
#endif
class(psb_neighbor_topology_type), intent(inout) :: topology
integer(psb_ipk_), intent(in) :: halo_index(:)
integer(psb_ipk_), intent(in) :: num_neighbors, total_send_elems, total_recv_elems
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_mpk_), intent(in) :: icomm
integer(psb_ipk_), intent(out) :: info
! locals
integer(psb_mpk_) :: iret
integer(psb_ipk_) :: i, k, idx_ptr, num_elem_recv, num_elem_send, partner_proc
integer(psb_ipk_) :: nbr_count, send_offset, recv_offset
integer(psb_mpk_), allocatable :: source_ranks(:), dest_ranks(:)
integer(psb_mpk_), allocatable :: source_weights(:), dest_weights(:)
integer(psb_mpk_) :: in_degree, out_degree
character(len=40) :: name
integer(psb_ipk_) :: proc_id
integer(psb_ipk_) :: position
integer(psb_ipk_) :: err_act
info = psb_success_
name = 'neighbor_topology_init'
call psb_erractionsave(err_act)
! Clean up any previous state
call topology%free(info)
! ----------------------------------------------------------
! First pass: count neighbors (excluding self) and totals
! ----------------------------------------------------------
topology%num_neighbors = 0
topology%total_send = 0
topology%total_recv = 0
if(size(halo_index) < 1) then
call psb_errpush(psb_err_topology_invalid_args_,name)
goto 9999
end if
allocate(source_ranks(num_neighbors), stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name, a_err='Source ranks allocation failed')
goto 9999
end if
allocate(dest_ranks(num_neighbors), stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name, a_err='Destination ranks allocation failed')
goto 9999
end if
allocate(source_weights(num_neighbors), stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name, a_err='Source weights allocation failed')
goto 9999
end if
allocate(dest_weights(num_neighbors), stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name, a_err='Destination weights allocation failed')
goto 9999
end if
allocate(topology%send_counts(num_neighbors), stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name, a_err='Send counts allocation failed')
goto 9999
end if
allocate(topology%recv_counts(num_neighbors), stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name, a_err='Receive counts allocation failed')
goto 9999
end if
allocate(topology%send_displs(num_neighbors), stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name, a_err='Send displacements allocation failed')
goto 9999
end if
allocate(topology%recv_displs(num_neighbors), stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name, a_err='Receive displacements allocation failed')
goto 9999
end if
! -----------------------------------------------------------
! Allocate the gather/scatter index arrays
! -----------------------------------------------------------
allocate(topology%send_indexes(total_send_elems), stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name, a_err='Send indexes allocation failed')
goto 9999
end if
allocate(topology%recv_indexes(total_recv_elems), stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name, a_err='Recv indexes allocation failed')
goto 9999
end if
! -----------------------------------------------------------
! Fill neighbor ranks, weights, counts, displacements,
! and gather/scatter index arrays.
!
! The halo_index layout per neighbor (starting at position):
! position + 0 : process id
! position + 1 : nerv (num recv elements)
! position + 2 .. +1+nerv : recv element indexes
! position + 2+nerv : nesd (num send elements)
! position + 3+nerv .. +2+nerv+nesd : send element indexes
! Total stride per neighbor: nerv + nesd + 3
! -----------------------------------------------------------
send_offset = 0
recv_offset = 0
position = 1
do i = 1, num_neighbors
proc_id = halo_index(position)
num_elem_recv = halo_index(position + 1)
num_elem_send = halo_index(position + num_elem_recv + 2)
! Fill source/destination ranks and weights (weights are all 1 for now)
source_ranks(i) = int(proc_id, psb_mpk_)
dest_ranks(i) = int(proc_id, psb_mpk_)
source_weights(i) = 1
dest_weights(i) = 1
! Counts and displacements (displs set BEFORE accumulating offset)
topology%send_counts(i) = int(num_elem_send, psb_mpk_)
topology%recv_counts(i) = int(num_elem_recv, psb_mpk_)
topology%send_displs(i) = int(send_offset, psb_mpk_)
topology%recv_displs(i) = int(recv_offset, psb_mpk_)
! Fill recv_indexes from halo_index(position+2 .. position+1+nerv)
do k = 1, num_elem_recv
topology%recv_indexes(recv_offset + k) = halo_index(position + psb_elem_recv_ + k - 1)
end do
! Fill send_indexes from halo_index(position+3+nerv .. position+2+nerv+nesd)
do k = 1, num_elem_send
topology%send_indexes(send_offset + k) = halo_index(position + num_elem_recv + psb_elem_send_ + k - 1)
end do
send_offset = send_offset + num_elem_send
recv_offset = recv_offset + num_elem_recv
topology%num_neighbors = topology%num_neighbors + 1
topology%total_send = topology%total_send + num_elem_send
topology%total_recv = topology%total_recv + num_elem_recv
position = position + num_elem_recv + num_elem_send + 3
end do
! ----------------------------------------------------------
! Sanity check: the totals computed from the neighbor list
! should match the totals returned by get_list_p.
! ----------------------------------------------------------
if (topology%total_send /= total_send_elems) then
info = psb_err_topology_args_mismatch_
call psb_errpush(info, name, a_err='Send elements mismatch')
goto 9999
end if
if (topology%total_recv /= total_recv_elems) then
info = psb_err_topology_args_mismatch_
call psb_errpush(info, name, a_err='Receive elements mismatch')
goto 9999
end if
if(topology%num_neighbors /= num_neighbors) then
info = psb_err_topology_args_mismatch_
call psb_errpush(info, name, a_err='Number of neighbors mismatch')
goto 9999
end if
! ----------------------------------------------------------
! Build the dist-graph communicator
! ----------------------------------------------------------
in_degree = topology%num_neighbors !! Just for clarity
out_degree = topology%num_neighbors !! Just for clarity
call mpi_dist_graph_create_adjacent(icomm, &
& in_degree, source_ranks, source_weights, &
& out_degree, dest_ranks, dest_weights, &
& mpi_info_null, .false., & ! Check this line for optimizations
& topology%graph_comm, info)
if (info /= mpi_success) then
info = psb_err_topology_error_
call psb_errpush(info, name)
goto 9999
end if
topology%is_initialized = .true.
! TODO: Is it safe to deallocate these temporary arrays here, or do we need them for the gather/scatter indexes?
! deallocate(source_ranks, dest_ranks, source_weights, dest_weights)
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine neighbor_topology_init
! ---------------------------------------------------------------
! neighbor_topology_free
! Release all resources held by the persistent state.
! ---------------------------------------------------------------
subroutine neighbor_topology_free(topology, info)
#ifdef PSB_MPI_MOD
use mpi
#endif
implicit none
#ifdef PSB_MPI_H
include 'mpif.h'
#endif
class(psb_neighbor_topology_type), intent(inout) :: topology
integer(psb_ipk_), intent(out) :: info
integer(psb_mpk_) :: iret
info = psb_success_
if (topology%graph_comm /= mpi_comm_null) then
call mpi_comm_free(topology%graph_comm, iret)
topology%graph_comm = mpi_comm_null
end if
if (allocated(topology%send_counts)) deallocate(topology%send_counts)
if (allocated(topology%recv_counts)) deallocate(topology%recv_counts)
if (allocated(topology%send_displs)) deallocate(topology%send_displs)
if (allocated(topology%recv_displs)) deallocate(topology%recv_displs)
if (allocated(topology%send_indexes)) deallocate(topology%send_indexes)
if (allocated(topology%recv_indexes)) deallocate(topology%recv_indexes)
topology%num_neighbors = 0
topology%total_send = 0
topology%total_recv = 0
topology%is_initialized = .false.
end subroutine neighbor_topology_free
! ---------------------------------------------------------------
! neighbor_topology_sizeof
! Return approximate memory footprint in bytes.
! ---------------------------------------------------------------
function neighbor_topology_sizeof(topology) result(val)
implicit none
class(psb_neighbor_topology_type), intent(in) :: topology
integer(psb_epk_) :: val
val = 0
val = val + psb_sizeof_ip * 6 ! scalar integers + logicals
if (allocated(topology%send_counts)) val = val + psb_sizeof_ip * size(topology%send_counts)
if (allocated(topology%recv_counts)) val = val + psb_sizeof_ip * size(topology%recv_counts)
if (allocated(topology%send_displs)) val = val + psb_sizeof_ip * size(topology%send_displs)
if (allocated(topology%recv_displs)) val = val + psb_sizeof_ip * size(topology%recv_displs)
if (allocated(topology%send_indexes)) val = val + psb_sizeof_ip * size(topology%send_indexes)
if (allocated(topology%recv_indexes)) val = val + psb_sizeof_ip * size(topology%recv_indexes)
end function neighbor_topology_sizeof
end module psb_neighbor_topology_mod

@ -76,6 +76,31 @@ module psb_s_comm_a_mod
end subroutine psb_shalov
end interface psb_halo
interface psb_halo_new
subroutine psb_shalom_new(x,desc_a,info,comm_type,jx,ik,work,tran,mode,data)
import
implicit none
real(psb_spk_), intent(inout), target :: x(:,:)
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
real(psb_spk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,jx,ik,data
character, intent(in), optional :: tran
end subroutine psb_shalom_new
subroutine psb_shalov_new(x,desc_a,info,comm_type,work,tran,mode,data)
import
implicit none
real(psb_spk_), intent(inout) :: x(:)
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
real(psb_spk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,data
character, intent(in), optional :: tran
end subroutine psb_shalov_new
end interface psb_halo_new
interface psb_scatter
subroutine psb_sscatterm(globx, locx, desc_a, info, root)

@ -57,6 +57,31 @@ module psb_s_comm_mod
end subroutine psb_sovrl_multivect
end interface psb_ovrl
interface psb_halo_new
subroutine psb_shalo_vect_new(x,desc_a,info,comm_type,work,tran,mode,data)
import
implicit none
type(psb_s_vect_type), intent(inout) :: x
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
real(psb_spk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,data
character, intent(in), optional :: tran
end subroutine psb_shalo_vect_new
subroutine psb_shalo_multivect_new(x,desc_a,info,comm_type,work,tran,mode,data)
import
implicit none
type(psb_s_multivect_type), intent(inout) :: x
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
real(psb_spk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,data
character, intent(in), optional :: tran
end subroutine psb_shalo_multivect_new
end interface psb_halo_new
interface psb_halo
subroutine psb_shalo_vect(x,desc_a,info,work,tran,mode,data)
import

@ -57,6 +57,31 @@ module psb_z_comm_mod
end subroutine psb_zovrl_multivect
end interface psb_ovrl
interface psb_halo_new
subroutine psb_zhalo_vect_new(x,desc_a,info,comm_type,work,tran,mode,data)
import
implicit none
type(psb_z_vect_type), intent(inout) :: x
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
complex(psb_dpk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,data
character, intent(in), optional :: tran
end subroutine psb_zhalo_vect_new
subroutine psb_zhalo_multivect_new(x,desc_a,info,comm_type,work,tran,mode,data)
import
implicit none
type(psb_z_multivect_type), intent(inout) :: x
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
complex(psb_dpk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,data
character, intent(in), optional :: tran
end subroutine psb_zhalo_multivect_new
end interface psb_halo_new
interface psb_halo
subroutine psb_zhalo_vect(x,desc_a,info,work,tran,mode,data)
import

@ -36,54 +36,94 @@ module psi_d_comm_v_mod
use psb_d_base_multivect_mod, only : psb_d_base_multivect_type
interface psi_swapdata
! ---------------------------------------------------------------
! Upper call in order to populate idx using desc_a%get_list_p
! ---------------------------------------------------------------
subroutine psi_dswapdata_vect(flag,beta,y,desc_a,work,info,data)
import
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_d_base_vect_type) :: y
real(psb_dpk_) :: beta
real(psb_dpk_),target :: work(:)
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
class(psb_d_base_vect_type) :: y
real(psb_dpk_), intent(in) :: beta
real(psb_dpk_),target :: work(:)
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
end subroutine psi_dswapdata_vect
subroutine psi_dswapdata_multivect(flag,beta,y,desc_a,work,info,data)
import
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_d_base_multivect_type) :: y
real(psb_dpk_) :: beta
real(psb_dpk_),target :: work(:)
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
class(psb_d_base_multivect_type) :: y
real(psb_dpk_), intent(in) :: beta
real(psb_dpk_),target :: work(:)
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
end subroutine psi_dswapdata_multivect
! ---------------------------------------------------------------
! Wrapper that calls different communications schemes depending on
! flag variable
! ---------------------------------------------------------------
subroutine psi_dswap_vidx_vect(ctxt,icomm,flag,beta,y,idx,&
& totxch,totsnd,totrcv,work,info)
import
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_mpk_), intent(in) :: icomm
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_d_base_vect_type) :: y
real(psb_dpk_) :: beta
real(psb_dpk_), target :: work(:)
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_mpk_), intent(in) :: icomm
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_d_base_vect_type) :: y
real(psb_dpk_), intent(in) :: beta
real(psb_dpk_), target :: work(:)
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
end subroutine psi_dswap_vidx_vect
subroutine psi_dswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx,&
& totxch,totsnd,totrcv,work,info)
import
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_mpk_), intent(in) :: icomm
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_d_base_multivect_type) :: y
real(psb_dpk_) :: beta
real(psb_dpk_), target :: work(:)
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_mpk_), intent(in) :: icomm
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_d_base_multivect_type) :: y
real(psb_dpk_), intent(in) :: beta
real(psb_dpk_), target :: work(:)
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
end subroutine psi_dswap_vidx_multivect
! ---------------------------------------------------------------
end interface psi_swapdata
interface psi_dswap_baseline_vect
subroutine psi_dswap_baseline_vect(ctxt,icomm,flag,beta,y,idx,&
& totxch,totsnd,totrcv,work,info)
import
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_mpk_), intent(in) :: icomm
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_d_base_vect_type) :: y
real(psb_dpk_), intent(in) :: beta
real(psb_dpk_), target :: work(:)
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
end subroutine psi_dswap_baseline_vect
end interface psi_dswap_baseline_vect
interface psi_dswap_neighbor_topology_vect
subroutine psi_dswap_neighbor_topology_vect(ctxt,icomm,flag,beta,y,idx,&
& totxch,totsnd,totrcv,work,info)
import
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_mpk_), intent(in) :: icomm
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_d_base_vect_type) :: y
real(psb_dpk_), intent(in) :: beta
real(psb_dpk_), target :: work(:)
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
end subroutine psi_dswap_neighbor_topology_vect
end interface psi_dswap_neighbor_topology_vect
interface psi_swaptran
subroutine psi_dswaptran_vect(flag,beta,y,desc_a,work,info,data)

@ -49,10 +49,12 @@ module psb_desc_const_mod
integer(psb_ipk_), parameter :: psb_setzero_ = 4
! The following are bit fields.
integer(psb_ipk_), parameter :: psb_swap_send_ = 1
integer(psb_ipk_), parameter :: psb_swap_recv_ = 2
integer(psb_ipk_), parameter :: psb_swap_sync_ = 4
integer(psb_ipk_), parameter :: psb_swap_mpi_ = 8
integer(psb_ipk_), parameter :: psb_swap_send_ = 1
integer(psb_ipk_), parameter :: psb_swap_recv_ = 2
integer(psb_ipk_), parameter :: psb_swap_sync_ = 4
integer(psb_ipk_), parameter :: psb_swap_mpi_ = 8
integer(psb_ipk_), parameter :: psb_swap_start_ = 16
integer(psb_ipk_), parameter :: psb_swap_wait_ = 32
integer(psb_ipk_), parameter :: psb_collective_start_ = 1
integer(psb_ipk_), parameter :: psb_collective_end_ = 2
integer(psb_ipk_), parameter :: psb_collective_sync_ = 4

@ -326,6 +326,9 @@ module psb_const_mod
integer(psb_ipk_), parameter, public :: psb_err_invalid_irst_ =5002
integer(psb_ipk_), parameter, public :: psb_err_invalid_preci_=5003
integer(psb_ipk_), parameter, public :: psb_err_invalid_preca_=5004
integer(psb_ipk_), parameter, public :: psb_err_topology_error_=6000
integer(psb_ipk_), parameter, public :: psb_err_topology_invalid_args_=6001
integer(psb_ipk_), parameter, public :: psb_err_topology_args_mismatch_=6002
type :: psb_ctxt_type

@ -390,13 +390,13 @@ contains
! pushes an error on the error stack
subroutine psb_errpush(err_c, r_name, a_err, i_err, l_err, m_err, e_err)
integer(psb_ipk_), intent(in) :: err_c
character(len=*), intent(in) :: r_name
character(len=*), optional :: a_err
integer(psb_ipk_), optional :: i_err(:)
integer(psb_lpk_), optional :: l_err(:)
integer(psb_mpk_), optional :: m_err(:)
integer(psb_epk_), optional :: e_err(:)
integer(psb_ipk_), intent(in) :: err_c
character(len=*), intent(in) :: r_name
character(len=*), optional :: a_err
integer(psb_ipk_), optional :: i_err(:)
integer(psb_lpk_), optional :: l_err(:)
integer(psb_mpk_), optional :: m_err(:)
integer(psb_epk_), optional :: e_err(:)
call psb_set_errstatus(psb_err_fatal_)
call psb_stackpush(err_c, r_name, a_err, i_err, l_err, m_err, e_err)

@ -49,6 +49,7 @@ module psb_d_base_vect_mod
use psb_realloc_mod
use psb_i_base_vect_mod
use psb_l_base_vect_mod
use psb_neighbor_topology_mod
!> \namespace psb_base_mod \class psb_d_base_vect_type
!! The psb_d_base_vect_type
@ -62,9 +63,10 @@ module psb_d_base_vect_mod
!!
type psb_d_base_vect_type
!> Values.
real(psb_dpk_), allocatable :: v(:)
real(psb_dpk_), allocatable :: combuf(:)
integer(psb_mpk_), allocatable :: comid(:,:)
real(psb_dpk_), allocatable :: v(:)
real(psb_dpk_), allocatable :: combuf(:)
integer(psb_mpk_), allocatable :: comid(:,:) ! This is used only for Isend/Irecv scheme, to store the communication handles for each neighbor
integer(psb_mpk_) :: communication_handle ! This is used only for Isend/Irecv scheme, to store the communication handle for the whole halo exchange
!> vector bldstate:
!! null: pristine;
!! build: it's being filled with entries;
@ -73,10 +75,11 @@ module psb_d_base_vect_mod
!! in already existing entries.
!! The transitions among the states are detailed in
!! psb_T_vect_mod.
integer(psb_ipk_), private :: bldstate = psb_vect_null_
integer(psb_ipk_), private :: dupl = psb_dupl_null_
integer(psb_ipk_), private :: ncfs = 0
integer(psb_ipk_), allocatable :: iv(:)
integer(psb_ipk_), private :: bldstate = psb_vect_null_
integer(psb_ipk_), private :: dupl = psb_dupl_null_
integer(psb_ipk_), private :: ncfs = 0
integer(psb_ipk_), allocatable :: iv(:)
type(psb_neighbor_topology_type) :: neighbor_topology
contains
!
! Constructors/allocators
@ -249,6 +252,10 @@ module psb_d_base_vect_mod
procedure, pass(x) :: minquotient_a2 => d_base_minquotient_a2
generic, public :: minquotient => minquotient_v, minquotient_a2
! Methods used to handle topology in neighbor_alltoallv communication scheme
procedure, pass(x) :: init_topology => d_base_init_topology
procedure, pass(x) :: free_topology => d_base_free_topology
end type psb_d_base_vect_type
public :: psb_d_base_vect
@ -821,6 +828,7 @@ contains
if (allocated(x%v)) deallocate(x%v, stat=info)
if ((info == 0).and.allocated(x%combuf)) call x%free_buffer(info)
if ((info == 0).and.allocated(x%comid)) call x%free_comid(info)
x%communication_handle = 0
if ((info == 0).and.allocated(x%iv)) deallocate(x%iv, stat=info)
if (info /= 0) call &
& psb_errpush(psb_err_alloc_dealloc_,'vect_free')
@ -2595,12 +2603,44 @@ contains
class(psb_d_base_vect_type), intent(inout) :: x
real(psb_dpk_), intent(in) :: b
class(psb_d_base_vect_type), intent(inout) :: z
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(out) :: info
info = 0
if (x%is_dev()) call x%sync()
call z%addconst(x%v,b,info)
end subroutine d_base_addconst_v2
! --------------------------------------------------------------------
! Implementation of methods used for neighbor alltoallv communication
! --------------------------------------------------------------------
subroutine d_base_init_topology(x, halo_index, num_exchanges, &
& total_send_elems, total_recv_elems, ctxt, icomm, info)
implicit none
class(psb_d_base_vect_type), intent(inout) :: x
integer(psb_ipk_), intent(in) :: halo_index(:)
integer(psb_ipk_), intent(in) :: num_exchanges, total_send_elems, total_recv_elems
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_mpk_), intent(in) :: icomm
integer(psb_ipk_), intent(out) :: info
call x%neighbor_topology%init(halo_index, num_exchanges, &
& total_send_elems, total_recv_elems, ctxt, icomm, info)
end subroutine d_base_init_topology
subroutine d_base_free_topology(x, info)
implicit none
class(psb_d_base_vect_type), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
call x%neighbor_topology%free(info)
end subroutine d_base_free_topology
! --------------------------------------------------------------------
end module psb_d_base_vect_mod

@ -46,7 +46,7 @@ module psb_d_vect_mod
type psb_d_vect_type
class(psb_d_base_vect_type), allocatable :: v
integer(psb_ipk_) :: nrmv = 0
integer(psb_ipk_) :: remote_build=psb_matbld_noremote_
integer(psb_ipk_) :: remote_build = psb_matbld_noremote_
integer(psb_ipk_) :: dupl = psb_dupl_add_
real(psb_dpk_), allocatable :: rmtv(:)
integer(psb_lpk_), allocatable :: rmidx(:)

@ -131,6 +131,8 @@ extern "C" {
#define psb_swap_recv_ 2
#define psb_swap_sync_ 4
#define psb_swap_mpi_ 8
#define psb_swap_start_ 16
#define psb_swap_wait_ 32
/* legal values for ovrl update argument */
#define psb_none_ 0

@ -131,6 +131,8 @@ extern "C" {
#define psb_swap_recv_ 2
#define psb_swap_sync_ 4
#define psb_swap_mpi_ 8
#define psb_swap_start_ 16
#define psb_swap_wait_ 32
/* legal values for ovrl update argument */
#define psb_none_ 0

@ -0,0 +1,34 @@
INSTALLDIR=../..
INCDIR=$(INSTALLDIR)/include/
MODDIR=$(INSTALLDIR)/modules/
include $(INCDIR)/Make.inc.psblas
#
# Libraries used
#
LIBDIR=$(INSTALLDIR)/lib/
PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base
LDLIBS=$(PSBLDLIBS)
FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG).
TOBJS=test_halo_new.o
EXEDIR=./runs
all: runsd test_halo_new
runsd:
(if test ! -d runs ; then mkdir runs; fi)
test_halo_new: $(TOBJS)
$(FLINK) $(LOPT) $(TOBJS) -o test_halo_new $(PSBLAS_LIB) $(LDLIBS)
/bin/mv test_halo_new $(EXEDIR)
clean:
/bin/rm -f $(TOBJS) *$(.mod) $(EXEDIR)/test_halo_new
lib:
(cd ../../; make library)
verycleanlib:
(cd ../../; make veryclean)

@ -0,0 +1,279 @@
!
! Test program for D-type halo exchange: baseline vs neighbor topology.
!
! This test exercises the lower-level psi_swapdata interface directly
! to compare the two communication paths implemented in psi_dswapdata.F90:
!
! 1. Baseline (Isend/Irecv) : flag = IOR(psb_swap_send_, psb_swap_recv_)
! 2. Neighbor topology (Ineighbor_alltoallv) : flag = psb_swap_start_ then psb_swap_wait_
!
! It builds a 3D block-partitioned descriptor with a 7-point stencil,
! fills owned entries with their global index, performs halo exchange
! via both paths, then checks:
! (a) The two paths produce identical results (cross-check)
! (b) Every halo entry equals the global index of its source (absolute check)
!
! Run with: mpirun -np <P> ./test_halo_new
!
program test_halo_new
use psb_base_mod
use psi_mod
implicit none
! ---- parameters ----
integer(psb_ipk_), parameter :: idim = 10 ! grid idim x idim x idim
! ---- descriptor / context ----
type(psb_ctxt_type) :: ctxt
type(psb_desc_type) :: desc_a
integer(psb_ipk_) :: iam, np, info, i, nr, nlr
integer(psb_lpk_) :: m, nt
integer(psb_lpk_), allocatable :: myidx(:)
! ---- vectors ----
type(psb_d_vect_type) :: v_baseline, v_neighbor
! ---- temporary / comparison arrays ----
real(psb_dpk_), allocatable :: vals(:)
real(psb_dpk_), allocatable :: res_bl(:), res_nb(:)
real(psb_dpk_), allocatable :: expected(:)
! ---- work buffer for psi_swapdata ----
real(psb_dpk_), allocatable :: work(:)
! ---- halo index bookkeeping ----
integer(psb_ipk_) :: nrow, ncol, totxch, idxs, idxr, data_
class(psb_i_base_vect_type), pointer :: d_vidx
! ---- error / reporting ----
integer(psb_ipk_) :: n_pass, n_total, imode
real(psb_dpk_) :: err, tol
integer(psb_lpk_), allocatable :: glob_col(:)
character(len=40) :: name
name = 'test_halo_new'
tol = 1.0d-12
n_pass = 0
n_total = 0
! ==================================================================
! 1. Initialise MPI / PSBLAS context
! ==================================================================
call psb_init(ctxt)
call psb_info(ctxt, iam, np)
if (iam == 0) then
write(psb_out_unit,'("================================================")')
write(psb_out_unit,'(" Test: D-type halo baseline vs neighbor topo")')
write(psb_out_unit,'(" Processes : ",i0)') np
write(psb_out_unit,'(" Grid : ",i0," x ",i0," x ",i0)') idim,idim,idim
write(psb_out_unit,'("================================================")')
end if
! ==================================================================
! 2. Build descriptor with 7-point stencil connectivity
! ==================================================================
m = (1_psb_lpk_ * idim) * idim * idim
nt = (m + np - 1) / np
nr = max(0, min(int(nt,psb_ipk_), int(m - (iam * nt),psb_ipk_)))
call psb_cdall(ctxt, desc_a, info, nl=nr)
if (info /= psb_success_) then
write(psb_err_unit,*) iam, 'cdall error:', info
call psb_abort(ctxt)
end if
myidx = desc_a%get_global_indices()
nlr = size(myidx)
do i = 1, nlr
call psb_cdins(1_psb_ipk_, (/myidx(i)/), (/myidx(i)/), desc_a, info)
if (myidx(i) > 1) &
& call psb_cdins(1_psb_ipk_, (/myidx(i)/), (/myidx(i)-1/), desc_a, info)
if (myidx(i) < m) &
& call psb_cdins(1_psb_ipk_, (/myidx(i)/), (/myidx(i)+1/), desc_a, info)
if (myidx(i) > idim) &
& call psb_cdins(1_psb_ipk_, (/myidx(i)/), (/myidx(i)-idim/), desc_a, info)
if (myidx(i) + idim <= m) &
& call psb_cdins(1_psb_ipk_, (/myidx(i)/), (/myidx(i)+idim/), desc_a, info)
if (myidx(i) > int(idim,psb_lpk_)*idim) &
& call psb_cdins(1_psb_ipk_, (/myidx(i)/), &
& (/myidx(i) - int(idim,psb_lpk_)*idim/), desc_a, info)
if (myidx(i) + int(idim,psb_lpk_)*idim <= m) &
& call psb_cdins(1_psb_ipk_, (/myidx(i)/), &
& (/myidx(i) + int(idim,psb_lpk_)*idim/), desc_a, info)
end do
call psb_cdasb(desc_a, info)
if (info /= psb_success_) then
write(psb_err_unit,*) iam, 'cdasb error:', info
call psb_abort(ctxt)
end if
nrow = desc_a%get_local_rows() ! owned
ncol = desc_a%get_local_cols() ! owned + halo
! ==================================================================
! 3. Allocate two D vectors (scratch) and fill owned entries
! ==================================================================
call psb_geall(v_baseline, desc_a, info)
call psb_geall(v_neighbor, desc_a, info)
call psb_geasb(v_baseline, desc_a, info, scratch=.true.)
call psb_geasb(v_neighbor, desc_a, info, scratch=.true.)
! Fill owned entries with the global index value
allocate(vals(ncol))
vals = dzero
do i = 1, nlr
vals(i) = real(myidx(i), psb_dpk_)
end do
call v_baseline%set_vect(vals)
call v_neighbor%set_vect(vals)
deallocate(vals)
! ==================================================================
! 4. Build the expected result for halo positions
! glob_col(j) = global index of local column j
! After halo exchange every position j should hold glob_col(j).
! ==================================================================
allocate(glob_col(ncol), expected(ncol))
glob_col = desc_a%get_global_indices(owned=.false.)
do i = 1, ncol
expected(i) = real(glob_col(i), psb_dpk_)
end do
! ==================================================================
! 5. Retrieve halo index list (same list used by both paths)
! ==================================================================
data_ = psb_comm_halo_
call desc_a%get_list_p(data_, d_vidx, totxch, idxr, idxs, info)
if (info /= psb_success_) then
write(psb_err_unit,*) iam, 'get_list_p error:', info
call psb_abort(ctxt)
end if
allocate(work(nrow))
work = dzero
! ==================================================================
! 6. Baseline halo exchange (Isend/Irecv in one call)
! ==================================================================
imode = IOR(psb_swap_send_, psb_swap_recv_)
call psi_swapdata(ctxt, desc_a%get_mpic(), imode, dzero, &
& v_baseline%v, d_vidx, totxch, idxs, idxr, work, info)
if (info /= psb_success_) then
write(psb_err_unit,*) iam, 'baseline swap error:', info
call psb_abort(ctxt)
end if
! ==================================================================
! 7. Neighbor topology halo exchange (start + wait)
! ==================================================================
call psi_swapdata(ctxt, desc_a%get_mpic(), psb_swap_start_, dzero, &
& v_neighbor%v, d_vidx, totxch, idxs, idxr, work, info)
if (info /= psb_success_) then
write(psb_err_unit,*) iam, 'neighbor start error:', info
call psb_abort(ctxt)
end if
call psi_swapdata(ctxt, desc_a%get_mpic(), psb_swap_wait_, dzero, &
& v_neighbor%v, d_vidx, totxch, idxs, idxr, work, info)
if (info /= psb_success_) then
write(psb_err_unit,*) iam, 'neighbor wait error:', info
call psb_abort(ctxt)
end if
! ==================================================================
! 8. Extract results and compare
! ==================================================================
res_bl = v_baseline%get_vect()
res_nb = v_neighbor%get_vect()
! ---- Test 1: cross-check baseline vs neighbor (all entries) ----
n_total = n_total + 1
err = maxval(abs(res_bl(1:ncol) - res_nb(1:ncol)))
call psb_amx(ctxt, err)
if (iam == 0) then
if (err < tol) then
write(psb_out_unit,'(" [PASS] cross-check baseline vs neighbor : err = ",es12.5)') err
n_pass = n_pass + 1
else
write(psb_out_unit,'(" [FAIL] cross-check baseline vs neighbor : err = ",es12.5)') err
end if
end if
! ---- Test 2: baseline absolute correctness (halo = global index) ----
n_total = n_total + 1
err = maxval(abs(res_bl(1:ncol) - expected(1:ncol)))
call psb_amx(ctxt, err)
if (iam == 0) then
if (err < tol) then
write(psb_out_unit,'(" [PASS] baseline absolute correctness : err = ",es12.5)') err
n_pass = n_pass + 1
else
write(psb_out_unit,'(" [FAIL] baseline absolute correctness : err = ",es12.5)') err
end if
end if
! ---- Test 3: neighbor absolute correctness (halo = global index) ----
n_total = n_total + 1
err = maxval(abs(res_nb(1:ncol) - expected(1:ncol)))
call psb_amx(ctxt, err)
if (iam == 0) then
if (err < tol) then
write(psb_out_unit,'(" [PASS] neighbor absolute correctness : err = ",es12.5)') err
n_pass = n_pass + 1
else
write(psb_out_unit,'(" [FAIL] neighbor absolute correctness : err = ",es12.5)') err
end if
end if
! ---- Test 4: repeat neighbor exchange (topology reuse) ----
! Reset halo entries to zero, run again, and check
do i = nrow+1, ncol
res_nb(i) = dzero
end do
call v_neighbor%set_vect(res_nb)
call psi_swapdata(ctxt, desc_a%get_mpic(), psb_swap_start_, dzero, &
& v_neighbor%v, d_vidx, totxch, idxs, idxr, work, info)
call psi_swapdata(ctxt, desc_a%get_mpic(), psb_swap_wait_, dzero, &
& v_neighbor%v, d_vidx, totxch, idxs, idxr, work, info)
res_nb = v_neighbor%get_vect()
n_total = n_total + 1
err = maxval(abs(res_nb(1:ncol) - expected(1:ncol)))
call psb_amx(ctxt, err)
if (iam == 0) then
if (err < tol) then
write(psb_out_unit,'(" [PASS] neighbor topology reuse : err = ",es12.5)') err
n_pass = n_pass + 1
else
write(psb_out_unit,'(" [FAIL] neighbor topology reuse : err = ",es12.5)') err
end if
end if
! ==================================================================
! 9. Summary
! ==================================================================
if (iam == 0) then
write(psb_out_unit,'("================================================")')
write(psb_out_unit,'(" Results: ",i0," / ",i0," tests passed")') n_pass, n_total
if (n_pass == n_total) then
write(psb_out_unit,'(" STATUS: ALL PASSED")')
else
write(psb_out_unit,'(" STATUS: SOME FAILURES")')
end if
write(psb_out_unit,'("================================================")')
end if
! ==================================================================
! 10. Cleanup
! ==================================================================
deallocate(res_bl, res_nb, expected, glob_col, work)
call psb_gefree(v_baseline, desc_a, info)
call psb_gefree(v_neighbor, desc_a, info)
call psb_cdfree(desc_a, info)
call psb_exit(ctxt)
end program test_halo_new

1413
tmp.log

File diff suppressed because one or more lines are too long
Loading…
Cancel
Save