[ADD] Added first version of MPI_Ineighbor_alltoallv call. For now it's just for double precision routines

communication
Stack-1 3 months ago
parent 3d92668973
commit a3ebde071d

@ -68,26 +68,32 @@ set(PSB_base_source_files
comm/internals/psi_cswaptran_a.F90 comm/internals/psi_cswaptran_a.F90
comm/internals/psi_sovrl_restr.f90 comm/internals/psi_sovrl_restr.f90
comm/psb_dhalo.f90 comm/psb_dhalo.f90
comm/psb_dhalo_new.f90
comm/psb_zgather_a.f90 comm/psb_zgather_a.f90
comm/psb_zovrl.f90 comm/psb_zovrl.f90
comm/psb_mhalo_a.f90 comm/psb_mhalo_a.f90
comm/psb_zscatter_a.F90 comm/psb_zscatter_a.F90
comm/psb_chalo.f90 comm/psb_chalo.f90
comm/psb_chalo_new.f90
comm/psb_zscatter.F90 comm/psb_zscatter.F90
comm/psb_cscatter_a.F90 comm/psb_cscatter_a.F90
comm/psb_cspgather.F90 comm/psb_cspgather.F90
comm/psb_cscatter.F90 comm/psb_cscatter.F90
comm/psb_shalo_a.f90 comm/psb_shalo_a.f90
comm/psb_shalo_a_new.f90
comm/psb_cgather.f90 comm/psb_cgather.f90
comm/psb_zhalo.f90 comm/psb_zhalo.f90
comm/psb_zhalo_new.f90
comm/psb_movrl_a.f90 comm/psb_movrl_a.f90
comm/psb_chalo_a.f90 comm/psb_chalo_a.f90
comm/psb_chalo_a_new.f90
# comm/psb_i2scatter_a.F90 # comm/psb_i2scatter_a.F90
comm/psb_sgather_a.f90 comm/psb_sgather_a.f90
# comm/psb_i2ovrl_a.f90 # comm/psb_i2ovrl_a.f90
comm/psb_zovrl_a.f90 comm/psb_zovrl_a.f90
comm/psb_covrl.f90 comm/psb_covrl.f90
comm/psb_shalo.f90 comm/psb_shalo.f90
comm/psb_shalo_new.f90
comm/psb_dscatter_a.F90 comm/psb_dscatter_a.F90
comm/psb_lgather.f90 comm/psb_lgather.f90
comm/psb_iscatter.F90 comm/psb_iscatter.F90
@ -98,6 +104,7 @@ set(PSB_base_source_files
## comm/psb_lspgather.F90 ## comm/psb_lspgather.F90
## comm/psb_ispgather.F90 ## comm/psb_ispgather.F90
comm/psb_zhalo_a.f90 comm/psb_zhalo_a.f90
comm/psb_zhalo_a_new.f90
comm/psb_sscatter_a.F90 comm/psb_sscatter_a.F90
comm/psb_lscatter.F90 comm/psb_lscatter.F90
# comm/psb_i2gather_a.f90 # comm/psb_i2gather_a.f90
@ -110,6 +117,7 @@ set(PSB_base_source_files
comm/psb_covrl_a.f90 comm/psb_covrl_a.f90
comm/psb_sgather.f90 comm/psb_sgather.f90
comm/psb_dhalo_a.f90 comm/psb_dhalo_a.f90
comm/psb_dhalo_a_new.f90
comm/psb_zgather.f90 comm/psb_zgather.f90
comm/psb_igather.f90 comm/psb_igather.f90
comm/psb_sovrl.f90 comm/psb_sovrl.f90
@ -431,6 +439,7 @@ set(PSB_base_source_files
psblas/psb_zasum.f90 psblas/psb_zasum.f90
modules/comm/psi_z_comm_v_mod.f90 modules/comm/psi_z_comm_v_mod.f90
# modules/comm/psb_i2_comm_a_mod.f90 # modules/comm/psb_i2_comm_a_mod.f90
modules/comm/psb_neighbor_topology_mod.F90
modules/comm/psb_m_comm_a_mod.f90 modules/comm/psb_m_comm_a_mod.f90
modules/comm/psb_z_linmap_mod.f90 modules/comm/psb_z_linmap_mod.f90
modules/comm/psi_s_comm_a_mod.f90 modules/comm/psi_s_comm_a_mod.f90

@ -1,12 +1,12 @@
include ../../Make.inc include ../../Make.inc
OBJS = psb_dgather.o psb_dhalo.o psb_dovrl.o \ OBJS = psb_dgather.o psb_dhalo.o psb_dhalo_new.o psb_dovrl.o \
psb_sgather.o psb_shalo.o psb_sovrl.o \ psb_sgather.o psb_shalo.o psb_sovrl.o \
psb_igather.o psb_ihalo.o psb_iovrl.o \ psb_igather.o psb_ihalo.o psb_iovrl.o \
psb_lgather.o psb_lhalo.o psb_lovrl.o \ psb_lgather.o psb_lhalo.o psb_lovrl.o \
psb_cgather.o psb_chalo.o psb_covrl.o \ psb_cgather.o psb_chalo.o psb_covrl.o \
psb_zgather.o psb_zhalo.o psb_zovrl.o \ psb_zgather.o psb_zhalo.o psb_zovrl.o \
psb_dgather_a.o psb_dhalo_a.o psb_dovrl_a.o \ psb_dgather_a.o psb_dhalo_a.o psb_dhalo_a_new.o psb_dovrl_a.o \
psb_sgather_a.o psb_shalo_a.o psb_sovrl_a.o \ psb_sgather_a.o psb_shalo_a.o psb_sovrl_a.o \
psb_mgather_a.o psb_mhalo_a.o psb_movrl_a.o \ psb_mgather_a.o psb_mhalo_a.o psb_movrl_a.o \
psb_egather_a.o psb_ehalo_a.o psb_eovrl_a.o \ psb_egather_a.o psb_ehalo_a.o psb_eovrl_a.o \

@ -74,10 +74,10 @@
! !
! !
! n - integer Number of columns in Y ! n - integer Number of columns in Y
! beta - real Choose overwrite or sum. ! beta - real Choose overwrite or sum.
! y - type(psb_@x@_vect_type) The data area ! y - type(psb_@x@_vect_type) The data area
! desc_a - type(psb_desc_type). The communication descriptor. ! desc_a - type(psb_desc_type). The communication descriptor.
! work(:) - real Buffer space. If not sufficient, will do ! work(:) - real Buffer space. If not sufficient, will do
! our own internal allocation. ! our own internal allocation.
! info - integer. return code. ! info - integer. return code.
! data - integer which list is to be used to exchange data ! data - integer which list is to be used to exchange data
@ -106,21 +106,21 @@ subroutine psi_dswapdata_vect(flag,beta,y,desc_a,work,info,data)
integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info
class(psb_d_base_vect_type) :: y class(psb_d_base_vect_type) :: y
real(psb_dpk_) :: beta real(psb_dpk_), intent(in) :: beta
real(psb_dpk_), target :: work(:) real(psb_dpk_), target :: work(:)
type(psb_desc_type), target :: desc_a type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data integer(psb_ipk_), optional :: data
! locals ! locals
type(psb_ctxt_type) :: ctxt type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, idxs, idxr, totxch, data_, err_act integer(psb_ipk_) :: np, me, idxs, idxr, totxch, data_, err_act
class(psb_i_base_vect_type), pointer :: d_vidx class(psb_i_base_vect_type), pointer :: d_vidx
character(len=20) :: name character(len=20) :: name
info=psb_success_ info = psb_success_
name='psi_swap_datav' name = 'psi_swap_datav'
call psb_erractionsave(err_act) call psb_erractionsave(err_act)
ctxt = desc_a%get_context() ctxt = desc_a%get_context()
@ -159,9 +159,11 @@ subroutine psi_dswapdata_vect(flag,beta,y,desc_a,work,info,data)
9999 call psb_error_handler(ctxt,err_act) 9999 call psb_error_handler(ctxt,err_act)
return return
end subroutine psi_dswapdata_vect end subroutine psi_dswapdata_vect
! !
! !
! Subroutine: psi_dswap_vidx_vect ! Subroutine: psi_dswap_vidx_vect
@ -173,17 +175,20 @@ end subroutine psi_dswapdata_vect
! The real workhorse: the outer routine will only choose the index list ! The real workhorse: the outer routine will only choose the index list
! this one takes the index list and does the actual exchange. ! this one takes the index list and does the actual exchange.
! !
! This is a wrapper function that calls different communication schemes depending
! on the flag variable.
! !
! !
subroutine psi_dswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, & subroutine psi_dswap_vidx_vect(ctxt,icomm,flag,beta,y,idx,&
& totxch,totsnd,totrcv,work,info) & totxch,totsnd,totrcv,work,info)
use psi_mod, psb_protect_name => psi_dswap_vidx_vect use psi_mod, psb_protect_name => psi_dswap_vidx_vect
use psb_error_mod use psb_error_mod
use psb_realloc_mod use psb_realloc_mod
use psb_desc_mod use psb_desc_mod
use psb_penv_mod use psb_penv_mod
use psb_d_base_vect_mod use psb_d_base_vect_mod
use psb_neighbor_topology_mod
#ifdef PSB_MPI_MOD #ifdef PSB_MPI_MOD
use mpi use mpi
#endif #endif
@ -192,34 +197,126 @@ subroutine psi_dswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, &
include 'mpif.h' include 'mpif.h'
#endif #endif
type(psb_ctxt_type), intent(in) :: ctxt type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_mpk_), intent(in) :: icomm integer(psb_mpk_), intent(in) :: icomm
integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info
class(psb_d_base_vect_type) :: y class(psb_d_base_vect_type) :: y
real(psb_dpk_) :: beta real(psb_dpk_), intent(in) :: beta
real(psb_dpk_), target :: work(:) real(psb_dpk_), target :: work(:)
class(psb_i_base_vect_type), intent(inout) :: idx class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
! local variables used to detect the communication scheme
logical :: swap_mpi, swap_sync, swap_send, swap_recv, swap_start, swap_wait
logical :: baseline, neighbor_a2av
! error handling variables
integer(psb_ipk_) :: err_act
integer(psb_mpk_) :: me, np
character(len=20) :: name
info=psb_success_
name='psi_dswap_vidx_vect'
call psb_erractionsave(err_act)
call psb_info(ctxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
swap_start = iand(flag,psb_swap_start_) /= 0
swap_wait = iand(flag,psb_swap_wait_) /= 0
baseline = swap_mpi .or. swap_send .or. swap_recv .or. swap_sync
neighbor_a2av = swap_start .or. swap_wait
if( (baseline.eqv..true.).and.(neighbor_a2av.eqv..true.) ) then
info=psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: both baseline and neighbor_a2av are true')
goto 9999
end if
if (baseline) then
call psi_dswap_baseline_vect(ctxt,icomm,flag,beta,y,idx,totxch,totsnd,totrcv,work,info)
if (info /= psb_success_) then
call psb_errpush(info,name,a_err='baseline swap')
goto 9999
end if
else if (neighbor_a2av) then
call psi_dswap_neighbor_topology_vect(ctxt,icomm,flag,beta,y,idx,totxch,totsnd,totrcv,work,info)
if (info /= psb_success_) then
call psb_errpush(info,name,a_err='neighbor a2av swap')
goto 9999
end if
else
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: neither baseline nor neighbor_a2av is true')
goto 9999
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psi_dswap_vidx_vect
subroutine psi_dswap_baseline_vect(ctxt,icomm,flag,beta,y,idx, &
& totxch,totsnd,totrcv,work,info)
use psi_mod, psb_protect_name => psi_dswap_baseline_vect
use psb_d_base_vect_mod
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
#ifdef PSB_MPI_MOD
use mpi
#endif
implicit none
#ifdef PSB_MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_mpk_), intent(in) :: icomm
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_d_base_vect_type) :: y
real(psb_dpk_), intent(in) :: beta
real(psb_dpk_), target :: work(:)
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
! locals ! locals
integer(psb_mpk_) :: np, me integer(psb_mpk_) :: np, me
integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size),& integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size),&
& iret, nesd, nerv & iret, nesd, nerv
integer(psb_mpk_), allocatable :: prcid(:) integer(psb_mpk_), allocatable :: prcid(:)
integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,&
& snd_pt, rcv_pt, pnti, n & snd_pt, rcv_pt, pnti, n
logical :: swap_mpi, swap_sync, swap_send, swap_recv,& logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv & albf,do_send,do_recv
logical, parameter :: usersend=.false., debug=.false. logical, parameter :: usersend=.false., debug=.false.
character(len=20) :: name character(len=20) :: name
info=psb_success_ info = psb_success_
name='psi_swap_datav' name = 'psi_dswap_baseline_vect'
call psb_erractionsave(err_act) call psb_erractionsave(err_act)
call psb_info(ctxt,me,np) call psb_info(ctxt,me,np)
if (np == -1) then if (np == -1) then
info=psb_err_context_error_ info = psb_err_context_error_
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 goto 9999
endif endif
@ -229,6 +326,7 @@ subroutine psi_dswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, &
swap_sync = iand(flag,psb_swap_sync_) /= 0 swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0 swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0 swap_recv = iand(flag,psb_swap_recv_) /= 0
do_send = swap_mpi .or. swap_sync .or. swap_send do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv do_recv = swap_mpi .or. swap_sync .or. swap_recv
@ -244,7 +342,7 @@ subroutine psi_dswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, &
! Unfinished communication? Something is wrong.... ! Unfinished communication? Something is wrong....
! !
info=psb_err_mpi_error_ info=psb_err_mpi_error_
call psb_errpush(info,name,m_err=(/-2/)) call psb_errpush(info,name,a_err='Unfinished communication? Something is wrong....')
goto 9999 goto 9999
end if end if
end if end if
@ -266,8 +364,8 @@ subroutine psi_dswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, &
if (debug) write(*,*) me,'Posting receive from',prcid(i),rcv_pt if (debug) write(*,*) me,'Posting receive from',prcid(i),rcv_pt
p2ptag = psb_double_swap_tag p2ptag = psb_double_swap_tag
call mpi_irecv(y%combuf(rcv_pt),nerv,& call mpi_irecv(y%combuf(rcv_pt),nerv,&
& psb_mpi_r_dpk_,prcid(i),& & psb_mpi_r_dpk_,prcid(i),&
& p2ptag, icomm,y%comid(i,2),iret) & p2ptag, icomm,y%comid(i,2),iret)
end if end if
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
end do end do
@ -309,8 +407,8 @@ subroutine psi_dswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, &
if ((nesd>0).and.(proc_to_comm /= me)) then if ((nesd>0).and.(proc_to_comm /= me)) then
call mpi_isend(y%combuf(snd_pt),nesd,& call mpi_isend(y%combuf(snd_pt),nesd,&
& psb_mpi_r_dpk_,prcid(i),& & psb_mpi_r_dpk_,prcid(i),&
& p2ptag,icomm,y%comid(i,1),iret) & p2ptag,icomm,y%comid(i,1),iret)
end if end if
if(iret /= mpi_success) then if(iret /= mpi_success) then
@ -365,8 +463,8 @@ subroutine psi_dswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, &
else if (proc_to_comm == me) then else if (proc_to_comm == me) then
if (nesd /= nerv) then if (nesd /= nerv) then
write(psb_err_unit,*) & write(psb_err_unit,*) &
& 'Fatal error in swapdata: mismatch on self send',& & 'Fatal error in swapdata: mismatch on self send',&
& nerv,nesd & nerv,nesd
end if end if
y%combuf(rcv_pt:rcv_pt+nerv-1) = y%combuf(snd_pt:snd_pt+nesd-1) y%combuf(rcv_pt:rcv_pt+nerv-1) = y%combuf(snd_pt:snd_pt+nesd-1)
end if end if
@ -386,7 +484,7 @@ subroutine psi_dswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, &
rcv_pt = 1+pnti+psb_n_elem_recv_ rcv_pt = 1+pnti+psb_n_elem_recv_
if (debug) write(0,*)me,' Received from: ',prcid(i),& if (debug) write(0,*)me,' Received from: ',prcid(i),&
& y%combuf(rcv_pt:rcv_pt+nerv-1) & y%combuf(rcv_pt:rcv_pt+nerv-1)
call y%sct(rcv_pt,nerv,idx,beta) call y%sct(rcv_pt,nerv,idx,beta)
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
end do end do
@ -417,7 +515,173 @@ subroutine psi_dswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, &
9999 call psb_error_handler(ctxt,err_act) 9999 call psb_error_handler(ctxt,err_act)
return return
end subroutine psi_dswap_vidx_vect end subroutine psi_dswap_baseline_vect
subroutine psi_dswap_neighbor_topology_vect(ctxt,icomm,flag,beta,y,idx, &
& totxch,totsnd,totrcv,work,info)
use psi_mod, psb_protect_name => psi_dswap_neighbor_topology_vect
use psb_d_base_vect_mod
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
use psb_neighbor_topology_mod
#ifdef PSB_MPI_MOD
use mpi
#endif
implicit none
#ifdef PSB_MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_mpk_), intent(in) :: icomm
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_d_base_vect_type) :: y
real(psb_dpk_), intent(in) :: beta
real(psb_dpk_), target :: work(:)
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
! locals
integer(psb_mpk_) :: np, me
integer(psb_mpk_) :: iret, p2pstat(mpi_status_size)
integer(psb_ipk_) :: err_act, topology_total_send, topology_total_recv, buffer_size
logical :: do_start, do_wait
logical, parameter :: debug = .false.
character(len=30) :: name
info = psb_success_
name = 'psi_dswap_nbr_vect'
call psb_erractionsave(err_act)
call psb_info(ctxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
do_start = iand(flag,psb_swap_start_) /= 0
do_wait = iand(flag,psb_swap_wait_) /= 0
call idx%sync()
! ---------------------------------------------------------
! START phase: build topology (if needed), gather, post MPI
! ---------------------------------------------------------
if (do_start) then
if(debug) write(*,*) me,' nbr_vect: starting data exchange'
! Lazy initialization: build the topology on first call
if (.not. y%neighbor_topology%is_initialized) then
if (debug) write(*,*) me,' nbr_vect: building topology'
call y%neighbor_topology%init(idx%v, totxch, totsnd, totrcv, &
& ctxt, icomm, info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_, name, &
& a_err='neighbor_topology_init')
goto 9999
end if
end if
topology_total_send = y%neighbor_topology%total_send
topology_total_recv = y%neighbor_topology%total_recv
! Buffer layout:
! combuf(1 : total_send) = send area
! combuf(total_send+1 : total_send+total_recv) = recv area
buffer_size = topology_total_send + topology_total_recv
call y%new_buffer(buffer_size, info)
if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_, name)
goto 9999
end if
y%communication_handle = mpi_request_null
! Gather send data into contiguous send buffer (polymorphic for GPU)
if (debug) write(*,*) me,' nbr_vect: gathering send data,', topology_total_send,' elems'
call y%gth(int(topology_total_send,psb_mpk_), &
& y%neighbor_topology%send_indexes, &
& y%combuf(1:topology_total_send))
! Wait for device (important for GPU subclasses)
call y%device_wait()
! Post non-blocking neighborhood alltoallv
if (debug) write(*,*) me,' nbr_vect: posting MPI_Ineighbor_alltoallv'
call mpi_ineighbor_alltoallv( &
& y%combuf(1), & ! send buffer
& y%neighbor_topology%send_counts, &
& y%neighbor_topology%send_displs, &
& psb_mpi_r_dpk_, &
& y%combuf(topology_total_send + 1), & ! recv buffer
& y%neighbor_topology%recv_counts, &
& y%neighbor_topology%recv_displs, &
& psb_mpi_r_dpk_, &
& y%neighbor_topology%graph_comm, &
& y%communication_handle, iret)
if (iret /= mpi_success) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
end if ! do_start
! ---------------------------------------------------------
! WAIT phase: complete MPI, scatter received data
! ---------------------------------------------------------
if (do_wait) then
if (y%communication_handle == mpi_request_null) then
! No matching start? Something is wrong
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/-2/))
goto 9999
end if
topology_total_send = y%neighbor_topology%total_send
topology_total_recv = y%neighbor_topology%total_recv
! Wait for the non-blocking collective to complete
if (debug) write(*,*) me,' nbr_vect: waiting on MPI request'
call mpi_wait(y%communication_handle, p2pstat, iret)
if (iret /= mpi_success) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
! Scatter received data to local vector positions (polymorphic for GPU)
if (debug) write(*,*) me,' nbr_vect: scattering recv data,', topology_total_recv,' elems'
call y%sct(int(topology_total_recv,psb_mpk_), &
& y%neighbor_topology%recv_indexes, &
& y%combuf(topology_total_send+1:topology_total_send+topology_total_recv), &
& beta)
! Clean up
y%communication_handle = mpi_request_null
call y%device_wait()
call y%maybe_free_buffer(info)
if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_, name)
goto 9999
end if
if (debug) write(*,*) me,' nbr_vect: done'
end if ! do_wait
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psi_dswap_neighbor_topology_vect
! !
! !
@ -444,10 +708,10 @@ subroutine psi_dswapdata_multivect(flag,beta,y,desc_a,work,info,data)
integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info
class(psb_d_base_multivect_type) :: y class(psb_d_base_multivect_type) :: y
real(psb_dpk_) :: beta real(psb_dpk_), intent(in) :: beta
real(psb_dpk_), target :: work(:) real(psb_dpk_), target :: work(:)
type(psb_desc_type), target :: desc_a type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data integer(psb_ipk_), optional :: data
! locals ! locals
@ -535,7 +799,7 @@ subroutine psi_dswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx, &
integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info
class(psb_d_base_multivect_type) :: y class(psb_d_base_multivect_type) :: y
real(psb_dpk_) :: beta real(psb_dpk_), intent(in) :: beta
real(psb_dpk_), target :: work(:) real(psb_dpk_), target :: work(:)
class(psb_i_base_vect_type), intent(inout) :: idx class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
@ -762,3 +1026,5 @@ subroutine psi_dswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx, &
return return
end subroutine psi_dswap_vidx_multivect end subroutine psi_dswap_vidx_multivect

@ -0,0 +1,390 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! File: psb_dhalo_a_new.f90
!
! Subroutine: psb_dhalom_new
! This subroutine performs the exchange of the halo elements in a
! distributed dense matrix between all the processes.
! The comm_type argument selects the communication scheme:
! psb_comm_type_isend_ (0) = classic irecv/send (default)
! psb_comm_type_neigh_a2av_ (1) = MPI_Alltoallv collective
!
! Arguments:
! x - real,dimension(:,:). The local part of the dense matrix.
! desc_a - type(psb_desc_type). The communication descriptor.
! info - integer. Return code
! comm_type - integer. Communication scheme selector
! jx - integer(optional). The starting column of the global matrix.
! ik - integer(optional). The number of columns to gather.
! work - real(optional). Work area.
! tran - character(optional). Transpose exchange.
! mode - integer(optional). Communication mode (see Swapdata)
! data - integer(optional). Which index list in desc_a should be used.
!
subroutine psb_dhalom_new(x,desc_a,info,comm_type,jx,ik,work,tran,mode,data)
use psb_base_mod, psb_protect_name => psb_dhalom_new
use psi_mod
implicit none
real(psb_dpk_), intent(inout), target :: x(:,:)
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
real(psb_dpk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,jx,ik,data
character, intent(in), optional :: tran
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: np, me, k
integer(psb_ipk_) :: err_act, iix, jjx, maxk, nrow, imode, i,&
& liwork, data_, ldx
integer(psb_lpk_) :: m, n, ix, ijx
real(psb_dpk_), pointer :: iwork(:), xp(:,:)
character :: tran_
character(len=20) :: name, ch_err
logical :: aliw
name='psb_dhalom_new'
info=psb_success_
call psb_erractionsave(err_act)
if (psb_errstatus_fatal()) then
info = psb_err_internal_error_ ; goto 9999
end if
ctxt=desc_a%get_context()
! check on blacs grid
call psb_info(ctxt, me, np)
if (np == -1) then
info = psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
ix = 1
if (present(jx)) then
ijx = jx
else
ijx = 1
endif
m = desc_a%get_global_rows()
n = desc_a%get_global_cols()
nrow = desc_a%get_local_rows()
maxk=size(x,2)-ijx+1
if(present(ik)) then
if(ik > maxk) then
k=maxk
else
k=ik
end if
else
k = maxk
end if
if (present(tran)) then
tran_ = psb_toupper(tran)
else
tran_ = 'N'
endif
if (present(data)) then
data_ = data
else
data_ = psb_comm_halo_
endif
!
! Select the communication mode based on comm_type
!
select case(comm_type)
case(0)
!
! Classic irecv/send scheme (default)
!
if (present(mode)) then
imode = mode
else
imode = IOR(psb_swap_send_,psb_swap_recv_)
endif
case(1)
if(present(mode)) then
imode = mode
else
imode = IOR(psb_swap_start_,psb_swap_wait_)
endif
case default
info = psb_err_input_value_invalid_i_
call psb_errpush(info,name,i_err=(/5_psb_ipk_,comm_type,izero,izero,izero/))
goto 9999
end select
ldx = size(x,1)
! check vector correctness
call psb_chkvect(m,lone,ldx,ix,ijx,desc_a,info,iix,jjx,check_halo=.true.)
if(info /= psb_success_) then
info=psb_err_from_subroutine_ ; ch_err='psb_chkvect'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
liwork=nrow
if (present(work)) then
if(size(work) >= liwork) then
aliw=.false.
iwork => work
else
aliw=.true.
allocate(iwork(liwork),stat=info)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_realloc'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
end if
else
aliw=.true.
allocate(iwork(liwork),stat=info)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_realloc'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
end if
! exchange halo elements
xp => x(iix:size(x,1),jjx:jjx+k-1)
if(tran_ == 'N') then
call psi_swapdata(imode,k,dzero,xp,&
& desc_a,iwork,info,data=data_)
else if((tran_ == 'T').or.(tran_ == 'C')) then
call psi_swaptran(imode,k,done,xp,&
&desc_a,iwork,info)
else
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='invalid tran')
goto 9999
end if
if(info /= psb_success_) then
ch_err='PSI_swapdata'
call psb_errpush(psb_err_from_subroutine_,name,a_err=ch_err)
goto 9999
end if
if (aliw) deallocate(iwork)
nullify(iwork)
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psb_dhalom_new
!
! Subroutine: psb_dhalov_new
! This subroutine performs the exchange of the halo elements in a
! distributed dense vector between all the processes.
! The comm_type argument selects the communication scheme:
! psb_comm_type_isend_ (0) = classic irecv/send (default)
! psb_comm_type_neigh_a2av_ (1) = MPI_Alltoallv collective
!
! Arguments:
! x - real,dimension(:). The local part of the dense vector.
! desc_a - type(psb_desc_type). The communication descriptor.
! info - integer. Return code
! comm_type - integer. Communication scheme selector
! work - real(optional). Work area.
! tran - character(optional). Transpose exchange.
! mode - integer(optional). Communication mode (see Swapdata)
! data - integer(optional). Which index list in desc_a should be used.
!
subroutine psb_dhalov_new(x,desc_a,info,comm_type,work,tran,mode,data)
use psb_base_mod, psb_protect_name => psb_dhalov_new
use psi_mod
implicit none
real(psb_dpk_), intent(inout) :: x(:)
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
real(psb_dpk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,data
character, intent(in), optional :: tran
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: np, me
integer(psb_ipk_) :: err_act, ldx, iix, jjx, nrow, imode, liwork, data_
integer(psb_lpk_) :: m, n, ix, ijx
real(psb_dpk_), pointer :: iwork(:)
character :: tran_
character(len=20) :: name, ch_err
logical :: aliw
name='psb_dhalov_new'
info=psb_success_
call psb_erractionsave(err_act)
if (psb_errstatus_fatal()) then
info = psb_err_internal_error_ ; goto 9999
end if
ctxt=desc_a%get_context()
! check on blacs grid
call psb_info(ctxt, me, np)
if (np == -1) then
info = psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
ix = 1
ijx = 1
m = desc_a%get_global_rows()
n = desc_a%get_global_cols()
nrow = desc_a%get_local_rows()
if (present(tran)) then
tran_ = psb_toupper(tran)
else
tran_ = 'N'
endif
if (present(data)) then
data_ = data
else
data_ = psb_comm_halo_
endif
!
! Select the communication mode based on comm_type
!
select case(comm_type)
case(0)
!
! Classic irecv/send scheme (default)
!
if (present(mode)) then
imode = mode
else
imode = IOR(psb_swap_send_,psb_swap_recv_)
endif
case(1)
if(present(mode)) then
imode = mode
else
imode = IOR(psb_swap_start_,psb_swap_wait_)
endif
case default
info = psb_err_input_value_invalid_i_
call psb_errpush(info,name,i_err=(/4_psb_ipk_,comm_type,izero,izero,izero/))
goto 9999
end select
ldx = size(x,1)
! check vector correctness
call psb_chkvect(m,lone,ldx,ix,ijx,desc_a,info,iix,jjx,check_halo=.true.)
if(info /= psb_success_) then
info=psb_err_from_subroutine_ ; ch_err='psb_chkvect'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
liwork=nrow
if (present(work)) then
if(size(work) >= liwork) then
aliw=.false.
iwork => work
else
aliw=.true.
allocate(iwork(liwork),stat=info)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_realloc'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
end if
else
aliw=.true.
allocate(iwork(liwork),stat=info)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_realloc'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
end if
! exchange halo elements
if(tran_ == 'N') then
call psi_swapdata(imode,dzero,x(iix:size(x)),&
& desc_a,iwork,info,data=data_)
else if((tran_ == 'T').or.(tran_ == 'C')) then
call psi_swaptran(imode,done,x(iix:size(x)),&
& desc_a,iwork,info)
else
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='invalid tran')
goto 9999
end if
if(info /= psb_success_) then
ch_err='PSI_swapdata'
call psb_errpush(psb_err_from_subroutine_,name,a_err=ch_err)
goto 9999
end if
if (aliw) deallocate(iwork)
nullify(iwork)
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psb_dhalov_new

@ -0,0 +1,298 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! File: psb_dhalo_new.f90
!
! Subroutine: psb_dhalo_vect_new
! Halo exchange for a distributed vector.
! comm_type selects the communication scheme:
! psb_comm_type_isend_ (0) : classic isend/irecv (delegates to psi_swapdata)
! psb_comm_type_neigh_a2av_ (1) : MPI_Neighbor_alltoallv via pre-built topology
!
subroutine psb_dhalo_vect_new(x,desc_a,info,comm_type,work,tran,mode,data)
use psb_base_mod, psb_protect_name => psb_dhalo_vect_new
use psi_mod
implicit none
type(psb_d_vect_type), intent(inout) :: x
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
real(psb_dpk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,data
character, intent(in), optional :: tran
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np, me, err_act, &
& nrow, ncol, lldx, imode, liwork, data_
real(psb_dpk_),pointer :: iwork(:)
real(psb_dpk_), allocatable :: sndbuf(:), rcvbuf(:)
character :: tran_
character(len=40) :: name, ch_err
logical :: aliw
integer(psb_mpk_) :: iret
integer(psb_ipk_) :: k
name='psb_dhalo_vect_new'
info=psb_success_
call psb_erractionsave(err_act)
if (psb_errstatus_fatal()) then
info = psb_err_internal_error_ ; goto 9999
end if
ctxt=desc_a%get_context()
call psb_info(ctxt, me, np)
if (np == -1) then
info = psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(x%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols()
lldx = x%get_nrows()
if (present(tran)) then
tran_ = psb_toupper(tran)
else
tran_ = 'N'
endif
if (present(data)) then
data_ = data
else
data_ = psb_comm_halo_
endif
if ((info == 0).and.(lldx < ncol)) call x%reall(ncol,info)
if(info /= psb_success_) then
info=psb_err_from_subroutine_ ; ch_err='reall'
call psb_errpush(info,name,a_err=ch_err); goto 9999
end if
select case(comm_type)
case(0) ! psb_comm_type_isend_
! ---- Classic isend/irecv: delegate to psi_swapdata ----
if (present(mode)) then
imode = mode
else
imode = IOR(psb_swap_send_,psb_swap_recv_)
end if
liwork=nrow
if (present(work)) then
if(size(work) >= liwork) then
iwork => work; aliw=.false.
else
aliw=.true.; allocate(iwork(liwork),stat=info)
end if
else
aliw=.true.; allocate(iwork(liwork),stat=info)
end if
if(info /= psb_success_) then
ch_err='psb_realloc'
call psb_errpush(psb_err_from_subroutine_,name,a_err=ch_err)
goto 9999
end if
if(tran_ == 'N') then
call psi_swapdata(imode,dzero,x%v,desc_a,iwork,info,data=data_)
else if((tran_ == 'T').or.(tran_ == 'C')) then
call psi_swaptran(imode,done,x%v,desc_a,iwork,info)
else
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='invalid tran'); goto 9999
end if
if (info /= psb_success_) then
ch_err='PSI_swapdata'
call psb_errpush(psb_err_from_subroutine_,name,a_err=ch_err)
goto 9999
end if
if (aliw) deallocate(iwork)
nullify(iwork)
case(1) ! psb_comm_type_neigh_a2av_
! TODO
case default
info = psb_err_input_value_invalid_i_
call psb_errpush(info,name,i_err=(/5_psb_ipk_,comm_type,izero,izero,izero/))
goto 9999
end select
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psb_dhalo_vect_new
!
! Subroutine: psb_dhalo_multivect_new
! Halo exchange for a distributed multivector.
! comm_type selects the communication scheme:
! psb_comm_type_isend_ (0) : classic isend/irecv
! psb_comm_type_neigh_a2av_ (1) : MPI_Neighbor_alltoallv via pre-built topology
!
subroutine psb_dhalo_multivect_new(x,desc_a,info,comm_type,work,tran,mode,data)
use psb_base_mod, psb_protect_name => psb_dhalo_multivect_new
use psi_mod
implicit none
type(psb_d_multivect_type), intent(inout) :: x
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
real(psb_dpk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,data
character, intent(in), optional :: tran
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np, me, err_act, &
& nrow, ncol, lldx, imode, liwork, data_, nc
real(psb_dpk_),pointer :: iwork(:)
real(psb_dpk_), allocatable :: sndbuf(:), rcvbuf(:)
integer(psb_mpk_), allocatable :: mv_sndcnts(:), mv_rcvcnts(:), &
& mv_snddispls(:), mv_rcvdispls(:)
character :: tran_
character(len=40) :: name, ch_err
logical :: aliw
integer(psb_mpk_) :: iret
integer(psb_ipk_) :: k, j, bp, nn
name='psb_dhalo_multivect_new'
info=psb_success_
call psb_erractionsave(err_act)
if (psb_errstatus_fatal()) then
info = psb_err_internal_error_ ; goto 9999
end if
ctxt=desc_a%get_context()
call psb_info(ctxt, me, np)
if (np == -1) then
info = psb_err_context_error_
call psb_errpush(info,name); goto 9999
endif
if (.not.allocated(x%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name); goto 9999
endif
nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols()
lldx = x%get_nrows()
nc = x%get_ncols()
if (present(tran)) then
tran_ = psb_toupper(tran)
else
tran_ = 'N'
endif
if (present(data)) then
data_ = data
else
data_ = psb_comm_halo_
endif
if (lldx < ncol) call x%reall(ncol,nc,info)
if(info /= psb_success_) then
ch_err='psb_reall'
call psb_errpush(psb_err_from_subroutine_,name,a_err=ch_err)
goto 9999
end if
select case(comm_type)
case(0)
! ---- Classic isend/irecv ----
if (present(mode)) then
imode = mode
else
imode = IOR(psb_swap_send_,psb_swap_recv_)
end if
liwork=nrow
if (present(work)) then
if(size(work) >= liwork) then
iwork => work; aliw=.false.
else
aliw=.true.; allocate(iwork(liwork),stat=info)
end if
else
aliw=.true.; allocate(iwork(liwork),stat=info)
end if
if(info /= psb_success_) then
ch_err='psb_realloc'
call psb_errpush(psb_err_from_subroutine_,name,a_err=ch_err)
goto 9999
end if
if(tran_ == 'N') then
call psi_swapdata(imode,dzero,x%v,desc_a,iwork,info,data=data_)
else if((tran_ == 'T').or.(tran_ == 'C')) then
call psi_swaptran(imode,done,x%v,desc_a,iwork,info)
else
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='invalid tran'); goto 9999
end if
if (info /= psb_success_) then
ch_err='PSI_swapdata'
call psb_errpush(psb_err_from_subroutine_,name,a_err=ch_err)
goto 9999
end if
if (aliw) deallocate(iwork)
nullify(iwork)
case(1)
! TODO
case default
info = psb_err_input_value_invalid_i_
call psb_errpush(info,name,i_err=(/5_psb_ipk_,comm_type,izero,izero,izero/))
goto 9999
end select
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psb_dhalo_multivect_new

@ -28,6 +28,7 @@ COMMINT= penv/psi_penv_mod.o \
SERIAL_MODS=serial/psb_s_serial_mod.o serial/psb_d_serial_mod.o \ SERIAL_MODS=serial/psb_s_serial_mod.o serial/psb_d_serial_mod.o \
serial/psb_c_serial_mod.o serial/psb_z_serial_mod.o \ serial/psb_c_serial_mod.o serial/psb_z_serial_mod.o \
serial/psb_serial_mod.o \ serial/psb_serial_mod.o \
comm/psb_neighbor_topology_mod.o \
serial/psb_i_base_vect_mod.o serial/psb_i_vect_mod.o\ serial/psb_i_base_vect_mod.o serial/psb_i_vect_mod.o\
serial/psb_l_base_vect_mod.o serial/psb_l_vect_mod.o\ serial/psb_l_base_vect_mod.o serial/psb_l_vect_mod.o\
serial/psb_d_base_vect_mod.o serial/psb_d_vect_mod.o\ serial/psb_d_base_vect_mod.o serial/psb_d_vect_mod.o\
@ -181,6 +182,7 @@ auxil/psb_string_mod.o auxil/psb_m_realloc_mod.o auxil/psb_e_realloc_mod.o auxil
auxil/psb_d_realloc_mod.o auxil/psb_c_realloc_mod.o auxil/psb_z_realloc_mod.o \ auxil/psb_d_realloc_mod.o auxil/psb_c_realloc_mod.o auxil/psb_z_realloc_mod.o \
desc/psb_desc_const_mod.o psi_penv_mod.o: psb_const_mod.o desc/psb_desc_const_mod.o psi_penv_mod.o: psb_const_mod.o
comm/psb_neighbor_topology_mod.o: psb_const_mod.o desc/psb_desc_const_mod.o
desc/psb_indx_map_mod.o desc/psb_hash_mod.o: psb_realloc_mod.o psb_const_mod.o desc/psb_desc_const_mod.o desc/psb_indx_map_mod.o desc/psb_hash_mod.o: psb_realloc_mod.o psb_const_mod.o desc/psb_desc_const_mod.o
auxil/psb_i_sort_mod.o auxil/psb_s_sort_mod.o auxil/psb_d_sort_mod.o auxil/psb_c_sort_mod.o auxil/psb_z_sort_mod.o \ auxil/psb_i_sort_mod.o auxil/psb_s_sort_mod.o auxil/psb_d_sort_mod.o auxil/psb_c_sort_mod.o auxil/psb_z_sort_mod.o \
@ -261,7 +263,7 @@ serial/psb_d_base_mat_mod.o: serial/psb_d_base_vect_mod.o
serial/psb_c_base_mat_mod.o: serial/psb_c_base_vect_mod.o serial/psb_c_base_mat_mod.o: serial/psb_c_base_vect_mod.o
serial/psb_z_base_mat_mod.o: serial/psb_z_base_vect_mod.o serial/psb_z_base_mat_mod.o: serial/psb_z_base_vect_mod.o
serial/psb_l_base_vect_mod.o: serial/psb_i_base_vect_mod.o serial/psb_l_base_vect_mod.o: serial/psb_i_base_vect_mod.o
serial/psb_c_base_vect_mod.o serial/psb_s_base_vect_mod.o serial/psb_d_base_vect_mod.o serial/psb_z_base_vect_mod.o: serial/psb_i_base_vect_mod.o serial/psb_l_base_vect_mod.o serial/psb_c_base_vect_mod.o serial/psb_s_base_vect_mod.o serial/psb_d_base_vect_mod.o serial/psb_z_base_vect_mod.o: serial/psb_i_base_vect_mod.o serial/psb_l_base_vect_mod.o comm/psb_neighbor_topology_mod.o
serial/psb_i_base_vect_mod.o serial/psb_l_base_vect_mod.o serial/psb_c_base_vect_mod.o serial/psb_s_base_vect_mod.o serial/psb_d_base_vect_mod.o serial/psb_z_base_vect_mod.o: auxil/psi_serial_mod.o psb_realloc_mod.o serial/psb_i_base_vect_mod.o serial/psb_l_base_vect_mod.o serial/psb_c_base_vect_mod.o serial/psb_s_base_vect_mod.o serial/psb_d_base_vect_mod.o serial/psb_z_base_vect_mod.o: auxil/psi_serial_mod.o psb_realloc_mod.o
serial/psb_s_mat_mod.o: serial/psb_s_base_mat_mod.o serial/psb_s_csr_mat_mod.o serial/psb_s_csc_mat_mod.o serial/psb_s_vect_mod.o \ serial/psb_s_mat_mod.o: serial/psb_s_base_mat_mod.o serial/psb_s_csr_mat_mod.o serial/psb_s_csc_mat_mod.o serial/psb_s_vect_mod.o \
serial/psb_i_vect_mod.o serial/psb_l_vect_mod.o serial/psb_i_vect_mod.o serial/psb_l_vect_mod.o

@ -76,6 +76,31 @@ module psb_c_comm_a_mod
end subroutine psb_chalov end subroutine psb_chalov
end interface psb_halo end interface psb_halo
interface psb_halo_new
subroutine psb_chalom_new(x,desc_a,info,comm_type,jx,ik,work,tran,mode,data)
import
implicit none
complex(psb_spk_), intent(inout), target :: x(:,:)
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
complex(psb_spk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,jx,ik,data
character, intent(in), optional :: tran
end subroutine psb_chalom_new
subroutine psb_chalov_new(x,desc_a,info,comm_type,work,tran,mode,data)
import
implicit none
complex(psb_spk_), intent(inout) :: x(:)
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
complex(psb_spk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,data
character, intent(in), optional :: tran
end subroutine psb_chalov_new
end interface psb_halo_new
interface psb_scatter interface psb_scatter
subroutine psb_cscatterm(globx, locx, desc_a, info, root) subroutine psb_cscatterm(globx, locx, desc_a, info, root)

@ -57,6 +57,31 @@ module psb_c_comm_mod
end subroutine psb_covrl_multivect end subroutine psb_covrl_multivect
end interface psb_ovrl end interface psb_ovrl
interface psb_halo_new
subroutine psb_chalo_vect_new(x,desc_a,info,comm_type,work,tran,mode,data)
import
implicit none
type(psb_c_vect_type), intent(inout) :: x
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
complex(psb_spk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,data
character, intent(in), optional :: tran
end subroutine psb_chalo_vect_new
subroutine psb_chalo_multivect_new(x,desc_a,info,comm_type,work,tran,mode,data)
import
implicit none
type(psb_c_multivect_type), intent(inout) :: x
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
complex(psb_spk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,data
character, intent(in), optional :: tran
end subroutine psb_chalo_multivect_new
end interface psb_halo_new
interface psb_halo interface psb_halo
subroutine psb_chalo_vect(x,desc_a,info,work,tran,mode,data) subroutine psb_chalo_vect(x,desc_a,info,work,tran,mode,data)
import import

@ -53,6 +53,31 @@ module psb_d_comm_a_mod
end subroutine psb_dovrlv end subroutine psb_dovrlv
end interface psb_ovrl end interface psb_ovrl
interface psb_halo_new
subroutine psb_dhalom_new(x,desc_a,info,comm_type,jx,ik,work,tran,mode,data)
import
implicit none
real(psb_dpk_), intent(inout), target :: x(:,:)
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
real(psb_dpk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,jx,ik,data
character, intent(in), optional :: tran
end subroutine psb_dhalom_new
subroutine psb_dhalov_new(x,desc_a,info,comm_type,work,tran,mode,data)
import
implicit none
real(psb_dpk_), intent(inout) :: x(:)
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
real(psb_dpk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,data
character, intent(in), optional :: tran
end subroutine psb_dhalov_new
end interface psb_halo_new
interface psb_halo interface psb_halo
subroutine psb_dhalom(x,desc_a,info,jx,ik,work,tran,mode,data) subroutine psb_dhalom(x,desc_a,info,jx,ik,work,tran,mode,data)
import import

@ -57,6 +57,32 @@ module psb_d_comm_mod
end subroutine psb_dovrl_multivect end subroutine psb_dovrl_multivect
end interface psb_ovrl end interface psb_ovrl
interface psb_halo_new
subroutine psb_dhalo_vect_new(x,desc_a,info,comm_type,work,tran,mode,data)
import
implicit none
type(psb_d_vect_type), intent(inout) :: x
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
real(psb_dpk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,data
character, intent(in), optional :: tran
end subroutine psb_dhalo_vect_new
subroutine psb_dhalo_multivect_new(x,desc_a,info,comm_type,work,tran,mode,data)
import
implicit none
type(psb_d_multivect_type), intent(inout) :: x
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
real(psb_dpk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,data
character, intent(in), optional :: tran
end subroutine psb_dhalo_multivect_new
end interface psb_halo_new
interface psb_halo interface psb_halo
subroutine psb_dhalo_vect(x,desc_a,info,work,tran,mode,data) subroutine psb_dhalo_vect(x,desc_a,info,work,tran,mode,data)
import import

@ -0,0 +1,422 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! Module: psb_neighbor_topology_mod
! Provides a type to hold pre-built MPI neighborhood topology
! information for persistent/repeated halo exchanges via
! MPI_Neighbor_alltoallv (MPI >= 3.0).
!
! The topology is stored inside the vector type (psb_d_base_vect_type)
! and lazily created on the first psi_swapdata call with the
! neighbor-alltoallv communication mode. Once built it is reused
! for every subsequent halo exchange, avoiding the per-call overhead
! of re-scanning the index list and allocating temporary arrays.
!
! The graph communicator and per-neighbor counts/displacements
! are built once and reused.
!
! The gather/scatter index arrays (send_indexes, recv_indexes) record
! which local vector positions must be packed / unpacked.
!
module psb_neighbor_topology_mod
use psb_const_mod
use psb_desc_const_mod
use psb_error_mod
!
! Only import mpi_comm_null here (needed for type default initializer).
! Full MPI access is done inside each contained subroutine so that
! MPI symbols do NOT leak into modules that use psb_neighbor_topology_mod.
!
#ifdef PSB_MPI_MOD
use mpi, only: mpi_comm_null
#endif
implicit none
#ifdef PSB_MPI_H
include 'mpif.h'
#endif
type :: psb_neighbor_topology_type
!
! MPI dist-graph communicator (only communicating neighbors).
!
integer(psb_mpk_) :: graph_comm = mpi_comm_null
!
! Number of neighbors (processes I exchange with, excluding self).
!
integer(psb_ipk_) :: num_neighbors = 0
!
! Per-neighbor send/recv counts and displacements (units of
! single elements; for n-column multivectors multiply by n).
! send_counts(i) = number of elements sent to i-th neighbor
! recv_counts(i) = number of elements received from i-th neighbor
! send_displs(i) = displacement into contiguous send buffer
! recv_displs(i) = displacement into contiguous recv buffer
!
integer(psb_mpk_), allocatable :: send_counts(:), recv_counts(:)
integer(psb_mpk_), allocatable :: send_displs(:), recv_displs(:)
!
! Gather indexes: the k-th element of the send buffer is
! y%v( send_indexes(k) )
! Scatter indexes: the k-th element of the recv buffer goes to
! y%v( recv_indexes(k) )
!
integer(psb_ipk_), allocatable :: send_indexes(:)
integer(psb_ipk_), allocatable :: recv_indexes(:)
!
! Total number of elements to send / receive (per single column),
! excluding self-exchange.
!
integer(psb_ipk_) :: total_send = 0
integer(psb_ipk_) :: total_recv = 0
!
! Initialization flag.
!
logical :: is_initialized = .false.
contains
procedure, pass(topology) :: init => neighbor_topology_init
procedure, pass(topology) :: free => neighbor_topology_free
procedure, pass(topology) :: sizeof => neighbor_topology_sizeof
end type psb_neighbor_topology_type
contains
! ---------------------------------------------------------------
! neighbor_topology_init
!
! Parse the halo index list (obtained via desc_a%get_list_p)
! and build:
! - MPI dist-graph communicator with only the true neighbors
! - per-neighbor send/recv counts and displacements
! - contiguous gather/scatter index arrays
!
! The topology is stored inside the vector and lazily built
! on the first psi_swapdata call that uses the neighbor-alltoallv
! communication mode.
!
! Arguments:
! topology - the persistent state (output, intent inout)
! halo_index - halo_index array (from get_list_p, intent in)
! num_neighbors - number of exchanges (from get_list_p)
! total_send_elems - total send count (from get_list_p)
! total_recv_elems - total recv count (from get_list_p)
! ctxt - PSBLAS context
! icomm - MPI communicator
! info - error code (output)
! ---------------------------------------------------------------
subroutine neighbor_topology_init(topology, halo_index, num_neighbors, &
& total_send_elems, total_recv_elems, ctxt, icomm, info)
#ifdef PSB_MPI_MOD
use mpi
#endif
implicit none
#ifdef PSB_MPI_H
include 'mpif.h'
#endif
class(psb_neighbor_topology_type), intent(inout) :: topology
integer(psb_ipk_), intent(in) :: halo_index(:)
integer(psb_ipk_), intent(in) :: num_neighbors, total_send_elems, total_recv_elems
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_mpk_), intent(in) :: icomm
integer(psb_ipk_), intent(out) :: info
! locals
integer(psb_mpk_) :: iret
integer(psb_ipk_) :: i, k, idx_ptr, num_elem_recv, num_elem_send, partner_proc
integer(psb_ipk_) :: nbr_count, send_offset, recv_offset
integer(psb_mpk_), allocatable :: source_ranks(:), dest_ranks(:)
integer(psb_mpk_), allocatable :: source_weights(:), dest_weights(:)
integer(psb_mpk_) :: in_degree, out_degree
character(len=40) :: name
integer(psb_ipk_) :: proc_id
integer(psb_ipk_) :: position
integer(psb_ipk_) :: err_act
info = psb_success_
name = 'neighbor_topology_init'
call psb_erractionsave(err_act)
! Clean up any previous state
call topology%free(info)
! ----------------------------------------------------------
! First pass: count neighbors (excluding self) and totals
! ----------------------------------------------------------
topology%num_neighbors = 0
topology%total_send = 0
topology%total_recv = 0
if(size(halo_index) < 1) then
call psb_errpush(psb_err_topology_invalid_args_,name)
goto 9999
end if
allocate(source_ranks(num_neighbors), stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name, a_err='Source ranks allocation failed')
goto 9999
end if
allocate(dest_ranks(num_neighbors), stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name, a_err='Destination ranks allocation failed')
goto 9999
end if
allocate(source_weights(num_neighbors), stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name, a_err='Source weights allocation failed')
goto 9999
end if
allocate(dest_weights(num_neighbors), stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name, a_err='Destination weights allocation failed')
goto 9999
end if
allocate(topology%send_counts(num_neighbors), stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name, a_err='Send counts allocation failed')
goto 9999
end if
allocate(topology%recv_counts(num_neighbors), stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name, a_err='Receive counts allocation failed')
goto 9999
end if
allocate(topology%send_displs(num_neighbors), stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name, a_err='Send displacements allocation failed')
goto 9999
end if
allocate(topology%recv_displs(num_neighbors), stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name, a_err='Receive displacements allocation failed')
goto 9999
end if
! -----------------------------------------------------------
! Allocate the gather/scatter index arrays
! -----------------------------------------------------------
allocate(topology%send_indexes(total_send_elems), stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name, a_err='Send indexes allocation failed')
goto 9999
end if
allocate(topology%recv_indexes(total_recv_elems), stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name, a_err='Recv indexes allocation failed')
goto 9999
end if
! -----------------------------------------------------------
! Fill neighbor ranks, weights, counts, displacements,
! and gather/scatter index arrays.
!
! The halo_index layout per neighbor (starting at position):
! position + 0 : process id
! position + 1 : nerv (num recv elements)
! position + 2 .. +1+nerv : recv element indexes
! position + 2+nerv : nesd (num send elements)
! position + 3+nerv .. +2+nerv+nesd : send element indexes
! Total stride per neighbor: nerv + nesd + 3
! -----------------------------------------------------------
send_offset = 0
recv_offset = 0
position = 1
do i = 1, num_neighbors
proc_id = halo_index(position)
num_elem_recv = halo_index(position + 1)
num_elem_send = halo_index(position + num_elem_recv + 2)
! Fill source/destination ranks and weights (weights are all 1 for now)
source_ranks(i) = int(proc_id, psb_mpk_)
dest_ranks(i) = int(proc_id, psb_mpk_)
source_weights(i) = 1
dest_weights(i) = 1
! Counts and displacements (displs set BEFORE accumulating offset)
topology%send_counts(i) = int(num_elem_send, psb_mpk_)
topology%recv_counts(i) = int(num_elem_recv, psb_mpk_)
topology%send_displs(i) = int(send_offset, psb_mpk_)
topology%recv_displs(i) = int(recv_offset, psb_mpk_)
! Fill recv_indexes from halo_index(position+2 .. position+1+nerv)
do k = 1, num_elem_recv
topology%recv_indexes(recv_offset + k) = halo_index(position + psb_elem_recv_ + k - 1)
end do
! Fill send_indexes from halo_index(position+3+nerv .. position+2+nerv+nesd)
do k = 1, num_elem_send
topology%send_indexes(send_offset + k) = halo_index(position + num_elem_recv + psb_elem_send_ + k - 1)
end do
send_offset = send_offset + num_elem_send
recv_offset = recv_offset + num_elem_recv
topology%num_neighbors = topology%num_neighbors + 1
topology%total_send = topology%total_send + num_elem_send
topology%total_recv = topology%total_recv + num_elem_recv
position = position + num_elem_recv + num_elem_send + 3
end do
! ----------------------------------------------------------
! Sanity check: the totals computed from the neighbor list
! should match the totals returned by get_list_p.
! ----------------------------------------------------------
if (topology%total_send /= total_send_elems) then
info = psb_err_topology_args_mismatch_
call psb_errpush(info, name, a_err='Send elements mismatch')
goto 9999
end if
if (topology%total_recv /= total_recv_elems) then
info = psb_err_topology_args_mismatch_
call psb_errpush(info, name, a_err='Receive elements mismatch')
goto 9999
end if
if(topology%num_neighbors /= num_neighbors) then
info = psb_err_topology_args_mismatch_
call psb_errpush(info, name, a_err='Number of neighbors mismatch')
goto 9999
end if
! ----------------------------------------------------------
! Build the dist-graph communicator
! ----------------------------------------------------------
in_degree = topology%num_neighbors !! Just for clarity
out_degree = topology%num_neighbors !! Just for clarity
call mpi_dist_graph_create_adjacent(icomm, &
& in_degree, source_ranks, source_weights, &
& out_degree, dest_ranks, dest_weights, &
& mpi_info_null, .false., & ! Check this line for optimizations
& topology%graph_comm, info)
if (info /= mpi_success) then
info = psb_err_topology_error_
call psb_errpush(info, name)
goto 9999
end if
topology%is_initialized = .true.
! TODO: Is it safe to deallocate these temporary arrays here, or do we need them for the gather/scatter indexes?
! deallocate(source_ranks, dest_ranks, source_weights, dest_weights)
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine neighbor_topology_init
! ---------------------------------------------------------------
! neighbor_topology_free
! Release all resources held by the persistent state.
! ---------------------------------------------------------------
subroutine neighbor_topology_free(topology, info)
#ifdef PSB_MPI_MOD
use mpi
#endif
implicit none
#ifdef PSB_MPI_H
include 'mpif.h'
#endif
class(psb_neighbor_topology_type), intent(inout) :: topology
integer(psb_ipk_), intent(out) :: info
integer(psb_mpk_) :: iret
info = psb_success_
if (topology%graph_comm /= mpi_comm_null) then
call mpi_comm_free(topology%graph_comm, iret)
topology%graph_comm = mpi_comm_null
end if
if (allocated(topology%send_counts)) deallocate(topology%send_counts)
if (allocated(topology%recv_counts)) deallocate(topology%recv_counts)
if (allocated(topology%send_displs)) deallocate(topology%send_displs)
if (allocated(topology%recv_displs)) deallocate(topology%recv_displs)
if (allocated(topology%send_indexes)) deallocate(topology%send_indexes)
if (allocated(topology%recv_indexes)) deallocate(topology%recv_indexes)
topology%num_neighbors = 0
topology%total_send = 0
topology%total_recv = 0
topology%is_initialized = .false.
end subroutine neighbor_topology_free
! ---------------------------------------------------------------
! neighbor_topology_sizeof
! Return approximate memory footprint in bytes.
! ---------------------------------------------------------------
function neighbor_topology_sizeof(topology) result(val)
implicit none
class(psb_neighbor_topology_type), intent(in) :: topology
integer(psb_epk_) :: val
val = 0
val = val + psb_sizeof_ip * 6 ! scalar integers + logicals
if (allocated(topology%send_counts)) val = val + psb_sizeof_ip * size(topology%send_counts)
if (allocated(topology%recv_counts)) val = val + psb_sizeof_ip * size(topology%recv_counts)
if (allocated(topology%send_displs)) val = val + psb_sizeof_ip * size(topology%send_displs)
if (allocated(topology%recv_displs)) val = val + psb_sizeof_ip * size(topology%recv_displs)
if (allocated(topology%send_indexes)) val = val + psb_sizeof_ip * size(topology%send_indexes)
if (allocated(topology%recv_indexes)) val = val + psb_sizeof_ip * size(topology%recv_indexes)
end function neighbor_topology_sizeof
end module psb_neighbor_topology_mod

@ -76,6 +76,31 @@ module psb_s_comm_a_mod
end subroutine psb_shalov end subroutine psb_shalov
end interface psb_halo end interface psb_halo
interface psb_halo_new
subroutine psb_shalom_new(x,desc_a,info,comm_type,jx,ik,work,tran,mode,data)
import
implicit none
real(psb_spk_), intent(inout), target :: x(:,:)
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
real(psb_spk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,jx,ik,data
character, intent(in), optional :: tran
end subroutine psb_shalom_new
subroutine psb_shalov_new(x,desc_a,info,comm_type,work,tran,mode,data)
import
implicit none
real(psb_spk_), intent(inout) :: x(:)
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
real(psb_spk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,data
character, intent(in), optional :: tran
end subroutine psb_shalov_new
end interface psb_halo_new
interface psb_scatter interface psb_scatter
subroutine psb_sscatterm(globx, locx, desc_a, info, root) subroutine psb_sscatterm(globx, locx, desc_a, info, root)

@ -57,6 +57,31 @@ module psb_s_comm_mod
end subroutine psb_sovrl_multivect end subroutine psb_sovrl_multivect
end interface psb_ovrl end interface psb_ovrl
interface psb_halo_new
subroutine psb_shalo_vect_new(x,desc_a,info,comm_type,work,tran,mode,data)
import
implicit none
type(psb_s_vect_type), intent(inout) :: x
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
real(psb_spk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,data
character, intent(in), optional :: tran
end subroutine psb_shalo_vect_new
subroutine psb_shalo_multivect_new(x,desc_a,info,comm_type,work,tran,mode,data)
import
implicit none
type(psb_s_multivect_type), intent(inout) :: x
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
real(psb_spk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,data
character, intent(in), optional :: tran
end subroutine psb_shalo_multivect_new
end interface psb_halo_new
interface psb_halo interface psb_halo
subroutine psb_shalo_vect(x,desc_a,info,work,tran,mode,data) subroutine psb_shalo_vect(x,desc_a,info,work,tran,mode,data)
import import

@ -57,6 +57,31 @@ module psb_z_comm_mod
end subroutine psb_zovrl_multivect end subroutine psb_zovrl_multivect
end interface psb_ovrl end interface psb_ovrl
interface psb_halo_new
subroutine psb_zhalo_vect_new(x,desc_a,info,comm_type,work,tran,mode,data)
import
implicit none
type(psb_z_vect_type), intent(inout) :: x
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
complex(psb_dpk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,data
character, intent(in), optional :: tran
end subroutine psb_zhalo_vect_new
subroutine psb_zhalo_multivect_new(x,desc_a,info,comm_type,work,tran,mode,data)
import
implicit none
type(psb_z_multivect_type), intent(inout) :: x
type(psb_desc_type), intent(in) :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_type
complex(psb_dpk_), target, optional, intent(inout) :: work(:)
integer(psb_ipk_), intent(in), optional :: mode,data
character, intent(in), optional :: tran
end subroutine psb_zhalo_multivect_new
end interface psb_halo_new
interface psb_halo interface psb_halo
subroutine psb_zhalo_vect(x,desc_a,info,work,tran,mode,data) subroutine psb_zhalo_vect(x,desc_a,info,work,tran,mode,data)
import import

@ -36,54 +36,94 @@ module psi_d_comm_v_mod
use psb_d_base_multivect_mod, only : psb_d_base_multivect_type use psb_d_base_multivect_mod, only : psb_d_base_multivect_type
interface psi_swapdata interface psi_swapdata
! ---------------------------------------------------------------
! Upper call in order to populate idx using desc_a%get_list_p
! ---------------------------------------------------------------
subroutine psi_dswapdata_vect(flag,beta,y,desc_a,work,info,data) subroutine psi_dswapdata_vect(flag,beta,y,desc_a,work,info,data)
import import
integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info
class(psb_d_base_vect_type) :: y class(psb_d_base_vect_type) :: y
real(psb_dpk_) :: beta real(psb_dpk_), intent(in) :: beta
real(psb_dpk_),target :: work(:) real(psb_dpk_),target :: work(:)
type(psb_desc_type), target :: desc_a type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data integer(psb_ipk_), optional :: data
end subroutine psi_dswapdata_vect end subroutine psi_dswapdata_vect
subroutine psi_dswapdata_multivect(flag,beta,y,desc_a,work,info,data) subroutine psi_dswapdata_multivect(flag,beta,y,desc_a,work,info,data)
import import
integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info
class(psb_d_base_multivect_type) :: y class(psb_d_base_multivect_type) :: y
real(psb_dpk_) :: beta real(psb_dpk_), intent(in) :: beta
real(psb_dpk_),target :: work(:) real(psb_dpk_),target :: work(:)
type(psb_desc_type), target :: desc_a type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data integer(psb_ipk_), optional :: data
end subroutine psi_dswapdata_multivect end subroutine psi_dswapdata_multivect
! ---------------------------------------------------------------
! Wrapper that calls different communications schemes depending on
! flag variable
! ---------------------------------------------------------------
subroutine psi_dswap_vidx_vect(ctxt,icomm,flag,beta,y,idx,& subroutine psi_dswap_vidx_vect(ctxt,icomm,flag,beta,y,idx,&
& totxch,totsnd,totrcv,work,info) & totxch,totsnd,totrcv,work,info)
import import
type(psb_ctxt_type), intent(in) :: ctxt type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_mpk_), intent(in) :: icomm integer(psb_mpk_), intent(in) :: icomm
integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info
class(psb_d_base_vect_type) :: y class(psb_d_base_vect_type) :: y
real(psb_dpk_) :: beta real(psb_dpk_), intent(in) :: beta
real(psb_dpk_), target :: work(:) real(psb_dpk_), target :: work(:)
class(psb_i_base_vect_type), intent(inout) :: idx class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
end subroutine psi_dswap_vidx_vect end subroutine psi_dswap_vidx_vect
subroutine psi_dswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx,& subroutine psi_dswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx,&
& totxch,totsnd,totrcv,work,info) & totxch,totsnd,totrcv,work,info)
import import
type(psb_ctxt_type), intent(in) :: ctxt type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_mpk_), intent(in) :: icomm integer(psb_mpk_), intent(in) :: icomm
integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info
class(psb_d_base_multivect_type) :: y class(psb_d_base_multivect_type) :: y
real(psb_dpk_) :: beta real(psb_dpk_), intent(in) :: beta
real(psb_dpk_), target :: work(:) real(psb_dpk_), target :: work(:)
class(psb_i_base_vect_type), intent(inout) :: idx class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
end subroutine psi_dswap_vidx_multivect end subroutine psi_dswap_vidx_multivect
! ---------------------------------------------------------------
end interface psi_swapdata end interface psi_swapdata
interface psi_dswap_baseline_vect
subroutine psi_dswap_baseline_vect(ctxt,icomm,flag,beta,y,idx,&
& totxch,totsnd,totrcv,work,info)
import
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_mpk_), intent(in) :: icomm
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_d_base_vect_type) :: y
real(psb_dpk_), intent(in) :: beta
real(psb_dpk_), target :: work(:)
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
end subroutine psi_dswap_baseline_vect
end interface psi_dswap_baseline_vect
interface psi_dswap_neighbor_topology_vect
subroutine psi_dswap_neighbor_topology_vect(ctxt,icomm,flag,beta,y,idx,&
& totxch,totsnd,totrcv,work,info)
import
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_mpk_), intent(in) :: icomm
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_d_base_vect_type) :: y
real(psb_dpk_), intent(in) :: beta
real(psb_dpk_), target :: work(:)
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
end subroutine psi_dswap_neighbor_topology_vect
end interface psi_dswap_neighbor_topology_vect
interface psi_swaptran interface psi_swaptran
subroutine psi_dswaptran_vect(flag,beta,y,desc_a,work,info,data) subroutine psi_dswaptran_vect(flag,beta,y,desc_a,work,info,data)

@ -49,10 +49,12 @@ module psb_desc_const_mod
integer(psb_ipk_), parameter :: psb_setzero_ = 4 integer(psb_ipk_), parameter :: psb_setzero_ = 4
! The following are bit fields. ! The following are bit fields.
integer(psb_ipk_), parameter :: psb_swap_send_ = 1 integer(psb_ipk_), parameter :: psb_swap_send_ = 1
integer(psb_ipk_), parameter :: psb_swap_recv_ = 2 integer(psb_ipk_), parameter :: psb_swap_recv_ = 2
integer(psb_ipk_), parameter :: psb_swap_sync_ = 4 integer(psb_ipk_), parameter :: psb_swap_sync_ = 4
integer(psb_ipk_), parameter :: psb_swap_mpi_ = 8 integer(psb_ipk_), parameter :: psb_swap_mpi_ = 8
integer(psb_ipk_), parameter :: psb_swap_start_ = 16
integer(psb_ipk_), parameter :: psb_swap_wait_ = 32
integer(psb_ipk_), parameter :: psb_collective_start_ = 1 integer(psb_ipk_), parameter :: psb_collective_start_ = 1
integer(psb_ipk_), parameter :: psb_collective_end_ = 2 integer(psb_ipk_), parameter :: psb_collective_end_ = 2
integer(psb_ipk_), parameter :: psb_collective_sync_ = 4 integer(psb_ipk_), parameter :: psb_collective_sync_ = 4

@ -326,6 +326,9 @@ module psb_const_mod
integer(psb_ipk_), parameter, public :: psb_err_invalid_irst_ =5002 integer(psb_ipk_), parameter, public :: psb_err_invalid_irst_ =5002
integer(psb_ipk_), parameter, public :: psb_err_invalid_preci_=5003 integer(psb_ipk_), parameter, public :: psb_err_invalid_preci_=5003
integer(psb_ipk_), parameter, public :: psb_err_invalid_preca_=5004 integer(psb_ipk_), parameter, public :: psb_err_invalid_preca_=5004
integer(psb_ipk_), parameter, public :: psb_err_topology_error_=6000
integer(psb_ipk_), parameter, public :: psb_err_topology_invalid_args_=6001
integer(psb_ipk_), parameter, public :: psb_err_topology_args_mismatch_=6002
type :: psb_ctxt_type type :: psb_ctxt_type

@ -390,13 +390,13 @@ contains
! pushes an error on the error stack ! pushes an error on the error stack
subroutine psb_errpush(err_c, r_name, a_err, i_err, l_err, m_err, e_err) subroutine psb_errpush(err_c, r_name, a_err, i_err, l_err, m_err, e_err)
integer(psb_ipk_), intent(in) :: err_c integer(psb_ipk_), intent(in) :: err_c
character(len=*), intent(in) :: r_name character(len=*), intent(in) :: r_name
character(len=*), optional :: a_err character(len=*), optional :: a_err
integer(psb_ipk_), optional :: i_err(:) integer(psb_ipk_), optional :: i_err(:)
integer(psb_lpk_), optional :: l_err(:) integer(psb_lpk_), optional :: l_err(:)
integer(psb_mpk_), optional :: m_err(:) integer(psb_mpk_), optional :: m_err(:)
integer(psb_epk_), optional :: e_err(:) integer(psb_epk_), optional :: e_err(:)
call psb_set_errstatus(psb_err_fatal_) call psb_set_errstatus(psb_err_fatal_)
call psb_stackpush(err_c, r_name, a_err, i_err, l_err, m_err, e_err) call psb_stackpush(err_c, r_name, a_err, i_err, l_err, m_err, e_err)

@ -49,6 +49,7 @@ module psb_d_base_vect_mod
use psb_realloc_mod use psb_realloc_mod
use psb_i_base_vect_mod use psb_i_base_vect_mod
use psb_l_base_vect_mod use psb_l_base_vect_mod
use psb_neighbor_topology_mod
!> \namespace psb_base_mod \class psb_d_base_vect_type !> \namespace psb_base_mod \class psb_d_base_vect_type
!! The psb_d_base_vect_type !! The psb_d_base_vect_type
@ -62,9 +63,10 @@ module psb_d_base_vect_mod
!! !!
type psb_d_base_vect_type type psb_d_base_vect_type
!> Values. !> Values.
real(psb_dpk_), allocatable :: v(:) real(psb_dpk_), allocatable :: v(:)
real(psb_dpk_), allocatable :: combuf(:) real(psb_dpk_), allocatable :: combuf(:)
integer(psb_mpk_), allocatable :: comid(:,:) integer(psb_mpk_), allocatable :: comid(:,:) ! This is used only for Isend/Irecv scheme, to store the communication handles for each neighbor
integer(psb_mpk_) :: communication_handle ! This is used only for Isend/Irecv scheme, to store the communication handle for the whole halo exchange
!> vector bldstate: !> vector bldstate:
!! null: pristine; !! null: pristine;
!! build: it's being filled with entries; !! build: it's being filled with entries;
@ -73,10 +75,11 @@ module psb_d_base_vect_mod
!! in already existing entries. !! in already existing entries.
!! The transitions among the states are detailed in !! The transitions among the states are detailed in
!! psb_T_vect_mod. !! psb_T_vect_mod.
integer(psb_ipk_), private :: bldstate = psb_vect_null_ integer(psb_ipk_), private :: bldstate = psb_vect_null_
integer(psb_ipk_), private :: dupl = psb_dupl_null_ integer(psb_ipk_), private :: dupl = psb_dupl_null_
integer(psb_ipk_), private :: ncfs = 0 integer(psb_ipk_), private :: ncfs = 0
integer(psb_ipk_), allocatable :: iv(:) integer(psb_ipk_), allocatable :: iv(:)
type(psb_neighbor_topology_type) :: neighbor_topology
contains contains
! !
! Constructors/allocators ! Constructors/allocators
@ -249,6 +252,10 @@ module psb_d_base_vect_mod
procedure, pass(x) :: minquotient_a2 => d_base_minquotient_a2 procedure, pass(x) :: minquotient_a2 => d_base_minquotient_a2
generic, public :: minquotient => minquotient_v, minquotient_a2 generic, public :: minquotient => minquotient_v, minquotient_a2
! Methods used to handle topology in neighbor_alltoallv communication scheme
procedure, pass(x) :: init_topology => d_base_init_topology
procedure, pass(x) :: free_topology => d_base_free_topology
end type psb_d_base_vect_type end type psb_d_base_vect_type
public :: psb_d_base_vect public :: psb_d_base_vect
@ -821,6 +828,7 @@ contains
if (allocated(x%v)) deallocate(x%v, stat=info) if (allocated(x%v)) deallocate(x%v, stat=info)
if ((info == 0).and.allocated(x%combuf)) call x%free_buffer(info) if ((info == 0).and.allocated(x%combuf)) call x%free_buffer(info)
if ((info == 0).and.allocated(x%comid)) call x%free_comid(info) if ((info == 0).and.allocated(x%comid)) call x%free_comid(info)
x%communication_handle = 0
if ((info == 0).and.allocated(x%iv)) deallocate(x%iv, stat=info) if ((info == 0).and.allocated(x%iv)) deallocate(x%iv, stat=info)
if (info /= 0) call & if (info /= 0) call &
& psb_errpush(psb_err_alloc_dealloc_,'vect_free') & psb_errpush(psb_err_alloc_dealloc_,'vect_free')
@ -2595,12 +2603,44 @@ contains
class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: x
real(psb_dpk_), intent(in) :: b real(psb_dpk_), intent(in) :: b
class(psb_d_base_vect_type), intent(inout) :: z class(psb_d_base_vect_type), intent(inout) :: z
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info
info = 0 info = 0
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
call z%addconst(x%v,b,info) call z%addconst(x%v,b,info)
end subroutine d_base_addconst_v2 end subroutine d_base_addconst_v2
! --------------------------------------------------------------------
! Implementation of methods used for neighbor alltoallv communication
! --------------------------------------------------------------------
subroutine d_base_init_topology(x, halo_index, num_exchanges, &
& total_send_elems, total_recv_elems, ctxt, icomm, info)
implicit none
class(psb_d_base_vect_type), intent(inout) :: x
integer(psb_ipk_), intent(in) :: halo_index(:)
integer(psb_ipk_), intent(in) :: num_exchanges, total_send_elems, total_recv_elems
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_mpk_), intent(in) :: icomm
integer(psb_ipk_), intent(out) :: info
call x%neighbor_topology%init(halo_index, num_exchanges, &
& total_send_elems, total_recv_elems, ctxt, icomm, info)
end subroutine d_base_init_topology
subroutine d_base_free_topology(x, info)
implicit none
class(psb_d_base_vect_type), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
call x%neighbor_topology%free(info)
end subroutine d_base_free_topology
! --------------------------------------------------------------------
end module psb_d_base_vect_mod end module psb_d_base_vect_mod

@ -46,7 +46,7 @@ module psb_d_vect_mod
type psb_d_vect_type type psb_d_vect_type
class(psb_d_base_vect_type), allocatable :: v class(psb_d_base_vect_type), allocatable :: v
integer(psb_ipk_) :: nrmv = 0 integer(psb_ipk_) :: nrmv = 0
integer(psb_ipk_) :: remote_build=psb_matbld_noremote_ integer(psb_ipk_) :: remote_build = psb_matbld_noremote_
integer(psb_ipk_) :: dupl = psb_dupl_add_ integer(psb_ipk_) :: dupl = psb_dupl_add_
real(psb_dpk_), allocatable :: rmtv(:) real(psb_dpk_), allocatable :: rmtv(:)
integer(psb_lpk_), allocatable :: rmidx(:) integer(psb_lpk_), allocatable :: rmidx(:)

@ -131,6 +131,8 @@ extern "C" {
#define psb_swap_recv_ 2 #define psb_swap_recv_ 2
#define psb_swap_sync_ 4 #define psb_swap_sync_ 4
#define psb_swap_mpi_ 8 #define psb_swap_mpi_ 8
#define psb_swap_start_ 16
#define psb_swap_wait_ 32
/* legal values for ovrl update argument */ /* legal values for ovrl update argument */
#define psb_none_ 0 #define psb_none_ 0

@ -131,6 +131,8 @@ extern "C" {
#define psb_swap_recv_ 2 #define psb_swap_recv_ 2
#define psb_swap_sync_ 4 #define psb_swap_sync_ 4
#define psb_swap_mpi_ 8 #define psb_swap_mpi_ 8
#define psb_swap_start_ 16
#define psb_swap_wait_ 32
/* legal values for ovrl update argument */ /* legal values for ovrl update argument */
#define psb_none_ 0 #define psb_none_ 0

@ -0,0 +1,34 @@
INSTALLDIR=../..
INCDIR=$(INSTALLDIR)/include/
MODDIR=$(INSTALLDIR)/modules/
include $(INCDIR)/Make.inc.psblas
#
# Libraries used
#
LIBDIR=$(INSTALLDIR)/lib/
PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base
LDLIBS=$(PSBLDLIBS)
FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG).
TOBJS=test_halo_new.o
EXEDIR=./runs
all: runsd test_halo_new
runsd:
(if test ! -d runs ; then mkdir runs; fi)
test_halo_new: $(TOBJS)
$(FLINK) $(LOPT) $(TOBJS) -o test_halo_new $(PSBLAS_LIB) $(LDLIBS)
/bin/mv test_halo_new $(EXEDIR)
clean:
/bin/rm -f $(TOBJS) *$(.mod) $(EXEDIR)/test_halo_new
lib:
(cd ../../; make library)
verycleanlib:
(cd ../../; make veryclean)

@ -0,0 +1,279 @@
!
! Test program for D-type halo exchange: baseline vs neighbor topology.
!
! This test exercises the lower-level psi_swapdata interface directly
! to compare the two communication paths implemented in psi_dswapdata.F90:
!
! 1. Baseline (Isend/Irecv) : flag = IOR(psb_swap_send_, psb_swap_recv_)
! 2. Neighbor topology (Ineighbor_alltoallv) : flag = psb_swap_start_ then psb_swap_wait_
!
! It builds a 3D block-partitioned descriptor with a 7-point stencil,
! fills owned entries with their global index, performs halo exchange
! via both paths, then checks:
! (a) The two paths produce identical results (cross-check)
! (b) Every halo entry equals the global index of its source (absolute check)
!
! Run with: mpirun -np <P> ./test_halo_new
!
program test_halo_new
use psb_base_mod
use psi_mod
implicit none
! ---- parameters ----
integer(psb_ipk_), parameter :: idim = 10 ! grid idim x idim x idim
! ---- descriptor / context ----
type(psb_ctxt_type) :: ctxt
type(psb_desc_type) :: desc_a
integer(psb_ipk_) :: iam, np, info, i, nr, nlr
integer(psb_lpk_) :: m, nt
integer(psb_lpk_), allocatable :: myidx(:)
! ---- vectors ----
type(psb_d_vect_type) :: v_baseline, v_neighbor
! ---- temporary / comparison arrays ----
real(psb_dpk_), allocatable :: vals(:)
real(psb_dpk_), allocatable :: res_bl(:), res_nb(:)
real(psb_dpk_), allocatable :: expected(:)
! ---- work buffer for psi_swapdata ----
real(psb_dpk_), allocatable :: work(:)
! ---- halo index bookkeeping ----
integer(psb_ipk_) :: nrow, ncol, totxch, idxs, idxr, data_
class(psb_i_base_vect_type), pointer :: d_vidx
! ---- error / reporting ----
integer(psb_ipk_) :: n_pass, n_total, imode
real(psb_dpk_) :: err, tol
integer(psb_lpk_), allocatable :: glob_col(:)
character(len=40) :: name
name = 'test_halo_new'
tol = 1.0d-12
n_pass = 0
n_total = 0
! ==================================================================
! 1. Initialise MPI / PSBLAS context
! ==================================================================
call psb_init(ctxt)
call psb_info(ctxt, iam, np)
if (iam == 0) then
write(psb_out_unit,'("================================================")')
write(psb_out_unit,'(" Test: D-type halo baseline vs neighbor topo")')
write(psb_out_unit,'(" Processes : ",i0)') np
write(psb_out_unit,'(" Grid : ",i0," x ",i0," x ",i0)') idim,idim,idim
write(psb_out_unit,'("================================================")')
end if
! ==================================================================
! 2. Build descriptor with 7-point stencil connectivity
! ==================================================================
m = (1_psb_lpk_ * idim) * idim * idim
nt = (m + np - 1) / np
nr = max(0, min(int(nt,psb_ipk_), int(m - (iam * nt),psb_ipk_)))
call psb_cdall(ctxt, desc_a, info, nl=nr)
if (info /= psb_success_) then
write(psb_err_unit,*) iam, 'cdall error:', info
call psb_abort(ctxt)
end if
myidx = desc_a%get_global_indices()
nlr = size(myidx)
do i = 1, nlr
call psb_cdins(1_psb_ipk_, (/myidx(i)/), (/myidx(i)/), desc_a, info)
if (myidx(i) > 1) &
& call psb_cdins(1_psb_ipk_, (/myidx(i)/), (/myidx(i)-1/), desc_a, info)
if (myidx(i) < m) &
& call psb_cdins(1_psb_ipk_, (/myidx(i)/), (/myidx(i)+1/), desc_a, info)
if (myidx(i) > idim) &
& call psb_cdins(1_psb_ipk_, (/myidx(i)/), (/myidx(i)-idim/), desc_a, info)
if (myidx(i) + idim <= m) &
& call psb_cdins(1_psb_ipk_, (/myidx(i)/), (/myidx(i)+idim/), desc_a, info)
if (myidx(i) > int(idim,psb_lpk_)*idim) &
& call psb_cdins(1_psb_ipk_, (/myidx(i)/), &
& (/myidx(i) - int(idim,psb_lpk_)*idim/), desc_a, info)
if (myidx(i) + int(idim,psb_lpk_)*idim <= m) &
& call psb_cdins(1_psb_ipk_, (/myidx(i)/), &
& (/myidx(i) + int(idim,psb_lpk_)*idim/), desc_a, info)
end do
call psb_cdasb(desc_a, info)
if (info /= psb_success_) then
write(psb_err_unit,*) iam, 'cdasb error:', info
call psb_abort(ctxt)
end if
nrow = desc_a%get_local_rows() ! owned
ncol = desc_a%get_local_cols() ! owned + halo
! ==================================================================
! 3. Allocate two D vectors (scratch) and fill owned entries
! ==================================================================
call psb_geall(v_baseline, desc_a, info)
call psb_geall(v_neighbor, desc_a, info)
call psb_geasb(v_baseline, desc_a, info, scratch=.true.)
call psb_geasb(v_neighbor, desc_a, info, scratch=.true.)
! Fill owned entries with the global index value
allocate(vals(ncol))
vals = dzero
do i = 1, nlr
vals(i) = real(myidx(i), psb_dpk_)
end do
call v_baseline%set_vect(vals)
call v_neighbor%set_vect(vals)
deallocate(vals)
! ==================================================================
! 4. Build the expected result for halo positions
! glob_col(j) = global index of local column j
! After halo exchange every position j should hold glob_col(j).
! ==================================================================
allocate(glob_col(ncol), expected(ncol))
glob_col = desc_a%get_global_indices(owned=.false.)
do i = 1, ncol
expected(i) = real(glob_col(i), psb_dpk_)
end do
! ==================================================================
! 5. Retrieve halo index list (same list used by both paths)
! ==================================================================
data_ = psb_comm_halo_
call desc_a%get_list_p(data_, d_vidx, totxch, idxr, idxs, info)
if (info /= psb_success_) then
write(psb_err_unit,*) iam, 'get_list_p error:', info
call psb_abort(ctxt)
end if
allocate(work(nrow))
work = dzero
! ==================================================================
! 6. Baseline halo exchange (Isend/Irecv in one call)
! ==================================================================
imode = IOR(psb_swap_send_, psb_swap_recv_)
call psi_swapdata(ctxt, desc_a%get_mpic(), imode, dzero, &
& v_baseline%v, d_vidx, totxch, idxs, idxr, work, info)
if (info /= psb_success_) then
write(psb_err_unit,*) iam, 'baseline swap error:', info
call psb_abort(ctxt)
end if
! ==================================================================
! 7. Neighbor topology halo exchange (start + wait)
! ==================================================================
call psi_swapdata(ctxt, desc_a%get_mpic(), psb_swap_start_, dzero, &
& v_neighbor%v, d_vidx, totxch, idxs, idxr, work, info)
if (info /= psb_success_) then
write(psb_err_unit,*) iam, 'neighbor start error:', info
call psb_abort(ctxt)
end if
call psi_swapdata(ctxt, desc_a%get_mpic(), psb_swap_wait_, dzero, &
& v_neighbor%v, d_vidx, totxch, idxs, idxr, work, info)
if (info /= psb_success_) then
write(psb_err_unit,*) iam, 'neighbor wait error:', info
call psb_abort(ctxt)
end if
! ==================================================================
! 8. Extract results and compare
! ==================================================================
res_bl = v_baseline%get_vect()
res_nb = v_neighbor%get_vect()
! ---- Test 1: cross-check baseline vs neighbor (all entries) ----
n_total = n_total + 1
err = maxval(abs(res_bl(1:ncol) - res_nb(1:ncol)))
call psb_amx(ctxt, err)
if (iam == 0) then
if (err < tol) then
write(psb_out_unit,'(" [PASS] cross-check baseline vs neighbor : err = ",es12.5)') err
n_pass = n_pass + 1
else
write(psb_out_unit,'(" [FAIL] cross-check baseline vs neighbor : err = ",es12.5)') err
end if
end if
! ---- Test 2: baseline absolute correctness (halo = global index) ----
n_total = n_total + 1
err = maxval(abs(res_bl(1:ncol) - expected(1:ncol)))
call psb_amx(ctxt, err)
if (iam == 0) then
if (err < tol) then
write(psb_out_unit,'(" [PASS] baseline absolute correctness : err = ",es12.5)') err
n_pass = n_pass + 1
else
write(psb_out_unit,'(" [FAIL] baseline absolute correctness : err = ",es12.5)') err
end if
end if
! ---- Test 3: neighbor absolute correctness (halo = global index) ----
n_total = n_total + 1
err = maxval(abs(res_nb(1:ncol) - expected(1:ncol)))
call psb_amx(ctxt, err)
if (iam == 0) then
if (err < tol) then
write(psb_out_unit,'(" [PASS] neighbor absolute correctness : err = ",es12.5)') err
n_pass = n_pass + 1
else
write(psb_out_unit,'(" [FAIL] neighbor absolute correctness : err = ",es12.5)') err
end if
end if
! ---- Test 4: repeat neighbor exchange (topology reuse) ----
! Reset halo entries to zero, run again, and check
do i = nrow+1, ncol
res_nb(i) = dzero
end do
call v_neighbor%set_vect(res_nb)
call psi_swapdata(ctxt, desc_a%get_mpic(), psb_swap_start_, dzero, &
& v_neighbor%v, d_vidx, totxch, idxs, idxr, work, info)
call psi_swapdata(ctxt, desc_a%get_mpic(), psb_swap_wait_, dzero, &
& v_neighbor%v, d_vidx, totxch, idxs, idxr, work, info)
res_nb = v_neighbor%get_vect()
n_total = n_total + 1
err = maxval(abs(res_nb(1:ncol) - expected(1:ncol)))
call psb_amx(ctxt, err)
if (iam == 0) then
if (err < tol) then
write(psb_out_unit,'(" [PASS] neighbor topology reuse : err = ",es12.5)') err
n_pass = n_pass + 1
else
write(psb_out_unit,'(" [FAIL] neighbor topology reuse : err = ",es12.5)') err
end if
end if
! ==================================================================
! 9. Summary
! ==================================================================
if (iam == 0) then
write(psb_out_unit,'("================================================")')
write(psb_out_unit,'(" Results: ",i0," / ",i0," tests passed")') n_pass, n_total
if (n_pass == n_total) then
write(psb_out_unit,'(" STATUS: ALL PASSED")')
else
write(psb_out_unit,'(" STATUS: SOME FAILURES")')
end if
write(psb_out_unit,'("================================================")')
end if
! ==================================================================
! 10. Cleanup
! ==================================================================
deallocate(res_bl, res_nb, expected, glob_col, work)
call psb_gefree(v_baseline, desc_a, info)
call psb_gefree(v_neighbor, desc_a, info)
call psb_cdfree(desc_a, info)
call psb_exit(ctxt)
end program test_halo_new

1413
tmp.log

File diff suppressed because one or more lines are too long
Loading…
Cancel
Save