[ADD] Added first version of MPI_Ineighbor_alltoallv call. For now it's just for double precision routines
parent
3d92668973
commit
a3ebde071d
@ -0,0 +1,390 @@
|
||||
!
|
||||
! Parallel Sparse BLAS version 3.5
|
||||
! (C) Copyright 2006-2018
|
||||
! Salvatore Filippone
|
||||
! Alfredo Buttari
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
!
|
||||
! File: psb_dhalo_a_new.f90
|
||||
!
|
||||
! Subroutine: psb_dhalom_new
|
||||
! This subroutine performs the exchange of the halo elements in a
|
||||
! distributed dense matrix between all the processes.
|
||||
! The comm_type argument selects the communication scheme:
|
||||
! psb_comm_type_isend_ (0) = classic irecv/send (default)
|
||||
! psb_comm_type_neigh_a2av_ (1) = MPI_Alltoallv collective
|
||||
!
|
||||
! Arguments:
|
||||
! x - real,dimension(:,:). The local part of the dense matrix.
|
||||
! desc_a - type(psb_desc_type). The communication descriptor.
|
||||
! info - integer. Return code
|
||||
! comm_type - integer. Communication scheme selector
|
||||
! jx - integer(optional). The starting column of the global matrix.
|
||||
! ik - integer(optional). The number of columns to gather.
|
||||
! work - real(optional). Work area.
|
||||
! tran - character(optional). Transpose exchange.
|
||||
! mode - integer(optional). Communication mode (see Swapdata)
|
||||
! data - integer(optional). Which index list in desc_a should be used.
|
||||
!
|
||||
subroutine psb_dhalom_new(x,desc_a,info,comm_type,jx,ik,work,tran,mode,data)
|
||||
use psb_base_mod, psb_protect_name => psb_dhalom_new
|
||||
use psi_mod
|
||||
implicit none
|
||||
|
||||
real(psb_dpk_), intent(inout), target :: x(:,:)
|
||||
type(psb_desc_type), intent(in) :: desc_a
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
integer(psb_ipk_), intent(in) :: comm_type
|
||||
real(psb_dpk_), target, optional, intent(inout) :: work(:)
|
||||
integer(psb_ipk_), intent(in), optional :: mode,jx,ik,data
|
||||
character, intent(in), optional :: tran
|
||||
|
||||
! locals
|
||||
type(psb_ctxt_type) :: ctxt
|
||||
integer(psb_mpk_) :: np, me, k
|
||||
integer(psb_ipk_) :: err_act, iix, jjx, maxk, nrow, imode, i,&
|
||||
& liwork, data_, ldx
|
||||
integer(psb_lpk_) :: m, n, ix, ijx
|
||||
real(psb_dpk_), pointer :: iwork(:), xp(:,:)
|
||||
character :: tran_
|
||||
character(len=20) :: name, ch_err
|
||||
logical :: aliw
|
||||
|
||||
name='psb_dhalom_new'
|
||||
info=psb_success_
|
||||
call psb_erractionsave(err_act)
|
||||
if (psb_errstatus_fatal()) then
|
||||
info = psb_err_internal_error_ ; goto 9999
|
||||
end if
|
||||
|
||||
ctxt=desc_a%get_context()
|
||||
|
||||
! check on blacs grid
|
||||
call psb_info(ctxt, me, np)
|
||||
if (np == -1) then
|
||||
info = psb_err_context_error_
|
||||
call psb_errpush(info,name)
|
||||
goto 9999
|
||||
endif
|
||||
|
||||
ix = 1
|
||||
if (present(jx)) then
|
||||
ijx = jx
|
||||
else
|
||||
ijx = 1
|
||||
endif
|
||||
|
||||
m = desc_a%get_global_rows()
|
||||
n = desc_a%get_global_cols()
|
||||
nrow = desc_a%get_local_rows()
|
||||
|
||||
maxk=size(x,2)-ijx+1
|
||||
|
||||
if(present(ik)) then
|
||||
if(ik > maxk) then
|
||||
k=maxk
|
||||
else
|
||||
k=ik
|
||||
end if
|
||||
else
|
||||
k = maxk
|
||||
end if
|
||||
|
||||
if (present(tran)) then
|
||||
tran_ = psb_toupper(tran)
|
||||
else
|
||||
tran_ = 'N'
|
||||
endif
|
||||
|
||||
if (present(data)) then
|
||||
data_ = data
|
||||
else
|
||||
data_ = psb_comm_halo_
|
||||
endif
|
||||
|
||||
!
|
||||
! Select the communication mode based on comm_type
|
||||
!
|
||||
select case(comm_type)
|
||||
case(0)
|
||||
!
|
||||
! Classic irecv/send scheme (default)
|
||||
!
|
||||
if (present(mode)) then
|
||||
imode = mode
|
||||
else
|
||||
imode = IOR(psb_swap_send_,psb_swap_recv_)
|
||||
endif
|
||||
|
||||
case(1)
|
||||
if(present(mode)) then
|
||||
imode = mode
|
||||
else
|
||||
imode = IOR(psb_swap_start_,psb_swap_wait_)
|
||||
endif
|
||||
|
||||
case default
|
||||
info = psb_err_input_value_invalid_i_
|
||||
call psb_errpush(info,name,i_err=(/5_psb_ipk_,comm_type,izero,izero,izero/))
|
||||
goto 9999
|
||||
end select
|
||||
|
||||
ldx = size(x,1)
|
||||
! check vector correctness
|
||||
call psb_chkvect(m,lone,ldx,ix,ijx,desc_a,info,iix,jjx,check_halo=.true.)
|
||||
if(info /= psb_success_) then
|
||||
info=psb_err_from_subroutine_ ; ch_err='psb_chkvect'
|
||||
call psb_errpush(info,name,a_err=ch_err)
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
liwork=nrow
|
||||
if (present(work)) then
|
||||
if(size(work) >= liwork) then
|
||||
aliw=.false.
|
||||
iwork => work
|
||||
else
|
||||
aliw=.true.
|
||||
allocate(iwork(liwork),stat=info)
|
||||
if(info /= psb_success_) then
|
||||
info=psb_err_from_subroutine_
|
||||
ch_err='psb_realloc'
|
||||
call psb_errpush(info,name,a_err=ch_err)
|
||||
goto 9999
|
||||
end if
|
||||
end if
|
||||
else
|
||||
aliw=.true.
|
||||
allocate(iwork(liwork),stat=info)
|
||||
if(info /= psb_success_) then
|
||||
info=psb_err_from_subroutine_
|
||||
ch_err='psb_realloc'
|
||||
call psb_errpush(info,name,a_err=ch_err)
|
||||
goto 9999
|
||||
end if
|
||||
end if
|
||||
|
||||
! exchange halo elements
|
||||
xp => x(iix:size(x,1),jjx:jjx+k-1)
|
||||
if(tran_ == 'N') then
|
||||
call psi_swapdata(imode,k,dzero,xp,&
|
||||
& desc_a,iwork,info,data=data_)
|
||||
else if((tran_ == 'T').or.(tran_ == 'C')) then
|
||||
call psi_swaptran(imode,k,done,xp,&
|
||||
&desc_a,iwork,info)
|
||||
else
|
||||
info = psb_err_internal_error_
|
||||
call psb_errpush(info,name,a_err='invalid tran')
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
if(info /= psb_success_) then
|
||||
ch_err='PSI_swapdata'
|
||||
call psb_errpush(psb_err_from_subroutine_,name,a_err=ch_err)
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
if (aliw) deallocate(iwork)
|
||||
nullify(iwork)
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(ctxt,err_act)
|
||||
|
||||
return
|
||||
end subroutine psb_dhalom_new
|
||||
|
||||
|
||||
!
|
||||
! Subroutine: psb_dhalov_new
|
||||
! This subroutine performs the exchange of the halo elements in a
|
||||
! distributed dense vector between all the processes.
|
||||
! The comm_type argument selects the communication scheme:
|
||||
! psb_comm_type_isend_ (0) = classic irecv/send (default)
|
||||
! psb_comm_type_neigh_a2av_ (1) = MPI_Alltoallv collective
|
||||
!
|
||||
! Arguments:
|
||||
! x - real,dimension(:). The local part of the dense vector.
|
||||
! desc_a - type(psb_desc_type). The communication descriptor.
|
||||
! info - integer. Return code
|
||||
! comm_type - integer. Communication scheme selector
|
||||
! work - real(optional). Work area.
|
||||
! tran - character(optional). Transpose exchange.
|
||||
! mode - integer(optional). Communication mode (see Swapdata)
|
||||
! data - integer(optional). Which index list in desc_a should be used.
|
||||
!
|
||||
subroutine psb_dhalov_new(x,desc_a,info,comm_type,work,tran,mode,data)
|
||||
use psb_base_mod, psb_protect_name => psb_dhalov_new
|
||||
use psi_mod
|
||||
implicit none
|
||||
|
||||
real(psb_dpk_), intent(inout) :: x(:)
|
||||
type(psb_desc_type), intent(in) :: desc_a
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
integer(psb_ipk_), intent(in) :: comm_type
|
||||
real(psb_dpk_), target, optional, intent(inout) :: work(:)
|
||||
integer(psb_ipk_), intent(in), optional :: mode,data
|
||||
character, intent(in), optional :: tran
|
||||
|
||||
! locals
|
||||
type(psb_ctxt_type) :: ctxt
|
||||
integer(psb_mpk_) :: np, me
|
||||
integer(psb_ipk_) :: err_act, ldx, iix, jjx, nrow, imode, liwork, data_
|
||||
integer(psb_lpk_) :: m, n, ix, ijx
|
||||
real(psb_dpk_), pointer :: iwork(:)
|
||||
character :: tran_
|
||||
character(len=20) :: name, ch_err
|
||||
logical :: aliw
|
||||
|
||||
name='psb_dhalov_new'
|
||||
info=psb_success_
|
||||
call psb_erractionsave(err_act)
|
||||
if (psb_errstatus_fatal()) then
|
||||
info = psb_err_internal_error_ ; goto 9999
|
||||
end if
|
||||
|
||||
ctxt=desc_a%get_context()
|
||||
|
||||
! check on blacs grid
|
||||
call psb_info(ctxt, me, np)
|
||||
if (np == -1) then
|
||||
info = psb_err_context_error_
|
||||
call psb_errpush(info,name)
|
||||
goto 9999
|
||||
endif
|
||||
|
||||
ix = 1
|
||||
ijx = 1
|
||||
|
||||
m = desc_a%get_global_rows()
|
||||
n = desc_a%get_global_cols()
|
||||
nrow = desc_a%get_local_rows()
|
||||
|
||||
if (present(tran)) then
|
||||
tran_ = psb_toupper(tran)
|
||||
else
|
||||
tran_ = 'N'
|
||||
endif
|
||||
|
||||
if (present(data)) then
|
||||
data_ = data
|
||||
else
|
||||
data_ = psb_comm_halo_
|
||||
endif
|
||||
|
||||
!
|
||||
! Select the communication mode based on comm_type
|
||||
!
|
||||
select case(comm_type)
|
||||
case(0)
|
||||
!
|
||||
! Classic irecv/send scheme (default)
|
||||
!
|
||||
if (present(mode)) then
|
||||
imode = mode
|
||||
else
|
||||
imode = IOR(psb_swap_send_,psb_swap_recv_)
|
||||
endif
|
||||
|
||||
case(1)
|
||||
if(present(mode)) then
|
||||
imode = mode
|
||||
else
|
||||
imode = IOR(psb_swap_start_,psb_swap_wait_)
|
||||
endif
|
||||
|
||||
case default
|
||||
info = psb_err_input_value_invalid_i_
|
||||
call psb_errpush(info,name,i_err=(/4_psb_ipk_,comm_type,izero,izero,izero/))
|
||||
goto 9999
|
||||
end select
|
||||
|
||||
ldx = size(x,1)
|
||||
! check vector correctness
|
||||
call psb_chkvect(m,lone,ldx,ix,ijx,desc_a,info,iix,jjx,check_halo=.true.)
|
||||
if(info /= psb_success_) then
|
||||
info=psb_err_from_subroutine_ ; ch_err='psb_chkvect'
|
||||
call psb_errpush(info,name,a_err=ch_err)
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
liwork=nrow
|
||||
if (present(work)) then
|
||||
if(size(work) >= liwork) then
|
||||
aliw=.false.
|
||||
iwork => work
|
||||
else
|
||||
aliw=.true.
|
||||
allocate(iwork(liwork),stat=info)
|
||||
if(info /= psb_success_) then
|
||||
info=psb_err_from_subroutine_
|
||||
ch_err='psb_realloc'
|
||||
call psb_errpush(info,name,a_err=ch_err)
|
||||
goto 9999
|
||||
end if
|
||||
end if
|
||||
else
|
||||
aliw=.true.
|
||||
allocate(iwork(liwork),stat=info)
|
||||
if(info /= psb_success_) then
|
||||
info=psb_err_from_subroutine_
|
||||
ch_err='psb_realloc'
|
||||
call psb_errpush(info,name,a_err=ch_err)
|
||||
goto 9999
|
||||
end if
|
||||
end if
|
||||
|
||||
! exchange halo elements
|
||||
if(tran_ == 'N') then
|
||||
call psi_swapdata(imode,dzero,x(iix:size(x)),&
|
||||
& desc_a,iwork,info,data=data_)
|
||||
else if((tran_ == 'T').or.(tran_ == 'C')) then
|
||||
call psi_swaptran(imode,done,x(iix:size(x)),&
|
||||
& desc_a,iwork,info)
|
||||
else
|
||||
info = psb_err_internal_error_
|
||||
call psb_errpush(info,name,a_err='invalid tran')
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
if(info /= psb_success_) then
|
||||
ch_err='PSI_swapdata'
|
||||
call psb_errpush(psb_err_from_subroutine_,name,a_err=ch_err)
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
if (aliw) deallocate(iwork)
|
||||
nullify(iwork)
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(ctxt,err_act)
|
||||
|
||||
return
|
||||
end subroutine psb_dhalov_new
|
||||
@ -0,0 +1,298 @@
|
||||
!
|
||||
! Parallel Sparse BLAS version 3.5
|
||||
! (C) Copyright 2006-2018
|
||||
! Salvatore Filippone
|
||||
! Alfredo Buttari
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
!
|
||||
! File: psb_dhalo_new.f90
|
||||
!
|
||||
! Subroutine: psb_dhalo_vect_new
|
||||
! Halo exchange for a distributed vector.
|
||||
! comm_type selects the communication scheme:
|
||||
! psb_comm_type_isend_ (0) : classic isend/irecv (delegates to psi_swapdata)
|
||||
! psb_comm_type_neigh_a2av_ (1) : MPI_Neighbor_alltoallv via pre-built topology
|
||||
!
|
||||
subroutine psb_dhalo_vect_new(x,desc_a,info,comm_type,work,tran,mode,data)
|
||||
use psb_base_mod, psb_protect_name => psb_dhalo_vect_new
|
||||
use psi_mod
|
||||
implicit none
|
||||
|
||||
|
||||
type(psb_d_vect_type), intent(inout) :: x
|
||||
type(psb_desc_type), intent(in) :: desc_a
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
integer(psb_ipk_), intent(in) :: comm_type
|
||||
real(psb_dpk_), target, optional, intent(inout) :: work(:)
|
||||
integer(psb_ipk_), intent(in), optional :: mode,data
|
||||
character, intent(in), optional :: tran
|
||||
|
||||
! locals
|
||||
type(psb_ctxt_type) :: ctxt
|
||||
integer(psb_ipk_) :: np, me, err_act, &
|
||||
& nrow, ncol, lldx, imode, liwork, data_
|
||||
real(psb_dpk_),pointer :: iwork(:)
|
||||
real(psb_dpk_), allocatable :: sndbuf(:), rcvbuf(:)
|
||||
character :: tran_
|
||||
character(len=40) :: name, ch_err
|
||||
logical :: aliw
|
||||
integer(psb_mpk_) :: iret
|
||||
integer(psb_ipk_) :: k
|
||||
|
||||
name='psb_dhalo_vect_new'
|
||||
info=psb_success_
|
||||
call psb_erractionsave(err_act)
|
||||
if (psb_errstatus_fatal()) then
|
||||
info = psb_err_internal_error_ ; goto 9999
|
||||
end if
|
||||
|
||||
ctxt=desc_a%get_context()
|
||||
call psb_info(ctxt, me, np)
|
||||
if (np == -1) then
|
||||
info = psb_err_context_error_
|
||||
call psb_errpush(info,name)
|
||||
goto 9999
|
||||
endif
|
||||
|
||||
if (.not.allocated(x%v)) then
|
||||
info = psb_err_invalid_vect_state_
|
||||
call psb_errpush(info,name)
|
||||
goto 9999
|
||||
endif
|
||||
|
||||
nrow = desc_a%get_local_rows()
|
||||
ncol = desc_a%get_local_cols()
|
||||
lldx = x%get_nrows()
|
||||
|
||||
if (present(tran)) then
|
||||
tran_ = psb_toupper(tran)
|
||||
else
|
||||
tran_ = 'N'
|
||||
endif
|
||||
if (present(data)) then
|
||||
data_ = data
|
||||
else
|
||||
data_ = psb_comm_halo_
|
||||
endif
|
||||
|
||||
if ((info == 0).and.(lldx < ncol)) call x%reall(ncol,info)
|
||||
if(info /= psb_success_) then
|
||||
info=psb_err_from_subroutine_ ; ch_err='reall'
|
||||
call psb_errpush(info,name,a_err=ch_err); goto 9999
|
||||
end if
|
||||
|
||||
select case(comm_type)
|
||||
case(0) ! psb_comm_type_isend_
|
||||
! ---- Classic isend/irecv: delegate to psi_swapdata ----
|
||||
if (present(mode)) then
|
||||
imode = mode
|
||||
else
|
||||
imode = IOR(psb_swap_send_,psb_swap_recv_)
|
||||
end if
|
||||
|
||||
liwork=nrow
|
||||
if (present(work)) then
|
||||
if(size(work) >= liwork) then
|
||||
iwork => work; aliw=.false.
|
||||
else
|
||||
aliw=.true.; allocate(iwork(liwork),stat=info)
|
||||
end if
|
||||
else
|
||||
aliw=.true.; allocate(iwork(liwork),stat=info)
|
||||
end if
|
||||
if(info /= psb_success_) then
|
||||
ch_err='psb_realloc'
|
||||
call psb_errpush(psb_err_from_subroutine_,name,a_err=ch_err)
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
if(tran_ == 'N') then
|
||||
call psi_swapdata(imode,dzero,x%v,desc_a,iwork,info,data=data_)
|
||||
else if((tran_ == 'T').or.(tran_ == 'C')) then
|
||||
call psi_swaptran(imode,done,x%v,desc_a,iwork,info)
|
||||
else
|
||||
info = psb_err_internal_error_
|
||||
call psb_errpush(info,name,a_err='invalid tran'); goto 9999
|
||||
end if
|
||||
|
||||
if (info /= psb_success_) then
|
||||
ch_err='PSI_swapdata'
|
||||
call psb_errpush(psb_err_from_subroutine_,name,a_err=ch_err)
|
||||
goto 9999
|
||||
end if
|
||||
if (aliw) deallocate(iwork)
|
||||
nullify(iwork)
|
||||
|
||||
case(1) ! psb_comm_type_neigh_a2av_
|
||||
! TODO
|
||||
case default
|
||||
info = psb_err_input_value_invalid_i_
|
||||
call psb_errpush(info,name,i_err=(/5_psb_ipk_,comm_type,izero,izero,izero/))
|
||||
goto 9999
|
||||
end select
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(ctxt,err_act)
|
||||
return
|
||||
end subroutine psb_dhalo_vect_new
|
||||
|
||||
|
||||
!
|
||||
! Subroutine: psb_dhalo_multivect_new
|
||||
! Halo exchange for a distributed multivector.
|
||||
! comm_type selects the communication scheme:
|
||||
! psb_comm_type_isend_ (0) : classic isend/irecv
|
||||
! psb_comm_type_neigh_a2av_ (1) : MPI_Neighbor_alltoallv via pre-built topology
|
||||
!
|
||||
subroutine psb_dhalo_multivect_new(x,desc_a,info,comm_type,work,tran,mode,data)
|
||||
use psb_base_mod, psb_protect_name => psb_dhalo_multivect_new
|
||||
use psi_mod
|
||||
implicit none
|
||||
|
||||
type(psb_d_multivect_type), intent(inout) :: x
|
||||
type(psb_desc_type), intent(in) :: desc_a
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
integer(psb_ipk_), intent(in) :: comm_type
|
||||
real(psb_dpk_), target, optional, intent(inout) :: work(:)
|
||||
integer(psb_ipk_), intent(in), optional :: mode,data
|
||||
character, intent(in), optional :: tran
|
||||
|
||||
! locals
|
||||
type(psb_ctxt_type) :: ctxt
|
||||
integer(psb_ipk_) :: np, me, err_act, &
|
||||
& nrow, ncol, lldx, imode, liwork, data_, nc
|
||||
real(psb_dpk_),pointer :: iwork(:)
|
||||
real(psb_dpk_), allocatable :: sndbuf(:), rcvbuf(:)
|
||||
integer(psb_mpk_), allocatable :: mv_sndcnts(:), mv_rcvcnts(:), &
|
||||
& mv_snddispls(:), mv_rcvdispls(:)
|
||||
character :: tran_
|
||||
character(len=40) :: name, ch_err
|
||||
logical :: aliw
|
||||
integer(psb_mpk_) :: iret
|
||||
integer(psb_ipk_) :: k, j, bp, nn
|
||||
|
||||
name='psb_dhalo_multivect_new'
|
||||
info=psb_success_
|
||||
call psb_erractionsave(err_act)
|
||||
if (psb_errstatus_fatal()) then
|
||||
info = psb_err_internal_error_ ; goto 9999
|
||||
end if
|
||||
|
||||
ctxt=desc_a%get_context()
|
||||
call psb_info(ctxt, me, np)
|
||||
if (np == -1) then
|
||||
info = psb_err_context_error_
|
||||
call psb_errpush(info,name); goto 9999
|
||||
endif
|
||||
|
||||
if (.not.allocated(x%v)) then
|
||||
info = psb_err_invalid_vect_state_
|
||||
call psb_errpush(info,name); goto 9999
|
||||
endif
|
||||
|
||||
nrow = desc_a%get_local_rows()
|
||||
ncol = desc_a%get_local_cols()
|
||||
lldx = x%get_nrows()
|
||||
nc = x%get_ncols()
|
||||
|
||||
if (present(tran)) then
|
||||
tran_ = psb_toupper(tran)
|
||||
else
|
||||
tran_ = 'N'
|
||||
endif
|
||||
if (present(data)) then
|
||||
data_ = data
|
||||
else
|
||||
data_ = psb_comm_halo_
|
||||
endif
|
||||
|
||||
if (lldx < ncol) call x%reall(ncol,nc,info)
|
||||
if(info /= psb_success_) then
|
||||
ch_err='psb_reall'
|
||||
call psb_errpush(psb_err_from_subroutine_,name,a_err=ch_err)
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
select case(comm_type)
|
||||
case(0)
|
||||
! ---- Classic isend/irecv ----
|
||||
if (present(mode)) then
|
||||
imode = mode
|
||||
else
|
||||
imode = IOR(psb_swap_send_,psb_swap_recv_)
|
||||
end if
|
||||
|
||||
liwork=nrow
|
||||
if (present(work)) then
|
||||
if(size(work) >= liwork) then
|
||||
iwork => work; aliw=.false.
|
||||
else
|
||||
aliw=.true.; allocate(iwork(liwork),stat=info)
|
||||
end if
|
||||
else
|
||||
aliw=.true.; allocate(iwork(liwork),stat=info)
|
||||
end if
|
||||
if(info /= psb_success_) then
|
||||
ch_err='psb_realloc'
|
||||
call psb_errpush(psb_err_from_subroutine_,name,a_err=ch_err)
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
if(tran_ == 'N') then
|
||||
call psi_swapdata(imode,dzero,x%v,desc_a,iwork,info,data=data_)
|
||||
else if((tran_ == 'T').or.(tran_ == 'C')) then
|
||||
call psi_swaptran(imode,done,x%v,desc_a,iwork,info)
|
||||
else
|
||||
info = psb_err_internal_error_
|
||||
call psb_errpush(info,name,a_err='invalid tran'); goto 9999
|
||||
end if
|
||||
|
||||
if (info /= psb_success_) then
|
||||
ch_err='PSI_swapdata'
|
||||
call psb_errpush(psb_err_from_subroutine_,name,a_err=ch_err)
|
||||
goto 9999
|
||||
end if
|
||||
if (aliw) deallocate(iwork)
|
||||
nullify(iwork)
|
||||
|
||||
case(1)
|
||||
! TODO
|
||||
|
||||
case default
|
||||
info = psb_err_input_value_invalid_i_
|
||||
call psb_errpush(info,name,i_err=(/5_psb_ipk_,comm_type,izero,izero,izero/))
|
||||
goto 9999
|
||||
end select
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(ctxt,err_act)
|
||||
return
|
||||
end subroutine psb_dhalo_multivect_new
|
||||
@ -0,0 +1,422 @@
|
||||
!
|
||||
! Parallel Sparse BLAS version 3.5
|
||||
! (C) Copyright 2006-2018
|
||||
! Salvatore Filippone
|
||||
! Alfredo Buttari
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
!
|
||||
! Module: psb_neighbor_topology_mod
|
||||
! Provides a type to hold pre-built MPI neighborhood topology
|
||||
! information for persistent/repeated halo exchanges via
|
||||
! MPI_Neighbor_alltoallv (MPI >= 3.0).
|
||||
!
|
||||
! The topology is stored inside the vector type (psb_d_base_vect_type)
|
||||
! and lazily created on the first psi_swapdata call with the
|
||||
! neighbor-alltoallv communication mode. Once built it is reused
|
||||
! for every subsequent halo exchange, avoiding the per-call overhead
|
||||
! of re-scanning the index list and allocating temporary arrays.
|
||||
!
|
||||
! The graph communicator and per-neighbor counts/displacements
|
||||
! are built once and reused.
|
||||
!
|
||||
! The gather/scatter index arrays (send_indexes, recv_indexes) record
|
||||
! which local vector positions must be packed / unpacked.
|
||||
!
|
||||
module psb_neighbor_topology_mod
|
||||
use psb_const_mod
|
||||
use psb_desc_const_mod
|
||||
use psb_error_mod
|
||||
!
|
||||
! Only import mpi_comm_null here (needed for type default initializer).
|
||||
! Full MPI access is done inside each contained subroutine so that
|
||||
! MPI symbols do NOT leak into modules that use psb_neighbor_topology_mod.
|
||||
!
|
||||
#ifdef PSB_MPI_MOD
|
||||
use mpi, only: mpi_comm_null
|
||||
#endif
|
||||
implicit none
|
||||
#ifdef PSB_MPI_H
|
||||
include 'mpif.h'
|
||||
#endif
|
||||
|
||||
type :: psb_neighbor_topology_type
|
||||
!
|
||||
! MPI dist-graph communicator (only communicating neighbors).
|
||||
!
|
||||
integer(psb_mpk_) :: graph_comm = mpi_comm_null
|
||||
!
|
||||
! Number of neighbors (processes I exchange with, excluding self).
|
||||
!
|
||||
integer(psb_ipk_) :: num_neighbors = 0
|
||||
!
|
||||
! Per-neighbor send/recv counts and displacements (units of
|
||||
! single elements; for n-column multivectors multiply by n).
|
||||
! send_counts(i) = number of elements sent to i-th neighbor
|
||||
! recv_counts(i) = number of elements received from i-th neighbor
|
||||
! send_displs(i) = displacement into contiguous send buffer
|
||||
! recv_displs(i) = displacement into contiguous recv buffer
|
||||
!
|
||||
integer(psb_mpk_), allocatable :: send_counts(:), recv_counts(:)
|
||||
integer(psb_mpk_), allocatable :: send_displs(:), recv_displs(:)
|
||||
!
|
||||
! Gather indexes: the k-th element of the send buffer is
|
||||
! y%v( send_indexes(k) )
|
||||
! Scatter indexes: the k-th element of the recv buffer goes to
|
||||
! y%v( recv_indexes(k) )
|
||||
!
|
||||
integer(psb_ipk_), allocatable :: send_indexes(:)
|
||||
integer(psb_ipk_), allocatable :: recv_indexes(:)
|
||||
!
|
||||
! Total number of elements to send / receive (per single column),
|
||||
! excluding self-exchange.
|
||||
!
|
||||
integer(psb_ipk_) :: total_send = 0
|
||||
integer(psb_ipk_) :: total_recv = 0
|
||||
!
|
||||
|
||||
! Initialization flag.
|
||||
!
|
||||
logical :: is_initialized = .false.
|
||||
contains
|
||||
procedure, pass(topology) :: init => neighbor_topology_init
|
||||
procedure, pass(topology) :: free => neighbor_topology_free
|
||||
procedure, pass(topology) :: sizeof => neighbor_topology_sizeof
|
||||
end type psb_neighbor_topology_type
|
||||
|
||||
contains
|
||||
|
||||
! ---------------------------------------------------------------
|
||||
! neighbor_topology_init
|
||||
!
|
||||
! Parse the halo index list (obtained via desc_a%get_list_p)
|
||||
! and build:
|
||||
! - MPI dist-graph communicator with only the true neighbors
|
||||
! - per-neighbor send/recv counts and displacements
|
||||
! - contiguous gather/scatter index arrays
|
||||
!
|
||||
! The topology is stored inside the vector and lazily built
|
||||
! on the first psi_swapdata call that uses the neighbor-alltoallv
|
||||
! communication mode.
|
||||
!
|
||||
! Arguments:
|
||||
! topology - the persistent state (output, intent inout)
|
||||
! halo_index - halo_index array (from get_list_p, intent in)
|
||||
! num_neighbors - number of exchanges (from get_list_p)
|
||||
! total_send_elems - total send count (from get_list_p)
|
||||
! total_recv_elems - total recv count (from get_list_p)
|
||||
! ctxt - PSBLAS context
|
||||
! icomm - MPI communicator
|
||||
! info - error code (output)
|
||||
! ---------------------------------------------------------------
|
||||
subroutine neighbor_topology_init(topology, halo_index, num_neighbors, &
|
||||
& total_send_elems, total_recv_elems, ctxt, icomm, info)
|
||||
#ifdef PSB_MPI_MOD
|
||||
use mpi
|
||||
#endif
|
||||
implicit none
|
||||
#ifdef PSB_MPI_H
|
||||
include 'mpif.h'
|
||||
#endif
|
||||
|
||||
class(psb_neighbor_topology_type), intent(inout) :: topology
|
||||
integer(psb_ipk_), intent(in) :: halo_index(:)
|
||||
integer(psb_ipk_), intent(in) :: num_neighbors, total_send_elems, total_recv_elems
|
||||
type(psb_ctxt_type), intent(in) :: ctxt
|
||||
integer(psb_mpk_), intent(in) :: icomm
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
! locals
|
||||
integer(psb_mpk_) :: iret
|
||||
integer(psb_ipk_) :: i, k, idx_ptr, num_elem_recv, num_elem_send, partner_proc
|
||||
integer(psb_ipk_) :: nbr_count, send_offset, recv_offset
|
||||
integer(psb_mpk_), allocatable :: source_ranks(:), dest_ranks(:)
|
||||
integer(psb_mpk_), allocatable :: source_weights(:), dest_weights(:)
|
||||
integer(psb_mpk_) :: in_degree, out_degree
|
||||
character(len=40) :: name
|
||||
integer(psb_ipk_) :: proc_id
|
||||
integer(psb_ipk_) :: position
|
||||
integer(psb_ipk_) :: err_act
|
||||
|
||||
info = psb_success_
|
||||
name = 'neighbor_topology_init'
|
||||
call psb_erractionsave(err_act)
|
||||
|
||||
! Clean up any previous state
|
||||
call topology%free(info)
|
||||
|
||||
! ----------------------------------------------------------
|
||||
! First pass: count neighbors (excluding self) and totals
|
||||
! ----------------------------------------------------------
|
||||
topology%num_neighbors = 0
|
||||
topology%total_send = 0
|
||||
topology%total_recv = 0
|
||||
|
||||
if(size(halo_index) < 1) then
|
||||
call psb_errpush(psb_err_topology_invalid_args_,name)
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
allocate(source_ranks(num_neighbors), stat=info)
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_alloc_dealloc_
|
||||
call psb_errpush(info, name, a_err='Source ranks allocation failed')
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
allocate(dest_ranks(num_neighbors), stat=info)
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_alloc_dealloc_
|
||||
call psb_errpush(info, name, a_err='Destination ranks allocation failed')
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
allocate(source_weights(num_neighbors), stat=info)
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_alloc_dealloc_
|
||||
call psb_errpush(info, name, a_err='Source weights allocation failed')
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
allocate(dest_weights(num_neighbors), stat=info)
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_alloc_dealloc_
|
||||
call psb_errpush(info, name, a_err='Destination weights allocation failed')
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
allocate(topology%send_counts(num_neighbors), stat=info)
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_alloc_dealloc_
|
||||
call psb_errpush(info, name, a_err='Send counts allocation failed')
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
allocate(topology%recv_counts(num_neighbors), stat=info)
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_alloc_dealloc_
|
||||
call psb_errpush(info, name, a_err='Receive counts allocation failed')
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
allocate(topology%send_displs(num_neighbors), stat=info)
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_alloc_dealloc_
|
||||
call psb_errpush(info, name, a_err='Send displacements allocation failed')
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
allocate(topology%recv_displs(num_neighbors), stat=info)
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_alloc_dealloc_
|
||||
call psb_errpush(info, name, a_err='Receive displacements allocation failed')
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
|
||||
! -----------------------------------------------------------
|
||||
! Allocate the gather/scatter index arrays
|
||||
! -----------------------------------------------------------
|
||||
allocate(topology%send_indexes(total_send_elems), stat=info)
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_alloc_dealloc_
|
||||
call psb_errpush(info, name, a_err='Send indexes allocation failed')
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
allocate(topology%recv_indexes(total_recv_elems), stat=info)
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_alloc_dealloc_
|
||||
call psb_errpush(info, name, a_err='Recv indexes allocation failed')
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
! -----------------------------------------------------------
|
||||
! Fill neighbor ranks, weights, counts, displacements,
|
||||
! and gather/scatter index arrays.
|
||||
!
|
||||
! The halo_index layout per neighbor (starting at position):
|
||||
! position + 0 : process id
|
||||
! position + 1 : nerv (num recv elements)
|
||||
! position + 2 .. +1+nerv : recv element indexes
|
||||
! position + 2+nerv : nesd (num send elements)
|
||||
! position + 3+nerv .. +2+nerv+nesd : send element indexes
|
||||
! Total stride per neighbor: nerv + nesd + 3
|
||||
! -----------------------------------------------------------
|
||||
send_offset = 0
|
||||
recv_offset = 0
|
||||
position = 1
|
||||
|
||||
do i = 1, num_neighbors
|
||||
proc_id = halo_index(position)
|
||||
num_elem_recv = halo_index(position + 1)
|
||||
num_elem_send = halo_index(position + num_elem_recv + 2)
|
||||
|
||||
! Fill source/destination ranks and weights (weights are all 1 for now)
|
||||
source_ranks(i) = int(proc_id, psb_mpk_)
|
||||
dest_ranks(i) = int(proc_id, psb_mpk_)
|
||||
source_weights(i) = 1
|
||||
dest_weights(i) = 1
|
||||
|
||||
! Counts and displacements (displs set BEFORE accumulating offset)
|
||||
topology%send_counts(i) = int(num_elem_send, psb_mpk_)
|
||||
topology%recv_counts(i) = int(num_elem_recv, psb_mpk_)
|
||||
topology%send_displs(i) = int(send_offset, psb_mpk_)
|
||||
topology%recv_displs(i) = int(recv_offset, psb_mpk_)
|
||||
|
||||
! Fill recv_indexes from halo_index(position+2 .. position+1+nerv)
|
||||
do k = 1, num_elem_recv
|
||||
topology%recv_indexes(recv_offset + k) = halo_index(position + psb_elem_recv_ + k - 1)
|
||||
end do
|
||||
|
||||
! Fill send_indexes from halo_index(position+3+nerv .. position+2+nerv+nesd)
|
||||
do k = 1, num_elem_send
|
||||
topology%send_indexes(send_offset + k) = halo_index(position + num_elem_recv + psb_elem_send_ + k - 1)
|
||||
end do
|
||||
|
||||
send_offset = send_offset + num_elem_send
|
||||
recv_offset = recv_offset + num_elem_recv
|
||||
|
||||
topology%num_neighbors = topology%num_neighbors + 1
|
||||
topology%total_send = topology%total_send + num_elem_send
|
||||
topology%total_recv = topology%total_recv + num_elem_recv
|
||||
|
||||
position = position + num_elem_recv + num_elem_send + 3
|
||||
end do
|
||||
|
||||
! ----------------------------------------------------------
|
||||
! Sanity check: the totals computed from the neighbor list
|
||||
! should match the totals returned by get_list_p.
|
||||
! ----------------------------------------------------------
|
||||
if (topology%total_send /= total_send_elems) then
|
||||
info = psb_err_topology_args_mismatch_
|
||||
call psb_errpush(info, name, a_err='Send elements mismatch')
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
if (topology%total_recv /= total_recv_elems) then
|
||||
info = psb_err_topology_args_mismatch_
|
||||
call psb_errpush(info, name, a_err='Receive elements mismatch')
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
if(topology%num_neighbors /= num_neighbors) then
|
||||
info = psb_err_topology_args_mismatch_
|
||||
call psb_errpush(info, name, a_err='Number of neighbors mismatch')
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
|
||||
! ----------------------------------------------------------
|
||||
! Build the dist-graph communicator
|
||||
! ----------------------------------------------------------
|
||||
in_degree = topology%num_neighbors !! Just for clarity
|
||||
out_degree = topology%num_neighbors !! Just for clarity
|
||||
|
||||
call mpi_dist_graph_create_adjacent(icomm, &
|
||||
& in_degree, source_ranks, source_weights, &
|
||||
& out_degree, dest_ranks, dest_weights, &
|
||||
& mpi_info_null, .false., & ! Check this line for optimizations
|
||||
& topology%graph_comm, info)
|
||||
if (info /= mpi_success) then
|
||||
info = psb_err_topology_error_
|
||||
call psb_errpush(info, name)
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
topology%is_initialized = .true.
|
||||
|
||||
! TODO: Is it safe to deallocate these temporary arrays here, or do we need them for the gather/scatter indexes?
|
||||
! deallocate(source_ranks, dest_ranks, source_weights, dest_weights)
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(ctxt,err_act)
|
||||
|
||||
return
|
||||
end subroutine neighbor_topology_init
|
||||
|
||||
|
||||
! ---------------------------------------------------------------
|
||||
! neighbor_topology_free
|
||||
! Release all resources held by the persistent state.
|
||||
! ---------------------------------------------------------------
|
||||
subroutine neighbor_topology_free(topology, info)
|
||||
#ifdef PSB_MPI_MOD
|
||||
use mpi
|
||||
#endif
|
||||
implicit none
|
||||
#ifdef PSB_MPI_H
|
||||
include 'mpif.h'
|
||||
#endif
|
||||
class(psb_neighbor_topology_type), intent(inout) :: topology
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
integer(psb_mpk_) :: iret
|
||||
|
||||
info = psb_success_
|
||||
|
||||
if (topology%graph_comm /= mpi_comm_null) then
|
||||
call mpi_comm_free(topology%graph_comm, iret)
|
||||
topology%graph_comm = mpi_comm_null
|
||||
end if
|
||||
|
||||
if (allocated(topology%send_counts)) deallocate(topology%send_counts)
|
||||
if (allocated(topology%recv_counts)) deallocate(topology%recv_counts)
|
||||
if (allocated(topology%send_displs)) deallocate(topology%send_displs)
|
||||
if (allocated(topology%recv_displs)) deallocate(topology%recv_displs)
|
||||
if (allocated(topology%send_indexes)) deallocate(topology%send_indexes)
|
||||
if (allocated(topology%recv_indexes)) deallocate(topology%recv_indexes)
|
||||
|
||||
topology%num_neighbors = 0
|
||||
topology%total_send = 0
|
||||
topology%total_recv = 0
|
||||
topology%is_initialized = .false.
|
||||
|
||||
end subroutine neighbor_topology_free
|
||||
|
||||
|
||||
! ---------------------------------------------------------------
|
||||
! neighbor_topology_sizeof
|
||||
! Return approximate memory footprint in bytes.
|
||||
! ---------------------------------------------------------------
|
||||
function neighbor_topology_sizeof(topology) result(val)
|
||||
implicit none
|
||||
class(psb_neighbor_topology_type), intent(in) :: topology
|
||||
integer(psb_epk_) :: val
|
||||
|
||||
val = 0
|
||||
val = val + psb_sizeof_ip * 6 ! scalar integers + logicals
|
||||
if (allocated(topology%send_counts)) val = val + psb_sizeof_ip * size(topology%send_counts)
|
||||
if (allocated(topology%recv_counts)) val = val + psb_sizeof_ip * size(topology%recv_counts)
|
||||
if (allocated(topology%send_displs)) val = val + psb_sizeof_ip * size(topology%send_displs)
|
||||
if (allocated(topology%recv_displs)) val = val + psb_sizeof_ip * size(topology%recv_displs)
|
||||
if (allocated(topology%send_indexes)) val = val + psb_sizeof_ip * size(topology%send_indexes)
|
||||
if (allocated(topology%recv_indexes)) val = val + psb_sizeof_ip * size(topology%recv_indexes)
|
||||
|
||||
|
||||
end function neighbor_topology_sizeof
|
||||
|
||||
end module psb_neighbor_topology_mod
|
||||
@ -0,0 +1,34 @@
|
||||
INSTALLDIR=../..
|
||||
INCDIR=$(INSTALLDIR)/include/
|
||||
MODDIR=$(INSTALLDIR)/modules/
|
||||
include $(INCDIR)/Make.inc.psblas
|
||||
#
|
||||
# Libraries used
|
||||
#
|
||||
LIBDIR=$(INSTALLDIR)/lib/
|
||||
PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base
|
||||
LDLIBS=$(PSBLDLIBS)
|
||||
|
||||
FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG).
|
||||
|
||||
TOBJS=test_halo_new.o
|
||||
|
||||
EXEDIR=./runs
|
||||
|
||||
all: runsd test_halo_new
|
||||
|
||||
runsd:
|
||||
(if test ! -d runs ; then mkdir runs; fi)
|
||||
|
||||
test_halo_new: $(TOBJS)
|
||||
$(FLINK) $(LOPT) $(TOBJS) -o test_halo_new $(PSBLAS_LIB) $(LDLIBS)
|
||||
/bin/mv test_halo_new $(EXEDIR)
|
||||
|
||||
clean:
|
||||
/bin/rm -f $(TOBJS) *$(.mod) $(EXEDIR)/test_halo_new
|
||||
|
||||
lib:
|
||||
(cd ../../; make library)
|
||||
verycleanlib:
|
||||
(cd ../../; make veryclean)
|
||||
|
||||
@ -0,0 +1,279 @@
|
||||
!
|
||||
! Test program for D-type halo exchange: baseline vs neighbor topology.
|
||||
!
|
||||
! This test exercises the lower-level psi_swapdata interface directly
|
||||
! to compare the two communication paths implemented in psi_dswapdata.F90:
|
||||
!
|
||||
! 1. Baseline (Isend/Irecv) : flag = IOR(psb_swap_send_, psb_swap_recv_)
|
||||
! 2. Neighbor topology (Ineighbor_alltoallv) : flag = psb_swap_start_ then psb_swap_wait_
|
||||
!
|
||||
! It builds a 3D block-partitioned descriptor with a 7-point stencil,
|
||||
! fills owned entries with their global index, performs halo exchange
|
||||
! via both paths, then checks:
|
||||
! (a) The two paths produce identical results (cross-check)
|
||||
! (b) Every halo entry equals the global index of its source (absolute check)
|
||||
!
|
||||
! Run with: mpirun -np <P> ./test_halo_new
|
||||
!
|
||||
program test_halo_new
|
||||
use psb_base_mod
|
||||
use psi_mod
|
||||
implicit none
|
||||
|
||||
! ---- parameters ----
|
||||
integer(psb_ipk_), parameter :: idim = 10 ! grid idim x idim x idim
|
||||
|
||||
! ---- descriptor / context ----
|
||||
type(psb_ctxt_type) :: ctxt
|
||||
type(psb_desc_type) :: desc_a
|
||||
integer(psb_ipk_) :: iam, np, info, i, nr, nlr
|
||||
integer(psb_lpk_) :: m, nt
|
||||
integer(psb_lpk_), allocatable :: myidx(:)
|
||||
|
||||
! ---- vectors ----
|
||||
type(psb_d_vect_type) :: v_baseline, v_neighbor
|
||||
|
||||
! ---- temporary / comparison arrays ----
|
||||
real(psb_dpk_), allocatable :: vals(:)
|
||||
real(psb_dpk_), allocatable :: res_bl(:), res_nb(:)
|
||||
real(psb_dpk_), allocatable :: expected(:)
|
||||
|
||||
! ---- work buffer for psi_swapdata ----
|
||||
real(psb_dpk_), allocatable :: work(:)
|
||||
|
||||
! ---- halo index bookkeeping ----
|
||||
integer(psb_ipk_) :: nrow, ncol, totxch, idxs, idxr, data_
|
||||
class(psb_i_base_vect_type), pointer :: d_vidx
|
||||
|
||||
! ---- error / reporting ----
|
||||
integer(psb_ipk_) :: n_pass, n_total, imode
|
||||
real(psb_dpk_) :: err, tol
|
||||
integer(psb_lpk_), allocatable :: glob_col(:)
|
||||
character(len=40) :: name
|
||||
|
||||
name = 'test_halo_new'
|
||||
tol = 1.0d-12
|
||||
n_pass = 0
|
||||
n_total = 0
|
||||
|
||||
! ==================================================================
|
||||
! 1. Initialise MPI / PSBLAS context
|
||||
! ==================================================================
|
||||
call psb_init(ctxt)
|
||||
call psb_info(ctxt, iam, np)
|
||||
|
||||
if (iam == 0) then
|
||||
write(psb_out_unit,'("================================================")')
|
||||
write(psb_out_unit,'(" Test: D-type halo baseline vs neighbor topo")')
|
||||
write(psb_out_unit,'(" Processes : ",i0)') np
|
||||
write(psb_out_unit,'(" Grid : ",i0," x ",i0," x ",i0)') idim,idim,idim
|
||||
write(psb_out_unit,'("================================================")')
|
||||
end if
|
||||
|
||||
! ==================================================================
|
||||
! 2. Build descriptor with 7-point stencil connectivity
|
||||
! ==================================================================
|
||||
m = (1_psb_lpk_ * idim) * idim * idim
|
||||
nt = (m + np - 1) / np
|
||||
nr = max(0, min(int(nt,psb_ipk_), int(m - (iam * nt),psb_ipk_)))
|
||||
|
||||
call psb_cdall(ctxt, desc_a, info, nl=nr)
|
||||
if (info /= psb_success_) then
|
||||
write(psb_err_unit,*) iam, 'cdall error:', info
|
||||
call psb_abort(ctxt)
|
||||
end if
|
||||
|
||||
myidx = desc_a%get_global_indices()
|
||||
nlr = size(myidx)
|
||||
|
||||
do i = 1, nlr
|
||||
call psb_cdins(1_psb_ipk_, (/myidx(i)/), (/myidx(i)/), desc_a, info)
|
||||
if (myidx(i) > 1) &
|
||||
& call psb_cdins(1_psb_ipk_, (/myidx(i)/), (/myidx(i)-1/), desc_a, info)
|
||||
if (myidx(i) < m) &
|
||||
& call psb_cdins(1_psb_ipk_, (/myidx(i)/), (/myidx(i)+1/), desc_a, info)
|
||||
if (myidx(i) > idim) &
|
||||
& call psb_cdins(1_psb_ipk_, (/myidx(i)/), (/myidx(i)-idim/), desc_a, info)
|
||||
if (myidx(i) + idim <= m) &
|
||||
& call psb_cdins(1_psb_ipk_, (/myidx(i)/), (/myidx(i)+idim/), desc_a, info)
|
||||
if (myidx(i) > int(idim,psb_lpk_)*idim) &
|
||||
& call psb_cdins(1_psb_ipk_, (/myidx(i)/), &
|
||||
& (/myidx(i) - int(idim,psb_lpk_)*idim/), desc_a, info)
|
||||
if (myidx(i) + int(idim,psb_lpk_)*idim <= m) &
|
||||
& call psb_cdins(1_psb_ipk_, (/myidx(i)/), &
|
||||
& (/myidx(i) + int(idim,psb_lpk_)*idim/), desc_a, info)
|
||||
end do
|
||||
|
||||
call psb_cdasb(desc_a, info)
|
||||
if (info /= psb_success_) then
|
||||
write(psb_err_unit,*) iam, 'cdasb error:', info
|
||||
call psb_abort(ctxt)
|
||||
end if
|
||||
|
||||
nrow = desc_a%get_local_rows() ! owned
|
||||
ncol = desc_a%get_local_cols() ! owned + halo
|
||||
|
||||
! ==================================================================
|
||||
! 3. Allocate two D vectors (scratch) and fill owned entries
|
||||
! ==================================================================
|
||||
call psb_geall(v_baseline, desc_a, info)
|
||||
call psb_geall(v_neighbor, desc_a, info)
|
||||
call psb_geasb(v_baseline, desc_a, info, scratch=.true.)
|
||||
call psb_geasb(v_neighbor, desc_a, info, scratch=.true.)
|
||||
|
||||
! Fill owned entries with the global index value
|
||||
allocate(vals(ncol))
|
||||
vals = dzero
|
||||
do i = 1, nlr
|
||||
vals(i) = real(myidx(i), psb_dpk_)
|
||||
end do
|
||||
call v_baseline%set_vect(vals)
|
||||
call v_neighbor%set_vect(vals)
|
||||
deallocate(vals)
|
||||
|
||||
! ==================================================================
|
||||
! 4. Build the expected result for halo positions
|
||||
! glob_col(j) = global index of local column j
|
||||
! After halo exchange every position j should hold glob_col(j).
|
||||
! ==================================================================
|
||||
allocate(glob_col(ncol), expected(ncol))
|
||||
glob_col = desc_a%get_global_indices(owned=.false.)
|
||||
do i = 1, ncol
|
||||
expected(i) = real(glob_col(i), psb_dpk_)
|
||||
end do
|
||||
|
||||
! ==================================================================
|
||||
! 5. Retrieve halo index list (same list used by both paths)
|
||||
! ==================================================================
|
||||
data_ = psb_comm_halo_
|
||||
call desc_a%get_list_p(data_, d_vidx, totxch, idxr, idxs, info)
|
||||
if (info /= psb_success_) then
|
||||
write(psb_err_unit,*) iam, 'get_list_p error:', info
|
||||
call psb_abort(ctxt)
|
||||
end if
|
||||
|
||||
allocate(work(nrow))
|
||||
work = dzero
|
||||
|
||||
! ==================================================================
|
||||
! 6. Baseline halo exchange (Isend/Irecv in one call)
|
||||
! ==================================================================
|
||||
imode = IOR(psb_swap_send_, psb_swap_recv_)
|
||||
call psi_swapdata(ctxt, desc_a%get_mpic(), imode, dzero, &
|
||||
& v_baseline%v, d_vidx, totxch, idxs, idxr, work, info)
|
||||
if (info /= psb_success_) then
|
||||
write(psb_err_unit,*) iam, 'baseline swap error:', info
|
||||
call psb_abort(ctxt)
|
||||
end if
|
||||
|
||||
! ==================================================================
|
||||
! 7. Neighbor topology halo exchange (start + wait)
|
||||
! ==================================================================
|
||||
call psi_swapdata(ctxt, desc_a%get_mpic(), psb_swap_start_, dzero, &
|
||||
& v_neighbor%v, d_vidx, totxch, idxs, idxr, work, info)
|
||||
if (info /= psb_success_) then
|
||||
write(psb_err_unit,*) iam, 'neighbor start error:', info
|
||||
call psb_abort(ctxt)
|
||||
end if
|
||||
|
||||
call psi_swapdata(ctxt, desc_a%get_mpic(), psb_swap_wait_, dzero, &
|
||||
& v_neighbor%v, d_vidx, totxch, idxs, idxr, work, info)
|
||||
if (info /= psb_success_) then
|
||||
write(psb_err_unit,*) iam, 'neighbor wait error:', info
|
||||
call psb_abort(ctxt)
|
||||
end if
|
||||
|
||||
! ==================================================================
|
||||
! 8. Extract results and compare
|
||||
! ==================================================================
|
||||
res_bl = v_baseline%get_vect()
|
||||
res_nb = v_neighbor%get_vect()
|
||||
|
||||
! ---- Test 1: cross-check baseline vs neighbor (all entries) ----
|
||||
n_total = n_total + 1
|
||||
err = maxval(abs(res_bl(1:ncol) - res_nb(1:ncol)))
|
||||
call psb_amx(ctxt, err)
|
||||
if (iam == 0) then
|
||||
if (err < tol) then
|
||||
write(psb_out_unit,'(" [PASS] cross-check baseline vs neighbor : err = ",es12.5)') err
|
||||
n_pass = n_pass + 1
|
||||
else
|
||||
write(psb_out_unit,'(" [FAIL] cross-check baseline vs neighbor : err = ",es12.5)') err
|
||||
end if
|
||||
end if
|
||||
|
||||
! ---- Test 2: baseline absolute correctness (halo = global index) ----
|
||||
n_total = n_total + 1
|
||||
err = maxval(abs(res_bl(1:ncol) - expected(1:ncol)))
|
||||
call psb_amx(ctxt, err)
|
||||
if (iam == 0) then
|
||||
if (err < tol) then
|
||||
write(psb_out_unit,'(" [PASS] baseline absolute correctness : err = ",es12.5)') err
|
||||
n_pass = n_pass + 1
|
||||
else
|
||||
write(psb_out_unit,'(" [FAIL] baseline absolute correctness : err = ",es12.5)') err
|
||||
end if
|
||||
end if
|
||||
|
||||
! ---- Test 3: neighbor absolute correctness (halo = global index) ----
|
||||
n_total = n_total + 1
|
||||
err = maxval(abs(res_nb(1:ncol) - expected(1:ncol)))
|
||||
call psb_amx(ctxt, err)
|
||||
if (iam == 0) then
|
||||
if (err < tol) then
|
||||
write(psb_out_unit,'(" [PASS] neighbor absolute correctness : err = ",es12.5)') err
|
||||
n_pass = n_pass + 1
|
||||
else
|
||||
write(psb_out_unit,'(" [FAIL] neighbor absolute correctness : err = ",es12.5)') err
|
||||
end if
|
||||
end if
|
||||
|
||||
! ---- Test 4: repeat neighbor exchange (topology reuse) ----
|
||||
! Reset halo entries to zero, run again, and check
|
||||
do i = nrow+1, ncol
|
||||
res_nb(i) = dzero
|
||||
end do
|
||||
call v_neighbor%set_vect(res_nb)
|
||||
|
||||
call psi_swapdata(ctxt, desc_a%get_mpic(), psb_swap_start_, dzero, &
|
||||
& v_neighbor%v, d_vidx, totxch, idxs, idxr, work, info)
|
||||
call psi_swapdata(ctxt, desc_a%get_mpic(), psb_swap_wait_, dzero, &
|
||||
& v_neighbor%v, d_vidx, totxch, idxs, idxr, work, info)
|
||||
|
||||
res_nb = v_neighbor%get_vect()
|
||||
n_total = n_total + 1
|
||||
err = maxval(abs(res_nb(1:ncol) - expected(1:ncol)))
|
||||
call psb_amx(ctxt, err)
|
||||
if (iam == 0) then
|
||||
if (err < tol) then
|
||||
write(psb_out_unit,'(" [PASS] neighbor topology reuse : err = ",es12.5)') err
|
||||
n_pass = n_pass + 1
|
||||
else
|
||||
write(psb_out_unit,'(" [FAIL] neighbor topology reuse : err = ",es12.5)') err
|
||||
end if
|
||||
end if
|
||||
|
||||
! ==================================================================
|
||||
! 9. Summary
|
||||
! ==================================================================
|
||||
if (iam == 0) then
|
||||
write(psb_out_unit,'("================================================")')
|
||||
write(psb_out_unit,'(" Results: ",i0," / ",i0," tests passed")') n_pass, n_total
|
||||
if (n_pass == n_total) then
|
||||
write(psb_out_unit,'(" STATUS: ALL PASSED")')
|
||||
else
|
||||
write(psb_out_unit,'(" STATUS: SOME FAILURES")')
|
||||
end if
|
||||
write(psb_out_unit,'("================================================")')
|
||||
end if
|
||||
|
||||
! ==================================================================
|
||||
! 10. Cleanup
|
||||
! ==================================================================
|
||||
deallocate(res_bl, res_nb, expected, glob_col, work)
|
||||
call psb_gefree(v_baseline, desc_a, info)
|
||||
call psb_gefree(v_neighbor, desc_a, info)
|
||||
call psb_cdfree(desc_a, info)
|
||||
call psb_exit(ctxt)
|
||||
|
||||
end program test_halo_new
|
||||
Loading…
Reference in New Issue