communication_v2
Stack-1 2 months ago
parent 461a6a325f
commit 02f1ef741c

@ -89,9 +89,10 @@
!
!
submodule (psi_c_comm_v_mod) psi_c_swapdata_impl
use psb_desc_const_mod, only: psb_swap_start_, psb_swap_wait_
use psb_base_mod
contains
subroutine psi_cswapdata_vect(flag,beta,y,desc_a,info,data)
module subroutine psi_cswapdata_vect(flag,beta,y,desc_a,info,data)
#ifdef PSB_MPI_MOD
use mpi
@ -101,34 +102,38 @@ contains
include 'mpif.h'
#endif
integer(psb_ipk_), intent(in) :: flag
class(psb_c_base_vect_type) :: y
complex(psb_spk_) :: beta
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(in) :: flag
class(psb_c_base_vect_type), intent(inout) :: y
complex(psb_spk_), intent(in) :: beta
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, idxs, idxr, totxch, data_, err_act
class(psb_i_base_vect_type), pointer :: d_vidx
character(len=20) :: name
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np, me, total_send, total_recv, num_neighbors, data_, err_act
class(psb_i_base_vect_type), pointer :: comm_indexes
! local variables used to detect the communication scheme
logical :: swap_mpi, swap_sync, swap_send, swap_recv, swap_start, swap_wait
logical :: baseline, neighbor_a2av
character(len=30) :: name
info = psb_success_
name = 'psi_cswapdata_vect'
call psb_erractionsave(err_act)
ctxt = desc_a%get_context()
icomm = ctxt%get_mpic()
call psb_info(ctxt,me,np)
call psb_info(ctxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
if (.not.psb_is_asb_desc(desc_a)) then
if (.not.psb_is_asb_desc(desc_a)) then
info=psb_err_invalid_cd_state_
call psb_errpush(info,name)
goto 9999
@ -140,14 +145,45 @@ contains
data_ = psb_comm_halo_
end if
call desc_a%get_list_p(data_,d_vidx,totxch,idxr,idxs,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
call desc_a%get_list_p(data_,comm_indexes,num_neighbors,total_recv,total_send,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='desc_a%get_list_p')
goto 9999
end if
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
swap_start = iand(flag,psb_swap_start_) /= 0
swap_wait = iand(flag,psb_swap_wait_) /= 0
baseline = swap_mpi .or. swap_send .or. swap_recv .or. swap_sync
neighbor_a2av = swap_start .or. swap_wait
if( (baseline.eqv..true.).and.(neighbor_a2av.eqv..true.) ) then
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: both baseline and neighbor_a2av are true')
goto 9999
end if
call psi_swapdata(ctxt,flag,beta,y,d_vidx,totxch,idxs,idxr,info)
if (info /= psb_success_) goto 9999
if (baseline) then
call psi_cswap_baseline_vect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) then
call psb_errpush(info,name,a_err='baseline swap')
goto 9999
end if
else if (neighbor_a2av) then
call psi_cswap_neighbor_topology_vect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) then
call psb_errpush(info,name,a_err='neighbor a2av swap')
goto 9999
end if
else
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: neither baseline nor neighbor_a2av is true')
goto 9999
end if
call psb_erractionrestore(err_act)
return
@ -158,22 +194,8 @@ contains
end subroutine psi_cswapdata_vect
!
!
! Subroutine: psi_cswap_vidx_vect
! Data exchange among processes.
!
! Takes care of Y an encapsulated vector. Relies on the gather/scatter methods
! of vectors.
!
! The real workhorse: the outer routine will only choose the index list
! this one takes the index list and does the actual exchange.
!
!
!
module subroutine psi_cswap_vidx_vect(ctxt,flag,beta,y,idx, &
& totxch,totsnd,totrcv,info)
subroutine psi_cswap_baseline_vect(ctxt,flag,beta,y,idx, &
& num_neighbors,total_send,total_recv,info)
#ifdef PSB_MPI_MOD
use mpi
#endif
@ -182,13 +204,13 @@ contains
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_c_base_vect_type) :: y
complex(psb_spk_) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
complex(psb_spk_), intent(in) :: beta
class(psb_c_base_vect_type), intent(inout) :: y
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: num_neighbors,total_send, total_recv
integer(psb_ipk_), intent(out) :: info
! locals
integer(psb_mpk_) :: np, me
@ -196,15 +218,15 @@ contains
& iret, nesd, nerv
integer(psb_mpk_) :: icomm
integer(psb_mpk_), allocatable :: prcid(:)
integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,&
integer(psb_ipk_) :: err_act, i, idx_pt, total_send_, total_recv_,&
& snd_pt, rcv_pt, pnti, n
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
logical, parameter :: usersend=.false., debug=.false.
character(len=20) :: name
info=psb_success_
name='psi_cswap_vidx_vect'
info = psb_success_
name = 'psi_cswap_baseline_vect'
call psb_erractionsave(err_act)
call psb_info(ctxt,me,np)
if (np == -1) then
@ -222,8 +244,8 @@ contains
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
totrcv_ = totrcv * n
totsnd_ = totsnd * n
total_recv_ = total_recv * n
total_send_ = total_send * n
call idx%sync()
if (debug) write(*,*) me,'Internal buffer'
@ -240,12 +262,12 @@ contains
end if
if (debug) write(*,*) me,'do_send start'
call y%new_buffer(ione*size(idx%v),info)
call y%new_comid(totxch,info)
call y%new_comid(num_neighbors,info)
y%comid = mpi_request_null
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
! First I post all the non blocking receives
pnti = 1
do i=1, totxch
do i=1, num_neighbors
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
@ -266,7 +288,7 @@ contains
! Then gather for sending.
!
pnti = 1
do i=1, totxch
do i=1, num_neighbors
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
snd_pt = 1+pnti+nerv+psb_n_elem_send_
@ -290,7 +312,7 @@ contains
snd_pt = 1
rcv_pt = 1
p2ptag = psb_complex_swap_tag
do i=1, totxch
do i=1, num_neighbors
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
@ -323,12 +345,12 @@ contains
call psb_errpush(info,name,m_err=(/-2/))
goto 9999
end if
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
if (debug) write(*,*) me,' wait'
pnti = 1
p2ptag = psb_complex_swap_tag
do i=1, totxch
do i=1, num_neighbors
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
@ -367,7 +389,7 @@ contains
pnti = 1
snd_pt = 1
rcv_pt = 1
do i=1, totxch
do i=1, num_neighbors
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
@ -407,7 +429,149 @@ contains
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psi_cswap_vidx_vect
end subroutine psi_cswap_baseline_vect
!
! Subroutine: psi_cswap_neighbor_topology_vect
! Halo data exchange for an encapsulated vector Y using the MPI
! neighborhood collective MPI_Ineighbor_alltoallv over a graph
! communicator cached inside Y (y%neighbor_topology).
!
! The exchange is split in two phases selected by FLAG:
!   - psb_swap_start_: gather the send entries of Y into a combined
!     buffer and post the nonblocking neighbor all-to-all;
!   - psb_swap_wait_ : wait for completion, then scatter the received
!     entries back into Y, combining with the existing values via BETA.
! Both bits may be set in the same call, giving a blocking exchange.
!
! Arguments:
!   ctxt          - communication context (queried for rank/size/MPI comm)
!   flag          - bitmask; only psb_swap_start_/psb_swap_wait_ are read here
!   beta          - scaling applied to existing Y entries in the scatter phase
!   y             - the vector whose halo is exchanged; also owns the
!                   communication buffer, request handle and topology cache
!   idx           - index list describing the exchange (synced, and its raw
!                   array idx%v passed to the topology constructor)
!   num_neighbors - number of processes exchanged with
!   total_send    - total number of entries to send
!   total_recv    - total number of entries to receive
!   info          - return code, psb_success_ on success
!
subroutine psi_cswap_neighbor_topology_vect(ctxt,flag,beta,y,idx, &
& num_neighbors,total_send,total_recv,info)
#ifdef PSB_MPI_MOD
use mpi
#endif
implicit none
#ifdef PSB_MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
complex(psb_spk_), intent(in) :: beta
class(psb_c_base_vect_type), intent(inout) :: y
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: num_neighbors,total_send,total_recv
integer(psb_ipk_), intent(out) :: info
! locals
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: np, me
integer(psb_mpk_) :: iret, p2pstat(mpi_status_size)
integer(psb_ipk_) :: err_act, topology_total_send, topology_total_recv, buffer_size
logical :: do_start, do_wait
logical, parameter :: debug = .false.
character(len=30) :: name
info = psb_success_
name = 'psi_cswap_nbr_vect'
call psb_erractionsave(err_act)
call psb_info(ctxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
icomm = ctxt%get_mpic()
! Which phase(s) of the split exchange are requested.
do_start = iand(flag,psb_swap_start_) /= 0
do_wait = iand(flag,psb_swap_wait_) /= 0
! Make sure the index list is consistent on the host side before use.
call idx%sync()
if (do_start) then
if(debug) write(*,*) me,' nbr_vect: starting data exchange'
! Lazily build the neighborhood (graph) topology from the index list;
! it is cached in Y and reused on subsequent exchanges.
if (.not. y%neighbor_topology%is_initialized) then
if (debug) write(*,*) me,' nbr_vect: building topology'
call y%neighbor_topology%init(idx%v, num_neighbors, total_send, total_recv, &
& ctxt, icomm, info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_, name, &
& a_err='neighbor_topology_init')
goto 9999
end if
end if
! Sizes come from the cached topology (not the dummy arguments),
! so a reused topology stays self-consistent with its counts/displs.
topology_total_send = y%neighbor_topology%total_send
topology_total_recv = y%neighbor_topology%total_recv
! One combined buffer: [1..send] holds outgoing data,
! [send+1..send+recv] receives incoming data.
buffer_size = topology_total_send + topology_total_recv
call y%new_buffer(buffer_size, info)
if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_, name)
goto 9999
end if
y%communication_handle = mpi_request_null
if (debug) write(*,*) me,' nbr_vect: gathering send data,', topology_total_send,' elems'
! Gather the entries to be sent into the front of the buffer.
call y%gth(int(topology_total_send,psb_mpk_), &
& y%neighbor_topology%send_indexes, &
& y%combuf(1:topology_total_send))
! Ensure any device-side gather has completed before MPI touches the buffer.
call y%device_wait()
if (debug) write(*,*) me,' nbr_vect: posting MPI_Ineighbor_alltoallv'
! Nonblocking neighborhood all-to-all on the cached graph communicator;
! the request handle is stored in Y for the wait phase.
call mpi_ineighbor_alltoallv( &
& y%combuf(1), &
& y%neighbor_topology%send_counts, &
& y%neighbor_topology%send_displs, &
& psb_mpi_c_spk_, &
& y%combuf(topology_total_send + 1), &
& y%neighbor_topology%recv_counts, &
& y%neighbor_topology%recv_displs, &
& psb_mpi_c_spk_, &
& y%neighbor_topology%graph_comm, &
& y%communication_handle, iret)
if (iret /= mpi_success) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
end if
if (do_wait) then
! A wait without a matching start is an error: there is no pending request.
if (y%communication_handle == mpi_request_null) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/-2/))
goto 9999
end if
topology_total_send = y%neighbor_topology%total_send
topology_total_recv = y%neighbor_topology%total_recv
if (debug) write(*,*) me,' nbr_vect: waiting on MPI request'
call mpi_wait(y%communication_handle, p2pstat, iret)
if (iret /= mpi_success) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
if (debug) write(*,*) me,' nbr_vect: scattering recv data,', topology_total_recv,' elems'
! Scatter the received section back into Y, combining with beta.
call y%sct(int(topology_total_recv,psb_mpk_), &
& y%neighbor_topology%recv_indexes, &
& y%combuf(topology_total_send+1:topology_total_send+topology_total_recv), &
& beta)
y%communication_handle = mpi_request_null
call y%device_wait()
! Release the communication buffer if Y's policy allows it.
call y%maybe_free_buffer(info)
if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_, name)
goto 9999
end if
if (debug) write(*,*) me,' nbr_vect: done'
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psi_cswap_neighbor_topology_vect
!
!
@ -426,34 +590,36 @@ contains
include 'mpif.h'
#endif
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_c_base_multivect_type) :: y
complex(psb_spk_) :: beta
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(in) :: flag
complex(psb_spk_), intent(in) :: beta
class(psb_c_base_multivect_type), intent(inout) :: y
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
! local variables used to detect the communication scheme
logical :: swap_mpi, swap_sync, swap_send, swap_recv, swap_start, swap_wait
logical :: baseline, neighbor_a2av
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, idxs, idxr, totxch, data_, err_act
class(psb_i_base_vect_type), pointer :: d_vidx
character(len=20) :: name
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np, me, total_send, total_recv, num_neighbors, data_, err_act
class(psb_i_base_vect_type), pointer :: comm_indexes
character(len=30) :: name
info=psb_success_
name='psi_swap_datav'
info = psb_success_
name = 'psi_cswapdata_multivect'
call psb_erractionsave(err_act)
ctxt = desc_a%get_context()
icomm = ctxt%get_mpic()
call psb_info(ctxt,me,np)
call psb_info(ctxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
if (.not.psb_is_asb_desc(desc_a)) then
if (.not.psb_is_asb_desc(desc_a)) then
info=psb_err_invalid_cd_state_
call psb_errpush(info,name)
goto 9999
@ -465,14 +631,45 @@ contains
data_ = psb_comm_halo_
end if
call desc_a%get_list_p(data_,d_vidx,totxch,idxr,idxs,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
call desc_a%get_list_p(data_,comm_indexes,num_neighbors,total_recv,total_send,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='desc_a%get_list_p')
goto 9999
end if
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
swap_start = iand(flag,psb_swap_start_) /= 0
swap_wait = iand(flag,psb_swap_wait_) /= 0
baseline = swap_mpi .or. swap_send .or. swap_recv .or. swap_sync
neighbor_a2av = swap_start .or. swap_wait
if( (baseline.eqv..true.).and.(neighbor_a2av.eqv..true.) ) then
info=psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: both baseline and neighbor_a2av are true')
goto 9999
end if
call psi_swapdata(ctxt,flag,beta,y,d_vidx,totxch,idxs,idxr,info)
if (info /= psb_success_) goto 9999
if (baseline) then
call psi_cswap_baseline_multivect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) then
call psb_errpush(info,name,a_err='baseline swap')
goto 9999
end if
else if (neighbor_a2av) then
call psi_cswap_neighbor_topology_multivect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) then
call psb_errpush(info,name,a_err='neighbor a2av swap')
goto 9999
end if
else
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: neither baseline nor neighbor_a2av is true')
goto 9999
end if
call psb_erractionrestore(err_act)
return
@ -483,22 +680,8 @@ contains
end subroutine psi_cswapdata_multivect
!
!
! Subroutine: psi_cswap_vidx_multivect
! Data exchange among processes.
!
! Takes care of Y an encapsulated multivector. Relies on the gather/scatter methods
! of multivectors.
!
! The real workhorse: the outer routine will only choose the index list
! this one takes the index list and does the actual exchange.
!
!
!
module subroutine psi_cswap_vidx_multivect(ctxt,flag,beta,y,idx, &
& totxch,totsnd,totrcv,info)
subroutine psi_cswap_baseline_multivect(ctxt,flag,beta,y,idx, &
& num_neighbors,total_send,total_recv,info)
#ifdef PSB_MPI_MOD
use mpi
#endif
@ -506,21 +689,20 @@ contains
#ifdef PSB_MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_c_base_multivect_type) :: y
complex(psb_spk_) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
complex(psb_spk_) :: beta
class(psb_c_base_multivect_type) :: y
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: num_neighbors,total_send, total_recv
integer(psb_ipk_), intent(out) :: info
! locals
integer(psb_mpk_) :: np, me, nesd, nerv, n
integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
integer(psb_mpk_) :: icomm
integer(psb_mpk_), allocatable :: prcid(:)
integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,&
integer(psb_ipk_) :: err_act, i, idx_pt, total_send_, total_recv_,&
& snd_pt, rcv_pt, pnti
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
@ -547,8 +729,8 @@ contains
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
totrcv_ = totrcv * n
totsnd_ = totsnd * n
total_recv_ = total_recv * n
total_send_ = total_send * n
call idx%sync()
@ -566,14 +748,14 @@ contains
end if
if (debug) write(*,*) me,'do_send start'
call y%new_buffer(ione*size(idx%v),info)
call y%new_comid(totxch,info)
call y%new_comid(num_neighbors,info)
y%comid = mpi_request_null
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
! First I post all the non blocking receives
pnti = 1
snd_pt = totrcv_+1
snd_pt = total_recv_+1
rcv_pt = 1
do i=1, totxch
do i=1, num_neighbors
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
@ -594,9 +776,9 @@ contains
! Then gather for sending.
!
pnti = 1
snd_pt = totrcv_+1
snd_pt = total_recv_+1
rcv_pt = 1
do i=1, totxch
do i=1, num_neighbors
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
idx_pt = 1+pnti+nerv+psb_n_elem_send_
@ -617,10 +799,10 @@ contains
!
pnti = 1
snd_pt = totrcv_+1
snd_pt = total_recv_+1
rcv_pt = 1
p2ptag = psb_complex_swap_tag
do i=1, totxch
do i=1, num_neighbors
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
@ -652,14 +834,14 @@ contains
call psb_errpush(info,name,m_err=(/-2/))
goto 9999
end if
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
if (debug) write(*,*) me,' wait'
pnti = 1
snd_pt = totrcv_+1
snd_pt = total_recv_+1
rcv_pt = 1
p2ptag = psb_complex_swap_tag
do i=1, totxch
do i=1, num_neighbors
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
@ -695,9 +877,9 @@ contains
if (debug) write(*,*) me,' scatter'
pnti = 1
snd_pt = totrcv_+1
snd_pt = total_recv_+1
rcv_pt = 1
do i=1, totxch
do i=1, num_neighbors
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
@ -737,6 +919,150 @@ contains
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psi_cswap_vidx_multivect
end subroutine psi_cswap_baseline_multivect
!
! Subroutine: psi_cswap_neighbor_topology_multivect
! Halo data exchange for an encapsulated multivector Y using the MPI
! neighborhood collective MPI_Ineighbor_alltoallv over a graph
! communicator cached inside Y (y%neighbor_topology).
! Same two-phase scheme as the vector variant:
!   psb_swap_start_ -> gather + post nonblocking neighbor all-to-all;
!   psb_swap_wait_  -> wait + scatter received data into Y with BETA.
!
! Arguments:
!   ctxt          - communication context
!   flag          - bitmask; only psb_swap_start_/psb_swap_wait_ are read here
!   beta          - scaling applied to existing Y entries in the scatter phase
!   y             - multivector whose halo is exchanged; owns buffer,
!                   request handle and cached topology
!   idx           - index list (synced; raw array idx%v feeds the topology init)
!   num_neighbors - number of processes exchanged with
!   total_send    - total number of entries to send
!   total_recv    - total number of entries to receive
!   info          - return code, psb_success_ on success
!
subroutine psi_cswap_neighbor_topology_multivect(ctxt,flag,beta,y,idx, &
& num_neighbors,total_send,total_recv,info)
#ifdef PSB_MPI_MOD
use mpi
#endif
implicit none
#ifdef PSB_MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
! NOTE(review): icomm is a local but is declared in the middle of the
! dummy-argument declarations; consider moving it under "locals".
integer(psb_mpk_) :: icomm
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_c_base_multivect_type), intent(inout) :: y
complex(psb_spk_), intent(in) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: num_neighbors,total_send,total_recv
! locals
integer(psb_mpk_) :: np, me
integer(psb_mpk_) :: iret, p2pstat(mpi_status_size)
integer(psb_ipk_) :: err_act, topology_total_send, topology_total_recv, buffer_size
logical :: do_start, do_wait
logical, parameter :: debug = .false.
character(len=30) :: name
info = psb_success_
name = 'psi_cswap_neighbor_topology_multivect'
call psb_erractionsave(err_act)
call psb_info(ctxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
icomm = ctxt%get_mpic()
! Which phase(s) of the split exchange are requested.
do_start = iand(flag,psb_swap_start_) /= 0
do_wait = iand(flag,psb_swap_wait_) /= 0
! Make sure the index list is consistent on the host side before use.
call idx%sync()
if (do_start) then
if(debug) write(*,*) me,' nbr_vect: starting data exchange'
! Lazily build and cache the neighborhood (graph) topology in Y.
if (.not. y%neighbor_topology%is_initialized) then
if (debug) write(*,*) me,' nbr_vect: building topology'
call y%neighbor_topology%init(idx%v, num_neighbors, total_send, total_recv, &
& ctxt, icomm, info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_, name, &
& a_err='neighbor_topology_init')
goto 9999
end if
end if
! Sizes come from the cached topology so counts/displs stay consistent.
topology_total_send = y%neighbor_topology%total_send
topology_total_recv = y%neighbor_topology%total_recv
! Combined buffer layout: [1..send] outgoing, [send+1..send+recv] incoming.
buffer_size = topology_total_send + topology_total_recv
call y%new_buffer(buffer_size, info)
if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_, name)
goto 9999
end if
y%communication_handle = mpi_request_null
if (debug) write(*,*) me,' nbr_vect: gathering send data,', topology_total_send,' elems'
! Gather the entries to be sent into the front of the buffer.
call y%gth(int(topology_total_send,psb_mpk_), &
& y%neighbor_topology%send_indexes, &
& y%combuf(1:topology_total_send))
! Ensure any device-side gather has completed before MPI touches the buffer.
call y%device_wait()
if (debug) write(*,*) me,' nbr_vect: posting MPI_Ineighbor_alltoallv'
! Nonblocking neighborhood all-to-all; request handle stored in Y.
call mpi_ineighbor_alltoallv( &
& y%combuf(1), &
& y%neighbor_topology%send_counts, &
& y%neighbor_topology%send_displs, &
& psb_mpi_c_spk_, &
& y%combuf(topology_total_send + 1), &
& y%neighbor_topology%recv_counts, &
& y%neighbor_topology%recv_displs, &
& psb_mpi_c_spk_, &
& y%neighbor_topology%graph_comm, &
& y%communication_handle, iret)
if (iret /= mpi_success) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
end if
if (do_wait) then
! A wait without a matching start is an error: there is no pending request.
if (y%communication_handle == mpi_request_null) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/-2/))
goto 9999
end if
topology_total_send = y%neighbor_topology%total_send
topology_total_recv = y%neighbor_topology%total_recv
if (debug) write(*,*) me,' nbr_vect: waiting on MPI request'
call mpi_wait(y%communication_handle, p2pstat, iret)
if (iret /= mpi_success) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
if (debug) write(*,*) me,' nbr_vect: scattering recv data,', topology_total_recv,' elems'
! Scatter the received section back into Y, combining with beta.
call y%sct(int(topology_total_recv,psb_mpk_), &
& y%neighbor_topology%recv_indexes, &
& y%combuf(topology_total_send+1:topology_total_send+topology_total_recv), &
& beta)
y%communication_handle = mpi_request_null
call y%device_wait()
! Release the communication buffer if Y's policy allows it.
call y%maybe_free_buffer(info)
if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_, name)
goto 9999
end if
if (debug) write(*,*) me,' nbr_vect: done'
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psi_cswap_neighbor_topology_multivect
end submodule psi_c_swapdata_impl

@ -103,22 +103,26 @@ contains
include 'mpif.h'
#endif
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_c_base_vect_type) :: y
complex(psb_spk_) :: beta
type(psb_desc_type),target :: desc_a
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(in) :: flag
complex(psb_spk_), intent(in) :: beta
class(psb_c_base_vect_type), intent(inout) :: y
type(psb_desc_type),target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, idxs, idxr, totxch, err_act, data_
class(psb_i_base_vect_type), pointer :: d_vidx
character(len=20) :: name
info=psb_success_
name='psi_swap_tranv'
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, total_send, total_recv, num_neighbors, err_act, data_
class(psb_i_base_vect_type), pointer :: comm_indexes
character(len=20) :: name
! local variables used to detect the communication scheme
logical :: swap_mpi, swap_sync, swap_send, swap_recv, swap_start, swap_wait
logical :: baseline, neighbor_a2av
info = psb_success_
name = 'psi_cswaptran_vect'
call psb_erractionsave(err_act)
ctxt = desc_a%get_context()
@ -142,14 +146,46 @@ contains
data_ = psb_comm_halo_
end if
call desc_a%get_list_p(data_,d_vidx,totxch,idxr,idxs,info)
call desc_a%get_list_p(data_,comm_indexes,num_neighbors,total_recv,total_send,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
call psi_swaptran(ctxt,flag,beta,y,d_vidx,totxch,idxs,idxr,info)
if (info /= psb_success_) goto 9999
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
swap_start = iand(flag,psb_swap_start_) /= 0
swap_wait = iand(flag,psb_swap_wait_) /= 0
baseline = swap_mpi .or. swap_send .or. swap_recv .or. swap_sync
neighbor_a2av = swap_start .or. swap_wait
if( (baseline.eqv..true.).and.(neighbor_a2av.eqv..true.) ) then
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: both baseline and neighbor_a2av are true')
goto 9999
end if
if (baseline) then
call psi_ctran_baseline_vect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) then
call psb_errpush(info,name,a_err='baseline swap')
goto 9999
end if
else if (neighbor_a2av) then
call psi_ctran_neighbor_topology_vect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) then
call psb_errpush(info,name,a_err='neighbor a2av swap')
goto 9999
end if
else
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: neither baseline nor neighbor_a2av is true')
goto 9999
end if
call psb_erractionrestore(err_act)
return
@ -172,9 +208,8 @@ contains
!
!
!
module subroutine psi_ctran_vidx_vect(ctxt,flag,beta,y,idx,&
& totxch,totsnd,totrcv,info)
module subroutine psi_ctran_baseline_vect(ctxt,flag,beta,y,idx,&
& num_neighbors,total_send,total_recv,info)
#ifdef PSB_MPI_MOD
use mpi
#endif
@ -183,28 +218,28 @@ contains
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_c_base_vect_type) :: y
complex(psb_spk_) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
complex(psb_spk_), intent(in) :: beta
class(psb_c_base_vect_type), intent(inout) :: y
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: num_neighbors,total_send,total_recv
integer(psb_ipk_), intent(out) :: info
! locals
integer(psb_mpk_) :: np, me, nesd, nerv, n
integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
integer(psb_mpk_) :: icomm
integer(psb_mpk_), allocatable :: prcid(:)
integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,&
integer(psb_ipk_) :: err_act, i, idx_pt, total_send_, total_recv_,&
& snd_pt, rcv_pt, pnti
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
logical, parameter :: usersend=.false., debug=.false.
character(len=20) :: name
info=psb_success_
name='psi_swap_tran'
info = psb_success_
name = 'psi_ctran_baseline_vect'
call psb_erractionsave(err_act)
call psb_info(ctxt,me,np)
if (np == -1) then
@ -222,8 +257,8 @@ contains
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
totrcv_ = totrcv * n
totsnd_ = totsnd * n
total_recv_ = total_recv * n
total_send_ = total_send * n
call idx%sync()
@ -241,13 +276,13 @@ contains
end if
if (debug) write(*,*) me,'do_send start'
call y%new_buffer(ione*size(idx%v),info)
call y%new_comid(totxch,info)
call y%new_comid(num_neighbors,info)
y%comid = mpi_request_null
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
! First I post all the non blocking receives
pnti = 1
p2ptag = psb_complex_swap_tag
do i=1, totxch
do i=1, num_neighbors
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
@ -270,7 +305,7 @@ contains
!
pnti = 1
snd_pt = 1
do i=1, totxch
do i=1, num_neighbors
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
snd_pt = 1+pnti+nerv+psb_n_elem_send_
@ -296,7 +331,7 @@ contains
snd_pt = 1
rcv_pt = 1
p2ptag = psb_complex_swap_tag
do i=1, totxch
do i=1, num_neighbors
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
@ -329,12 +364,12 @@ contains
call psb_errpush(info,name,m_err=(/-2/))
goto 9999
end if
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
if (debug) write(*,*) me,' wait'
pnti = 1
p2ptag = psb_complex_swap_tag
do i=1, totxch
do i=1, num_neighbors
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
@ -373,7 +408,7 @@ contains
pnti = 1
snd_pt = 1
rcv_pt = 1
do i=1, totxch
do i=1, num_neighbors
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
@ -414,9 +449,152 @@ contains
return
end subroutine psi_ctran_vidx_vect
end subroutine psi_ctran_baseline_vect
!
! Subroutine: psi_ctran_neighbor_topology_vect
! TRANSPOSED halo data exchange for an encapsulated vector Y using
! MPI_Ineighbor_alltoallv over the graph communicator cached in Y.
! Compared to the forward exchange, the send/recv roles are swapped:
!   - start phase gathers the RECV-indexed entries of Y and posts the
!     neighbor all-to-all with recv_counts/recv_displs on the send side
!     and send_counts/send_displs on the receive side;
!   - wait phase scatters the data arriving at the SEND indexes back
!     into Y, combining with BETA.
! Two-phase scheme selected by FLAG (psb_swap_start_/psb_swap_wait_),
! as in the forward routines.
!
! Arguments:
!   ctxt          - communication context
!   flag          - bitmask; only psb_swap_start_/psb_swap_wait_ are read here
!   beta          - scaling applied to existing Y entries in the scatter phase
!   y             - vector whose halo is exchanged (transposed pattern);
!                   owns buffer, request handle and cached topology
!   idx           - index list (synced; raw array idx%v feeds the topology init)
!   num_neighbors - number of processes exchanged with
!   total_send    - total entries of the FORWARD send side
!   total_recv    - total entries of the FORWARD receive side
!   info          - return code, psb_success_ on success
!
subroutine psi_ctran_neighbor_topology_vect(ctxt,flag,beta,y,idx, &
& num_neighbors,total_send,total_recv,info)
#ifdef PSB_MPI_MOD
use mpi
#endif
implicit none
#ifdef PSB_MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
complex(psb_spk_), intent(in) :: beta
class(psb_c_base_vect_type), intent(inout) :: y
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: num_neighbors,total_send,total_recv
integer(psb_ipk_), intent(out) :: info
! locals
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: np, me
integer(psb_mpk_) :: iret, p2pstat(mpi_status_size)
integer(psb_ipk_) :: err_act, topology_total_send, topology_total_recv, buffer_size
logical :: do_start, do_wait
logical, parameter :: debug = .false.
character(len=30) :: name
info = psb_success_
name = 'psi_cswap_trn_nbr_vect'
call psb_erractionsave(err_act)
call psb_info(ctxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
icomm = ctxt%get_mpic()
! Which phase(s) of the split exchange are requested.
do_start = iand(flag,psb_swap_start_) /= 0
do_wait = iand(flag,psb_swap_wait_) /= 0
! Make sure the index list is consistent on the host side before use.
call idx%sync()
if (do_start) then
if(debug) write(*,*) me,' nbr_tran_vect: starting data exchange'
! Lazily build and cache the neighborhood (graph) topology in Y.
if (.not. y%neighbor_topology%is_initialized) then
if (debug) write(*,*) me,' nbr_tran_vect: building topology'
call y%neighbor_topology%init(idx%v, num_neighbors, total_send, total_recv, &
& ctxt, icomm, info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_, name, &
& a_err='neighbor_topology_init')
goto 9999
end if
end if
! Sizes come from the cached topology so counts/displs stay consistent.
topology_total_send = y%neighbor_topology%total_send
topology_total_recv = y%neighbor_topology%total_recv
! Combined buffer layout for the transpose:
! [1..recv] outgoing (recv-indexed data), [recv+1..recv+send] incoming.
buffer_size = topology_total_send + topology_total_recv
call y%new_buffer(buffer_size, info)
if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_, name)
goto 9999
end if
y%communication_handle = mpi_request_null
if (debug) write(*,*) me,' nbr_tran_vect: gathering (recv) data,', topology_total_recv,' elems'
! Transpose: gather the entries at the forward RECV indexes for sending.
call y%gth(int(topology_total_recv,psb_mpk_), &
& y%neighbor_topology%recv_indexes, &
& y%combuf(1:topology_total_recv))
! Ensure any device-side gather has completed before MPI touches the buffer.
call y%device_wait()
if (debug) write(*,*) me,' nbr_tran_vect: posting MPI_Ineighbor_alltoallv (swapped)'
! Neighbor all-to-all with send/recv count-and-displacement roles swapped,
! realizing the transpose of the forward communication pattern.
call mpi_ineighbor_alltoallv( &
& y%combuf(1), &
& y%neighbor_topology%recv_counts, &
& y%neighbor_topology%recv_displs, &
& psb_mpi_c_spk_, &
& y%combuf(topology_total_recv + 1), &
& y%neighbor_topology%send_counts, &
& y%neighbor_topology%send_displs, &
& psb_mpi_c_spk_, &
& y%neighbor_topology%graph_comm, &
& y%communication_handle, iret)
if (iret /= mpi_success) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
end if
if (do_wait) then
! A wait without a matching start is an error: there is no pending request.
if (y%communication_handle == mpi_request_null) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/-2/))
goto 9999
end if
topology_total_send = y%neighbor_topology%total_send
topology_total_recv = y%neighbor_topology%total_recv
if (debug) write(*,*) me,' nbr_tran_vect: waiting on MPI request'
call mpi_wait(y%communication_handle, p2pstat, iret)
if (iret /= mpi_success) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
if (debug) write(*,*) me,' nbr_tran_vect: scattering (send) data,', topology_total_send,' elems'
! Transpose: scatter arriving data into the forward SEND indexes of Y.
call y%sct(int(topology_total_send,psb_mpk_), &
& y%neighbor_topology%send_indexes, &
& y%combuf(topology_total_recv+1:topology_total_recv+topology_total_send), &
& beta)
y%communication_handle = mpi_request_null
call y%device_wait()
! Release the communication buffer if Y's policy allows it.
call y%maybe_free_buffer(info)
if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_, name)
goto 9999
end if
if (debug) write(*,*) me,' nbr_tran_vect: done'
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psi_ctran_neighbor_topology_vect
!
!
!
@ -437,22 +615,26 @@ contains
include 'mpif.h'
#endif
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_c_base_multivect_type) :: y
complex(psb_spk_) :: beta
type(psb_desc_type),target :: desc_a
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(in) :: flag
complex(psb_spk_), intent(in) :: beta
class(psb_c_base_multivect_type), intent(inout) :: y
type(psb_desc_type),target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, idxs, idxr, totxch, err_act, data_
class(psb_i_base_vect_type), pointer :: d_vidx
character(len=20) :: name
info=psb_success_
name='psi_swap_tranv'
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, total_send, total_recv, num_neighbors, err_act, data_
class(psb_i_base_vect_type), pointer :: comm_indexes
character(len=20) :: name
! local variables used to detect the communication scheme
logical :: swap_mpi, swap_sync, swap_send, swap_recv, swap_start, swap_wait
logical :: baseline, neighbor_a2av
info = psb_success_
name = 'psi_cswaptran_multivect'
call psb_erractionsave(err_act)
ctxt = desc_a%get_context()
@ -476,14 +658,45 @@ contains
data_ = psb_comm_halo_
end if
call desc_a%get_list_p(data_,d_vidx,totxch,idxr,idxs,info)
call desc_a%get_list_p(data_,comm_indexes,num_neighbors,total_recv,total_send,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
call psi_swaptran(ctxt,flag,beta,y,d_vidx,totxch,idxs,idxr,info)
if (info /= psb_success_) goto 9999
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
swap_start = iand(flag,psb_swap_start_) /= 0
swap_wait = iand(flag,psb_swap_wait_) /= 0
baseline = swap_mpi .or. swap_send .or. swap_recv .or. swap_sync
neighbor_a2av = swap_start .or. swap_wait
if( (baseline.eqv..true.).and.(neighbor_a2av.eqv..true.) ) then
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: both baseline and neighbor_a2av are true')
goto 9999
end if
if (baseline) then
call psi_ctran_baseline_multivect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) then
call psb_errpush(info,name,a_err='baseline swap')
goto 9999
end if
else if (neighbor_a2av) then
call psi_ctran_neighbor_topology_multivect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) then
call psb_errpush(info,name,a_err='neighbor a2av swap')
goto 9999
end if
else
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: neither baseline nor neighbor_a2av is true')
goto 9999
end if
call psb_erractionrestore(err_act)
return
@ -507,8 +720,8 @@ contains
!
!
!
module subroutine psi_ctran_vidx_multivect(ctxt,flag,beta,y,idx,&
& totxch,totsnd,totrcv,info)
module subroutine psi_ctran_baseline_multivect(ctxt,flag,beta,y,idx,&
& num_neighbors,total_send,total_recv,info)
#ifdef PSB_MPI_MOD
use mpi
@ -518,20 +731,20 @@ contains
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_c_base_multivect_type) :: y
complex(psb_spk_) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
complex(psb_spk_), intent(in) :: beta
class(psb_c_base_multivect_type), intent(inout) :: y
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: num_neighbors,total_send,total_recv
integer(psb_ipk_), intent(out) :: info
! locals
integer(psb_mpk_) :: np, me, nesd, nerv, n
integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
integer(psb_mpk_) :: icomm
integer(psb_mpk_), allocatable :: prcid(:)
integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,&
integer(psb_ipk_) :: err_act, i, idx_pt, total_send_, total_recv_,&
& snd_pt, rcv_pt, pnti
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
@ -558,8 +771,8 @@ contains
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
totrcv_ = totrcv * n
totsnd_ = totsnd * n
total_recv_ = total_recv * n
total_send_ = total_send * n
call idx%sync()
@ -577,15 +790,15 @@ contains
end if
if (debug) write(*,*) me,'do_send start'
call y%new_buffer(ione*size(idx%v),info)
call y%new_comid(totxch,info)
call y%new_comid(num_neighbors,info)
y%comid = mpi_request_null
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
! First I post all the non blocking receives
pnti = 1
snd_pt = totrcv_+1
snd_pt = total_recv_+1
rcv_pt = 1
p2ptag = psb_complex_swap_tag
do i=1, totxch
do i=1, num_neighbors
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
@ -606,9 +819,9 @@ contains
! Then gather for sending.
!
pnti = 1
snd_pt = totrcv_+1
snd_pt = total_recv_+1
rcv_pt = 1
do i=1, totxch
do i=1, num_neighbors
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
idx_pt = 1+pnti+psb_n_elem_recv_
@ -629,10 +842,10 @@ contains
!
pnti = 1
snd_pt = totrcv_+1
snd_pt = total_recv_+1
rcv_pt = 1
p2ptag = psb_complex_swap_tag
do i=1, totxch
do i=1, num_neighbors
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
@ -665,14 +878,14 @@ contains
call psb_errpush(info,name,m_err=(/-2/))
goto 9999
end if
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
if (debug) write(*,*) me,' wait'
pnti = 1
snd_pt = totrcv_+1
snd_pt = total_recv_+1
rcv_pt = 1
p2ptag = psb_complex_swap_tag
do i=1, totxch
do i=1, num_neighbors
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
@ -708,9 +921,9 @@ contains
if (debug) write(*,*) me,' scatter'
pnti = 1
snd_pt = totrcv_+1
snd_pt = total_recv_+1
rcv_pt = 1
do i=1, totxch
do i=1, num_neighbors
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
@ -753,6 +966,151 @@ contains
return
end subroutine psi_ctran_vidx_multivect
end subroutine psi_ctran_baseline_multivect
subroutine psi_ctran_neighbor_topology_multivect(ctxt,flag,beta,y,idx, &
& num_neighbors,total_send,total_recv,info)
#ifdef PSB_MPI_MOD
use mpi
#endif
implicit none
#ifdef PSB_MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
complex(psb_spk_), intent(in) :: beta
class(psb_c_base_multivect_type), intent(inout) :: y
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: num_neighbors,total_send,total_recv
integer(psb_ipk_), intent(out) :: info
! locals
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: np, me
integer(psb_mpk_) :: iret, p2pstat(mpi_status_size)
integer(psb_ipk_) :: err_act, topology_total_send, topology_total_recv, buffer_size
logical :: do_start, do_wait
logical, parameter :: debug = .false.
character(len=30) :: name
info = psb_success_
name = 'psi_ctran_neighbor_topology_multivect'
call psb_erractionsave(err_act)
call psb_info(ctxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
icomm = ctxt%get_mpic()
do_start = iand(flag,psb_swap_start_) /= 0
do_wait = iand(flag,psb_swap_wait_) /= 0
call idx%sync()
if (do_start) then
if(debug) write(*,*) me,' nbr_tran_vect: starting data exchange'
if (.not. y%neighbor_topology%is_initialized) then
if (debug) write(*,*) me,' nbr_tran_vect: building topology'
call y%neighbor_topology%init(idx%v, num_neighbors, total_send, total_recv, &
& ctxt, icomm, info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_, name, &
& a_err='neighbor_topology_init')
goto 9999
end if
end if
topology_total_send = y%neighbor_topology%total_send
topology_total_recv = y%neighbor_topology%total_recv
buffer_size = topology_total_send + topology_total_recv
call y%new_buffer(buffer_size, info)
if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_, name)
goto 9999
end if
y%communication_handle = mpi_request_null
if (debug) write(*,*) me,' nbr_tran_vect: gathering (recv) data,', topology_total_recv,' elems'
call y%gth(int(topology_total_recv,psb_mpk_), &
& y%neighbor_topology%recv_indexes, &
& y%combuf(1:topology_total_recv))
call y%device_wait()
if (debug) write(*,*) me,' nbr_tran_vect: posting MPI_Ineighbor_alltoallv (swapped)'
call mpi_ineighbor_alltoallv( &
& y%combuf(1), &
& y%neighbor_topology%recv_counts, &
& y%neighbor_topology%recv_displs, &
& psb_mpi_c_spk_, &
& y%combuf(topology_total_recv + 1), &
& y%neighbor_topology%send_counts, &
& y%neighbor_topology%send_displs, &
& psb_mpi_c_spk_, &
& y%neighbor_topology%graph_comm, &
& y%communication_handle, iret)
if (iret /= mpi_success) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
end if
if (do_wait) then
if (y%communication_handle == mpi_request_null) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/-2/))
goto 9999
end if
topology_total_send = y%neighbor_topology%total_send
topology_total_recv = y%neighbor_topology%total_recv
if (debug) write(*,*) me,' nbr_tran_vect: waiting on MPI request'
call mpi_wait(y%communication_handle, p2pstat, iret)
if (iret /= mpi_success) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
if (debug) write(*,*) me,' nbr_tran_vect: scattering (send) data,', topology_total_send,' elems'
call y%sct(int(topology_total_send,psb_mpk_), &
& y%neighbor_topology%send_indexes, &
& y%combuf(topology_total_recv+1:topology_total_recv+topology_total_send), &
& beta)
y%communication_handle = mpi_request_null
call y%device_wait()
call y%maybe_free_buffer(info)
if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_, name)
goto 9999
end if
if (debug) write(*,*) me,' nbr_tran_vect: done'
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psi_ctran_neighbor_topology_multivect
end submodule psi_c_swaptran_impl

@ -109,18 +109,17 @@ contains
integer(psb_ipk_), optional :: data
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np, me, total_send, total_recv, num_neighbors, data_, err_act
class(psb_i_base_vect_type), pointer :: comm_indexes
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np, me, total_send, total_recv, num_neighbors, data_, err_act
class(psb_i_base_vect_type), pointer :: comm_indexes
! local variables used to detect the communication scheme
logical :: swap_mpi, swap_sync, swap_send, swap_recv, swap_start, swap_wait
logical :: baseline, neighbor_a2av
logical :: swap_mpi, swap_sync, swap_send, swap_recv, swap_start, swap_wait
logical :: baseline, neighbor_a2av
! error handling variables
integer(psb_ipk_) :: err_act
integer(psb_mpk_) :: me, np
character(len=30) :: name
integer(psb_ipk_) :: err_act
character(len=30) :: name
info = psb_success_
name = 'psi_dswapdata_vect'
@ -448,8 +447,8 @@ contains
#endif
type(psb_ctxt_type), intent(in) :: ctxt
real(psb_dpk_), intent(in) :: beta
integer(psb_ipk_), intent(in) :: flag
real(psb_dpk_), intent(in) :: beta
class(psb_d_base_vect_type), intent(inout) :: y
class(psb_i_base_vect_type), intent(inout) :: comm_indexes
integer(psb_ipk_), intent(in) :: num_neighbors,total_send,total_recv
@ -466,7 +465,7 @@ contains
info = psb_success_
name = 'psi_dswap_nbr_vect'
name = 'psi_dswap_neighbor_topology_vect'
call psb_erractionsave(err_act)
call psb_info(ctxt,me,np)
if (np == -1) then
@ -622,16 +621,17 @@ contains
integer(psb_ipk_), optional :: data
! local variables used to detect the communication scheme
logical :: swap_mpi, swap_sync, swap_send, swap_recv, swap_start, swap_wait
logical :: baseline, neighbor_a2av
logical :: swap_mpi, swap_sync, swap_send, swap_recv, swap_start, swap_wait
logical :: baseline, neighbor_a2av
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, total_send, total_recv, num_neighbors, data_, err_act
class(psb_i_base_vect_type), pointer :: comm_indexes
character(len=30) :: name
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, total_send, total_recv, num_neighbors, data_, err_act
class(psb_i_base_vect_type), pointer :: comm_indexes
character(len=30) :: name
info = psb_success_
name = 'psi_dswapdata_multivect'
call psb_erractionsave(err_act)

@ -111,12 +111,17 @@ contains
integer(psb_ipk_), optional :: data
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, idxs, idxr, totxch, err_act, data_
class(psb_i_base_vect_type), pointer :: d_vidx
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, total_send, total_recv, num_neighbors, err_act, data_
class(psb_i_base_vect_type), pointer :: comm_indexes
character(len=20) :: name
! local variables used to detect the communication scheme
logical :: swap_mpi, swap_sync, swap_send, swap_recv, swap_start, swap_wait
logical :: baseline, neighbor_a2av
info = psb_success_
name = 'psi_dswaptran_vect'
call psb_erractionsave(err_act)
@ -142,14 +147,45 @@ contains
data_ = psb_comm_halo_
end if
call desc_a%get_list_p(data_,d_vidx,totxch,idxr,idxs,info)
call desc_a%get_list_p(data_,comm_indexes,num_neighbors,total_recv,total_send,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
call psi_swaptran(ctxt,flag,beta,y,d_vidx,totxch,idxs,idxr,info)
if (info /= psb_success_) goto 9999
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
swap_start = iand(flag,psb_swap_start_) /= 0
swap_wait = iand(flag,psb_swap_wait_) /= 0
baseline = swap_mpi .or. swap_send .or. swap_recv .or. swap_sync
neighbor_a2av = swap_start .or. swap_wait
if( (baseline.eqv..true.).and.(neighbor_a2av.eqv..true.) ) then
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: both baseline and neighbor_a2av are true')
goto 9999
end if
if (baseline) then
call psi_dtran_baseline_vect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) then
call psb_errpush(info,name,a_err='baseline swap')
goto 9999
end if
else if (neighbor_a2av) then
call psi_dtran_neighbor_topology_vect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) then
call psb_errpush(info,name,a_err='neighbor a2av swap')
goto 9999
end if
else
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: neither baseline nor neighbor_a2av is true')
goto 9999
end if
call psb_erractionrestore(err_act)
return
@ -161,7 +197,7 @@ contains
!
!
! Subroutine: psi_dtran_vidx_vect
! Subroutine: psi_dtran_baseline_vect
! Data exchange among processes.
!
! Takes care of Y an encapsulated vector. Relies on the gather/scatter methods
@ -172,8 +208,8 @@ contains
!
!
!
module subroutine psi_dtran_vidx_vect(ctxt,flag,beta,y,idx,&
& totxch,totsnd,totrcv,info)
module subroutine psi_dtran_baseline_vect(ctxt,flag,beta,y,idx,&
& num_neighbors,total_send,total_recv,info)
#ifdef PSB_MPI_MOD
use mpi
@ -188,7 +224,7 @@ contains
real(psb_dpk_), intent(in) :: beta
class(psb_d_base_vect_type), intent(inout) :: y
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
integer(psb_ipk_), intent(in) :: num_neighbors,total_send, total_recv
integer(psb_ipk_), intent(out) :: info
! locals
@ -196,7 +232,7 @@ contains
integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
integer(psb_mpk_) :: icomm
integer(psb_mpk_), allocatable :: prcid(:)
integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,&
integer(psb_ipk_) :: err_act, i, idx_pt, total_send_, total_recv_,&
& snd_pt, rcv_pt, pnti
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
@ -204,7 +240,7 @@ contains
character(len=20) :: name
info = psb_success_
name = 'psi_dtran_vidx_vect'
name = 'psi_dtran_baseline_vect'
call psb_erractionsave(err_act)
call psb_info(ctxt,me,np)
if (np == -1) then
@ -222,8 +258,8 @@ contains
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
totrcv_ = totrcv * n
totsnd_ = totsnd * n
total_recv_ = total_recv * n
total_send_ = total_send * n
call idx%sync()
@ -241,13 +277,13 @@ contains
end if
if (debug) write(*,*) me,'do_send start'
call y%new_buffer(ione*size(idx%v),info)
call y%new_comid(totxch,info)
call y%new_comid(num_neighbors,info)
y%comid = mpi_request_null
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
! First I post all the non blocking receives
pnti = 1
p2ptag = psb_double_swap_tag
do i=1, totxch
do i=1, num_neighbors
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
@ -270,7 +306,7 @@ contains
!
pnti = 1
snd_pt = 1
do i=1, totxch
do i=1, num_neighbors
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
snd_pt = 1+pnti+nerv+psb_n_elem_send_
@ -296,7 +332,7 @@ contains
snd_pt = 1
rcv_pt = 1
p2ptag = psb_double_swap_tag
do i=1, totxch
do i=1, num_neighbors
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
@ -329,12 +365,12 @@ contains
call psb_errpush(info,name,m_err=(/-2/))
goto 9999
end if
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
if (debug) write(*,*) me,' wait'
pnti = 1
p2ptag = psb_double_swap_tag
do i=1, totxch
do i=1, num_neighbors
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
@ -373,7 +409,7 @@ contains
pnti = 1
snd_pt = 1
rcv_pt = 1
do i=1, totxch
do i=1, num_neighbors
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
@ -414,7 +450,169 @@ contains
return
end subroutine psi_dtran_vidx_vect
end subroutine psi_dtran_baseline_vect
subroutine psi_dtran_neighbor_topology_vect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
#ifdef PSB_MPI_MOD
use mpi
#endif
implicit none
#ifdef PSB_MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
real(psb_dpk_), intent(in) :: beta
class(psb_d_base_vect_type), intent(inout) :: y
class(psb_i_base_vect_type), intent(inout) :: comm_indexes
integer(psb_ipk_), intent(in) :: num_neighbors,total_send,total_recv
integer(psb_ipk_), intent(out) :: info
! locals
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: np, me
integer(psb_mpk_) :: iret, p2pstat(mpi_status_size)
integer(psb_ipk_) :: err_act, topology_total_send, topology_total_recv, buffer_size
logical :: do_start, do_wait
logical, parameter :: debug = .false.
character(len=30) :: name
info = psb_success_
name = 'psi_dtran_neighbor_topology_vect'
call psb_erractionsave(err_act)
call psb_info(ctxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
icomm = ctxt%get_mpic()
do_start = iand(flag,psb_swap_start_) /= 0
do_wait = iand(flag,psb_swap_wait_) /= 0
call comm_indexes%sync()
! ---------------------------------------------------------
! START phase: build topology (if needed), gather, post MPI
! ---------------------------------------------------------
if (do_start) then
if(debug) write(*,*) me,' nbr_vect: starting data exchange'
! Lazy initialization: build the topology on first call
if (.not. y%neighbor_topology%is_initialized) then
if (debug) write(*,*) me,' nbr_vect: building topology'
call y%neighbor_topology%init(comm_indexes%v, num_neighbors, total_send, total_recv, ctxt, icomm, info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_, name, &
& a_err='neighbor_topology_init')
goto 9999
end if
end if
topology_total_send = y%neighbor_topology%total_send
topology_total_recv = y%neighbor_topology%total_recv
! Buffer layout:
! combuf(1 : total_send) = send area
! combuf(total_send+1 : total_send+total_recv) = recv area
buffer_size = topology_total_send + topology_total_recv
call y%new_buffer(buffer_size, info)
if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_, name)
goto 9999
end if
y%communication_handle = mpi_request_null
! For transpose exchange: gather recv area first (we will send "recv" data)
if (debug) write(*,*) me,' nbr_tran_vect: gathering recv data,', topology_total_recv,' elems'
call y%gth(int(topology_total_recv,psb_mpk_), &
& y%neighbor_topology%recv_indexes, &
& y%combuf(1:topology_total_recv))
! Wait for device (important for GPU subclasses)
call y%device_wait()
! Post non-blocking neighborhood alltoallv swapping send/recv arrays
if (debug) write(*,*) me,' nbr_tran_vect: posting MPI_Ineighbor_alltoallv (swapped)'
call mpi_ineighbor_alltoallv( &
& y%combuf(1), & ! send buffer (recv_indexes gathered)
& y%neighbor_topology%recv_counts, &
& y%neighbor_topology%recv_displs, &
& psb_mpi_r_dpk_, &
& y%combuf(topology_total_recv + 1), & ! recv buffer (will contain send_indexes data)
& y%neighbor_topology%send_counts, &
& y%neighbor_topology%send_displs, &
& psb_mpi_r_dpk_, &
& y%neighbor_topology%graph_comm, &
& y%communication_handle, iret)
if (iret /= mpi_success) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
end if ! do_start
! ---------------------------------------------------------
! WAIT phase: complete MPI, scatter received data
! ---------------------------------------------------------
if (do_wait) then
if (y%communication_handle == mpi_request_null) then
! No matching start? Something is wrong
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/-2/))
goto 9999
end if
topology_total_send = y%neighbor_topology%total_send
topology_total_recv = y%neighbor_topology%total_recv
! Wait for the non-blocking collective to complete
if (debug) write(*,*) me,' nbr_vect: waiting on MPI request'
call mpi_wait(y%communication_handle, p2pstat, iret)
if (iret /= mpi_success) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
! For transpose exchange: scatter the data that correspond to peers' send area
if (debug) write(*,*) me,' nbr_tran_vect: scattering send-index data,', topology_total_send,' elems'
call y%sct(int(topology_total_send,psb_mpk_), &
& y%neighbor_topology%send_indexes, &
& y%combuf(topology_total_recv+1:topology_total_recv+topology_total_send), &
& beta)
! Clean up
y%communication_handle = mpi_request_null
call y%device_wait()
call y%maybe_free_buffer(info)
if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_, name)
goto 9999
end if
if (debug) write(*,*) me,' nbr_vect: done'
end if ! do_wait
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psi_dtran_neighbor_topology_vect
!
@ -445,21 +643,26 @@ contains
integer(psb_ipk_), optional :: data
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, idxs, idxr, totxch, err_act, data_
class(psb_i_base_vect_type), pointer :: d_vidx
character(len=20) :: name
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, total_send, total_recv, num_neighbors, err_act, data_
class(psb_i_base_vect_type), pointer :: comm_indexes
character(len=20) :: name
! local variables used to detect the communication scheme
logical :: swap_mpi, swap_sync, swap_send, swap_recv, swap_start, swap_wait
logical :: baseline, neighbor_a2av
info=psb_success_
name='psi_swap_tranv'
info = psb_success_
name = 'psi_dswaptran_multivect'
call psb_erractionsave(err_act)
ctxt = desc_a%get_context()
icomm = ctxt%get_mpic()
call psb_info(ctxt,me,np)
if (np == -1) then
info=psb_err_context_error_
info = psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
@ -476,14 +679,45 @@ contains
data_ = psb_comm_halo_
end if
call desc_a%get_list_p(data_,d_vidx,totxch,idxr,idxs,info)
call desc_a%get_list_p(data_,comm_indexes,num_neighbors,total_recv,total_send,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
call psi_swaptran(ctxt,flag,beta,y,d_vidx,totxch,idxs,idxr,info)
if (info /= psb_success_) goto 9999
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
swap_start = iand(flag,psb_swap_start_) /= 0
swap_wait = iand(flag,psb_swap_wait_) /= 0
baseline = swap_mpi .or. swap_send .or. swap_recv .or. swap_sync
neighbor_a2av = swap_start .or. swap_wait
if( (baseline.eqv..true.).and.(neighbor_a2av.eqv..true.) ) then
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: both baseline and neighbor_a2av are true')
goto 9999
end if
if (baseline) then
call psi_dtran_baseline_multivect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) then
call psb_errpush(info,name,a_err='baseline swap')
goto 9999
end if
else if (neighbor_a2av) then
call psi_dtran_neighbor_topology_multivect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) then
call psb_errpush(info,name,a_err='neighbor a2av swap')
goto 9999
end if
else
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: neither baseline nor neighbor_a2av is true')
goto 9999
end if
call psb_erractionrestore(err_act)
return
@ -493,22 +727,8 @@ contains
return
end subroutine psi_dswaptran_multivect
!
!
! Subroutine: psi_dtran_vidx_multivect
! Data exchange among processes.
!
! Takes care of Y an encapsulated multivector. Relies on the gather/scatter methods
! of multivectors.
!
! The real workhorse: the outer routine will only choose the index list
! this one takes the index list and does the actual exchange.
!
!
!
module subroutine psi_dtran_vidx_multivect(ctxt,flag,beta,y,idx,&
& totxch,totsnd,totrcv,info)
subroutine psi_dtran_baseline_multivect(ctxt,flag,beta,y,idx,&
& num_neighbors,total_send,total_recv,info)
#ifdef PSB_MPI_MOD
use mpi
@ -524,14 +744,14 @@ contains
class(psb_d_base_multivect_type), intent(inout) :: y
real(psb_dpk_), intent(in) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
integer(psb_ipk_), intent(in) :: num_neighbors,total_send,total_recv
! locals
integer(psb_mpk_) :: np, me, nesd, nerv, n
integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
integer(psb_mpk_) :: icomm
integer(psb_mpk_), allocatable :: prcid(:)
integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,&
integer(psb_ipk_) :: err_act, i, idx_pt, total_send_, total_recv_,&
& snd_pt, rcv_pt, pnti
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
@ -558,8 +778,8 @@ contains
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
totrcv_ = totrcv * n
totsnd_ = totsnd * n
total_recv_ = total_recv * n
total_send_ = total_send * n
call idx%sync()
@ -577,15 +797,15 @@ contains
end if
if (debug) write(*,*) me,'do_send start'
call y%new_buffer(ione*size(idx%v),info)
call y%new_comid(totxch,info)
call y%new_comid(num_neighbors,info)
y%comid = mpi_request_null
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
! First I post all the non blocking receives
pnti = 1
snd_pt = totrcv_+1
snd_pt = total_recv_+1
rcv_pt = 1
p2ptag = psb_double_swap_tag
do i=1, totxch
do i=1, num_neighbors
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
@ -606,9 +826,9 @@ contains
! Then gather for sending.
!
pnti = 1
snd_pt = totrcv_+1
snd_pt = total_recv_+1
rcv_pt = 1
do i=1, totxch
do i=1, num_neighbors
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
idx_pt = 1+pnti+psb_n_elem_recv_
@ -629,10 +849,10 @@ contains
!
pnti = 1
snd_pt = totrcv_+1
snd_pt = total_recv_+1
rcv_pt = 1
p2ptag = psb_double_swap_tag
do i=1, totxch
do i=1, num_neighbors
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
@ -665,14 +885,14 @@ contains
call psb_errpush(info,name,m_err=(/-2/))
goto 9999
end if
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
if (debug) write(*,*) me,' wait'
pnti = 1
snd_pt = totrcv_+1
snd_pt = total_recv_+1
rcv_pt = 1
p2ptag = psb_double_swap_tag
do i=1, totxch
do i=1, num_neighbors
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
@ -708,9 +928,9 @@ contains
if (debug) write(*,*) me,' scatter'
pnti = 1
snd_pt = totrcv_+1
snd_pt = total_recv_+1
rcv_pt = 1
do i=1, totxch
do i=1, num_neighbors
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
@ -753,6 +973,166 @@ contains
return
end subroutine psi_dtran_vidx_multivect
end subroutine psi_dtran_baseline_multivect
subroutine psi_dtran_neighbor_topology_multivect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
  !
  ! Transpose data exchange for an encapsulated double-precision
  ! multivector, implemented with a non-blocking MPI neighborhood
  ! collective (mpi_ineighbor_alltoallv) on the graph communicator
  ! cached inside Y%neighbor_topology.
  !
  ! In the transpose exchange the roles of the send and receive index
  ! lists are swapped with respect to the halo exchange: we gather and
  ! transmit the entries addressed by recv_indexes, and scatter the
  ! incoming data through send_indexes.
  !
  ! The exchange is split into two phases, selected through FLAG:
  !   psb_swap_start_ : build topology (first call only), gather data,
  !                     post the non-blocking collective;
  !   psb_swap_wait_  : wait for completion, scatter, release buffers.
  ! Both bits may be set in the same call for a blocking exchange.
  !
  ! Arguments:
  !   ctxt          - parallel context
  !   flag          - phase selector (psb_swap_start_ / psb_swap_wait_)
  !   beta          - scaling of the pre-existing Y entries in scatter
  !   y             - the multivector being exchanged (owns buffers,
  !                   the MPI request handle and the cached topology)
  !   comm_indexes  - encoded communication index list from the
  !                   descriptor, used only to build the topology
  !   num_neighbors - number of communicating neighbor processes
  !   total_send    - total entries to send (descriptor counts)
  !   total_recv    - total entries to receive (descriptor counts)
  !   info          - return code, psb_success_ on success
  !
#ifdef PSB_MPI_MOD
  use mpi
#endif
  implicit none
#ifdef PSB_MPI_H
  include 'mpif.h'
#endif
  type(psb_ctxt_type), intent(in) :: ctxt
  integer(psb_ipk_), intent(in) :: flag
  real(psb_dpk_), intent(in) :: beta
  class(psb_d_base_multivect_type), intent(inout) :: y
  class(psb_i_base_vect_type), intent(inout) :: comm_indexes
  integer(psb_ipk_), intent(in) :: num_neighbors,total_send,total_recv
  integer(psb_ipk_), intent(out) :: info
  ! locals
  integer(psb_mpk_) :: icomm
  integer(psb_mpk_) :: np, me
  integer(psb_mpk_) :: iret, p2pstat(mpi_status_size)
  integer(psb_ipk_) :: err_act, topology_total_send, topology_total_recv, buffer_size
  logical :: do_start, do_wait
  logical, parameter :: debug = .false.
  ! len=40: the routine name below is 37 characters; a len=30 buffer
  ! would silently truncate it in every error-stack entry.
  character(len=40) :: name

  info = psb_success_
  name = 'psi_dtran_neighbor_topology_multivect'
  call psb_erractionsave(err_act)

  call psb_info(ctxt,me,np)
  if (np == -1) then
    info=psb_err_context_error_
    call psb_errpush(info,name)
    goto 9999
  endif
  icomm = ctxt%get_mpic()

  do_start = iand(flag,psb_swap_start_) /= 0
  do_wait  = iand(flag,psb_swap_wait_) /= 0

  ! Make sure the index list is up to date on the host side.
  call comm_indexes%sync()

  ! ---------------------------------------------------------
  ! START phase: build topology (if needed), gather, post MPI
  ! ---------------------------------------------------------
  if (do_start) then
    if(debug) write(*,*) me,' nbr_vect: starting data exchange'

    ! Lazy initialization: build the topology on first call
    if (.not. y%neighbor_topology%is_initialized) then
      if (debug) write(*,*) me,' nbr_vect: building topology'
      call y%neighbor_topology%init(comm_indexes%v, num_neighbors, total_send, total_recv, ctxt, icomm, info)
      if (info /= psb_success_) then
        call psb_errpush(psb_err_internal_error_, name, &
             & a_err='neighbor_topology_init')
        goto 9999
      end if
    end if

    ! NOTE(review): unlike the baseline multivector exchange, the
    ! counts here are not scaled by the number of columns of Y;
    ! confirm that neighbor_topology%init accounts for the
    ! multivector width.
    topology_total_send = y%neighbor_topology%total_send
    topology_total_recv = y%neighbor_topology%total_recv

    ! Buffer layout:
    !   combuf(1 : total_send)                       = send area
    !   combuf(total_send+1 : total_send+total_recv) = recv area
    buffer_size = topology_total_send + topology_total_recv
    call y%new_buffer(buffer_size, info)
    if (info /= 0) then
      call psb_errpush(psb_err_alloc_dealloc_, name)
      goto 9999
    end if
    y%communication_handle = mpi_request_null

    ! For transpose exchange: gather recv area first (we will send "recv" data)
    if (debug) write(*,*) me,' nbr_tran_vect: gathering recv data,', topology_total_recv,' elems'
    call y%gth(int(topology_total_recv,psb_mpk_), &
         & y%neighbor_topology%recv_indexes, &
         & y%combuf(1:topology_total_recv))

    ! Wait for device (important for GPU subclasses)
    call y%device_wait()

    ! Post non-blocking neighborhood alltoallv swapping send/recv arrays
    if (debug) write(*,*) me,' nbr_tran_vect: posting MPI_Ineighbor_alltoallv (swapped)'
    call mpi_ineighbor_alltoallv( &
         & y%combuf(1), &                            ! send buffer (recv_indexes gathered)
         & y%neighbor_topology%recv_counts, &
         & y%neighbor_topology%recv_displs, &
         & psb_mpi_r_dpk_, &
         & y%combuf(topology_total_recv + 1), &      ! recv buffer (will contain send_indexes data)
         & y%neighbor_topology%send_counts, &
         & y%neighbor_topology%send_displs, &
         & psb_mpi_r_dpk_, &
         & y%neighbor_topology%graph_comm, &
         & y%communication_handle, iret)
    if (iret /= mpi_success) then
      info = psb_err_mpi_error_
      call psb_errpush(info, name, m_err=(/iret/))
      goto 9999
    end if
  end if ! do_start

  ! ---------------------------------------------------------
  ! WAIT phase: complete MPI, scatter received data
  ! ---------------------------------------------------------
  if (do_wait) then
    if (y%communication_handle == mpi_request_null) then
      ! No matching start? Something is wrong
      info = psb_err_mpi_error_
      call psb_errpush(info, name, m_err=(/-2/))
      goto 9999
    end if

    topology_total_send = y%neighbor_topology%total_send
    topology_total_recv = y%neighbor_topology%total_recv

    ! Wait for the non-blocking collective to complete
    if (debug) write(*,*) me,' nbr_vect: waiting on MPI request'
    call mpi_wait(y%communication_handle, p2pstat, iret)
    if (iret /= mpi_success) then
      info = psb_err_mpi_error_
      call psb_errpush(info, name, m_err=(/iret/))
      goto 9999
    end if

    ! For transpose exchange: scatter the data that correspond to peers' send area
    if (debug) write(*,*) me,' nbr_tran_vect: scattering send-index data,', topology_total_send,' elems'
    call y%sct(int(topology_total_send,psb_mpk_), &
         & y%neighbor_topology%send_indexes, &
         & y%combuf(topology_total_recv+1:topology_total_recv+topology_total_send), &
         & beta)

    ! Clean up
    y%communication_handle = mpi_request_null
    call y%device_wait()
    call y%maybe_free_buffer(info)
    if (info /= 0) then
      call psb_errpush(psb_err_alloc_dealloc_, name)
      goto 9999
    end if
    if (debug) write(*,*) me,' nbr_vect: done'
  end if ! do_wait

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(ctxt,err_act)
  return
end subroutine psi_dtran_neighbor_topology_multivect
end submodule psi_d_swaptran_impl

@ -100,22 +100,26 @@ contains
include 'mpif.h'
#endif
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_i_base_vect_type) :: y
integer(psb_ipk_) :: beta
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(in) :: beta
class(psb_i_base_vect_type), intent(inout) :: y
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(out) :: info
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, idxs, idxr, totxch, data_, err_act
class(psb_i_base_vect_type), pointer :: d_vidx
character(len=20) :: name
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, total_send, total_recv, num_neighbors, data_, err_act
class(psb_i_base_vect_type), pointer :: comm_indexes
character(len=20) :: name
! local variables used to detect the communication scheme
logical :: swap_mpi, swap_sync, swap_send, swap_recv, swap_start, swap_wait
logical :: baseline, neighbor_a2av
info=psb_success_
name='psi_swap_datav'
info = psb_success_
name = 'psi_iswapdata_vect'
call psb_erractionsave(err_act)
ctxt = desc_a%get_context()
@ -128,7 +132,7 @@ contains
endif
if (.not.psb_is_asb_desc(desc_a)) then
info=psb_err_invalid_cd_state_
info = psb_err_invalid_cd_state_
call psb_errpush(info,name)
goto 9999
endif
@ -139,14 +143,45 @@ contains
data_ = psb_comm_halo_
end if
call desc_a%get_list_p(data_,d_vidx,totxch,idxr,idxs,info)
call desc_a%get_list_p(data_,comm_indexes,num_neighbors,total_recv,total_send,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
call psi_swapdata(ctxt,flag,beta,y,d_vidx,totxch,idxs,idxr,info)
if (info /= psb_success_) goto 9999
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
swap_start = iand(flag,psb_swap_start_) /= 0
swap_wait = iand(flag,psb_swap_wait_) /= 0
baseline = swap_mpi .or. swap_send .or. swap_recv .or. swap_sync
neighbor_a2av = swap_start .or. swap_wait
if( (baseline.eqv..true.).and.(neighbor_a2av.eqv..true.) ) then
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: both baseline and neighbor_a2av are true')
goto 9999
end if
if (baseline) then
call psi_iswap_baseline_vect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) then
call psb_errpush(info,name,a_err='baseline swap')
goto 9999
end if
else if (neighbor_a2av) then
call psi_iswap_neighbor_topology_vect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) then
call psb_errpush(info,name,a_err='neighbor a2av swap')
goto 9999
end if
else
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: neither baseline nor neighbor_a2av is true')
goto 9999
end if
call psb_erractionrestore(err_act)
return
@ -157,21 +192,8 @@ contains
end subroutine psi_iswapdata_vect
!
!
! Subroutine: psi_iswap_vidx_vect
! Data exchange among processes.
!
! Takes care of Y, an encapsulated vector. Relies on the gather/scatter methods
! of vectors.
!
! The real workhorse: the outer routine will only choose the index list
! this one takes the index list and does the actual exchange.
!
!
!
module subroutine psi_iswap_vidx_vect(ctxt,flag,beta,y,idx, &
& totxch,totsnd,totrcv,info)
subroutine psi_iswap_baseline_vect(ctxt,flag,beta,y,comm_indexes, &
& num_neighbors,total_send,total_recv,info)
#ifdef PSB_MPI_MOD
use mpi
@ -181,13 +203,13 @@ contains
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_i_base_vect_type) :: y
integer(psb_ipk_) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(in) :: beta
class(psb_i_base_vect_type), intent(inout) :: y
class(psb_i_base_vect_type), intent(inout) :: comm_indexes
integer(psb_ipk_), intent(in) :: num_neighbors,total_send,total_recv
integer(psb_ipk_), intent(out) :: info
! locals
integer(psb_mpk_) :: np, me
@ -221,9 +243,9 @@ contains
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
totrcv_ = totrcv * n
totsnd_ = totsnd * n
call idx%sync()
totrcv_ = total_recv * n
totsnd_ = total_send * n
call comm_indexes%sync()
if (debug) write(*,*) me,'Internal buffer'
if (do_send) then
@ -238,16 +260,16 @@ contains
end if
end if
if (debug) write(*,*) me,'do_send start'
call y%new_buffer(ione*size(idx%v),info)
call y%new_comid(totxch,info)
call y%new_buffer(ione*size(comm_indexes%v),info)
call y%new_comid(num_neighbors,info)
y%comid = mpi_request_null
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
! First I post all the non blocking receives
pnti = 1
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
rcv_pt = 1+pnti+psb_n_elem_recv_
prcid(i) = psb_get_mpi_rank(ctxt,proc_to_comm)
@ -265,13 +287,13 @@ contains
! Then gather for sending.
!
pnti = 1
do i=1, totxch
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
snd_pt = 1+pnti+nerv+psb_n_elem_send_
rcv_pt = 1+pnti+psb_n_elem_recv_
idx_pt = snd_pt
call y%gth(idx_pt,nesd,idx)
call y%gth(idx_pt,nesd,comm_indexes)
pnti = pnti + nerv + nesd + 3
end do
@ -289,10 +311,10 @@ contains
snd_pt = 1
rcv_pt = 1
p2ptag = psb_int_swap_tag
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
snd_pt = 1+pnti+nerv+psb_n_elem_send_
rcv_pt = 1+pnti+psb_n_elem_recv_
@ -322,15 +344,15 @@ contains
call psb_errpush(info,name,m_err=(/-2/))
goto 9999
end if
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
if (debug) write(*,*) me,' wait'
pnti = 1
p2ptag = psb_int_swap_tag
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
snd_pt = 1+pnti+nerv+psb_n_elem_send_
rcv_pt = 1+pnti+psb_n_elem_recv_
@ -366,17 +388,17 @@ contains
pnti = 1
snd_pt = 1
rcv_pt = 1
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
idx_pt = 1+pnti+psb_n_elem_recv_
snd_pt = 1+pnti+nerv+psb_n_elem_send_
rcv_pt = 1+pnti+psb_n_elem_recv_
if (debug) write(0,*)me,' Received from: ',prcid(i),&
& y%combuf(rcv_pt:rcv_pt+nerv-1)
call y%sct(rcv_pt,nerv,idx,beta)
call y%sct(rcv_pt,nerv,comm_indexes,beta)
pnti = pnti + nerv + nesd + 3
end do
!
@ -406,16 +428,174 @@ contains
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psi_iswap_vidx_vect
!
!
! Subroutine: psi_iswapdata_multivect
! Data exchange among processes.
!
! Takes care of Y, an encapsulated multivector.
!
!
end subroutine psi_iswap_baseline_vect
subroutine psi_iswap_neighbor_topology_vect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
  !
  ! Halo data exchange for an encapsulated INTEGER vector, implemented
  ! with a non-blocking MPI neighborhood collective
  ! (mpi_ineighbor_alltoallv) on the graph communicator cached inside
  ! Y%neighbor_topology.
  !
  ! The exchange is split into two phases, selected through FLAG:
  !   psb_swap_start_ : build topology (first call only), gather data,
  !                     post the non-blocking collective;
  !   psb_swap_wait_  : wait for completion, scatter, release buffers.
  ! Both bits may be set in the same call for a blocking exchange.
  !
  ! Arguments:
  !   ctxt          - parallel context
  !   flag          - phase selector (psb_swap_start_ / psb_swap_wait_)
  !   beta          - scaling of the pre-existing Y entries in scatter
  !   y             - the vector being exchanged (owns buffers, the MPI
  !                   request handle and the cached topology)
  !   comm_indexes  - encoded communication index list from the
  !                   descriptor, used only to build the topology
  !   num_neighbors - number of communicating neighbor processes
  !   total_send    - total entries to send (descriptor counts)
  !   total_recv    - total entries to receive (descriptor counts)
  !   info          - return code, psb_success_ on success
  !
#ifdef PSB_MPI_MOD
  use mpi
#endif
  implicit none
#ifdef PSB_MPI_H
  include 'mpif.h'
#endif
  type(psb_ctxt_type), intent(in) :: ctxt
  integer(psb_ipk_), intent(in) :: flag
  integer(psb_ipk_), intent(in) :: beta
  class(psb_i_base_vect_type), intent(inout) :: y
  class(psb_i_base_vect_type), intent(inout) :: comm_indexes
  integer(psb_ipk_), intent(in) :: num_neighbors,total_send,total_recv
  integer(psb_ipk_), intent(out) :: info
  ! locals
  integer(psb_mpk_) :: icomm
  integer(psb_mpk_) :: np, me
  integer(psb_mpk_) :: iret, p2pstat(mpi_status_size)
  integer(psb_ipk_) :: err_act, topology_total_send, topology_total_recv, buffer_size
  logical :: do_start, do_wait
  logical, parameter :: debug = .false.
  ! len=40: the routine name below is 32 characters; a len=30 buffer
  ! would silently truncate it in every error-stack entry.
  character(len=40) :: name

  info = psb_success_
  name = 'psi_iswap_neighbor_topology_vect'
  call psb_erractionsave(err_act)

  call psb_info(ctxt,me,np)
  if (np == -1) then
    info=psb_err_context_error_
    call psb_errpush(info,name)
    goto 9999
  endif
  icomm = ctxt%get_mpic()

  do_start = iand(flag,psb_swap_start_) /= 0
  do_wait  = iand(flag,psb_swap_wait_) /= 0

  ! Make sure the index list is up to date on the host side.
  call comm_indexes%sync()

  ! ---------------------------------------------------------
  ! START phase: build topology (if needed), gather, post MPI
  ! ---------------------------------------------------------
  if (do_start) then
    if(debug) write(*,*) me,' nbr_vect: starting data exchange'

    ! Lazy initialization: build the topology on first call
    if (.not. y%neighbor_topology%is_initialized) then
      if (debug) write(*,*) me,' nbr_vect: building topology'
      call y%neighbor_topology%init(comm_indexes%v, num_neighbors, total_send, total_recv, ctxt, icomm, info)
      if (info /= psb_success_) then
        call psb_errpush(psb_err_internal_error_, name, &
             & a_err='neighbor_topology_init')
        goto 9999
      end if
    end if

    topology_total_send = y%neighbor_topology%total_send
    topology_total_recv = y%neighbor_topology%total_recv

    ! Buffer layout:
    !   combuf(1 : total_send)                       = send area
    !   combuf(total_send+1 : total_send+total_recv) = recv area
    buffer_size = topology_total_send + topology_total_recv
    call y%new_buffer(buffer_size, info)
    if (info /= 0) then
      call psb_errpush(psb_err_alloc_dealloc_, name)
      goto 9999
    end if
    y%communication_handle = mpi_request_null

    ! Gather send data into contiguous send buffer (polymorphic for GPU)
    if (debug) write(*,*) me,' nbr_vect: gathering send data,', topology_total_send,' elems'
    call y%gth(int(topology_total_send,psb_mpk_), &
         & y%neighbor_topology%send_indexes, &
         & y%combuf(1:topology_total_send))

    ! Wait for device (important for GPU subclasses)
    call y%device_wait()

    ! Post non-blocking neighborhood alltoallv.
    ! Y holds integer(psb_ipk_) data, so the MPI datatype must be
    ! psb_mpi_ipk_ (using the double-precision real type here would
    ! corrupt the exchanged data and/or mismatch the buffer lengths).
    if (debug) write(*,*) me,' nbr_vect: posting MPI_Ineighbor_alltoallv'
    call mpi_ineighbor_alltoallv( &
         & y%combuf(1), &                            ! send buffer
         & y%neighbor_topology%send_counts, &
         & y%neighbor_topology%send_displs, &
         & psb_mpi_ipk_, &
         & y%combuf(topology_total_send + 1), &      ! recv buffer
         & y%neighbor_topology%recv_counts, &
         & y%neighbor_topology%recv_displs, &
         & psb_mpi_ipk_, &
         & y%neighbor_topology%graph_comm, &
         & y%communication_handle, iret)
    if (iret /= mpi_success) then
      info = psb_err_mpi_error_
      call psb_errpush(info, name, m_err=(/iret/))
      goto 9999
    end if
  end if ! do_start

  ! ---------------------------------------------------------
  ! WAIT phase: complete MPI, scatter received data
  ! ---------------------------------------------------------
  if (do_wait) then
    if (y%communication_handle == mpi_request_null) then
      ! No matching start? Something is wrong
      info = psb_err_mpi_error_
      call psb_errpush(info, name, m_err=(/-2/))
      goto 9999
    end if

    topology_total_send = y%neighbor_topology%total_send
    topology_total_recv = y%neighbor_topology%total_recv

    ! Wait for the non-blocking collective to complete
    if (debug) write(*,*) me,' nbr_vect: waiting on MPI request'
    call mpi_wait(y%communication_handle, p2pstat, iret)
    if (iret /= mpi_success) then
      info = psb_err_mpi_error_
      call psb_errpush(info, name, m_err=(/iret/))
      goto 9999
    end if

    ! Scatter received data to local vector positions (polymorphic for GPU)
    if (debug) write(*,*) me,' nbr_vect: scattering recv data,', topology_total_recv,' elems'
    call y%sct(int(topology_total_recv,psb_mpk_), &
         & y%neighbor_topology%recv_indexes, &
         & y%combuf(topology_total_send+1:topology_total_send+topology_total_recv), &
         & beta)

    ! Clean up
    y%communication_handle = mpi_request_null
    call y%device_wait()
    call y%maybe_free_buffer(info)
    if (info /= 0) then
      call psb_errpush(psb_err_alloc_dealloc_, name)
      goto 9999
    end if
    if (debug) write(*,*) me,' nbr_vect: done'
  end if ! do_wait

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(ctxt,err_act)
  return
end subroutine psi_iswap_neighbor_topology_vect
module subroutine psi_iswapdata_multivect(flag,beta,y,desc_a,info,data)
#ifdef PSB_MPI_MOD
use mpi
@ -425,35 +605,39 @@ contains
include 'mpif.h'
#endif
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_i_base_multivect_type) :: y
integer(psb_ipk_) :: beta
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(in) :: beta
class(psb_i_base_multivect_type), intent(inout) :: y
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, idxs, idxr, totxch, data_, err_act
class(psb_i_base_vect_type), pointer :: d_vidx
character(len=20) :: name
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, total_send, total_recv, num_neighbors, data_, err_act
class(psb_i_base_vect_type), pointer :: comm_indexes
character(len=20) :: name
info=psb_success_
name='psi_swap_datav'
! local variables used to detect the communication scheme
logical :: swap_mpi, swap_sync, swap_send, swap_recv, swap_start, swap_wait
logical :: baseline, neighbor_a2av
info = psb_success_
name = 'psi_iswapdata_multivect'
call psb_erractionsave(err_act)
ctxt = desc_a%get_context()
icomm = ctxt%get_mpic()
call psb_info(ctxt,me,np)
if (np == -1) then
info=psb_err_context_error_
info = psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
if (.not.psb_is_asb_desc(desc_a)) then
info=psb_err_invalid_cd_state_
info = psb_err_invalid_cd_state_
call psb_errpush(info,name)
goto 9999
endif
@ -464,14 +648,45 @@ contains
data_ = psb_comm_halo_
end if
call desc_a%get_list_p(data_,d_vidx,totxch,idxr,idxs,info)
call desc_a%get_list_p(data_,comm_indexes,num_neighbors,total_recv,total_send,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
call psi_swapdata(ctxt,flag,beta,y,d_vidx,totxch,idxs,idxr,info)
if (info /= psb_success_) goto 9999
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
swap_start = iand(flag,psb_swap_start_) /= 0
swap_wait = iand(flag,psb_swap_wait_) /= 0
baseline = swap_mpi .or. swap_send .or. swap_recv .or. swap_sync
neighbor_a2av = swap_start .or. swap_wait
if( (baseline.eqv..true.).and.(neighbor_a2av.eqv..true.) ) then
info=psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: both baseline and neighbor_a2av are true')
goto 9999
end if
if (baseline) then
call psi_iswap_baseline_multivect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) then
call psb_errpush(info,name,a_err='baseline swap')
goto 9999
end if
else if (neighbor_a2av) then
call psi_iswap_neighbor_topology_multivect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) then
call psb_errpush(info,name,a_err='neighbor a2av swap')
goto 9999
end if
else
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: neither baseline nor neighbor_a2av is true')
goto 9999
end if
call psb_erractionrestore(err_act)
return
@ -482,21 +697,8 @@ contains
end subroutine psi_iswapdata_multivect
!
!
! Subroutine: psi_iswap_vidx_multivect
! Data exchange among processes.
!
! Takes care of Y an encapsulated multivector. Relies on the gather/scatter methods
! of multivectors.
!
! The real workhorse: the outer routine will only choose the index list
! this one takes the index list and does the actual exchange.
!
!
!
module subroutine psi_iswap_vidx_multivect(ctxt,flag,beta,y,idx, &
& totxch,totsnd,totrcv,info)
subroutine psi_iswap_baseline_multivect(ctxt,flag,beta,y,comm_indexes, &
& num_neighbors,total_send,total_recv,info)
#ifdef PSB_MPI_MOD
use mpi
@ -506,13 +708,13 @@ contains
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_i_base_multivect_type) :: y
integer(psb_ipk_) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(in) :: beta
class(psb_i_base_multivect_type), intent(inout) :: y
class(psb_i_base_vect_type), intent(inout) :: comm_indexes
integer(psb_ipk_), intent(in) :: num_neighbors,total_send, total_recv
integer(psb_ipk_), intent(out) :: info
! locals
integer(psb_mpk_) :: np, me, nesd, nerv, n
@ -526,8 +728,8 @@ contains
logical, parameter :: usersend=.false., debug=.false.
character(len=20) :: name
info=psb_success_
name='psi_swap_datav'
info = psb_success_
name = 'psi_iswap_baseline_multivect'
call psb_erractionsave(err_act)
call psb_info(ctxt,me,np)
if (np == -1) then
@ -546,10 +748,10 @@ contains
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
totrcv_ = totrcv * n
totsnd_ = totsnd * n
totrcv_ = total_recv * n
totsnd_ = total_send * n
call idx%sync()
call comm_indexes%sync()
if (debug) write(*,*) me,'Internal buffer'
if (do_send) then
@ -564,18 +766,18 @@ contains
end if
end if
if (debug) write(*,*) me,'do_send start'
call y%new_buffer(ione*size(idx%v),info)
call y%new_comid(totxch,info)
call y%new_buffer(ione*size(comm_indexes%v),info)
call y%new_comid(num_neighbors,info)
y%comid = mpi_request_null
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
! First I post all the non blocking receives
pnti = 1
snd_pt = totrcv_+1
rcv_pt = 1
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
prcid(i) = psb_get_mpi_rank(ctxt,proc_to_comm)
if ((nerv>0).and.(proc_to_comm /= me)) then
if (debug) write(*,*) me,'Posting receive from',prcid(i),rcv_pt
@ -595,11 +797,11 @@ contains
pnti = 1
snd_pt = totrcv_+1
rcv_pt = 1
do i=1, totxch
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
idx_pt = 1+pnti+nerv+psb_n_elem_send_
call y%gth(idx_pt,snd_pt,nesd,idx)
call y%gth(idx_pt,snd_pt,nesd,comm_indexes)
rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3
@ -619,10 +821,10 @@ contains
snd_pt = totrcv_+1
rcv_pt = 1
p2ptag = psb_int_swap_tag
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
if ((nesd>0).and.(proc_to_comm /= me)) then
call mpi_isend(y%combuf(snd_pt),n*nesd,&
@ -651,17 +853,17 @@ contains
call psb_errpush(info,name,m_err=(/-2/))
goto 9999
end if
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
if (debug) write(*,*) me,' wait'
pnti = 1
snd_pt = totrcv_+1
rcv_pt = 1
p2ptag = psb_int_swap_tag
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
if (proc_to_comm /= me)then
if (nesd>0) then
call mpi_wait(y%comid(i,1),p2pstat,iret)
@ -696,15 +898,15 @@ contains
pnti = 1
snd_pt = totrcv_+1
rcv_pt = 1
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
idx_pt = 1+pnti+psb_n_elem_recv_
if (debug) write(0,*)me,' Received from: ',prcid(i),&
& y%combuf(rcv_pt:rcv_pt+n*nerv-1)
call y%sct(idx_pt,rcv_pt,nerv,idx,beta)
call y%sct(idx_pt,rcv_pt,nerv,comm_indexes,beta)
rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3
@ -736,6 +938,173 @@ contains
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psi_iswap_vidx_multivect
end subroutine psi_iswap_baseline_multivect
subroutine psi_iswap_neighbor_topology_multivect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
  !
  ! Halo data exchange for an encapsulated INTEGER multivector,
  ! implemented with a non-blocking MPI neighborhood collective
  ! (mpi_ineighbor_alltoallv) on the graph communicator cached inside
  ! Y%neighbor_topology.
  !
  ! The exchange is split into two phases, selected through FLAG:
  !   psb_swap_start_ : build topology (first call only), gather data,
  !                     post the non-blocking collective;
  !   psb_swap_wait_  : wait for completion, scatter, release buffers.
  ! Both bits may be set in the same call for a blocking exchange.
  !
  ! Arguments:
  !   ctxt          - parallel context
  !   flag          - phase selector (psb_swap_start_ / psb_swap_wait_)
  !   beta          - scaling of the pre-existing Y entries in scatter
  !   y             - the multivector being exchanged (owns buffers,
  !                   the MPI request handle and the cached topology)
  !   comm_indexes  - encoded communication index list from the
  !                   descriptor, used only to build the topology
  !   num_neighbors - number of communicating neighbor processes
  !   total_send    - total entries to send (descriptor counts)
  !   total_recv    - total entries to receive (descriptor counts)
  !   info          - return code, psb_success_ on success
  !
#ifdef PSB_MPI_MOD
  use mpi
#endif
  implicit none
#ifdef PSB_MPI_H
  include 'mpif.h'
#endif
  type(psb_ctxt_type), intent(in) :: ctxt
  integer(psb_ipk_), intent(in) :: flag
  integer(psb_ipk_), intent(in) :: beta
  class(psb_i_base_multivect_type), intent(inout) :: y
  class(psb_i_base_vect_type), intent(inout) :: comm_indexes
  integer(psb_ipk_), intent(in) :: num_neighbors,total_send, total_recv
  integer(psb_ipk_), intent(out) :: info
  ! locals
  integer(psb_mpk_) :: icomm
  integer(psb_mpk_) :: np, me
  integer(psb_mpk_) :: iret, p2pstat(mpi_status_size)
  integer(psb_ipk_) :: err_act, topology_total_send, topology_total_recv, buffer_size
  logical :: do_start, do_wait
  logical, parameter :: debug = .false.
  ! len=40: the routine name below is 37 characters; a len=30 buffer
  ! would silently truncate it in every error-stack entry.
  character(len=40) :: name

  info = psb_success_
  name = 'psi_iswap_neighbor_topology_multivect'
  call psb_erractionsave(err_act)

  call psb_info(ctxt,me,np)
  if (np == -1) then
    info=psb_err_context_error_
    call psb_errpush(info,name)
    goto 9999
  endif
  ! icomm must be retrieved before it is handed to the topology
  ! constructor below; it was previously passed uninitialized.
  icomm = ctxt%get_mpic()

  do_start = iand(flag,psb_swap_start_) /= 0
  do_wait  = iand(flag,psb_swap_wait_) /= 0

  ! Make sure the index list is up to date on the host side.
  call comm_indexes%sync()

  ! ---------------------------------------------------------
  ! START phase: build topology (if needed), gather, post MPI
  ! ---------------------------------------------------------
  if (do_start) then
    if(debug) write(*,*) me,' nbr_vect: starting data exchange'

    ! Lazy initialization: build the topology on first call
    if (.not. y%neighbor_topology%is_initialized) then
      if (debug) write(*,*) me,' nbr_vect: building topology'
      call y%neighbor_topology%init(comm_indexes%v, num_neighbors, total_send, total_recv, &
           & ctxt, icomm, info)
      if (info /= psb_success_) then
        call psb_errpush(psb_err_internal_error_, name, &
             & a_err='neighbor_topology_init')
        goto 9999
      end if
    end if

    topology_total_send = y%neighbor_topology%total_send
    topology_total_recv = y%neighbor_topology%total_recv

    ! Buffer layout:
    !   combuf(1 : total_send)                       = send area
    !   combuf(total_send+1 : total_send+total_recv) = recv area
    buffer_size = topology_total_send + topology_total_recv
    call y%new_buffer(buffer_size, info)
    if (info /= 0) then
      call psb_errpush(psb_err_alloc_dealloc_, name)
      goto 9999
    end if
    y%communication_handle = mpi_request_null

    ! Gather send data into contiguous send buffer (polymorphic for GPU)
    if (debug) write(*,*) me,' nbr_vect: gathering send data,', topology_total_send,' elems'
    call y%gth(int(topology_total_send,psb_mpk_), &
         & y%neighbor_topology%send_indexes, &
         & y%combuf(1:topology_total_send))

    ! Wait for device (important for GPU subclasses)
    call y%device_wait()

    ! Post non-blocking neighborhood alltoallv.
    ! Y holds integer(psb_ipk_) data, so the MPI datatype must be
    ! psb_mpi_ipk_ (using the double-precision real type here would
    ! corrupt the exchanged data and/or mismatch the buffer lengths).
    if (debug) write(*,*) me,' nbr_vect: posting MPI_Ineighbor_alltoallv'
    call mpi_ineighbor_alltoallv( &
         & y%combuf(1), &                            ! send buffer
         & y%neighbor_topology%send_counts, &
         & y%neighbor_topology%send_displs, &
         & psb_mpi_ipk_, &
         & y%combuf(topology_total_send + 1), &      ! recv buffer
         & y%neighbor_topology%recv_counts, &
         & y%neighbor_topology%recv_displs, &
         & psb_mpi_ipk_, &
         & y%neighbor_topology%graph_comm, &
         & y%communication_handle, iret)
    if (iret /= mpi_success) then
      info = psb_err_mpi_error_
      call psb_errpush(info, name, m_err=(/iret/))
      goto 9999
    end if
  end if ! do_start

  ! ---------------------------------------------------------
  ! WAIT phase: complete MPI, scatter received data
  ! ---------------------------------------------------------
  if (do_wait) then
    if (y%communication_handle == mpi_request_null) then
      ! No matching start? Something is wrong
      info = psb_err_mpi_error_
      call psb_errpush(info, name, m_err=(/-2/))
      goto 9999
    end if

    topology_total_send = y%neighbor_topology%total_send
    topology_total_recv = y%neighbor_topology%total_recv

    ! Wait for the non-blocking collective to complete
    if (debug) write(*,*) me,' nbr_vect: waiting on MPI request'
    call mpi_wait(y%communication_handle, p2pstat, iret)
    if (iret /= mpi_success) then
      info = psb_err_mpi_error_
      call psb_errpush(info, name, m_err=(/iret/))
      goto 9999
    end if

    ! Scatter received data to local vector positions (polymorphic for GPU)
    if (debug) write(*,*) me,' nbr_vect: scattering recv data,', topology_total_recv,' elems'
    call y%sct(int(topology_total_recv,psb_mpk_), &
         & y%neighbor_topology%recv_indexes, &
         & y%combuf(topology_total_send+1:topology_total_send+topology_total_recv), &
         & beta)

    ! Clean up
    y%communication_handle = mpi_request_null
    call y%device_wait()
    call y%maybe_free_buffer(info)
    if (info /= 0) then
      call psb_errpush(psb_err_alloc_dealloc_, name)
      goto 9999
    end if
    if (debug) write(*,*) me,' nbr_vect: done'
  end if ! do_wait

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(ctxt,err_act)
  return
end subroutine psi_iswap_neighbor_topology_multivect
end submodule psi_i_swapdata_impl

@ -80,7 +80,6 @@
! beta - integer Choose overwrite or sum.
! y - type(psb_i_vect_type) The data area
! desc_a - type(psb_desc_type). The communication descriptor.
! our own internal allocation.
! info - integer. return code.
! data - integer which list is to be used to exchange data
! default psb_comm_halo_
@ -90,7 +89,7 @@
! psb_comm_mov_ use ovr_mst_idx
!
!
submodule (psi_i_comm_v_mod) psi_i_swaptran_impl
submodule (psi_i_comm_v_mod) psi_i_swaptran_impl
use psb_base_mod
contains
module subroutine psi_iswaptran_vect(flag,beta,y,desc_a,info,data)
@ -103,19 +102,21 @@ contains
include 'mpif.h'
#endif
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_i_base_vect_type) :: y
integer(psb_ipk_) :: beta
type(psb_desc_type),target :: desc_a
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(in) :: beta
class(psb_i_base_vect_type), intent(inout) :: y
type(psb_desc_type),target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, idxs, idxr, totxch, err_act, data_
class(psb_i_base_vect_type), pointer :: d_vidx
character(len=20) :: name
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, total_send, total_recv, num_neighbors, err_act, data_
class(psb_i_base_vect_type), pointer :: comm_indexes
logical :: swap_mpi, swap_sync, swap_send, swap_recv, swap_start, swap_wait
logical :: baseline, neighbor_a2av
character(len=20) :: name
info = psb_success_
name = 'psi_iswaptran_vect'
@ -142,14 +143,33 @@ contains
data_ = psb_comm_halo_
end if
call desc_a%get_list_p(data_,d_vidx,totxch,idxr,idxs,info)
if (info /= psb_success_) then
call desc_a%get_list_p(data_,comm_indexes,num_neighbors,total_recv,total_send,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
call psi_swaptran(ctxt,flag,beta,y,d_vidx,totxch,idxs,idxr,info)
if (info /= psb_success_) goto 9999
! choose baseline or neighbor-topology algorithm
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
swap_start= iand(flag,psb_swap_start_) /= 0
swap_wait = iand(flag,psb_swap_wait_) /= 0
baseline = swap_mpi .or. swap_send .or. swap_recv .or. swap_sync
neighbor_a2av = swap_start .or. swap_wait
if (baseline) then
call psi_itran_baseline_vect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) goto 9999
else if (neighbor_a2av) then
call psi_itran_neighbor_topology_vect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) goto 9999
else
call psb_errpush(psb_err_mpi_error_,name,a_err='Incompatible flag settings')
goto 9999
end if
call psb_erractionrestore(err_act)
return
@ -172,8 +192,8 @@ contains
!
!
!
module subroutine psi_itran_vidx_vect(ctxt,flag,beta,y,idx,&
& totxch,totsnd,totrcv,info)
subroutine psi_itran_baseline_vect(ctxt,flag,beta,y,comm_indexes,&
& num_neighbors,total_send,total_recv,info)
#ifdef PSB_MPI_MOD
use mpi
@ -183,13 +203,13 @@ contains
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_i_base_vect_type) :: y
integer(psb_ipk_) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(in) :: beta
class(psb_i_base_vect_type), intent(inout) :: y
class(psb_i_base_vect_type), intent(inout) :: comm_indexes
integer(psb_ipk_), intent(in) :: num_neighbors,total_send,total_recv
integer(psb_ipk_), intent(out) :: info
! locals
integer(psb_mpk_) :: np, me, nesd, nerv, n
@ -203,12 +223,12 @@ contains
logical, parameter :: usersend=.false., debug=.false.
character(len=20) :: name
info=psb_success_
name='psi_swap_tran'
info = psb_success_
name = 'psi_itran_baseline_vect'
call psb_erractionsave(err_act)
call psb_info(ctxt,me,np)
if (np == -1) then
info=psb_err_context_error_
info = psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
@ -222,10 +242,10 @@ contains
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
totrcv_ = totrcv * n
totsnd_ = totsnd * n
totrcv_ = total_recv * n
totsnd_ = total_send * n
call idx%sync()
call comm_indexes%sync()
if (debug) write(*,*) me,'Internal buffer'
if (do_send) then
@ -240,17 +260,17 @@ contains
end if
end if
if (debug) write(*,*) me,'do_send start'
call y%new_buffer(ione*size(idx%v),info)
call y%new_comid(totxch,info)
call y%new_buffer(ione*size(comm_indexes%v),info)
call y%new_comid(num_neighbors,info)
y%comid = mpi_request_null
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
! First I post all the non blocking receives
pnti = 1
p2ptag = psb_int_swap_tag
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
snd_pt = 1+pnti+nerv+psb_n_elem_send_
rcv_pt = 1+pnti+psb_n_elem_recv_
@ -270,14 +290,14 @@ contains
!
pnti = 1
snd_pt = 1
do i=1, totxch
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
snd_pt = 1+pnti+nerv+psb_n_elem_send_
rcv_pt = 1+pnti+psb_n_elem_recv_
idx_pt = rcv_pt
call y%gth(idx_pt,nerv,idx)
call y%gth(idx_pt,nerv,comm_indexes)
pnti = pnti + nerv + nesd + 3
end do
@ -296,10 +316,10 @@ contains
snd_pt = 1
rcv_pt = 1
p2ptag = psb_int_swap_tag
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
snd_pt = 1+pnti+nerv+psb_n_elem_send_
rcv_pt = 1+pnti+psb_n_elem_recv_
@ -329,15 +349,15 @@ contains
call psb_errpush(info,name,m_err=(/-2/))
goto 9999
end if
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
if (debug) write(*,*) me,' wait'
pnti = 1
p2ptag = psb_int_swap_tag
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
snd_pt = 1+pnti+nerv+psb_n_elem_send_
rcv_pt = 1+pnti+psb_n_elem_recv_
@ -373,17 +393,17 @@ contains
pnti = 1
snd_pt = 1
rcv_pt = 1
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
idx_pt = 1+pnti+psb_n_elem_recv_
snd_pt = 1+pnti+nerv+psb_n_elem_send_
rcv_pt = 1+pnti+psb_n_elem_recv_
if (debug) write(0,*)me,' Received from: ',prcid(i),&
& y%combuf(snd_pt:snd_pt+nesd-1)
call y%sct(snd_pt,nesd,idx,beta)
call y%sct(snd_pt,nesd,comm_indexes,beta)
pnti = pnti + nerv + nesd + 3
end do
!
@ -414,19 +434,149 @@ contains
return
end subroutine psi_itran_vidx_vect
end subroutine psi_itran_baseline_vect
subroutine psi_itran_neighbor_topology_vect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
#ifdef PSB_MPI_MOD
use mpi
#endif
implicit none
#ifdef PSB_MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(in) :: beta
class(psb_i_base_vect_type), intent(inout) :: y
class(psb_i_base_vect_type), intent(inout) :: comm_indexes
integer(psb_ipk_), intent(in) :: num_neighbors,total_send,total_recv
integer(psb_ipk_), intent(out) :: info
! locals
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: np, me
integer(psb_mpk_) :: iret, p2pstat(mpi_status_size)
integer(psb_ipk_) :: err_act, topology_total_send, topology_total_recv, buffer_size
logical :: do_start, do_wait
logical, parameter :: debug = .false.
character(len=30) :: name
info = psb_success_
name = 'psi_itran_neighbor_topology_vect'
call psb_erractionsave(err_act)
call psb_info(ctxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
icomm = ctxt%get_mpic()
do_start = iand(flag,psb_swap_start_) /= 0
do_wait = iand(flag,psb_swap_wait_) /= 0
call comm_indexes%sync()
if (do_start) then
if(debug) write(*,*) me,' nbr_tran: starting data exchange'
if (.not. y%neighbor_topology%is_initialized) then
if (debug) write(*,*) me,' nbr_tran: building topology'
call y%neighbor_topology%init(comm_indexes%v, num_neighbors, total_send, total_recv, ctxt, icomm, info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_, name, a_err='neighbor_topology_init')
goto 9999
end if
end if
topology_total_send = y%neighbor_topology%total_send
topology_total_recv = y%neighbor_topology%total_recv
buffer_size = topology_total_send + topology_total_recv
call y%new_buffer(buffer_size, info)
if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_, name)
goto 9999
end if
y%communication_handle = mpi_request_null
if (debug) write(*,*) me,' nbr_tran: gathering send data,', topology_total_send,' elems'
! For tran we gather from recv_indexes (swap of roles)
call y%gth(int(topology_total_recv,psb_mpk_), &
& y%neighbor_topology%recv_indexes, &
& y%combuf(1:topology_total_recv))
call y%device_wait()
if (debug) write(*,*) me,' nbr_tran: posting MPI_Ineighbor_alltoallv'
call mpi_ineighbor_alltoallv( &
& y%combuf(1), &
& y%neighbor_topology%recv_counts, &
& y%neighbor_topology%recv_displs, &
& psb_mpi_r_dpk_, &
& y%combuf(topology_total_recv + 1), &
& y%neighbor_topology%send_counts, &
& y%neighbor_topology%send_displs, &
& psb_mpi_r_dpk_, &
& y%neighbor_topology%graph_comm, &
& y%communication_handle, iret)
if (iret /= mpi_success) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
end if
if (do_wait) then
if (y%communication_handle == mpi_request_null) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/-2/))
goto 9999
end if
topology_total_send = y%neighbor_topology%total_send
topology_total_recv = y%neighbor_topology%total_recv
if (debug) write(*,*) me,' nbr_tran: waiting on MPI request'
call mpi_wait(y%communication_handle, p2pstat, iret)
if (iret /= mpi_success) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
if (debug) write(*,*) me,' nbr_tran: scattering recv data,', topology_total_recv,' elems'
! For tran we scatter into send_indexes (swap of roles)
call y%sct(int(topology_total_send,psb_mpk_), &
& y%neighbor_topology%send_indexes, &
& y%combuf(topology_total_recv+1:topology_total_recv+topology_total_send), &
& beta)
y%communication_handle = mpi_request_null
call y%device_wait()
call y%maybe_free_buffer(info)
if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_, name)
goto 9999
end if
if (debug) write(*,*) me,' nbr_tran: done'
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psi_itran_neighbor_topology_vect
!
!
!
!
! Subroutine: psi_iswaptran_multivect
! Data exchange among processes.
!
! Takes care of Y, an encapsulated multivector.
!
!
module subroutine psi_iswaptran_multivect(flag,beta,y,desc_a,info,data)
#ifdef PSB_MPI_MOD
@ -437,22 +587,27 @@ contains
include 'mpif.h'
#endif
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_i_base_multivect_type) :: y
integer(psb_ipk_) :: beta
type(psb_desc_type),target :: desc_a
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(in) :: beta
class(psb_i_base_multivect_type), intent(inout) :: y
type(psb_desc_type),target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, idxs, idxr, totxch, err_act, data_
class(psb_i_base_vect_type), pointer :: d_vidx
character(len=20) :: name
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, total_send, total_recv, num_neighbors, err_act, data_
class(psb_i_base_vect_type), pointer :: comm_indexes
character(len=20) :: name
info=psb_success_
name='psi_swap_tranv'
! local variables for communication
logical :: swap_mpi, swap_sync, swap_send, swap_recv, swap_start, swap_wait
logical :: baseline, neighbor_a2av
info = psb_success_
name = 'psi_iswaptran_multivect'
call psb_erractionsave(err_act)
ctxt = desc_a%get_context()
@ -476,14 +631,32 @@ contains
data_ = psb_comm_halo_
end if
call desc_a%get_list_p(data_,d_vidx,totxch,idxr,idxs,info)
if (info /= psb_success_) then
call desc_a%get_list_p(data_,comm_indexes,num_neighbors,total_recv,total_send,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
call psi_swaptran(ctxt,flag,beta,y,d_vidx,totxch,idxs,idxr,info)
if (info /= psb_success_) goto 9999
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
swap_start= iand(flag,psb_swap_start_) /= 0
swap_wait = iand(flag,psb_swap_wait_) /= 0
baseline = swap_mpi .or. swap_send .or. swap_recv .or. swap_sync
neighbor_a2av = swap_start .or. swap_wait
if (baseline) then
call psi_itran_baseline_multivect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) goto 9999
else if (neighbor_a2av) then
call psi_itran_neighbor_topology_multivect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) goto 9999
else
call psb_errpush(psb_err_mpi_error_,name,a_err='Incompatible flag settings')
goto 9999
end if
call psb_erractionrestore(err_act)
return
@ -494,21 +667,8 @@ contains
end subroutine psi_iswaptran_multivect
!
!
! Subroutine: psi_itran_vidx_multivect
! Data exchange among processes.
!
! Takes care of Y an encapsulated multivector. Relies on the gather/scatter methods
! of multivectors.
!
! The real workhorse: the outer routine will only choose the index list
! this one takes the index list and does the actual exchange.
!
!
!
module subroutine psi_itran_vidx_multivect(ctxt,flag,beta,y,idx,&
& totxch,totsnd,totrcv,info)
module subroutine psi_itran_baseline_multivect(ctxt,flag,beta,y,comm_indexes,&
& num_neighbors,total_send,total_recv,info)
#ifdef PSB_MPI_MOD
use mpi
@ -518,13 +678,13 @@ contains
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_i_base_multivect_type) :: y
integer(psb_ipk_) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(in) :: beta
class(psb_i_base_multivect_type), intent(inout) :: y
class(psb_i_base_vect_type), intent(inout) :: comm_indexes
integer(psb_ipk_), intent(in) :: num_neighbors,total_send,total_recv
integer(psb_ipk_), intent(out) :: info
! locals
integer(psb_mpk_) :: np, me, nesd, nerv, n
@ -538,12 +698,12 @@ contains
logical, parameter :: usersend=.false., debug=.false.
character(len=20) :: name
info=psb_success_
name='psi_swap_tran'
info = psb_success_
name = 'psi_itran_baseline_multivect'
call psb_erractionsave(err_act)
call psb_info(ctxt,me,np)
if (np == -1) then
info=psb_err_context_error_
info = psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
@ -558,10 +718,10 @@ contains
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
totrcv_ = totrcv * n
totsnd_ = totsnd * n
totrcv_ = total_recv * n
totsnd_ = total_send * n
call idx%sync()
call comm_indexes%sync()
if (debug) write(*,*) me,'Internal buffer'
if (do_send) then
@ -576,19 +736,19 @@ contains
end if
end if
if (debug) write(*,*) me,'do_send start'
call y%new_buffer(ione*size(idx%v),info)
call y%new_comid(totxch,info)
call y%new_buffer(ione*size(comm_indexes%v),info)
call y%new_comid(num_neighbors,info)
y%comid = mpi_request_null
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
! First I post all the non blocking receives
pnti = 1
snd_pt = totrcv_+1
rcv_pt = 1
p2ptag = psb_int_swap_tag
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
prcid(i) = psb_get_mpi_rank(ctxt,proc_to_comm)
if ((nesd>0).and.(proc_to_comm /= me)) then
if (debug) write(*,*) me,'Posting receive from',prcid(i),snd_pt
@ -608,11 +768,11 @@ contains
pnti = 1
snd_pt = totrcv_+1
rcv_pt = 1
do i=1, totxch
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
idx_pt = 1+pnti+psb_n_elem_recv_
call y%gth(idx_pt,rcv_pt,nerv,idx)
call y%gth(idx_pt,rcv_pt,nerv,comm_indexes)
rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3
@ -632,10 +792,10 @@ contains
snd_pt = totrcv_+1
rcv_pt = 1
p2ptag = psb_int_swap_tag
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
idx_pt = 1+pnti+psb_n_elem_recv_
if ((nerv>0).and.(proc_to_comm /= me)) then
@ -665,17 +825,17 @@ contains
call psb_errpush(info,name,m_err=(/-2/))
goto 9999
end if
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
if (debug) write(*,*) me,' wait'
pnti = 1
snd_pt = totrcv_+1
rcv_pt = 1
p2ptag = psb_int_swap_tag
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
if (proc_to_comm /= me)then
if (nerv>0) then
call mpi_wait(y%comid(i,1),p2pstat,iret)
@ -710,15 +870,15 @@ contains
pnti = 1
snd_pt = totrcv_+1
rcv_pt = 1
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
idx_pt = 1+pnti+nerv+psb_n_elem_send_
if (debug) write(0,*)me,' Received from: ',prcid(i),&
& y%combuf(snd_pt:snd_pt+n*nesd-1)
call y%sct(idx_pt,snd_pt,nesd,idx,beta)
call y%sct(idx_pt,snd_pt,nesd,comm_indexes,beta)
rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3
@ -753,6 +913,145 @@ contains
return
end subroutine psi_itran_vidx_multivect
end subroutine psi_itran_baseline_multivect
subroutine psi_itran_neighbor_topology_multivect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
#ifdef PSB_MPI_MOD
use mpi
#endif
implicit none
#ifdef PSB_MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(in) :: beta
class(psb_i_base_multivect_type), intent(inout) :: y
class(psb_i_base_vect_type), intent(inout) :: comm_indexes
integer(psb_ipk_), intent(in) :: num_neighbors,total_send, total_recv
integer(psb_ipk_), intent(out) :: info
! locals
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: np, me
integer(psb_mpk_) :: iret, p2pstat(mpi_status_size)
integer(psb_ipk_) :: err_act, topology_total_send, topology_total_recv, buffer_size
logical :: do_start, do_wait
logical, parameter :: debug = .false.
character(len=30) :: name
info = psb_success_
name = 'psi_itran_neighbor_topology_multivect'
call psb_erractionsave(err_act)
call psb_info(ctxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
icomm = ctxt%get_mpic()
do_start = iand(flag,psb_swap_start_) /= 0
do_wait = iand(flag,psb_swap_wait_) /= 0
call comm_indexes%sync()
if (do_start) then
if(debug) write(*,*) me,' nbr_tran_mv: starting data exchange'
if (.not. y%neighbor_topology%is_initialized) then
if (debug) write(*,*) me,' nbr_tran_mv: building topology'
call y%neighbor_topology%init(comm_indexes%v, num_neighbors, total_send, total_recv, &
& ctxt, icomm, info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_, name, &
& a_err='neighbor_topology_init')
goto 9999
end if
end if
topology_total_send = y%neighbor_topology%total_send
topology_total_recv = y%neighbor_topology%total_recv
buffer_size = topology_total_send + topology_total_recv
call y%new_buffer(buffer_size, info)
if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_, name)
goto 9999
end if
y%communication_handle = mpi_request_null
if (debug) write(*,*) me,' nbr_tran_mv: gathering send data,', topology_total_send,' elems'
call y%gth(int(topology_total_recv,psb_mpk_), &
& y%neighbor_topology%recv_indexes, &
& y%combuf(1:topology_total_recv))
call y%device_wait()
if (debug) write(*,*) me,' nbr_tran_mv: posting MPI_Ineighbor_alltoallv'
call mpi_ineighbor_alltoallv( &
& y%combuf(1), &
& y%neighbor_topology%recv_counts, &
& y%neighbor_topology%recv_displs, &
& psb_mpi_r_dpk_, &
& y%combuf(topology_total_recv + 1), &
& y%neighbor_topology%send_counts, &
& y%neighbor_topology%send_displs, &
& psb_mpi_r_dpk_, &
& y%neighbor_topology%graph_comm, &
& y%communication_handle, iret)
if (iret /= mpi_success) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
end if
if (do_wait) then
if (y%communication_handle == mpi_request_null) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/-2/))
goto 9999
end if
topology_total_send = y%neighbor_topology%total_send
topology_total_recv = y%neighbor_topology%total_recv
if (debug) write(*,*) me,' nbr_tran_mv: waiting on MPI request'
call mpi_wait(y%communication_handle, p2pstat, iret)
if (iret /= mpi_success) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
if (debug) write(*,*) me,' nbr_tran_mv: scattering recv data,', topology_total_recv,' elems'
call y%sct(int(topology_total_send,psb_mpk_), &
& y%neighbor_topology%send_indexes, &
& y%combuf(topology_total_recv+1:topology_total_recv+topology_total_send), &
& beta)
y%communication_handle = mpi_request_null
call y%device_wait()
call y%maybe_free_buffer(info)
if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_, name)
goto 9999
end if
if (debug) write(*,*) me,' nbr_tran_mv: done'
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psi_itran_neighbor_topology_multivect
end submodule psi_i_swaptran_impl

@ -87,7 +87,7 @@
!
!
!
submodule (psi_l_comm_v_mod) psi_l_swapdata_impl
submodule (psi_l_comm_v_mod) psi_l_swapdata_impl
use psb_base_mod
contains
subroutine psi_lswapdata_vect(flag,beta,y,desc_a,info,data)
@ -100,22 +100,30 @@ contains
include 'mpif.h'
#endif
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_l_base_vect_type) :: y
integer(psb_lpk_) :: beta
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(in) :: flag
integer(psb_lpk_), intent(in) :: beta
class(psb_l_base_vect_type), intent(inout) :: y
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, idxs, idxr, totxch, data_, err_act
class(psb_i_base_vect_type), pointer :: d_vidx
character(len=20) :: name
info=psb_success_
name='psi_swap_datav'
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, total_send, total_recv, num_neighbors, data_, err_act
class(psb_i_base_vect_type), pointer :: comm_indexes
character(len=20) :: name
! Decide which communication scheme to use: baseline (point-to-point / alltoallv)
! or neighbor alltoallv topology (MPI >= 3.0). Follow conventions used in other
! swapdata implementations: baseline when any of swap_mpi/swap_sync/swap_send/swap_recv
! is set; neighbor topology when swap_start or swap_wait is set.
logical :: swap_mpi, swap_sync, swap_send, swap_recv, swap_start, swap_wait
logical :: baseline, neighbor_a2av
info = psb_success_
name = 'psi_lswapdata_vect'
call psb_erractionsave(err_act)
ctxt = desc_a%get_context()
@ -139,14 +147,40 @@ contains
data_ = psb_comm_halo_
end if
call desc_a%get_list_p(data_,d_vidx,totxch,idxr,idxs,info)
if (info /= psb_success_) then
call desc_a%get_list_p(data_,comm_indexes,num_neighbors,total_recv,total_send,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
call psi_swapdata(ctxt,flag,beta,y,d_vidx,totxch,idxs,idxr,info)
if (info /= psb_success_) goto 9999
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
swap_start = iand(flag,psb_swap_start_) /= 0
swap_wait = iand(flag,psb_swap_wait_) /= 0
baseline = swap_mpi .or. swap_send .or. swap_recv .or. swap_sync
neighbor_a2av = swap_start .or. swap_wait
if( (baseline.eqv..true.).and.(neighbor_a2av.eqv..true.) ) then
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: both baseline and neighbor_a2av are true')
goto 9999
end if
if (baseline) then
call psi_lswap_baseline_vect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) goto 9999
else if (neighbor_a2av) then
call psi_lswap_neighbor_topology_vect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) goto 9999
else
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: neither baseline nor neighbor_a2av is true')
goto 9999
end if
call psb_erractionrestore(err_act)
return
@ -157,21 +191,8 @@ contains
end subroutine psi_lswapdata_vect
!
!
! Subroutine: psi_lswap_vidx_vect
! Data exchange among processes.
!
! Takes care of Y an exanspulated vector. Relies on the gather/scatter methods
! of vectors.
!
! The real workhorse: the outer routine will only choose the index list
! this one takes the index list and does the actual exchange.
!
!
!
module subroutine psi_lswap_vidx_vect(ctxt,flag,beta,y,idx, &
& totxch,totsnd,totrcv,info)
subroutine psi_lswap_baseline_vect(ctxt,flag,beta,y,comm_indexes, &
& num_neighbors,total_send,total_recv,info)
#ifdef PSB_MPI_MOD
use mpi
@ -181,13 +202,13 @@ contains
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_l_base_vect_type) :: y
integer(psb_lpk_) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_lpk_), intent(in) :: beta
class(psb_l_base_vect_type), intent(inout) :: y
class(psb_i_base_vect_type), intent(inout) :: comm_indexes
integer(psb_ipk_), intent(in) :: num_neighbors,total_send, total_recv
integer(psb_ipk_), intent(out) :: info
! locals
integer(psb_mpk_) :: np, me
@ -202,8 +223,8 @@ contains
logical, parameter :: usersend=.false., debug=.false.
character(len=20) :: name
info=psb_success_
name='psi_swap_datav'
info = psb_success_
name = 'psi_lswap_baseline_vect'
call psb_erractionsave(err_act)
call psb_info(ctxt,me,np)
if (np == -1) then
@ -221,9 +242,9 @@ contains
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
totrcv_ = totrcv * n
totsnd_ = totsnd * n
call idx%sync()
totrcv_ = total_recv * n
totsnd_ = total_send * n
call comm_indexes%sync()
if (debug) write(*,*) me,'Internal buffer'
if (do_send) then
@ -238,16 +259,16 @@ contains
end if
end if
if (debug) write(*,*) me,'do_send start'
call y%new_buffer(ione*size(idx%v),info)
call y%new_comid(totxch,info)
call y%new_buffer(ione*size(comm_indexes%v),info)
call y%new_comid(num_neighbors,info)
y%comid = mpi_request_null
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
! First I post all the non blocking receives
pnti = 1
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
rcv_pt = 1+pnti+psb_n_elem_recv_
prcid(i) = psb_get_mpi_rank(ctxt,proc_to_comm)
@ -265,13 +286,13 @@ contains
! Then gather for sending.
!
pnti = 1
do i=1, totxch
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
snd_pt = 1+pnti+nerv+psb_n_elem_send_
rcv_pt = 1+pnti+psb_n_elem_recv_
idx_pt = snd_pt
call y%gth(idx_pt,nesd,idx)
call y%gth(idx_pt,nesd,comm_indexes)
pnti = pnti + nerv + nesd + 3
end do
@ -289,10 +310,10 @@ contains
snd_pt = 1
rcv_pt = 1
p2ptag = psb_long_swap_tag
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
snd_pt = 1+pnti+nerv+psb_n_elem_send_
rcv_pt = 1+pnti+psb_n_elem_recv_
@ -322,15 +343,15 @@ contains
call psb_errpush(info,name,m_err=(/-2/))
goto 9999
end if
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
if (debug) write(*,*) me,' wait'
pnti = 1
p2ptag = psb_long_swap_tag
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
snd_pt = 1+pnti+nerv+psb_n_elem_send_
rcv_pt = 1+pnti+psb_n_elem_recv_
@ -366,17 +387,17 @@ contains
pnti = 1
snd_pt = 1
rcv_pt = 1
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
idx_pt = 1+pnti+psb_n_elem_recv_
snd_pt = 1+pnti+nerv+psb_n_elem_send_
rcv_pt = 1+pnti+psb_n_elem_recv_
if (debug) write(0,*)me,' Received from: ',prcid(i),&
& y%combuf(rcv_pt:rcv_pt+nerv-1)
call y%sct(rcv_pt,nerv,idx,beta)
call y%sct(rcv_pt,nerv,comm_indexes,beta)
pnti = pnti + nerv + nesd + 3
end do
!
@ -406,7 +427,7 @@ contains
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psi_lswap_vidx_vect
end subroutine psi_lswap_baseline_vect
!
!
@ -425,22 +446,27 @@ contains
include 'mpif.h'
#endif
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_l_base_multivect_type) :: y
integer(psb_lpk_) :: beta
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(in) :: flag
integer(psb_lpk_), intent(in) :: beta
class(psb_l_base_multivect_type), intent(inout) :: y
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, idxs, idxr, totxch, data_, err_act
class(psb_i_base_vect_type), pointer :: d_vidx
character(len=20) :: name
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, total_send, total_recv, num_neighbors, data_, err_act
class(psb_i_base_vect_type), pointer :: comm_indexes
character(len=20) :: name
info=psb_success_
name='psi_swap_datav'
! Decide which communication scheme to use: baseline (point-to-point / alltoallv)
logical :: swap_mpi, swap_sync, swap_send, swap_recv, swap_start, swap_wait
logical :: baseline, neighbor_a2av
info = psb_success_
name = 'psi_lswapdata_multivect'
call psb_erractionsave(err_act)
ctxt = desc_a%get_context()
@ -464,14 +490,39 @@ contains
data_ = psb_comm_halo_
end if
call desc_a%get_list_p(data_,d_vidx,totxch,idxr,idxs,info)
if (info /= psb_success_) then
call desc_a%get_list_p(data_,comm_indexes,num_neighbors,total_recv,total_send,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
call psi_swapdata(ctxt,flag,beta,y,d_vidx,totxch,idxs,idxr,info)
if (info /= psb_success_) goto 9999
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
swap_start = iand(flag,psb_swap_start_) /= 0
swap_wait = iand(flag,psb_swap_wait_) /= 0
baseline = swap_mpi .or. swap_send .or. swap_recv .or. swap_sync
neighbor_a2av = swap_start .or. swap_wait
if( (baseline.eqv..true.).and.(neighbor_a2av.eqv..true.) ) then
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: both baseline and neighbor_a2av are true')
goto 9999
end if
if (baseline) then
call psi_lswap_baseline_multivect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) goto 9999
else if (neighbor_a2av) then
call psi_lswap_neighbor_topology_multivect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) goto 9999
else
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: neither baseline nor neighbor_a2av is true')
goto 9999
end if
call psb_erractionrestore(err_act)
return
@ -482,21 +533,8 @@ contains
end subroutine psi_lswapdata_multivect
!
!
! Subroutine: psi_lswap_vidx_multivect
! Data exchange among processes.
!
! Takes care of Y an encapsulated multivector. Relies on the gather/scatter methods
! of multivectors.
!
! The real workhorse: the outer routine will only choose the index list
! this one takes the index list and does the actual exchange.
!
!
!
module subroutine psi_lswap_vidx_multivect(ctxt,flag,beta,y,idx, &
& totxch,totsnd,totrcv,info)
subroutine psi_lswap_baseline_multivect(ctxt,flag,beta,y,comm_indexes, &
& num_neighbors,total_send,total_recv,info)
#ifdef PSB_MPI_MOD
use mpi
@ -509,10 +547,10 @@ contains
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_l_base_multivect_type) :: y
integer(psb_lpk_) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
class(psb_l_base_multivect_type), intent(inout) :: y
integer(psb_lpk_), intent(in) :: beta
class(psb_i_base_vect_type), intent(inout) :: comm_indexes
integer(psb_ipk_), intent(in) :: num_neighbors,total_send, total_recv
! locals
integer(psb_mpk_) :: np, me, nesd, nerv, n
@ -546,10 +584,10 @@ contains
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
totrcv_ = totrcv * n
totsnd_ = totsnd * n
totrcv_ = total_recv * n
totsnd_ = total_send * n
call idx%sync()
call comm_indexes%sync()
if (debug) write(*,*) me,'Internal buffer'
if (do_send) then
@ -564,18 +602,18 @@ contains
end if
end if
if (debug) write(*,*) me,'do_send start'
call y%new_buffer(ione*size(idx%v),info)
call y%new_comid(totxch,info)
call y%new_buffer(ione*size(comm_indexes%v),info)
call y%new_comid(num_neighbors,info)
y%comid = mpi_request_null
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
! First I post all the non blocking receives
pnti = 1
snd_pt = totrcv_+1
rcv_pt = 1
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
prcid(i) = psb_get_mpi_rank(ctxt,proc_to_comm)
if ((nerv>0).and.(proc_to_comm /= me)) then
if (debug) write(*,*) me,'Posting receive from',prcid(i),rcv_pt
@ -595,11 +633,11 @@ contains
pnti = 1
snd_pt = totrcv_+1
rcv_pt = 1
do i=1, totxch
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
idx_pt = 1+pnti+nerv+psb_n_elem_send_
call y%gth(idx_pt,snd_pt,nesd,idx)
call y%gth(idx_pt,snd_pt,nesd,comm_indexes)
rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3
@ -619,10 +657,10 @@ contains
snd_pt = totrcv_+1
rcv_pt = 1
p2ptag = psb_long_swap_tag
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
if ((nesd>0).and.(proc_to_comm /= me)) then
call mpi_isend(y%combuf(snd_pt),n*nesd,&
@ -651,17 +689,17 @@ contains
call psb_errpush(info,name,m_err=(/-2/))
goto 9999
end if
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
if (debug) write(*,*) me,' wait'
pnti = 1
snd_pt = totrcv_+1
rcv_pt = 1
p2ptag = psb_long_swap_tag
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
if (proc_to_comm /= me)then
if (nesd>0) then
call mpi_wait(y%comid(i,1),p2pstat,iret)
@ -696,15 +734,15 @@ contains
pnti = 1
snd_pt = totrcv_+1
rcv_pt = 1
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
idx_pt = 1+pnti+psb_n_elem_recv_
if (debug) write(0,*)me,' Received from: ',prcid(i),&
& y%combuf(rcv_pt:rcv_pt+n*nerv-1)
call y%sct(idx_pt,rcv_pt,nerv,idx,beta)
call y%sct(idx_pt,rcv_pt,nerv,comm_indexes,beta)
rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3
@ -736,6 +774,281 @@ contains
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psi_lswap_vidx_multivect
end subroutine psi_lswap_baseline_multivect
subroutine psi_lswap_neighbor_topology_vect(ctxt,flag,beta,y,comm_indexes, &
     & num_neighbors,total_send,total_recv,info)
  !
  ! Halo exchange for an encapsulated long-integer vector Y using a
  ! non-blocking MPI neighborhood collective (mpi_ineighbor_alltoallv)
  ! over a graph topology cached inside Y (y%neighbor_topology).
  !
  ! The exchange is split into two phases, selected by bits of FLAG:
  !   psb_swap_start_ : build/reuse the topology, gather send data into
  !                     y%combuf and post the non-blocking collective;
  !   psb_swap_wait_  : wait on the pending request, then scatter the
  !                     received data into Y combining with BETA (via y%sct).
  ! The two phases may occur in one call or in two separate calls; the
  ! in-flight request is kept in y%communication_handle between calls.
  !
  ! Arguments:
  !   ctxt          - parallel context
  !   flag          - bitmask selecting start and/or wait phase
  !   beta          - combine coefficient passed to the scatter
  !   y             - vector being exchanged; owns the communication
  !                   buffer, the MPI request handle and the cached topology
  !   comm_indexes  - encoded halo index list (from desc%get_list_p)
  !   num_neighbors - number of exchange partners
  !   total_send    - total number of entries to send
  !   total_recv    - total number of entries to receive
  !   info          - return code, psb_success_ on success
  !
#ifdef PSB_MPI_MOD
  use mpi
#endif
  implicit none
#ifdef PSB_MPI_H
  include 'mpif.h'
#endif
  type(psb_ctxt_type), intent(in) :: ctxt
  integer(psb_ipk_), intent(in) :: flag
  integer(psb_lpk_), intent(in) :: beta
  class(psb_l_base_vect_type), intent(inout) :: y
  class(psb_i_base_vect_type), intent(inout) :: comm_indexes
  integer(psb_ipk_), intent(in) :: num_neighbors,total_send,total_recv
  integer(psb_ipk_), intent(out) :: info

  ! locals
  integer(psb_mpk_) :: icomm
  integer(psb_mpk_) :: np, me
  integer(psb_mpk_) :: iret, p2pstat(mpi_status_size)
  integer(psb_ipk_) :: err_act, topology_total_send, topology_total_recv, buffer_size
  logical :: do_start, do_wait
  logical, parameter :: debug = .false.
  ! len=40: must hold the full routine name below (32 chars); len=30
  ! silently truncated it in error reports.
  character(len=40) :: name

  info = psb_success_
  name = 'psi_lswap_neighbor_topology_vect'
  call psb_erractionsave(err_act)
  call psb_info(ctxt,me,np)
  if (np == -1) then
    info=psb_err_context_error_
    call psb_errpush(info,name)
    goto 9999
  endif
  icomm = ctxt%get_mpic()

  do_start = iand(flag,psb_swap_start_) /= 0
  do_wait  = iand(flag,psb_swap_wait_) /= 0

  ! Make sure the index list is valid on the host side.
  call comm_indexes%sync()

  if (do_start) then
    if(debug) write(*,*) me,' nbr_swap: starting data exchange'

    ! Build the persistent graph topology once; later calls reuse it.
    if (.not. y%neighbor_topology%is_initialized) then
      if (debug) write(*,*) me,' nbr_swap: building topology'
      call y%neighbor_topology%init(comm_indexes%v, num_neighbors, total_send, total_recv, ctxt, icomm, info)
      if (info /= psb_success_) then
        call psb_errpush(psb_err_internal_error_, name, a_err='neighbor_topology_init')
        goto 9999
      end if
    end if

    ! Single buffer: [ send data | recv data ].
    topology_total_send = y%neighbor_topology%total_send
    topology_total_recv = y%neighbor_topology%total_recv
    buffer_size = topology_total_send + topology_total_recv
    call y%new_buffer(buffer_size, info)
    if (info /= 0) then
      call psb_errpush(psb_err_alloc_dealloc_, name)
      goto 9999
    end if
    y%communication_handle = mpi_request_null

    if (debug) write(*,*) me,' nbr_swap: gathering send data,', topology_total_send,' elems'
    call y%gth(int(topology_total_send,psb_mpk_), &
         & y%neighbor_topology%send_indexes, &
         & y%combuf(1:topology_total_send))
    ! Ensure device-side gather has completed before MPI reads the buffer.
    call y%device_wait()

    if (debug) write(*,*) me,' nbr_swap: posting MPI_Ineighbor_alltoallv'
    call mpi_ineighbor_alltoallv( &
         & y%combuf(1), &
         & y%neighbor_topology%send_counts, &
         & y%neighbor_topology%send_displs, &
         & psb_mpi_lpk_, &
         & y%combuf(topology_total_send + 1), &
         & y%neighbor_topology%recv_counts, &
         & y%neighbor_topology%recv_displs, &
         & psb_mpi_lpk_, &
         & y%neighbor_topology%graph_comm, &
         & y%communication_handle, iret)
    if (iret /= mpi_success) then
      info = psb_err_mpi_error_
      call psb_errpush(info, name, m_err=(/iret/))
      goto 9999
    end if
  end if

  if (do_wait) then
    ! A wait without a matching start is a caller error.
    if (y%communication_handle == mpi_request_null) then
      info = psb_err_mpi_error_
      call psb_errpush(info, name, m_err=(/-2/))
      goto 9999
    end if
    topology_total_send = y%neighbor_topology%total_send
    topology_total_recv = y%neighbor_topology%total_recv

    if (debug) write(*,*) me,' nbr_swap: waiting on MPI request'
    call mpi_wait(y%communication_handle, p2pstat, iret)
    if (iret /= mpi_success) then
      info = psb_err_mpi_error_
      call psb_errpush(info, name, m_err=(/iret/))
      goto 9999
    end if

    if (debug) write(*,*) me,' nbr_swap: scattering recv data,', topology_total_recv,' elems'
    call y%sct(int(topology_total_recv,psb_mpk_), &
         & y%neighbor_topology%recv_indexes, &
         & y%combuf(topology_total_send+1:topology_total_send+topology_total_recv), &
         & beta)
    y%communication_handle = mpi_request_null
    call y%device_wait()
    call y%maybe_free_buffer(info)
    if (info /= 0) then
      call psb_errpush(psb_err_alloc_dealloc_, name)
      goto 9999
    end if
    if (debug) write(*,*) me,' nbr_swap: done'
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(ctxt,err_act)
  return
end subroutine psi_lswap_neighbor_topology_vect
subroutine psi_lswap_neighbor_topology_multivect(ctxt,flag,beta,y,comm_indexes, &
     & num_neighbors,total_send,total_recv,info)
  !
  ! Halo exchange for an encapsulated long-integer MULTI-vector Y using a
  ! non-blocking MPI neighborhood collective (mpi_ineighbor_alltoallv)
  ! over a graph topology cached inside Y (y%neighbor_topology).
  !
  ! Two phases, selected by bits of FLAG:
  !   psb_swap_start_ : build/reuse the topology, gather send data into
  !                     y%combuf and post the non-blocking collective;
  !   psb_swap_wait_  : wait on the pending request, then scatter the
  !                     received data into Y combining with BETA (via y%sct).
  ! The phases may occur in one call or two; the in-flight request is kept
  ! in y%communication_handle between calls.
  !
  ! NOTE(review): the baseline multivect path scales all transfer counts
  ! by the number of columns n (totrcv_ = total_recv * n); here the counts
  ! come straight from the cached topology with no visible column scaling.
  ! Presumably neighbor_topology%init / y%gth account for the columns —
  ! TODO confirm against the topology init implementation.
  !
  ! Arguments:
  !   ctxt          - parallel context
  !   flag          - bitmask selecting start and/or wait phase
  !   beta          - combine coefficient passed to the scatter
  !   y             - multivector being exchanged; owns the communication
  !                   buffer, the MPI request handle and the cached topology
  !   comm_indexes  - encoded halo index list (from desc%get_list_p)
  !   num_neighbors - number of exchange partners
  !   total_send    - total number of entries to send
  !   total_recv    - total number of entries to receive
  !   info          - return code, psb_success_ on success
  !
#ifdef PSB_MPI_MOD
  use mpi
#endif
  implicit none
#ifdef PSB_MPI_H
  include 'mpif.h'
#endif
  type(psb_ctxt_type), intent(in) :: ctxt
  integer(psb_ipk_), intent(in) :: flag
  integer(psb_ipk_), intent(out) :: info
  class(psb_l_base_multivect_type), intent(inout) :: y
  integer(psb_lpk_), intent(in) :: beta
  class(psb_i_base_vect_type), intent(inout) :: comm_indexes
  integer(psb_ipk_), intent(in) :: num_neighbors,total_send,total_recv

  ! locals
  integer(psb_mpk_) :: icomm
  integer(psb_mpk_) :: np, me
  integer(psb_mpk_) :: iret, p2pstat(mpi_status_size)
  integer(psb_ipk_) :: err_act, topology_total_send, topology_total_recv, buffer_size
  logical :: do_start, do_wait
  logical, parameter :: debug = .false.
  ! len=40: the routine name below is 37 characters; the previous len=30
  ! silently truncated it in error reports.
  character(len=40) :: name

  info = psb_success_
  name = 'psi_lswap_neighbor_topology_multivect'
  call psb_erractionsave(err_act)
  call psb_info(ctxt,me,np)
  if (np == -1) then
    info=psb_err_context_error_
    call psb_errpush(info,name)
    goto 9999
  endif
  icomm = ctxt%get_mpic()

  do_start = iand(flag,psb_swap_start_) /= 0
  do_wait  = iand(flag,psb_swap_wait_) /= 0

  ! Make sure the index list is valid on the host side.
  call comm_indexes%sync()

  if (do_start) then
    if(debug) write(*,*) me,' nbr_mv: starting data exchange'

    ! Build the persistent graph topology once; later calls reuse it.
    if (.not. y%neighbor_topology%is_initialized) then
      if (debug) write(*,*) me,' nbr_mv: building topology'
      call y%neighbor_topology%init(comm_indexes%v, num_neighbors, total_send, total_recv, ctxt, icomm, info)
      if (info /= psb_success_) then
        call psb_errpush(psb_err_internal_error_, name, a_err='neighbor_topology_init')
        goto 9999
      end if
    end if

    ! Single buffer: [ send data | recv data ].
    topology_total_send = y%neighbor_topology%total_send
    topology_total_recv = y%neighbor_topology%total_recv
    buffer_size = topology_total_send + topology_total_recv
    call y%new_buffer(buffer_size, info)
    if (info /= 0) then
      call psb_errpush(psb_err_alloc_dealloc_, name)
      goto 9999
    end if
    y%communication_handle = mpi_request_null

    if (debug) write(*,*) me,' nbr_mv: gathering send data,', topology_total_send,' elems'
    call y%gth(int(topology_total_send,psb_mpk_), &
         & y%neighbor_topology%send_indexes, &
         & y%combuf(1:topology_total_send))
    ! Ensure device-side gather has completed before MPI reads the buffer.
    call y%device_wait()

    if (debug) write(*,*) me,' nbr_mv: posting MPI_Ineighbor_alltoallv'
    call mpi_ineighbor_alltoallv( &
         & y%combuf(1), &
         & y%neighbor_topology%send_counts, &
         & y%neighbor_topology%send_displs, &
         & psb_mpi_lpk_, &
         & y%combuf(topology_total_send + 1), &
         & y%neighbor_topology%recv_counts, &
         & y%neighbor_topology%recv_displs, &
         & psb_mpi_lpk_, &
         & y%neighbor_topology%graph_comm, &
         & y%communication_handle, iret)
    if (iret /= mpi_success) then
      info = psb_err_mpi_error_
      call psb_errpush(info, name, m_err=(/iret/))
      goto 9999
    end if
  end if

  if (do_wait) then
    ! A wait without a matching start is a caller error.
    if (y%communication_handle == mpi_request_null) then
      info = psb_err_mpi_error_
      call psb_errpush(info, name, m_err=(/-2/))
      goto 9999
    end if
    topology_total_send = y%neighbor_topology%total_send
    topology_total_recv = y%neighbor_topology%total_recv

    if (debug) write(*,*) me,' nbr_mv: waiting on MPI request'
    call mpi_wait(y%communication_handle, p2pstat, iret)
    if (iret /= mpi_success) then
      info = psb_err_mpi_error_
      call psb_errpush(info, name, m_err=(/iret/))
      goto 9999
    end if

    if (debug) write(*,*) me,' nbr_mv: scattering recv data,', topology_total_recv,' elems'
    call y%sct(int(topology_total_recv,psb_mpk_), &
         & y%neighbor_topology%recv_indexes, &
         & y%combuf(topology_total_send+1:topology_total_send+topology_total_recv), &
         & beta)
    y%communication_handle = mpi_request_null
    call y%device_wait()
    call y%maybe_free_buffer(info)
    if (info /= 0) then
      call psb_errpush(psb_err_alloc_dealloc_, name)
      goto 9999
    end if
    if (debug) write(*,*) me,' nbr_mv: done'
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(ctxt,err_act)
  return
end subroutine psi_lswap_neighbor_topology_multivect
end submodule psi_l_swapdata_impl

@ -102,22 +102,26 @@ contains
include 'mpif.h'
#endif
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_l_base_vect_type) :: y
integer(psb_lpk_) :: beta
type(psb_desc_type),target :: desc_a
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(in) :: flag
integer(psb_lpk_), intent(in) :: beta
class(psb_l_base_vect_type), intent(inout) :: y
type(psb_desc_type),target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, idxs, idxr, totxch, err_act, data_
class(psb_i_base_vect_type), pointer :: d_vidx
character(len=20) :: name
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, total_send, total_recv, num_neighbors, err_act, data_
class(psb_i_base_vect_type), pointer :: comm_indexes
character(len=20) :: name
info=psb_success_
name='psi_swap_tranv'
logical :: swap_mpi, swap_sync, swap_send, swap_recv, swap_start, swap_wait
logical :: baseline, neighbor_a2av
info = psb_success_
name = 'psi_lswaptran_vect'
call psb_erractionsave(err_act)
ctxt = desc_a%get_context()
@ -130,7 +134,7 @@ contains
endif
if (.not.psb_is_asb_desc(desc_a)) then
info=psb_err_invalid_cd_state_
info = psb_err_invalid_cd_state_
call psb_errpush(info,name)
goto 9999
endif
@ -141,14 +145,39 @@ contains
data_ = psb_comm_halo_
end if
call desc_a%get_list_p(data_,d_vidx,totxch,idxr,idxs,info)
if (info /= psb_success_) then
call desc_a%get_list_p(data_,comm_indexes,num_neighbors,total_recv,total_send,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
call psi_swaptran(ctxt,flag,beta,y,d_vidx,totxch,idxs,idxr,info)
if (info /= psb_success_) goto 9999
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
swap_start = iand(flag,psb_swap_start_) /= 0
swap_wait = iand(flag,psb_swap_wait_) /= 0
baseline = swap_mpi .or. swap_send .or. swap_recv .or. swap_sync
neighbor_a2av = swap_start .or. swap_wait
if( (baseline.eqv..true.).and.(neighbor_a2av.eqv..true.) ) then
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: both baseline and neighbor_a2av are true')
goto 9999
end if
if (baseline) then
call psi_ltran_baseline_vect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) goto 9999
else if (neighbor_a2av) then
call psi_ltran_neighbor_topology_vect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) goto 9999
else
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: neither baseline nor neighbor_a2av is true')
goto 9999
end if
call psb_erractionrestore(err_act)
return
@ -158,21 +187,9 @@ contains
return
end subroutine psi_lswaptran_vect
!
!
! Subroutine: psi_ltran_vidx_vect
! Data exchange among processes.
!
! Takes care of Y an encapsulated vector. Relies on the gather/scatter methods
! of vectors.
!
! The real workhorse: the outer routine will only choose the index list
! this one takes the index list and does the actual exchange.
!
!
!
module subroutine psi_ltran_vidx_vect(ctxt,flag,beta,y,idx,&
& totxch,totsnd,totrcv,info)
subroutine psi_ltran_baseline_vect(ctxt,flag,beta,y,comm_indexes,&
& num_neighbors,total_send,total_recv,info)
#ifdef PSB_MPI_MOD
use mpi
@ -182,32 +199,32 @@ contains
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_l_base_vect_type) :: y
integer(psb_lpk_) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_lpk_), intent(in) :: beta
class(psb_l_base_vect_type), intent(inout) :: y
class(psb_i_base_vect_type), intent(inout) :: comm_indexes
integer(psb_ipk_), intent(in) :: num_neighbors,total_send,total_recv
integer(psb_ipk_), intent(out) :: info
! locals
integer(psb_mpk_) :: np, me, nesd, nerv, n
integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
integer(psb_mpk_) :: icomm
integer(psb_mpk_), allocatable :: prcid(:)
integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,&
integer(psb_ipk_) :: err_act, i, idx_pt, total_send_, total_recv_,&
& snd_pt, rcv_pt, pnti
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
logical, parameter :: usersend=.false., debug=.false.
character(len=20) :: name
info=psb_success_
name='psi_swap_tran'
info = psb_success_
name = 'psi_ltran_baseline_vect'
call psb_erractionsave(err_act)
call psb_info(ctxt,me,np)
if (np == -1) then
info=psb_err_context_error_
info = psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
@ -221,10 +238,10 @@ contains
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
totrcv_ = totrcv * n
totsnd_ = totsnd * n
total_recv_ = total_recv * n
total_send_ = total_send * n
call idx%sync()
call comm_indexes%sync()
if (debug) write(*,*) me,'Internal buffer'
if (do_send) then
@ -239,17 +256,17 @@ contains
end if
end if
if (debug) write(*,*) me,'do_send start'
call y%new_buffer(ione*size(idx%v),info)
call y%new_comid(totxch,info)
call y%new_buffer(ione*size(comm_indexes%v),info)
call y%new_comid(num_neighbors,info)
y%comid = mpi_request_null
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
! First I post all the non blocking receives
pnti = 1
p2ptag = psb_long_swap_tag
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
snd_pt = 1+pnti+nerv+psb_n_elem_send_
rcv_pt = 1+pnti+psb_n_elem_recv_
@ -269,14 +286,14 @@ contains
!
pnti = 1
snd_pt = 1
do i=1, totxch
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
snd_pt = 1+pnti+nerv+psb_n_elem_send_
rcv_pt = 1+pnti+psb_n_elem_recv_
idx_pt = rcv_pt
call y%gth(idx_pt,nerv,idx)
call y%gth(idx_pt,nerv,comm_indexes)
pnti = pnti + nerv + nesd + 3
end do
@ -295,10 +312,10 @@ contains
snd_pt = 1
rcv_pt = 1
p2ptag = psb_long_swap_tag
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
snd_pt = 1+pnti+nerv+psb_n_elem_send_
rcv_pt = 1+pnti+psb_n_elem_recv_
@ -328,15 +345,15 @@ contains
call psb_errpush(info,name,m_err=(/-2/))
goto 9999
end if
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
if (debug) write(*,*) me,' wait'
pnti = 1
p2ptag = psb_long_swap_tag
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
snd_pt = 1+pnti+nerv+psb_n_elem_send_
rcv_pt = 1+pnti+psb_n_elem_recv_
@ -372,17 +389,17 @@ contains
pnti = 1
snd_pt = 1
rcv_pt = 1
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
idx_pt = 1+pnti+psb_n_elem_recv_
snd_pt = 1+pnti+nerv+psb_n_elem_send_
rcv_pt = 1+pnti+psb_n_elem_recv_
if (debug) write(0,*)me,' Received from: ',prcid(i),&
& y%combuf(snd_pt:snd_pt+nesd-1)
call y%sct(snd_pt,nesd,idx,beta)
call y%sct(snd_pt,nesd,comm_indexes,beta)
pnti = pnti + nerv + nesd + 3
end do
!
@ -413,21 +430,10 @@ contains
return
end subroutine psi_ltran_vidx_vect
end subroutine psi_ltran_baseline_vect
!
!
!
!
! Subroutine: psi_lswaptran_multivect
! Data exchange among processes.
!
! Takes care of Y an encaspulated multivector.
!
!
module subroutine psi_lswaptran_multivect(flag,beta,y,desc_a,info,data)
subroutine psi_lswaptran_multivect(flag,beta,y,desc_a,info,data)
#ifdef PSB_MPI_MOD
use mpi
#endif
@ -436,22 +442,25 @@ contains
include 'mpif.h'
#endif
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_l_base_multivect_type) :: y
integer(psb_lpk_) :: beta
type(psb_desc_type),target :: desc_a
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(in) :: flag
integer(psb_lpk_), intent(in) :: beta
class(psb_l_base_multivect_type), intent(inout) :: y
type(psb_desc_type),target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, idxs, idxr, totxch, err_act, data_
class(psb_i_base_vect_type), pointer :: d_vidx
character(len=20) :: name
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, total_send, total_recv, num_neighbors, err_act, data_
class(psb_i_base_vect_type), pointer :: comm_indexes
character(len=20) :: name
info=psb_success_
name='psi_swap_tranv'
logical :: swap_mpi, swap_sync, swap_send, swap_recv, swap_start, swap_wait
logical :: baseline, neighbor_a2av
info = psb_success_
name = 'psi_lswaptran_multivect'
call psb_erractionsave(err_act)
ctxt = desc_a%get_context()
@ -475,14 +484,39 @@ contains
data_ = psb_comm_halo_
end if
call desc_a%get_list_p(data_,d_vidx,totxch,idxr,idxs,info)
if (info /= psb_success_) then
call desc_a%get_list_p(data_,comm_indexes,num_neighbors,total_recv,total_send,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
call psi_swaptran(ctxt,flag,beta,y,d_vidx,totxch,idxs,idxr,info)
if (info /= psb_success_) goto 9999
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
swap_start = iand(flag,psb_swap_start_) /= 0
swap_wait = iand(flag,psb_swap_wait_) /= 0
baseline = swap_mpi .or. swap_send .or. swap_recv .or. swap_sync
neighbor_a2av = swap_start .or. swap_wait
if( (baseline.eqv..true.).and.(neighbor_a2av.eqv..true.) ) then
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: both baseline and neighbor_a2av are true')
goto 9999
end if
if (baseline) then
call psi_ltran_baseline_multivect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) goto 9999
else if (neighbor_a2av) then
call psi_ltran_neighbor_topology_multivect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) goto 9999
else
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: neither baseline nor neighbor_a2av is true')
goto 9999
end if
call psb_erractionrestore(err_act)
return
@ -493,21 +527,8 @@ contains
end subroutine psi_lswaptran_multivect
!
!
! Subroutine: psi_ltran_vidx_multivect
! Data exchange among processes.
!
! Takes care of Y an encapsulated multivector. Relies on the gather/scatter methods
! of multivectors.
!
! The real workhorse: the outer routine will only choose the index list
! this one takes the index list and does the actual exchange.
!
!
!
module subroutine psi_ltran_vidx_multivect(ctxt,flag,beta,y,idx,&
& totxch,totsnd,totrcv,info)
subroutine psi_ltran_baseline_multivect(ctxt,flag,beta,y,comm_indexes,&
& num_neighbors,total_send,total_recv,info)
#ifdef PSB_MPI_MOD
use mpi
@ -517,28 +538,28 @@ contains
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_l_base_multivect_type) :: y
integer(psb_lpk_) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_lpk_), intent(in) :: beta
class(psb_l_base_multivect_type), intent(inout) :: y
class(psb_i_base_vect_type), intent(inout) :: comm_indexes
integer(psb_ipk_), intent(in) :: num_neighbors,total_send, total_recv
integer(psb_ipk_), intent(out) :: info
! locals
integer(psb_mpk_) :: np, me, nesd, nerv, n
integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
integer(psb_mpk_) :: icomm
integer(psb_mpk_), allocatable :: prcid(:)
integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,&
integer(psb_ipk_) :: err_act, i, idx_pt, total_send_, total_recv_,&
& snd_pt, rcv_pt, pnti
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
logical, parameter :: usersend=.false., debug=.false.
character(len=20) :: name
info=psb_success_
name='psi_swap_tran'
info = psb_success_
name = 'psi_ltran_baseline_multivect'
call psb_erractionsave(err_act)
call psb_info(ctxt,me,np)
if (np == -1) then
@ -557,10 +578,10 @@ contains
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
totrcv_ = totrcv * n
totsnd_ = totsnd * n
total_recv_ = total_recv * n
total_send_ = total_send * n
call idx%sync()
call comm_indexes%sync()
if (debug) write(*,*) me,'Internal buffer'
if (do_send) then
@ -575,19 +596,19 @@ contains
end if
end if
if (debug) write(*,*) me,'do_send start'
call y%new_buffer(ione*size(idx%v),info)
call y%new_comid(totxch,info)
call y%new_buffer(ione*size(comm_indexes%v),info)
call y%new_comid(num_neighbors,info)
y%comid = mpi_request_null
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
! First I post all the non blocking receives
pnti = 1
snd_pt = totrcv_+1
snd_pt = total_recv_+1
rcv_pt = 1
p2ptag = psb_long_swap_tag
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
prcid(i) = psb_get_mpi_rank(ctxt,proc_to_comm)
if ((nesd>0).and.(proc_to_comm /= me)) then
if (debug) write(*,*) me,'Posting receive from',prcid(i),snd_pt
@ -605,13 +626,13 @@ contains
! Then gather for sending.
!
pnti = 1
snd_pt = totrcv_+1
snd_pt = total_recv_+1
rcv_pt = 1
do i=1, totxch
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
idx_pt = 1+pnti+psb_n_elem_recv_
call y%gth(idx_pt,rcv_pt,nerv,idx)
call y%gth(idx_pt,rcv_pt,nerv,comm_indexes)
rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3
@ -628,13 +649,13 @@ contains
!
pnti = 1
snd_pt = totrcv_+1
snd_pt = total_recv_+1
rcv_pt = 1
p2ptag = psb_long_swap_tag
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
idx_pt = 1+pnti+psb_n_elem_recv_
if ((nerv>0).and.(proc_to_comm /= me)) then
@ -664,17 +685,17 @@ contains
call psb_errpush(info,name,m_err=(/-2/))
goto 9999
end if
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
if (debug) write(*,*) me,' wait'
pnti = 1
snd_pt = totrcv_+1
snd_pt = total_recv_+1
rcv_pt = 1
p2ptag = psb_long_swap_tag
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
if (proc_to_comm /= me)then
if (nerv>0) then
call mpi_wait(y%comid(i,1),p2pstat,iret)
@ -707,17 +728,17 @@ contains
if (debug) write(*,*) me,' scatter'
pnti = 1
snd_pt = totrcv_+1
snd_pt = total_recv_+1
rcv_pt = 1
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
idx_pt = 1+pnti+nerv+psb_n_elem_send_
if (debug) write(0,*)me,' Received from: ',prcid(i),&
& y%combuf(snd_pt:snd_pt+n*nesd-1)
call y%sct(idx_pt,snd_pt,nesd,idx,beta)
call y%sct(idx_pt,snd_pt,nesd,comm_indexes,beta)
rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3
@ -752,6 +773,282 @@ contains
return
end subroutine psi_ltran_vidx_multivect
end subroutine psi_ltran_baseline_multivect
subroutine psi_ltran_neighbor_topology_vect(ctxt,flag,beta,y,comm_indexes, &
     & num_neighbors,total_send,total_recv,info)
  !
  ! Transpose data exchange for an encapsulated long-integer vector, using a
  ! persistent MPI neighbourhood topology and MPI_Ineighbor_alltoallv.
  !
  ! ctxt          - parallel context.
  ! flag          - phase bitmask: psb_swap_start_ posts the non-blocking
  !                 exchange, psb_swap_wait_ completes it and scatters.
  ! beta          - scaling applied by y%sct (overwrite or sum).
  ! y             - the data area; caches the topology, the communication
  !                 buffer (combuf) and the outstanding MPI request.
  ! comm_indexes  - descriptor index list used to build the topology.
  ! num_neighbors, total_send, total_recv - sizes from the descriptor list.
  ! info          - return code.
  !
#ifdef PSB_MPI_MOD
  use mpi
#endif
  implicit none
#ifdef PSB_MPI_H
  include 'mpif.h'
#endif
  type(psb_ctxt_type), intent(in) :: ctxt
  integer(psb_ipk_), intent(in) :: flag
  integer(psb_ipk_), intent(out) :: info
  class(psb_l_base_vect_type) :: y
  integer(psb_lpk_) :: beta
  class(psb_i_base_vect_type), intent(inout) :: comm_indexes
  integer(psb_ipk_), intent(in) :: num_neighbors,total_send, total_recv
  ! locals
  integer(psb_mpk_) :: icomm
  integer(psb_mpk_) :: np, me
  integer(psb_mpk_) :: iret, p2pstat(mpi_status_size)
  integer(psb_ipk_) :: err_act, topology_total_send, topology_total_recv, buffer_size
  logical :: do_start, do_wait
  logical, parameter :: debug = .false.
  ! len=40: the routine name is 32 characters; len=30 would silently truncate
  ! it in error reports (Fortran character assignment truncates).
  character(len=40) :: name

  info = psb_success_
  name = 'psi_ltran_neighbor_topology_vect'
  call psb_erractionsave(err_act)
  call psb_info(ctxt,me,np)
  if (np == -1) then
    info=psb_err_context_error_
    call psb_errpush(info,name)
    goto 9999
  endif
  icomm = ctxt%get_mpic()

  do_start = iand(flag,psb_swap_start_) /= 0
  do_wait  = iand(flag,psb_swap_wait_) /= 0
  call comm_indexes%sync()

  if (do_start) then
    if (debug) write(*,*) me,' nbr_tran: starting data exchange'
    ! The topology is built once and cached on y.
    if (.not. y%neighbor_topology%is_initialized) then
      if (debug) write(*,*) me,' nbr_tran: building topology'
      call y%neighbor_topology%init(comm_indexes%v, num_neighbors, total_send, total_recv, ctxt, icomm, info)
      if (info /= psb_success_) then
        call psb_errpush(psb_err_internal_error_, name, a_err='neighbor_topology_init')
        goto 9999
      end if
    end if
    topology_total_send = y%neighbor_topology%total_send
    topology_total_recv = y%neighbor_topology%total_recv
    buffer_size = topology_total_send + topology_total_recv
    call y%new_buffer(buffer_size, info)
    if (info /= 0) then
      call psb_errpush(psb_err_alloc_dealloc_, name)
      goto 9999
    end if
    y%communication_handle = mpi_request_null
    if (debug) write(*,*) me,' nbr_tran: gathering recv-side data,', topology_total_recv,' elems'
    ! Transpose exchange: send/recv roles are swapped w.r.t. the halo
    ! exchange.  We gather from the halo (recv) indexes and send with the
    ! recv counts/displacements below (cf. psi_stran_neighbor_topology_vect).
    ! BUG FIX: the gather count must match recv_indexes and the
    ! combuf(1:topology_total_recv) slice; it previously used
    ! topology_total_send.
    call y%gth(int(topology_total_recv,psb_mpk_), &
         & y%neighbor_topology%recv_indexes, &
         & y%combuf(1:topology_total_recv))
    call y%device_wait()
    if (debug) write(*,*) me,' nbr_tran: posting MPI_Ineighbor_alltoallv'
    call mpi_ineighbor_alltoallv( &
         & y%combuf(1), &
         & y%neighbor_topology%recv_counts, &
         & y%neighbor_topology%recv_displs, &
         & psb_mpi_lpk_, &
         & y%combuf(topology_total_recv + 1), &
         & y%neighbor_topology%send_counts, &
         & y%neighbor_topology%send_displs, &
         & psb_mpi_lpk_, &
         & y%neighbor_topology%graph_comm, &
         & y%communication_handle, iret)
    if (iret /= mpi_success) then
      info = psb_err_mpi_error_
      call psb_errpush(info, name, m_err=(/iret/))
      goto 9999
    end if
  end if

  if (do_wait) then
    ! A wait without a matching start is a caller error.
    if (y%communication_handle == mpi_request_null) then
      info = psb_err_mpi_error_
      call psb_errpush(info, name, m_err=(/-2/))
      goto 9999
    end if
    topology_total_send = y%neighbor_topology%total_send
    topology_total_recv = y%neighbor_topology%total_recv
    if (debug) write(*,*) me,' nbr_tran: waiting on MPI request'
    call mpi_wait(y%communication_handle, p2pstat, iret)
    if (iret /= mpi_success) then
      info = psb_err_mpi_error_
      call psb_errpush(info, name, m_err=(/iret/))
      goto 9999
    end if
    ! The scatter targets the boundary (send) indexes; the debug message now
    ! reports the matching count (it previously printed topology_total_recv).
    if (debug) write(*,*) me,' nbr_tran: scattering send-side data,', topology_total_send,' elems'
    call y%sct(int(topology_total_send,psb_mpk_), &
         & y%neighbor_topology%send_indexes, &
         & y%combuf(topology_total_recv+1:topology_total_recv+topology_total_send), &
         & beta)
    y%communication_handle = mpi_request_null
    call y%device_wait()
    call y%maybe_free_buffer(info)
    if (info /= 0) then
      call psb_errpush(psb_err_alloc_dealloc_, name)
      goto 9999
    end if
    if (debug) write(*,*) me,' nbr_tran: done'
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(ctxt,err_act)
  return
end subroutine psi_ltran_neighbor_topology_vect
subroutine psi_ltran_neighbor_topology_multivect(ctxt,flag,beta,y,comm_indexes,&
     & num_neighbors,total_send,total_recv,info)
  !
  ! Transpose data exchange for an encapsulated long-integer multivector,
  ! using a persistent MPI neighbourhood topology and MPI_Ineighbor_alltoallv.
  !
  ! ctxt          - parallel context.
  ! flag          - phase bitmask: psb_swap_start_ posts the non-blocking
  !                 exchange, psb_swap_wait_ completes it and scatters.
  ! beta          - scaling applied by y%sct (overwrite or sum).
  ! y             - the data area; caches the topology, combuf and request.
  ! comm_indexes  - descriptor index list used to build the topology.
  ! num_neighbors, total_send, total_recv - sizes from the descriptor list.
  ! info          - return code.
  !
  ! BUG FIX: this routine previously used the forward (halo-exchange)
  ! pattern - gather on send_indexes, send with send_counts, scatter on
  ! recv_indexes - which is the swapdata direction, not the transpose.
  ! It is now aligned with psi_ltran_neighbor_topology_vect and
  ! psi_stran_neighbor_topology_multivect: gather from the halo (recv)
  ! indexes, send with the recv counts/displacements, scatter to the
  ! boundary (send) indexes.
  !
#ifdef PSB_MPI_MOD
  use mpi
#endif
  implicit none
#ifdef PSB_MPI_H
  include 'mpif.h'
#endif
  type(psb_ctxt_type), intent(in) :: ctxt
  integer(psb_ipk_), intent(in) :: flag
  integer(psb_ipk_), intent(out) :: info
  class(psb_l_base_multivect_type) :: y
  integer(psb_lpk_) :: beta
  class(psb_i_base_vect_type), intent(inout) :: comm_indexes
  integer(psb_ipk_), intent(in) :: num_neighbors,total_send, total_recv
  ! locals
  integer(psb_mpk_) :: icomm
  integer(psb_mpk_) :: np, me
  integer(psb_mpk_) :: iret, p2pstat(mpi_status_size)
  integer(psb_ipk_) :: err_act, topology_total_send, topology_total_recv, buffer_size
  logical :: do_start, do_wait
  logical, parameter :: debug = .false.
  ! len=40: the routine name is 37 characters; len=30 would truncate it.
  character(len=40) :: name

  info = psb_success_
  name = 'psi_ltran_neighbor_topology_multivect'
  call psb_erractionsave(err_act)
  call psb_info(ctxt,me,np)
  if (np == -1) then
    info=psb_err_context_error_
    call psb_errpush(info,name)
    goto 9999
  endif
  icomm = ctxt%get_mpic()

  do_start = iand(flag,psb_swap_start_) /= 0
  do_wait  = iand(flag,psb_swap_wait_) /= 0
  call comm_indexes%sync()

  if (do_start) then
    if (debug) write(*,*) me,' nbr_mv: starting data exchange'
    ! The topology is built once and cached on y.
    if (.not. y%neighbor_topology%is_initialized) then
      if (debug) write(*,*) me,' nbr_mv: building topology'
      call y%neighbor_topology%init(comm_indexes%v, num_neighbors, total_send, total_recv, ctxt, icomm, info)
      if (info /= psb_success_) then
        call psb_errpush(psb_err_internal_error_, name, a_err='neighbor_topology_init')
        goto 9999
      end if
    end if
    topology_total_send = y%neighbor_topology%total_send
    topology_total_recv = y%neighbor_topology%total_recv
    buffer_size = topology_total_send + topology_total_recv
    call y%new_buffer(buffer_size, info)
    if (info /= 0) then
      call psb_errpush(psb_err_alloc_dealloc_, name)
      goto 9999
    end if
    y%communication_handle = mpi_request_null
    if (debug) write(*,*) me,' nbr_mv: gathering recv-side data,', topology_total_recv,' elems'
    call y%gth(int(topology_total_recv,psb_mpk_), &
         & y%neighbor_topology%recv_indexes, &
         & y%combuf(1:topology_total_recv))
    call y%device_wait()
    if (debug) write(*,*) me,' nbr_mv: posting MPI_Ineighbor_alltoallv'
    call mpi_ineighbor_alltoallv( &
         & y%combuf(1), &
         & y%neighbor_topology%recv_counts, &
         & y%neighbor_topology%recv_displs, &
         & psb_mpi_lpk_, &
         & y%combuf(topology_total_recv + 1), &
         & y%neighbor_topology%send_counts, &
         & y%neighbor_topology%send_displs, &
         & psb_mpi_lpk_, &
         & y%neighbor_topology%graph_comm, &
         & y%communication_handle, iret)
    if (iret /= mpi_success) then
      info = psb_err_mpi_error_
      call psb_errpush(info, name, m_err=(/iret/))
      goto 9999
    end if
  end if

  if (do_wait) then
    ! A wait without a matching start is a caller error.
    if (y%communication_handle == mpi_request_null) then
      info = psb_err_mpi_error_
      call psb_errpush(info, name, m_err=(/-2/))
      goto 9999
    end if
    topology_total_send = y%neighbor_topology%total_send
    topology_total_recv = y%neighbor_topology%total_recv
    if (debug) write(*,*) me,' nbr_mv: waiting on MPI request'
    call mpi_wait(y%communication_handle, p2pstat, iret)
    if (iret /= mpi_success) then
      info = psb_err_mpi_error_
      call psb_errpush(info, name, m_err=(/iret/))
      goto 9999
    end if
    if (debug) write(*,*) me,' nbr_mv: scattering send-side data,', topology_total_send,' elems'
    call y%sct(int(topology_total_send,psb_mpk_), &
         & y%neighbor_topology%send_indexes, &
         & y%combuf(topology_total_recv+1:topology_total_recv+topology_total_send), &
         & beta)
    y%communication_handle = mpi_request_null
    call y%device_wait()
    call y%maybe_free_buffer(info)
    if (info /= 0) then
      call psb_errpush(psb_err_alloc_dealloc_, name)
      goto 9999
    end if
    if (debug) write(*,*) me,' nbr_mv: done'
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(ctxt,err_act)
  return
end subroutine psi_ltran_neighbor_topology_multivect
end submodule psi_l_swaptran_impl

@ -103,26 +103,26 @@ contains
#endif
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_s_base_vect_type), intent(inout) :: y
real(psb_spk_), intent(in) :: beta
class(psb_s_base_vect_type), intent(inout) :: y
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np, me, total_send, total_recv, num_neighbors, data_, err_act
class(psb_i_base_vect_type), pointer :: comm_indexes
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np, me, total_send, total_recv, num_neighbors, data_, err_act
class(psb_i_base_vect_type), pointer :: comm_indexes
! local variables used to detect the communication scheme
logical :: swap_mpi, swap_sync, swap_send, swap_recv, swap_start, swap_wait
logical :: baseline, neighbor_a2av
logical :: swap_mpi, swap_sync, swap_send, swap_recv, swap_start, swap_wait
logical :: baseline, neighbor_a2av
! error handling variables
integer(psb_ipk_) :: err_act
integer(psb_mpk_) :: me, np
character(len=30) :: name
integer(psb_ipk_) :: err_act
integer(psb_mpk_) :: me, np
character(len=30) :: name
info = psb_success_

@ -80,7 +80,6 @@
! beta - real Choose overwrite or sum.
! y - type(psb_s_vect_type) The data area
! desc_a - type(psb_desc_type). The communication descriptor.
! our own internal allocation.
! info - integer. return code.
! data - integer which list is to be used to exchange data
! default psb_comm_halo_
@ -111,11 +110,15 @@ contains
integer(psb_ipk_), optional :: data
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, idxs, idxr, totxch, err_act, data_
class(psb_i_base_vect_type), pointer :: d_vidx
character(len=20) :: name
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, total_send, total_recv, num_neighbors, err_act, data_
class(psb_i_base_vect_type), pointer :: comm_indexes
character(len=20) :: name
! local variables used to detect the communication scheme
logical :: swap_mpi, swap_sync, swap_send, swap_recv, swap_start, swap_wait
logical :: baseline, neighbor_a2av
info = psb_success_
name = 'psi_sswaptran_vect'
@ -131,7 +134,7 @@ contains
endif
if (.not.psb_is_asb_desc(desc_a)) then
info=psb_err_invalid_cd_state_
info = psb_err_invalid_cd_state_
call psb_errpush(info,name)
goto 9999
endif
@ -142,14 +145,45 @@ contains
data_ = psb_comm_halo_
end if
call desc_a%get_list_p(data_,d_vidx,totxch,idxr,idxs,info)
call desc_a%get_list_p(data_,comm_indexes,num_neighbors,total_recv,total_send,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
call psi_swaptran(ctxt,flag,beta,y,d_vidx,totxch,idxs,idxr,info)
if (info /= psb_success_) goto 9999
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
swap_start = iand(flag,psb_swap_start_) /= 0
swap_wait = iand(flag,psb_swap_wait_) /= 0
baseline = swap_mpi .or. swap_send .or. swap_recv .or. swap_sync
neighbor_a2av = swap_start .or. swap_wait
if( (baseline.eqv..true.).and.(neighbor_a2av.eqv..true.) ) then
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: both baseline and neighbor_a2av are true')
goto 9999
end if
if (baseline) then
call psi_stran_baseline_vect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) then
call psb_errpush(info,name,a_err='baseline swap')
goto 9999
end if
else if (neighbor_a2av) then
call psi_stran_neighbor_topology_vect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) then
call psb_errpush(info,name,a_err='neighbor a2av swap')
goto 9999
end if
else
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: neither baseline nor neighbor_a2av is true')
goto 9999
end if
call psb_erractionrestore(err_act)
return
@ -159,21 +193,9 @@ contains
return
end subroutine psi_sswaptran_vect
!
!
! Subroutine: psi_stran_vidx_vect
! Data exchange among processes.
!
! Takes care of Y an encapsulated vector. Relies on the gather/scatter methods
! of vectors.
!
! The real workhorse: the outer routine will only choose the index list
! this one takes the index list and does the actual exchange.
!
!
!
module subroutine psi_stran_vidx_vect(ctxt,flag,beta,y,idx,&
& totxch,totsnd,totrcv,info)
module subroutine psi_stran_baseline_vect(ctxt,flag,beta,y,comm_indexes,&
& num_neighbors,total_send,total_recv,info)
#ifdef PSB_MPI_MOD
use mpi
@ -185,18 +207,18 @@ contains
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
real(psb_spk_), intent(in) :: beta
class(psb_s_base_vect_type), intent(inout) :: y
real(psb_spk_), intent(in) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
class(psb_i_base_vect_type), intent(inout) :: comm_indexes
integer(psb_ipk_), intent(in) :: num_neighbors,total_send,total_recv
integer(psb_ipk_), intent(out) :: info
! locals
integer(psb_mpk_) :: np, me, nesd, nerv, n
integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
integer(psb_mpk_) :: icomm
integer(psb_mpk_), allocatable :: prcid(:)
integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,&
integer(psb_ipk_) :: err_act, i, idx_pt, total_send_, total_recv_,&
& snd_pt, rcv_pt, pnti
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
@ -204,7 +226,7 @@ contains
character(len=20) :: name
info=psb_success_
name='psi_swap_tran'
name='psi_stran_baseline_vect'
call psb_erractionsave(err_act)
call psb_info(ctxt,me,np)
if (np == -1) then
@ -222,10 +244,10 @@ contains
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
totrcv_ = totrcv * n
totsnd_ = totsnd * n
total_recv_ = total_recv * n
total_send_ = total_send * n
call idx%sync()
call comm_indexes%sync()
if (debug) write(*,*) me,'Internal buffer'
if (do_send) then
@ -240,17 +262,17 @@ contains
end if
end if
if (debug) write(*,*) me,'do_send start'
call y%new_buffer(ione*size(idx%v),info)
call y%new_comid(totxch,info)
call y%new_buffer(ione*size(comm_indexes%v),info)
call y%new_comid(num_neighbors,info)
y%comid = mpi_request_null
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
! First I post all the non blocking receives
pnti = 1
p2ptag = psb_real_swap_tag
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
snd_pt = 1+pnti+nerv+psb_n_elem_send_
rcv_pt = 1+pnti+psb_n_elem_recv_
@ -270,14 +292,14 @@ contains
!
pnti = 1
snd_pt = 1
do i=1, totxch
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
snd_pt = 1+pnti+nerv+psb_n_elem_send_
rcv_pt = 1+pnti+psb_n_elem_recv_
idx_pt = rcv_pt
call y%gth(idx_pt,nerv,idx)
call y%gth(idx_pt,nerv,comm_indexes)
pnti = pnti + nerv + nesd + 3
end do
@ -296,10 +318,10 @@ contains
snd_pt = 1
rcv_pt = 1
p2ptag = psb_real_swap_tag
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
snd_pt = 1+pnti+nerv+psb_n_elem_send_
rcv_pt = 1+pnti+psb_n_elem_recv_
@ -329,15 +351,15 @@ contains
call psb_errpush(info,name,m_err=(/-2/))
goto 9999
end if
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
if (debug) write(*,*) me,' wait'
pnti = 1
p2ptag = psb_real_swap_tag
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
snd_pt = 1+pnti+nerv+psb_n_elem_send_
rcv_pt = 1+pnti+psb_n_elem_recv_
@ -373,17 +395,17 @@ contains
pnti = 1
snd_pt = 1
rcv_pt = 1
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
idx_pt = 1+pnti+psb_n_elem_recv_
snd_pt = 1+pnti+nerv+psb_n_elem_send_
rcv_pt = 1+pnti+psb_n_elem_recv_
if (debug) write(0,*)me,' Received from: ',prcid(i),&
& y%combuf(snd_pt:snd_pt+nesd-1)
call y%sct(snd_pt,nesd,idx,beta)
call y%sct(snd_pt,nesd,comm_indexes,beta)
pnti = pnti + nerv + nesd + 3
end do
!
@ -414,7 +436,155 @@ contains
return
end subroutine psi_stran_vidx_vect
end subroutine psi_stran_baseline_vect
subroutine psi_stran_neighbor_topology_vect(ctxt,flag,beta,y,comm_indexes, &
     & num_neighbors,total_send,total_recv,info)
  !
  ! Transpose data exchange for an encapsulated single-precision real vector,
  ! using a persistent MPI neighbourhood topology and MPI_Ineighbor_alltoallv.
  !
  ! In the transpose exchange the roles of the halo (recv) and boundary
  ! (send) index lists are swapped with respect to the halo exchange:
  ! data is gathered from the recv indexes, sent with the recv
  ! counts/displacements, and scattered (with beta) to the send indexes.
  !
  ! ctxt          - parallel context.
  ! flag          - phase bitmask: psb_swap_start_ posts the non-blocking
  !                 exchange, psb_swap_wait_ completes it and scatters.
  ! beta          - scaling applied by y%sct (overwrite or sum).
  ! y             - the data area; caches the topology, combuf and request.
  ! comm_indexes  - descriptor index list used to build the topology.
  ! num_neighbors, total_send, total_recv - sizes from the descriptor list.
  ! info          - return code.
  !
#ifdef PSB_MPI_MOD
  use mpi
#endif
  implicit none
#ifdef PSB_MPI_H
  include 'mpif.h'
#endif
  type(psb_ctxt_type), intent(in) :: ctxt
  integer(psb_ipk_), intent(in) :: flag
  real(psb_spk_), intent(in) :: beta
  class(psb_s_base_vect_type), intent(inout) :: y
  class(psb_i_base_vect_type), intent(inout) :: comm_indexes
  integer(psb_ipk_), intent(in) :: num_neighbors,total_send,total_recv
  integer(psb_ipk_), intent(out) :: info
  ! locals
  integer(psb_mpk_) :: icomm
  integer(psb_mpk_) :: np, me
  integer(psb_mpk_) :: iret, p2pstat(mpi_status_size)
  integer(psb_ipk_) :: err_act, topology_total_send, topology_total_recv, buffer_size
  logical :: do_start, do_wait
  logical, parameter :: debug = .false.
  ! len=40: the routine name is 32 characters; len=30 would silently truncate
  ! it in error reports (Fortran character assignment truncates).
  character(len=40) :: name

  info = psb_success_
  name = 'psi_stran_neighbor_topology_vect'
  call psb_erractionsave(err_act)
  call psb_info(ctxt,me,np)
  if (np == -1) then
    info=psb_err_context_error_
    call psb_errpush(info,name)
    goto 9999
  endif
  icomm = ctxt%get_mpic()

  do_start = iand(flag,psb_swap_start_) /= 0
  do_wait  = iand(flag,psb_swap_wait_) /= 0
  call comm_indexes%sync()

  if (do_start) then
    if(debug) write(*,*) me,' nbr_tran_vect: starting data exchange'
    ! The topology is built once and cached on y.
    if (.not. y%neighbor_topology%is_initialized) then
      if (debug) write(*,*) me,' nbr_tran_vect: building topology'
      call y%neighbor_topology%init(comm_indexes%v, num_neighbors, total_send, total_recv, &
           & ctxt, icomm, info)
      if (info /= psb_success_) then
        call psb_errpush(psb_err_internal_error_, name, &
             & a_err='neighbor_topology_init')
        goto 9999
      end if
    end if
    topology_total_send = y%neighbor_topology%total_send
    topology_total_recv = y%neighbor_topology%total_recv
    buffer_size = topology_total_send + topology_total_recv
    call y%new_buffer(buffer_size, info)
    if (info /= 0) then
      call psb_errpush(psb_err_alloc_dealloc_, name)
      goto 9999
    end if
    y%communication_handle = mpi_request_null
    if (debug) write(*,*) me,' nbr_tran_vect: gathering (recv) data,', topology_total_recv,' elems'
    call y%gth(int(topology_total_recv,psb_mpk_), &
         & y%neighbor_topology%recv_indexes, &
         & y%combuf(1:topology_total_recv))
    call y%device_wait()
    if (debug) write(*,*) me,' nbr_tran_vect: posting MPI_Ineighbor_alltoallv (swapped)'
    call mpi_ineighbor_alltoallv( &
         & y%combuf(1), &
         & y%neighbor_topology%recv_counts, &
         & y%neighbor_topology%recv_displs, &
         & psb_mpi_r_spk_, &
         & y%combuf(topology_total_recv + 1), &
         & y%neighbor_topology%send_counts, &
         & y%neighbor_topology%send_displs, &
         & psb_mpi_r_spk_, &
         & y%neighbor_topology%graph_comm, &
         & y%communication_handle, iret)
    if (iret /= mpi_success) then
      info = psb_err_mpi_error_
      call psb_errpush(info, name, m_err=(/iret/))
      goto 9999
    end if
  end if

  if (do_wait) then
    ! A wait without a matching start is a caller error.
    if (y%communication_handle == mpi_request_null) then
      info = psb_err_mpi_error_
      call psb_errpush(info, name, m_err=(/-2/))
      goto 9999
    end if
    topology_total_send = y%neighbor_topology%total_send
    topology_total_recv = y%neighbor_topology%total_recv
    if (debug) write(*,*) me,' nbr_tran_vect: waiting on MPI request'
    call mpi_wait(y%communication_handle, p2pstat, iret)
    if (iret /= mpi_success) then
      info = psb_err_mpi_error_
      call psb_errpush(info, name, m_err=(/iret/))
      goto 9999
    end if
    if (debug) write(*,*) me,' nbr_tran_vect: scattering (send) data,', topology_total_send,' elems'
    call y%sct(int(topology_total_send,psb_mpk_), &
         & y%neighbor_topology%send_indexes, &
         & y%combuf(topology_total_recv+1:topology_total_recv+topology_total_send), &
         & beta)
    y%communication_handle = mpi_request_null
    call y%device_wait()
    call y%maybe_free_buffer(info)
    if (info /= 0) then
      call psb_errpush(psb_err_alloc_dealloc_, name)
      goto 9999
    end if
    if (debug) write(*,*) me,' nbr_tran_vect: done'
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(ctxt,err_act)
  return
end subroutine psi_stran_neighbor_topology_vect
!
@ -445,11 +615,15 @@ contains
integer(psb_ipk_), optional :: data
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, idxs, idxr, totxch, err_act, data_
class(psb_i_base_vect_type), pointer :: d_vidx
character(len=20) :: name
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, total_send, total_recv, num_neighbors, err_act, data_
class(psb_i_base_vect_type), pointer :: comm_indexes
character(len=20) :: name
! local variables used to detect the communication scheme
logical :: swap_mpi, swap_sync, swap_send, swap_recv, swap_start, swap_wait
logical :: baseline, neighbor_a2av
info = psb_success_
name = 'psi_sswaptran_multivect'
@ -476,14 +650,45 @@ contains
data_ = psb_comm_halo_
end if
call desc_a%get_list_p(data_,d_vidx,totxch,idxr,idxs,info)
call desc_a%get_list_p(data_,comm_indexes,num_neighbors,total_recv,total_send,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
call psi_swaptran(ctxt,flag,beta,y,d_vidx,totxch,idxs,idxr,info)
if (info /= psb_success_) goto 9999
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
swap_start = iand(flag,psb_swap_start_) /= 0
swap_wait = iand(flag,psb_swap_wait_) /= 0
baseline = swap_mpi .or. swap_send .or. swap_recv .or. swap_sync
neighbor_a2av = swap_start .or. swap_wait
if( (baseline.eqv..true.).and.(neighbor_a2av.eqv..true.) ) then
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: both baseline and neighbor_a2av are true')
goto 9999
end if
if (baseline) then
call psi_stran_baseline_multivect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) then
call psb_errpush(info,name,a_err='baseline swap')
goto 9999
end if
else if (neighbor_a2av) then
call psi_stran_neighbor_topology_multivect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) then
call psb_errpush(info,name,a_err='neighbor a2av swap')
goto 9999
end if
else
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: neither baseline nor neighbor_a2av is true')
goto 9999
end if
call psb_erractionrestore(err_act)
return
@ -494,21 +699,8 @@ contains
end subroutine psi_sswaptran_multivect
!
!
! Subroutine: psi_stran_vidx_multivect
! Data exchange among processes.
!
! Takes care of Y an encapsulated multivector. Relies on the gather/scatter methods
! of multivectors.
!
! The real workhorse: the outer routine will only choose the index list
! this one takes the index list and does the actual exchange.
!
!
!
module subroutine psi_stran_vidx_multivect(ctxt,flag,beta,y,idx,&
& totxch,totsnd,totrcv,info)
subroutine psi_stran_baseline_multivect(ctxt,flag,beta,y,comm_indexes,&
& num_neighbors,total_send,total_recv,info)
#ifdef PSB_MPI_MOD
use mpi
@ -522,8 +714,8 @@ contains
integer(psb_ipk_), intent(in) :: flag
real(psb_spk_), intent(in) :: beta
class(psb_s_base_multivect_type), intent(inout) :: y
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd,totrcv
class(psb_i_base_vect_type), intent(inout) :: comm_indexes
integer(psb_ipk_), intent(in) :: num_neighbors,total_send,total_recv
integer(psb_ipk_), intent(out) :: info
@ -532,7 +724,7 @@ contains
integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
integer(psb_mpk_) :: icomm
integer(psb_mpk_), allocatable :: prcid(:)
integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,&
integer(psb_ipk_) :: err_act, i, idx_pt, total_send_, total_recv_,&
& snd_pt, rcv_pt, pnti
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
@ -559,10 +751,10 @@ contains
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
totrcv_ = totrcv * n
totsnd_ = totsnd * n
total_recv_ = total_recv * n
total_send_ = total_send * n
call idx%sync()
call comm_indexes%sync()
if (debug) write(*,*) me,'Internal buffer'
if (do_send) then
@ -577,19 +769,19 @@ contains
end if
end if
if (debug) write(*,*) me,'do_send start'
call y%new_buffer(ione*size(idx%v),info)
call y%new_comid(totxch,info)
call y%new_buffer(ione*size(comm_indexes%v),info)
call y%new_comid(num_neighbors,info)
y%comid = mpi_request_null
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
! First I post all the non blocking receives
pnti = 1
snd_pt = totrcv_+1
snd_pt = total_recv_+1
rcv_pt = 1
p2ptag = psb_real_swap_tag
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
prcid(i) = psb_get_mpi_rank(ctxt,proc_to_comm)
if ((nesd>0).and.(proc_to_comm /= me)) then
if (debug) write(*,*) me,'Posting receive from',prcid(i),snd_pt
@ -607,13 +799,13 @@ contains
! Then gather for sending.
!
pnti = 1
snd_pt = totrcv_+1
snd_pt = total_recv_+1
rcv_pt = 1
do i=1, totxch
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
idx_pt = 1+pnti+psb_n_elem_recv_
call y%gth(idx_pt,rcv_pt,nerv,idx)
call y%gth(idx_pt,rcv_pt,nerv,comm_indexes)
rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3
@ -630,13 +822,13 @@ contains
!
pnti = 1
snd_pt = totrcv_+1
snd_pt = total_recv_+1
rcv_pt = 1
p2ptag = psb_real_swap_tag
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
idx_pt = 1+pnti+psb_n_elem_recv_
if ((nerv>0).and.(proc_to_comm /= me)) then
@ -666,17 +858,17 @@ contains
call psb_errpush(info,name,m_err=(/-2/))
goto 9999
end if
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
if (debug) write(*,*) me,' wait'
pnti = 1
snd_pt = totrcv_+1
snd_pt = total_recv_+1
rcv_pt = 1
p2ptag = psb_real_swap_tag
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
if (proc_to_comm /= me)then
if (nerv>0) then
call mpi_wait(y%comid(i,1),p2pstat,iret)
@ -709,17 +901,17 @@ contains
if (debug) write(*,*) me,' scatter'
pnti = 1
snd_pt = totrcv_+1
snd_pt = total_recv_+1
rcv_pt = 1
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
idx_pt = 1+pnti+nerv+psb_n_elem_send_
if (debug) write(0,*)me,' Received from: ',prcid(i),&
& y%combuf(snd_pt:snd_pt+n*nesd-1)
call y%sct(idx_pt,snd_pt,nesd,idx,beta)
call y%sct(idx_pt,snd_pt,nesd,comm_indexes,beta)
rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3
@ -754,6 +946,150 @@ contains
return
end subroutine psi_stran_vidx_multivect
end subroutine psi_stran_baseline_multivect
subroutine psi_stran_neighbor_topology_multivect(ctxt,flag,beta,y,comm_indexes, &
     & num_neighbors,total_send,total_recv,info)
  !
  ! Transpose data exchange for an encapsulated single-precision real
  ! multivector, using a persistent MPI neighbourhood topology and
  ! MPI_Ineighbor_alltoallv.  Same swapped (transpose) pattern as
  ! psi_stran_neighbor_topology_vect: gather from the recv indexes, send
  ! with the recv counts/displacements, scatter (with beta) to the send
  ! indexes.
  !
  ! ctxt          - parallel context.
  ! flag          - phase bitmask: psb_swap_start_ posts the non-blocking
  !                 exchange, psb_swap_wait_ completes it and scatters.
  ! beta          - scaling applied by y%sct (overwrite or sum).
  ! y             - the data area; caches the topology, combuf and request.
  ! comm_indexes  - descriptor index list used to build the topology.
  ! num_neighbors, total_send, total_recv - sizes from the descriptor list.
  ! info          - return code.
  !
#ifdef PSB_MPI_MOD
  use mpi
#endif
  implicit none
#ifdef PSB_MPI_H
  include 'mpif.h'
#endif
  type(psb_ctxt_type), intent(in) :: ctxt
  integer(psb_ipk_), intent(in) :: flag
  real(psb_spk_), intent(in) :: beta
  class(psb_s_base_multivect_type), intent(inout) :: y
  class(psb_i_base_vect_type), intent(inout) :: comm_indexes
  integer(psb_ipk_), intent(in) :: num_neighbors,total_send,total_recv
  integer(psb_ipk_), intent(out) :: info
  ! locals
  integer(psb_mpk_) :: icomm
  integer(psb_mpk_) :: np, me
  integer(psb_mpk_) :: iret, p2pstat(mpi_status_size)
  integer(psb_ipk_) :: err_act, topology_total_send, topology_total_recv, buffer_size
  logical :: do_start, do_wait
  logical, parameter :: debug = .false.
  ! len=40: the routine name is 37 characters; len=30 would truncate it.
  character(len=40) :: name

  info = psb_success_
  ! BUG FIX: was a copy-paste of the vect routine name
  ! ('psi_stran_neighbor_topology_vect'), which mislabelled error reports.
  name = 'psi_stran_neighbor_topology_multivect'
  call psb_erractionsave(err_act)
  call psb_info(ctxt,me,np)
  if (np == -1) then
    info=psb_err_context_error_
    call psb_errpush(info,name)
    goto 9999
  endif
  icomm = ctxt%get_mpic()

  do_start = iand(flag,psb_swap_start_) /= 0
  do_wait  = iand(flag,psb_swap_wait_) /= 0
  call comm_indexes%sync()

  if (do_start) then
    if(debug) write(*,*) me,' nbr_tran_mv: starting data exchange'
    ! The topology is built once and cached on y.
    if (.not. y%neighbor_topology%is_initialized) then
      if (debug) write(*,*) me,' nbr_tran_mv: building topology'
      call y%neighbor_topology%init(comm_indexes%v, num_neighbors, total_send, total_recv, &
           & ctxt, icomm, info)
      if (info /= psb_success_) then
        call psb_errpush(psb_err_internal_error_, name, &
             & a_err='neighbor_topology_init')
        goto 9999
      end if
    end if
    topology_total_send = y%neighbor_topology%total_send
    topology_total_recv = y%neighbor_topology%total_recv
    buffer_size = topology_total_send + topology_total_recv
    call y%new_buffer(buffer_size, info)
    if (info /= 0) then
      call psb_errpush(psb_err_alloc_dealloc_, name)
      goto 9999
    end if
    y%communication_handle = mpi_request_null
    if (debug) write(*,*) me,' nbr_tran_mv: gathering (recv) data,', topology_total_recv,' elems'
    call y%gth(int(topology_total_recv,psb_mpk_), &
         & y%neighbor_topology%recv_indexes, &
         & y%combuf(1:topology_total_recv))
    call y%device_wait()
    if (debug) write(*,*) me,' nbr_tran_mv: posting MPI_Ineighbor_alltoallv (swapped)'
    call mpi_ineighbor_alltoallv( &
         & y%combuf(1), &
         & y%neighbor_topology%recv_counts, &
         & y%neighbor_topology%recv_displs, &
         & psb_mpi_r_spk_, &
         & y%combuf(topology_total_recv + 1), &
         & y%neighbor_topology%send_counts, &
         & y%neighbor_topology%send_displs, &
         & psb_mpi_r_spk_, &
         & y%neighbor_topology%graph_comm, &
         & y%communication_handle, iret)
    if (iret /= mpi_success) then
      info = psb_err_mpi_error_
      call psb_errpush(info, name, m_err=(/iret/))
      goto 9999
    end if
  end if

  if (do_wait) then
    ! A wait without a matching start is a caller error.
    if (y%communication_handle == mpi_request_null) then
      info = psb_err_mpi_error_
      call psb_errpush(info, name, m_err=(/-2/))
      goto 9999
    end if
    topology_total_send = y%neighbor_topology%total_send
    topology_total_recv = y%neighbor_topology%total_recv
    if (debug) write(*,*) me,' nbr_tran_mv: waiting on MPI request'
    call mpi_wait(y%communication_handle, p2pstat, iret)
    if (iret /= mpi_success) then
      info = psb_err_mpi_error_
      call psb_errpush(info, name, m_err=(/iret/))
      goto 9999
    end if
    if (debug) write(*,*) me,' nbr_tran_mv: scattering (send) data,', topology_total_send,' elems'
    call y%sct(int(topology_total_send,psb_mpk_), &
         & y%neighbor_topology%send_indexes, &
         & y%combuf(topology_total_recv+1:topology_total_recv+topology_total_send), &
         & beta)
    y%communication_handle = mpi_request_null
    call y%device_wait()
    call y%maybe_free_buffer(info)
    if (info /= 0) then
      call psb_errpush(psb_err_alloc_dealloc_, name)
      goto 9999
    end if
    if (debug) write(*,*) me,' nbr_tran_mv: done'
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(ctxt,err_act)
  return
end subroutine psi_stran_neighbor_topology_multivect
end submodule psi_s_swaptran_impl

@ -88,9 +88,10 @@
!
!
submodule (psi_z_comm_v_mod) psi_z_swapdata_impl
use psb_desc_const_mod, only: psb_swap_start_, psb_swap_wait_
use psb_base_mod
contains
subroutine psi_zswapdata_vect(flag,beta,y,desc_a,info,data)
module subroutine psi_zswapdata_vect(flag,beta,y,desc_a,info,data)
#ifdef PSB_MPI_MOD
use mpi
@ -100,34 +101,37 @@ contains
include 'mpif.h'
#endif
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_z_base_vect_type) :: y
complex(psb_dpk_) :: beta
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(in) :: flag
class(psb_z_base_vect_type), intent(inout) :: y
complex(psb_dpk_), intent(in) :: beta
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, idxs, idxr, totxch, data_, err_act
class(psb_i_base_vect_type), pointer :: d_vidx
character(len=20) :: name
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np, me, total_send, total_recv, num_neighbors, data_, err_act
class(psb_i_base_vect_type), pointer :: comm_indexes
character(len=30) :: name
info=psb_success_
name='psi_swap_datav'
! local variables used to detect the communication scheme
logical :: swap_mpi, swap_sync, swap_send, swap_recv, swap_start, swap_wait
logical :: baseline, neighbor_a2av
info = psb_success_
name = 'psi_zswapdata_vect'
call psb_erractionsave(err_act)
ctxt = desc_a%get_context()
icomm = ctxt%get_mpic()
call psb_info(ctxt,me,np)
call psb_info(ctxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
if (.not.psb_is_asb_desc(desc_a)) then
if (.not.psb_is_asb_desc(desc_a)) then
info=psb_err_invalid_cd_state_
call psb_errpush(info,name)
goto 9999
@ -139,14 +143,45 @@ contains
data_ = psb_comm_halo_
end if
call desc_a%get_list_p(data_,d_vidx,totxch,idxr,idxs,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
call desc_a%get_list_p(data_,comm_indexes,num_neighbors,total_recv,total_send,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='desc_a%get_list_p')
goto 9999
end if
call psi_swapdata(ctxt,flag,beta,y,d_vidx,totxch,idxs,idxr,info)
if (info /= psb_success_) goto 9999
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
swap_start = iand(flag,psb_swap_start_) /= 0
swap_wait = iand(flag,psb_swap_wait_) /= 0
baseline = swap_mpi .or. swap_send .or. swap_recv .or. swap_sync
neighbor_a2av = swap_start .or. swap_wait
if( (baseline.eqv..true.).and.(neighbor_a2av.eqv..true.) ) then
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: both baseline and neighbor_a2av are true')
goto 9999
end if
if (baseline) then
call psi_zswap_baseline_vect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) then
call psb_errpush(info,name,a_err='baseline swap')
goto 9999
end if
else if (neighbor_a2av) then
call psi_zswap_neighbor_topology_vect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) then
call psb_errpush(info,name,a_err='neighbor a2av swap')
goto 9999
end if
else
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: neither baseline nor neighbor_a2av is true')
goto 9999
end if
call psb_erractionrestore(err_act)
return
@ -157,6 +192,163 @@ contains
end subroutine psi_zswapdata_vect
!
! Baseline halo exchange for an encapsulated vector: a thin dispatch
! shim that forwards all arguments unchanged to psi_zswap_vidx_vect,
! which implements the actual point-to-point exchange.
!
subroutine psi_zswap_baseline_vect(ctxt,flag,beta,y,idx, &
     & num_neighbors,total_send,total_recv,info)
  implicit none
  ! Parallel context and exchange-mode flags.
  type(psb_ctxt_type), intent(in)            :: ctxt
  integer(psb_ipk_), intent(in)              :: flag
  ! Scaling factor applied when scattering received entries into y.
  complex(psb_dpk_), intent(in)              :: beta
  ! Vector being exchanged and the encoded communication index list.
  class(psb_z_base_vect_type), intent(inout) :: y
  class(psb_i_base_vect_type), intent(inout) :: idx
  integer(psb_ipk_), intent(in)              :: num_neighbors, total_send, total_recv
  ! Status code: set by the callee (psb_success_ on success).
  integer(psb_ipk_), intent(out)             :: info

  call psi_zswap_vidx_vect(ctxt, flag, beta, y, idx, &
       & num_neighbors, total_send, total_recv, info)

end subroutine psi_zswap_baseline_vect
!
! Subroutine: psi_zswap_neighbor_topology_vect
! Halo exchange for an encapsulated vector using a nonblocking MPI
! neighborhood collective (MPI_Ineighbor_alltoallv) on a graph topology
! communicator cached inside y%neighbor_topology.
!
! The exchange is split in two phases selected by bits in FLAG:
!   psb_swap_start_: gather send entries into y%combuf and post the
!                    collective; the request is kept in y%communication_handle.
!   psb_swap_wait_ : wait on the pending request, scatter the received
!                    entries into y (scaled by BETA), release the buffer.
! Both bits may be set in a single call, giving a blocking exchange.
!
subroutine psi_zswap_neighbor_topology_vect(ctxt,flag,beta,y,idx, &
& num_neighbors,total_send,total_recv,info)
#ifdef PSB_MPI_MOD
use mpi
#endif
implicit none
#ifdef PSB_MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_z_base_vect_type), intent(inout) :: y
complex(psb_dpk_), intent(in) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: num_neighbors,total_send,total_recv
! locals
integer(psb_mpk_) :: np, me
integer(psb_mpk_) :: iret, p2pstat(mpi_status_size)
integer(psb_ipk_) :: err_act, topology_total_send, topology_total_recv, buffer_size
logical :: do_start, do_wait
logical, parameter :: debug = .false.
character(len=30) :: name
info = psb_success_
name = 'psi_zswap_nbr_vect'
call psb_erractionsave(err_act)
call psb_info(ctxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
icomm = ctxt%get_mpic()
! Decode the requested phase(s) from the flag bitmask.
do_start = iand(flag,psb_swap_start_) /= 0
do_wait = iand(flag,psb_swap_wait_) /= 0
! Make sure the index list is up to date on the host before use.
call idx%sync()
if (do_start) then
if(debug) write(*,*) me,' nbr_vect: starting data exchange'
! Lazily build the distributed-graph topology from the encoded index
! list on first use; it is cached on y for subsequent exchanges.
if (.not. y%neighbor_topology%is_initialized) then
if (debug) write(*,*) me,' nbr_vect: building topology'
call y%neighbor_topology%init(idx%v, num_neighbors, total_send, total_recv, &
& ctxt, icomm, info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_, name, &
& a_err='neighbor_topology_init')
goto 9999
end if
end if
! Single communication buffer: send entries first, receive area after.
topology_total_send = y%neighbor_topology%total_send
topology_total_recv = y%neighbor_topology%total_recv
buffer_size = topology_total_send + topology_total_recv
call y%new_buffer(buffer_size, info)
if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_, name)
goto 9999
end if
y%communication_handle = mpi_request_null
if (debug) write(*,*) me,' nbr_vect: gathering send data,', topology_total_send,' elems'
! Pack the entries to be sent into the head of combuf, then make sure
! any device-side gather has completed before MPI touches the buffer.
call y%gth(int(topology_total_send,psb_mpk_), &
& y%neighbor_topology%send_indexes, &
& y%combuf(1:topology_total_send))
call y%device_wait()
if (debug) write(*,*) me,' nbr_vect: posting MPI_Ineighbor_alltoallv'
! Nonblocking neighborhood all-to-all: the request handle is stored on
! y so a later wait-phase call can complete it.
call mpi_ineighbor_alltoallv( &
& y%combuf(1), &
& y%neighbor_topology%send_counts, &
& y%neighbor_topology%send_displs, &
& psb_mpi_c_dpk_, &
& y%combuf(topology_total_send + 1), &
& y%neighbor_topology%recv_counts, &
& y%neighbor_topology%recv_displs, &
& psb_mpi_c_dpk_, &
& y%neighbor_topology%graph_comm, &
& y%communication_handle, iret)
if (iret /= mpi_success) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
end if
if (do_wait) then
! A null handle here means wait was requested with no pending start.
if (y%communication_handle == mpi_request_null) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/-2/))
goto 9999
end if
topology_total_send = y%neighbor_topology%total_send
topology_total_recv = y%neighbor_topology%total_recv
if (debug) write(*,*) me,' nbr_vect: waiting on MPI request'
call mpi_wait(y%communication_handle, p2pstat, iret)
if (iret /= mpi_success) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
if (debug) write(*,*) me,' nbr_vect: scattering recv data,', topology_total_recv,' elems'
! Unpack received entries (tail of combuf) into y, combining with the
! existing values via beta.
call y%sct(int(topology_total_recv,psb_mpk_), &
& y%neighbor_topology%recv_indexes, &
& y%combuf(topology_total_send+1:topology_total_send+topology_total_recv), &
& beta)
y%communication_handle = mpi_request_null
call y%device_wait()
call y%maybe_free_buffer(info)
if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_, name)
goto 9999
end if
if (debug) write(*,*) me,' nbr_vect: done'
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psi_zswap_neighbor_topology_vect
!
!
! Subroutine: psi_zswap_vidx_vect
@ -425,34 +617,36 @@ contains
include 'mpif.h'
#endif
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_z_base_multivect_type) :: y
complex(psb_dpk_) :: beta
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(in) :: flag
class(psb_z_base_multivect_type), intent(inout) :: y
complex(psb_dpk_), intent(in) :: beta
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
! local variables used to detect the communication scheme
logical :: swap_mpi, swap_sync, swap_send, swap_recv, swap_start, swap_wait
logical :: baseline, neighbor_a2av
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, idxs, idxr, totxch, data_, err_act
class(psb_i_base_vect_type), pointer :: d_vidx
character(len=20) :: name
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np, me, total_send, total_recv, num_neighbors, data_, err_act
class(psb_i_base_vect_type), pointer :: comm_indexes
character(len=30) :: name
info=psb_success_
name='psi_swap_datav'
info = psb_success_
name = 'psi_zswapdata_multivect'
call psb_erractionsave(err_act)
ctxt = desc_a%get_context()
icomm = ctxt%get_mpic()
call psb_info(ctxt,me,np)
call psb_info(ctxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
if (.not.psb_is_asb_desc(desc_a)) then
if (.not.psb_is_asb_desc(desc_a)) then
info=psb_err_invalid_cd_state_
call psb_errpush(info,name)
goto 9999
@ -464,14 +658,45 @@ contains
data_ = psb_comm_halo_
end if
call desc_a%get_list_p(data_,d_vidx,totxch,idxr,idxs,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
call desc_a%get_list_p(data_,comm_indexes,num_neighbors,total_recv,total_send,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='desc_a%get_list_p')
goto 9999
end if
call psi_swapdata(ctxt,flag,beta,y,d_vidx,totxch,idxs,idxr,info)
if (info /= psb_success_) goto 9999
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
swap_start = iand(flag,psb_swap_start_) /= 0
swap_wait = iand(flag,psb_swap_wait_) /= 0
baseline = swap_mpi .or. swap_send .or. swap_recv .or. swap_sync
neighbor_a2av = swap_start .or. swap_wait
if( (baseline.eqv..true.).and.(neighbor_a2av.eqv..true.) ) then
info=psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: both baseline and neighbor_a2av are true')
goto 9999
end if
if (baseline) then
call psi_zswap_baseline_multivect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) then
call psb_errpush(info,name,a_err='baseline swap')
goto 9999
end if
else if (neighbor_a2av) then
call psi_zswap_neighbor_topology_multivect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) then
call psb_errpush(info,name,a_err='neighbor a2av swap')
goto 9999
end if
else
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: neither baseline nor neighbor_a2av is true')
goto 9999
end if
call psb_erractionrestore(err_act)
return
@ -482,6 +707,163 @@ contains
end subroutine psi_zswapdata_multivect
!
! Baseline halo exchange for an encapsulated multivector: a thin
! dispatch shim that forwards all arguments unchanged to
! psi_zswap_vidx_multivect, which implements the actual exchange.
!
subroutine psi_zswap_baseline_multivect(ctxt,flag,beta,y,idx, &
     & num_neighbors,total_send,total_recv,info)
  implicit none
  ! Parallel context and exchange-mode flags.
  type(psb_ctxt_type), intent(in)                 :: ctxt
  integer(psb_ipk_), intent(in)                   :: flag
  ! Scaling factor applied when scattering received entries into y.
  complex(psb_dpk_), intent(in)                   :: beta
  ! Multivector being exchanged and the encoded communication index list.
  class(psb_z_base_multivect_type), intent(inout) :: y
  class(psb_i_base_vect_type), intent(inout)      :: idx
  integer(psb_ipk_), intent(in)                   :: num_neighbors, total_send, total_recv
  ! Status code: set by the callee (psb_success_ on success).
  integer(psb_ipk_), intent(out)                  :: info

  call psi_zswap_vidx_multivect(ctxt, flag, beta, y, idx, &
       & num_neighbors, total_send, total_recv, info)

end subroutine psi_zswap_baseline_multivect
!
! Subroutine: psi_zswap_neighbor_topology_multivect
! Halo exchange for an encapsulated multivector using a nonblocking MPI
! neighborhood collective (MPI_Ineighbor_alltoallv) on a graph topology
! communicator cached inside y%neighbor_topology.
!
! Two phases, selected by bits in FLAG:
!   psb_swap_start_: gather send entries into y%combuf and post the
!                    collective; request kept in y%communication_handle.
!   psb_swap_wait_ : wait on the pending request, scatter received
!                    entries into y (scaled by BETA), release the buffer.
!
! Fixes vs. previous revision: NAME was character(len=30) while the
! routine name assigned to it is 37 characters, so error reports carried
! a silently truncated name; debug messages also mislabeled this routine
! as the vector ('nbr_vect') variant.
!
subroutine psi_zswap_neighbor_topology_multivect(ctxt,flag,beta,y,idx, &
& num_neighbors,total_send,total_recv,info)
#ifdef PSB_MPI_MOD
use mpi
#endif
implicit none
#ifdef PSB_MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_z_base_multivect_type), intent(inout) :: y
complex(psb_dpk_), intent(in) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: num_neighbors,total_send,total_recv
! locals
integer(psb_mpk_) :: np, me
integer(psb_mpk_) :: iret, p2pstat(mpi_status_size)
integer(psb_ipk_) :: err_act, topology_total_send, topology_total_recv, buffer_size
logical :: do_start, do_wait
logical, parameter :: debug = .false.
! len=40 so the full routine name (37 chars) fits without truncation.
character(len=40) :: name
info = psb_success_
name = 'psi_zswap_neighbor_topology_multivect'
call psb_erractionsave(err_act)
call psb_info(ctxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
icomm = ctxt%get_mpic()
! Decode the requested phase(s) from the flag bitmask.
do_start = iand(flag,psb_swap_start_) /= 0
do_wait = iand(flag,psb_swap_wait_) /= 0
! Make sure the index list is up to date on the host before use.
call idx%sync()
if (do_start) then
if(debug) write(*,*) me,' nbr_mvect: starting data exchange'
! Lazily build the distributed-graph topology from the encoded index
! list on first use; cached on y for subsequent exchanges.
if (.not. y%neighbor_topology%is_initialized) then
if (debug) write(*,*) me,' nbr_mvect: building topology'
call y%neighbor_topology%init(idx%v, num_neighbors, total_send, total_recv, &
& ctxt, icomm, info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_, name, &
& a_err='neighbor_topology_init')
goto 9999
end if
end if
! Single communication buffer: send entries first, receive area after.
topology_total_send = y%neighbor_topology%total_send
topology_total_recv = y%neighbor_topology%total_recv
buffer_size = topology_total_send + topology_total_recv
call y%new_buffer(buffer_size, info)
if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_, name)
goto 9999
end if
y%communication_handle = mpi_request_null
if (debug) write(*,*) me,' nbr_mvect: gathering send data,', topology_total_send,' elems'
! Pack send entries into the head of combuf; ensure any device-side
! gather has completed before MPI reads the buffer.
call y%gth(int(topology_total_send,psb_mpk_), &
& y%neighbor_topology%send_indexes, &
& y%combuf(1:topology_total_send))
call y%device_wait()
if (debug) write(*,*) me,' nbr_mvect: posting MPI_Ineighbor_alltoallv'
! Nonblocking neighborhood all-to-all; the request handle is stored on
! y so a later wait-phase call can complete it.
call mpi_ineighbor_alltoallv( &
& y%combuf(1), &
& y%neighbor_topology%send_counts, &
& y%neighbor_topology%send_displs, &
& psb_mpi_c_dpk_, &
& y%combuf(topology_total_send + 1), &
& y%neighbor_topology%recv_counts, &
& y%neighbor_topology%recv_displs, &
& psb_mpi_c_dpk_, &
& y%neighbor_topology%graph_comm, &
& y%communication_handle, iret)
if (iret /= mpi_success) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
end if
if (do_wait) then
! A null handle here means wait was requested with no pending start.
if (y%communication_handle == mpi_request_null) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/-2/))
goto 9999
end if
topology_total_send = y%neighbor_topology%total_send
topology_total_recv = y%neighbor_topology%total_recv
if (debug) write(*,*) me,' nbr_mvect: waiting on MPI request'
call mpi_wait(y%communication_handle, p2pstat, iret)
if (iret /= mpi_success) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
if (debug) write(*,*) me,' nbr_mvect: scattering recv data,', topology_total_recv,' elems'
! Unpack received entries (tail of combuf) into y, combining with the
! existing values via beta.
call y%sct(int(topology_total_recv,psb_mpk_), &
& y%neighbor_topology%recv_indexes, &
& y%combuf(topology_total_send+1:topology_total_send+topology_total_recv), &
& beta)
y%communication_handle = mpi_request_null
call y%device_wait()
call y%maybe_free_buffer(info)
if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_, name)
goto 9999
end if
if (debug) write(*,*) me,' nbr_mvect: done'
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psi_zswap_neighbor_topology_multivect
!
!
! Subroutine: psi_zswap_vidx_multivect

@ -102,22 +102,27 @@ contains
include 'mpif.h'
#endif
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_z_base_vect_type) :: y
complex(psb_dpk_) :: beta
type(psb_desc_type),target :: desc_a
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(in) :: flag
complex(psb_dpk_), intent(in) :: beta
class(psb_z_base_vect_type), intent(inout) :: y
type(psb_desc_type),target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, idxs, idxr, totxch, err_act, data_
class(psb_i_base_vect_type), pointer :: d_vidx
character(len=20) :: name
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, total_send, total_recv, num_neighbors, err_act, data_
class(psb_i_base_vect_type), pointer :: comm_indexes
character(len=20) :: name
! local variables used to detect the communication scheme
logical :: swap_mpi, swap_sync, swap_send, swap_recv, swap_start, swap_wait
logical :: baseline, neighbor_a2av
info=psb_success_
name='psi_swap_tranv'
info = psb_success_
name = 'psi_zswaptran_vect'
call psb_erractionsave(err_act)
ctxt = desc_a%get_context()
@ -141,14 +146,45 @@ contains
data_ = psb_comm_halo_
end if
call desc_a%get_list_p(data_,d_vidx,totxch,idxr,idxs,info)
call desc_a%get_list_p(data_,comm_indexes,num_neighbors,total_recv,total_send,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
call psi_swaptran(ctxt,flag,beta,y,d_vidx,totxch,idxs,idxr,info)
if (info /= psb_success_) goto 9999
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
swap_start = iand(flag,psb_swap_start_) /= 0
swap_wait = iand(flag,psb_swap_wait_) /= 0
baseline = swap_mpi .or. swap_send .or. swap_recv .or. swap_sync
neighbor_a2av = swap_start .or. swap_wait
if( (baseline.eqv..true.).and.(neighbor_a2av.eqv..true.) ) then
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: both baseline and neighbor_a2av are true')
goto 9999
end if
if (baseline) then
call psi_ztran_baseline_vect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) then
call psb_errpush(info,name,a_err='baseline swap')
goto 9999
end if
else if (neighbor_a2av) then
call psi_ztran_neighbor_topology_vect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) then
call psb_errpush(info,name,a_err='neighbor a2av swap')
goto 9999
end if
else
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: neither baseline nor neighbor_a2av is true')
goto 9999
end if
call psb_erractionrestore(err_act)
return
@ -158,21 +194,8 @@ contains
return
end subroutine psi_zswaptran_vect
!
!
! Subroutine: psi_ztran_vidx_vect
! Data exchange among processes.
!
! Takes care of Y an encapsulated vector. Relies on the gather/scatter methods
! of vectors.
!
! The real workhorse: the outer routine will only choose the index list
! this one takes the index list and does the actual exchange.
!
!
!
module subroutine psi_ztran_vidx_vect(ctxt,flag,beta,y,idx,&
& totxch,totsnd,totrcv,info)
subroutine psi_ztran_baseline_vect(ctxt,flag,beta,y,comm_indexes,&
& num_neighbors,total_send,total_recv,info)
#ifdef PSB_MPI_MOD
use mpi
@ -182,13 +205,13 @@ contains
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_z_base_vect_type) :: y
complex(psb_dpk_) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
complex(psb_dpk_), intent(in) :: beta
class(psb_z_base_vect_type), intent(inout) :: y
class(psb_i_base_vect_type), intent(inout) :: comm_indexes
integer(psb_ipk_), intent(in) :: num_neighbors,total_send,total_recv
integer(psb_ipk_), intent(out) :: info
! locals
integer(psb_mpk_) :: np, me, nesd, nerv, n
@ -202,8 +225,8 @@ contains
logical, parameter :: usersend=.false., debug=.false.
character(len=20) :: name
info=psb_success_
name='psi_swap_tran'
info = psb_success_
name = 'psi_ztran_baseline_vect'
call psb_erractionsave(err_act)
call psb_info(ctxt,me,np)
if (np == -1) then
@ -221,10 +244,10 @@ contains
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
totrcv_ = totrcv * n
totsnd_ = totsnd * n
totrcv_ = total_recv * n
totsnd_ = total_send * n
call idx%sync()
call comm_indexes%sync()
if (debug) write(*,*) me,'Internal buffer'
if (do_send) then
@ -239,17 +262,17 @@ contains
end if
end if
if (debug) write(*,*) me,'do_send start'
call y%new_buffer(ione*size(idx%v),info)
call y%new_comid(totxch,info)
call y%new_buffer(ione*size(comm_indexes%v),info)
call y%new_comid(num_neighbors,info)
y%comid = mpi_request_null
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
! First I post all the non blocking receives
pnti = 1
p2ptag = psb_dcomplex_swap_tag
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
snd_pt = 1+pnti+nerv+psb_n_elem_send_
rcv_pt = 1+pnti+psb_n_elem_recv_
@ -269,14 +292,14 @@ contains
!
pnti = 1
snd_pt = 1
do i=1, totxch
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
snd_pt = 1+pnti+nerv+psb_n_elem_send_
rcv_pt = 1+pnti+psb_n_elem_recv_
idx_pt = rcv_pt
call y%gth(idx_pt,nerv,idx)
call y%gth(idx_pt,nerv,comm_indexes)
pnti = pnti + nerv + nesd + 3
end do
@ -295,10 +318,10 @@ contains
snd_pt = 1
rcv_pt = 1
p2ptag = psb_dcomplex_swap_tag
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
snd_pt = 1+pnti+nerv+psb_n_elem_send_
rcv_pt = 1+pnti+psb_n_elem_recv_
@ -328,15 +351,15 @@ contains
call psb_errpush(info,name,m_err=(/-2/))
goto 9999
end if
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
if (debug) write(*,*) me,' wait'
pnti = 1
p2ptag = psb_dcomplex_swap_tag
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
snd_pt = 1+pnti+nerv+psb_n_elem_send_
rcv_pt = 1+pnti+psb_n_elem_recv_
@ -372,17 +395,17 @@ contains
pnti = 1
snd_pt = 1
rcv_pt = 1
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
idx_pt = 1+pnti+psb_n_elem_recv_
snd_pt = 1+pnti+nerv+psb_n_elem_send_
rcv_pt = 1+pnti+psb_n_elem_recv_
if (debug) write(0,*)me,' Received from: ',prcid(i),&
& y%combuf(snd_pt:snd_pt+nesd-1)
call y%sct(snd_pt,nesd,idx,beta)
call y%sct(snd_pt,nesd,comm_indexes,beta)
pnti = pnti + nerv + nesd + 3
end do
!
@ -413,19 +436,151 @@ contains
return
end subroutine psi_ztran_vidx_vect
end subroutine psi_ztran_baseline_vect
!
! Subroutine: psi_ztran_neighbor_topology_vect
! Transpose halo exchange for an encapsulated vector using a nonblocking
! MPI neighborhood collective on the cached graph topology. Compared to
! the forward swap, the data flow is reversed: entries are gathered from
! the RECEIVE index list, exchanged with send/recv counts and
! displacements swapped in the collective, and scattered back through
! the SEND index list (scaled by BETA).
!
! Two phases, selected by bits in FLAG:
!   psb_swap_start_: gather recv-side entries and post the collective.
!   psb_swap_wait_ : complete the request, scatter, release the buffer.
!
! Fix vs. previous revision: NAME was character(len=30) while the
! routine name assigned to it is 32 characters, so error reports carried
! a silently truncated name.
!
subroutine psi_ztran_neighbor_topology_vect(ctxt,flag,beta,y,comm_indexes, &
& num_neighbors,total_send,total_recv,info)
#ifdef PSB_MPI_MOD
use mpi
#endif
implicit none
#ifdef PSB_MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
complex(psb_dpk_), intent(in) :: beta
class(psb_z_base_vect_type), intent(inout) :: y
class(psb_i_base_vect_type), intent(inout) :: comm_indexes
integer(psb_ipk_), intent(in) :: num_neighbors,total_send,total_recv
integer(psb_ipk_), intent(out) :: info
! locals
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: np, me
integer(psb_mpk_) :: iret, p2pstat(mpi_status_size)
integer(psb_ipk_) :: err_act, topology_total_send, topology_total_recv, buffer_size
logical :: do_start, do_wait
logical, parameter :: debug = .false.
! len=40 so the full routine name (32 chars) fits without truncation.
character(len=40) :: name
info = psb_success_
name = 'psi_ztran_neighbor_topology_vect'
call psb_erractionsave(err_act)
call psb_info(ctxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
icomm = ctxt%get_mpic()
! Decode the requested phase(s) from the flag bitmask.
do_start = iand(flag,psb_swap_start_) /= 0
do_wait = iand(flag,psb_swap_wait_) /= 0
! Make sure the index list is up to date on the host before use.
call comm_indexes%sync()
if (do_start) then
if(debug) write(*,*) me,' nbr_tran_vect: starting data exchange'
! Lazily build the distributed-graph topology on first use; cached on
! y for subsequent exchanges.
if (.not. y%neighbor_topology%is_initialized) then
if (debug) write(*,*) me,' nbr_tran_vect: building topology'
call y%neighbor_topology%init(comm_indexes%v, num_neighbors, total_send, total_recv, &
& ctxt, icomm, info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_, name, &
& a_err='neighbor_topology_init')
goto 9999
end if
end if
! Single buffer laid out for the transposed exchange: recv-side
! entries first, send-side area after.
topology_total_send = y%neighbor_topology%total_send
topology_total_recv = y%neighbor_topology%total_recv
buffer_size = topology_total_send + topology_total_recv
call y%new_buffer(buffer_size, info)
if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_, name)
goto 9999
end if
y%communication_handle = mpi_request_null
if (debug) write(*,*) me,' nbr_tran_vect: gathering (recv) data,', topology_total_recv,' elems'
! Transpose: gather through the RECEIVE index list; ensure any
! device-side gather has completed before MPI reads the buffer.
call y%gth(int(topology_total_recv,psb_mpk_), &
& y%neighbor_topology%recv_indexes, &
& y%combuf(1:topology_total_recv))
call y%device_wait()
if (debug) write(*,*) me,' nbr_tran_vect: posting MPI_Ineighbor_alltoallv (swapped)'
! Counts/displacements are swapped relative to the forward exchange.
call mpi_ineighbor_alltoallv( &
& y%combuf(1), &
& y%neighbor_topology%recv_counts, &
& y%neighbor_topology%recv_displs, &
& psb_mpi_c_dpk_, &
& y%combuf(topology_total_recv + 1), &
& y%neighbor_topology%send_counts, &
& y%neighbor_topology%send_displs, &
& psb_mpi_c_dpk_, &
& y%neighbor_topology%graph_comm, &
& y%communication_handle, iret)
if (iret /= mpi_success) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
end if
if (do_wait) then
! A null handle here means wait was requested with no pending start.
if (y%communication_handle == mpi_request_null) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/-2/))
goto 9999
end if
topology_total_send = y%neighbor_topology%total_send
topology_total_recv = y%neighbor_topology%total_recv
if (debug) write(*,*) me,' nbr_tran_vect: waiting on MPI request'
call mpi_wait(y%communication_handle, p2pstat, iret)
if (iret /= mpi_success) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
if (debug) write(*,*) me,' nbr_tran_vect: scattering (send) data,', topology_total_send,' elems'
! Transpose: scatter through the SEND index list, combining with the
! existing values via beta.
call y%sct(int(topology_total_send,psb_mpk_), &
& y%neighbor_topology%send_indexes, &
& y%combuf(topology_total_recv+1:topology_total_recv+topology_total_send), &
& beta)
y%communication_handle = mpi_request_null
call y%device_wait()
call y%maybe_free_buffer(info)
if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_, name)
goto 9999
end if
if (debug) write(*,*) me,' nbr_tran_vect: done'
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psi_ztran_neighbor_topology_vect
!
!
!
!
! Subroutine: psi_zswaptran_multivect
! Data exchange among processes.
!
! Takes care of Y an encaspulated multivector.
!
!
module subroutine psi_zswaptran_multivect(flag,beta,y,desc_a,info,data)
#ifdef PSB_MPI_MOD
@ -436,22 +591,26 @@ contains
include 'mpif.h'
#endif
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_z_base_multivect_type) :: y
complex(psb_dpk_) :: beta
type(psb_desc_type),target :: desc_a
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(in) :: flag
complex(psb_dpk_), intent(in) :: beta
class(psb_z_base_multivect_type), intent(inout) :: y
type(psb_desc_type),target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, idxs, idxr, totxch, err_act, data_
class(psb_i_base_vect_type), pointer :: d_vidx
character(len=20) :: name
info=psb_success_
name='psi_swap_tranv'
type(psb_ctxt_type) :: ctxt
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np, me, total_send, total_recv, num_neighbors, err_act, data_
class(psb_i_base_vect_type), pointer :: comm_indexes
character(len=20) :: name
! local variables used to detect the communication scheme
logical :: swap_mpi, swap_sync, swap_send, swap_recv, swap_start, swap_wait
logical :: baseline, neighbor_a2av
info = psb_success_
name = 'psi_zswaptran_multivect'
call psb_erractionsave(err_act)
ctxt = desc_a%get_context()
@ -475,14 +634,45 @@ contains
data_ = psb_comm_halo_
end if
call desc_a%get_list_p(data_,d_vidx,totxch,idxr,idxs,info)
call desc_a%get_list_p(data_,comm_indexes,num_neighbors,total_recv,total_send,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
swap_start = iand(flag,psb_swap_start_) /= 0
swap_wait = iand(flag,psb_swap_wait_) /= 0
baseline = swap_mpi .or. swap_send .or. swap_recv .or. swap_sync
neighbor_a2av = swap_start .or. swap_wait
if( (baseline.eqv..true.).and.(neighbor_a2av.eqv..true.) ) then
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: both baseline and neighbor_a2av are true')
goto 9999
end if
if (baseline) then
call psi_ztran_baseline_multivect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) then
call psb_errpush(info,name,a_err='baseline swap')
goto 9999
end if
else if (neighbor_a2av) then
call psi_ztran_neighbor_topology_multivect(ctxt,flag,beta,y,comm_indexes,num_neighbors,total_send,total_recv,info)
if (info /= psb_success_) then
call psb_errpush(info,name,a_err='neighbor a2av swap')
goto 9999
end if
else
info = psb_err_mpi_error_
call psb_errpush(info,name,a_err='Incompatible flag settings: neither baseline nor neighbor_a2av is true')
goto 9999
end if
call psi_swaptran(ctxt,flag,beta,y,d_vidx,totxch,idxs,idxr,info)
if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act)
return
@ -493,21 +683,8 @@ contains
end subroutine psi_zswaptran_multivect
!
!
! Subroutine: psi_ztran_vidx_multivect
! Data exchange among processes.
!
! Takes care of Y an encapsulated multivector. Relies on the gather/scatter methods
! of multivectors.
!
! The real workhorse: the outer routine will only choose the index list
! this one takes the index list and does the actual exchange.
!
!
!
module subroutine psi_ztran_vidx_multivect(ctxt,flag,beta,y,idx,&
& totxch,totsnd,totrcv,info)
module subroutine psi_ztran_baseline_multivect(ctxt,flag,beta,y,comm_indexes,&
& num_neighbors,total_send,total_recv,info)
#ifdef PSB_MPI_MOD
use mpi
@ -517,13 +694,13 @@ contains
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_z_base_multivect_type) :: y
complex(psb_dpk_) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
complex(psb_dpk_), intent(in) :: beta
class(psb_z_base_multivect_type), intent(inout) :: y
class(psb_i_base_vect_type), intent(inout) :: comm_indexes
integer(psb_ipk_), intent(in) :: num_neighbors,total_send, total_recv
integer(psb_ipk_), intent(out) :: info
! locals
integer(psb_mpk_) :: np, me, nesd, nerv, n
@ -537,8 +714,8 @@ contains
logical, parameter :: usersend=.false., debug=.false.
character(len=20) :: name
info=psb_success_
name='psi_swap_tran'
info = psb_success_
name = 'psi_ztran_baseline_multivect'
call psb_erractionsave(err_act)
call psb_info(ctxt,me,np)
if (np == -1) then
@ -557,10 +734,10 @@ contains
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
totrcv_ = totrcv * n
totsnd_ = totsnd * n
totrcv_ = total_recv * n
totsnd_ = total_send * n
call idx%sync()
call comm_indexes%sync()
if (debug) write(*,*) me,'Internal buffer'
if (do_send) then
@ -575,19 +752,19 @@ contains
end if
end if
if (debug) write(*,*) me,'do_send start'
call y%new_buffer(ione*size(idx%v),info)
call y%new_comid(totxch,info)
call y%new_buffer(ione*size(comm_indexes%v),info)
call y%new_comid(num_neighbors,info)
y%comid = mpi_request_null
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
! First I post all the non blocking receives
pnti = 1
snd_pt = totrcv_+1
rcv_pt = 1
p2ptag = psb_dcomplex_swap_tag
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
prcid(i) = psb_get_mpi_rank(ctxt,proc_to_comm)
if ((nesd>0).and.(proc_to_comm /= me)) then
if (debug) write(*,*) me,'Posting receive from',prcid(i),snd_pt
@ -607,11 +784,11 @@ contains
pnti = 1
snd_pt = totrcv_+1
rcv_pt = 1
do i=1, totxch
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
idx_pt = 1+pnti+psb_n_elem_recv_
call y%gth(idx_pt,rcv_pt,nerv,idx)
call y%gth(idx_pt,rcv_pt,nerv,comm_indexes)
rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3
@ -631,10 +808,10 @@ contains
snd_pt = totrcv_+1
rcv_pt = 1
p2ptag = psb_dcomplex_swap_tag
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
idx_pt = 1+pnti+psb_n_elem_recv_
if ((nerv>0).and.(proc_to_comm /= me)) then
@ -664,17 +841,17 @@ contains
call psb_errpush(info,name,m_err=(/-2/))
goto 9999
end if
call psb_realloc(totxch,prcid,info)
call psb_realloc(num_neighbors,prcid,info)
if (debug) write(*,*) me,' wait'
pnti = 1
snd_pt = totrcv_+1
rcv_pt = 1
p2ptag = psb_dcomplex_swap_tag
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
if (proc_to_comm /= me)then
if (nerv>0) then
call mpi_wait(y%comid(i,1),p2pstat,iret)
@ -709,15 +886,15 @@ contains
pnti = 1
snd_pt = totrcv_+1
rcv_pt = 1
do i=1, totxch
proc_to_comm = idx%v(pnti+psb_proc_id_)
nerv = idx%v(pnti+psb_n_elem_recv_)
nesd = idx%v(pnti+nerv+psb_n_elem_send_)
do i=1, num_neighbors
proc_to_comm = comm_indexes%v(pnti+psb_proc_id_)
nerv = comm_indexes%v(pnti+psb_n_elem_recv_)
nesd = comm_indexes%v(pnti+nerv+psb_n_elem_send_)
idx_pt = 1+pnti+nerv+psb_n_elem_send_
if (debug) write(0,*)me,' Received from: ',prcid(i),&
& y%combuf(snd_pt:snd_pt+n*nesd-1)
call y%sct(idx_pt,snd_pt,nesd,idx,beta)
call y%sct(idx_pt,snd_pt,nesd,comm_indexes,beta)
rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3
@ -752,6 +929,149 @@ contains
return
end subroutine psi_ztran_vidx_multivect
end subroutine psi_ztran_baseline_multivect
!
! Subroutine: psi_ztran_neighbor_topology_multivect
!
! Transpose (swaptran) halo data exchange for an encapsulated double-complex
! multivector, implemented with a nonblocking MPI neighborhood collective
! (mpi_ineighbor_alltoallv) over a graph communicator instead of point-to-point
! Isend/Irecv pairs.
!
! The exchange is split into two phases selected by bits of `flag`:
!   - psb_swap_start_: build the topology if needed, gather the outgoing data
!     into y%combuf and post the nonblocking collective;
!   - psb_swap_wait_:  wait on the posted request, scatter the received data
!     into y (combined with `beta`), and release the buffer.
! Both bits may be set in the same call, giving a complete blocking exchange.
!
! Note: because this is the TRANSPOSE exchange, the roles of the send and
! receive index lists are swapped with respect to a forward halo swap: the
! data gathered and sent here is addressed by recv_indexes/recv_counts, and
! the data received is scattered through send_indexes/send_counts (see the
! "(swapped)" debug message below).
!
! Arguments:
!   ctxt          - parallel context
!   flag          - communication-scheme flags (start/wait bits, see above)
!   beta          - scaling applied to existing entries of y during scatter
!   y             - the multivector being exchanged; also owns the neighbor
!                   topology, the communication buffer and the MPI request
!   comm_indexes  - encapsulated halo index list (same layout as in the
!                   baseline swap routines)
!   num_neighbors - number of processes exchanged with
!   total_send    - total number of elements to send
!   total_recv    - total number of elements to receive
!   info          - return code (psb_success_ on success)
!
subroutine psi_ztran_neighbor_topology_multivect(ctxt,flag,beta,y,comm_indexes, &
& num_neighbors,total_send,total_recv,info)
#ifdef PSB_MPI_MOD
use mpi
#endif
implicit none
#ifdef PSB_MPI_H
include 'mpif.h'
#endif
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
complex(psb_dpk_), intent(in) :: beta
class(psb_z_base_multivect_type), intent(inout) :: y
class(psb_i_base_vect_type), intent(inout) :: comm_indexes
integer(psb_ipk_), intent(in) :: num_neighbors,total_send,total_recv
integer(psb_ipk_), intent(out) :: info
! locals
integer(psb_mpk_) :: icomm
integer(psb_mpk_) :: np, me
integer(psb_mpk_) :: iret, p2pstat(mpi_status_size)
integer(psb_ipk_) :: err_act, topology_total_send, topology_total_recv, buffer_size
logical :: do_start, do_wait
logical, parameter :: debug = .false.
character(len=30) :: name
info = psb_success_
name = 'psi_ztran_neighbor_topology_multivect'
call psb_erractionsave(err_act)
call psb_info(ctxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
icomm = ctxt%get_mpic()
! Decode the requested phase(s) from the flag bit-mask.
do_start = iand(flag,psb_swap_start_) /= 0
do_wait = iand(flag,psb_swap_wait_) /= 0
! Make sure the index list is valid on the host side before reading it.
call comm_indexes%sync()
if (do_start) then
if(debug) write(*,*) me,' nbr_tran_vect: starting data exchange'
! Lazily build the neighborhood (graph) topology the first time through;
! it is cached on the multivector and reused on subsequent exchanges.
if (.not. y%neighbor_topology%is_initialized) then
if (debug) write(*,*) me,' nbr_tran_vect: building topology'
call y%neighbor_topology%init(comm_indexes%v, num_neighbors, total_send, total_recv, &
& ctxt, icomm, info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_, name, &
& a_err='neighbor_topology_init')
goto 9999
end if
end if
topology_total_send = y%neighbor_topology%total_send
topology_total_recv = y%neighbor_topology%total_recv
! Single buffer holding both directions: outgoing (transpose-send) data in
! combuf(1:total_recv), incoming data in combuf(total_recv+1:).
buffer_size = topology_total_send + topology_total_recv
call y%new_buffer(buffer_size, info)
if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_, name)
goto 9999
end if
y%communication_handle = mpi_request_null
if (debug) write(*,*) me,' nbr_tran_vect: gathering (recv) data,', topology_total_recv,' elems'
! Transpose exchange: gather through the RECV index list (see header note).
call y%gth(int(topology_total_recv,psb_mpk_), &
& y%neighbor_topology%recv_indexes, &
& y%combuf(1:topology_total_recv))
! Ensure any device-side gather has completed before MPI touches the buffer.
call y%device_wait()
if (debug) write(*,*) me,' nbr_tran_vect: posting MPI_Ineighbor_alltoallv (swapped)'
! Note the swapped count/displacement arrays: recv_* drive the send side
! and send_* drive the receive side of the collective.
call mpi_ineighbor_alltoallv( &
& y%combuf(1), &
& y%neighbor_topology%recv_counts, &
& y%neighbor_topology%recv_displs, &
& psb_mpi_c_dpk_, &
& y%combuf(topology_total_recv + 1), &
& y%neighbor_topology%send_counts, &
& y%neighbor_topology%send_displs, &
& psb_mpi_c_dpk_, &
& y%neighbor_topology%graph_comm, &
& y%communication_handle, iret)
if (iret /= mpi_success) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
end if
if (do_wait) then
! A null request here means wait was invoked without a matching start.
if (y%communication_handle == mpi_request_null) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/-2/))
goto 9999
end if
topology_total_send = y%neighbor_topology%total_send
topology_total_recv = y%neighbor_topology%total_recv
if (debug) write(*,*) me,' nbr_tran_vect: waiting on MPI request'
call mpi_wait(y%communication_handle, p2pstat, iret)
if (iret /= mpi_success) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
if (debug) write(*,*) me,' nbr_tran_vect: scattering (send) data,', topology_total_send,' elems'
! Scatter the received data through the SEND index list, combining with
! the existing entries of y via beta.
call y%sct(int(topology_total_send,psb_mpk_), &
& y%neighbor_topology%send_indexes, &
& y%combuf(topology_total_recv+1:topology_total_recv+topology_total_send), &
& beta)
y%communication_handle = mpi_request_null
call y%device_wait()
! Release the communication buffer (policy decided by the vector itself).
call y%maybe_free_buffer(info)
if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_, name)
goto 9999
end if
if (debug) write(*,*) me,' nbr_tran_vect: done'
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psi_ztran_neighbor_topology_multivect
end submodule psi_z_swaptran_impl

@ -264,7 +264,7 @@ serial/psb_c_base_mat_mod.o: serial/psb_c_base_vect_mod.o
serial/psb_z_base_mat_mod.o: serial/psb_z_base_vect_mod.o
serial/psb_l_base_vect_mod.o: serial/psb_i_base_vect_mod.o
serial/psb_c_base_vect_mod.o serial/psb_s_base_vect_mod.o serial/psb_d_base_vect_mod.o serial/psb_z_base_vect_mod.o: serial/psb_i_base_vect_mod.o serial/psb_l_base_vect_mod.o comm/psb_neighbor_topology_mod.o
serial/psb_i_base_vect_mod.o serial/psb_l_base_vect_mod.o serial/psb_c_base_vect_mod.o serial/psb_s_base_vect_mod.o serial/psb_d_base_vect_mod.o serial/psb_z_base_vect_mod.o: auxil/psi_serial_mod.o psb_realloc_mod.o
serial/psb_i_base_vect_mod.o serial/psb_l_base_vect_mod.o serial/psb_c_base_vect_mod.o serial/psb_s_base_vect_mod.o serial/psb_d_base_vect_mod.o serial/psb_z_base_vect_mod.o: auxil/psi_serial_mod.o psb_realloc_mod.o comm/psb_neighbor_topology_mod.o
serial/psb_s_mat_mod.o: serial/psb_s_base_mat_mod.o serial/psb_s_csr_mat_mod.o serial/psb_s_csc_mat_mod.o serial/psb_s_vect_mod.o \
serial/psb_i_vect_mod.o serial/psb_l_vect_mod.o
serial/psb_d_mat_mod.o: serial/psb_d_base_mat_mod.o serial/psb_d_csr_mat_mod.o serial/psb_d_csc_mat_mod.o serial/psb_d_vect_mod.o \

@ -37,81 +37,41 @@ module psi_c_comm_v_mod
interface psi_swapdata
module subroutine psi_cswapdata_vect(flag,beta,y,desc_a,info,data)
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_c_base_vect_type) :: y
complex(psb_spk_) :: beta
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(in) :: flag
complex(psb_spk_), intent(in) :: beta
class(psb_c_base_vect_type), intent(inout) :: y
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
end subroutine psi_cswapdata_vect
module subroutine psi_cswapdata_multivect(flag,beta,y,desc_a,info,data)
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_c_base_multivect_type) :: y
complex(psb_spk_) :: beta
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(in) :: flag
class(psb_c_base_multivect_type), intent(inout) :: y
complex(psb_spk_), intent(in) :: beta
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
end subroutine psi_cswapdata_multivect
module subroutine psi_cswap_vidx_vect(ctxt,flag,beta,y,idx,&
& totxch,totsnd,totrcv,info)
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_c_base_vect_type) :: y
complex(psb_spk_) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
end subroutine psi_cswap_vidx_vect
module subroutine psi_cswap_vidx_multivect(ctxt,flag,beta,y,idx,&
& totxch,totsnd,totrcv,info)
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_c_base_multivect_type) :: y
complex(psb_spk_) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
end subroutine psi_cswap_vidx_multivect
end interface psi_swapdata
interface psi_swaptran
module subroutine psi_cswaptran_vect(flag,beta,y,desc_a,info,data)
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_c_base_vect_type) :: y
complex(psb_spk_) :: beta
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(in) :: flag
complex(psb_spk_), intent(in) :: beta
class(psb_c_base_vect_type), intent(inout) :: y
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
end subroutine psi_cswaptran_vect
module subroutine psi_cswaptran_multivect(flag,beta,y,desc_a,info,data)
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_c_base_multivect_type) :: y
complex(psb_spk_) :: beta
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(in) :: flag
complex(psb_spk_), intent(in) :: beta
class(psb_c_base_multivect_type), intent(inout) :: y
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
end subroutine psi_cswaptran_multivect
module subroutine psi_ctran_vidx_vect(ctxt,flag,beta,y,idx,&
& totxch,totsnd,totrcv,info)
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_c_base_vect_type) :: y
complex(psb_spk_) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
end subroutine psi_ctran_vidx_vect
module subroutine psi_ctran_vidx_multivect(ctxt,flag,beta,y,idx,&
& totxch,totsnd,totrcv,info)
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_c_base_multivect_type) :: y
complex(psb_spk_) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
end subroutine psi_ctran_vidx_multivect
end interface psi_swaptran
interface psi_ovrl_upd

@ -61,7 +61,9 @@ module psi_d_comm_v_mod
interface psi_swaptran
! ---------------------------------------------------------------
! Upper call in order to populate idx using desc_a%get_list_p
! Upper call in order to populate idx using desc_a%get_list_p
! and then call different communications schemes depending
! on flag variable
! ---------------------------------------------------------------
module subroutine psi_dswaptran_vect(flag,beta,y,desc_a,info,data)
integer(psb_ipk_), intent(in) :: flag
@ -79,33 +81,6 @@ module psi_d_comm_v_mod
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
end subroutine psi_dswaptran_multivect
! ---------------------------------------------------------------
! ---------------------------------------------------------------
! Wrapper that calls different communications schemes depending on
! flag variable
! ---------------------------------------------------------------
module subroutine psi_dtran_vidx_vect(ctxt,flag,beta,y,idx,&
& totxch,totsnd,totrcv,info)
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_d_base_vect_type), intent(inout) :: y
real(psb_dpk_), intent(in) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
end subroutine psi_dtran_vidx_vect
module subroutine psi_dtran_vidx_multivect(ctxt,flag,beta,y,idx,&
& totxch,totsnd,totrcv,info)
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_d_base_multivect_type), intent(inout) :: y
real(psb_dpk_), intent(in) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
end subroutine psi_dtran_vidx_multivect
! ---------------------------------------------------------------
end interface psi_swaptran

@ -38,81 +38,41 @@ module psi_i_comm_v_mod
interface psi_swapdata
module subroutine psi_iswapdata_vect(flag,beta,y,desc_a,info,data)
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_i_base_vect_type) :: y
integer(psb_ipk_) :: beta
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(in) :: beta
class(psb_i_base_vect_type), intent(inout) :: y
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
end subroutine psi_iswapdata_vect
module subroutine psi_iswapdata_multivect(flag,beta,y,desc_a,info,data)
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_i_base_multivect_type) :: y
integer(psb_ipk_) :: beta
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(in) :: beta
class(psb_i_base_multivect_type), intent(inout) :: y
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
end subroutine psi_iswapdata_multivect
module subroutine psi_iswap_vidx_vect(ctxt,flag,beta,y,idx,&
& totxch,totsnd,totrcv,info)
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_i_base_vect_type) :: y
integer(psb_ipk_) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
end subroutine psi_iswap_vidx_vect
module subroutine psi_iswap_vidx_multivect(ctxt,flag,beta,y,idx,&
& totxch,totsnd,totrcv,info)
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_i_base_multivect_type) :: y
integer(psb_ipk_) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
end subroutine psi_iswap_vidx_multivect
end interface psi_swapdata
interface psi_swaptran
module subroutine psi_iswaptran_vect(flag,beta,y,desc_a,info,data)
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_i_base_vect_type) :: y
integer(psb_ipk_) :: beta
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(in) :: beta
class(psb_i_base_vect_type), intent(inout) :: y
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
end subroutine psi_iswaptran_vect
module subroutine psi_iswaptran_multivect(flag,beta,y,desc_a,info,data)
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_i_base_multivect_type) :: y
integer(psb_ipk_) :: beta
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(in) :: beta
class(psb_i_base_multivect_type), intent(inout) :: y
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
end subroutine psi_iswaptran_multivect
module subroutine psi_itran_vidx_vect(ctxt,flag,beta,y,idx,&
& totxch,totsnd,totrcv,info)
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_i_base_vect_type) :: y
integer(psb_ipk_) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
end subroutine psi_itran_vidx_vect
module subroutine psi_itran_vidx_multivect(ctxt,flag,beta,y,idx,&
& totxch,totsnd,totrcv,info)
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_i_base_multivect_type) :: y
integer(psb_ipk_) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
end subroutine psi_itran_vidx_multivect
end interface psi_swaptran
interface psi_ovrl_upd

@ -39,81 +39,41 @@ module psi_l_comm_v_mod
interface psi_swapdata
module subroutine psi_lswapdata_vect(flag,beta,y,desc_a,info,data)
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_l_base_vect_type) :: y
integer(psb_lpk_) :: beta
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(in) :: flag
integer(psb_lpk_), intent(in) :: beta
class(psb_l_base_vect_type), intent(inout) :: y
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
end subroutine psi_lswapdata_vect
module subroutine psi_lswapdata_multivect(flag,beta,y,desc_a,info,data)
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_l_base_multivect_type) :: y
integer(psb_lpk_) :: beta
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(in) :: flag
integer(psb_lpk_), intent(in) :: beta
class(psb_l_base_multivect_type), intent(inout) :: y
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
end subroutine psi_lswapdata_multivect
module subroutine psi_lswap_vidx_vect(ctxt,flag,beta,y,idx,&
& totxch,totsnd,totrcv,info)
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_l_base_vect_type) :: y
integer(psb_lpk_) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
end subroutine psi_lswap_vidx_vect
module subroutine psi_lswap_vidx_multivect(ctxt,flag,beta,y,idx,&
& totxch,totsnd,totrcv,info)
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_l_base_multivect_type) :: y
integer(psb_lpk_) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
end subroutine psi_lswap_vidx_multivect
end interface psi_swapdata
interface psi_swaptran
module subroutine psi_lswaptran_vect(flag,beta,y,desc_a,info,data)
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_l_base_vect_type) :: y
integer(psb_lpk_) :: beta
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(in) :: flag
integer(psb_lpk_), intent(in) :: beta
class(psb_l_base_vect_type), intent(inout) :: y
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
end subroutine psi_lswaptran_vect
module subroutine psi_lswaptran_multivect(flag,beta,y,desc_a,info,data)
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_l_base_multivect_type) :: y
integer(psb_lpk_) :: beta
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(in) :: flag
integer(psb_lpk_), intent(in) :: beta
class(psb_l_base_multivect_type), intent(inout) :: y
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
end subroutine psi_lswaptran_multivect
module subroutine psi_ltran_vidx_vect(ctxt,flag,beta,y,idx,&
& totxch,totsnd,totrcv,info)
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_l_base_vect_type) :: y
integer(psb_lpk_) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
end subroutine psi_ltran_vidx_vect
module subroutine psi_ltran_vidx_multivect(ctxt,flag,beta,y,idx,&
& totxch,totsnd,totrcv,info)
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_l_base_multivect_type) :: y
integer(psb_lpk_) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
end subroutine psi_ltran_vidx_multivect
end interface psi_swaptran
interface psi_ovrl_upd

@ -73,26 +73,6 @@ module psi_s_comm_v_mod
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
end subroutine psi_sswaptran_multivect
module subroutine psi_stran_vidx_vect(ctxt,flag,beta,y,idx,&
& totxch,totsnd,totrcv,info)
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
real(psb_spk_), intent(in) :: beta
class(psb_s_base_vect_type), intent(inout) :: y
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
integer(psb_ipk_), intent(out) :: info
end subroutine psi_stran_vidx_vect
module subroutine psi_stran_vidx_multivect(ctxt,flag,beta,y,idx,&
& totxch,totsnd,totrcv,info)
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
real(psb_spk_), intent(in) :: beta
class(psb_s_base_multivect_type), intent(inout) :: y
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
integer(psb_ipk_), intent(out) :: info
end subroutine psi_stran_vidx_multivect
end interface psi_swaptran
interface psi_ovrl_upd

@ -37,81 +37,41 @@ module psi_z_comm_v_mod
interface psi_swapdata
module subroutine psi_zswapdata_vect(flag,beta,y,desc_a,info,data)
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_z_base_vect_type) :: y
complex(psb_dpk_) :: beta
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(in) :: flag
complex(psb_dpk_), intent(in) :: beta
class(psb_z_base_vect_type), intent(inout) :: y
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
end subroutine psi_zswapdata_vect
module subroutine psi_zswapdata_multivect(flag,beta,y,desc_a,info,data)
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_z_base_multivect_type) :: y
complex(psb_dpk_) :: beta
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(in) :: flag
complex(psb_dpk_), intent(in) :: beta
class(psb_z_base_multivect_type), intent(inout) :: y
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
end subroutine psi_zswapdata_multivect
module subroutine psi_zswap_vidx_vect(ctxt,flag,beta,y,idx,&
& totxch,totsnd,totrcv,info)
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_z_base_vect_type) :: y
complex(psb_dpk_) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
end subroutine psi_zswap_vidx_vect
module subroutine psi_zswap_vidx_multivect(ctxt,flag,beta,y,idx,&
& totxch,totsnd,totrcv,info)
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_z_base_multivect_type) :: y
complex(psb_dpk_) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
end subroutine psi_zswap_vidx_multivect
end interface psi_swapdata
interface psi_swaptran
module subroutine psi_zswaptran_vect(flag,beta,y,desc_a,info,data)
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_z_base_vect_type) :: y
complex(psb_dpk_) :: beta
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(in) :: flag
complex(psb_dpk_), intent(in) :: beta
class(psb_z_base_vect_type), intent(inout) :: y
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
end subroutine psi_zswaptran_vect
module subroutine psi_zswaptran_multivect(flag,beta,y,desc_a,info,data)
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_z_base_multivect_type) :: y
complex(psb_dpk_) :: beta
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
integer(psb_ipk_), intent(in) :: flag
complex(psb_dpk_), intent(in) :: beta
class(psb_z_base_multivect_type), intent(inout) :: y
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional :: data
end subroutine psi_zswaptran_multivect
module subroutine psi_ztran_vidx_vect(ctxt,flag,beta,y,idx,&
& totxch,totsnd,totrcv,info)
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_z_base_vect_type) :: y
complex(psb_dpk_) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
end subroutine psi_ztran_vidx_vect
module subroutine psi_ztran_vidx_multivect(ctxt,flag,beta,y,idx,&
& totxch,totsnd,totrcv,info)
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
class(psb_z_base_multivect_type) :: y
complex(psb_dpk_) :: beta
class(psb_i_base_vect_type), intent(inout) :: idx
integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv
end subroutine psi_ztran_vidx_multivect
end interface psi_swaptran
interface psi_ovrl_upd

@ -49,6 +49,7 @@ module psb_c_base_vect_mod
use psb_realloc_mod
use psb_i_base_vect_mod
use psb_l_base_vect_mod
use psb_neighbor_topology_mod
!> \namespace psb_base_mod \class psb_c_base_vect_type
!! The psb_c_base_vect_type
@ -65,6 +66,7 @@ module psb_c_base_vect_mod
complex(psb_spk_), allocatable :: v(:)
complex(psb_spk_), allocatable :: combuf(:)
integer(psb_mpk_), allocatable :: comid(:,:)
integer(psb_mpk_) :: communication_handle ! This is used only for Isend/Irecv scheme, to store the communication handle for the whole halo exchange
!> vector bldstate:
!! null: pristine;
!! build: it's being filled with entries;
@ -77,6 +79,8 @@ module psb_c_base_vect_mod
integer(psb_ipk_), private :: dupl = psb_dupl_null_
integer(psb_ipk_), private :: ncfs = 0
integer(psb_ipk_), allocatable :: iv(:)
type(psb_neighbor_topology_type) :: neighbor_topology
contains
!
! Constructors/allocators
@ -241,6 +245,10 @@ module psb_c_base_vect_mod
procedure, pass(z) :: addconst_v2 => c_base_addconst_v2
generic, public :: addconst => addconst_a2,addconst_v2
! Methods used to handle topology in neighbor_alltoallv communication scheme
procedure, pass(x) :: init_topology => c_base_init_topology
procedure, pass(x) :: free_topology => c_base_free_topology
end type psb_c_base_vect_type
@ -2430,6 +2438,35 @@ contains
if (x%is_dev()) call x%sync()
call z%addconst(x%v,b,info)
end subroutine c_base_addconst_v2
! --------------------------------------------------------------------
! Implementation of methods used for neighbor alltoallv communication
! --------------------------------------------------------------------
!> Initialize the neighbor-alltoallv communication topology owned by this
!> vector, delegating to psb_neighbor_topology_type%init.
!>
!> \param x                 the vector owning the topology
!> \param halo_index        flat halo index list describing the exchanges
!> \param num_exchanges     number of neighbor processes
!> \param total_send_elems  total number of elements to be sent
!> \param total_recv_elems  total number of elements to be received
!> \param ctxt              parallel context
!> \param icomm             underlying MPI communicator
!> \param info              return code from the topology init
subroutine c_base_init_topology(x, halo_index, num_exchanges, &
& total_send_elems, total_recv_elems, ctxt, icomm, info)
implicit none
class(psb_c_base_vect_type), intent(inout) :: x
integer(psb_ipk_), intent(in) :: halo_index(:)
integer(psb_ipk_), intent(in) :: num_exchanges, total_send_elems, total_recv_elems
type(psb_ctxt_type), intent(in) :: ctxt
integer(psb_mpk_), intent(in) :: icomm
integer(psb_ipk_), intent(out) :: info
! Pure delegation: all the work happens in the topology object.
call x%neighbor_topology%init(halo_index, num_exchanges, &
& total_send_elems, total_recv_elems, ctxt, icomm, info)
end subroutine c_base_init_topology
!> Free the neighbor-alltoallv communication topology owned by this vector,
!> delegating to psb_neighbor_topology_type%free.
!>
!> \param x     the vector owning the topology
!> \param info  return code from the topology free
subroutine c_base_free_topology(x, info)
implicit none
class(psb_c_base_vect_type), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
call x%neighbor_topology%free(info)
end subroutine c_base_free_topology
! --------------------------------------------------------------------
end module psb_c_base_vect_mod
@ -2439,6 +2476,7 @@ module psb_c_base_multivect_mod
use psb_error_mod
use psb_realloc_mod
use psb_c_base_vect_mod
use psb_neighbor_topology_mod
!> \namespace psb_base_mod \class psb_c_base_vect_type
!! The psb_c_base_vect_type
@ -2458,6 +2496,7 @@ module psb_c_base_multivect_mod
complex(psb_spk_), allocatable :: v(:,:)
complex(psb_spk_), allocatable :: combuf(:)
integer(psb_mpk_), allocatable :: comid(:,:)
integer(psb_mpk_) :: communication_handle ! This is used only for Isend/Irecv scheme, to store the communication handle for the whole halo exchange
!> vector bldstate:
!! null: pristine;
!! build: it's being filled with entries;
@ -2470,6 +2509,8 @@ module psb_c_base_multivect_mod
integer(psb_ipk_), private :: dupl = psb_dupl_null_
integer(psb_ipk_), private :: ncfs = 0
integer(psb_ipk_), allocatable :: iv(:)
type(psb_neighbor_topology_type) :: neighbor_topology
contains
!
! Constructors/allocators
@ -2595,6 +2636,10 @@ module psb_c_base_multivect_mod
procedure, pass(y) :: sctb_x => c_base_mlv_sctb_x
procedure, pass(y) :: sctb_buf => c_base_mlv_sctb_buf
generic, public :: sct => sctb, sctbr2, sctb_x, sctb_buf
! Neighbor alltoallv communication topology handling
procedure, pass(x) :: init_topology => c_base_mlv_init_topology
procedure, pass(x) :: free_topology => c_base_mlv_free_topology
end type psb_c_base_multivect_type
interface psb_c_base_multivect
@ -4118,4 +4163,33 @@ contains
end subroutine c_base_mlv_device_wait
! --------------------------------------------------------------------
! Implementation of methods used for neighbor alltoallv communication
! --------------------------------------------------------------------
!> Build the neighbor-alltoallv communication topology attached to
!! multivector X. Thin forwarder: all setup is delegated to the
!! neighbor_topology component's init method.
subroutine c_base_mlv_init_topology(x, halo_index, num_exchanges, &
     & total_send_elems, total_recv_elems, ctxt, icomm, info)
  implicit none
  class(psb_c_base_multivect_type), intent(inout) :: x
  !> Halo index list; assumed to encode the exchange pattern
  !! (semantics defined by psb_neighbor_topology_type%init).
  integer(psb_ipk_), intent(in) :: halo_index(:)
  integer(psb_ipk_), intent(in) :: num_exchanges
  integer(psb_ipk_), intent(in) :: total_send_elems
  integer(psb_ipk_), intent(in) :: total_recv_elems
  type(psb_ctxt_type), intent(in) :: ctxt   ! parallel context
  integer(psb_mpk_), intent(in) :: icomm    ! MPI communicator handle
  integer(psb_ipk_), intent(out) :: info    ! status from the topology init

  call x%neighbor_topology%init(halo_index, num_exchanges, total_send_elems, &
       & total_recv_elems, ctxt, icomm, info)
end subroutine c_base_mlv_init_topology
!> Release the neighbor-alltoallv topology component of multivector X.
!! Thin forwarder: delegates to the neighbor_topology component's free method.
subroutine c_base_mlv_free_topology(x, info)
implicit none
class(psb_c_base_multivect_type), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info ! status code propagated from the topology free
call x%neighbor_topology%free(info)
end subroutine c_base_mlv_free_topology
! --------------------------------------------------------------------
end module psb_c_base_multivect_mod

@ -47,6 +47,7 @@ module psb_i_base_vect_mod
use psb_const_mod
use psb_error_mod
use psb_realloc_mod
use psb_neighbor_topology_mod
!> \namespace psb_base_mod \class psb_i_base_vect_type
!! The psb_i_base_vect_type
@ -62,7 +63,9 @@ module psb_i_base_vect_mod
!> Values.
integer(psb_ipk_), allocatable :: v(:)
integer(psb_ipk_), allocatable :: combuf(:)
integer(psb_mpk_), allocatable :: comid(:,:)
integer(psb_mpk_), allocatable :: comid(:,:) ! This is used only for Isend/Irecv scheme, to store the communication handles for each neighbor
integer(psb_mpk_) :: communication_handle ! This is used only for Isend/Irecv scheme, to store the communication handle for the whole halo exchange
!> vector bldstate:
!! null: pristine;
!! build: it's being filled with entries;
@ -75,6 +78,9 @@ module psb_i_base_vect_mod
integer(psb_ipk_), private :: dupl = psb_dupl_null_
integer(psb_ipk_), private :: ncfs = 0
integer(psb_ipk_), allocatable :: iv(:)
type(psb_neighbor_topology_type) :: neighbor_topology
contains
!
! Constructors/allocators
@ -170,9 +176,9 @@ module psb_i_base_vect_mod
procedure, pass(x) :: check_addr => i_base_check_addr
! Methods used to handle topology in neighbor_alltoallv communication scheme
procedure, pass(x) :: init_topology => i_base_init_topology
procedure, pass(x) :: free_topology => i_base_free_topology
end type psb_i_base_vect_type
@ -1387,6 +1393,33 @@ contains
end subroutine i_base_sctb_buf
! --------------------------------------------------------------------
! Implementation of methods used for neighbor alltoallv communication
! --------------------------------------------------------------------
!> Build the neighbor-alltoallv communication topology attached to
!! vector X. Thin forwarder: all setup is delegated to the
!! neighbor_topology component's init method.
subroutine i_base_init_topology(x, halo_index, num_exchanges, &
     & total_send_elems, total_recv_elems, ctxt, icomm, info)
  implicit none
  class(psb_i_base_vect_type), intent(inout) :: x
  !> Halo index list; assumed to encode the exchange pattern
  !! (semantics defined by psb_neighbor_topology_type%init).
  integer(psb_ipk_), intent(in) :: halo_index(:)
  integer(psb_ipk_), intent(in) :: num_exchanges
  integer(psb_ipk_), intent(in) :: total_send_elems
  integer(psb_ipk_), intent(in) :: total_recv_elems
  type(psb_ctxt_type), intent(in) :: ctxt   ! parallel context
  integer(psb_mpk_), intent(in) :: icomm    ! MPI communicator handle
  integer(psb_ipk_), intent(out) :: info    ! status from the topology init

  call x%neighbor_topology%init(halo_index, num_exchanges, total_send_elems, &
       & total_recv_elems, ctxt, icomm, info)
end subroutine i_base_init_topology
!> Release the neighbor-alltoallv topology component of vector X.
!! Thin forwarder: delegates to the neighbor_topology component's free method.
subroutine i_base_free_topology(x, info)
implicit none
class(psb_i_base_vect_type), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info ! status code propagated from the topology free
call x%neighbor_topology%free(info)
end subroutine i_base_free_topology
! --------------------------------------------------------------------
end module psb_i_base_vect_mod
@ -1397,6 +1430,7 @@ module psb_i_base_multivect_mod
use psb_error_mod
use psb_realloc_mod
use psb_i_base_vect_mod
use psb_neighbor_topology_mod
!> \namespace psb_base_mod \class psb_i_base_vect_type
!! The psb_i_base_vect_type
@ -1415,7 +1449,9 @@ module psb_i_base_multivect_mod
!> Values.
integer(psb_ipk_), allocatable :: v(:,:)
integer(psb_ipk_), allocatable :: combuf(:)
integer(psb_mpk_), allocatable :: comid(:,:)
integer(psb_mpk_), allocatable :: comid(:,:) ! This is used only for Isend/Irecv scheme, to store the communication handles for each neighbor
integer(psb_mpk_) :: communication_handle ! This is used only for Isend/Irecv scheme, to store the communication handle for the whole halo exchange
!> vector bldstate:
!! null: pristine;
!! build: it's being filled with entries;
@ -1428,6 +1464,9 @@ module psb_i_base_multivect_mod
integer(psb_ipk_), private :: dupl = psb_dupl_null_
integer(psb_ipk_), private :: ncfs = 0
integer(psb_ipk_), allocatable :: iv(:)
type(psb_neighbor_topology_type) :: neighbor_topology
contains
!
! Constructors/allocators
@ -1520,6 +1559,11 @@ module psb_i_base_multivect_mod
procedure, pass(y) :: sctb_x => i_base_mlv_sctb_x
procedure, pass(y) :: sctb_buf => i_base_mlv_sctb_buf
generic, public :: sct => sctb, sctbr2, sctb_x, sctb_buf
! Neighbor alltoallv communication topology handling
procedure, pass(x) :: init_topology => i_base_mlv_init_topology
procedure, pass(x) :: free_topology => i_base_mlv_free_topology
end type psb_i_base_multivect_type
interface psb_i_base_multivect
@ -2561,4 +2605,33 @@ contains
end subroutine i_base_mlv_device_wait
! --------------------------------------------------------------------
! Implementation of methods used for neighbor alltoallv communication
! --------------------------------------------------------------------
!> Build the neighbor-alltoallv communication topology attached to
!! multivector X. Thin forwarder: all setup is delegated to the
!! neighbor_topology component's init method.
subroutine i_base_mlv_init_topology(x, halo_index, num_exchanges, &
     & total_send_elems, total_recv_elems, ctxt, icomm, info)
  implicit none
  class(psb_i_base_multivect_type), intent(inout) :: x
  !> Halo index list; assumed to encode the exchange pattern
  !! (semantics defined by psb_neighbor_topology_type%init).
  integer(psb_ipk_), intent(in) :: halo_index(:)
  integer(psb_ipk_), intent(in) :: num_exchanges
  integer(psb_ipk_), intent(in) :: total_send_elems
  integer(psb_ipk_), intent(in) :: total_recv_elems
  type(psb_ctxt_type), intent(in) :: ctxt   ! parallel context
  integer(psb_mpk_), intent(in) :: icomm    ! MPI communicator handle
  integer(psb_ipk_), intent(out) :: info    ! status from the topology init

  call x%neighbor_topology%init(halo_index, num_exchanges, total_send_elems, &
       & total_recv_elems, ctxt, icomm, info)
end subroutine i_base_mlv_init_topology
!> Release the neighbor-alltoallv topology component of multivector X.
!! Thin forwarder: delegates to the neighbor_topology component's free method.
subroutine i_base_mlv_free_topology(x, info)
implicit none
class(psb_i_base_multivect_type), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info ! status code propagated from the topology free
call x%neighbor_topology%free(info)
end subroutine i_base_mlv_free_topology
! --------------------------------------------------------------------
end module psb_i_base_multivect_mod

@ -48,6 +48,7 @@ module psb_l_base_vect_mod
use psb_error_mod
use psb_realloc_mod
use psb_i_base_vect_mod
use psb_neighbor_topology_mod
!> \namespace psb_base_mod \class psb_l_base_vect_type
!! The psb_l_base_vect_type
@ -63,7 +64,9 @@ module psb_l_base_vect_mod
!> Values.
integer(psb_lpk_), allocatable :: v(:)
integer(psb_lpk_), allocatable :: combuf(:)
integer(psb_mpk_), allocatable :: comid(:,:)
integer(psb_mpk_), allocatable :: comid(:,:) ! This is used only for Isend/Irecv scheme, to store the communication handles for each neighbor
integer(psb_mpk_) :: communication_handle ! This is used only for Isend/Irecv scheme, to store the communication handle for the whole halo exchange
!> vector bldstate:
!! null: pristine;
!! build: it's being filled with entries;
@ -76,6 +79,10 @@ module psb_l_base_vect_mod
integer(psb_ipk_), private :: dupl = psb_dupl_null_
integer(psb_ipk_), private :: ncfs = 0
integer(psb_ipk_), allocatable :: iv(:)
type(psb_neighbor_topology_type) :: neighbor_topology
contains
!
! Constructors/allocators
@ -172,9 +179,9 @@ module psb_l_base_vect_mod
procedure, pass(x) :: check_addr => l_base_check_addr
! Methods used to handle topology in neighbor_alltoallv communication scheme
procedure, pass(x) :: init_topology => l_base_init_topology
procedure, pass(x) :: free_topology => l_base_free_topology
end type psb_l_base_vect_type
@ -1388,6 +1395,34 @@ contains
end subroutine l_base_sctb_buf
! --------------------------------------------------------------------
! Implementation of methods used for neighbor alltoallv communication
! --------------------------------------------------------------------
!> Build the neighbor-alltoallv communication topology attached to
!! vector X. Thin forwarder: all setup is delegated to the
!! neighbor_topology component's init method.
subroutine l_base_init_topology(x, halo_index, num_exchanges, &
     & total_send_elems, total_recv_elems, ctxt, icomm, info)
  implicit none
  class(psb_l_base_vect_type), intent(inout) :: x
  !> Halo index list; assumed to encode the exchange pattern
  !! (semantics defined by psb_neighbor_topology_type%init).
  integer(psb_ipk_), intent(in) :: halo_index(:)
  integer(psb_ipk_), intent(in) :: num_exchanges
  integer(psb_ipk_), intent(in) :: total_send_elems
  integer(psb_ipk_), intent(in) :: total_recv_elems
  type(psb_ctxt_type), intent(in) :: ctxt   ! parallel context
  integer(psb_mpk_), intent(in) :: icomm    ! MPI communicator handle
  integer(psb_ipk_), intent(out) :: info    ! status from the topology init

  call x%neighbor_topology%init(halo_index, num_exchanges, total_send_elems, &
       & total_recv_elems, ctxt, icomm, info)
end subroutine l_base_init_topology
!> Release the neighbor-alltoallv topology component of vector X.
!! Thin forwarder: delegates to the neighbor_topology component's free method.
subroutine l_base_free_topology(x, info)
implicit none
class(psb_l_base_vect_type), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info ! status code propagated from the topology free
call x%neighbor_topology%free(info)
end subroutine l_base_free_topology
! --------------------------------------------------------------------
end module psb_l_base_vect_mod
@ -1398,6 +1433,7 @@ module psb_l_base_multivect_mod
use psb_error_mod
use psb_realloc_mod
use psb_l_base_vect_mod
use psb_neighbor_topology_mod
!> \namespace psb_base_mod \class psb_l_base_vect_type
!! The psb_l_base_vect_type
@ -1416,7 +1452,9 @@ module psb_l_base_multivect_mod
!> Values.
integer(psb_lpk_), allocatable :: v(:,:)
integer(psb_lpk_), allocatable :: combuf(:)
integer(psb_mpk_), allocatable :: comid(:,:)
integer(psb_mpk_), allocatable :: comid(:,:) ! This is used only for Isend/Irecv scheme, to store the communication handles for each neighbor
integer(psb_mpk_) :: communication_handle ! This is used only for Isend/Irecv scheme, to store the communication handle for the whole halo exchange
!> vector bldstate:
!! null: pristine;
!! build: it's being filled with entries;
@ -1429,6 +1467,9 @@ module psb_l_base_multivect_mod
integer(psb_ipk_), private :: dupl = psb_dupl_null_
integer(psb_ipk_), private :: ncfs = 0
integer(psb_ipk_), allocatable :: iv(:)
type(psb_neighbor_topology_type) :: neighbor_topology
contains
!
! Constructors/allocators
@ -1521,6 +1562,12 @@ module psb_l_base_multivect_mod
procedure, pass(y) :: sctb_x => l_base_mlv_sctb_x
procedure, pass(y) :: sctb_buf => l_base_mlv_sctb_buf
generic, public :: sct => sctb, sctbr2, sctb_x, sctb_buf
! Neighbor alltoallv communication topology handling
procedure, pass(x) :: init_topology => l_base_mlv_init_topology
procedure, pass(x) :: free_topology => l_base_mlv_free_topology
end type psb_l_base_multivect_type
interface psb_l_base_multivect
@ -2562,4 +2609,37 @@ contains
end subroutine l_base_mlv_device_wait
! --------------------------------------------------------------------
! Implementation of methods used for neighbor alltoallv communication
! --------------------------------------------------------------------
!> Build the neighbor-alltoallv communication topology attached to
!! multivector X. Thin forwarder: all setup is delegated to the
!! neighbor_topology component's init method.
subroutine l_base_mlv_init_topology(x, halo_index, num_exchanges, &
     & total_send_elems, total_recv_elems, ctxt, icomm, info)
  implicit none
  class(psb_l_base_multivect_type), intent(inout) :: x
  !> Halo index list; assumed to encode the exchange pattern
  !! (semantics defined by psb_neighbor_topology_type%init).
  integer(psb_ipk_), intent(in) :: halo_index(:)
  integer(psb_ipk_), intent(in) :: num_exchanges
  integer(psb_ipk_), intent(in) :: total_send_elems
  integer(psb_ipk_), intent(in) :: total_recv_elems
  type(psb_ctxt_type), intent(in) :: ctxt   ! parallel context
  integer(psb_mpk_), intent(in) :: icomm    ! MPI communicator handle
  integer(psb_ipk_), intent(out) :: info    ! status from the topology init

  call x%neighbor_topology%init(halo_index, num_exchanges, total_send_elems, &
       & total_recv_elems, ctxt, icomm, info)
end subroutine l_base_mlv_init_topology
!> Release the neighbor-alltoallv topology component of multivector X.
!! Thin forwarder: delegates to the neighbor_topology component's free method.
subroutine l_base_mlv_free_topology(x, info)
implicit none
class(psb_l_base_multivect_type), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info ! status code propagated from the topology free
call x%neighbor_topology%free(info)
end subroutine l_base_mlv_free_topology
! --------------------------------------------------------------------
end module psb_l_base_multivect_mod

@ -49,6 +49,7 @@ module psb_z_base_vect_mod
use psb_realloc_mod
use psb_i_base_vect_mod
use psb_l_base_vect_mod
use psb_neighbor_topology_mod
!> \namespace psb_base_mod \class psb_z_base_vect_type
!! The psb_z_base_vect_type
@ -65,6 +66,7 @@ module psb_z_base_vect_mod
complex(psb_dpk_), allocatable :: v(:)
complex(psb_dpk_), allocatable :: combuf(:)
integer(psb_mpk_), allocatable :: comid(:,:)
integer(psb_mpk_) :: communication_handle ! This is used only for Isend/Irecv scheme, to store the communication handle for the whole halo exchange
!> vector bldstate:
!! null: pristine;
!! build: it's being filled with entries;
@ -77,6 +79,8 @@ module psb_z_base_vect_mod
integer(psb_ipk_), private :: dupl = psb_dupl_null_
integer(psb_ipk_), private :: ncfs = 0
integer(psb_ipk_), allocatable :: iv(:)
type(psb_neighbor_topology_type) :: neighbor_topology
contains
!
! Constructors/allocators
@ -241,6 +245,10 @@ module psb_z_base_vect_mod
procedure, pass(z) :: addconst_v2 => z_base_addconst_v2
generic, public :: addconst => addconst_a2,addconst_v2
! Methods used to handle topology in neighbor_alltoallv communication scheme
procedure, pass(x) :: init_topology => z_base_init_topology
procedure, pass(x) :: free_topology => z_base_free_topology
end type psb_z_base_vect_type
@ -2430,6 +2438,35 @@ contains
if (x%is_dev()) call x%sync()
call z%addconst(x%v,b,info)
end subroutine z_base_addconst_v2
! --------------------------------------------------------------------
! Implementation of methods used for neighbor alltoallv communication
! --------------------------------------------------------------------
!> Build the neighbor-alltoallv communication topology attached to
!! vector X. Thin forwarder: all setup is delegated to the
!! neighbor_topology component's init method.
subroutine z_base_init_topology(x, halo_index, num_exchanges, &
     & total_send_elems, total_recv_elems, ctxt, icomm, info)
  implicit none
  class(psb_z_base_vect_type), intent(inout) :: x
  !> Halo index list; assumed to encode the exchange pattern
  !! (semantics defined by psb_neighbor_topology_type%init).
  integer(psb_ipk_), intent(in) :: halo_index(:)
  integer(psb_ipk_), intent(in) :: num_exchanges
  integer(psb_ipk_), intent(in) :: total_send_elems
  integer(psb_ipk_), intent(in) :: total_recv_elems
  type(psb_ctxt_type), intent(in) :: ctxt   ! parallel context
  integer(psb_mpk_), intent(in) :: icomm    ! MPI communicator handle
  integer(psb_ipk_), intent(out) :: info    ! status from the topology init

  call x%neighbor_topology%init(halo_index, num_exchanges, total_send_elems, &
       & total_recv_elems, ctxt, icomm, info)
end subroutine z_base_init_topology
!> Release the neighbor-alltoallv topology component of vector X.
!! Thin forwarder: delegates to the neighbor_topology component's free method.
subroutine z_base_free_topology(x, info)
implicit none
class(psb_z_base_vect_type), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info ! status code propagated from the topology free
call x%neighbor_topology%free(info)
end subroutine z_base_free_topology
! --------------------------------------------------------------------
end module psb_z_base_vect_mod
@ -2439,6 +2476,7 @@ module psb_z_base_multivect_mod
use psb_error_mod
use psb_realloc_mod
use psb_z_base_vect_mod
use psb_neighbor_topology_mod
!> \namespace psb_base_mod \class psb_z_base_vect_type
!! The psb_z_base_vect_type
@ -2458,6 +2496,7 @@ module psb_z_base_multivect_mod
complex(psb_dpk_), allocatable :: v(:,:)
complex(psb_dpk_), allocatable :: combuf(:)
integer(psb_mpk_), allocatable :: comid(:,:)
integer(psb_mpk_) :: communication_handle ! This is used only for Isend/Irecv scheme, to store the communication handle for the whole halo exchange
!> vector bldstate:
!! null: pristine;
!! build: it's being filled with entries;
@ -2470,6 +2509,8 @@ module psb_z_base_multivect_mod
integer(psb_ipk_), private :: dupl = psb_dupl_null_
integer(psb_ipk_), private :: ncfs = 0
integer(psb_ipk_), allocatable :: iv(:)
type(psb_neighbor_topology_type) :: neighbor_topology
contains
!
! Constructors/allocators
@ -2595,6 +2636,10 @@ module psb_z_base_multivect_mod
procedure, pass(y) :: sctb_x => z_base_mlv_sctb_x
procedure, pass(y) :: sctb_buf => z_base_mlv_sctb_buf
generic, public :: sct => sctb, sctbr2, sctb_x, sctb_buf
! Neighbor alltoallv communication topology handling
procedure, pass(x) :: init_topology => z_base_mlv_init_topology
procedure, pass(x) :: free_topology => z_base_mlv_free_topology
end type psb_z_base_multivect_type
interface psb_z_base_multivect
@ -4118,4 +4163,33 @@ contains
end subroutine z_base_mlv_device_wait
! --------------------------------------------------------------------
! Implementation of methods used for neighbor alltoallv communication
! --------------------------------------------------------------------
!> Build the neighbor-alltoallv communication topology attached to
!! multivector X. Thin forwarder: all setup is delegated to the
!! neighbor_topology component's init method.
subroutine z_base_mlv_init_topology(x, halo_index, num_exchanges, &
     & total_send_elems, total_recv_elems, ctxt, icomm, info)
  implicit none
  class(psb_z_base_multivect_type), intent(inout) :: x
  !> Halo index list; assumed to encode the exchange pattern
  !! (semantics defined by psb_neighbor_topology_type%init).
  integer(psb_ipk_), intent(in) :: halo_index(:)
  integer(psb_ipk_), intent(in) :: num_exchanges
  integer(psb_ipk_), intent(in) :: total_send_elems
  integer(psb_ipk_), intent(in) :: total_recv_elems
  type(psb_ctxt_type), intent(in) :: ctxt   ! parallel context
  integer(psb_mpk_), intent(in) :: icomm    ! MPI communicator handle
  integer(psb_ipk_), intent(out) :: info    ! status from the topology init

  call x%neighbor_topology%init(halo_index, num_exchanges, total_send_elems, &
       & total_recv_elems, ctxt, icomm, info)
end subroutine z_base_mlv_init_topology
!> Release the neighbor-alltoallv topology component of multivector X.
!! Thin forwarder: delegates to the neighbor_topology component's free method.
subroutine z_base_mlv_free_topology(x, info)
implicit none
class(psb_z_base_multivect_type), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info ! status code propagated from the topology free
call x%neighbor_topology%free(info)
end subroutine z_base_mlv_free_topology
! --------------------------------------------------------------------
end module psb_z_base_multivect_mod

1819
log.txt

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

@ -413,3 +413,4 @@ Comparison between serial/sol_x4_y1.mtx and parallel/sol_x4_y1.mtx: 0 difference
Comparison between serial/sol_x4_y2.mtx and parallel/sol_x4_y2.mtx: 0 differences
Comparison between serial/sol_x4_y3.mtx and parallel/sol_x4_y3.mtx: 0 differences
Comparison between serial/sol_x4_y4.mtx and parallel/sol_x4_y4.mtx: 0 differences

@ -9536,3 +9536,715 @@ Comparison between serial/sol_x4_y2.mtx and parallel/sol_x4_y2.mtx: 0 difference
Comparison between serial/sol_x4_y3.mtx and parallel/sol_x4_y3.mtx: 0 differences
Comparison between serial/sol_x4_y4.mtx and parallel/sol_x4_y4.mtx: 0 differences
Welcome to PSBLAS version: 3.9.1
This is the psb_geaxpby_test sample program
Number of processes used in this computation: 1
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y1_a1_b1.mtx 1/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y1_a1_b2.mtx 2/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y1_a1_b3.mtx 3/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y1_a2_b1.mtx 4/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y1_a2_b2.mtx 5/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y1_a2_b3.mtx 6/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y1_a3_b1.mtx 7/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y1_a3_b2.mtx 8/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y1_a3_b3.mtx 9/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y2_a1_b1.mtx 10/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y2_a1_b2.mtx 11/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y2_a1_b3.mtx 12/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y2_a2_b1.mtx 13/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y2_a2_b2.mtx 14/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y2_a2_b3.mtx 15/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y2_a3_b1.mtx 16/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y2_a3_b2.mtx 17/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y2_a3_b3.mtx 18/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y3_a1_b1.mtx 19/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y3_a1_b2.mtx 20/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y3_a1_b3.mtx 21/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y3_a2_b1.mtx 22/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y3_a2_b2.mtx 23/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y3_a2_b3.mtx 24/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y3_a3_b1.mtx 25/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y3_a3_b2.mtx 26/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y3_a3_b3.mtx 27/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y4_a1_b1.mtx 28/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y4_a1_b2.mtx 29/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y4_a1_b3.mtx 30/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y4_a2_b1.mtx 31/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y4_a2_b2.mtx 32/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y4_a2_b3.mtx 33/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y4_a3_b1.mtx 34/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y4_a3_b2.mtx 35/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x1_y4_a3_b3.mtx 36/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x2_y1_a1_b1.mtx 37/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x2_y1_a1_b2.mtx 38/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x2_y1_a1_b3.mtx 39/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x2_y1_a2_b1.mtx 40/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x2_y1_a2_b2.mtx 41/144 [OK]
[2026-03-23 10:37:01] Generation geaxpby single precision result file serial/sol_x2_y1_a2_b3.mtx 42/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x2_y1_a3_b1.mtx 43/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x2_y1_a3_b2.mtx 44/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x2_y1_a3_b3.mtx 45/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x2_y2_a1_b1.mtx 46/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x2_y2_a1_b2.mtx 47/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x2_y2_a1_b3.mtx 48/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x2_y2_a2_b1.mtx 49/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x2_y2_a2_b2.mtx 50/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x2_y2_a2_b3.mtx 51/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x2_y2_a3_b1.mtx 52/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x2_y2_a3_b2.mtx 53/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x2_y2_a3_b3.mtx 54/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x2_y3_a1_b1.mtx 55/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x2_y3_a1_b2.mtx 56/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x2_y3_a1_b3.mtx 57/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x2_y3_a2_b1.mtx 58/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x2_y3_a2_b2.mtx 59/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x2_y3_a2_b3.mtx 60/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x2_y3_a3_b1.mtx 61/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x2_y3_a3_b2.mtx 62/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x2_y3_a3_b3.mtx 63/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x2_y4_a1_b1.mtx 64/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x2_y4_a1_b2.mtx 65/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x2_y4_a1_b3.mtx 66/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x2_y4_a2_b1.mtx 67/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x2_y4_a2_b2.mtx 68/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x2_y4_a2_b3.mtx 69/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x2_y4_a3_b1.mtx 70/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x2_y4_a3_b2.mtx 71/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x2_y4_a3_b3.mtx 72/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x3_y1_a1_b1.mtx 73/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x3_y1_a1_b2.mtx 74/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x3_y1_a1_b3.mtx 75/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x3_y1_a2_b1.mtx 76/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x3_y1_a2_b2.mtx 77/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x3_y1_a2_b3.mtx 78/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x3_y1_a3_b1.mtx 79/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x3_y1_a3_b2.mtx 80/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x3_y1_a3_b3.mtx 81/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x3_y2_a1_b1.mtx 82/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x3_y2_a1_b2.mtx 83/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x3_y2_a1_b3.mtx 84/144 [OK]
[2026-03-23 10:37:02] Generation geaxpby single precision result file serial/sol_x3_y2_a2_b1.mtx 85/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x3_y2_a2_b2.mtx 86/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x3_y2_a2_b3.mtx 87/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x3_y2_a3_b1.mtx 88/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x3_y2_a3_b2.mtx 89/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x3_y2_a3_b3.mtx 90/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x3_y3_a1_b1.mtx 91/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x3_y3_a1_b2.mtx 92/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x3_y3_a1_b3.mtx 93/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x3_y3_a2_b1.mtx 94/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x3_y3_a2_b2.mtx 95/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x3_y3_a2_b3.mtx 96/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x3_y3_a3_b1.mtx 97/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x3_y3_a3_b2.mtx 98/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x3_y3_a3_b3.mtx 99/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x3_y4_a1_b1.mtx 100/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x3_y4_a1_b2.mtx 101/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x3_y4_a1_b3.mtx 102/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x3_y4_a2_b1.mtx 103/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x3_y4_a2_b2.mtx 104/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x3_y4_a2_b3.mtx 105/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x3_y4_a3_b1.mtx 106/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x3_y4_a3_b2.mtx 107/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x3_y4_a3_b3.mtx 108/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x4_y1_a1_b1.mtx 109/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x4_y1_a1_b2.mtx 110/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x4_y1_a1_b3.mtx 111/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x4_y1_a2_b1.mtx 112/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x4_y1_a2_b2.mtx 113/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x4_y1_a2_b3.mtx 114/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x4_y1_a3_b1.mtx 115/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x4_y1_a3_b2.mtx 116/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x4_y1_a3_b3.mtx 117/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x4_y2_a1_b1.mtx 118/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x4_y2_a1_b2.mtx 119/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x4_y2_a1_b3.mtx 120/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x4_y2_a2_b1.mtx 121/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x4_y2_a2_b2.mtx 122/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x4_y2_a2_b3.mtx 123/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x4_y2_a3_b1.mtx 124/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x4_y2_a3_b2.mtx 125/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x4_y2_a3_b3.mtx 126/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x4_y3_a1_b1.mtx 127/144 [OK]
[2026-03-23 10:37:03] Generation geaxpby single precision result file serial/sol_x4_y3_a1_b2.mtx 128/144 [OK]
[2026-03-23 10:37:04] Generation geaxpby single precision result file serial/sol_x4_y3_a1_b3.mtx 129/144 [OK]
[2026-03-23 10:37:04] Generation geaxpby single precision result file serial/sol_x4_y3_a2_b1.mtx 130/144 [OK]
[2026-03-23 10:37:04] Generation geaxpby single precision result file serial/sol_x4_y3_a2_b2.mtx 131/144 [OK]
[2026-03-23 10:37:04] Generation geaxpby single precision result file serial/sol_x4_y3_a2_b3.mtx 132/144 [OK]
[2026-03-23 10:37:04] Generation geaxpby single precision result file serial/sol_x4_y3_a3_b1.mtx 133/144 [OK]
[2026-03-23 10:37:04] Generation geaxpby single precision result file serial/sol_x4_y3_a3_b2.mtx 134/144 [OK]
[2026-03-23 10:37:04] Generation geaxpby single precision result file serial/sol_x4_y3_a3_b3.mtx 135/144 [OK]
[2026-03-23 10:37:04] Generation geaxpby single precision result file serial/sol_x4_y4_a1_b1.mtx 136/144 [OK]
[2026-03-23 10:37:04] Generation geaxpby single precision result file serial/sol_x4_y4_a1_b2.mtx 137/144 [OK]
[2026-03-23 10:37:04] Generation geaxpby single precision result file serial/sol_x4_y4_a1_b3.mtx 138/144 [OK]
[2026-03-23 10:37:04] Generation geaxpby single precision result file serial/sol_x4_y4_a2_b1.mtx 139/144 [OK]
[2026-03-23 10:37:04] Generation geaxpby single precision result file serial/sol_x4_y4_a2_b2.mtx 140/144 [OK]
[2026-03-23 10:37:04] Generation geaxpby single precision result file serial/sol_x4_y4_a2_b3.mtx 141/144 [OK]
[2026-03-23 10:37:04] Generation geaxpby single precision result file serial/sol_x4_y4_a3_b1.mtx 142/144 [OK]
[2026-03-23 10:37:04] Generation geaxpby single precision result file serial/sol_x4_y4_a3_b2.mtx 143/144 [OK]
[2026-03-23 10:37:04] Generation geaxpby single precision result file serial/sol_x4_y4_a3_b3.mtx 144/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y1_a1_b1.mtx 1/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y1_a1_b2.mtx 2/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y1_a1_b3.mtx 3/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y1_a2_b1.mtx 4/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y1_a2_b2.mtx 5/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y1_a2_b3.mtx 6/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y1_a3_b1.mtx 7/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y1_a3_b2.mtx 8/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y1_a3_b3.mtx 9/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y2_a1_b1.mtx 10/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y2_a1_b2.mtx 11/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y2_a1_b3.mtx 12/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y2_a2_b1.mtx 13/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y2_a2_b2.mtx 14/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y2_a2_b3.mtx 15/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y2_a3_b1.mtx 16/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y2_a3_b2.mtx 17/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y2_a3_b3.mtx 18/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y3_a1_b1.mtx 19/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y3_a1_b2.mtx 20/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y3_a1_b3.mtx 21/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y3_a2_b1.mtx 22/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y3_a2_b2.mtx 23/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y3_a2_b3.mtx 24/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y3_a3_b1.mtx 25/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y3_a3_b2.mtx 26/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y3_a3_b3.mtx 27/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y4_a1_b1.mtx 28/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y4_a1_b2.mtx 29/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y4_a1_b3.mtx 30/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y4_a2_b1.mtx 31/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y4_a2_b2.mtx 32/144 [OK]
[2026-03-23 10:37:04] Double precision check on file serial/sol_x1_y4_a2_b3.mtx 33/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x1_y4_a3_b1.mtx 34/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x1_y4_a3_b2.mtx 35/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x1_y4_a3_b3.mtx 36/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y1_a1_b1.mtx 37/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y1_a1_b2.mtx 38/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y1_a1_b3.mtx 39/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y1_a2_b1.mtx 40/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y1_a2_b2.mtx 41/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y1_a2_b3.mtx 42/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y1_a3_b1.mtx 43/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y1_a3_b2.mtx 44/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y1_a3_b3.mtx 45/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y2_a1_b1.mtx 46/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y2_a1_b2.mtx 47/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y2_a1_b3.mtx 48/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y2_a2_b1.mtx 49/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y2_a2_b2.mtx 50/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y2_a2_b3.mtx 51/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y2_a3_b1.mtx 52/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y2_a3_b2.mtx 53/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y2_a3_b3.mtx 54/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y3_a1_b1.mtx 55/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y3_a1_b2.mtx 56/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y3_a1_b3.mtx 57/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y3_a2_b1.mtx 58/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y3_a2_b2.mtx 59/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y3_a2_b3.mtx 60/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y3_a3_b1.mtx 61/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y3_a3_b2.mtx 62/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y3_a3_b3.mtx 63/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y4_a1_b1.mtx 64/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y4_a1_b2.mtx 65/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y4_a1_b3.mtx 66/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y4_a2_b1.mtx 67/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y4_a2_b2.mtx 68/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y4_a2_b3.mtx 69/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y4_a3_b1.mtx 70/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y4_a3_b2.mtx 71/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x2_y4_a3_b3.mtx 72/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x3_y1_a1_b1.mtx 73/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x3_y1_a1_b2.mtx 74/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x3_y1_a1_b3.mtx 75/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x3_y1_a2_b1.mtx 76/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x3_y1_a2_b2.mtx 77/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x3_y1_a2_b3.mtx 78/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x3_y1_a3_b1.mtx 79/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x3_y1_a3_b2.mtx 80/144 [OK]
[2026-03-23 10:37:05] Double precision check on file serial/sol_x3_y1_a3_b3.mtx 81/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x3_y2_a1_b1.mtx 82/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x3_y2_a1_b2.mtx 83/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x3_y2_a1_b3.mtx 84/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x3_y2_a2_b1.mtx 85/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x3_y2_a2_b2.mtx 86/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x3_y2_a2_b3.mtx 87/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x3_y2_a3_b1.mtx 88/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x3_y2_a3_b2.mtx 89/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x3_y2_a3_b3.mtx 90/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x3_y3_a1_b1.mtx 91/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x3_y3_a1_b2.mtx 92/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x3_y3_a1_b3.mtx 93/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x3_y3_a2_b1.mtx 94/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x3_y3_a2_b2.mtx 95/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x3_y3_a2_b3.mtx 96/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x3_y3_a3_b1.mtx 97/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x3_y3_a3_b2.mtx 98/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x3_y3_a3_b3.mtx 99/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x3_y4_a1_b1.mtx 100/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x3_y4_a1_b2.mtx 101/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x3_y4_a1_b3.mtx 102/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x3_y4_a2_b1.mtx 103/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x3_y4_a2_b2.mtx 104/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x3_y4_a2_b3.mtx 105/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x3_y4_a3_b1.mtx 106/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x3_y4_a3_b2.mtx 107/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x3_y4_a3_b3.mtx 108/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x4_y1_a1_b1.mtx 109/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x4_y1_a1_b2.mtx 110/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x4_y1_a1_b3.mtx 111/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x4_y1_a2_b1.mtx 112/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x4_y1_a2_b2.mtx 113/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x4_y1_a2_b3.mtx 114/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x4_y1_a3_b1.mtx 115/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x4_y1_a3_b2.mtx 116/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x4_y1_a3_b3.mtx 117/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x4_y2_a1_b1.mtx 118/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x4_y2_a1_b2.mtx 119/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x4_y2_a1_b3.mtx 120/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x4_y2_a2_b1.mtx 121/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x4_y2_a2_b2.mtx 122/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x4_y2_a2_b3.mtx 123/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x4_y2_a3_b1.mtx 124/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x4_y2_a3_b2.mtx 125/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x4_y2_a3_b3.mtx 126/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x4_y3_a1_b1.mtx 127/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x4_y3_a1_b2.mtx 128/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x4_y3_a1_b3.mtx 129/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x4_y3_a2_b1.mtx 130/144 [OK]
[2026-03-23 10:37:06] Double precision check on file serial/sol_x4_y3_a2_b2.mtx 131/144 [OK]
[2026-03-23 10:37:07] Double precision check on file serial/sol_x4_y3_a2_b3.mtx 132/144 [OK]
[2026-03-23 10:37:07] Double precision check on file serial/sol_x4_y3_a3_b1.mtx 133/144 [OK]
[2026-03-23 10:37:07] Double precision check on file serial/sol_x4_y3_a3_b2.mtx 134/144 [OK]
[2026-03-23 10:37:07] Double precision check on file serial/sol_x4_y3_a3_b3.mtx 135/144 [OK]
[2026-03-23 10:37:07] Double precision check on file serial/sol_x4_y4_a1_b1.mtx 136/144 [OK]
[2026-03-23 10:37:07] Double precision check on file serial/sol_x4_y4_a1_b2.mtx 137/144 [OK]
[2026-03-23 10:37:07] Double precision check on file serial/sol_x4_y4_a1_b3.mtx 138/144 [OK]
[2026-03-23 10:37:07] Double precision check on file serial/sol_x4_y4_a2_b1.mtx 139/144 [OK]
[2026-03-23 10:37:07] Double precision check on file serial/sol_x4_y4_a2_b2.mtx 140/144 [OK]
[2026-03-23 10:37:07] Double precision check on file serial/sol_x4_y4_a2_b3.mtx 141/144 [OK]
[2026-03-23 10:37:07] Double precision check on file serial/sol_x4_y4_a3_b1.mtx 142/144 [OK]
[2026-03-23 10:37:07] Double precision check on file serial/sol_x4_y4_a3_b2.mtx 143/144 [OK]
[2026-03-23 10:37:07] Double precision check on file serial/sol_x4_y4_a3_b3.mtx 144/144 [OK]
Welcome to PSBLAS version: 3.9.0
This is the psb_gedot_test sample program
Number of processes used in this computation: 1
[2025-06-12 13:27:44] Generation gedot single precision result file serial/sol_x1_y1.mtx 1/144 [OK]
[2025-06-12 13:27:44] Generation gedot single precision result file serial/sol_x1_y2.mtx 2/144 [OK]
[2025-06-12 13:27:44] Generation gedot single precision result file serial/sol_x1_y3.mtx 3/144 [OK]
[2025-06-12 13:27:44] Generation gedot single precision result file serial/sol_x1_y4.mtx 4/144 [OK]
[2025-06-12 13:27:44] Generation gedot single precision result file serial/sol_x2_y1.mtx 5/144 [OK]
[2025-06-12 13:27:44] Generation gedot single precision result file serial/sol_x2_y2.mtx 6/144 [OK]
[2025-06-12 13:27:44] Generation gedot single precision result file serial/sol_x2_y3.mtx 7/144 [OK]
[2025-06-12 13:27:44] Generation gedot single precision result file serial/sol_x2_y4.mtx 8/144 [OK]
[2025-06-12 13:27:44] Generation gedot single precision result file serial/sol_x3_y1.mtx 9/144 [OK]
[2025-06-12 13:27:44] Generation gedot single precision result file serial/sol_x3_y2.mtx 10/144 [OK]
[2025-06-12 13:27:44] Generation gedot single precision result file serial/sol_x3_y3.mtx 11/144 [OK]
[2025-06-12 13:27:44] Generation gedot single precision result file serial/sol_x3_y4.mtx 12/144 [OK]
[2025-06-12 13:27:44] Generation gedot single precision result file serial/sol_x4_y1.mtx 13/144 [OK]
[2025-06-12 13:27:44] Generation gedot single precision result file serial/sol_x4_y2.mtx 14/144 [OK]
[2025-06-12 13:27:44] Generation gedot single precision result file serial/sol_x4_y3.mtx 15/144 [OK]
[2025-06-12 13:27:44] Generation gedot single precision result file serial/sol_x4_y4.mtx 16/144 [OK]
[2025-06-12 13:27:44] Double precision check on file serial/sol_x1_y1.mtx 1/144 [OK]
[2025-06-12 13:27:44] Double precision check on file serial/sol_x1_y2.mtx 2/144 [OK]
[2025-06-12 13:27:44] Double precision check on file serial/sol_x1_y3.mtx 3/144 [OK]
[2025-06-12 13:27:44] Double precision check on file serial/sol_x1_y4.mtx 4/144 [OK]
[2025-06-12 13:27:44] Double precision check on file serial/sol_x2_y1.mtx 5/144 [OK]
[2025-06-12 13:27:44] Double precision check on file serial/sol_x2_y2.mtx 6/144 [OK]
[2025-06-12 13:27:44] Double precision check on file serial/sol_x2_y3.mtx 7/144 [OK]
[2025-06-12 13:27:44] Double precision check on file serial/sol_x2_y4.mtx 8/144 [OK]
[2025-06-12 13:27:44] Double precision check on file serial/sol_x3_y1.mtx 9/144 [OK]
[2025-06-12 13:27:44] Double precision check on file serial/sol_x3_y2.mtx 10/144 [OK]
[2025-06-12 13:27:44] Double precision check on file serial/sol_x3_y3.mtx 11/144 [OK]
[2025-06-12 13:27:44] Double precision check on file serial/sol_x3_y4.mtx 12/144 [OK]
[2025-06-12 13:27:44] Double precision check on file serial/sol_x4_y1.mtx 13/144 [OK]
[2025-06-12 13:27:44] Double precision check on file serial/sol_x4_y2.mtx 14/144 [OK]
[2025-06-12 13:27:44] Double precision check on file serial/sol_x4_y3.mtx 15/144 [OK]
[2025-06-12 13:27:44] Double precision check on file serial/sol_x4_y4.mtx 16/144 [OK]
Welcome to PSBLAS version: 3.9.0
This is the psb_gedot_test sample program
Number of processes used in this computation: 40
[2025-06-12 13:28:19] Generation gedot single precision result file parallel/sol_x1_y1.mtx 1/144 [OK]
[2025-06-12 13:28:19] Generation gedot single precision result file parallel/sol_x1_y2.mtx 2/144 [OK]
[2025-06-12 13:28:19] Generation gedot single precision result file parallel/sol_x1_y3.mtx 3/144 [OK]
[2025-06-12 13:28:19] Generation gedot single precision result file parallel/sol_x1_y4.mtx 4/144 [OK]
[2025-06-12 13:28:19] Generation gedot single precision result file parallel/sol_x2_y1.mtx 5/144 [OK]
[2025-06-12 13:28:19] Generation gedot single precision result file parallel/sol_x2_y2.mtx 6/144 [OK]
[2025-06-12 13:28:19] Generation gedot single precision result file parallel/sol_x2_y3.mtx 7/144 [OK]
[2025-06-12 13:28:19] Generation gedot single precision result file parallel/sol_x2_y4.mtx 8/144 [OK]
[2025-06-12 13:28:19] Generation gedot single precision result file parallel/sol_x3_y1.mtx 9/144 [OK]
[2025-06-12 13:28:19] Generation gedot single precision result file parallel/sol_x3_y2.mtx 10/144 [OK]
[2025-06-12 13:28:19] Generation gedot single precision result file parallel/sol_x3_y3.mtx 11/144 [OK]
[2025-06-12 13:28:19] Generation gedot single precision result file parallel/sol_x3_y4.mtx 12/144 [OK]
[2025-06-12 13:28:19] Generation gedot single precision result file parallel/sol_x4_y1.mtx 13/144 [OK]
[2025-06-12 13:28:19] Generation gedot single precision result file parallel/sol_x4_y2.mtx 14/144 [OK]
[2025-06-12 13:28:20] Generation gedot single precision result file parallel/sol_x4_y3.mtx 15/144 [OK]
[2025-06-12 13:28:20] Generation gedot single precision result file parallel/sol_x4_y4.mtx 16/144 [OK]
[2025-06-12 13:28:20] Double precision check on file parallel/sol_x1_y1.mtx 1/144 [OK]
[2025-06-12 13:28:20] Double precision check on file parallel/sol_x1_y2.mtx 2/144 [OK]
[2025-06-12 13:28:20] Double precision check on file parallel/sol_x1_y3.mtx 3/144 [OK]
[2025-06-12 13:28:20] Double precision check on file parallel/sol_x1_y4.mtx 4/144 [OK]
[2025-06-12 13:28:20] Double precision check on file parallel/sol_x2_y1.mtx 5/144 [OK]
[2025-06-12 13:28:20] Double precision check on file parallel/sol_x2_y2.mtx 6/144 [OK]
[2025-06-12 13:28:20] Double precision check on file parallel/sol_x2_y3.mtx 7/144 [OK]
[2025-06-12 13:28:20] Double precision check on file parallel/sol_x2_y4.mtx 8/144 [OK]
[2025-06-12 13:28:20] Double precision check on file parallel/sol_x3_y1.mtx 9/144 [OK]
[2025-06-12 13:28:20] Double precision check on file parallel/sol_x3_y2.mtx 10/144 [OK]
[2025-06-12 13:28:20] Double precision check on file parallel/sol_x3_y3.mtx 11/144 [OK]
[2025-06-12 13:28:20] Double precision check on file parallel/sol_x3_y4.mtx 12/144 [OK]
[2025-06-12 13:28:20] Double precision check on file parallel/sol_x4_y1.mtx 13/144 [OK]
[2025-06-12 13:28:20] Double precision check on file parallel/sol_x4_y2.mtx 14/144 [OK]
[2025-06-12 13:28:20] Double precision check on file parallel/sol_x4_y3.mtx 15/144 [OK]
[2025-06-12 13:28:20] Double precision check on file parallel/sol_x4_y4.mtx 16/144 [OK]
Comparison between serial/sol_x1_y1.mtx and parallel/sol_x1_y1.mtx: 4 differences
Comparison between serial/sol_x1_y2.mtx and parallel/sol_x1_y2.mtx: 4 differences
Comparison between serial/sol_x1_y3.mtx and parallel/sol_x1_y3.mtx: 4 differences
Comparison between serial/sol_x1_y4.mtx and parallel/sol_x1_y4.mtx: 0 differences
Comparison between serial/sol_x2_y1.mtx and parallel/sol_x2_y1.mtx: 4 differences
Comparison between serial/sol_x2_y2.mtx and parallel/sol_x2_y2.mtx: 4 differences
Comparison between serial/sol_x2_y3.mtx and parallel/sol_x2_y3.mtx: 4 differences
Comparison between serial/sol_x2_y4.mtx and parallel/sol_x2_y4.mtx: 0 differences
Comparison between serial/sol_x3_y1.mtx and parallel/sol_x3_y1.mtx: 4 differences
Comparison between serial/sol_x3_y2.mtx and parallel/sol_x3_y2.mtx: 4 differences
Comparison between serial/sol_x3_y3.mtx and parallel/sol_x3_y3.mtx: 4 differences
Comparison between serial/sol_x3_y4.mtx and parallel/sol_x3_y4.mtx: 0 differences
Comparison between serial/sol_x4_y1.mtx and parallel/sol_x4_y1.mtx: 0 differences
Comparison between serial/sol_x4_y2.mtx and parallel/sol_x4_y2.mtx: 0 differences
Comparison between serial/sol_x4_y3.mtx and parallel/sol_x4_y3.mtx: 0 differences
Comparison between serial/sol_x4_y4.mtx and parallel/sol_x4_y4.mtx: 0 differences
Welcome to PSBLAS version: 3.9.0
This is the psb_gedot_test sample program
Number of processes used in this computation: 40
[2025-06-12 13:32:44] Generation gedot single precision result file parallel/sol_x1_y1.mtx 1/144 [OK]
[2025-06-12 13:32:44] Generation gedot single precision result file parallel/sol_x1_y2.mtx 2/144 [OK]
[2025-06-12 13:32:45] Generation gedot single precision result file parallel/sol_x1_y3.mtx 3/144 [OK]
[2025-06-12 13:32:45] Generation gedot single precision result file parallel/sol_x1_y4.mtx 4/144 [OK]
[2025-06-12 13:32:45] Generation gedot single precision result file parallel/sol_x2_y1.mtx 5/144 [OK]
[2025-06-12 13:32:45] Generation gedot single precision result file parallel/sol_x2_y2.mtx 6/144 [OK]
[2025-06-12 13:32:45] Generation gedot single precision result file parallel/sol_x2_y3.mtx 7/144 [OK]
[2025-06-12 13:32:45] Generation gedot single precision result file parallel/sol_x2_y4.mtx 8/144 [OK]
[2025-06-12 13:32:45] Generation gedot single precision result file parallel/sol_x3_y1.mtx 9/144 [OK]
[2025-06-12 13:32:45] Generation gedot single precision result file parallel/sol_x3_y2.mtx 10/144 [OK]
[2025-06-12 13:32:45] Generation gedot single precision result file parallel/sol_x3_y3.mtx 11/144 [OK]
[2025-06-12 13:32:45] Generation gedot single precision result file parallel/sol_x3_y4.mtx 12/144 [OK]
[2025-06-12 13:32:45] Generation gedot single precision result file parallel/sol_x4_y1.mtx 13/144 [OK]
[2025-06-12 13:32:45] Generation gedot single precision result file parallel/sol_x4_y2.mtx 14/144 [OK]
[2025-06-12 13:32:45] Generation gedot single precision result file parallel/sol_x4_y3.mtx 15/144 [OK]
[2025-06-12 13:32:45] Generation gedot single precision result file parallel/sol_x4_y4.mtx 16/144 [OK]
[2025-06-12 13:32:45] Double precision check on file parallel/sol_x1_y1.mtx 1/144 [OK]
[2025-06-12 13:32:45] Double precision check on file parallel/sol_x1_y2.mtx 2/144 [OK]
[2025-06-12 13:32:45] Double precision check on file parallel/sol_x1_y3.mtx 3/144 [OK]
[2025-06-12 13:32:45] Double precision check on file parallel/sol_x1_y4.mtx 4/144 [OK]
[2025-06-12 13:32:45] Double precision check on file parallel/sol_x2_y1.mtx 5/144 [OK]
[2025-06-12 13:32:45] Double precision check on file parallel/sol_x2_y2.mtx 6/144 [OK]
[2025-06-12 13:32:45] Double precision check on file parallel/sol_x2_y3.mtx 7/144 [OK]
[2025-06-12 13:32:45] Double precision check on file parallel/sol_x2_y4.mtx 8/144 [OK]
[2025-06-12 13:32:45] Double precision check on file parallel/sol_x3_y1.mtx 9/144 [OK]
[2025-06-12 13:32:45] Double precision check on file parallel/sol_x3_y2.mtx 10/144 [OK]
[2025-06-12 13:32:45] Double precision check on file parallel/sol_x3_y3.mtx 11/144 [OK]
[2025-06-12 13:32:45] Double precision check on file parallel/sol_x3_y4.mtx 12/144 [OK]
[2025-06-12 13:32:45] Double precision check on file parallel/sol_x4_y1.mtx 13/144 [OK]
[2025-06-12 13:32:45] Double precision check on file parallel/sol_x4_y2.mtx 14/144 [OK]
[2025-06-12 13:32:45] Double precision check on file parallel/sol_x4_y3.mtx 15/144 [OK]
[2025-06-12 13:32:45] Double precision check on file parallel/sol_x4_y4.mtx 16/144 [OK]
Comparison between serial/sol_x1_y1.mtx and parallel/sol_x1_y1.mtx: 4 differences
Comparison between serial/sol_x1_y2.mtx and parallel/sol_x1_y2.mtx: 4 differences
Comparison between serial/sol_x1_y3.mtx and parallel/sol_x1_y3.mtx: 4 differences
Comparison between serial/sol_x1_y4.mtx and parallel/sol_x1_y4.mtx: 0 differences
Comparison between serial/sol_x2_y1.mtx and parallel/sol_x2_y1.mtx: 4 differences
Comparison between serial/sol_x2_y2.mtx and parallel/sol_x2_y2.mtx: 4 differences
Comparison between serial/sol_x2_y3.mtx and parallel/sol_x2_y3.mtx: 4 differences
Comparison between serial/sol_x2_y4.mtx and parallel/sol_x2_y4.mtx: 0 differences
Comparison between serial/sol_x3_y1.mtx and parallel/sol_x3_y1.mtx: 4 differences
Comparison between serial/sol_x3_y2.mtx and parallel/sol_x3_y2.mtx: 4 differences
Comparison between serial/sol_x3_y3.mtx and parallel/sol_x3_y3.mtx: 4 differences
Comparison between serial/sol_x3_y4.mtx and parallel/sol_x3_y4.mtx: 0 differences
Comparison between serial/sol_x4_y1.mtx and parallel/sol_x4_y1.mtx: 0 differences
Comparison between serial/sol_x4_y2.mtx and parallel/sol_x4_y2.mtx: 0 differences
Comparison between serial/sol_x4_y3.mtx and parallel/sol_x4_y3.mtx: 0 differences
Comparison between serial/sol_x4_y4.mtx and parallel/sol_x4_y4.mtx: 0 differences
Welcome to PSBLAS version: 3.9.0
This is the psb_gedot_test sample program
Number of processes used in this computation: 40
[2025-06-12 13:33:40] Generation gedot single precision result file parallel/sol_x1_y1.mtx 1/144 [OK]
[2025-06-12 13:33:40] Generation gedot single precision result file parallel/sol_x1_y2.mtx 2/144 [OK]
[2025-06-12 13:33:40] Generation gedot single precision result file parallel/sol_x1_y3.mtx 3/144 [OK]
[2025-06-12 13:33:40] Generation gedot single precision result file parallel/sol_x1_y4.mtx 4/144 [OK]
[2025-06-12 13:33:40] Generation gedot single precision result file parallel/sol_x2_y1.mtx 5/144 [OK]
[2025-06-12 13:33:40] Generation gedot single precision result file parallel/sol_x2_y2.mtx 6/144 [OK]
[2025-06-12 13:33:40] Generation gedot single precision result file parallel/sol_x2_y3.mtx 7/144 [OK]
[2025-06-12 13:33:40] Generation gedot single precision result file parallel/sol_x2_y4.mtx 8/144 [OK]
[2025-06-12 13:33:40] Generation gedot single precision result file parallel/sol_x3_y1.mtx 9/144 [OK]
[2025-06-12 13:33:40] Generation gedot single precision result file parallel/sol_x3_y2.mtx 10/144 [OK]
[2025-06-12 13:33:40] Generation gedot single precision result file parallel/sol_x3_y3.mtx 11/144 [OK]
[2025-06-12 13:33:40] Generation gedot single precision result file parallel/sol_x3_y4.mtx 12/144 [OK]
[2025-06-12 13:33:40] Generation gedot single precision result file parallel/sol_x4_y1.mtx 13/144 [OK]
[2025-06-12 13:33:40] Generation gedot single precision result file parallel/sol_x4_y2.mtx 14/144 [OK]
[2025-06-12 13:33:40] Generation gedot single precision result file parallel/sol_x4_y3.mtx 15/144 [OK]
[2025-06-12 13:33:40] Generation gedot single precision result file parallel/sol_x4_y4.mtx 16/144 [OK]
[2025-06-12 13:33:41] Double precision check on file parallel/sol_x1_y1.mtx 1/144 [OK]
[2025-06-12 13:33:41] Double precision check on file parallel/sol_x1_y2.mtx 2/144 [OK]
[2025-06-12 13:33:41] Double precision check on file parallel/sol_x1_y3.mtx 3/144 [OK]
[2025-06-12 13:33:41] Double precision check on file parallel/sol_x1_y4.mtx 4/144 [OK]
[2025-06-12 13:33:41] Double precision check on file parallel/sol_x2_y1.mtx 5/144 [OK]
[2025-06-12 13:33:41] Double precision check on file parallel/sol_x2_y2.mtx 6/144 [OK]
[2025-06-12 13:33:41] Double precision check on file parallel/sol_x2_y3.mtx 7/144 [OK]
[2025-06-12 13:33:41] Double precision check on file parallel/sol_x2_y4.mtx 8/144 [OK]
[2025-06-12 13:33:41] Double precision check on file parallel/sol_x3_y1.mtx 9/144 [OK]
[2025-06-12 13:33:41] Double precision check on file parallel/sol_x3_y2.mtx 10/144 [OK]
[2025-06-12 13:33:41] Double precision check on file parallel/sol_x3_y3.mtx 11/144 [OK]
[2025-06-12 13:33:41] Double precision check on file parallel/sol_x3_y4.mtx 12/144 [OK]
[2025-06-12 13:33:41] Double precision check on file parallel/sol_x4_y1.mtx 13/144 [OK]
[2025-06-12 13:33:41] Double precision check on file parallel/sol_x4_y2.mtx 14/144 [OK]
[2025-06-12 13:33:41] Double precision check on file parallel/sol_x4_y3.mtx 15/144 [OK]
[2025-06-12 13:33:41] Double precision check on file parallel/sol_x4_y4.mtx 16/144 [OK]
Comparison between serial/sol_x1_y1.mtx and parallel/sol_x1_y1.mtx: 4 differences
Comparison between serial/sol_x1_y2.mtx and parallel/sol_x1_y2.mtx: 4 differences
Comparison between serial/sol_x1_y3.mtx and parallel/sol_x1_y3.mtx: 4 differences
Comparison between serial/sol_x1_y4.mtx and parallel/sol_x1_y4.mtx: 0 differences
Comparison between serial/sol_x2_y1.mtx and parallel/sol_x2_y1.mtx: 4 differences
Comparison between serial/sol_x2_y2.mtx and parallel/sol_x2_y2.mtx: 4 differences
Comparison between serial/sol_x2_y3.mtx and parallel/sol_x2_y3.mtx: 4 differences
Comparison between serial/sol_x2_y4.mtx and parallel/sol_x2_y4.mtx: 0 differences
Comparison between serial/sol_x3_y1.mtx and parallel/sol_x3_y1.mtx: 4 differences
Comparison between serial/sol_x3_y2.mtx and parallel/sol_x3_y2.mtx: 4 differences
Comparison between serial/sol_x3_y3.mtx and parallel/sol_x3_y3.mtx: 4 differences
Comparison between serial/sol_x3_y4.mtx and parallel/sol_x3_y4.mtx: 0 differences
Comparison between serial/sol_x4_y1.mtx and parallel/sol_x4_y1.mtx: 0 differences
Comparison between serial/sol_x4_y2.mtx and parallel/sol_x4_y2.mtx: 0 differences
Comparison between serial/sol_x4_y3.mtx and parallel/sol_x4_y3.mtx: 0 differences
Comparison between serial/sol_x4_y4.mtx and parallel/sol_x4_y4.mtx: 0 differences
Welcome to PSBLAS version: 3.9.0
This is the psb_gedot_test sample program
Number of processes used in this computation: 40
[2025-06-12 13:34:42] Generation gedot single precision result file parallel/sol_x1_y1.mtx 1/144 [OK]
[2025-06-12 13:34:42] Generation gedot single precision result file parallel/sol_x1_y2.mtx 2/144 [OK]
[2025-06-12 13:34:42] Generation gedot single precision result file parallel/sol_x1_y3.mtx 3/144 [OK]
[2025-06-12 13:34:42] Generation gedot single precision result file parallel/sol_x1_y4.mtx 4/144 [OK]
[2025-06-12 13:34:42] Generation gedot single precision result file parallel/sol_x2_y1.mtx 5/144 [OK]
[2025-06-12 13:34:42] Generation gedot single precision result file parallel/sol_x2_y2.mtx 6/144 [OK]
[2025-06-12 13:34:42] Generation gedot single precision result file parallel/sol_x2_y3.mtx 7/144 [OK]
[2025-06-12 13:34:42] Generation gedot single precision result file parallel/sol_x2_y4.mtx 8/144 [OK]
[2025-06-12 13:34:42] Generation gedot single precision result file parallel/sol_x3_y1.mtx 9/144 [OK]
[2025-06-12 13:34:42] Generation gedot single precision result file parallel/sol_x3_y2.mtx 10/144 [OK]
[2025-06-12 13:34:42] Generation gedot single precision result file parallel/sol_x3_y3.mtx 11/144 [OK]
[2025-06-12 13:34:42] Generation gedot single precision result file parallel/sol_x3_y4.mtx 12/144 [OK]
[2025-06-12 13:34:42] Generation gedot single precision result file parallel/sol_x4_y1.mtx 13/144 [OK]
[2025-06-12 13:34:42] Generation gedot single precision result file parallel/sol_x4_y2.mtx 14/144 [OK]
[2025-06-12 13:34:42] Generation gedot single precision result file parallel/sol_x4_y3.mtx 15/144 [OK]
[2025-06-12 13:34:42] Generation gedot single precision result file parallel/sol_x4_y4.mtx 16/144 [OK]
[2025-06-12 13:34:42] Double precision check on file parallel/sol_x1_y1.mtx 1/144 [OK]
[2025-06-12 13:34:42] Double precision check on file parallel/sol_x1_y2.mtx 2/144 [OK]
[2025-06-12 13:34:42] Double precision check on file parallel/sol_x1_y3.mtx 3/144 [OK]
[2025-06-12 13:34:42] Double precision check on file parallel/sol_x1_y4.mtx 4/144 [OK]
[2025-06-12 13:34:42] Double precision check on file parallel/sol_x2_y1.mtx 5/144 [OK]
[2025-06-12 13:34:42] Double precision check on file parallel/sol_x2_y2.mtx 6/144 [OK]
[2025-06-12 13:34:42] Double precision check on file parallel/sol_x2_y3.mtx 7/144 [OK]
[2025-06-12 13:34:42] Double precision check on file parallel/sol_x2_y4.mtx 8/144 [OK]
[2025-06-12 13:34:42] Double precision check on file parallel/sol_x3_y1.mtx 9/144 [OK]
[2025-06-12 13:34:42] Double precision check on file parallel/sol_x3_y2.mtx 10/144 [OK]
[2025-06-12 13:34:42] Double precision check on file parallel/sol_x3_y3.mtx 11/144 [OK]
[2025-06-12 13:34:42] Double precision check on file parallel/sol_x3_y4.mtx 12/144 [OK]
[2025-06-12 13:34:43] Double precision check on file parallel/sol_x4_y1.mtx 13/144 [OK]
[2025-06-12 13:34:43] Double precision check on file parallel/sol_x4_y2.mtx 14/144 [OK]
[2025-06-12 13:34:43] Double precision check on file parallel/sol_x4_y3.mtx 15/144 [OK]
[2025-06-12 13:34:43] Double precision check on file parallel/sol_x4_y4.mtx 16/144 [OK]
Comparison between serial/sol_x1_y1.mtx and parallel/sol_x1_y1.mtx: 4 differences
Comparison between serial/sol_x1_y2.mtx and parallel/sol_x1_y2.mtx: 4 differences
Comparison between serial/sol_x1_y3.mtx and parallel/sol_x1_y3.mtx: 4 differences
Comparison between serial/sol_x1_y4.mtx and parallel/sol_x1_y4.mtx: 0 differences
Comparison between serial/sol_x2_y1.mtx and parallel/sol_x2_y1.mtx: 4 differences
Comparison between serial/sol_x2_y2.mtx and parallel/sol_x2_y2.mtx: 4 differences
Comparison between serial/sol_x2_y3.mtx and parallel/sol_x2_y3.mtx: 4 differences
Comparison between serial/sol_x2_y4.mtx and parallel/sol_x2_y4.mtx: 0 differences
Comparison between serial/sol_x3_y1.mtx and parallel/sol_x3_y1.mtx: 4 differences
Comparison between serial/sol_x3_y2.mtx and parallel/sol_x3_y2.mtx: 4 differences
Comparison between serial/sol_x3_y3.mtx and parallel/sol_x3_y3.mtx: 4 differences
Comparison between serial/sol_x3_y4.mtx and parallel/sol_x3_y4.mtx: 0 differences
Comparison between serial/sol_x4_y1.mtx and parallel/sol_x4_y1.mtx: 0 differences
Comparison between serial/sol_x4_y2.mtx and parallel/sol_x4_y2.mtx: 0 differences
Comparison between serial/sol_x4_y3.mtx and parallel/sol_x4_y3.mtx: 0 differences
Comparison between serial/sol_x4_y4.mtx and parallel/sol_x4_y4.mtx: 0 differences
Welcome to PSBLAS version: 3.9.0
This is the psb_gedot_test sample program
Number of processes used in this computation: 40
[2025-06-12 13:35:32] Generation gedot single precision result file parallel/sol_x1_y1.mtx 1/144 [OK]
[2025-06-12 13:35:32] Generation gedot single precision result file parallel/sol_x1_y2.mtx 2/144 [OK]
[2025-06-12 13:35:32] Generation gedot single precision result file parallel/sol_x1_y3.mtx 3/144 [OK]
[2025-06-12 13:35:32] Generation gedot single precision result file parallel/sol_x1_y4.mtx 4/144 [OK]
[2025-06-12 13:35:32] Generation gedot single precision result file parallel/sol_x2_y1.mtx 5/144 [OK]
[2025-06-12 13:35:32] Generation gedot single precision result file parallel/sol_x2_y2.mtx 6/144 [OK]
[2025-06-12 13:35:32] Generation gedot single precision result file parallel/sol_x2_y3.mtx 7/144 [OK]
[2025-06-12 13:35:32] Generation gedot single precision result file parallel/sol_x2_y4.mtx 8/144 [OK]
[2025-06-12 13:35:32] Generation gedot single precision result file parallel/sol_x3_y1.mtx 9/144 [OK]
[2025-06-12 13:35:32] Generation gedot single precision result file parallel/sol_x3_y2.mtx 10/144 [OK]
[2025-06-12 13:35:32] Generation gedot single precision result file parallel/sol_x3_y3.mtx 11/144 [OK]
[2025-06-12 13:35:32] Generation gedot single precision result file parallel/sol_x3_y4.mtx 12/144 [OK]
[2025-06-12 13:35:32] Generation gedot single precision result file parallel/sol_x4_y1.mtx 13/144 [OK]
[2025-06-12 13:35:32] Generation gedot single precision result file parallel/sol_x4_y2.mtx 14/144 [OK]
[2025-06-12 13:35:32] Generation gedot single precision result file parallel/sol_x4_y3.mtx 15/144 [OK]
[2025-06-12 13:35:32] Generation gedot single precision result file parallel/sol_x4_y4.mtx 16/144 [OK]
[2025-06-12 13:35:32] Double precision check on file parallel/sol_x1_y1.mtx 1/144 [OK]
[2025-06-12 13:35:32] Double precision check on file parallel/sol_x1_y2.mtx 2/144 [OK]
[2025-06-12 13:35:33] Double precision check on file parallel/sol_x1_y3.mtx 3/144 [OK]
[2025-06-12 13:35:33] Double precision check on file parallel/sol_x1_y4.mtx 4/144 [OK]
[2025-06-12 13:35:33] Double precision check on file parallel/sol_x2_y1.mtx 5/144 [OK]
[2025-06-12 13:35:33] Double precision check on file parallel/sol_x2_y2.mtx 6/144 [OK]
[2025-06-12 13:35:33] Double precision check on file parallel/sol_x2_y3.mtx 7/144 [OK]
[2025-06-12 13:35:33] Double precision check on file parallel/sol_x2_y4.mtx 8/144 [OK]
[2025-06-12 13:35:33] Double precision check on file parallel/sol_x3_y1.mtx 9/144 [OK]
[2025-06-12 13:35:33] Double precision check on file parallel/sol_x3_y2.mtx 10/144 [OK]
[2025-06-12 13:35:33] Double precision check on file parallel/sol_x3_y3.mtx 11/144 [OK]
[2025-06-12 13:35:33] Double precision check on file parallel/sol_x3_y4.mtx 12/144 [OK]
[2025-06-12 13:35:33] Double precision check on file parallel/sol_x4_y1.mtx 13/144 [OK]
[2025-06-12 13:35:33] Double precision check on file parallel/sol_x4_y2.mtx 14/144 [OK]
[2025-06-12 13:35:33] Double precision check on file parallel/sol_x4_y3.mtx 15/144 [OK]
[2025-06-12 13:35:33] Double precision check on file parallel/sol_x4_y4.mtx 16/144 [OK]
Comparison between serial/sol_x1_y1.mtx and parallel/sol_x1_y1.mtx: 4 differences
Comparison between serial/sol_x1_y2.mtx and parallel/sol_x1_y2.mtx: 4 differences
Comparison between serial/sol_x1_y3.mtx and parallel/sol_x1_y3.mtx: 4 differences
Comparison between serial/sol_x1_y4.mtx and parallel/sol_x1_y4.mtx: 0 differences
Comparison between serial/sol_x2_y1.mtx and parallel/sol_x2_y1.mtx: 4 differences
Comparison between serial/sol_x2_y2.mtx and parallel/sol_x2_y2.mtx: 4 differences
Comparison between serial/sol_x2_y3.mtx and parallel/sol_x2_y3.mtx: 4 differences
Comparison between serial/sol_x2_y4.mtx and parallel/sol_x2_y4.mtx: 0 differences
Comparison between serial/sol_x3_y1.mtx and parallel/sol_x3_y1.mtx: 4 differences
Comparison between serial/sol_x3_y2.mtx and parallel/sol_x3_y2.mtx: 4 differences
Comparison between serial/sol_x3_y3.mtx and parallel/sol_x3_y3.mtx: 4 differences
Comparison between serial/sol_x3_y4.mtx and parallel/sol_x3_y4.mtx: 0 differences
Comparison between serial/sol_x4_y1.mtx and parallel/sol_x4_y1.mtx: 0 differences
Comparison between serial/sol_x4_y2.mtx and parallel/sol_x4_y2.mtx: 0 differences
Comparison between serial/sol_x4_y3.mtx and parallel/sol_x4_y3.mtx: 0 differences
Comparison between serial/sol_x4_y4.mtx and parallel/sol_x4_y4.mtx: 0 differences
Welcome to PSBLAS version: 3.9.0
This is the psb_gedot_test sample program
Number of processes used in this computation: 40
[2025-06-12 13:37:17] Generation gedot single precision result file parallel/sol_x1_y1.mtx 1/144 [OK]
[2025-06-12 13:37:17] Generation gedot single precision result file parallel/sol_x1_y2.mtx 2/144 [OK]
[2025-06-12 13:37:17] Generation gedot single precision result file parallel/sol_x1_y3.mtx 3/144 [OK]
[2025-06-12 13:37:17] Generation gedot single precision result file parallel/sol_x1_y4.mtx 4/144 [OK]
[2025-06-12 13:37:17] Generation gedot single precision result file parallel/sol_x2_y1.mtx 5/144 [OK]
[2025-06-12 13:37:17] Generation gedot single precision result file parallel/sol_x2_y2.mtx 6/144 [OK]
[2025-06-12 13:37:17] Generation gedot single precision result file parallel/sol_x2_y3.mtx 7/144 [OK]
[2025-06-12 13:37:17] Generation gedot single precision result file parallel/sol_x2_y4.mtx 8/144 [OK]
[2025-06-12 13:37:17] Generation gedot single precision result file parallel/sol_x3_y1.mtx 9/144 [OK]
[2025-06-12 13:37:17] Generation gedot single precision result file parallel/sol_x3_y2.mtx 10/144 [OK]
[2025-06-12 13:37:17] Generation gedot single precision result file parallel/sol_x3_y3.mtx 11/144 [OK]
[2025-06-12 13:37:17] Generation gedot single precision result file parallel/sol_x3_y4.mtx 12/144 [OK]
[2025-06-12 13:37:17] Generation gedot single precision result file parallel/sol_x4_y1.mtx 13/144 [OK]
[2025-06-12 13:37:17] Generation gedot single precision result file parallel/sol_x4_y2.mtx 14/144 [OK]
[2025-06-12 13:37:17] Generation gedot single precision result file parallel/sol_x4_y3.mtx 15/144 [OK]
[2025-06-12 13:37:17] Generation gedot single precision result file parallel/sol_x4_y4.mtx 16/144 [OK]
[2025-06-12 13:37:17] Double precision check on file parallel/sol_x1_y1.mtx 1/144 [OK]
[2025-06-12 13:37:17] Double precision check on file parallel/sol_x1_y2.mtx 2/144 [OK]
[2025-06-12 13:37:17] Double precision check on file parallel/sol_x1_y3.mtx 3/144 [OK]
[2025-06-12 13:37:17] Double precision check on file parallel/sol_x1_y4.mtx 4/144 [OK]
[2025-06-12 13:37:17] Double precision check on file parallel/sol_x2_y1.mtx 5/144 [OK]
[2025-06-12 13:37:17] Double precision check on file parallel/sol_x2_y2.mtx 6/144 [OK]
[2025-06-12 13:37:17] Double precision check on file parallel/sol_x2_y3.mtx 7/144 [OK]
[2025-06-12 13:37:17] Double precision check on file parallel/sol_x2_y4.mtx 8/144 [OK]
[2025-06-12 13:37:17] Double precision check on file parallel/sol_x3_y1.mtx 9/144 [OK]
[2025-06-12 13:37:17] Double precision check on file parallel/sol_x3_y2.mtx 10/144 [OK]
[2025-06-12 13:37:17] Double precision check on file parallel/sol_x3_y3.mtx 11/144 [OK]
[2025-06-12 13:37:17] Double precision check on file parallel/sol_x3_y4.mtx 12/144 [OK]
[2025-06-12 13:37:17] Double precision check on file parallel/sol_x4_y1.mtx 13/144 [OK]
[2025-06-12 13:37:17] Double precision check on file parallel/sol_x4_y2.mtx 14/144 [OK]
[2025-06-12 13:37:17] Double precision check on file parallel/sol_x4_y3.mtx 15/144 [OK]
[2025-06-12 13:37:18] Double precision check on file parallel/sol_x4_y4.mtx 16/144 [OK]
Comparison between serial/sol_x1_y1.mtx and parallel/sol_x1_y1.mtx: 4 differences
Comparison between serial/sol_x1_y2.mtx and parallel/sol_x1_y2.mtx: 4 differences
Comparison between serial/sol_x1_y3.mtx and parallel/sol_x1_y3.mtx: 4 differences
Comparison between serial/sol_x1_y4.mtx and parallel/sol_x1_y4.mtx: 0 differences
Comparison between serial/sol_x2_y1.mtx and parallel/sol_x2_y1.mtx: 4 differences
Comparison between serial/sol_x2_y2.mtx and parallel/sol_x2_y2.mtx: 4 differences
Comparison between serial/sol_x2_y3.mtx and parallel/sol_x2_y3.mtx: 4 differences
Comparison between serial/sol_x2_y4.mtx and parallel/sol_x2_y4.mtx: 0 differences
Comparison between serial/sol_x3_y1.mtx and parallel/sol_x3_y1.mtx: 4 differences
Comparison between serial/sol_x3_y2.mtx and parallel/sol_x3_y2.mtx: 4 differences
Comparison between serial/sol_x3_y3.mtx and parallel/sol_x3_y3.mtx: 4 differences
Comparison between serial/sol_x3_y4.mtx and parallel/sol_x3_y4.mtx: 0 differences
Comparison between serial/sol_x4_y1.mtx and parallel/sol_x4_y1.mtx: 0 differences
Comparison between serial/sol_x4_y2.mtx and parallel/sol_x4_y2.mtx: 0 differences
Comparison between serial/sol_x4_y3.mtx and parallel/sol_x4_y3.mtx: 0 differences
Comparison between serial/sol_x4_y4.mtx and parallel/sol_x4_y4.mtx: 0 differences
Welcome to PSBLAS version: 3.9.0
This is the psb_gedot_test sample program
Number of processes used in this computation: 40
[2025-06-12 13:37:56] Generation gedot single precision result file parallel/sol_x1_y1.mtx 1/144 [OK]
[2025-06-12 13:37:56] Generation gedot single precision result file parallel/sol_x1_y2.mtx 2/144 [OK]
[2025-06-12 13:37:56] Generation gedot single precision result file parallel/sol_x1_y3.mtx 3/144 [OK]
[2025-06-12 13:37:56] Generation gedot single precision result file parallel/sol_x1_y4.mtx 4/144 [OK]
[2025-06-12 13:37:56] Generation gedot single precision result file parallel/sol_x2_y1.mtx 5/144 [OK]
[2025-06-12 13:37:56] Generation gedot single precision result file parallel/sol_x2_y2.mtx 6/144 [OK]
[2025-06-12 13:37:56] Generation gedot single precision result file parallel/sol_x2_y3.mtx 7/144 [OK]
[2025-06-12 13:37:56] Generation gedot single precision result file parallel/sol_x2_y4.mtx 8/144 [OK]
[2025-06-12 13:37:56] Generation gedot single precision result file parallel/sol_x3_y1.mtx 9/144 [OK]
[2025-06-12 13:37:56] Generation gedot single precision result file parallel/sol_x3_y2.mtx 10/144 [OK]
[2025-06-12 13:37:56] Generation gedot single precision result file parallel/sol_x3_y3.mtx 11/144 [OK]
[2025-06-12 13:37:56] Generation gedot single precision result file parallel/sol_x3_y4.mtx 12/144 [OK]
[2025-06-12 13:37:56] Generation gedot single precision result file parallel/sol_x4_y1.mtx 13/144 [OK]
[2025-06-12 13:37:56] Generation gedot single precision result file parallel/sol_x4_y2.mtx 14/144 [OK]
[2025-06-12 13:37:56] Generation gedot single precision result file parallel/sol_x4_y3.mtx 15/144 [OK]
[2025-06-12 13:37:56] Generation gedot single precision result file parallel/sol_x4_y4.mtx 16/144 [OK]
[2025-06-12 13:37:56] Double precision check on file parallel/sol_x1_y1.mtx 1/144 [OK]
[2025-06-12 13:37:56] Double precision check on file parallel/sol_x1_y2.mtx 2/144 [OK]
[2025-06-12 13:37:56] Double precision check on file parallel/sol_x1_y3.mtx 3/144 [OK]
[2025-06-12 13:37:57] Double precision check on file parallel/sol_x1_y4.mtx 4/144 [OK]
[2025-06-12 13:37:57] Double precision check on file parallel/sol_x2_y1.mtx 5/144 [OK]
[2025-06-12 13:37:57] Double precision check on file parallel/sol_x2_y2.mtx 6/144 [OK]
[2025-06-12 13:37:57] Double precision check on file parallel/sol_x2_y3.mtx 7/144 [OK]
[2025-06-12 13:37:57] Double precision check on file parallel/sol_x2_y4.mtx 8/144 [OK]
[2025-06-12 13:37:57] Double precision check on file parallel/sol_x3_y1.mtx 9/144 [OK]
[2025-06-12 13:37:57] Double precision check on file parallel/sol_x3_y2.mtx 10/144 [OK]
[2025-06-12 13:37:57] Double precision check on file parallel/sol_x3_y3.mtx 11/144 [OK]
[2025-06-12 13:37:57] Double precision check on file parallel/sol_x3_y4.mtx 12/144 [OK]
[2025-06-12 13:37:57] Double precision check on file parallel/sol_x4_y1.mtx 13/144 [OK]
[2025-06-12 13:37:57] Double precision check on file parallel/sol_x4_y2.mtx 14/144 [OK]
[2025-06-12 13:37:57] Double precision check on file parallel/sol_x4_y3.mtx 15/144 [OK]
[2025-06-12 13:37:57] Double precision check on file parallel/sol_x4_y4.mtx 16/144 [OK]
Comparison between serial/sol_x1_y1.mtx and parallel/sol_x1_y1.mtx: 4 differences
Comparison between serial/sol_x1_y2.mtx and parallel/sol_x1_y2.mtx: 4 differences
Comparison between serial/sol_x1_y3.mtx and parallel/sol_x1_y3.mtx: 4 differences
Comparison between serial/sol_x1_y4.mtx and parallel/sol_x1_y4.mtx: 0 differences
Comparison between serial/sol_x2_y1.mtx and parallel/sol_x2_y1.mtx: 4 differences
Comparison between serial/sol_x2_y2.mtx and parallel/sol_x2_y2.mtx: 4 differences
Comparison between serial/sol_x2_y3.mtx and parallel/sol_x2_y3.mtx: 4 differences
Comparison between serial/sol_x2_y4.mtx and parallel/sol_x2_y4.mtx: 0 differences
Comparison between serial/sol_x3_y1.mtx and parallel/sol_x3_y1.mtx: 4 differences
Comparison between serial/sol_x3_y2.mtx and parallel/sol_x3_y2.mtx: 4 differences
Comparison between serial/sol_x3_y3.mtx and parallel/sol_x3_y3.mtx: 4 differences
Comparison between serial/sol_x3_y4.mtx and parallel/sol_x3_y4.mtx: 0 differences
Comparison between serial/sol_x4_y1.mtx and parallel/sol_x4_y1.mtx: 0 differences
Comparison between serial/sol_x4_y2.mtx and parallel/sol_x4_y2.mtx: 0 differences
Comparison between serial/sol_x4_y3.mtx and parallel/sol_x4_y3.mtx: 0 differences
Comparison between serial/sol_x4_y4.mtx and parallel/sol_x4_y4.mtx: 0 differences

@ -39,35 +39,35 @@ echo -e "${BLUE}[INFO]\t Starting environment check for required modules...${RE
# Check and load required modules
required_modules=("gnu/12.2.1-sys" "mpich/4.2.2" "cuda/12.5")
for module in "${required_modules[@]}"; do
if ! module list 2>&1 | grep -q "$module"; then
echo -e "${YELLOW}[WARNING] Module not found, loading $module${RESET}"
module load "$module"
flag=1
if ! grep -q "module load $module" "$HOME/.bashrc"; then
echo -e "[INFO]\t Adding 'module load $module' to $bashrc..."
echo "module load $module" >> "$HOME/.bashrc"
# else
# echo "'module load $module' is already present in $bashrc."
fi
else
echo -e "[INFO]\t Found module $module."
fi
done
# Update .bashrc if necessary
if [ $flag -eq 1 ]; then
echo -e "[INFO]\t Reloading $HOME/.bashrc..."
source ~/.bashrc
fi
# Inform the user about environment persistence
if [ "$$" -eq "$PPID" ]; then
echo -e "${YELLOW}[WARNING] Modules loaded in this script will not persist after the script finishes.${RESET}"
echo -e "${YELLOW}[WARNING] Run the script using 'source autotest.sh' to make the changes persist.${RESET}"
fi
# required_modules=("gnu/12.2.1-sys" "mpich/4.2.2" "cuda/12.5")
# for module in "${required_modules[@]}"; do
# if ! module list 2>&1 | grep -q "$module"; then
# echo -e "${YELLOW}[WARNING] Module not found, loading $module${RESET}"
# module load "$module"
# flag=1
# if ! grep -q "module load $module" "$HOME/.bashrc"; then
# echo -e "[INFO]\t Adding 'module load $module' to $bashrc..."
# echo "module load $module" >> "$HOME/.bashrc"
# # else
# # echo "'module load $module' is already present in $bashrc."
# fi
# else
# echo -e "[INFO]\t Found module $module."
# fi
# done
#
# # Update .bashrc if necessary
# if [ $flag -eq 1 ]; then
# echo -e "[INFO]\t Reloading $HOME/.bashrc..."
# source ~/.bashrc
# fi
#
# # Inform the user about environment persistence
# if [ "$$" -eq "$PPID" ]; then
# echo -e "${YELLOW}[WARNING] Modules loaded in this script will not persist after the script finishes.${RESET}"
# echo -e "${YELLOW}[WARNING] Run the script using 'source autotest.sh' to make the changes persist.${RESET}"
# fi
echo -e "${BLUE}[INFO]\t Environment check for required modules completed.${RESET}"
echo ""

Loading…
Cancel
Save