[UPDATE] Minor changes to debug

communication_v2
Stack-1 4 weeks ago
parent fcae4a1633
commit 75d86864b0

@ -92,6 +92,10 @@ submodule (psi_d_comm_v_mod) psi_d_swapdata_impl
integer(psb_ipk_), save :: psb_swap_timing_wrapper_calls = 0
integer(psb_ipk_), save :: psb_swap_timing_baseline_calls = 0
integer(psb_ipk_), save :: psb_swap_timing_neighbor_calls = 0
logical, save :: psb_swap_start_debug_inited = .false.
logical, save :: psb_swap_start_debug_enabled = .false.
integer(psb_ipk_), save :: psb_swap_start_debug_max_report = 128
integer(psb_ipk_), save :: psb_swap_start_debug_report_count = 0
contains
@ -137,6 +141,47 @@ contains
psb_swap_timing_should_report = .true.
end function psb_swap_timing_should_report
subroutine psb_swap_start_debug_setup()
implicit none
character(len=64) :: env_buf
integer(psb_ipk_) :: env_len, env_status, ios
if (psb_swap_start_debug_inited) return
psb_swap_start_debug_inited = .true.
psb_swap_start_debug_enabled = .false.
psb_swap_start_debug_max_report = 128
call get_environment_variable('PSB_SWAP_DEBUG_START', env_buf, length=env_len, status=env_status)
if ((env_status == 0) .and. (env_len > 0)) then
select case(env_buf(1:1))
case('1','t','T','y','Y')
psb_swap_start_debug_enabled = .true.
case default
psb_swap_start_debug_enabled = .false.
end select
end if
call get_environment_variable('PSB_SWAP_DEBUG_START_MAX_REPORT', env_buf, length=env_len, status=env_status)
if ((env_status == 0) .and. (env_len > 0)) then
read(env_buf(1:env_len), *, iostat=ios) psb_swap_start_debug_max_report
if ((ios /= 0) .or. (psb_swap_start_debug_max_report < 1)) psb_swap_start_debug_max_report = 128
end if
end subroutine psb_swap_start_debug_setup
logical function psb_swap_start_debug_should_report()
implicit none
call psb_swap_start_debug_setup()
psb_swap_start_debug_should_report = .false.
if (.not. psb_swap_start_debug_enabled) return
if (psb_swap_start_debug_report_count >= psb_swap_start_debug_max_report) return
psb_swap_start_debug_report_count = psb_swap_start_debug_report_count + 1
psb_swap_start_debug_should_report = .true.
end function psb_swap_start_debug_should_report
module subroutine psi_dswapdata_vect(swap_status,beta,y,desc_a,info,data)
#ifdef PSB_MPI_MOD
@ -919,21 +964,37 @@ contains
#ifdef PSB_HAVE_MPI_NEIGHBOR_PERSISTENT
if (buffer_size > 0) then
! Count the attempt before MPI_Start so we can diagnose call reachability.
neighbor_comm_handle%diag_start_calls = neighbor_comm_handle%diag_start_calls + 1
if (psb_swap_start_debug_should_report()) then
write(psb_out_unit,'("SWAP_DEBUG MPI_Start(pre) kind=vect rank=",i0,", bsz=",i0,", ready=",l1)') &
& me, buffer_size, neighbor_comm_handle%persistent_request_ready
write(psb_out_unit,'(" inflight=",l1,", req_null=",l1,", dstart=",i0)') &
& neighbor_comm_handle%persistent_in_flight, &
& (neighbor_comm_handle%persistent_request == mpi_request_null), &
& neighbor_comm_handle%diag_start_calls
end if
if (timing_on) t1 = psb_wtime()
call mpi_start(neighbor_comm_handle%persistent_request, iret)
if (timing_on) t_post = t_post + (psb_wtime() - t1)
if (psb_swap_start_debug_should_report()) then
write(psb_out_unit,'("SWAP_DEBUG MPI_Start(post) kind=vect rank=",i0,", iret=",i0)') &
& me, iret
write(psb_out_unit,'(" inflight=",l1,", dstart=",i0)') &
& neighbor_comm_handle%persistent_in_flight, neighbor_comm_handle%diag_start_calls
end if
if (iret /= mpi_success) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
neighbor_comm_handle%diag_start_calls = neighbor_comm_handle%diag_start_calls + 1
neighbor_comm_handle%persistent_in_flight = .true.
else
neighbor_comm_handle%persistent_in_flight = .false.
end if
#else
if (buffer_size > 0) then
neighbor_comm_handle%diag_ineighbor_calls = neighbor_comm_handle%diag_ineighbor_calls + 1
if (timing_on) t1 = psb_wtime()
call mpi_ineighbor_alltoallv( &
& y%combuf(1), & ! send buffer
@ -962,6 +1023,7 @@ contains
! Post non-blocking neighborhood alltoallv
if (debug) write(*,*) me,' nbr_vect: posting MPI_Ineighbor_alltoallv'
if (buffer_size > 0) then
neighbor_comm_handle%diag_ineighbor_calls = neighbor_comm_handle%diag_ineighbor_calls + 1
if (timing_on) t1 = psb_wtime()
call mpi_ineighbor_alltoallv( &
& y%combuf(1), & ! send buffer
@ -1671,15 +1733,31 @@ subroutine psi_dswap_neighbor_topology_multivect(ctxt,swap_status,beta,y,comm_in
end if
#ifdef PSB_HAVE_MPI_NEIGHBOR_PERSISTENT
! Count the attempt before MPI_Start so we can diagnose call reachability.
neighbor_comm_handle%diag_start_calls = neighbor_comm_handle%diag_start_calls + 1
if (psb_swap_start_debug_should_report()) then
write(psb_out_unit,'("SWAP_DEBUG MPI_Start(pre) kind=multivect rank=",i0,", bsz=",i0,", ready=",l1)') &
& me, buffer_size, neighbor_comm_handle%persistent_request_ready
write(psb_out_unit,'(" inflight=",l1,", req_null=",l1,", dstart=",i0)') &
& neighbor_comm_handle%persistent_in_flight, &
& (neighbor_comm_handle%persistent_request == mpi_request_null), &
& neighbor_comm_handle%diag_start_calls
end if
call mpi_start(neighbor_comm_handle%persistent_request, iret)
if (psb_swap_start_debug_should_report()) then
write(psb_out_unit,'("SWAP_DEBUG MPI_Start(post) kind=multivect rank=",i0,", iret=",i0)') &
& me, iret
write(psb_out_unit,'(" inflight=",l1,", dstart=",i0)') &
& neighbor_comm_handle%persistent_in_flight, neighbor_comm_handle%diag_start_calls
end if
if (iret /= mpi_success) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
neighbor_comm_handle%diag_start_calls = neighbor_comm_handle%diag_start_calls + 1
neighbor_comm_handle%persistent_in_flight = .true.
#else
neighbor_comm_handle%diag_ineighbor_calls = neighbor_comm_handle%diag_ineighbor_calls + 1
call mpi_ineighbor_alltoallv( &
& y%combuf(1), & ! send buffer
& neighbor_comm_handle%send_counts, &
@ -1701,6 +1779,7 @@ subroutine psi_dswap_neighbor_topology_multivect(ctxt,swap_status,beta,y,comm_in
else
! Post non-blocking neighborhood alltoallv
if (debug) write(*,*) me,' nbr_vect: posting MPI_Ineighbor_alltoallv'
neighbor_comm_handle%diag_ineighbor_calls = neighbor_comm_handle%diag_ineighbor_calls + 1
call mpi_ineighbor_alltoallv( &
& y%combuf(1), & ! send buffer
& neighbor_comm_handle%send_counts, &

@ -32,6 +32,7 @@ module psb_comm_neighbor_impl_mod
integer(psb_ipk_) :: persistent_buffer_size = 0
integer(psb_ipk_) :: diag_init_calls = 0
integer(psb_ipk_) :: diag_start_calls = 0
integer(psb_ipk_) :: diag_ineighbor_calls = 0
integer(psb_ipk_) :: diag_wait_calls = 0
integer(psb_ipk_) :: diag_buffer_reallocs = 0
contains
@ -399,6 +400,7 @@ contains
this%persistent_buffer_size = 0
this%diag_init_calls = 0
this%diag_start_calls = 0
this%diag_ineighbor_calls = 0
this%diag_wait_calls = 0
this%diag_buffer_reallocs = 0
call this%free(info)
@ -440,10 +442,12 @@ contains
this%persistent_buffer_size = 0
this%diag_init_calls = 0
this%diag_start_calls = 0
this%diag_ineighbor_calls = 0
this%diag_wait_calls = 0
this%diag_buffer_reallocs = 0
this%diag_init_calls = 0
this%diag_start_calls = 0
this%diag_ineighbor_calls = 0
this%diag_wait_calls = 0
this%diag_buffer_reallocs = 0
end subroutine psb_comm_neighbor_init

@ -43,8 +43,8 @@ program psb_comm_cg_test
itmax = 1000
nrep = 5
nwarm = 1
! Keep itrace positive to avoid modulo-by-zero paths in convergence logging.
itrace = 0
! Disable per-iteration tracing; avoids modulo-by-zero paths in some logging branches.
itrace = -1
istop = 2
eps = 1.d-6
scheme_type = (/ psb_comm_isend_irecv_, psb_comm_ineighbor_alltoallv_, &
@ -95,7 +95,7 @@ program psb_comm_cg_test
! call probe_ieee('before psb_init')
call psb_init(ctxt)
! call probe_ieee('after psb_init')
! call clear_ieee_flags()
call clear_ieee_flags()
! call probe_ieee('after clear_ieee_flags')
call psb_info(ctxt, iam, np)
@ -129,14 +129,6 @@ program psb_comm_cg_test
! call probe_ieee('after psb_d_gen_pde3d')
if (info /= psb_success_) goto 9999
! desc_ctxt = desc_a%get_context()
! call psb_info(desc_ctxt, desc_me, desc_np)
! if (desc_np == -1) then
! info = psb_err_context_error_
! write(psb_err_unit,*) 'Invalid descriptor context after psb_d_gen_pde3d'
! goto 9999
! end if
do prec_idx = 1, n_precs
do scheme_idx = 1, n_schemes
do rep = 1, nrep
@ -214,20 +206,20 @@ program psb_comm_cg_test
final_error(prec_idx,scheme_idx,rep) = err
solve_info(prec_idx,scheme_idx,rep) = info
! if (iam == psb_root_) then
! select type(ch => x%v%comm_handle)
! type is(psb_comm_neighbor_handle)
! write(psb_out_unit,'("DIAG_COMM scheme=",a,", prec=",a,", rep=",i0)') &
! & trim(scheme_name(scheme_idx)), trim(prec_name(prec_idx)), rep
! write(psb_out_unit,'("DIAG_COMM counters: init=",i0,", start=",i0,", wait=",i0,", realloc=",i0)') &
! & ch%diag_init_calls, ch%diag_start_calls, ch%diag_wait_calls, &
! & ch%diag_buffer_reallocs
! write(psb_out_unit,'("DIAG_COMM state: ready=",l1,", bsz=",i0)') &
! & ch%persistent_request_ready, ch%persistent_buffer_size
! class default
! continue
! end select
! end if
if (iam == psb_root_) then
select type(ch => x%v%comm_handle)
type is(psb_comm_neighbor_handle)
write(psb_out_unit,'("DIAG_COMM scheme=",a,", prec=",a,", rep=",i0)') &
& trim(scheme_name(scheme_idx)), trim(prec_name(prec_idx)), rep
write(psb_out_unit,'("DIAG_COMM counters: init=",i0,", start=",i0,", ineighbor=",i0,", wait=",i0,", realloc=",i0)') &
& ch%diag_init_calls, ch%diag_start_calls, ch%diag_ineighbor_calls, &
& ch%diag_wait_calls, ch%diag_buffer_reallocs
write(psb_out_unit,'("DIAG_COMM state: ready=",l1,", bsz=",i0)') &
& ch%persistent_request_ready, ch%persistent_buffer_size
class default
continue
end select
end if
if (info /= psb_success_) goto 9999
end do

@ -23,9 +23,10 @@ runsd:
spmv_overlap: $(TOBJS)
$(FLINK) $(LOPT) $(TOBJS) -o spmv_overlap $(PSBLAS_LIB) $(LDLIBS)
/bin/mv spmv_overlap $(EXEDIR)
/bin/cp -f $(EXEDIR)/spmv_overlap $(EXEDIR)/psb_spmv_overlap_test
clean:
/bin/rm -f $(TOBJS) $(TOBJS_API) *$(.mod) $(EXEDIR)/spmv_overlap
/bin/rm -f $(TOBJS) $(TOBJS_API) *$(.mod) $(EXEDIR)/spmv_overlap $(EXEDIR)/psb_spmv_overlap_test
lib:
(cd ../../; make library)

@ -11,6 +11,7 @@ module psb_spmv_overlap_test
use psb_comm_factory_mod, only: psb_comm_set
use psb_comm_schemes_mod, only: psb_comm_isend_irecv_, psb_comm_ineighbor_alltoallv_, &
& psb_comm_persistent_ineighbor_alltoallv_
use psb_comm_neighbor_impl_mod, only: psb_comm_neighbor_handle
implicit none
@ -540,6 +541,22 @@ contains
real(psb_dpk_) :: t_no_isend, t_no_neighbor, t_no_persistent
real(psb_dpk_) :: err_isend, err_neighbor, err_persistent, tol
real(psb_dpk_) :: avg_ov, avg_no, speedup, gain_pct
integer(psb_ipk_) :: n_init_neighbor_l, n_start_neighbor_l, n_wait_neighbor_l, n_realloc_neighbor_l
integer(psb_ipk_) :: n_init_persist_l, n_start_persist_l, n_wait_persist_l, n_realloc_persist_l
integer(psb_ipk_) :: n_init_neighbor_g, n_start_neighbor_g, n_wait_neighbor_g, n_realloc_neighbor_g
integer(psb_ipk_) :: n_init_persist_g, n_start_persist_g, n_wait_persist_g, n_realloc_persist_g
integer(psb_ipk_) :: n_ineighbor_neighbor_l, n_ineighbor_neighbor_g
integer(psb_ipk_) :: peak_start_neighbor_l, peak_start_persist_l
integer(psb_ipk_) :: peak_start_neighbor_g, peak_start_persist_g
integer(psb_ipk_) :: last_start_neighbor_l, last_start_persist_l
integer(psb_ipk_) :: accum_start_neighbor_l, accum_start_persist_l
integer(psb_ipk_) :: accum_start_neighbor_g, accum_start_persist_g
integer(psb_ipk_) :: comm_type_neighbor_l, comm_type_persist_l
integer(psb_ipk_) :: comm_type_neighbor_g, comm_type_persist_g
integer(psb_ipk_) :: is_neighbor_handle_l, is_persist_handle_l
integer(psb_ipk_) :: is_neighbor_handle_g, is_persist_handle_g
integer(psb_ipk_) :: use_persistent_neighbor_l, use_persistent_persist_l
integer(psb_ipk_) :: use_persistent_neighbor_g, use_persistent_persist_g
info = psb_success_
tol = 1.0d-10
@ -550,6 +567,19 @@ contains
t_no_isend = 0.0_psb_dpk_
t_no_neighbor = 0.0_psb_dpk_
t_no_persistent = 0.0_psb_dpk_
peak_start_neighbor_l = 0
peak_start_persist_l = 0
n_ineighbor_neighbor_l = 0
last_start_neighbor_l = -1
last_start_persist_l = -1
accum_start_neighbor_l = 0
accum_start_persist_l = 0
comm_type_neighbor_l = -1
comm_type_persist_l = -1
is_neighbor_handle_l = 0
is_persist_handle_l = 0
use_persistent_neighbor_l = 0
use_persistent_persist_l = 0
idim = 10
call psb_erractionsave(err_act)
@ -615,18 +645,123 @@ contains
call psb_comm_set(psb_comm_persistent_ineighbor_alltoallv_, x_persistent%v%comm_handle, info)
if (info /= psb_success_) goto 9999
select type(ch => x_neighbor%v%comm_handle)
type is(psb_comm_neighbor_handle)
is_neighbor_handle_l = 1
comm_type_neighbor_l = ch%comm_type
if (ch%use_persistent_buffers) use_persistent_neighbor_l = 1
class default
continue
end select
select type(ch => x_persistent%v%comm_handle)
type is(psb_comm_neighbor_handle)
is_persist_handle_l = 1
comm_type_persist_l = ch%comm_type
if (ch%use_persistent_buffers) use_persistent_persist_l = 1
class default
continue
end select
! Warm-up all schemes once: overlap and non-overlap paths.
call psb_spmm(alpha, a, x_isend, beta, y_ov_isend, desc_a, info, doswap=.true.)
call psb_halo(x_isend, desc_a, info)
call psb_spmm(alpha, a, x_isend, beta, y_no_isend, desc_a, info, doswap=.false.)
call psb_spmm(alpha, a, x_neighbor, beta, y_ov_neighbor, desc_a, info, doswap=.true.)
select type(ch => x_neighbor%v%comm_handle)
type is(psb_comm_neighbor_handle)
n_ineighbor_neighbor_l = ch%diag_ineighbor_calls
if (last_start_neighbor_l < 0) then
accum_start_neighbor_l = accum_start_neighbor_l + ch%diag_start_calls
else if (ch%diag_start_calls >= last_start_neighbor_l) then
accum_start_neighbor_l = accum_start_neighbor_l + (ch%diag_start_calls - last_start_neighbor_l)
else
accum_start_neighbor_l = accum_start_neighbor_l + ch%diag_start_calls
end if
last_start_neighbor_l = ch%diag_start_calls
peak_start_neighbor_l = max(peak_start_neighbor_l, ch%diag_start_calls)
class default
continue
end select
call psb_halo(x_neighbor, desc_a, info)
select type(ch => x_neighbor%v%comm_handle)
type is(psb_comm_neighbor_handle)
n_ineighbor_neighbor_l = ch%diag_ineighbor_calls
if (last_start_neighbor_l < 0) then
accum_start_neighbor_l = accum_start_neighbor_l + ch%diag_start_calls
else if (ch%diag_start_calls >= last_start_neighbor_l) then
accum_start_neighbor_l = accum_start_neighbor_l + (ch%diag_start_calls - last_start_neighbor_l)
else
accum_start_neighbor_l = accum_start_neighbor_l + ch%diag_start_calls
end if
last_start_neighbor_l = ch%diag_start_calls
peak_start_neighbor_l = max(peak_start_neighbor_l, ch%diag_start_calls)
class default
continue
end select
call psb_spmm(alpha, a, x_neighbor, beta, y_no_neighbor, desc_a, info, doswap=.false.)
select type(ch => x_neighbor%v%comm_handle)
type is(psb_comm_neighbor_handle)
n_ineighbor_neighbor_l = ch%diag_ineighbor_calls
if (last_start_neighbor_l < 0) then
accum_start_neighbor_l = accum_start_neighbor_l + ch%diag_start_calls
else if (ch%diag_start_calls >= last_start_neighbor_l) then
accum_start_neighbor_l = accum_start_neighbor_l + (ch%diag_start_calls - last_start_neighbor_l)
else
accum_start_neighbor_l = accum_start_neighbor_l + ch%diag_start_calls
end if
last_start_neighbor_l = ch%diag_start_calls
peak_start_neighbor_l = max(peak_start_neighbor_l, ch%diag_start_calls)
class default
continue
end select
call psb_spmm(alpha, a, x_persistent, beta, y_ov_persistent, desc_a, info, doswap=.true.)
select type(ch => x_persistent%v%comm_handle)
type is(psb_comm_neighbor_handle)
if (last_start_persist_l < 0) then
accum_start_persist_l = accum_start_persist_l + ch%diag_start_calls
else if (ch%diag_start_calls >= last_start_persist_l) then
accum_start_persist_l = accum_start_persist_l + (ch%diag_start_calls - last_start_persist_l)
else
accum_start_persist_l = accum_start_persist_l + ch%diag_start_calls
end if
last_start_persist_l = ch%diag_start_calls
peak_start_persist_l = max(peak_start_persist_l, ch%diag_start_calls)
class default
continue
end select
call psb_halo(x_persistent, desc_a, info)
select type(ch => x_persistent%v%comm_handle)
type is(psb_comm_neighbor_handle)
if (last_start_persist_l < 0) then
accum_start_persist_l = accum_start_persist_l + ch%diag_start_calls
else if (ch%diag_start_calls >= last_start_persist_l) then
accum_start_persist_l = accum_start_persist_l + (ch%diag_start_calls - last_start_persist_l)
else
accum_start_persist_l = accum_start_persist_l + ch%diag_start_calls
end if
last_start_persist_l = ch%diag_start_calls
peak_start_persist_l = max(peak_start_persist_l, ch%diag_start_calls)
class default
continue
end select
call psb_spmm(alpha, a, x_persistent, beta, y_no_persistent, desc_a, info, doswap=.false.)
select type(ch => x_persistent%v%comm_handle)
type is(psb_comm_neighbor_handle)
if (last_start_persist_l < 0) then
accum_start_persist_l = accum_start_persist_l + ch%diag_start_calls
else if (ch%diag_start_calls >= last_start_persist_l) then
accum_start_persist_l = accum_start_persist_l + (ch%diag_start_calls - last_start_persist_l)
else
accum_start_persist_l = accum_start_persist_l + ch%diag_start_calls
end if
last_start_persist_l = ch%diag_start_calls
peak_start_persist_l = max(peak_start_persist_l, ch%diag_start_calls)
class default
continue
end select
if (info /= psb_success_) goto 9999
! -----------------------------
@ -666,6 +801,20 @@ contains
t0 = psb_wtime()
do i = 1, times
call psb_spmm(alpha, a, x_neighbor, beta, y_ov_neighbor, desc_a, info, doswap=.true.)
select type(ch => x_neighbor%v%comm_handle)
type is(psb_comm_neighbor_handle)
if (last_start_neighbor_l < 0) then
accum_start_neighbor_l = accum_start_neighbor_l + ch%diag_start_calls
else if (ch%diag_start_calls >= last_start_neighbor_l) then
accum_start_neighbor_l = accum_start_neighbor_l + (ch%diag_start_calls - last_start_neighbor_l)
else
accum_start_neighbor_l = accum_start_neighbor_l + ch%diag_start_calls
end if
last_start_neighbor_l = ch%diag_start_calls
peak_start_neighbor_l = max(peak_start_neighbor_l, ch%diag_start_calls)
class default
continue
end select
end do
t1 = psb_wtime()
dt = t1 - t0
@ -679,6 +828,20 @@ contains
do i = 1, times
call psb_halo(x_neighbor, desc_a, info)
call psb_spmm(alpha, a, x_neighbor, beta, y_no_neighbor, desc_a, info, doswap=.false.)
select type(ch => x_neighbor%v%comm_handle)
type is(psb_comm_neighbor_handle)
if (last_start_neighbor_l < 0) then
accum_start_neighbor_l = accum_start_neighbor_l + ch%diag_start_calls
else if (ch%diag_start_calls >= last_start_neighbor_l) then
accum_start_neighbor_l = accum_start_neighbor_l + (ch%diag_start_calls - last_start_neighbor_l)
else
accum_start_neighbor_l = accum_start_neighbor_l + ch%diag_start_calls
end if
last_start_neighbor_l = ch%diag_start_calls
peak_start_neighbor_l = max(peak_start_neighbor_l, ch%diag_start_calls)
class default
continue
end select
end do
t1 = psb_wtime()
dt = t1 - t0
@ -694,6 +857,20 @@ contains
t0 = psb_wtime()
do i = 1, times
call psb_spmm(alpha, a, x_persistent, beta, y_ov_persistent, desc_a, info, doswap=.true.)
select type(ch => x_persistent%v%comm_handle)
type is(psb_comm_neighbor_handle)
if (last_start_persist_l < 0) then
accum_start_persist_l = accum_start_persist_l + ch%diag_start_calls
else if (ch%diag_start_calls >= last_start_persist_l) then
accum_start_persist_l = accum_start_persist_l + (ch%diag_start_calls - last_start_persist_l)
else
accum_start_persist_l = accum_start_persist_l + ch%diag_start_calls
end if
last_start_persist_l = ch%diag_start_calls
peak_start_persist_l = max(peak_start_persist_l, ch%diag_start_calls)
class default
continue
end select
end do
t1 = psb_wtime()
dt = t1 - t0
@ -707,6 +884,20 @@ contains
do i = 1, times
call psb_halo(x_persistent, desc_a, info)
call psb_spmm(alpha, a, x_persistent, beta, y_no_persistent, desc_a, info, doswap=.false.)
select type(ch => x_persistent%v%comm_handle)
type is(psb_comm_neighbor_handle)
if (last_start_persist_l < 0) then
accum_start_persist_l = accum_start_persist_l + ch%diag_start_calls
else if (ch%diag_start_calls >= last_start_persist_l) then
accum_start_persist_l = accum_start_persist_l + (ch%diag_start_calls - last_start_persist_l)
else
accum_start_persist_l = accum_start_persist_l + ch%diag_start_calls
end if
last_start_persist_l = ch%diag_start_calls
peak_start_persist_l = max(peak_start_persist_l, ch%diag_start_calls)
class default
continue
end select
end do
t1 = psb_wtime()
dt = t1 - t0
@ -722,6 +913,75 @@ contains
call psb_amx(ctxt, err_neighbor)
call psb_amx(ctxt, err_persistent)
n_init_neighbor_l = 0
n_start_neighbor_l = 0
n_wait_neighbor_l = 0
n_realloc_neighbor_l = 0
n_init_persist_l = 0
n_start_persist_l = 0
n_wait_persist_l = 0
n_realloc_persist_l = 0
select type(ch => x_neighbor%v%comm_handle)
type is(psb_comm_neighbor_handle)
n_init_neighbor_l = ch%diag_init_calls
n_start_neighbor_l = ch%diag_start_calls
n_wait_neighbor_l = ch%diag_wait_calls
n_realloc_neighbor_l = ch%diag_buffer_reallocs
class default
continue
end select
select type(ch => x_persistent%v%comm_handle)
type is(psb_comm_neighbor_handle)
n_init_persist_l = ch%diag_init_calls
n_start_persist_l = ch%diag_start_calls
n_wait_persist_l = ch%diag_wait_calls
n_realloc_persist_l = ch%diag_buffer_reallocs
class default
continue
end select
n_init_neighbor_g = n_init_neighbor_l
n_start_neighbor_g = n_start_neighbor_l
n_wait_neighbor_g = n_wait_neighbor_l
n_realloc_neighbor_g = n_realloc_neighbor_l
n_init_persist_g = n_init_persist_l
n_start_persist_g = n_start_persist_l
n_wait_persist_g = n_wait_persist_l
n_realloc_persist_g = n_realloc_persist_l
n_ineighbor_neighbor_g = n_ineighbor_neighbor_l
peak_start_neighbor_g = peak_start_neighbor_l
peak_start_persist_g = peak_start_persist_l
accum_start_neighbor_g = accum_start_neighbor_l
accum_start_persist_g = accum_start_persist_l
comm_type_neighbor_g = comm_type_neighbor_l
comm_type_persist_g = comm_type_persist_l
is_neighbor_handle_g = is_neighbor_handle_l
is_persist_handle_g = is_persist_handle_l
use_persistent_neighbor_g = use_persistent_neighbor_l
use_persistent_persist_g = use_persistent_persist_l
call psb_sum(ctxt, n_init_neighbor_g)
call psb_sum(ctxt, n_start_neighbor_g)
call psb_sum(ctxt, n_wait_neighbor_g)
call psb_sum(ctxt, n_realloc_neighbor_g)
call psb_sum(ctxt, n_init_persist_g)
call psb_sum(ctxt, n_start_persist_g)
call psb_sum(ctxt, n_wait_persist_g)
call psb_sum(ctxt, n_realloc_persist_g)
call psb_sum(ctxt, n_ineighbor_neighbor_g)
call psb_sum(ctxt, peak_start_neighbor_g)
call psb_sum(ctxt, peak_start_persist_g)
call psb_sum(ctxt, accum_start_neighbor_g)
call psb_sum(ctxt, accum_start_persist_g)
call psb_sum(ctxt, comm_type_neighbor_g)
call psb_sum(ctxt, comm_type_persist_g)
call psb_sum(ctxt, is_neighbor_handle_g)
call psb_sum(ctxt, is_persist_handle_g)
call psb_sum(ctxt, use_persistent_neighbor_g)
call psb_sum(ctxt, use_persistent_persist_g)
if (my_rank == 0) then
write(psb_out_unit,'(/,"SpMV overlap benchmark")')
write(psb_out_unit,'(" idim : ",i0)') idim
@ -769,6 +1029,21 @@ contains
write(psb_out_unit,'(" gain (%) : ",f10.4)') gain_pct
write(psb_out_unit,'(" overlap vs no_overlap err = ",es12.5)') err_persistent
write(psb_out_unit,'(/,"Communication diagnostics (global sum over MPI ranks):")')
write(psb_out_unit,'(" ineighbor_alltoallv: init=",i0,", start=",i0,", wait=",i0,", realloc=",i0)') &
& n_init_neighbor_g, n_start_neighbor_g, n_wait_neighbor_g, n_realloc_neighbor_g
write(psb_out_unit,'(" ineighbor_alltoallv calls : ",i0)') n_ineighbor_neighbor_g
write(psb_out_unit,'(" ineighbor peak start observed during run: ",i0)') peak_start_neighbor_g
write(psb_out_unit,'(" ineighbor accumulated starts (robust): ",i0)') accum_start_neighbor_g
write(psb_out_unit,'(" persistent_ineighbor_a2av: init=",i0,", start=",i0,", wait=",i0,", realloc=",i0)') &
& n_init_persist_g, n_start_persist_g, n_wait_persist_g, n_realloc_persist_g
write(psb_out_unit,'(" persistent peak start observed during run: ",i0)') peak_start_persist_g
write(psb_out_unit,'(" persistent accumulated starts (robust): ",i0)') accum_start_persist_g
write(psb_out_unit,'(" handle check (sum over ranks): neighbor is_type=",i0,", comm_type=",i0,", use_persistent=",i0)') &
& is_neighbor_handle_g, comm_type_neighbor_g, use_persistent_neighbor_g
write(psb_out_unit,'(" handle check (sum over ranks): persistent is_type=",i0,", comm_type=",i0,", use_persistent=",i0)') &
& is_persist_handle_g, comm_type_persist_g, use_persistent_persist_g
if ((err_isend > tol) .or. (err_neighbor > tol) .or. (err_persistent > tol)) then
write(psb_out_unit,'(" WARNING: mismatch exceeds tolerance ",es12.5)') tol
end if

Loading…
Cancel
Save