From 75d86864b0f2f60ad1eb11d4c966d80084e3a4b5 Mon Sep 17 00:00:00 2001 From: Stack-1 Date: Sun, 19 Apr 2026 17:56:39 +0200 Subject: [PATCH] [UPDATE] Minor changes to debug --- base/comm/internals/psi_dswapdata.F90 | 83 +++++- .../psb_comm_neighbor_impl_mod.F90 | 4 + test/comm/cg/psb_comm_cg_test.F90 | 42 ++- test/comm/spmv/Makefile | 3 +- test/comm/spmv/psb_spmv_overlap_test.f90 | 275 ++++++++++++++++++ 5 files changed, 379 insertions(+), 28 deletions(-) diff --git a/base/comm/internals/psi_dswapdata.F90 b/base/comm/internals/psi_dswapdata.F90 index 6c0262dd..f7030eb1 100644 --- a/base/comm/internals/psi_dswapdata.F90 +++ b/base/comm/internals/psi_dswapdata.F90 @@ -92,6 +92,10 @@ submodule (psi_d_comm_v_mod) psi_d_swapdata_impl integer(psb_ipk_), save :: psb_swap_timing_wrapper_calls = 0 integer(psb_ipk_), save :: psb_swap_timing_baseline_calls = 0 integer(psb_ipk_), save :: psb_swap_timing_neighbor_calls = 0 + logical, save :: psb_swap_start_debug_inited = .false. + logical, save :: psb_swap_start_debug_enabled = .false. + integer(psb_ipk_), save :: psb_swap_start_debug_max_report = 128 + integer(psb_ipk_), save :: psb_swap_start_debug_report_count = 0 contains @@ -137,6 +141,47 @@ contains psb_swap_timing_should_report = .true. end function psb_swap_timing_should_report + subroutine psb_swap_start_debug_setup() + implicit none + character(len=64) :: env_buf + integer(psb_ipk_) :: env_len, env_status, ios + + if (psb_swap_start_debug_inited) return + + psb_swap_start_debug_inited = .true. + psb_swap_start_debug_enabled = .false. + psb_swap_start_debug_max_report = 128 + + call get_environment_variable('PSB_SWAP_DEBUG_START', env_buf, length=env_len, status=env_status) + if ((env_status == 0) .and. (env_len > 0)) then + select case(env_buf(1:1)) + case('1','t','T','y','Y') + psb_swap_start_debug_enabled = .true. + case default + psb_swap_start_debug_enabled = .false. + end select + end if + + call get_environment_variable('PSB_SWAP_DEBUG_START_MAX_REPORT', env_buf, length=env_len, status=env_status) + if ((env_status == 0) .and. (env_len > 0)) then + read(env_buf(1:env_len), *, iostat=ios) psb_swap_start_debug_max_report + if ((ios /= 0) .or. (psb_swap_start_debug_max_report < 1)) psb_swap_start_debug_max_report = 128 + end if + end subroutine psb_swap_start_debug_setup + + logical function psb_swap_start_debug_should_report() + implicit none + + call psb_swap_start_debug_setup() + + psb_swap_start_debug_should_report = .false. + if (.not. psb_swap_start_debug_enabled) return + if (psb_swap_start_debug_report_count >= psb_swap_start_debug_max_report) return + + psb_swap_start_debug_report_count = psb_swap_start_debug_report_count + 1 + psb_swap_start_debug_should_report = .true. + end function psb_swap_start_debug_should_report + module subroutine psi_dswapdata_vect(swap_status,beta,y,desc_a,info,data) #ifdef PSB_MPI_MOD @@ -919,21 +964,37 @@ contains #ifdef PSB_HAVE_MPI_NEIGHBOR_PERSISTENT if (buffer_size > 0) then + ! Count the attempt before MPI_Start so we can diagnose call reachability. + neighbor_comm_handle%diag_start_calls = neighbor_comm_handle%diag_start_calls + 1 + if (psb_swap_start_debug_should_report()) then + write(psb_out_unit,'("SWAP_DEBUG MPI_Start(pre) kind=vect rank=",i0,", bsz=",i0,", ready=",l1)') & + & me, buffer_size, neighbor_comm_handle%persistent_request_ready + write(psb_out_unit,'(" inflight=",l1,", req_null=",l1,", dstart=",i0)') & + & neighbor_comm_handle%persistent_in_flight, & + & (neighbor_comm_handle%persistent_request == mpi_request_null), & + & neighbor_comm_handle%diag_start_calls + end if if (timing_on) t1 = psb_wtime() call mpi_start(neighbor_comm_handle%persistent_request, iret) if (timing_on) t_post = t_post + (psb_wtime() - t1) + if (psb_swap_start_debug_should_report()) then + write(psb_out_unit,'("SWAP_DEBUG MPI_Start(post) kind=vect rank=",i0,", iret=",i0)') & + & me, iret + write(psb_out_unit,'(" inflight=",l1,", dstart=",i0)') & + & neighbor_comm_handle%persistent_in_flight, neighbor_comm_handle%diag_start_calls + end if if (iret /= mpi_success) then info = psb_err_mpi_error_ call psb_errpush(info, name, m_err=(/iret/)) goto 9999 end if - neighbor_comm_handle%diag_start_calls = neighbor_comm_handle%diag_start_calls + 1 neighbor_comm_handle%persistent_in_flight = .true. else neighbor_comm_handle%persistent_in_flight = .false. end if #else if (buffer_size > 0) then + neighbor_comm_handle%diag_ineighbor_calls = neighbor_comm_handle%diag_ineighbor_calls + 1 if (timing_on) t1 = psb_wtime() call mpi_ineighbor_alltoallv( & & y%combuf(1), & ! send buffer @@ -962,6 +1023,7 @@ contains ! Post non-blocking neighborhood alltoallv if (debug) write(*,*) me,' nbr_vect: posting MPI_Ineighbor_alltoallv' if (buffer_size > 0) then + neighbor_comm_handle%diag_ineighbor_calls = neighbor_comm_handle%diag_ineighbor_calls + 1 if (timing_on) t1 = psb_wtime() call mpi_ineighbor_alltoallv( & & y%combuf(1), & ! send buffer @@ -1671,15 +1733,31 @@ subroutine psi_dswap_neighbor_topology_multivect(ctxt,swap_status,beta,y,comm_in end if #ifdef PSB_HAVE_MPI_NEIGHBOR_PERSISTENT + ! Count the attempt before MPI_Start so we can diagnose call reachability. + neighbor_comm_handle%diag_start_calls = neighbor_comm_handle%diag_start_calls + 1 + if (psb_swap_start_debug_should_report()) then + write(psb_out_unit,'("SWAP_DEBUG MPI_Start(pre) kind=multivect rank=",i0,", bsz=",i0,", ready=",l1)') & + & me, buffer_size, neighbor_comm_handle%persistent_request_ready + write(psb_out_unit,'(" inflight=",l1,", req_null=",l1,", dstart=",i0)') & + & neighbor_comm_handle%persistent_in_flight, & + & (neighbor_comm_handle%persistent_request == mpi_request_null), & + & neighbor_comm_handle%diag_start_calls + end if call mpi_start(neighbor_comm_handle%persistent_request, iret) + if (psb_swap_start_debug_should_report()) then + write(psb_out_unit,'("SWAP_DEBUG MPI_Start(post) kind=multivect rank=",i0,", iret=",i0)') & + & me, iret + write(psb_out_unit,'(" inflight=",l1,", dstart=",i0)') & + & neighbor_comm_handle%persistent_in_flight, neighbor_comm_handle%diag_start_calls + end if if (iret /= mpi_success) then info = psb_err_mpi_error_ call psb_errpush(info, name, m_err=(/iret/)) goto 9999 end if - neighbor_comm_handle%diag_start_calls = neighbor_comm_handle%diag_start_calls + 1 neighbor_comm_handle%persistent_in_flight = .true. #else + neighbor_comm_handle%diag_ineighbor_calls = neighbor_comm_handle%diag_ineighbor_calls + 1 call mpi_ineighbor_alltoallv( & & y%combuf(1), & ! send buffer & neighbor_comm_handle%send_counts, & @@ -1701,6 +1779,7 @@ subroutine psi_dswap_neighbor_topology_multivect(ctxt,swap_status,beta,y,comm_in else ! Post non-blocking neighborhood alltoallv if (debug) write(*,*) me,' nbr_vect: posting MPI_Ineighbor_alltoallv' + neighbor_comm_handle%diag_ineighbor_calls = neighbor_comm_handle%diag_ineighbor_calls + 1 call mpi_ineighbor_alltoallv( & & y%combuf(1), & ! send buffer & neighbor_comm_handle%send_counts, & diff --git a/base/modules/comm/comm_schemes/psb_comm_neighbor_impl_mod.F90 b/base/modules/comm/comm_schemes/psb_comm_neighbor_impl_mod.F90 index 106edc25..4f8a29b8 100644 --- a/base/modules/comm/comm_schemes/psb_comm_neighbor_impl_mod.F90 +++ b/base/modules/comm/comm_schemes/psb_comm_neighbor_impl_mod.F90 @@ -32,6 +32,7 @@ module psb_comm_neighbor_impl_mod integer(psb_ipk_) :: persistent_buffer_size = 0 integer(psb_ipk_) :: diag_init_calls = 0 integer(psb_ipk_) :: diag_start_calls = 0 + integer(psb_ipk_) :: diag_ineighbor_calls = 0 integer(psb_ipk_) :: diag_wait_calls = 0 integer(psb_ipk_) :: diag_buffer_reallocs = 0 contains @@ -399,6 +400,7 @@ contains this%persistent_buffer_size = 0 this%diag_init_calls = 0 this%diag_start_calls = 0 + this%diag_ineighbor_calls = 0 this%diag_wait_calls = 0 this%diag_buffer_reallocs = 0 call this%free(info) @@ -440,10 +442,12 @@ contains this%persistent_buffer_size = 0 this%diag_init_calls = 0 this%diag_start_calls = 0 + this%diag_ineighbor_calls = 0 this%diag_wait_calls = 0 this%diag_buffer_reallocs = 0 this%diag_init_calls = 0 this%diag_start_calls = 0 + this%diag_ineighbor_calls = 0 this%diag_wait_calls = 0 this%diag_buffer_reallocs = 0 end subroutine psb_comm_neighbor_init diff --git a/test/comm/cg/psb_comm_cg_test.F90 b/test/comm/cg/psb_comm_cg_test.F90 index abc339b0..1b75dad7 100644 --- a/test/comm/cg/psb_comm_cg_test.F90 +++ b/test/comm/cg/psb_comm_cg_test.F90 @@ -43,8 +43,8 @@ program psb_comm_cg_test itmax = 1000 nrep = 5 nwarm = 1 - ! Keep itrace positive to avoid modulo-by-zero paths in convergence logging. - itrace = 0 + ! Disable per-iteration tracing; avoids modulo-by-zero paths in some logging branches. + itrace = -1 istop = 2 eps = 1.d-6 scheme_type = (/ psb_comm_isend_irecv_, psb_comm_ineighbor_alltoallv_, & @@ -95,7 +95,7 @@ program psb_comm_cg_test ! call probe_ieee('before psb_init') call psb_init(ctxt) ! call probe_ieee('after psb_init') - ! call clear_ieee_flags() + call clear_ieee_flags() ! call probe_ieee('after clear_ieee_flags') call psb_info(ctxt, iam, np) @@ -129,14 +129,6 @@ program psb_comm_cg_test ! call probe_ieee('after psb_d_gen_pde3d') if (info /= psb_success_) goto 9999 - ! desc_ctxt = desc_a%get_context() - ! call psb_info(desc_ctxt, desc_me, desc_np) - ! if (desc_np == -1) then - ! info = psb_err_context_error_ - ! write(psb_err_unit,*) 'Invalid descriptor context after psb_d_gen_pde3d' - ! goto 9999 - ! end if - do prec_idx = 1, n_precs do scheme_idx = 1, n_schemes do rep = 1, nrep @@ -214,20 +206,20 @@ program psb_comm_cg_test final_error(prec_idx,scheme_idx,rep) = err solve_info(prec_idx,scheme_idx,rep) = info - ! if (iam == psb_root_) then - ! select type(ch => x%v%comm_handle) - ! type is(psb_comm_neighbor_handle) - ! write(psb_out_unit,'("DIAG_COMM scheme=",a,", prec=",a,", rep=",i0)') & - ! & trim(scheme_name(scheme_idx)), trim(prec_name(prec_idx)), rep - ! write(psb_out_unit,'("DIAG_COMM counters: init=",i0,", start=",i0,", wait=",i0,", realloc=",i0)') & - ! & ch%diag_init_calls, ch%diag_start_calls, ch%diag_wait_calls, & - ! & ch%diag_buffer_reallocs - ! write(psb_out_unit,'("DIAG_COMM state: ready=",l1,", bsz=",i0)') & - ! & ch%persistent_request_ready, ch%persistent_buffer_size - ! class default - ! continue - ! end select - ! end if + if (iam == psb_root_) then + select type(ch => x%v%comm_handle) + type is(psb_comm_neighbor_handle) + write(psb_out_unit,'("DIAG_COMM scheme=",a,", prec=",a,", rep=",i0)') & + & trim(scheme_name(scheme_idx)), trim(prec_name(prec_idx)), rep + write(psb_out_unit,'("DIAG_COMM counters: init=",i0,", start=",i0,", ineighbor=",i0,", wait=",i0,", realloc=",i0)') & + & ch%diag_init_calls, ch%diag_start_calls, ch%diag_ineighbor_calls, & + & ch%diag_wait_calls, ch%diag_buffer_reallocs + write(psb_out_unit,'("DIAG_COMM state: ready=",l1,", bsz=",i0)') & + & ch%persistent_request_ready, ch%persistent_buffer_size + class default + continue + end select + end if if (info /= psb_success_) goto 9999 end do diff --git a/test/comm/spmv/Makefile b/test/comm/spmv/Makefile index 1555f006..1416389d 100644 --- a/test/comm/spmv/Makefile +++ b/test/comm/spmv/Makefile @@ -23,9 +23,10 @@ runsd: spmv_overlap: $(TOBJS) $(FLINK) $(LOPT) $(TOBJS) -o spmv_overlap $(PSBLAS_LIB) $(LDLIBS) /bin/mv spmv_overlap $(EXEDIR) + /bin/cp -f $(EXEDIR)/spmv_overlap $(EXEDIR)/psb_spmv_overlap_test clean: - /bin/rm -f $(TOBJS) $(TOBJS_API) *$(.mod) $(EXEDIR)/spmv_overlap + /bin/rm -f $(TOBJS) $(TOBJS_API) *$(.mod) $(EXEDIR)/spmv_overlap $(EXEDIR)/psb_spmv_overlap_test lib: (cd ../../; make library) diff --git a/test/comm/spmv/psb_spmv_overlap_test.f90 b/test/comm/spmv/psb_spmv_overlap_test.f90 index eaf0793b..25226fcf 100644 --- a/test/comm/spmv/psb_spmv_overlap_test.f90 +++ b/test/comm/spmv/psb_spmv_overlap_test.f90 @@ -11,6 +11,7 @@ module psb_spmv_overlap_test use psb_comm_factory_mod, only: psb_comm_set use psb_comm_schemes_mod, only: psb_comm_isend_irecv_, psb_comm_ineighbor_alltoallv_, & & psb_comm_persistent_ineighbor_alltoallv_ + use psb_comm_neighbor_impl_mod, only: psb_comm_neighbor_handle implicit none @@ -540,6 +541,22 @@ contains real(psb_dpk_) :: t_no_isend, t_no_neighbor, t_no_persistent real(psb_dpk_) :: err_isend, err_neighbor, err_persistent, tol real(psb_dpk_) :: avg_ov, avg_no, speedup, gain_pct + integer(psb_ipk_) :: n_init_neighbor_l, n_start_neighbor_l, n_wait_neighbor_l, n_realloc_neighbor_l + integer(psb_ipk_) :: n_init_persist_l, n_start_persist_l, n_wait_persist_l, n_realloc_persist_l + integer(psb_ipk_) :: n_init_neighbor_g, n_start_neighbor_g, n_wait_neighbor_g, n_realloc_neighbor_g + integer(psb_ipk_) :: n_init_persist_g, n_start_persist_g, n_wait_persist_g, n_realloc_persist_g + integer(psb_ipk_) :: n_ineighbor_neighbor_l, n_ineighbor_neighbor_g + integer(psb_ipk_) :: peak_start_neighbor_l, peak_start_persist_l + integer(psb_ipk_) :: peak_start_neighbor_g, peak_start_persist_g + integer(psb_ipk_) :: last_start_neighbor_l, last_start_persist_l + integer(psb_ipk_) :: accum_start_neighbor_l, accum_start_persist_l + integer(psb_ipk_) :: accum_start_neighbor_g, accum_start_persist_g + integer(psb_ipk_) :: comm_type_neighbor_l, comm_type_persist_l + integer(psb_ipk_) :: comm_type_neighbor_g, comm_type_persist_g + integer(psb_ipk_) :: is_neighbor_handle_l, is_persist_handle_l + integer(psb_ipk_) :: is_neighbor_handle_g, is_persist_handle_g + integer(psb_ipk_) :: use_persistent_neighbor_l, use_persistent_persist_l + integer(psb_ipk_) :: use_persistent_neighbor_g, use_persistent_persist_g info = psb_success_ tol = 1.0d-10 @@ -550,6 +567,19 @@ contains t_no_isend = 0.0_psb_dpk_ t_no_neighbor = 0.0_psb_dpk_ t_no_persistent = 0.0_psb_dpk_ + peak_start_neighbor_l = 0 + peak_start_persist_l = 0 + n_ineighbor_neighbor_l = 0 + last_start_neighbor_l = -1 + last_start_persist_l = -1 + accum_start_neighbor_l = 0 + accum_start_persist_l = 0 + comm_type_neighbor_l = -1 + comm_type_persist_l = -1 + is_neighbor_handle_l = 0 + is_persist_handle_l = 0 + use_persistent_neighbor_l = 0 + use_persistent_persist_l = 0 idim = 10 call psb_erractionsave(err_act) @@ -615,18 +645,123 @@ contains call psb_comm_set(psb_comm_persistent_ineighbor_alltoallv_, x_persistent%v%comm_handle, info) if (info /= psb_success_) goto 9999 + select type(ch => x_neighbor%v%comm_handle) + type is(psb_comm_neighbor_handle) + is_neighbor_handle_l = 1 + comm_type_neighbor_l = ch%comm_type + if (ch%use_persistent_buffers) use_persistent_neighbor_l = 1 + class default + continue + end select + + select type(ch => x_persistent%v%comm_handle) + type is(psb_comm_neighbor_handle) + is_persist_handle_l = 1 + comm_type_persist_l = ch%comm_type + if (ch%use_persistent_buffers) use_persistent_persist_l = 1 + class default + continue + end select + ! Warm-up all schemes once: overlap and non-overlap paths. call psb_spmm(alpha, a, x_isend, beta, y_ov_isend, desc_a, info, doswap=.true.) call psb_halo(x_isend, desc_a, info) call psb_spmm(alpha, a, x_isend, beta, y_no_isend, desc_a, info, doswap=.false.) call psb_spmm(alpha, a, x_neighbor, beta, y_ov_neighbor, desc_a, info, doswap=.true.) + select type(ch => x_neighbor%v%comm_handle) + type is(psb_comm_neighbor_handle) + n_ineighbor_neighbor_l = ch%diag_ineighbor_calls + if (last_start_neighbor_l < 0) then + accum_start_neighbor_l = accum_start_neighbor_l + ch%diag_start_calls + else if (ch%diag_start_calls >= last_start_neighbor_l) then + accum_start_neighbor_l = accum_start_neighbor_l + (ch%diag_start_calls - last_start_neighbor_l) + else + accum_start_neighbor_l = accum_start_neighbor_l + ch%diag_start_calls + end if + last_start_neighbor_l = ch%diag_start_calls + peak_start_neighbor_l = max(peak_start_neighbor_l, ch%diag_start_calls) + class default + continue + end select call psb_halo(x_neighbor, desc_a, info) + select type(ch => x_neighbor%v%comm_handle) + type is(psb_comm_neighbor_handle) + n_ineighbor_neighbor_l = ch%diag_ineighbor_calls + if (last_start_neighbor_l < 0) then + accum_start_neighbor_l = accum_start_neighbor_l + ch%diag_start_calls + else if (ch%diag_start_calls >= last_start_neighbor_l) then + accum_start_neighbor_l = accum_start_neighbor_l + (ch%diag_start_calls - last_start_neighbor_l) + else + accum_start_neighbor_l = accum_start_neighbor_l + ch%diag_start_calls + end if + last_start_neighbor_l = ch%diag_start_calls + peak_start_neighbor_l = max(peak_start_neighbor_l, ch%diag_start_calls) + class default + continue + end select call psb_spmm(alpha, a, x_neighbor, beta, y_no_neighbor, desc_a, info, doswap=.false.) + select type(ch => x_neighbor%v%comm_handle) + type is(psb_comm_neighbor_handle) + n_ineighbor_neighbor_l = ch%diag_ineighbor_calls + if (last_start_neighbor_l < 0) then + accum_start_neighbor_l = accum_start_neighbor_l + ch%diag_start_calls + else if (ch%diag_start_calls >= last_start_neighbor_l) then + accum_start_neighbor_l = accum_start_neighbor_l + (ch%diag_start_calls - last_start_neighbor_l) + else + accum_start_neighbor_l = accum_start_neighbor_l + ch%diag_start_calls + end if + last_start_neighbor_l = ch%diag_start_calls + peak_start_neighbor_l = max(peak_start_neighbor_l, ch%diag_start_calls) + class default + continue + end select call psb_spmm(alpha, a, x_persistent, beta, y_ov_persistent, desc_a, info, doswap=.true.) + select type(ch => x_persistent%v%comm_handle) + type is(psb_comm_neighbor_handle) + if (last_start_persist_l < 0) then + accum_start_persist_l = accum_start_persist_l + ch%diag_start_calls + else if (ch%diag_start_calls >= last_start_persist_l) then + accum_start_persist_l = accum_start_persist_l + (ch%diag_start_calls - last_start_persist_l) + else + accum_start_persist_l = accum_start_persist_l + ch%diag_start_calls + end if + last_start_persist_l = ch%diag_start_calls + peak_start_persist_l = max(peak_start_persist_l, ch%diag_start_calls) + class default + continue + end select call psb_halo(x_persistent, desc_a, info) + select type(ch => x_persistent%v%comm_handle) + type is(psb_comm_neighbor_handle) + if (last_start_persist_l < 0) then + accum_start_persist_l = accum_start_persist_l + ch%diag_start_calls + else if (ch%diag_start_calls >= last_start_persist_l) then + accum_start_persist_l = accum_start_persist_l + (ch%diag_start_calls - last_start_persist_l) + else + accum_start_persist_l = accum_start_persist_l + ch%diag_start_calls + end if + last_start_persist_l = ch%diag_start_calls + peak_start_persist_l = max(peak_start_persist_l, ch%diag_start_calls) + class default + continue + end select call psb_spmm(alpha, a, x_persistent, beta, y_no_persistent, desc_a, info, doswap=.false.) + select type(ch => x_persistent%v%comm_handle) + type is(psb_comm_neighbor_handle) + if (last_start_persist_l < 0) then + accum_start_persist_l = accum_start_persist_l + ch%diag_start_calls + else if (ch%diag_start_calls >= last_start_persist_l) then + accum_start_persist_l = accum_start_persist_l + (ch%diag_start_calls - last_start_persist_l) + else + accum_start_persist_l = accum_start_persist_l + ch%diag_start_calls + end if + last_start_persist_l = ch%diag_start_calls + peak_start_persist_l = max(peak_start_persist_l, ch%diag_start_calls) + class default + continue + end select if (info /= psb_success_) goto 9999 ! ----------------------------- @@ -666,6 +801,20 @@ contains t0 = psb_wtime() do i = 1, times call psb_spmm(alpha, a, x_neighbor, beta, y_ov_neighbor, desc_a, info, doswap=.true.) + select type(ch => x_neighbor%v%comm_handle) + type is(psb_comm_neighbor_handle) + if (last_start_neighbor_l < 0) then + accum_start_neighbor_l = accum_start_neighbor_l + ch%diag_start_calls + else if (ch%diag_start_calls >= last_start_neighbor_l) then + accum_start_neighbor_l = accum_start_neighbor_l + (ch%diag_start_calls - last_start_neighbor_l) + else + accum_start_neighbor_l = accum_start_neighbor_l + ch%diag_start_calls + end if + last_start_neighbor_l = ch%diag_start_calls + peak_start_neighbor_l = max(peak_start_neighbor_l, ch%diag_start_calls) + class default + continue + end select end do t1 = psb_wtime() dt = t1 - t0 @@ -679,6 +828,20 @@ contains do i = 1, times call psb_halo(x_neighbor, desc_a, info) call psb_spmm(alpha, a, x_neighbor, beta, y_no_neighbor, desc_a, info, doswap=.false.) + select type(ch => x_neighbor%v%comm_handle) + type is(psb_comm_neighbor_handle) + if (last_start_neighbor_l < 0) then + accum_start_neighbor_l = accum_start_neighbor_l + ch%diag_start_calls + else if (ch%diag_start_calls >= last_start_neighbor_l) then + accum_start_neighbor_l = accum_start_neighbor_l + (ch%diag_start_calls - last_start_neighbor_l) + else + accum_start_neighbor_l = accum_start_neighbor_l + ch%diag_start_calls + end if + last_start_neighbor_l = ch%diag_start_calls + peak_start_neighbor_l = max(peak_start_neighbor_l, ch%diag_start_calls) + class default + continue + end select end do t1 = psb_wtime() dt = t1 - t0 @@ -694,6 +857,20 @@ contains t0 = psb_wtime() do i = 1, times call psb_spmm(alpha, a, x_persistent, beta, y_ov_persistent, desc_a, info, doswap=.true.) + select type(ch => x_persistent%v%comm_handle) + type is(psb_comm_neighbor_handle) + if (last_start_persist_l < 0) then + accum_start_persist_l = accum_start_persist_l + ch%diag_start_calls + else if (ch%diag_start_calls >= last_start_persist_l) then + accum_start_persist_l = accum_start_persist_l + (ch%diag_start_calls - last_start_persist_l) + else + accum_start_persist_l = accum_start_persist_l + ch%diag_start_calls + end if + last_start_persist_l = ch%diag_start_calls + peak_start_persist_l = max(peak_start_persist_l, ch%diag_start_calls) + class default + continue + end select end do t1 = psb_wtime() dt = t1 - t0 @@ -707,6 +884,20 @@ contains do i = 1, times call psb_halo(x_persistent, desc_a, info) call psb_spmm(alpha, a, x_persistent, beta, y_no_persistent, desc_a, info, doswap=.false.) + select type(ch => x_persistent%v%comm_handle) + type is(psb_comm_neighbor_handle) + if (last_start_persist_l < 0) then + accum_start_persist_l = accum_start_persist_l + ch%diag_start_calls + else if (ch%diag_start_calls >= last_start_persist_l) then + accum_start_persist_l = accum_start_persist_l + (ch%diag_start_calls - last_start_persist_l) + else + accum_start_persist_l = accum_start_persist_l + ch%diag_start_calls + end if + last_start_persist_l = ch%diag_start_calls + peak_start_persist_l = max(peak_start_persist_l, ch%diag_start_calls) + class default + continue + end select end do t1 = psb_wtime() dt = t1 - t0 @@ -722,6 +913,75 @@ contains call psb_amx(ctxt, err_neighbor) call psb_amx(ctxt, err_persistent) + n_init_neighbor_l = 0 + n_start_neighbor_l = 0 + n_wait_neighbor_l = 0 + n_realloc_neighbor_l = 0 + n_init_persist_l = 0 + n_start_persist_l = 0 + n_wait_persist_l = 0 + n_realloc_persist_l = 0 + + select type(ch => x_neighbor%v%comm_handle) + type is(psb_comm_neighbor_handle) + n_init_neighbor_l = ch%diag_init_calls + n_start_neighbor_l = ch%diag_start_calls + n_wait_neighbor_l = ch%diag_wait_calls + n_realloc_neighbor_l = ch%diag_buffer_reallocs + class default + continue + end select + + select type(ch => x_persistent%v%comm_handle) + type is(psb_comm_neighbor_handle) + n_init_persist_l = ch%diag_init_calls + n_start_persist_l = ch%diag_start_calls + n_wait_persist_l = ch%diag_wait_calls + n_realloc_persist_l = ch%diag_buffer_reallocs + class default + continue + end select + + n_init_neighbor_g = n_init_neighbor_l + n_start_neighbor_g = n_start_neighbor_l + n_wait_neighbor_g = n_wait_neighbor_l + n_realloc_neighbor_g = n_realloc_neighbor_l + n_init_persist_g = n_init_persist_l + n_start_persist_g = n_start_persist_l + n_wait_persist_g = n_wait_persist_l + n_realloc_persist_g = n_realloc_persist_l + n_ineighbor_neighbor_g = n_ineighbor_neighbor_l + peak_start_neighbor_g = peak_start_neighbor_l + peak_start_persist_g = peak_start_persist_l + accum_start_neighbor_g = accum_start_neighbor_l + accum_start_persist_g = accum_start_persist_l + comm_type_neighbor_g = comm_type_neighbor_l + comm_type_persist_g = comm_type_persist_l + is_neighbor_handle_g = is_neighbor_handle_l + is_persist_handle_g = is_persist_handle_l + use_persistent_neighbor_g = use_persistent_neighbor_l + use_persistent_persist_g = use_persistent_persist_l + + call psb_sum(ctxt, n_init_neighbor_g) + call psb_sum(ctxt, n_start_neighbor_g) + call psb_sum(ctxt, n_wait_neighbor_g) + call psb_sum(ctxt, n_realloc_neighbor_g) + call psb_sum(ctxt, n_init_persist_g) + call psb_sum(ctxt, n_start_persist_g) + call psb_sum(ctxt, n_wait_persist_g) + call psb_sum(ctxt, n_realloc_persist_g) + call psb_sum(ctxt, n_ineighbor_neighbor_g) + call psb_sum(ctxt, peak_start_neighbor_g) + call psb_sum(ctxt, peak_start_persist_g) + call psb_sum(ctxt, accum_start_neighbor_g) + call psb_sum(ctxt, accum_start_persist_g) + call psb_sum(ctxt, comm_type_neighbor_g) + call psb_sum(ctxt, comm_type_persist_g) + call psb_sum(ctxt, is_neighbor_handle_g) + call psb_sum(ctxt, is_persist_handle_g) + call psb_sum(ctxt, use_persistent_neighbor_g) + call psb_sum(ctxt, use_persistent_persist_g) + if (my_rank == 0) then write(psb_out_unit,'(/,"SpMV overlap benchmark")') write(psb_out_unit,'(" idim : ",i0)') idim @@ -769,6 +1029,21 @@ contains write(psb_out_unit,'(" gain (%) : ",f10.4)') gain_pct write(psb_out_unit,'(" overlap vs no_overlap err = ",es12.5)') err_persistent + write(psb_out_unit,'(/,"Communication diagnostics (global sum over MPI ranks):")') + write(psb_out_unit,'(" ineighbor_alltoallv: init=",i0,", start=",i0,", wait=",i0,", realloc=",i0)') & + & n_init_neighbor_g, n_start_neighbor_g, n_wait_neighbor_g, n_realloc_neighbor_g + write(psb_out_unit,'(" ineighbor_alltoallv calls : ",i0)') n_ineighbor_neighbor_g + write(psb_out_unit,'(" ineighbor peak start observed during run: ",i0)') peak_start_neighbor_g + write(psb_out_unit,'(" ineighbor accumulated starts (robust): ",i0)') accum_start_neighbor_g + write(psb_out_unit,'(" persistent_ineighbor_a2av: init=",i0,", start=",i0,", wait=",i0,", realloc=",i0)') & + & n_init_persist_g, n_start_persist_g, n_wait_persist_g, n_realloc_persist_g + write(psb_out_unit,'(" persistent peak start observed during run: ",i0)') peak_start_persist_g + write(psb_out_unit,'(" persistent accumulated starts (robust): ",i0)') accum_start_persist_g + write(psb_out_unit,'(" handle check (sum over ranks): neighbor is_type=",i0,", comm_type=",i0,", use_persistent=",i0)') & + & is_neighbor_handle_g, comm_type_neighbor_g, use_persistent_neighbor_g + write(psb_out_unit,'(" handle check (sum over ranks): persistent is_type=",i0,", comm_type=",i0,", use_persistent=",i0)') & + & is_persist_handle_g, comm_type_persist_g, use_persistent_persist_g + if ((err_isend > tol) .or. (err_neighbor > tol) .or. (err_persistent > tol)) then write(psb_out_unit,'(" WARNING: mismatch exceeds tolerance ",es12.5)') tol end if