[FIX] Fixed comm tests for single process, added guard on neighbor topology data exchange for single process run

communication_v2
Stack-1 2 months ago
parent 6ba327854e
commit 5ed9643fe6

@ -591,38 +591,61 @@ contains
! combuf(total_send+1 : total_send+total_recv) = recv area ! combuf(total_send+1 : total_send+total_recv) = recv area
buffer_size = topology_total_send + topology_total_recv buffer_size = topology_total_send + topology_total_recv
if (neighbor_comm_handle%use_persistent_buffers) then if (buffer_size > 0) then
if ((.not.allocated(y%combuf)) .or. (size(y%combuf) < buffer_size)) then if (neighbor_comm_handle%use_persistent_buffers) then
neighbor_comm_handle%diag_buffer_reallocs = neighbor_comm_handle%diag_buffer_reallocs + 1 if (.not. allocated(y%combuf)) then
if (neighbor_comm_handle%persistent_request_ready) then neighbor_comm_handle%diag_buffer_reallocs = neighbor_comm_handle%diag_buffer_reallocs + 1
if (neighbor_comm_handle%persistent_request /= mpi_request_null) then if (neighbor_comm_handle%persistent_request_ready) then
call mpi_request_free(neighbor_comm_handle%persistent_request, iret) if (neighbor_comm_handle%persistent_request /= mpi_request_null) then
call mpi_request_free(neighbor_comm_handle%persistent_request, iret)
end if
neighbor_comm_handle%persistent_request = mpi_request_null
neighbor_comm_handle%persistent_request_ready = .false.
neighbor_comm_handle%persistent_in_flight = .false.
neighbor_comm_handle%persistent_buffer_size = 0
end if
call y%new_buffer(buffer_size, info)
if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_, name)
goto 9999
end if
else if (size(y%combuf) < buffer_size) then
neighbor_comm_handle%diag_buffer_reallocs = neighbor_comm_handle%diag_buffer_reallocs + 1
if (neighbor_comm_handle%persistent_request_ready) then
if (neighbor_comm_handle%persistent_request /= mpi_request_null) then
call mpi_request_free(neighbor_comm_handle%persistent_request, iret)
end if
neighbor_comm_handle%persistent_request = mpi_request_null
neighbor_comm_handle%persistent_request_ready = .false.
neighbor_comm_handle%persistent_in_flight = .false.
neighbor_comm_handle%persistent_buffer_size = 0
end if
call y%new_buffer(buffer_size, info)
if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_, name)
goto 9999
end if end if
neighbor_comm_handle%persistent_request = mpi_request_null
neighbor_comm_handle%persistent_request_ready = .false.
neighbor_comm_handle%persistent_in_flight = .false.
neighbor_comm_handle%persistent_buffer_size = 0
end if end if
else
call y%new_buffer(buffer_size, info) call y%new_buffer(buffer_size, info)
if (info /= 0) then if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_, name) call psb_errpush(psb_err_alloc_dealloc_, name)
goto 9999 goto 9999
end if end if
end if end if
neighbor_comm_handle%comm_request = mpi_request_null
! Gather send data into contiguous send buffer (polymorphic for GPU)
if (debug) write(*,*) me,' nbr_vect: gathering send data,', topology_total_send,' elems'
call y%gth(int(topology_total_send,psb_mpk_), &
& neighbor_comm_handle%send_indexes, &
& y%combuf(1:topology_total_send))
else else
call y%new_buffer(buffer_size, info) ! No data to send/recv: ensure requests/buffers indicate idle state
if (info /= 0) then neighbor_comm_handle%comm_request = mpi_request_null
call psb_errpush(psb_err_alloc_dealloc_, name) neighbor_comm_handle%persistent_in_flight = .false.
goto 9999 neighbor_comm_handle%persistent_request_ready = neighbor_comm_handle%persistent_request_ready
end if
end if end if
neighbor_comm_handle%comm_request = mpi_request_null
! Gather send data into contiguous send buffer (polymorphic for GPU)
if (debug) write(*,*) me,' nbr_vect: gathering send data,', topology_total_send,' elems'
call y%gth(int(topology_total_send,psb_mpk_), &
& neighbor_comm_handle%send_indexes, &
& y%combuf(1:topology_total_send))
! Wait for device (important for GPU subclasses) ! Wait for device (important for GPU subclasses)
call y%device_wait() call y%device_wait()
@ -631,27 +654,32 @@ contains
! Lazy persistent-init: build the request once, then reuse with START/WAIT. ! Lazy persistent-init: build the request once, then reuse with START/WAIT.
if (.not. neighbor_comm_handle%persistent_request_ready) then if (.not. neighbor_comm_handle%persistent_request_ready) then
#ifdef PSB_HAVE_MPI_NEIGHBOR_PERSISTENT #ifdef PSB_HAVE_MPI_NEIGHBOR_PERSISTENT
if (debug) write(*,*) me,' nbr_vect: posting MPI_Neighbor_alltoallv_init' if (buffer_size > 0) then
call mpi_neighbor_alltoallv_init( & if (debug) write(*,*) me,' nbr_vect: posting MPI_Neighbor_alltoallv_init'
& y%combuf(1), & ! send buffer call mpi_neighbor_alltoallv_init( &
& neighbor_comm_handle%send_counts, & & y%combuf(1), & ! send buffer
& neighbor_comm_handle%send_displs, & & neighbor_comm_handle%send_counts, &
& psb_mpi_r_dpk_, & & neighbor_comm_handle%send_displs, &
& y%combuf(topology_total_send + 1), & ! recv buffer & psb_mpi_r_dpk_, &
& neighbor_comm_handle%recv_counts, & & y%combuf(topology_total_send + 1), & ! recv buffer
& neighbor_comm_handle%recv_displs, & & neighbor_comm_handle%recv_counts, &
& psb_mpi_r_dpk_, & & neighbor_comm_handle%recv_displs, &
& neighbor_comm_handle%graph_comm, & & psb_mpi_r_dpk_, &
& mpi_info_null, & & neighbor_comm_handle%graph_comm, &
& neighbor_comm_handle%persistent_request, iret) & mpi_info_null, &
if (iret /= mpi_success) then & neighbor_comm_handle%persistent_request, iret)
info = psb_err_mpi_error_ if (iret /= mpi_success) then
call psb_errpush(info, name, m_err=(/iret/)) info = psb_err_mpi_error_
goto 9999 call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
neighbor_comm_handle%diag_init_calls = neighbor_comm_handle%diag_init_calls + 1
neighbor_comm_handle%persistent_request_ready = .true.
neighbor_comm_handle%persistent_buffer_size = buffer_size
else
neighbor_comm_handle%persistent_request_ready = .false.
neighbor_comm_handle%persistent_buffer_size = 0
end if end if
neighbor_comm_handle%diag_init_calls = neighbor_comm_handle%diag_init_calls + 1
neighbor_comm_handle%persistent_request_ready = .true.
neighbor_comm_handle%persistent_buffer_size = buffer_size
#else #else
! Fallback when persistent neighborhood collectives are not available ! Fallback when persistent neighborhood collectives are not available
neighbor_comm_handle%persistent_request_ready = .false. neighbor_comm_handle%persistent_request_ready = .false.
@ -660,51 +688,64 @@ contains
end if end if
#ifdef PSB_HAVE_MPI_NEIGHBOR_PERSISTENT #ifdef PSB_HAVE_MPI_NEIGHBOR_PERSISTENT
call mpi_start(neighbor_comm_handle%persistent_request, iret) if (buffer_size > 0) then
if (iret /= mpi_success) then call mpi_start(neighbor_comm_handle%persistent_request, iret)
info = psb_err_mpi_error_ if (iret /= mpi_success) then
call psb_errpush(info, name, m_err=(/iret/)) info = psb_err_mpi_error_
goto 9999 call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
neighbor_comm_handle%diag_start_calls = neighbor_comm_handle%diag_start_calls + 1
neighbor_comm_handle%persistent_in_flight = .true.
else
neighbor_comm_handle%persistent_in_flight = .false.
end if end if
neighbor_comm_handle%diag_start_calls = neighbor_comm_handle%diag_start_calls + 1
neighbor_comm_handle%persistent_in_flight = .true.
#else #else
call mpi_ineighbor_alltoallv( & if (buffer_size > 0) then
& y%combuf(1), & ! send buffer call mpi_ineighbor_alltoallv( &
& neighbor_comm_handle%send_counts, & & y%combuf(1), & ! send buffer
& neighbor_comm_handle%send_displs, & & neighbor_comm_handle%send_counts, &
& psb_mpi_r_dpk_, & & neighbor_comm_handle%send_displs, &
& y%combuf(topology_total_send + 1), & ! recv buffer & psb_mpi_r_dpk_, &
& neighbor_comm_handle%recv_counts, & & y%combuf(topology_total_send + 1), & ! recv buffer
& neighbor_comm_handle%recv_displs, & & neighbor_comm_handle%recv_counts, &
& psb_mpi_r_dpk_, & & neighbor_comm_handle%recv_displs, &
& neighbor_comm_handle%graph_comm, & & psb_mpi_r_dpk_, &
& neighbor_comm_handle%comm_request, iret) & neighbor_comm_handle%graph_comm, &
if (iret /= mpi_success) then & neighbor_comm_handle%comm_request, iret)
info = psb_err_mpi_error_ if (iret /= mpi_success) then
call psb_errpush(info, name, m_err=(/iret/)) info = psb_err_mpi_error_
goto 9999 call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
neighbor_comm_handle%persistent_in_flight = .true.
else
neighbor_comm_handle%persistent_in_flight = .false.
neighbor_comm_handle%comm_request = mpi_request_null
end if end if
neighbor_comm_handle%persistent_in_flight = .true.
#endif #endif
else else
! Post non-blocking neighborhood alltoallv ! Post non-blocking neighborhood alltoallv
if (debug) write(*,*) me,' nbr_vect: posting MPI_Ineighbor_alltoallv' if (debug) write(*,*) me,' nbr_vect: posting MPI_Ineighbor_alltoallv'
call mpi_ineighbor_alltoallv( & if (buffer_size > 0) then
& y%combuf(1), & ! send buffer call mpi_ineighbor_alltoallv( &
& neighbor_comm_handle%send_counts, & & y%combuf(1), & ! send buffer
& neighbor_comm_handle%send_displs, & & neighbor_comm_handle%send_counts, &
& psb_mpi_r_dpk_, & & neighbor_comm_handle%send_displs, &
& y%combuf(topology_total_send + 1), & ! recv buffer & psb_mpi_r_dpk_, &
& neighbor_comm_handle%recv_counts, & & y%combuf(topology_total_send + 1), & ! recv buffer
& neighbor_comm_handle%recv_displs, & & neighbor_comm_handle%recv_counts, &
& psb_mpi_r_dpk_, & & neighbor_comm_handle%recv_displs, &
& neighbor_comm_handle%graph_comm, & & psb_mpi_r_dpk_, &
& neighbor_comm_handle%comm_request, iret) & neighbor_comm_handle%graph_comm, &
if (iret /= mpi_success) then & neighbor_comm_handle%comm_request, iret)
info = psb_err_mpi_error_ if (iret /= mpi_success) then
call psb_errpush(info, name, m_err=(/iret/)) info = psb_err_mpi_error_
goto 9999 call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
else
neighbor_comm_handle%comm_request = mpi_request_null
end if end if
end if end if
@ -715,55 +756,69 @@ contains
! --------------------------------------------------------- ! ---------------------------------------------------------
if (do_wait) then if (do_wait) then
if (neighbor_comm_handle%use_persistent_buffers) then topology_total_send = neighbor_comm_handle%total_send
if (.not. neighbor_comm_handle%persistent_in_flight) then topology_total_recv = neighbor_comm_handle%total_recv
info = psb_err_mpi_error_
call psb_errpush(info, name, a_err='Invalid WAIT: no persistent neighbor request in flight') if ((topology_total_send + topology_total_recv) == 0) then
goto 9999 ! Valid no-op exchange: nothing was posted in START and nothing to wait/scatter.
if (neighbor_comm_handle%use_persistent_buffers) then
neighbor_comm_handle%persistent_in_flight = .false.
else
neighbor_comm_handle%comm_request = mpi_request_null
end if end if
else else
if (neighbor_comm_handle%comm_request == mpi_request_null) then if (neighbor_comm_handle%use_persistent_buffers) then
write(psb_err_unit,*) me, 'DBG: neighbor WAIT but comm_request is NULL; is_initialized=', & if (.not. neighbor_comm_handle%persistent_in_flight) then
& neighbor_comm_handle%is_initialized info = psb_err_mpi_error_
info = psb_err_mpi_error_ call psb_errpush(info, name, a_err='Invalid WAIT: no persistent neighbor request in flight')
call psb_errpush(info, name, m_err=(/-2/)) goto 9999
goto 9999 end if
else
if (neighbor_comm_handle%comm_request == mpi_request_null) then
write(psb_err_unit,*) me, 'DBG: neighbor WAIT but comm_request is NULL; is_initialized=', &
& neighbor_comm_handle%is_initialized
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/-2/))
goto 9999
end if
end if end if
end if end if
topology_total_send = neighbor_comm_handle%total_send ! Only wait and scatter if there's data
topology_total_recv = neighbor_comm_handle%total_recv if ((topology_total_send + topology_total_recv) > 0) then
! Wait for the non-blocking collective to complete
! Wait for the non-blocking collective to complete if (debug) write(*,*) me,' nbr_vect: waiting on MPI request'
if (debug) write(*,*) me,' nbr_vect: waiting on MPI request' if (neighbor_comm_handle%use_persistent_buffers) then
if (neighbor_comm_handle%use_persistent_buffers) then
#ifdef PSB_HAVE_MPI_NEIGHBOR_PERSISTENT #ifdef PSB_HAVE_MPI_NEIGHBOR_PERSISTENT
call mpi_wait(neighbor_comm_handle%persistent_request, p2pstat, iret) call mpi_wait(neighbor_comm_handle%persistent_request, p2pstat, iret)
#else #else
call mpi_wait(neighbor_comm_handle%comm_request, p2pstat, iret) call mpi_wait(neighbor_comm_handle%comm_request, p2pstat, iret)
#endif #endif
else
call mpi_wait(neighbor_comm_handle%comm_request, p2pstat, iret)
end if
if (iret /= mpi_success) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
if (neighbor_comm_handle%use_persistent_buffers) then
neighbor_comm_handle%diag_wait_calls = neighbor_comm_handle%diag_wait_calls + 1
end if
if (neighbor_comm_handle%use_persistent_buffers) then
neighbor_comm_handle%persistent_in_flight = .false.
end if
! Scatter received data to local vector positions (polymorphic for GPU)
if (debug) write(*,*) me,' nbr_vect: scattering recv data,', topology_total_recv,' elems'
call y%sct(int(topology_total_recv,psb_mpk_), &
& neighbor_comm_handle%recv_indexes, &
& y%combuf(topology_total_send+1:topology_total_send+topology_total_recv), &
& beta)
else else
call mpi_wait(neighbor_comm_handle%comm_request, p2pstat, iret) ! nothing to wait/scatter
end if
if (iret /= mpi_success) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
if (neighbor_comm_handle%use_persistent_buffers) then
neighbor_comm_handle%diag_wait_calls = neighbor_comm_handle%diag_wait_calls + 1
end if
if (neighbor_comm_handle%use_persistent_buffers) then
neighbor_comm_handle%persistent_in_flight = .false.
end if end if
! Scatter received data to local vector positions (polymorphic for GPU)
if (debug) write(*,*) me,' nbr_vect: scattering recv data,', topology_total_recv,' elems'
call y%sct(int(topology_total_recv,psb_mpk_), &
& neighbor_comm_handle%recv_indexes, &
& y%combuf(topology_total_send+1:topology_total_send+topology_total_recv), &
& beta)
! Clean up ! Clean up
if ((.not. neighbor_comm_handle%use_persistent_buffers) .or. & if ((.not. neighbor_comm_handle%use_persistent_buffers) .or. &
@ -1259,7 +1314,23 @@ subroutine psi_dswap_neighbor_topology_multivect(ctxt,swap_status,beta,y,comm_in
buffer_size = topology_total_send + topology_total_recv buffer_size = topology_total_send + topology_total_recv
if (neighbor_comm_handle%use_persistent_buffers) then if (neighbor_comm_handle%use_persistent_buffers) then
if ((.not.allocated(y%combuf)) .or. (size(y%combuf) < buffer_size)) then if (.not. allocated(y%combuf)) then
neighbor_comm_handle%diag_buffer_reallocs = neighbor_comm_handle%diag_buffer_reallocs + 1
if (neighbor_comm_handle%persistent_request_ready) then
if (neighbor_comm_handle%persistent_request /= mpi_request_null) then
call mpi_request_free(neighbor_comm_handle%persistent_request, iret)
end if
neighbor_comm_handle%persistent_request = mpi_request_null
neighbor_comm_handle%persistent_request_ready = .false.
neighbor_comm_handle%persistent_in_flight = .false.
neighbor_comm_handle%persistent_buffer_size = 0
end if
call y%new_buffer(buffer_size, info)
if (info /= 0) then
call psb_errpush(psb_err_alloc_dealloc_, name)
goto 9999
end if
else if (size(y%combuf) < buffer_size) then
neighbor_comm_handle%diag_buffer_reallocs = neighbor_comm_handle%diag_buffer_reallocs + 1 neighbor_comm_handle%diag_buffer_reallocs = neighbor_comm_handle%diag_buffer_reallocs + 1
if (neighbor_comm_handle%persistent_request_ready) then if (neighbor_comm_handle%persistent_request_ready) then
if (neighbor_comm_handle%persistent_request /= mpi_request_null) then if (neighbor_comm_handle%persistent_request /= mpi_request_null) then

@ -0,0 +1,3 @@
**/massif/*
**/**/massif/*
*.kcg

@ -4,6 +4,7 @@ program psb_comm_cg_test
use psb_linsolve_mod use psb_linsolve_mod
use psb_comm_factory_mod use psb_comm_factory_mod
use psb_comm_neighbor_impl_mod, only: psb_comm_neighbor_handle use psb_comm_neighbor_impl_mod, only: psb_comm_neighbor_handle
use, intrinsic :: ieee_arithmetic
implicit none implicit none
@ -32,17 +33,16 @@ program psb_comm_cg_test
character(len=20) :: prec_name(n_precs) character(len=20) :: prec_name(n_precs)
character(len=5) :: afmt character(len=5) :: afmt
character(len=256) :: arg character(len=256) :: arg
logical :: prec_ready
logical :: setup_done logical :: setup_done
info = psb_success_ info = psb_success_
prec_ready = .false.
afmt = 'CSR' afmt = 'CSR'
idim = 40 idim = 40
itmax = 1000 itmax = 1000
nrep = 5 nrep = 5
nwarm = 1 nwarm = 1
itrace = 0 ! Keep itrace positive to avoid modulo-by-zero paths in convergence logging.
itrace = 1
istop = 2 istop = 2
eps = 1.d-6 eps = 1.d-6
scheme_type = (/ psb_comm_isend_irecv_, psb_comm_ineighbor_alltoallv_, & scheme_type = (/ psb_comm_isend_irecv_, psb_comm_ineighbor_alltoallv_, &
@ -87,6 +87,15 @@ program psb_comm_cg_test
info = psb_success_ info = psb_success_
end if end if
end if end if
! call psb_set_debug_level(psb_debug_ext_)
! call probe_ieee('before psb_init')
call psb_init(ctxt)
! call probe_ieee('after psb_init')
! call clear_ieee_flags()
! call probe_ieee('after clear_ieee_flags')
call psb_info(ctxt, iam, np)
allocate(setup_time(n_precs,n_schemes,nrep), solve_time(n_precs,n_schemes,nrep), & allocate(setup_time(n_precs,n_schemes,nrep), solve_time(n_precs,n_schemes,nrep), &
& total_time(n_precs,n_schemes,nrep), final_error(n_precs,n_schemes,nrep), & & total_time(n_precs,n_schemes,nrep), final_error(n_precs,n_schemes,nrep), &
@ -96,9 +105,6 @@ program psb_comm_cg_test
& comm_set_time(n_precs,n_schemes,nrep), krylov_time(n_precs,n_schemes,nrep), stat=info) & comm_set_time(n_precs,n_schemes,nrep), krylov_time(n_precs,n_schemes,nrep), stat=info)
if (info /= psb_success_) stop 1 if (info /= psb_success_) stop 1
call psb_init(ctxt)
call psb_info(ctxt, iam, np)
if (iam == psb_root_) then if (iam == psb_root_) then
write(psb_out_unit,*) 'Welcome to PSBLAS version: ', psb_version_string_ write(psb_out_unit,*) 'Welcome to PSBLAS version: ', psb_version_string_
write(psb_out_unit,*) 'This is the comm/cg test program' write(psb_out_unit,*) 'This is the comm/cg test program'
@ -116,59 +122,53 @@ program psb_comm_cg_test
call psb_barrier(ctxt) call psb_barrier(ctxt)
t_start = psb_wtime() t_start = psb_wtime()
! call probe_ieee('before psb_d_gen_pde3d')
call psb_d_gen_pde3d(ctxt,idim,a,b,x,desc_a,afmt,info) call psb_d_gen_pde3d(ctxt,idim,a,b,x,desc_a,afmt,info)
! call probe_ieee('after psb_d_gen_pde3d')
if (info /= psb_success_) goto 9999 if (info /= psb_success_) goto 9999
do prec_idx = 1, n_precs do prec_idx = 1, n_precs
do scheme_idx = 1, n_schemes do scheme_idx = 1, n_schemes
setup_done = .false.
do rep = 1, nrep do rep = 1, nrep
call psb_geaxpby(dzero,b,dzero,x,desc_a,info) call psb_geaxpby(dzero,b,dzero,x,desc_a,info)
if (info /= psb_success_) goto 9999 if (info /= psb_success_) goto 9999
if (.not. setup_done) then call psb_barrier(ctxt)
call psb_barrier(ctxt)
t_start = psb_wtime()
call prec%init(ctxt,trim(prec_type(prec_idx)),info)
if (info /= psb_success_) goto 9999
prec_init_time(prec_idx,scheme_idx,rep) = psb_wtime() - t_start
call psb_amx(ctxt,prec_init_time(prec_idx,scheme_idx,rep))
t_start = psb_wtime() t_start = psb_wtime()
call psb_precbld(a,desc_a,prec,info) call prec%init(ctxt,trim(prec_type(prec_idx)),info)
if (info /= psb_success_) goto 9999 if (info /= psb_success_) goto 9999
prec_bld_time(prec_idx,scheme_idx,rep) = psb_wtime() - t_start
call psb_amx(ctxt,prec_bld_time(prec_idx,scheme_idx,rep))
if (.not.allocated(prec%prec)) then prec_init_time(prec_idx,scheme_idx,rep) = psb_wtime() - t_start
info = psb_err_internal_error_ call psb_amx(ctxt,prec_init_time(prec_idx,scheme_idx,rep))
write(psb_err_unit,*) 'Preconditioner object not allocated after build'
goto 9999
end if
t_start = psb_wtime() t_start = psb_wtime()
call psb_comm_set(scheme_type(scheme_idx),x%v%comm_handle,info) call prec%build(a,desc_a,info)
comm_set_time(prec_idx,scheme_idx,rep) = psb_wtime() - t_start if (info /= psb_success_) goto 9999
call psb_amx(ctxt,comm_set_time(prec_idx,scheme_idx,rep)) prec_bld_time(prec_idx,scheme_idx,rep) = psb_wtime() - t_start
call psb_amx(ctxt,prec_bld_time(prec_idx,scheme_idx,rep))
if (info /= psb_success_) goto 9999 if (.not.allocated(prec%prec)) then
if (.not.allocated(prec%prec)) then info = psb_err_internal_error_
info = psb_err_internal_error_ write(psb_err_unit,*) 'Preconditioner object not allocated after build'
write(psb_err_unit,*) 'Preconditioner object lost after psb_comm_set' goto 9999
goto 9999
end if
setup_time(prec_idx,scheme_idx,rep) = prec_init_time(prec_idx,scheme_idx,rep) + &
& prec_bld_time(prec_idx,scheme_idx,rep) + comm_set_time(prec_idx,scheme_idx,rep)
setup_done = .true.
prec_ready = .true.
else
prec_init_time(prec_idx,scheme_idx,rep) = prec_init_time(prec_idx,scheme_idx,1)
prec_bld_time(prec_idx,scheme_idx,rep) = prec_bld_time(prec_idx,scheme_idx,1)
comm_set_time(prec_idx,scheme_idx,rep) = comm_set_time(prec_idx,scheme_idx,1)
setup_time(prec_idx,scheme_idx,rep) = setup_time(prec_idx,scheme_idx,1)
end if end if
t_start = psb_wtime()
call psb_comm_set(scheme_type(scheme_idx),x%v%comm_handle,info)
comm_set_time(prec_idx,scheme_idx,rep) = psb_wtime() - t_start
call psb_amx(ctxt,comm_set_time(prec_idx,scheme_idx,rep))
if (info /= psb_success_) goto 9999
if (.not.allocated(prec%prec)) then
info = psb_err_internal_error_
write(psb_err_unit,*) 'Preconditioner object lost after psb_comm_set'
goto 9999
end if
setup_time(prec_idx,scheme_idx,rep) = prec_init_time(prec_idx,scheme_idx,rep) + &
& prec_bld_time(prec_idx,scheme_idx,rep) + comm_set_time(prec_idx,scheme_idx,rep)
do iter = 1, nwarm do iter = 1, nwarm
call psb_geaxpby(dzero,b,dzero,x,desc_a,info) call psb_geaxpby(dzero,b,dzero,x,desc_a,info)
if (info /= psb_success_) goto 9999 if (info /= psb_success_) goto 9999
@ -181,24 +181,34 @@ program psb_comm_cg_test
if (info /= psb_success_) goto 9999 if (info /= psb_success_) goto 9999
call psb_barrier(ctxt) call psb_barrier(ctxt)
if (.not.allocated(prec%prec)) then
info = psb_err_internal_error_
write(psb_err_unit,*) 'Preconditioner object lost before psb_krylov'
goto 9999
end if
t_start = psb_wtime() t_start = psb_wtime()
call psb_krylov('CG',a,prec,b,x,eps,desc_a,info,& call psb_krylov('CG',a,prec,b,x,eps,desc_a,info,&
& itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istop) & itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istop)
krylov_time(prec_idx,scheme_idx,rep) = psb_wtime() - t_start krylov_time(prec_idx,scheme_idx,rep) = psb_wtime() - t_start
call psb_amx(ctxt,krylov_time(prec_idx,scheme_idx,rep)) call psb_amx(ctxt,krylov_time(prec_idx,scheme_idx,rep))
if (info /= psb_success_) goto 9999
call psb_geaxpby(dzero,b,dzero,x,desc_a,info)
if (info /= psb_success_) goto 9999
call psb_barrier(ctxt)
if (.not.allocated(prec%prec)) then
info = psb_err_internal_error_
write(psb_err_unit,*) 'Preconditioner object lost before psb_krylov'
goto 9999
end if
call prec%free(info)
if (info /= psb_success_) goto 9999
solve_time(prec_idx,scheme_idx,rep) = krylov_time(prec_idx,scheme_idx,rep) solve_time(prec_idx,scheme_idx,rep) = krylov_time(prec_idx,scheme_idx,rep)
total_time(prec_idx,scheme_idx,rep) = setup_time(prec_idx,scheme_idx,rep) + & total_time(prec_idx,scheme_idx,rep) = setup_time(prec_idx,scheme_idx,rep) + &
& solve_time(prec_idx,scheme_idx,rep) & solve_time(prec_idx,scheme_idx,rep)
iter_count(prec_idx,scheme_idx,rep) = iter iter_count(prec_idx,scheme_idx,rep) = iter
iter_denom = real(max(iter,1_psb_ipk_),psb_dpk_) iter_denom = real(max(iter,1_psb_ipk_),psb_dpk_)
krylov_it_time(prec_idx,scheme_idx,rep) = solve_time(prec_idx,scheme_idx,rep)/iter_denom krylov_it_time(prec_idx,scheme_idx,rep) = solve_time(prec_idx,scheme_idx,rep)/iter_denom
total_it_time(prec_idx,scheme_idx,rep) = total_time(prec_idx,scheme_idx,rep)/iter_denom total_it_time(prec_idx,scheme_idx,rep) = total_time(prec_idx,scheme_idx,rep)/iter_denom
final_error(prec_idx,scheme_idx,rep) = err final_error(prec_idx,scheme_idx,rep) = err
solve_info(prec_idx,scheme_idx,rep) = info solve_info(prec_idx,scheme_idx,rep) = info
@ -219,12 +229,6 @@ program psb_comm_cg_test
if (info /= psb_success_) goto 9999 if (info /= psb_success_) goto 9999
end do end do
if (prec_ready) then
call psb_precfree(prec,info)
if (info /= psb_success_) goto 9999
prec_ready = .false.
end if
end do end do
end do end do
@ -303,7 +307,7 @@ program psb_comm_cg_test
call psb_gefree(b,desc_a,info) call psb_gefree(b,desc_a,info)
call psb_gefree(x,desc_a,info) call psb_gefree(x,desc_a,info)
call psb_spfree(a,desc_a,info) call psb_spfree(a,desc_a,info)
if (prec_ready) call psb_precfree(prec,info) call psb_precfree(prec,info)
call psb_cdfree(desc_a,info) call psb_cdfree(desc_a,info)
deallocate(setup_time,solve_time,total_time,final_error,iter_count,solve_info, & deallocate(setup_time,solve_time,total_time,final_error,iter_count,solve_info, &
& prec_init_time,prec_bld_time,comm_set_time,krylov_time, & & prec_init_time,prec_bld_time,comm_set_time,krylov_time, &
@ -325,7 +329,8 @@ contains
do i = 2, size(v) do i = 2, size(v)
key = v(i) key = v(i)
j = i - 1 j = i - 1
do while ((j >= 1).and.(v(j) > key)) do while (j >= 1)
if (v(j) <= key) exit
v(j+1) = v(j) v(j+1) = v(j)
j = j - 1 j = j - 1
end do end do
@ -428,6 +433,28 @@ contains
end if end if
end function gfun end function gfun
subroutine probe_ieee(where)
character(len=*), intent(in) :: where
logical :: invalid_flag, divzero_flag, overflow_flag, underflow_flag
call ieee_get_flag(ieee_invalid, invalid_flag)
call ieee_get_flag(ieee_divide_by_zero, divzero_flag)
call ieee_get_flag(ieee_overflow, overflow_flag)
call ieee_get_flag(ieee_underflow, underflow_flag)
if (invalid_flag .or. divzero_flag .or. overflow_flag .or. underflow_flag) then
write(psb_out_unit,'("IEEE probe [",a,"] invalid=",l1,", div0=",l1,", overflow=",l1,", underflow=",l1)') &
trim(where), invalid_flag, divzero_flag, overflow_flag, underflow_flag
end if
end subroutine probe_ieee
subroutine clear_ieee_flags()
call ieee_set_flag(ieee_invalid, .false.)
call ieee_set_flag(ieee_divide_by_zero, .false.)
call ieee_set_flag(ieee_overflow, .false.)
call ieee_set_flag(ieee_underflow, .false.)
end subroutine clear_ieee_flags
subroutine psb_d_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info) subroutine psb_d_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info)
implicit none implicit none
integer(psb_ipk_), intent(in) :: idim integer(psb_ipk_), intent(in) :: idim
@ -458,13 +485,55 @@ contains
call psb_info(ctxt, iam, np) call psb_info(ctxt, iam, np)
if (idim <= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='idim must be > 0')
goto 9999
end if
if (np <= 0) then
info = psb_err_context_error_
call psb_errpush(info,name,a_err='invalid context: np <= 0')
goto 9999
end if
if (iam < 0) then
info = psb_err_context_error_
call psb_errpush(info,name,a_err='invalid context: iam < 0')
goto 9999
end if
deltah = done/(idim+2) deltah = done/(idim+2)
sqdeltah = deltah*deltah sqdeltah = deltah*deltah
deltah2 = 2.d0*deltah deltah2 = 2.d0*deltah
if (abs(deltah) <= tiny(deltah)) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='invalid mesh spacing: deltah ~ 0')
goto 9999
end if
if (abs(sqdeltah) <= tiny(sqdeltah)) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='invalid mesh spacing: sqdeltah ~ 0')
goto 9999
end if
if (abs(deltah2) <= tiny(deltah2)) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='invalid mesh spacing: deltah2 ~ 0')
goto 9999
end if
m = idim*idim*idim m = idim*idim*idim
n = m n = m
if (n <= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='invalid global size: n <= 0')
goto 9999
end if
nnz = ((n*9)/(np)) nnz = ((n*9)/(np))
if (nnz <= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='invalid local nnz estimate: nnz <= 0')
goto 9999
end if
if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n
nt = (m+np-1)/np nt = (m+np-1)/np
@ -481,8 +550,11 @@ contains
call psb_barrier(ctxt) call psb_barrier(ctxt)
t0 = psb_wtime() t0 = psb_wtime()
! call probe_ieee('enter psb_cdall')
call psb_cdall(ctxt,desc_a,info,nl=nr) call psb_cdall(ctxt,desc_a,info,nl=nr)
! call probe_ieee('after psb_cdall')
if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz) if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz)
! call probe_ieee('after psb_spall')
if (info == psb_success_) call psb_geall(xv,desc_a,info) if (info == psb_success_) call psb_geall(xv,desc_a,info)
if (info == psb_success_) call psb_geall(bv,desc_a,info) if (info == psb_success_) call psb_geall(bv,desc_a,info)
call psb_barrier(ctxt) call psb_barrier(ctxt)
@ -592,12 +664,30 @@ contains
end do end do
call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info) call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info)
if(info /= psb_success_) exit ! call probe_ieee('after psb_spins')
if(info /= psb_success_) then
write(psb_err_unit,'("INSERT FAIL rank=",i0,", call=psb_spins, ii=",i0,", ib=",i0,", icoeff=",i0)') &
iam, ii, ib, icoeff
write(psb_err_unit,'(" glob_row=",i0,", ix=",i0,", iy=",i0,", iz=",i0)') glob_row, ix, iy, iz
exit
end if
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info) call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info)
if(info /= psb_success_) exit ! call probe_ieee('after psb_geins bv')
if(info /= psb_success_) then
write(psb_err_unit,'("INSERT FAIL rank=",i0,", call=psb_geins bv, ii=",i0,", ib=",i0,", icoeff=",i0)') &
iam, ii, ib, icoeff
write(psb_err_unit,'(" glob_row=",i0,", ix=",i0,", iy=",i0,", iz=",i0)') glob_row, ix, iy, iz
exit
end if
zt(:)=dzero zt(:)=dzero
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info) call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= psb_success_) exit ! call probe_ieee('after psb_geins xv')
if(info /= psb_success_) then
write(psb_err_unit,'("INSERT FAIL rank=",i0,", call=psb_geins xv, ii=",i0,", ib=",i0,", icoeff=",i0)') &
iam, ii, ib, icoeff
write(psb_err_unit,'(" glob_row=",i0,", ix=",i0,", iy=",i0,", iz=",i0)') glob_row, ix, iy, iz
exit
end if
end do end do
tgen = psb_wtime()-t1 tgen = psb_wtime()-t1
@ -612,12 +702,16 @@ contains
call psb_barrier(ctxt) call psb_barrier(ctxt)
t1 = psb_wtime() t1 = psb_wtime()
! call probe_ieee('before psb_cdasb')
call psb_cdasb(desc_a,info) call psb_cdasb(desc_a,info)
! call probe_ieee('after psb_cdasb')
tcdasb = psb_wtime()-t1 tcdasb = psb_wtime()-t1
call psb_barrier(ctxt) call psb_barrier(ctxt)
t1 = psb_wtime() t1 = psb_wtime()
! call probe_ieee('before psb_spasb')
if (info == psb_success_) call psb_spasb(a,desc_a,info,afmt=afmt) if (info == psb_success_) call psb_spasb(a,desc_a,info,afmt=afmt)
! call probe_ieee('after psb_spasb')
call psb_barrier(ctxt) call psb_barrier(ctxt)
if(info /= psb_success_) then if(info /= psb_success_) then
info=psb_err_from_subroutine_ info=psb_err_from_subroutine_

@ -1,4 +1,4 @@
INSTALLDIR=../.. INSTALLDIR=../../..
INCDIR=$(INSTALLDIR)/include/ INCDIR=$(INSTALLDIR)/include/
MODDIR=$(INSTALLDIR)/modules/ MODDIR=$(INSTALLDIR)/modules/
include $(INCDIR)/Make.inc.psblas include $(INCDIR)/Make.inc.psblas

@ -1,4 +1,4 @@
INSTALLDIR=../.. INSTALLDIR=../../..
INCDIR=$(INSTALLDIR)/include/ INCDIR=$(INSTALLDIR)/include/
MODDIR=$(INSTALLDIR)/modules/ MODDIR=$(INSTALLDIR)/modules/
include $(INCDIR)/Make.inc.psblas include $(INCDIR)/Make.inc.psblas

@ -52,6 +52,7 @@ program psb_comm_test
! ---- error / reporting ---- ! ---- error / reporting ----
integer(psb_ipk_) :: n_pass, n_total, imode integer(psb_ipk_) :: n_pass, n_total, imode
logical :: run_baseline, run_neighbor, run_persistent
logical :: comm_ok logical :: comm_ok
real(psb_dpk_) :: err, tol real(psb_dpk_) :: err, tol
real(psb_dpk_) :: t0, t1, dt, tsum_baseline, tsum_neighbor, tsum_neighbor_persistent real(psb_dpk_) :: t0, t1, dt, tsum_baseline, tsum_neighbor, tsum_neighbor_persistent
@ -94,6 +95,26 @@ program psb_comm_test
end if end if
end do end do
run_baseline = .false.
run_neighbor = .false.
run_persistent = .false.
select case (trim(adjustl(mode)))
case ('both','all')
run_baseline = .true.
run_neighbor = .true.
run_persistent = .true.
case ('baseline')
run_baseline = .true.
case ('neighbor')
run_neighbor = .true.
case ('persistent','persistent_neighbor','persistent-neighbor')
run_persistent = .true.
case default
run_baseline = .true.
run_neighbor = .true.
run_persistent = .true.
end select
if (idim <= 0) then if (idim <= 0) then
write(*,*) 'Invalid dimension specified. Usage: --dim <positive integer>' write(*,*) 'Invalid dimension specified. Usage: --dim <positive integer>'
call psb_abort(ctxt) call psb_abort(ctxt)
@ -190,154 +211,132 @@ program psb_comm_test
! ================================================================== ! ==================================================================
! 6. Baseline halo exchange (Isend/Irecv in one call) ! 6. Baseline halo exchange (Isend/Irecv in one call)
! ================================================================== ! ==================================================================
! v_baseline%v is a psb_d_base_vect_type if (run_baseline) then
call psi_swapdata( & call psi_swapdata( &
swap_status=psb_comm_status_start_, & swap_status=psb_comm_status_start_, &
beta=dzero, &
y=v_baseline%v, &
desc_a=desc_a, &
info=info, &
data=psb_comm_halo_)
if (info /= psb_success_) then
write(psb_err_unit,*) my_rank, 'baseline swap error:', info
call psb_abort(ctxt)
end if
call psi_swapdata( &
swap_status=psb_comm_status_wait_, &
beta=dzero, & beta=dzero, &
y=v_baseline%v, & y=v_baseline%v, &
desc_a=desc_a, & desc_a=desc_a, &
info=info, & info=info, &
data=psb_comm_halo_) data=psb_comm_halo_)
if (info /= psb_success_) then if (info /= psb_success_) then
write(psb_err_unit,*) my_rank, 'baseline swap error:', info write(psb_err_unit,*) my_rank, 'baseline swap error:', info
call psb_abort(ctxt) call psb_abort(ctxt)
end if
call psi_swapdata( &
swap_status=psb_comm_status_wait_, &
beta=dzero, &
y=v_baseline%v, &
desc_a=desc_a, &
info=info, &
data=psb_comm_halo_)
if (info /= psb_success_) then
write(psb_err_unit,*) my_rank, 'baseline swap error:', info
call psb_abort(ctxt)
end if
end if end if
! ================================================================== ! ==================================================================
! 7. Neighbor topology halo exchange (start + wait) ! 7. Neighbor topology halo exchange (start + wait)
! ================================================================== ! ==================================================================
call psb_comm_set(psb_comm_ineighbor_alltoallv_, v_neighbor%v%comm_handle, info) if (run_neighbor) then
if (info /= 0) then call psb_comm_set(psb_comm_ineighbor_alltoallv_, v_neighbor%v%comm_handle, info)
write(psb_err_unit,*) my_rank, 'psb_comm_set neighbor error:', info if (info /= 0) then
call psb_abort(ctxt) write(psb_err_unit,*) my_rank, 'psb_comm_set neighbor error:', info
end if call psb_abort(ctxt)
call psi_swapdata(psb_comm_status_start_, dzero, v_neighbor%v, desc_a, info, data=psb_comm_halo_) end if
if (info /= psb_success_) then call psi_swapdata(psb_comm_status_start_, dzero, v_neighbor%v, desc_a, info, data=psb_comm_halo_)
write(psb_err_unit,*) my_rank, 'neighbor start error:', info if (info /= psb_success_) then
call psb_abort(ctxt) write(psb_err_unit,*) my_rank, 'neighbor start error:', info
end if call psb_abort(ctxt)
end if
call psi_swapdata(psb_comm_status_wait_, dzero, v_neighbor%v, desc_a, info, data=psb_comm_halo_) call psi_swapdata(psb_comm_status_wait_, dzero, v_neighbor%v, desc_a, info, data=psb_comm_halo_)
if (info /= psb_success_) then if (info /= psb_success_) then
write(psb_err_unit,*) my_rank, 'neighbor wait error:', info write(psb_err_unit,*) my_rank, 'neighbor wait error:', info
call psb_abort(ctxt) call psb_abort(ctxt)
end if
end if end if
! ================================================================== ! ==================================================================
! 7b. Persistent-neighbor halo exchange (start + wait) ! 7b. Persistent-neighbor halo exchange (start + wait)
! ================================================================== ! ==================================================================
call psb_comm_set(psb_comm_persistent_ineighbor_alltoallv_, v_neighbor_persistent%v%comm_handle, info) if (run_persistent) then
if (info /= 0) then call psb_comm_set(psb_comm_persistent_ineighbor_alltoallv_, v_neighbor_persistent%v%comm_handle, info)
write(psb_err_unit,*) my_rank, 'psb_comm_set persistent-neighbor error:', info if (info /= 0) then
call psb_abort(ctxt) write(psb_err_unit,*) my_rank, 'psb_comm_set persistent-neighbor error:', info
end if call psb_abort(ctxt)
call psi_swapdata(psb_comm_status_start_, dzero, v_neighbor_persistent%v, desc_a, info, data=psb_comm_halo_) end if
if (info /= psb_success_) then call psi_swapdata(psb_comm_status_start_, dzero, v_neighbor_persistent%v, desc_a, info, data=psb_comm_halo_)
write(psb_err_unit,*) my_rank, 'persistent-neighbor start error:', info if (info /= psb_success_) then
call psb_abort(ctxt) write(psb_err_unit,*) my_rank, 'persistent-neighbor start error:', info
end if call psb_abort(ctxt)
call psi_swapdata(psb_comm_status_wait_, dzero, v_neighbor_persistent%v, desc_a, info, data=psb_comm_halo_) end if
if (info /= psb_success_) then call psi_swapdata(psb_comm_status_wait_, dzero, v_neighbor_persistent%v, desc_a, info, data=psb_comm_halo_)
write(psb_err_unit,*) my_rank, 'persistent-neighbor wait error:', info if (info /= psb_success_) then
call psb_abort(ctxt) write(psb_err_unit,*) my_rank, 'persistent-neighbor wait error:', info
call psb_abort(ctxt)
end if
end if end if
! ================================================================== ! ==================================================================
! 8. Performance: repeat exchanges and measure timings ! 8. Performance: repeat exchanges and measure timings
! ================================================================== ! ==================================================================
if (my_rank == 0) then if (my_rank == 0) then
write(psb_out_unit,'("Timing: running ",i0," iterations for baseline, neighbor and persistent-neighbor")') iters write(psb_out_unit,'("Timing: running ",i0," iterations for selected exchange mode(s)")') iters
end if end if
tsum_baseline = 0.0_psb_dpk_ tsum_baseline = 0.0_psb_dpk_
tsum_neighbor = 0.0_psb_dpk_ tsum_neighbor = 0.0_psb_dpk_
tsum_neighbor_persistent = 0.0_psb_dpk_ tsum_neighbor_persistent = 0.0_psb_dpk_
call psb_comm_set(psb_comm_isend_irecv_, v_baseline%v%comm_handle, info)
call psb_comm_set(psb_comm_ineighbor_alltoallv_, v_neighbor%v%comm_handle, info)
call psb_comm_set(psb_comm_persistent_ineighbor_alltoallv_, v_neighbor_persistent%v%comm_handle, info)
! ---- Comm check: verify selected communication schemes ----
n_total = n_total + 1
comm_ok = allocated(v_baseline%v%comm_handle) .and. allocated(v_neighbor%v%comm_handle) .and. &
& allocated(v_neighbor_persistent%v%comm_handle)
if (comm_ok) then
comm_ok = (v_baseline%v%comm_handle%comm_type == psb_comm_isend_irecv_) .and. &
& (v_neighbor%v%comm_handle%comm_type == psb_comm_ineighbor_alltoallv_) .and. &
& (v_neighbor_persistent%v%comm_handle%comm_type == psb_comm_persistent_ineighbor_alltoallv_)
end if
if (my_rank == 0) then
if (comm_ok) then
write(psb_out_unit,'(" [PASS] comm scheme selection : baseline/neighbor/persistent OK")')
n_pass = n_pass + 1
else
write(psb_out_unit,'(" [FAIL] comm scheme selection : unexpected comm_type mapping")')
end if
end if
do i = 1, iters do i = 1, iters
! baseline timing if (run_baseline) then
t0 = psb_wtime() t0 = psb_wtime()
call psi_swapdata( & call psi_swapdata(psb_comm_status_start_, dzero, v_baseline%v, desc_a, info, data=psb_comm_halo_)
swap_status=psb_comm_status_start_, & call psi_swapdata(psb_comm_status_wait_, dzero, v_baseline%v, desc_a, info, data=psb_comm_halo_)
beta=dzero, & t1 = psb_wtime()
y=v_baseline%v, & dt = t1 - t0
desc_a=desc_a, & call psb_amx(ctxt, dt)
info=info, & tsum_baseline = tsum_baseline + dt
data=psb_comm_halo_) end if
call psi_swapdata( &
swap_status=psb_comm_status_wait_, &
beta=dzero, &
y=v_baseline%v, &
desc_a=desc_a, &
info=info, &
data=psb_comm_halo_)
t1 = psb_wtime()
dt = t1 - t0
call psb_amx(ctxt, dt)
tsum_baseline = tsum_baseline + dt
! neighbor timing (start + wait) if (run_neighbor) then
t0 = psb_wtime() t0 = psb_wtime()
call psi_swapdata(psb_comm_status_start_, dzero, v_neighbor%v, desc_a, info, data=psb_comm_halo_) call psi_swapdata(psb_comm_status_start_, dzero, v_neighbor%v, desc_a, info, data=psb_comm_halo_)
call psi_swapdata(psb_comm_status_wait_, dzero, v_neighbor%v, desc_a, info, data=psb_comm_halo_) call psi_swapdata(psb_comm_status_wait_, dzero, v_neighbor%v, desc_a, info, data=psb_comm_halo_)
t1 = psb_wtime() t1 = psb_wtime()
dt = t1 - t0 dt = t1 - t0
call psb_amx(ctxt, dt) call psb_amx(ctxt, dt)
tsum_neighbor = tsum_neighbor + dt tsum_neighbor = tsum_neighbor + dt
end if
! persistent-neighbor timing (start + wait) if (run_persistent) then
t0 = psb_wtime() t0 = psb_wtime()
call psi_swapdata(psb_comm_status_start_, dzero, v_neighbor_persistent%v, desc_a, info, data=psb_comm_halo_) call psi_swapdata(psb_comm_status_start_, dzero, v_neighbor_persistent%v, desc_a, info, data=psb_comm_halo_)
call psi_swapdata(psb_comm_status_wait_, dzero, v_neighbor_persistent%v, desc_a, info, data=psb_comm_halo_) call psi_swapdata(psb_comm_status_wait_, dzero, v_neighbor_persistent%v, desc_a, info, data=psb_comm_halo_)
t1 = psb_wtime() t1 = psb_wtime()
dt = t1 - t0 dt = t1 - t0
call psb_amx(ctxt, dt) call psb_amx(ctxt, dt)
tsum_neighbor_persistent = tsum_neighbor_persistent + dt tsum_neighbor_persistent = tsum_neighbor_persistent + dt
end if
end do end do
if (my_rank == 0) then if (my_rank == 0) then
write(psb_out_unit,'(" Avg baseline time : ",es12.5)') (tsum_baseline / real(iters,psb_dpk_)) if (run_baseline) then
write(psb_out_unit,'(" Tot baseline time : ",es12.5)') tsum_baseline write(psb_out_unit,'(" Avg baseline time : ",es12.5)') (tsum_baseline / real(iters,psb_dpk_))
write(psb_out_unit,'(" Avg neighbor time : ",es12.5)') (tsum_neighbor / real(iters,psb_dpk_)) write(psb_out_unit,'(" Tot baseline time : ",es12.5)') tsum_baseline
write(psb_out_unit,'(" Tot neighbor time : ",es12.5)') tsum_neighbor end if
write(psb_out_unit,'(" Avg pers-neigh time: ",es12.5)') (tsum_neighbor_persistent / real(iters,psb_dpk_)) if (run_neighbor) then
write(psb_out_unit,'(" Tot pers-neigh time: ",es12.5)') tsum_neighbor_persistent write(psb_out_unit,'(" Avg neighbor time : ",es12.5)') (tsum_neighbor / real(iters,psb_dpk_))
write(psb_out_unit,'(" Tot neighbor time : ",es12.5)') tsum_neighbor
end if
if (run_persistent) then
write(psb_out_unit,'(" Avg pers-neigh time: ",es12.5)') (tsum_neighbor_persistent / real(iters,psb_dpk_))
write(psb_out_unit,'(" Tot pers-neigh time: ",es12.5)') tsum_neighbor_persistent
end if
end if end if
! ================================================================== ! ==================================================================
@ -347,113 +346,121 @@ program psb_comm_test
result_neighbor = v_neighbor%get_vect() result_neighbor = v_neighbor%get_vect()
result_persistent = v_neighbor_persistent%get_vect() result_persistent = v_neighbor_persistent%get_vect()
! ---- Test 1: cross-check baseline vs neighbor (all entries) ---- if (run_baseline .and. run_neighbor) then
n_total = n_total + 1 n_total = n_total + 1
err = maxval(abs(result_baseline(1:ncol) - result_neighbor(1:ncol))) err = maxval(abs(result_baseline(1:ncol) - result_neighbor(1:ncol)))
call psb_amx(ctxt, err) call psb_amx(ctxt, err)
if (my_rank == 0) then if (my_rank == 0) then
if (err < tol) then if (err < tol) then
write(psb_out_unit,'(" [PASS] cross-check baseline vs neighbor : err = ",es12.5)') err write(psb_out_unit,'(" [PASS] cross-check baseline vs neighbor : err = ",es12.5)') err
n_pass = n_pass + 1 n_pass = n_pass + 1
else else
write(psb_out_unit,'(" [FAIL] cross-check baseline vs neighbor : err = ",es12.5)') err write(psb_out_unit,'(" [FAIL] cross-check baseline vs neighbor : err = ",es12.5)') err
end if
end if end if
end if end if
! ---- Test 2: baseline absolute correctness (halo = global index) ---- if (run_baseline) then
n_total = n_total + 1 n_total = n_total + 1
err = maxval(abs(result_baseline(1:ncol) - expected(1:ncol))) err = maxval(abs(result_baseline(1:ncol) - expected(1:ncol)))
call psb_amx(ctxt, err) call psb_amx(ctxt, err)
if (my_rank == 0) then if (my_rank == 0) then
if (err < tol) then if (err < tol) then
write(psb_out_unit,'(" [PASS] baseline absolute correctness : err = ",es12.5)') err write(psb_out_unit,'(" [PASS] baseline absolute correctness : err = ",es12.5)') err
n_pass = n_pass + 1 n_pass = n_pass + 1
else else
write(psb_out_unit,'(" [FAIL] baseline absolute correctness : err = ",es12.5)') err write(psb_out_unit,'(" [FAIL] baseline absolute correctness : err = ",es12.5)') err
end if
end if end if
end if end if
! ---- Test 3: neighbor absolute correctness (halo = global index) ---- if (run_neighbor) then
n_total = n_total + 1 n_total = n_total + 1
err = maxval(abs(result_neighbor(1:ncol) - expected(1:ncol))) err = maxval(abs(result_neighbor(1:ncol) - expected(1:ncol)))
call psb_amx(ctxt, err) call psb_amx(ctxt, err)
if (my_rank == 0) then if (my_rank == 0) then
if (err < tol) then if (err < tol) then
write(psb_out_unit,'(" [PASS] neighbor absolute correctness : err = ",es12.5)') err write(psb_out_unit,'(" [PASS] neighbor absolute correctness : err = ",es12.5)') err
n_pass = n_pass + 1 n_pass = n_pass + 1
else else
write(psb_out_unit,'(" [FAIL] neighbor absolute correctness : err = ",es12.5)') err write(psb_out_unit,'(" [FAIL] neighbor absolute correctness : err = ",es12.5)') err
end if
end if end if
end if end if
! ---- Test 4: cross-check baseline vs persistent-neighbor (all entries) ---- if (run_baseline .and. run_persistent) then
n_total = n_total + 1 n_total = n_total + 1
err = maxval(abs(result_baseline(1:ncol) - result_persistent(1:ncol))) err = maxval(abs(result_baseline(1:ncol) - result_persistent(1:ncol)))
call psb_amx(ctxt, err) call psb_amx(ctxt, err)
if (my_rank == 0) then if (my_rank == 0) then
if (err < tol) then if (err < tol) then
write(psb_out_unit,'(" [PASS] cross-check baseline vs pers-nei : err = ",es12.5)') err write(psb_out_unit,'(" [PASS] cross-check baseline vs pers-nei : err = ",es12.5)') err
n_pass = n_pass + 1 n_pass = n_pass + 1
else else
write(psb_out_unit,'(" [FAIL] cross-check baseline vs pers-nei : err = ",es12.5)') err write(psb_out_unit,'(" [FAIL] cross-check baseline vs pers-nei : err = ",es12.5)') err
end if
end if end if
end if end if
! ---- Test 5: persistent-neighbor absolute correctness ---- if (run_persistent) then
n_total = n_total + 1 n_total = n_total + 1
err = maxval(abs(result_persistent(1:ncol) - expected(1:ncol))) err = maxval(abs(result_persistent(1:ncol) - expected(1:ncol)))
call psb_amx(ctxt, err) call psb_amx(ctxt, err)
if (my_rank == 0) then if (my_rank == 0) then
if (err < tol) then if (err < tol) then
write(psb_out_unit,'(" [PASS] pers-neigh absolute correctness : err = ",es12.5)') err write(psb_out_unit,'(" [PASS] pers-neigh absolute correctness : err = ",es12.5)') err
n_pass = n_pass + 1 n_pass = n_pass + 1
else else
write(psb_out_unit,'(" [FAIL] pers-neigh absolute correctness : err = ",es12.5)') err write(psb_out_unit,'(" [FAIL] pers-neigh absolute correctness : err = ",es12.5)') err
end if
end if end if
end if end if
! ---- Test 6: repeat neighbor exchange (topology reuse) ---- if (run_neighbor) then
! Reset halo entries to zero, run again, and check ! ---- Test 6: repeat neighbor exchange (topology reuse) ----
do i = nrow+1, ncol do i = nrow+1, ncol
result_neighbor(i) = dzero result_neighbor(i) = dzero
end do end do
call v_neighbor%set_vect(result_neighbor) call v_neighbor%set_vect(result_neighbor)
call psi_swapdata(psb_comm_status_start_, dzero, v_neighbor%v, desc_a, info, data=psb_comm_halo_) call psi_swapdata(psb_comm_status_start_, dzero, v_neighbor%v, desc_a, info, data=psb_comm_halo_)
call psi_swapdata(psb_comm_status_wait_, dzero, v_neighbor%v, desc_a, info, data=psb_comm_halo_) call psi_swapdata(psb_comm_status_wait_, dzero, v_neighbor%v, desc_a, info, data=psb_comm_halo_)
result_neighbor = v_neighbor%get_vect() result_neighbor = v_neighbor%get_vect()
n_total = n_total + 1 n_total = n_total + 1
err = maxval(abs(result_neighbor(1:ncol) - expected(1:ncol))) err = maxval(abs(result_neighbor(1:ncol) - expected(1:ncol)))
call psb_amx(ctxt, err) call psb_amx(ctxt, err)
if (my_rank == 0) then if (my_rank == 0) then
if (err < tol) then if (err < tol) then
write(psb_out_unit,'(" [PASS] neighbor topology reuse : err = ",es12.5)') err write(psb_out_unit,'(" [PASS] neighbor topology reuse : err = ",es12.5)') err
n_pass = n_pass + 1 n_pass = n_pass + 1
else else
write(psb_out_unit,'(" [FAIL] neighbor topology reuse : err = ",es12.5)') err write(psb_out_unit,'(" [FAIL] neighbor topology reuse : err = ",es12.5)') err
end if
end if end if
end if end if
! ---- Test 7: repeat persistent-neighbor exchange (buffer reuse) ---- if (run_persistent) then
do i = nrow+1, ncol ! ---- Test 7: repeat persistent-neighbor exchange (buffer reuse) ----
result_persistent(i) = dzero do i = nrow+1, ncol
end do result_persistent(i) = dzero
call v_neighbor_persistent%set_vect(result_persistent) end do
call v_neighbor_persistent%set_vect(result_persistent)
call psi_swapdata(psb_comm_status_start_, dzero, v_neighbor_persistent%v, desc_a, info, data=psb_comm_halo_) call psi_swapdata(psb_comm_status_start_, dzero, v_neighbor_persistent%v, desc_a, info, data=psb_comm_halo_)
call psi_swapdata(psb_comm_status_wait_, dzero, v_neighbor_persistent%v, desc_a, info, data=psb_comm_halo_) call psi_swapdata(psb_comm_status_wait_, dzero, v_neighbor_persistent%v, desc_a, info, data=psb_comm_halo_)
result_persistent = v_neighbor_persistent%get_vect() result_persistent = v_neighbor_persistent%get_vect()
n_total = n_total + 1 n_total = n_total + 1
err = maxval(abs(result_persistent(1:ncol) - expected(1:ncol))) err = maxval(abs(result_persistent(1:ncol) - expected(1:ncol)))
call psb_amx(ctxt, err) call psb_amx(ctxt, err)
if (my_rank == 0) then if (my_rank == 0) then
if (err < tol) then if (err < tol) then
write(psb_out_unit,'(" [PASS] pers-neigh buffer reuse : err = ",es12.5)') err write(psb_out_unit,'(" [PASS] pers-neigh buffer reuse : err = ",es12.5)') err
n_pass = n_pass + 1 n_pass = n_pass + 1
else else
write(psb_out_unit,'(" [FAIL] pers-neigh buffer reuse : err = ",es12.5)') err write(psb_out_unit,'(" [FAIL] pers-neigh buffer reuse : err = ",es12.5)') err
end if
end if end if
end if end if

Loading…
Cancel
Save