diff --git a/base/comm/internals/psi_dswapdata.F90 b/base/comm/internals/psi_dswapdata.F90 index 8ad4c5f4..5d8b8ff3 100644 --- a/base/comm/internals/psi_dswapdata.F90 +++ b/base/comm/internals/psi_dswapdata.F90 @@ -591,38 +591,61 @@ contains ! combuf(total_send+1 : total_send+total_recv) = recv area buffer_size = topology_total_send + topology_total_recv - if (neighbor_comm_handle%use_persistent_buffers) then - if ((.not.allocated(y%combuf)) .or. (size(y%combuf) < buffer_size)) then - neighbor_comm_handle%diag_buffer_reallocs = neighbor_comm_handle%diag_buffer_reallocs + 1 - if (neighbor_comm_handle%persistent_request_ready) then - if (neighbor_comm_handle%persistent_request /= mpi_request_null) then - call mpi_request_free(neighbor_comm_handle%persistent_request, iret) + if (buffer_size > 0) then + if (neighbor_comm_handle%use_persistent_buffers) then + if (.not. allocated(y%combuf)) then + neighbor_comm_handle%diag_buffer_reallocs = neighbor_comm_handle%diag_buffer_reallocs + 1 + if (neighbor_comm_handle%persistent_request_ready) then + if (neighbor_comm_handle%persistent_request /= mpi_request_null) then + call mpi_request_free(neighbor_comm_handle%persistent_request, iret) + end if + neighbor_comm_handle%persistent_request = mpi_request_null + neighbor_comm_handle%persistent_request_ready = .false. + neighbor_comm_handle%persistent_in_flight = .false. + neighbor_comm_handle%persistent_buffer_size = 0 + end if + call y%new_buffer(buffer_size, info) + if (info /= 0) then + call psb_errpush(psb_err_alloc_dealloc_, name) + goto 9999 + end if + else if (size(y%combuf) < buffer_size) then + neighbor_comm_handle%diag_buffer_reallocs = neighbor_comm_handle%diag_buffer_reallocs + 1 + if (neighbor_comm_handle%persistent_request_ready) then + if (neighbor_comm_handle%persistent_request /= mpi_request_null) then + call mpi_request_free(neighbor_comm_handle%persistent_request, iret) + end if + neighbor_comm_handle%persistent_request = mpi_request_null + neighbor_comm_handle%persistent_request_ready = .false. + neighbor_comm_handle%persistent_in_flight = .false. + neighbor_comm_handle%persistent_buffer_size = 0 + end if + call y%new_buffer(buffer_size, info) + if (info /= 0) then + call psb_errpush(psb_err_alloc_dealloc_, name) + goto 9999 end if - neighbor_comm_handle%persistent_request = mpi_request_null - neighbor_comm_handle%persistent_request_ready = .false. - neighbor_comm_handle%persistent_in_flight = .false. - neighbor_comm_handle%persistent_buffer_size = 0 end if + else call y%new_buffer(buffer_size, info) if (info /= 0) then call psb_errpush(psb_err_alloc_dealloc_, name) goto 9999 end if end if + neighbor_comm_handle%comm_request = mpi_request_null + + ! Gather send data into contiguous send buffer (polymorphic for GPU) + if (debug) write(*,*) me,' nbr_vect: gathering send data,', topology_total_send,' elems' + call y%gth(int(topology_total_send,psb_mpk_), & + & neighbor_comm_handle%send_indexes, & + & y%combuf(1:topology_total_send)) else - call y%new_buffer(buffer_size, info) - if (info /= 0) then - call psb_errpush(psb_err_alloc_dealloc_, name) - goto 9999 - end if + ! No data to send/recv: ensure requests/buffers indicate idle state + neighbor_comm_handle%comm_request = mpi_request_null + neighbor_comm_handle%persistent_in_flight = .false. + neighbor_comm_handle%persistent_request_ready = neighbor_comm_handle%persistent_request_ready end if - neighbor_comm_handle%comm_request = mpi_request_null - - ! Gather send data into contiguous send buffer (polymorphic for GPU) - if (debug) write(*,*) me,' nbr_vect: gathering send data,', topology_total_send,' elems' - call y%gth(int(topology_total_send,psb_mpk_), & - & neighbor_comm_handle%send_indexes, & - & y%combuf(1:topology_total_send)) ! Wait for device (important for GPU subclasses) call y%device_wait() @@ -631,27 +654,32 @@ contains ! Lazy persistent-init: build the request once, then reuse with START/WAIT. if (.not. neighbor_comm_handle%persistent_request_ready) then #ifdef PSB_HAVE_MPI_NEIGHBOR_PERSISTENT - if (debug) write(*,*) me,' nbr_vect: posting MPI_Neighbor_alltoallv_init' - call mpi_neighbor_alltoallv_init( & - & y%combuf(1), & ! send buffer - & neighbor_comm_handle%send_counts, & - & neighbor_comm_handle%send_displs, & - & psb_mpi_r_dpk_, & - & y%combuf(topology_total_send + 1), & ! recv buffer - & neighbor_comm_handle%recv_counts, & - & neighbor_comm_handle%recv_displs, & - & psb_mpi_r_dpk_, & - & neighbor_comm_handle%graph_comm, & - & mpi_info_null, & - & neighbor_comm_handle%persistent_request, iret) - if (iret /= mpi_success) then - info = psb_err_mpi_error_ - call psb_errpush(info, name, m_err=(/iret/)) - goto 9999 + if (buffer_size > 0) then + if (debug) write(*,*) me,' nbr_vect: posting MPI_Neighbor_alltoallv_init' + call mpi_neighbor_alltoallv_init( & + & y%combuf(1), & ! send buffer + & neighbor_comm_handle%send_counts, & + & neighbor_comm_handle%send_displs, & + & psb_mpi_r_dpk_, & + & y%combuf(topology_total_send + 1), & ! recv buffer + & neighbor_comm_handle%recv_counts, & + & neighbor_comm_handle%recv_displs, & + & psb_mpi_r_dpk_, & + & neighbor_comm_handle%graph_comm, & + & mpi_info_null, & + & neighbor_comm_handle%persistent_request, iret) + if (iret /= mpi_success) then + info = psb_err_mpi_error_ + call psb_errpush(info, name, m_err=(/iret/)) + goto 9999 + end if + neighbor_comm_handle%diag_init_calls = neighbor_comm_handle%diag_init_calls + 1 + neighbor_comm_handle%persistent_request_ready = .true. + neighbor_comm_handle%persistent_buffer_size = buffer_size + else + neighbor_comm_handle%persistent_request_ready = .false. + neighbor_comm_handle%persistent_buffer_size = 0 end if - neighbor_comm_handle%diag_init_calls = neighbor_comm_handle%diag_init_calls + 1 - neighbor_comm_handle%persistent_request_ready = .true. - neighbor_comm_handle%persistent_buffer_size = buffer_size #else ! Fallback when persistent neighborhood collectives are not available neighbor_comm_handle%persistent_request_ready = .false. @@ -660,51 +688,64 @@ contains end if #ifdef PSB_HAVE_MPI_NEIGHBOR_PERSISTENT - call mpi_start(neighbor_comm_handle%persistent_request, iret) - if (iret /= mpi_success) then - info = psb_err_mpi_error_ - call psb_errpush(info, name, m_err=(/iret/)) - goto 9999 + if (buffer_size > 0) then + call mpi_start(neighbor_comm_handle%persistent_request, iret) + if (iret /= mpi_success) then + info = psb_err_mpi_error_ + call psb_errpush(info, name, m_err=(/iret/)) + goto 9999 + end if + neighbor_comm_handle%diag_start_calls = neighbor_comm_handle%diag_start_calls + 1 + neighbor_comm_handle%persistent_in_flight = .true. + else + neighbor_comm_handle%persistent_in_flight = .false. end if - neighbor_comm_handle%diag_start_calls = neighbor_comm_handle%diag_start_calls + 1 - neighbor_comm_handle%persistent_in_flight = .true. #else - call mpi_ineighbor_alltoallv( & - & y%combuf(1), & ! send buffer - & neighbor_comm_handle%send_counts, & - & neighbor_comm_handle%send_displs, & - & psb_mpi_r_dpk_, & - & y%combuf(topology_total_send + 1), & ! recv buffer - & neighbor_comm_handle%recv_counts, & - & neighbor_comm_handle%recv_displs, & - & psb_mpi_r_dpk_, & - & neighbor_comm_handle%graph_comm, & - & neighbor_comm_handle%comm_request, iret) - if (iret /= mpi_success) then - info = psb_err_mpi_error_ - call psb_errpush(info, name, m_err=(/iret/)) - goto 9999 + if (buffer_size > 0) then + call mpi_ineighbor_alltoallv( & + & y%combuf(1), & ! send buffer + & neighbor_comm_handle%send_counts, & + & neighbor_comm_handle%send_displs, & + & psb_mpi_r_dpk_, & + & y%combuf(topology_total_send + 1), & ! recv buffer + & neighbor_comm_handle%recv_counts, & + & neighbor_comm_handle%recv_displs, & + & psb_mpi_r_dpk_, & + & neighbor_comm_handle%graph_comm, & + & neighbor_comm_handle%comm_request, iret) + if (iret /= mpi_success) then + info = psb_err_mpi_error_ + call psb_errpush(info, name, m_err=(/iret/)) + goto 9999 + end if + neighbor_comm_handle%persistent_in_flight = .true. + else + neighbor_comm_handle%persistent_in_flight = .false. + neighbor_comm_handle%comm_request = mpi_request_null end if - neighbor_comm_handle%persistent_in_flight = .true. #endif else ! Post non-blocking neighborhood alltoallv if (debug) write(*,*) me,' nbr_vect: posting MPI_Ineighbor_alltoallv' - call mpi_ineighbor_alltoallv( & - & y%combuf(1), & ! send buffer - & neighbor_comm_handle%send_counts, & - & neighbor_comm_handle%send_displs, & - & psb_mpi_r_dpk_, & - & y%combuf(topology_total_send + 1), & ! recv buffer - & neighbor_comm_handle%recv_counts, & - & neighbor_comm_handle%recv_displs, & - & psb_mpi_r_dpk_, & - & neighbor_comm_handle%graph_comm, & - & neighbor_comm_handle%comm_request, iret) - if (iret /= mpi_success) then - info = psb_err_mpi_error_ - call psb_errpush(info, name, m_err=(/iret/)) - goto 9999 + if (buffer_size > 0) then + call mpi_ineighbor_alltoallv( & + & y%combuf(1), & ! send buffer + & neighbor_comm_handle%send_counts, & + & neighbor_comm_handle%send_displs, & + & psb_mpi_r_dpk_, & + & y%combuf(topology_total_send + 1), & ! recv buffer + & neighbor_comm_handle%recv_counts, & + & neighbor_comm_handle%recv_displs, & + & psb_mpi_r_dpk_, & + & neighbor_comm_handle%graph_comm, & + & neighbor_comm_handle%comm_request, iret) + if (iret /= mpi_success) then + info = psb_err_mpi_error_ + call psb_errpush(info, name, m_err=(/iret/)) + goto 9999 + end if + else + neighbor_comm_handle%comm_request = mpi_request_null end if end if @@ -715,55 +756,69 @@ contains ! --------------------------------------------------------- if (do_wait) then - if (neighbor_comm_handle%use_persistent_buffers) then - if (.not. neighbor_comm_handle%persistent_in_flight) then - info = psb_err_mpi_error_ - call psb_errpush(info, name, a_err='Invalid WAIT: no persistent neighbor request in flight') - goto 9999 + topology_total_send = neighbor_comm_handle%total_send + topology_total_recv = neighbor_comm_handle%total_recv + + if ((topology_total_send + topology_total_recv) == 0) then + ! Valid no-op exchange: nothing was posted in START and nothing to wait/scatter. + if (neighbor_comm_handle%use_persistent_buffers) then + neighbor_comm_handle%persistent_in_flight = .false. + else + neighbor_comm_handle%comm_request = mpi_request_null end if else - if (neighbor_comm_handle%comm_request == mpi_request_null) then - write(psb_err_unit,*) me, 'DBG: neighbor WAIT but comm_request is NULL; is_initialized=', & - & neighbor_comm_handle%is_initialized - info = psb_err_mpi_error_ - call psb_errpush(info, name, m_err=(/-2/)) - goto 9999 + if (neighbor_comm_handle%use_persistent_buffers) then + if (.not. neighbor_comm_handle%persistent_in_flight) then + info = psb_err_mpi_error_ + call psb_errpush(info, name, a_err='Invalid WAIT: no persistent neighbor request in flight') + goto 9999 + end if + else + if (neighbor_comm_handle%comm_request == mpi_request_null) then + write(psb_err_unit,*) me, 'DBG: neighbor WAIT but comm_request is NULL; is_initialized=', & + & neighbor_comm_handle%is_initialized + info = psb_err_mpi_error_ + call psb_errpush(info, name, m_err=(/-2/)) + goto 9999 + end if end if end if - topology_total_send = neighbor_comm_handle%total_send - topology_total_recv = neighbor_comm_handle%total_recv - - ! Wait for the non-blocking collective to complete - if (debug) write(*,*) me,' nbr_vect: waiting on MPI request' - if (neighbor_comm_handle%use_persistent_buffers) then + ! Only wait and scatter if there's data + if ((topology_total_send + topology_total_recv) > 0) then + ! Wait for the non-blocking collective to complete + if (debug) write(*,*) me,' nbr_vect: waiting on MPI request' + if (neighbor_comm_handle%use_persistent_buffers) then #ifdef PSB_HAVE_MPI_NEIGHBOR_PERSISTENT - call mpi_wait(neighbor_comm_handle%persistent_request, p2pstat, iret) + call mpi_wait(neighbor_comm_handle%persistent_request, p2pstat, iret) #else - call mpi_wait(neighbor_comm_handle%comm_request, p2pstat, iret) + call mpi_wait(neighbor_comm_handle%comm_request, p2pstat, iret) #endif + else + call mpi_wait(neighbor_comm_handle%comm_request, p2pstat, iret) + end if + if (iret /= mpi_success) then + info = psb_err_mpi_error_ + call psb_errpush(info, name, m_err=(/iret/)) + goto 9999 + end if + if (neighbor_comm_handle%use_persistent_buffers) then + neighbor_comm_handle%diag_wait_calls = neighbor_comm_handle%diag_wait_calls + 1 + end if + if (neighbor_comm_handle%use_persistent_buffers) then + neighbor_comm_handle%persistent_in_flight = .false. + end if + + ! Scatter received data to local vector positions (polymorphic for GPU) + if (debug) write(*,*) me,' nbr_vect: scattering recv data,', topology_total_recv,' elems' + call y%sct(int(topology_total_recv,psb_mpk_), & + & neighbor_comm_handle%recv_indexes, & + & y%combuf(topology_total_send+1:topology_total_send+topology_total_recv), & + & beta) else - call mpi_wait(neighbor_comm_handle%comm_request, p2pstat, iret) - end if - if (iret /= mpi_success) then - info = psb_err_mpi_error_ - call psb_errpush(info, name, m_err=(/iret/)) - goto 9999 - end if - if (neighbor_comm_handle%use_persistent_buffers) then - neighbor_comm_handle%diag_wait_calls = neighbor_comm_handle%diag_wait_calls + 1 - end if - if (neighbor_comm_handle%use_persistent_buffers) then - neighbor_comm_handle%persistent_in_flight = .false. + ! nothing to wait/scatter end if - ! Scatter received data to local vector positions (polymorphic for GPU) - if (debug) write(*,*) me,' nbr_vect: scattering recv data,', topology_total_recv,' elems' - call y%sct(int(topology_total_recv,psb_mpk_), & - & neighbor_comm_handle%recv_indexes, & - & y%combuf(topology_total_send+1:topology_total_send+topology_total_recv), & - & beta) - ! Clean up if ((.not. neighbor_comm_handle%use_persistent_buffers) .or. & @@ -1259,7 +1314,23 @@ subroutine psi_dswap_neighbor_topology_multivect(ctxt,swap_status,beta,y,comm_in buffer_size = topology_total_send + topology_total_recv if (neighbor_comm_handle%use_persistent_buffers) then - if ((.not.allocated(y%combuf)) .or. (size(y%combuf) < buffer_size)) then + if (.not. allocated(y%combuf)) then + neighbor_comm_handle%diag_buffer_reallocs = neighbor_comm_handle%diag_buffer_reallocs + 1 + if (neighbor_comm_handle%persistent_request_ready) then + if (neighbor_comm_handle%persistent_request /= mpi_request_null) then + call mpi_request_free(neighbor_comm_handle%persistent_request, iret) + end if + neighbor_comm_handle%persistent_request = mpi_request_null + neighbor_comm_handle%persistent_request_ready = .false. + neighbor_comm_handle%persistent_in_flight = .false. + neighbor_comm_handle%persistent_buffer_size = 0 + end if + call y%new_buffer(buffer_size, info) + if (info /= 0) then + call psb_errpush(psb_err_alloc_dealloc_, name) + goto 9999 + end if + else if (size(y%combuf) < buffer_size) then neighbor_comm_handle%diag_buffer_reallocs = neighbor_comm_handle%diag_buffer_reallocs + 1 if (neighbor_comm_handle%persistent_request_ready) then if (neighbor_comm_handle%persistent_request /= mpi_request_null) then diff --git a/test/comm/.gitignore b/test/comm/.gitignore new file mode 100644 index 00000000..a899a643 --- /dev/null +++ b/test/comm/.gitignore @@ -0,0 +1,3 @@ +**/massif/* +**/**/massif/* +*.kcg \ No newline at end of file diff --git a/test/comm/cg/psb_comm_cg_test.F90 b/test/comm/cg/psb_comm_cg_test.F90 index c56c568a..7d949690 100644 --- a/test/comm/cg/psb_comm_cg_test.F90 +++ b/test/comm/cg/psb_comm_cg_test.F90 @@ -4,6 +4,7 @@ program psb_comm_cg_test use psb_linsolve_mod use psb_comm_factory_mod use psb_comm_neighbor_impl_mod, only: psb_comm_neighbor_handle + use, intrinsic :: ieee_arithmetic implicit none @@ -32,17 +33,16 @@ program psb_comm_cg_test character(len=20) :: prec_name(n_precs) character(len=5) :: afmt character(len=256) :: arg - logical :: prec_ready logical :: setup_done info = psb_success_ - prec_ready = .false. afmt = 'CSR' idim = 40 itmax = 1000 nrep = 5 nwarm = 1 - itrace = 0 + ! Keep itrace positive to avoid modulo-by-zero paths in convergence logging. + itrace = 1 istop = 2 eps = 1.d-6 scheme_type = (/ psb_comm_isend_irecv_, psb_comm_ineighbor_alltoallv_, & @@ -87,6 +87,15 @@ program psb_comm_cg_test info = psb_success_ end if end if + ! call psb_set_debug_level(psb_debug_ext_) + + + ! call probe_ieee('before psb_init') + call psb_init(ctxt) + ! call probe_ieee('after psb_init') + ! call clear_ieee_flags() + ! call probe_ieee('after clear_ieee_flags') + call psb_info(ctxt, iam, np) allocate(setup_time(n_precs,n_schemes,nrep), solve_time(n_precs,n_schemes,nrep), & & total_time(n_precs,n_schemes,nrep), final_error(n_precs,n_schemes,nrep), & @@ -96,9 +105,6 @@ program psb_comm_cg_test & comm_set_time(n_precs,n_schemes,nrep), krylov_time(n_precs,n_schemes,nrep), stat=info) if (info /= psb_success_) stop 1 - call psb_init(ctxt) - call psb_info(ctxt, iam, np) - if (iam == psb_root_) then write(psb_out_unit,*) 'Welcome to PSBLAS version: ', psb_version_string_ write(psb_out_unit,*) 'This is the comm/cg test program' @@ -116,59 +122,53 @@ program psb_comm_cg_test call psb_barrier(ctxt) t_start = psb_wtime() + ! call probe_ieee('before psb_d_gen_pde3d') call psb_d_gen_pde3d(ctxt,idim,a,b,x,desc_a,afmt,info) + ! call probe_ieee('after psb_d_gen_pde3d') if (info /= psb_success_) goto 9999 do prec_idx = 1, n_precs do scheme_idx = 1, n_schemes - setup_done = .false. do rep = 1, nrep call psb_geaxpby(dzero,b,dzero,x,desc_a,info) if (info /= psb_success_) goto 9999 - if (.not. setup_done) then - call psb_barrier(ctxt) - t_start = psb_wtime() - call prec%init(ctxt,trim(prec_type(prec_idx)),info) - if (info /= psb_success_) goto 9999 - prec_init_time(prec_idx,scheme_idx,rep) = psb_wtime() - t_start - call psb_amx(ctxt,prec_init_time(prec_idx,scheme_idx,rep)) + call psb_barrier(ctxt) - t_start = psb_wtime() - call psb_precbld(a,desc_a,prec,info) - if (info /= psb_success_) goto 9999 - prec_bld_time(prec_idx,scheme_idx,rep) = psb_wtime() - t_start - call psb_amx(ctxt,prec_bld_time(prec_idx,scheme_idx,rep)) + t_start = psb_wtime() + call prec%init(ctxt,trim(prec_type(prec_idx)),info) + if (info /= psb_success_) goto 9999 - if (.not.allocated(prec%prec)) then - info = psb_err_internal_error_ - write(psb_err_unit,*) 'Preconditioner object not allocated after build' - goto 9999 - end if + prec_init_time(prec_idx,scheme_idx,rep) = psb_wtime() - t_start + call psb_amx(ctxt,prec_init_time(prec_idx,scheme_idx,rep)) - t_start = psb_wtime() - call psb_comm_set(scheme_type(scheme_idx),x%v%comm_handle,info) - comm_set_time(prec_idx,scheme_idx,rep) = psb_wtime() - t_start - call psb_amx(ctxt,comm_set_time(prec_idx,scheme_idx,rep)) + t_start = psb_wtime() + call prec%build(a,desc_a,info) + if (info /= psb_success_) goto 9999 + prec_bld_time(prec_idx,scheme_idx,rep) = psb_wtime() - t_start + call psb_amx(ctxt,prec_bld_time(prec_idx,scheme_idx,rep)) - if (info /= psb_success_) goto 9999 - if (.not.allocated(prec%prec)) then - info = psb_err_internal_error_ - write(psb_err_unit,*) 'Preconditioner object lost after psb_comm_set' - goto 9999 - end if - - setup_time(prec_idx,scheme_idx,rep) = prec_init_time(prec_idx,scheme_idx,rep) + & - & prec_bld_time(prec_idx,scheme_idx,rep) + comm_set_time(prec_idx,scheme_idx,rep) - setup_done = .true. - prec_ready = .true. - else - prec_init_time(prec_idx,scheme_idx,rep) = prec_init_time(prec_idx,scheme_idx,1) - prec_bld_time(prec_idx,scheme_idx,rep) = prec_bld_time(prec_idx,scheme_idx,1) - comm_set_time(prec_idx,scheme_idx,rep) = comm_set_time(prec_idx,scheme_idx,1) - setup_time(prec_idx,scheme_idx,rep) = setup_time(prec_idx,scheme_idx,1) + if (.not.allocated(prec%prec)) then + info = psb_err_internal_error_ + write(psb_err_unit,*) 'Preconditioner object not allocated after build' + goto 9999 end if + t_start = psb_wtime() + call psb_comm_set(scheme_type(scheme_idx),x%v%comm_handle,info) + comm_set_time(prec_idx,scheme_idx,rep) = psb_wtime() - t_start + call psb_amx(ctxt,comm_set_time(prec_idx,scheme_idx,rep)) + + if (info /= psb_success_) goto 9999 + if (.not.allocated(prec%prec)) then + info = psb_err_internal_error_ + write(psb_err_unit,*) 'Preconditioner object lost after psb_comm_set' + goto 9999 + end if + + setup_time(prec_idx,scheme_idx,rep) = prec_init_time(prec_idx,scheme_idx,rep) + & + & prec_bld_time(prec_idx,scheme_idx,rep) + comm_set_time(prec_idx,scheme_idx,rep) + do iter = 1, nwarm call psb_geaxpby(dzero,b,dzero,x,desc_a,info) if (info /= psb_success_) goto 9999 @@ -181,24 +181,34 @@ program psb_comm_cg_test if (info /= psb_success_) goto 9999 call psb_barrier(ctxt) - if (.not.allocated(prec%prec)) then - info = psb_err_internal_error_ - write(psb_err_unit,*) 'Preconditioner object lost before psb_krylov' - goto 9999 - end if t_start = psb_wtime() call psb_krylov('CG',a,prec,b,x,eps,desc_a,info,& & itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istop) krylov_time(prec_idx,scheme_idx,rep) = psb_wtime() - t_start call psb_amx(ctxt,krylov_time(prec_idx,scheme_idx,rep)) + if (info /= psb_success_) goto 9999 + + call psb_geaxpby(dzero,b,dzero,x,desc_a,info) + if (info /= psb_success_) goto 9999 + + call psb_barrier(ctxt) + if (.not.allocated(prec%prec)) then + info = psb_err_internal_error_ + write(psb_err_unit,*) 'Preconditioner object lost before psb_krylov' + goto 9999 + end if + + call prec%free(info) + if (info /= psb_success_) goto 9999 + solve_time(prec_idx,scheme_idx,rep) = krylov_time(prec_idx,scheme_idx,rep) total_time(prec_idx,scheme_idx,rep) = setup_time(prec_idx,scheme_idx,rep) + & & solve_time(prec_idx,scheme_idx,rep) iter_count(prec_idx,scheme_idx,rep) = iter - iter_denom = real(max(iter,1_psb_ipk_),psb_dpk_) - krylov_it_time(prec_idx,scheme_idx,rep) = solve_time(prec_idx,scheme_idx,rep)/iter_denom - total_it_time(prec_idx,scheme_idx,rep) = total_time(prec_idx,scheme_idx,rep)/iter_denom + iter_denom = real(max(iter,1_psb_ipk_),psb_dpk_) + krylov_it_time(prec_idx,scheme_idx,rep) = solve_time(prec_idx,scheme_idx,rep)/iter_denom + total_it_time(prec_idx,scheme_idx,rep) = total_time(prec_idx,scheme_idx,rep)/iter_denom final_error(prec_idx,scheme_idx,rep) = err solve_info(prec_idx,scheme_idx,rep) = info @@ -219,12 +229,6 @@ program psb_comm_cg_test if (info /= psb_success_) goto 9999 end do - - if (prec_ready) then - call psb_precfree(prec,info) - if (info /= psb_success_) goto 9999 - prec_ready = .false. - end if end do end do @@ -303,7 +307,7 @@ program psb_comm_cg_test call psb_gefree(b,desc_a,info) call psb_gefree(x,desc_a,info) call psb_spfree(a,desc_a,info) - if (prec_ready) call psb_precfree(prec,info) + call psb_precfree(prec,info) call psb_cdfree(desc_a,info) deallocate(setup_time,solve_time,total_time,final_error,iter_count,solve_info, & & prec_init_time,prec_bld_time,comm_set_time,krylov_time, & @@ -325,7 +329,8 @@ contains do i = 2, size(v) key = v(i) j = i - 1 - do while ((j >= 1).and.(v(j) > key)) + do while (j >= 1) + if (v(j) <= key) exit v(j+1) = v(j) j = j - 1 end do @@ -428,6 +433,28 @@ contains end if end function gfun + subroutine probe_ieee(where) + character(len=*), intent(in) :: where + logical :: invalid_flag, divzero_flag, overflow_flag, underflow_flag + + call ieee_get_flag(ieee_invalid, invalid_flag) + call ieee_get_flag(ieee_divide_by_zero, divzero_flag) + call ieee_get_flag(ieee_overflow, overflow_flag) + call ieee_get_flag(ieee_underflow, underflow_flag) + + if (invalid_flag .or. divzero_flag .or. overflow_flag .or. underflow_flag) then + write(psb_out_unit,'("IEEE probe [",a,"] invalid=",l1,", div0=",l1,", overflow=",l1,", underflow=",l1)') & + trim(where), invalid_flag, divzero_flag, overflow_flag, underflow_flag + end if + end subroutine probe_ieee + + subroutine clear_ieee_flags() + call ieee_set_flag(ieee_invalid, .false.) + call ieee_set_flag(ieee_divide_by_zero, .false.) + call ieee_set_flag(ieee_overflow, .false.) + call ieee_set_flag(ieee_underflow, .false.) + end subroutine clear_ieee_flags + subroutine psb_d_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info) implicit none integer(psb_ipk_), intent(in) :: idim @@ -458,13 +485,55 @@ contains call psb_info(ctxt, iam, np) + if (idim <= 0) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='idim must be > 0') + goto 9999 + end if + if (np <= 0) then + info = psb_err_context_error_ + call psb_errpush(info,name,a_err='invalid context: np <= 0') + goto 9999 + end if + if (iam < 0) then + info = psb_err_context_error_ + call psb_errpush(info,name,a_err='invalid context: iam < 0') + goto 9999 + end if + deltah = done/(idim+2) sqdeltah = deltah*deltah deltah2 = 2.d0*deltah + if (abs(deltah) <= tiny(deltah)) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='invalid mesh spacing: deltah ~ 0') + goto 9999 + end if + if (abs(sqdeltah) <= tiny(sqdeltah)) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='invalid mesh spacing: sqdeltah ~ 0') + goto 9999 + end if + if (abs(deltah2) <= tiny(deltah2)) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='invalid mesh spacing: deltah2 ~ 0') + goto 9999 + end if + m = idim*idim*idim n = m + if (n <= 0) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='invalid global size: n <= 0') + goto 9999 + end if nnz = ((n*9)/(np)) + if (nnz <= 0) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='invalid local nnz estimate: nnz <= 0') + goto 9999 + end if if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n nt = (m+np-1)/np @@ -481,8 +550,11 @@ contains call psb_barrier(ctxt) t0 = psb_wtime() + ! call probe_ieee('enter psb_cdall') call psb_cdall(ctxt,desc_a,info,nl=nr) + ! call probe_ieee('after psb_cdall') if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz) + ! call probe_ieee('after psb_spall') if (info == psb_success_) call psb_geall(xv,desc_a,info) if (info == psb_success_) call psb_geall(bv,desc_a,info) call psb_barrier(ctxt) @@ -592,12 +664,30 @@ contains end do call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info) - if(info /= psb_success_) exit + ! call probe_ieee('after psb_spins') + if(info /= psb_success_) then + write(psb_err_unit,'("INSERT FAIL rank=",i0,", call=psb_spins, ii=",i0,", ib=",i0,", icoeff=",i0)') & + iam, ii, ib, icoeff + write(psb_err_unit,'(" glob_row=",i0,", ix=",i0,", iy=",i0,", iz=",i0)') glob_row, ix, iy, iz + exit + end if call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info) - if(info /= psb_success_) exit + ! call probe_ieee('after psb_geins bv') + if(info /= psb_success_) then + write(psb_err_unit,'("INSERT FAIL rank=",i0,", call=psb_geins bv, ii=",i0,", ib=",i0,", icoeff=",i0)') & + iam, ii, ib, icoeff + write(psb_err_unit,'(" glob_row=",i0,", ix=",i0,", iy=",i0,", iz=",i0)') glob_row, ix, iy, iz + exit + end if zt(:)=dzero call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info) - if(info /= psb_success_) exit + ! call probe_ieee('after psb_geins xv') + if(info /= psb_success_) then + write(psb_err_unit,'("INSERT FAIL rank=",i0,", call=psb_geins xv, ii=",i0,", ib=",i0,", icoeff=",i0)') & + iam, ii, ib, icoeff + write(psb_err_unit,'(" glob_row=",i0,", ix=",i0,", iy=",i0,", iz=",i0)') glob_row, ix, iy, iz + exit + end if end do tgen = psb_wtime()-t1 @@ -612,12 +702,16 @@ contains call psb_barrier(ctxt) t1 = psb_wtime() + ! call probe_ieee('before psb_cdasb') call psb_cdasb(desc_a,info) + ! call probe_ieee('after psb_cdasb') tcdasb = psb_wtime()-t1 call psb_barrier(ctxt) t1 = psb_wtime() + ! call probe_ieee('before psb_spasb') if (info == psb_success_) call psb_spasb(a,desc_a,info,afmt=afmt) + ! call probe_ieee('after psb_spasb') call psb_barrier(ctxt) if(info /= psb_success_) then info=psb_err_from_subroutine_ diff --git a/test/comm/spmv/Makefile b/test/comm/spmv/Makefile index a8a5f8fb..1555f006 100644 --- a/test/comm/spmv/Makefile +++ b/test/comm/spmv/Makefile @@ -1,4 +1,4 @@ -INSTALLDIR=../.. +INSTALLDIR=../../.. INCDIR=$(INSTALLDIR)/include/ MODDIR=$(INSTALLDIR)/modules/ include $(INCDIR)/Make.inc.psblas diff --git a/test/comm/swapdata/Makefile b/test/comm/swapdata/Makefile index fe64c4a4..988410d2 100644 --- a/test/comm/swapdata/Makefile +++ b/test/comm/swapdata/Makefile @@ -1,4 +1,4 @@ -INSTALLDIR=../.. +INSTALLDIR=../../.. INCDIR=$(INSTALLDIR)/include/ MODDIR=$(INSTALLDIR)/modules/ include $(INCDIR)/Make.inc.psblas diff --git a/test/comm/swapdata/psb_comm_test.F90 b/test/comm/swapdata/psb_comm_test.F90 index fba35221..25b4e22b 100644 --- a/test/comm/swapdata/psb_comm_test.F90 +++ b/test/comm/swapdata/psb_comm_test.F90 @@ -52,6 +52,7 @@ program psb_comm_test ! ---- error / reporting ---- integer(psb_ipk_) :: n_pass, n_total, imode + logical :: run_baseline, run_neighbor, run_persistent logical :: comm_ok real(psb_dpk_) :: err, tol real(psb_dpk_) :: t0, t1, dt, tsum_baseline, tsum_neighbor, tsum_neighbor_persistent @@ -94,6 +95,26 @@ program psb_comm_test end if end do + run_baseline = .false. + run_neighbor = .false. + run_persistent = .false. + select case (trim(adjustl(mode))) + case ('both','all') + run_baseline = .true. + run_neighbor = .true. + run_persistent = .true. + case ('baseline') + run_baseline = .true. + case ('neighbor') + run_neighbor = .true. + case ('persistent','persistent_neighbor','persistent-neighbor') + run_persistent = .true. + case default + run_baseline = .true. + run_neighbor = .true. + run_persistent = .true. + end select + if (idim <= 0) then write(*,*) 'Invalid dimension specified. Usage: --dim ' call psb_abort(ctxt) @@ -190,154 +211,132 @@ program psb_comm_test ! ================================================================== ! 6. Baseline halo exchange (Isend/Irecv in one call) ! ================================================================== - ! v_baseline%v is a psb_d_base_vect_type - call psi_swapdata( & - swap_status=psb_comm_status_start_, & - beta=dzero, & - y=v_baseline%v, & - desc_a=desc_a, & - info=info, & - data=psb_comm_halo_) - if (info /= psb_success_) then - write(psb_err_unit,*) my_rank, 'baseline swap error:', info - call psb_abort(ctxt) - end if - - call psi_swapdata( & - swap_status=psb_comm_status_wait_, & + if (run_baseline) then + call psi_swapdata( & + swap_status=psb_comm_status_start_, & beta=dzero, & y=v_baseline%v, & desc_a=desc_a, & info=info, & data=psb_comm_halo_) - if (info /= psb_success_) then - write(psb_err_unit,*) my_rank, 'baseline swap error:', info - call psb_abort(ctxt) + if (info /= psb_success_) then + write(psb_err_unit,*) my_rank, 'baseline swap error:', info + call psb_abort(ctxt) + end if + + call psi_swapdata( & + swap_status=psb_comm_status_wait_, & + beta=dzero, & + y=v_baseline%v, & + desc_a=desc_a, & + info=info, & + data=psb_comm_halo_) + if (info /= psb_success_) then + write(psb_err_unit,*) my_rank, 'baseline swap error:', info + call psb_abort(ctxt) + end if end if ! ================================================================== ! 7. Neighbor topology halo exchange (start + wait) ! ================================================================== - call psb_comm_set(psb_comm_ineighbor_alltoallv_, v_neighbor%v%comm_handle, info) - if (info /= 0) then - write(psb_err_unit,*) my_rank, 'psb_comm_set neighbor error:', info - call psb_abort(ctxt) - end if - call psi_swapdata(psb_comm_status_start_, dzero, v_neighbor%v, desc_a, info, data=psb_comm_halo_) - if (info /= psb_success_) then - write(psb_err_unit,*) my_rank, 'neighbor start error:', info - call psb_abort(ctxt) - end if + if (run_neighbor) then + call psb_comm_set(psb_comm_ineighbor_alltoallv_, v_neighbor%v%comm_handle, info) + if (info /= 0) then + write(psb_err_unit,*) my_rank, 'psb_comm_set neighbor error:', info + call psb_abort(ctxt) + end if + call psi_swapdata(psb_comm_status_start_, dzero, v_neighbor%v, desc_a, info, data=psb_comm_halo_) + if (info /= psb_success_) then + write(psb_err_unit,*) my_rank, 'neighbor start error:', info + call psb_abort(ctxt) + end if - call psi_swapdata(psb_comm_status_wait_, dzero, v_neighbor%v, desc_a, info, data=psb_comm_halo_) - if (info /= psb_success_) then - write(psb_err_unit,*) my_rank, 'neighbor wait error:', info - call psb_abort(ctxt) + call psi_swapdata(psb_comm_status_wait_, dzero, v_neighbor%v, desc_a, info, data=psb_comm_halo_) + if (info /= psb_success_) then + write(psb_err_unit,*) my_rank, 'neighbor wait error:', info + call psb_abort(ctxt) + end if end if ! ================================================================== ! 7b. Persistent-neighbor halo exchange (start + wait) ! ================================================================== - call psb_comm_set(psb_comm_persistent_ineighbor_alltoallv_, v_neighbor_persistent%v%comm_handle, info) - if (info /= 0) then - write(psb_err_unit,*) my_rank, 'psb_comm_set persistent-neighbor error:', info - call psb_abort(ctxt) - end if - call psi_swapdata(psb_comm_status_start_, dzero, v_neighbor_persistent%v, desc_a, info, data=psb_comm_halo_) - if (info /= psb_success_) then - write(psb_err_unit,*) my_rank, 'persistent-neighbor start error:', info - call psb_abort(ctxt) - end if - call psi_swapdata(psb_comm_status_wait_, dzero, v_neighbor_persistent%v, desc_a, info, data=psb_comm_halo_) - if (info /= psb_success_) then - write(psb_err_unit,*) my_rank, 'persistent-neighbor wait error:', info - call psb_abort(ctxt) + if (run_persistent) then + call psb_comm_set(psb_comm_persistent_ineighbor_alltoallv_, v_neighbor_persistent%v%comm_handle, info) + if (info /= 0) then + write(psb_err_unit,*) my_rank, 'psb_comm_set persistent-neighbor error:', info + call psb_abort(ctxt) + end if + call psi_swapdata(psb_comm_status_start_, dzero, v_neighbor_persistent%v, desc_a, info, data=psb_comm_halo_) + if (info /= psb_success_) then + write(psb_err_unit,*) my_rank, 'persistent-neighbor start error:', info + call psb_abort(ctxt) + end if + call psi_swapdata(psb_comm_status_wait_, dzero, v_neighbor_persistent%v, desc_a, info, data=psb_comm_halo_) + if (info /= psb_success_) then + write(psb_err_unit,*) my_rank, 'persistent-neighbor wait error:', info + call psb_abort(ctxt) + end if end if ! ================================================================== ! 8. Performance: repeat exchanges and measure timings ! ================================================================== if (my_rank == 0) then - write(psb_out_unit,'("Timing: running ",i0," iterations for baseline, neighbor and persistent-neighbor")') iters + write(psb_out_unit,'("Timing: running ",i0," iterations for selected exchange mode(s)")') iters end if tsum_baseline = 0.0_psb_dpk_ tsum_neighbor = 0.0_psb_dpk_ tsum_neighbor_persistent = 0.0_psb_dpk_ - call psb_comm_set(psb_comm_isend_irecv_, v_baseline%v%comm_handle, info) - call psb_comm_set(psb_comm_ineighbor_alltoallv_, v_neighbor%v%comm_handle, info) - call psb_comm_set(psb_comm_persistent_ineighbor_alltoallv_, v_neighbor_persistent%v%comm_handle, info) - - ! ---- Comm check: verify selected communication schemes ---- - n_total = n_total + 1 - comm_ok = allocated(v_baseline%v%comm_handle) .and. allocated(v_neighbor%v%comm_handle) .and. & - & allocated(v_neighbor_persistent%v%comm_handle) - - if (comm_ok) then - comm_ok = (v_baseline%v%comm_handle%comm_type == psb_comm_isend_irecv_) .and. & - & (v_neighbor%v%comm_handle%comm_type == psb_comm_ineighbor_alltoallv_) .and. & - & (v_neighbor_persistent%v%comm_handle%comm_type == psb_comm_persistent_ineighbor_alltoallv_) - end if - - if (my_rank == 0) then - if (comm_ok) then - write(psb_out_unit,'(" [PASS] comm scheme selection : baseline/neighbor/persistent OK")') - n_pass = n_pass + 1 - else - write(psb_out_unit,'(" [FAIL] comm scheme selection : unexpected comm_type mapping")') - end if - end if - do i = 1, iters - ! baseline timing - t0 = psb_wtime() - call psi_swapdata( & - swap_status=psb_comm_status_start_, & - beta=dzero, & - y=v_baseline%v, & - desc_a=desc_a, & - info=info, & - data=psb_comm_halo_) - call psi_swapdata( & - swap_status=psb_comm_status_wait_, & - beta=dzero, & - y=v_baseline%v, & - desc_a=desc_a, & - info=info, & - data=psb_comm_halo_) - t1 = psb_wtime() - dt = t1 - t0 - call psb_amx(ctxt, dt) - tsum_baseline = tsum_baseline + dt + if (run_baseline) then + t0 = psb_wtime() + call psi_swapdata(psb_comm_status_start_, dzero, v_baseline%v, desc_a, info, data=psb_comm_halo_) + call psi_swapdata(psb_comm_status_wait_, dzero, v_baseline%v, desc_a, info, data=psb_comm_halo_) + t1 = psb_wtime() + dt = t1 - t0 + call psb_amx(ctxt, dt) + tsum_baseline = tsum_baseline + dt + end if - ! neighbor timing (start + wait) - t0 = psb_wtime() - call psi_swapdata(psb_comm_status_start_, dzero, v_neighbor%v, desc_a, info, data=psb_comm_halo_) - call psi_swapdata(psb_comm_status_wait_, dzero, v_neighbor%v, desc_a, info, data=psb_comm_halo_) - t1 = psb_wtime() - dt = t1 - t0 - call psb_amx(ctxt, dt) - tsum_neighbor = tsum_neighbor + dt + if (run_neighbor) then + t0 = psb_wtime() + call psi_swapdata(psb_comm_status_start_, dzero, v_neighbor%v, desc_a, info, data=psb_comm_halo_) + call psi_swapdata(psb_comm_status_wait_, dzero, v_neighbor%v, desc_a, info, data=psb_comm_halo_) + t1 = psb_wtime() + dt = t1 - t0 + call psb_amx(ctxt, dt) + tsum_neighbor = tsum_neighbor + dt + end if - ! persistent-neighbor timing (start + wait) - t0 = psb_wtime() - call psi_swapdata(psb_comm_status_start_, dzero, v_neighbor_persistent%v, desc_a, info, data=psb_comm_halo_) - call psi_swapdata(psb_comm_status_wait_, dzero, v_neighbor_persistent%v, desc_a, info, data=psb_comm_halo_) - t1 = psb_wtime() - dt = t1 - t0 - call psb_amx(ctxt, dt) - tsum_neighbor_persistent = tsum_neighbor_persistent + dt + if (run_persistent) then + t0 = psb_wtime() + call psi_swapdata(psb_comm_status_start_, dzero, v_neighbor_persistent%v, desc_a, info, data=psb_comm_halo_) + call psi_swapdata(psb_comm_status_wait_, dzero, v_neighbor_persistent%v, desc_a, info, data=psb_comm_halo_) + t1 = psb_wtime() + dt = t1 - t0 + call psb_amx(ctxt, dt) + tsum_neighbor_persistent = tsum_neighbor_persistent + dt + end if end do if (my_rank == 0) then - write(psb_out_unit,'(" Avg baseline time : ",es12.5)') (tsum_baseline / real(iters,psb_dpk_)) - write(psb_out_unit,'(" Tot baseline time : ",es12.5)') tsum_baseline - write(psb_out_unit,'(" Avg neighbor time : ",es12.5)') (tsum_neighbor / real(iters,psb_dpk_)) - write(psb_out_unit,'(" Tot neighbor time : ",es12.5)') tsum_neighbor - write(psb_out_unit,'(" Avg pers-neigh time: ",es12.5)') (tsum_neighbor_persistent / real(iters,psb_dpk_)) - write(psb_out_unit,'(" Tot pers-neigh time: ",es12.5)') tsum_neighbor_persistent + if (run_baseline) then + write(psb_out_unit,'(" Avg baseline time : ",es12.5)') (tsum_baseline / real(iters,psb_dpk_)) + write(psb_out_unit,'(" Tot baseline time : ",es12.5)') tsum_baseline + end if + if (run_neighbor) then + write(psb_out_unit,'(" Avg neighbor time : ",es12.5)') (tsum_neighbor / real(iters,psb_dpk_)) + write(psb_out_unit,'(" Tot neighbor time : ",es12.5)') tsum_neighbor + end if + if (run_persistent) then + write(psb_out_unit,'(" Avg pers-neigh time: ",es12.5)') (tsum_neighbor_persistent / real(iters,psb_dpk_)) + write(psb_out_unit,'(" Tot pers-neigh time: ",es12.5)') tsum_neighbor_persistent + end if end if ! ================================================================== @@ -347,113 +346,121 @@ program psb_comm_test result_neighbor = v_neighbor%get_vect() result_persistent = v_neighbor_persistent%get_vect() - ! ---- Test 1: cross-check baseline vs neighbor (all entries) ---- - n_total = n_total + 1 - err = maxval(abs(result_baseline(1:ncol) - result_neighbor(1:ncol))) - call psb_amx(ctxt, err) - if (my_rank == 0) then - if (err < tol) then - write(psb_out_unit,'(" [PASS] cross-check baseline vs neighbor : err = ",es12.5)') err - n_pass = n_pass + 1 - else - write(psb_out_unit,'(" [FAIL] cross-check baseline vs neighbor : err = ",es12.5)') err + if (run_baseline .and. run_neighbor) then + n_total = n_total + 1 + err = maxval(abs(result_baseline(1:ncol) - result_neighbor(1:ncol))) + call psb_amx(ctxt, err) + if (my_rank == 0) then + if (err < tol) then + write(psb_out_unit,'(" [PASS] cross-check baseline vs neighbor : err = ",es12.5)') err + n_pass = n_pass + 1 + else + write(psb_out_unit,'(" [FAIL] cross-check baseline vs neighbor : err = ",es12.5)') err + end if end if end if - ! ---- Test 2: baseline absolute correctness (halo = global index) ---- - n_total = n_total + 1 - err = maxval(abs(result_baseline(1:ncol) - expected(1:ncol))) - call psb_amx(ctxt, err) - if (my_rank == 0) then - if (err < tol) then - write(psb_out_unit,'(" [PASS] baseline absolute correctness : err = ",es12.5)') err - n_pass = n_pass + 1 - else - write(psb_out_unit,'(" [FAIL] baseline absolute correctness : err = ",es12.5)') err + if (run_baseline) then + n_total = n_total + 1 + err = maxval(abs(result_baseline(1:ncol) - expected(1:ncol))) + call psb_amx(ctxt, err) + if (my_rank == 0) then + if (err < tol) then + write(psb_out_unit,'(" [PASS] baseline absolute correctness : err = ",es12.5)') err + n_pass = n_pass + 1 + else + write(psb_out_unit,'(" [FAIL] baseline absolute correctness : err = ",es12.5)') err + end if end if end if - ! ---- Test 3: neighbor absolute correctness (halo = global index) ---- - n_total = n_total + 1 - err = maxval(abs(result_neighbor(1:ncol) - expected(1:ncol))) - call psb_amx(ctxt, err) - if (my_rank == 0) then - if (err < tol) then - write(psb_out_unit,'(" [PASS] neighbor absolute correctness : err = ",es12.5)') err - n_pass = n_pass + 1 - else - write(psb_out_unit,'(" [FAIL] neighbor absolute correctness : err = ",es12.5)') err + if (run_neighbor) then + n_total = n_total + 1 + err = maxval(abs(result_neighbor(1:ncol) - expected(1:ncol))) + call psb_amx(ctxt, err) + if (my_rank == 0) then + if (err < tol) then + write(psb_out_unit,'(" [PASS] neighbor absolute correctness : err = ",es12.5)') err + n_pass = n_pass + 1 + else + write(psb_out_unit,'(" [FAIL] neighbor absolute correctness : err = ",es12.5)') err + end if end if end if - ! ---- Test 4: cross-check baseline vs persistent-neighbor (all entries) ---- - n_total = n_total + 1 - err = maxval(abs(result_baseline(1:ncol) - result_persistent(1:ncol))) - call psb_amx(ctxt, err) - if (my_rank == 0) then - if (err < tol) then - write(psb_out_unit,'(" [PASS] cross-check baseline vs pers-nei : err = ",es12.5)') err - n_pass = n_pass + 1 - else - write(psb_out_unit,'(" [FAIL] cross-check baseline vs pers-nei : err = ",es12.5)') err + if (run_baseline .and. run_persistent) then + n_total = n_total + 1 + err = maxval(abs(result_baseline(1:ncol) - result_persistent(1:ncol))) + call psb_amx(ctxt, err) + if (my_rank == 0) then + if (err < tol) then + write(psb_out_unit,'(" [PASS] cross-check baseline vs pers-nei : err = ",es12.5)') err + n_pass = n_pass + 1 + else + write(psb_out_unit,'(" [FAIL] cross-check baseline vs pers-nei : err = ",es12.5)') err + end if end if end if - ! ---- Test 5: persistent-neighbor absolute correctness ---- - n_total = n_total + 1 - err = maxval(abs(result_persistent(1:ncol) - expected(1:ncol))) - call psb_amx(ctxt, err) - if (my_rank == 0) then - if (err < tol) then - write(psb_out_unit,'(" [PASS] pers-neigh absolute correctness : err = ",es12.5)') err - n_pass = n_pass + 1 - else - write(psb_out_unit,'(" [FAIL] pers-neigh absolute correctness : err = ",es12.5)') err + if (run_persistent) then + n_total = n_total + 1 + err = maxval(abs(result_persistent(1:ncol) - expected(1:ncol))) + call psb_amx(ctxt, err) + if (my_rank == 0) then + if (err < tol) then + write(psb_out_unit,'(" [PASS] pers-neigh absolute correctness : err = ",es12.5)') err + n_pass = n_pass + 1 + else + write(psb_out_unit,'(" [FAIL] pers-neigh absolute correctness : err = ",es12.5)') err + end if end if end if - ! ---- Test 6: repeat neighbor exchange (topology reuse) ---- - ! Reset halo entries to zero, run again, and check - do i = nrow+1, ncol - result_neighbor(i) = dzero - end do - call v_neighbor%set_vect(result_neighbor) + if (run_neighbor) then + ! ---- Test 6: repeat neighbor exchange (topology reuse) ---- + do i = nrow+1, ncol + result_neighbor(i) = dzero + end do + call v_neighbor%set_vect(result_neighbor) - call psi_swapdata(psb_comm_status_start_, dzero, v_neighbor%v, desc_a, info, data=psb_comm_halo_) - call psi_swapdata(psb_comm_status_wait_, dzero, v_neighbor%v, desc_a, info, data=psb_comm_halo_) + call psi_swapdata(psb_comm_status_start_, dzero, v_neighbor%v, desc_a, info, data=psb_comm_halo_) + call psi_swapdata(psb_comm_status_wait_, dzero, v_neighbor%v, desc_a, info, data=psb_comm_halo_) - result_neighbor = v_neighbor%get_vect() - n_total = n_total + 1 - err = maxval(abs(result_neighbor(1:ncol) - expected(1:ncol))) - call psb_amx(ctxt, err) - if (my_rank == 0) then - if (err < tol) then - write(psb_out_unit,'(" [PASS] neighbor topology reuse : err = ",es12.5)') err - n_pass = n_pass + 1 - else - write(psb_out_unit,'(" [FAIL] neighbor topology reuse : err = ",es12.5)') err + result_neighbor = v_neighbor%get_vect() + n_total = n_total + 1 + err = maxval(abs(result_neighbor(1:ncol) - expected(1:ncol))) + call psb_amx(ctxt, err) + if (my_rank == 0) then + if (err < tol) then + write(psb_out_unit,'(" [PASS] neighbor topology reuse : err = ",es12.5)') err + n_pass = n_pass + 1 + else + write(psb_out_unit,'(" [FAIL] neighbor topology reuse : err = ",es12.5)') err + end if end if end if - ! ---- Test 7: repeat persistent-neighbor exchange (buffer reuse) ---- - do i = nrow+1, ncol - result_persistent(i) = dzero - end do - call v_neighbor_persistent%set_vect(result_persistent) + if (run_persistent) then + ! ---- Test 7: repeat persistent-neighbor exchange (buffer reuse) ---- + do i = nrow+1, ncol + result_persistent(i) = dzero + end do + call v_neighbor_persistent%set_vect(result_persistent) - call psi_swapdata(psb_comm_status_start_, dzero, v_neighbor_persistent%v, desc_a, info, data=psb_comm_halo_) - call psi_swapdata(psb_comm_status_wait_, dzero, v_neighbor_persistent%v, desc_a, info, data=psb_comm_halo_) + call psi_swapdata(psb_comm_status_start_, dzero, v_neighbor_persistent%v, desc_a, info, data=psb_comm_halo_) + call psi_swapdata(psb_comm_status_wait_, dzero, v_neighbor_persistent%v, desc_a, info, data=psb_comm_halo_) - result_persistent = v_neighbor_persistent%get_vect() - n_total = n_total + 1 - err = maxval(abs(result_persistent(1:ncol) - expected(1:ncol))) - call psb_amx(ctxt, err) - if (my_rank == 0) then - if (err < tol) then - write(psb_out_unit,'(" [PASS] pers-neigh buffer reuse : err = ",es12.5)') err - n_pass = n_pass + 1 - else - write(psb_out_unit,'(" [FAIL] pers-neigh buffer reuse : err = ",es12.5)') err + result_persistent = v_neighbor_persistent%get_vect() + n_total = n_total + 1 + err = maxval(abs(result_persistent(1:ncol) - expected(1:ncol))) + call psb_amx(ctxt, err) + if (my_rank == 0) then + if (err < tol) then + write(psb_out_unit,'(" [PASS] pers-neigh buffer reuse : err = ",es12.5)') err + n_pass = n_pass + 1 + else + write(psb_out_unit,'(" [FAIL] pers-neigh buffer reuse : err = ",es12.5)') err + end if end if end if