[UPDATE] Updated buffer reuse in psb_dcg

communication_v2
Stack-1 2 months ago
parent 33477e4f03
commit 6b803fd759

@ -159,7 +159,7 @@ contains
end if
if (.not. allocated(y%comm_handle)) then
call psb_comm_init(psb_comm_isend_irecv_, y%comm_handle, info)
call psb_comm_set(psb_comm_isend_irecv_, y%comm_handle, info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_, name, a_err='init comm default baseline')
goto 9999
@ -338,6 +338,16 @@ contains
snd_pt = 1+pnti+nerv+psb_n_elem_send_
rcv_pt = 1+pnti+psb_n_elem_recv_
idx_pt = snd_pt
if ((idx_pt < 1) .or. (nesd < 0) .or. (idx_pt+max(0,nesd)-1 > size(comm_indexes%v))) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='baseline gather metadata out of bounds')
goto 9999
end if
if ((idx_pt < 1) .or. (nesd < 0) .or. (idx_pt+max(0,nesd)-1 > size(y%combuf))) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='baseline gather combuf bounds error')
goto 9999
end if
call y%gth(idx_pt,nesd,comm_indexes)
pnti = pnti + nerv + nesd + 3
end do
@ -441,6 +451,17 @@ contains
snd_pt = 1+pnti+nerv+psb_n_elem_send_
rcv_pt = 1+pnti+psb_n_elem_recv_
if ((idx_pt < 1) .or. (nerv < 0) .or. (idx_pt+max(0,nerv)-1 > size(comm_indexes%v))) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='baseline scatter metadata out of bounds')
goto 9999
end if
if ((rcv_pt < 1) .or. (nerv < 0) .or. (rcv_pt+max(0,nerv)-1 > size(y%combuf))) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='baseline scatter combuf bounds error')
goto 9999
end if
if (debug) write(0,*)me,' Received from: ',prcid(i),&
& y%combuf(rcv_pt:rcv_pt+nerv-1)
call y%sct(rcv_pt,nerv,comm_indexes,beta)
@ -547,6 +568,13 @@ contains
! ---------------------------------------------------------
if (do_start) then
if(debug) write(*,*) me,' nbr_vect: starting data exchange'
if (neighbor_comm_handle%use_persistent_buffers) then
if (neighbor_comm_handle%persistent_in_flight) then
info = psb_err_mpi_error_
call psb_errpush(info, name, a_err='Invalid START: persistent neighbor request already in flight')
goto 9999
end if
end if
if (.not. neighbor_comm_handle%is_initialized) then
if (debug) write(*,*) me,' nbr_vect: building topology via handle'
call neighbor_comm_handle%topology_init(comm_indexes%v, num_neighbors, total_send, total_recv, ctxt, icomm, info)
@ -571,6 +599,7 @@ contains
end if
neighbor_comm_handle%persistent_request = mpi_request_null
neighbor_comm_handle%persistent_request_ready = .false.
neighbor_comm_handle%persistent_in_flight = .false.
neighbor_comm_handle%persistent_buffer_size = 0
end if
call y%new_buffer(buffer_size, info)
@ -635,6 +664,7 @@ contains
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
neighbor_comm_handle%persistent_in_flight = .true.
#else
call mpi_ineighbor_alltoallv( &
& y%combuf(1), & ! send buffer
@ -652,6 +682,7 @@ contains
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
neighbor_comm_handle%persistent_in_flight = .true.
#endif
else
! Post non-blocking neighborhood alltoallv
@ -682,19 +713,11 @@ contains
if (do_wait) then
if (neighbor_comm_handle%use_persistent_buffers) then
#ifdef PSB_HAVE_MPI_NEIGHBOR_PERSISTENT
if (.not. neighbor_comm_handle%persistent_request_ready) then
if (.not. neighbor_comm_handle%persistent_in_flight) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/-2/))
call psb_errpush(info, name, a_err='Invalid WAIT: no persistent neighbor request in flight')
goto 9999
end if
#else
if (neighbor_comm_handle%comm_request == mpi_request_null) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/-2/))
goto 9999
end if
#endif
else
if (neighbor_comm_handle%comm_request == mpi_request_null) then
write(psb_err_unit,*) me, 'DBG: neighbor WAIT but comm_request is NULL; is_initialized=', &
@ -724,6 +747,9 @@ contains
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
if (neighbor_comm_handle%use_persistent_buffers) then
neighbor_comm_handle%persistent_in_flight = .false.
end if
! Scatter received data to local vector positions (polymorphic for GPU)
if (debug) write(*,*) me,' nbr_vect: scattering recv data,', topology_total_recv,' elems'
@ -834,7 +860,7 @@ contains
end if
if (.not. allocated(y%comm_handle)) then
call psb_comm_init(psb_comm_isend_irecv_, y%comm_handle, info)
call psb_comm_set(psb_comm_isend_irecv_, y%comm_handle, info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_, name, a_err='init comm default baseline')
goto 9999
@ -1202,6 +1228,13 @@ subroutine psi_dswap_neighbor_topology_multivect(ctxt,swap_status,beta,y,comm_in
! ---------------------------------------------------------
if (do_start) then
if(debug) write(*,*) me,' nbr_vect: starting data exchange'
if (neighbor_comm_handle%use_persistent_buffers) then
if (neighbor_comm_handle%persistent_in_flight) then
info = psb_err_mpi_error_
call psb_errpush(info, name, a_err='Invalid START: persistent neighbor request already in flight')
goto 9999
end if
end if
if (.not. neighbor_comm_handle%is_initialized) then
if (debug) write(*,*) me,' nbr_vect: building topology via handle'
call neighbor_comm_handle%topology_init(comm_indexes%v, num_neighbors, total_send, total_recv, &
@ -1227,6 +1260,7 @@ subroutine psi_dswap_neighbor_topology_multivect(ctxt,swap_status,beta,y,comm_in
end if
neighbor_comm_handle%persistent_request = mpi_request_null
neighbor_comm_handle%persistent_request_ready = .false.
neighbor_comm_handle%persistent_in_flight = .false.
neighbor_comm_handle%persistent_buffer_size = 0
end if
call y%new_buffer(buffer_size, info)
@ -1290,6 +1324,7 @@ subroutine psi_dswap_neighbor_topology_multivect(ctxt,swap_status,beta,y,comm_in
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
neighbor_comm_handle%persistent_in_flight = .true.
#else
call mpi_ineighbor_alltoallv( &
& y%combuf(1), & ! send buffer
@ -1307,6 +1342,7 @@ subroutine psi_dswap_neighbor_topology_multivect(ctxt,swap_status,beta,y,comm_in
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
neighbor_comm_handle%persistent_in_flight = .true.
#endif
else
! Post non-blocking neighborhood alltoallv
@ -1337,19 +1373,11 @@ subroutine psi_dswap_neighbor_topology_multivect(ctxt,swap_status,beta,y,comm_in
if (do_wait) then
if (neighbor_comm_handle%use_persistent_buffers) then
#ifdef PSB_HAVE_MPI_NEIGHBOR_PERSISTENT
if (.not. neighbor_comm_handle%persistent_request_ready) then
if (.not. neighbor_comm_handle%persistent_in_flight) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/-2/))
goto 9999
end if
#else
if (neighbor_comm_handle%comm_request == mpi_request_null) then
info = psb_err_mpi_error_
call psb_errpush(info, name, m_err=(/-2/))
call psb_errpush(info, name, a_err='Invalid WAIT: no persistent neighbor request in flight')
goto 9999
end if
#endif
else
if (neighbor_comm_handle%comm_request == mpi_request_null) then
info = psb_err_mpi_error_
@ -1377,6 +1405,9 @@ subroutine psi_dswap_neighbor_topology_multivect(ctxt,swap_status,beta,y,comm_in
call psb_errpush(info, name, m_err=(/iret/))
goto 9999
end if
if (neighbor_comm_handle%use_persistent_buffers) then
neighbor_comm_handle%persistent_in_flight = .false.
end if
! Scatter received data to local vector positions (polymorphic for GPU)
if (debug) write(*,*) me,' nbr_vect: scattering recv data,', topology_total_recv,' elems'

@ -160,7 +160,7 @@ contains
end if
if (.not. allocated(y%comm_handle)) then
call psb_comm_init(psb_comm_isend_irecv_, y%comm_handle, info)
call psb_comm_set(psb_comm_isend_irecv_, y%comm_handle, info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_, name, a_err='init comm default baseline')
goto 9999
@ -741,7 +741,7 @@ contains
end if
if (.not. allocated(y%comm_handle)) then
call psb_comm_init(psb_comm_isend_irecv_, y%comm_handle, info)
call psb_comm_set(psb_comm_isend_irecv_, y%comm_handle, info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_, name, a_err='init comm default baseline')
goto 9999

@ -11,7 +11,7 @@ module psb_comm_factory_mod
contains
! Allocatable-based factory routines (preferred names)
subroutine psb_comm_init(comm_type, handle, info)
subroutine psb_comm_set(comm_type, handle, info)
implicit none
integer(psb_ipk_), intent(in) :: comm_type
class(psb_comm_handle_type), allocatable, intent(inout) :: handle
@ -68,7 +68,7 @@ contains
handle%id = old_id
handle%swap_status = old_swap_status
end select
end subroutine psb_comm_init
end subroutine psb_comm_set
subroutine psb_comm_free(handle, info)
implicit none
@ -83,25 +83,6 @@ contains
end if
end subroutine psb_comm_free
! Allocatable-based factory routines
subroutine psb_comm_create(comm_type, handle, info)
implicit none
integer(psb_ipk_), intent(in) :: comm_type
class(psb_comm_handle_type), allocatable, intent(inout) :: handle
integer(psb_ipk_), intent(out) :: info
call psb_comm_init(comm_type, handle, info)
end subroutine psb_comm_create
subroutine psb_comm_destroy(handle, info)
implicit none
class(psb_comm_handle_type), allocatable, intent(inout) :: handle
integer(psb_ipk_), intent(out) :: info
call psb_comm_free(handle, info)
end subroutine psb_comm_destroy
subroutine psb_comm_set_swap_status(handle, flag, info)
implicit none
class(psb_comm_handle_type), allocatable, intent(inout) :: handle

@ -28,6 +28,7 @@ module psb_comm_neighbor_impl_mod
integer(psb_mpk_) :: comm_request = mpi_request_null
integer(psb_mpk_) :: persistent_request = mpi_request_null
logical :: persistent_request_ready = .false.
logical :: persistent_in_flight = .false.
integer(psb_ipk_) :: persistent_buffer_size = 0
contains
procedure, pass :: init => psb_comm_neighbor_init
@ -316,6 +317,7 @@ contains
end if
this%persistent_request = mpi_request_null
this%persistent_request_ready = .false.
this%persistent_in_flight = .false.
this%persistent_buffer_size = 0
end if
@ -372,6 +374,7 @@ contains
this%id = 0
this%swap_status = 0
this%comm_request = mpi_request_null
this%persistent_in_flight = .false.
this%persistent_request = mpi_request_null
this%persistent_request_ready = .false.
this%persistent_buffer_size = 0
@ -388,6 +391,7 @@ contains
this%comm_request = mpi_request_null
this%persistent_request = mpi_request_null
this%persistent_request_ready = .false.
this%persistent_in_flight = .false.
this%persistent_buffer_size = 0
call this%free(info)
end subroutine psb_comm_neighbor_destroy
@ -424,6 +428,7 @@ contains
this%comm_request = mpi_request_null
this%persistent_request = mpi_request_null
this%persistent_request_ready = .false.
this%persistent_in_flight = .false.
this%persistent_buffer_size = 0
end subroutine psb_comm_neighbor_init

@ -21,15 +21,13 @@ module psb_comm_schemes_mod
end enum
! (abstract interfaces moved below type definition)
! --- comm handle type ---
type, abstract :: psb_comm_handle_type
integer(psb_ipk_) :: id = -1
integer(psb_ipk_) :: comm_type = psb_comm_unknown_
integer(psb_ipk_) :: swap_status = psb_comm_status_unknown_
contains
procedure(psb_comm_init), deferred :: init
procedure(psb_comm_set), deferred :: init
procedure(psb_comm_free), deferred :: free
procedure(psb_comm_set_swap_status), deferred :: set_swap_status
procedure(psb_comm_get_swap_status), deferred :: get_swap_status
@ -37,7 +35,7 @@ module psb_comm_schemes_mod
! --- abstract interfaces ---
abstract interface
subroutine psb_comm_init(this, info)
subroutine psb_comm_set(this, info)
import :: psb_ipk_, psb_comm_handle_type
class(psb_comm_handle_type), intent(inout) :: this
integer(psb_ipk_), intent(out) :: info

@ -50,7 +50,7 @@ module psb_d_base_vect_mod
use psb_i_base_vect_mod
use psb_l_base_vect_mod
use psb_comm_schemes_mod, only: psb_comm_handle_type, psb_comm_isend_irecv_, psb_comm_unknown_
use psb_comm_factory_mod, only: psb_comm_init, psb_comm_free
use psb_comm_factory_mod, only: psb_comm_set, psb_comm_free
!> \namespace psb_base_mod \class psb_d_base_vect_type
@ -176,13 +176,6 @@ module psb_d_base_vect_mod
procedure, pass(x) :: check_addr => d_base_check_addr
! Communication lifecycle split:
! - `create_comm`: allocate/select a fresh handle implementation via factory.
! - `init_comm`: configure the current handle instance from an existing one
! (e.g., copy `id` and swap status), and reset buffers;
! it recreates the handle only if missing or scheme changes.
! - `destroy_comm`/`free_comm`: release current handle resources.
!
! Dot product and AXPBY
@ -414,7 +407,7 @@ contains
end if
if (info == psb_success_) then
if (.not. allocated(x%comm_handle)) then
call psb_comm_init(psb_comm_isend_irecv_, x%comm_handle, info)
call psb_comm_set(psb_comm_isend_irecv_, x%comm_handle, info)
end if
end if
@ -436,7 +429,7 @@ contains
allocate(psb_d_base_vect_type :: y, stat=info)
if (info == psb_success_) then
call psb_comm_init(psb_comm_isend_irecv_, y%comm_handle, info)
call psb_comm_set(psb_comm_isend_irecv_, y%comm_handle, info)
end if
end subroutine d_base_mold
@ -464,7 +457,7 @@ contains
end if
if (info == psb_success_) then
if (.not. allocated(x%comm_handle)) then
call psb_comm_init(psb_comm_isend_irecv_, x%comm_handle, info)
call psb_comm_set(psb_comm_isend_irecv_, x%comm_handle, info)
end if
end if
@ -1114,7 +1107,7 @@ contains
call y%set_ncfs(x%get_ncfs())
if (allocated(x%iv)) y%iv = x%iv
if (allocated(x%comm_handle)) then
call psb_comm_init(x%comm_handle%comm_type, y%comm_handle, info)
call psb_comm_set(x%comm_handle%comm_type, y%comm_handle, info)
if (info /= psb_success_) return
y%comm_handle%id = x%comm_handle%id
call x%comm_handle%get_swap_status(swap_status, info)
@ -1122,7 +1115,7 @@ contains
call y%comm_handle%set_swap_status(swap_status, info)
if (info /= psb_success_) return
else
call psb_comm_init(psb_comm_isend_irecv_, y%comm_handle, info)
call psb_comm_set(psb_comm_isend_irecv_, y%comm_handle, info)
end if
end subroutine d_base_cpy
@ -2409,7 +2402,7 @@ contains
end if
if (need_new_handle) then
call psb_comm_init(comm_type, x%comm_handle, info)
call psb_comm_set(comm_type, x%comm_handle, info)
if (info /= psb_success_) return
end if

@ -120,6 +120,7 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,&
real(psb_dpk_) :: alpha, beta, rho, rho_old, sigma,alpha_old,beta_old
integer(psb_ipk_) :: itmax_, istop_, it, itx, itrace_,&
& n_col, n_row,err_act, ieg,nspl, istebz
integer(psb_ipk_) :: i, swap_status
integer(psb_lpk_) :: mglob
integer(psb_ipk_) :: debug_level, debug_unit
type(psb_ctxt_type) :: ctxt
@ -173,6 +174,19 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,&
if (info == psb_success_) call psb_geall(wwrk,desc_a,info,n=5_psb_ipk_)
if (info == psb_success_) call psb_geasb(wwrk,desc_a,info,mold=x%v,scratch=.true.)
if ((info == psb_success_).and.allocated(x%v%comm_handle)) then
do i=1,size(wwrk)
if (allocated(wwrk(i)%v)) then
call psb_comm_set(x%v%comm_handle%comm_type,wwrk(i)%v%comm_handle,info)
if (info /= psb_success_) exit
wwrk(i)%v%comm_handle%id = x%v%comm_handle%id
call x%v%comm_handle%get_swap_status(swap_status,info)
if (info /= psb_success_) exit
call wwrk(i)%v%comm_handle%set_swap_status(swap_status,info)
if (info /= psb_success_) exit
end if
end do
end if
if (info /= psb_success_) then
info=psb_err_from_subroutine_non_
call psb_errpush(info,name)

1824
log.txt

File diff suppressed because one or more lines are too long

@ -13,27 +13,41 @@ program psb_comm_cg_test
type(psb_dprec_type) :: prec
integer(psb_ipk_) :: info, iam, np
integer(psb_ipk_) :: idim, itmax, itrace, istop, iter, is
integer(psb_ipk_) :: iter_arr(3), info_arr(3)
integer(psb_ipk_) :: scheme_types(3)
real(psb_dpk_) :: eps, err, t1, t2
real(psb_dpk_) :: tsolve(3), err_arr(3)
character(len=25) :: scheme_names(3)
integer(psb_ipk_) :: idim, itmax, itrace, istop, iter
integer(psb_ipk_) :: scheme_idx, prec_idx, rep, nrep, nwarm
integer(psb_ipk_), parameter :: n_schemes=3, n_precs=2
integer(psb_ipk_), allocatable :: iter_count(:,:,:), solve_info(:,:,:)
integer(psb_ipk_) :: scheme_type(n_schemes)
real(psb_dpk_) :: eps, err, t_start, t_elapsed
real(psb_dpk_), allocatable :: setup_time(:,:,:), solve_time(:,:,:)
real(psb_dpk_), allocatable :: total_time(:,:,:), final_error(:,:,:)
real(psb_dpk_) :: avg_t, std_t, med_t, p10_t, p90_t, min_t, max_t
character(len=25) :: scheme_name(n_schemes)
character(len=12) :: prec_type(n_precs)
character(len=20) :: prec_name(n_precs)
character(len=5) :: afmt
character(len=256) :: arg
logical :: prec_ready
info = psb_success_
prec_ready = .false.
afmt = 'CSR'
idim = 40
itmax = 500
itrace = 0
idim = 20
itmax = 5000
nrep = 1
nwarm = 1
itrace = 0
istop = 2
eps = 1.d-6
scheme_types = (/ psb_comm_isend_irecv_, psb_comm_ineighbor_alltoallv_, &
scheme_type = (/ psb_comm_isend_irecv_, psb_comm_ineighbor_alltoallv_, &
& psb_comm_persistent_ineighbor_alltoallv_ /)
scheme_names(1) = 'isend_irecv'
scheme_names(2) = 'ineighbor_alltoallv'
scheme_names(3) = 'persistent_ineighbor_a2av'
scheme_name(1) = 'isend_irecv'
scheme_name(2) = 'ineighbor_alltoallv'
scheme_name(3) = 'persistent_ineighbor_a2av'
prec_type(1) = 'NONE'
prec_type(2) = 'DIAG'
prec_name(1) = 'none'
prec_name(2) = 'diag'
call get_command_argument(1,arg)
if (len_trim(arg) > 0) then
@ -43,6 +57,27 @@ program psb_comm_cg_test
info = psb_success_
end if
end if
call get_command_argument(2,arg)
if (len_trim(arg) > 0) then
read(arg,*,iostat=info) nrep
if ((info /= 0).or.(nrep <= 0)) then
nrep = 7
info = psb_success_
end if
end if
call get_command_argument(3,arg)
if (len_trim(arg) > 0) then
read(arg,*,iostat=info) nwarm
if ((info /= 0).or.(nwarm < 0)) then
nwarm = 1
info = psb_success_
end if
end if
allocate(setup_time(n_precs,n_schemes,nrep), solve_time(n_precs,n_schemes,nrep), &
& total_time(n_precs,n_schemes,nrep), final_error(n_precs,n_schemes,nrep), &
& iter_count(n_precs,n_schemes,nrep), solve_info(n_precs,n_schemes,nrep), stat=info)
if (info /= psb_success_) stop 1
call psb_init(ctxt)
call psb_info(ctxt, iam, np)
@ -53,58 +88,120 @@ program psb_comm_cg_test
write(psb_out_unit,'("Grid dimensions : ",i4," x ",i4," x ",i4)') idim,idim,idim
write(psb_out_unit,'("Number of processors : ",i0)') np
write(psb_out_unit,'("Iterative method : CG")')
write(psb_out_unit,'("Preconditioner : NONE")')
write(psb_out_unit,'("Preconditioners : NONE, DIAG")')
write(psb_out_unit,'("Repetitions : ",i0)') nrep
write(psb_out_unit,'("Warmup solves : ",i0," (each with itmax=1)")') nwarm
write(psb_out_unit,'(" ")')
end if
call psb_barrier(ctxt)
t1 = psb_wtime()
t_start = psb_wtime()
call psb_d_gen_pde3d(ctxt,idim,a,b,x,desc_a,afmt,info)
if (info /= psb_success_) goto 9999
call prec%init(ctxt,'NONE',info)
if (info /= psb_success_) goto 9999
call prec%build(a,desc_a,info)
if (info /= psb_success_) goto 9999
do is = 1, 3
call psb_geaxpby(dzero,b,dzero,x,desc_a,info)
if (info /= psb_success_) goto 9999
call psb_comm_init(scheme_types(is),x%v%comm_handle,info)
if (info /= psb_success_) goto 9999
call psb_barrier(ctxt)
t1 = psb_wtime()
call psb_krylov('CG',a,prec,b,x,eps,desc_a,info,&
& itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istop)
t2 = psb_wtime() - t1
call psb_amx(ctxt,t2)
tsolve(is) = t2
iter_arr(is) = iter
err_arr(is) = err
info_arr(is) = info
if (info /= psb_success_) goto 9999
do prec_idx = 1, n_precs
do scheme_idx = 1, n_schemes
do rep = 1, nrep
if (prec_ready) then
call psb_precfree(prec,info)
if (info /= psb_success_) goto 9999
prec_ready = .false.
end if
call psb_geaxpby(dzero,b,dzero,x,desc_a,info)
if (info /= psb_success_) goto 9999
call psb_barrier(ctxt)
t_start = psb_wtime()
call psb_precinit(ctxt,prec,trim(prec_type(prec_idx)),info)
if (info /= psb_success_) goto 9999
call psb_precbld(a,desc_a,prec,info)
if (info /= psb_success_) goto 9999
if (.not.allocated(prec%prec)) then
info = psb_err_internal_error_
write(psb_err_unit,*) 'Preconditioner object not allocated after build'
goto 9999
end if
prec_ready = .true.
call psb_comm_set(scheme_type(scheme_idx),x%v%comm_handle,info)
if (info /= psb_success_) goto 9999
if (.not.allocated(prec%prec)) then
info = psb_err_internal_error_
write(psb_err_unit,*) 'Preconditioner object lost after psb_comm_set'
goto 9999
end if
setup_time(prec_idx,scheme_idx,rep) = psb_wtime() - t_start
call psb_amx(ctxt,setup_time(prec_idx,scheme_idx,rep))
do iter = 1, nwarm
call psb_geaxpby(dzero,b,dzero,x,desc_a,info)
if (info /= psb_success_) goto 9999
call psb_krylov('CG',a,prec,b,x,eps,desc_a,info,&
& itmax=itmax,itrace=itrace,istop=istop)
if (info /= psb_success_) goto 9999
end do
call psb_geaxpby(dzero,b,dzero,x,desc_a,info)
if (info /= psb_success_) goto 9999
call psb_barrier(ctxt)
t_start = psb_wtime()
if (.not.allocated(prec%prec)) then
info = psb_err_internal_error_
write(psb_err_unit,*) 'Preconditioner object lost before psb_krylov'
goto 9999
end if
call psb_krylov('CG',a,prec,b,x,eps,desc_a,info,&
& itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istop)
t_elapsed = psb_wtime() - t_start
call psb_amx(ctxt,t_elapsed)
solve_time(prec_idx,scheme_idx,rep) = t_elapsed
total_time(prec_idx,scheme_idx,rep) = setup_time(prec_idx,scheme_idx,rep) + &
& solve_time(prec_idx,scheme_idx,rep)
iter_count(prec_idx,scheme_idx,rep) = iter
final_error(prec_idx,scheme_idx,rep) = err
solve_info(prec_idx,scheme_idx,rep) = info
if (info /= psb_success_) goto 9999
end do
end do
end do
if (iam == psb_root_) then
write(psb_out_unit,'(" ")')
write(psb_out_unit,'("CG solve time by communication scheme")')
write(psb_out_unit,'("--------------------------------------")')
do is = 1, 3
write(psb_out_unit,'(a25,2x,"time=",es12.5,2x,"iter=",i8,2x,"err=",es12.5,2x,"info=",i6)') &
& trim(scheme_names(is)), tsolve(is), iter_arr(is), err_arr(is), info_arr(is)
write(psb_out_unit,'("CG timing stats by preconditioner and communication scheme")')
write(psb_out_unit,'("-----------------------------------------------------------")')
do prec_idx = 1, n_precs
write(psb_out_unit,'("Preconditioner: ",a)') trim(prec_name(prec_idx))
do scheme_idx = 1, n_schemes
call compute_stats(setup_time(prec_idx,scheme_idx,:),avg_t,std_t,med_t,p10_t,p90_t,min_t,max_t)
write(psb_out_unit,&
& '(a25,2x,"setup median=",es12.5,2x,"p10=",es12.5,2x,"p90=",es12.5)') &
& trim(scheme_name(scheme_idx)), med_t, p10_t, p90_t
call compute_stats(solve_time(prec_idx,scheme_idx,:),avg_t,std_t,med_t,p10_t,p90_t,min_t,max_t)
write(psb_out_unit,&
& '(27x,"solve median=",es12.5,2x,"mean=",es12.5,2x,"std=",es12.5)') med_t, avg_t, std_t
call compute_stats(total_time(prec_idx,scheme_idx,:),avg_t,std_t,med_t,p10_t,p90_t,min_t,max_t)
write(psb_out_unit,&
& '(27x,"total median=",es12.5,2x,"min=",es12.5,2x,"max=",es12.5)') med_t, min_t, max_t
call compute_stats(final_error(prec_idx,scheme_idx,:),avg_t,std_t,med_t,p10_t,p90_t,min_t,max_t)
write(psb_out_unit,&
& '(27x,"err median=",es12.5,2x,"last it/info=",i8,"/",i6)') &
& med_t, iter_count(prec_idx,scheme_idx,nrep), solve_info(prec_idx,scheme_idx,nrep)
end do
write(psb_out_unit,'(" ")')
end do
end if
call psb_gefree(b,desc_a,info)
call psb_gefree(x,desc_a,info)
call psb_spfree(a,desc_a,info)
call prec%free(info)
if (prec_ready) call psb_precfree(prec,info)
call psb_cdfree(desc_a,info)
deallocate(setup_time,solve_time,total_time,final_error,iter_count,solve_info)
call psb_exit(ctxt)
stop
@ -114,6 +211,64 @@ program psb_comm_cg_test
contains
subroutine sort_real_inplace(v)
real(psb_dpk_), intent(inout) :: v(:)
integer(psb_ipk_) :: i, j
real(psb_dpk_) :: key
do i = 2, size(v)
key = v(i)
j = i - 1
do while ((j >= 1).and.(v(j) > key))
v(j+1) = v(j)
j = j - 1
end do
v(j+1) = key
end do
end subroutine sort_real_inplace
subroutine compute_stats(vals,mean_v,std_v,median_v,p10_v,p90_v,min_v,max_v)
real(psb_dpk_), intent(in) :: vals(:)
real(psb_dpk_), intent(out) :: mean_v,std_v,median_v,p10_v,p90_v,min_v,max_v
real(psb_dpk_), allocatable :: tmp(:)
integer(psb_ipk_) :: n, idx10, idx90
n = size(vals)
if (n <= 0) then
mean_v = dzero; std_v = dzero; median_v = dzero
p10_v = dzero; p90_v = dzero; min_v = dzero; max_v = dzero
return
end if
mean_v = sum(vals)/real(n,psb_dpk_)
if (n > 1) then
std_v = sqrt(sum((vals-mean_v)**2)/real(n-1,psb_dpk_))
else
std_v = dzero
end if
allocate(tmp(n))
tmp = vals
call sort_real_inplace(tmp)
if (mod(n,2) == 0) then
median_v = (tmp(n/2)+tmp(n/2+1))/2.0_psb_dpk_
else
median_v = tmp((n+1)/2)
end if
idx10 = int(ceiling(0.10_psb_dpk_*real(n,psb_dpk_)),kind=psb_ipk_)
idx90 = int(ceiling(0.90_psb_dpk_*real(n,psb_dpk_)),kind=psb_ipk_)
idx10 = max(1_psb_ipk_,min(n,idx10))
idx90 = max(1_psb_ipk_,min(n,idx90))
p10_v = tmp(idx10)
p90_v = tmp(idx90)
min_v = tmp(1)
max_v = tmp(n)
deallocate(tmp)
end subroutine compute_stats
function b1(x,y,z) result(val)
real(psb_dpk_), intent(in) :: x,y,z
real(psb_dpk_) :: val

@ -8,7 +8,7 @@ module psb_spmv_overlap_test
use psb_base_mod
use psb_util_mod
use psb_comm_factory_mod, only: psb_comm_init
use psb_comm_factory_mod, only: psb_comm_set
use psb_comm_schemes_mod, only: psb_comm_isend_irecv_, psb_comm_ineighbor_alltoallv_, &
& psb_comm_persistent_ineighbor_alltoallv_
@ -584,11 +584,11 @@ contains
if (info /= psb_success_) goto 9999
! Set communication schemes on the x vectors used by psb_spmm.
call psb_comm_init(psb_comm_isend_irecv_, x_baseline%v%comm_handle, info)
call psb_comm_set(psb_comm_isend_irecv_, x_baseline%v%comm_handle, info)
if (info /= psb_success_) goto 9999
call psb_comm_init(psb_comm_ineighbor_alltoallv_, x_neighbor%v%comm_handle, info)
call psb_comm_set(psb_comm_ineighbor_alltoallv_, x_neighbor%v%comm_handle, info)
if (info /= psb_success_) goto 9999
call psb_comm_init(psb_comm_persistent_ineighbor_alltoallv_, x_persistent%v%comm_handle, info)
call psb_comm_set(psb_comm_persistent_ineighbor_alltoallv_, x_persistent%v%comm_handle, info)
if (info /= psb_success_) goto 9999
! Warm-up all schemes once.

@ -18,7 +18,7 @@
program psb_comm_test
use psb_base_mod
use psi_mod
use psb_comm_factory_mod, only: psb_comm_init, psb_comm_free
use psb_comm_factory_mod, only: psb_comm_set, psb_comm_free
use psb_comm_schemes_mod, only: psb_comm_ineighbor_alltoallv_, psb_comm_persistent_ineighbor_alltoallv_, &
& psb_comm_isend_irecv_
use psb_comm_schemes_mod, only: psb_comm_status_start_, psb_comm_status_wait_, psb_comm_status_unknown_
@ -219,9 +219,9 @@ program psb_comm_test
! ==================================================================
! 7. Neighbor topology halo exchange (start + wait)
! ==================================================================
call psb_comm_init(psb_comm_ineighbor_alltoallv_, v_neighbor%v%comm_handle, info)
call psb_comm_set(psb_comm_ineighbor_alltoallv_, v_neighbor%v%comm_handle, info)
if (info /= 0) then
write(psb_err_unit,*) my_rank, 'psb_comm_init neighbor error:', info
write(psb_err_unit,*) my_rank, 'psb_comm_set neighbor error:', info
call psb_abort(ctxt)
end if
call psi_swapdata(psb_comm_status_start_, dzero, v_neighbor%v, desc_a, info, data=psb_comm_halo_)
@ -239,9 +239,9 @@ program psb_comm_test
! ==================================================================
! 7b. Persistent-neighbor halo exchange (start + wait)
! ==================================================================
call psb_comm_init(psb_comm_persistent_ineighbor_alltoallv_, v_neighbor_persistent%v%comm_handle, info)
call psb_comm_set(psb_comm_persistent_ineighbor_alltoallv_, v_neighbor_persistent%v%comm_handle, info)
if (info /= 0) then
write(psb_err_unit,*) my_rank, 'psb_comm_init persistent-neighbor error:', info
write(psb_err_unit,*) my_rank, 'psb_comm_set persistent-neighbor error:', info
call psb_abort(ctxt)
end if
call psi_swapdata(psb_comm_status_start_, dzero, v_neighbor_persistent%v, desc_a, info, data=psb_comm_halo_)
@ -266,9 +266,9 @@ program psb_comm_test
tsum_neighbor = 0.0_psb_dpk_
tsum_neighbor_persistent = 0.0_psb_dpk_
call psb_comm_init(psb_comm_isend_irecv_, v_baseline%v%comm_handle, info)
call psb_comm_init(psb_comm_ineighbor_alltoallv_, v_neighbor%v%comm_handle, info)
call psb_comm_init(psb_comm_persistent_ineighbor_alltoallv_, v_neighbor_persistent%v%comm_handle, info)
call psb_comm_set(psb_comm_isend_irecv_, v_baseline%v%comm_handle, info)
call psb_comm_set(psb_comm_ineighbor_alltoallv_, v_neighbor%v%comm_handle, info)
call psb_comm_set(psb_comm_persistent_ineighbor_alltoallv_, v_neighbor_persistent%v%comm_handle, info)
! ---- Comm check: verify selected communication schemes ----
n_total = n_total + 1

@ -837,7 +837,7 @@ program psb_d_pde3d
& err=err,itrace=itrace,&
& istop=istopc)
case('BICGSTAB','BICGSTABL','BICG','CG','CGS','FCG','GCR','RGMRES')
call psb_comm_init(psb_comm_persistent_ineighbor_alltoallv_,xxv%v%comm_handle,info)
call psb_comm_set(psb_comm_persistent_ineighbor_alltoallv_,xxv%v%comm_handle,info)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='comm init'

@ -1,6 +1,6 @@
17 Number of entries below this
BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES FCG CGR RICHARDSON
BJAC Preconditioner NONE DIAG BJAC
DIAG Preconditioner NONE DIAG BJAC
CSR Storage format for matrix A: CSR COO
100 Domain size (acutal system is this**3 (pde3d) )
3 Partition: 1 BLOCK 3 3D

Loading…
Cancel
Save