You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
psblas3/test/halo_new/test_halo_new.F90

280 lines
10 KiB
Fortran

!
! Test program for D-type halo exchange: baseline vs neighbor topology.
!
! This test exercises the lower-level psi_swapdata interface directly
! to compare the two communication paths implemented in psi_dswapdata.F90:
!
! 1. Baseline (Isend/Irecv) : flag = IOR(psb_swap_send_, psb_swap_recv_)
! 2. Neighbor topology (Ineighbor_alltoallv) : flag = psb_swap_start_ then psb_swap_wait_
!
! It builds a 3D block-partitioned descriptor with a 7-point stencil,
! fills owned entries with their global index, performs halo exchange
! via both paths, then checks:
! (a) The two paths produce identical results (cross-check)
! (b) Every halo entry equals the global index of its source (absolute check)
!
! Run with: mpirun -np <P> ./test_halo_new
!
program test_halo_new
use psb_base_mod
use psi_mod
implicit none
! ---- parameters ----
integer(psb_ipk_), parameter :: idim = 10 ! grid idim x idim x idim
! ---- descriptor / context ----
type(psb_ctxt_type) :: ctxt
type(psb_desc_type) :: desc_a
integer(psb_ipk_) :: iam, np, info, i, nr, nlr
integer(psb_lpk_) :: m, nt
integer(psb_lpk_), allocatable :: myidx(:)
! ---- vectors ----
type(psb_d_vect_type) :: v_baseline, v_neighbor
! ---- temporary / comparison arrays ----
real(psb_dpk_), allocatable :: vals(:)
real(psb_dpk_), allocatable :: res_bl(:), res_nb(:)
real(psb_dpk_), allocatable :: expected(:)
! ---- work buffer for psi_swapdata ----
real(psb_dpk_), allocatable :: work(:)
! ---- halo index bookkeeping ----
integer(psb_ipk_) :: nrow, ncol, totxch, idxs, idxr, data_
class(psb_i_base_vect_type), pointer :: d_vidx
! ---- error / reporting ----
integer(psb_ipk_) :: n_pass, n_total, imode
real(psb_dpk_) :: err, tol
integer(psb_lpk_), allocatable :: glob_col(:)
character(len=40) :: name
name = 'test_halo_new'
tol = 1.0d-12
n_pass = 0
n_total = 0
! ==================================================================
! 1. Initialise MPI / PSBLAS context
! ==================================================================
call psb_init(ctxt)
call psb_info(ctxt, iam, np)
if (iam == 0) then
write(psb_out_unit,'("================================================")')
write(psb_out_unit,'(" Test: D-type halo baseline vs neighbor topo")')
write(psb_out_unit,'(" Processes : ",i0)') np
write(psb_out_unit,'(" Grid : ",i0," x ",i0," x ",i0)') idim,idim,idim
write(psb_out_unit,'("================================================")')
end if
! ==================================================================
! 2. Build descriptor with 7-point stencil connectivity
! ==================================================================
m = (1_psb_lpk_ * idim) * idim * idim
nt = (m + np - 1) / np
nr = max(0, min(int(nt,psb_ipk_), int(m - (iam * nt),psb_ipk_)))
call psb_cdall(ctxt, desc_a, info, nl=nr)
if (info /= psb_success_) then
write(psb_err_unit,*) iam, 'cdall error:', info
call psb_abort(ctxt)
end if
myidx = desc_a%get_global_indices()
nlr = size(myidx)
do i = 1, nlr
call psb_cdins(1_psb_ipk_, (/myidx(i)/), (/myidx(i)/), desc_a, info)
if (myidx(i) > 1) &
& call psb_cdins(1_psb_ipk_, (/myidx(i)/), (/myidx(i)-1/), desc_a, info)
if (myidx(i) < m) &
& call psb_cdins(1_psb_ipk_, (/myidx(i)/), (/myidx(i)+1/), desc_a, info)
if (myidx(i) > idim) &
& call psb_cdins(1_psb_ipk_, (/myidx(i)/), (/myidx(i)-idim/), desc_a, info)
if (myidx(i) + idim <= m) &
& call psb_cdins(1_psb_ipk_, (/myidx(i)/), (/myidx(i)+idim/), desc_a, info)
if (myidx(i) > int(idim,psb_lpk_)*idim) &
& call psb_cdins(1_psb_ipk_, (/myidx(i)/), &
& (/myidx(i) - int(idim,psb_lpk_)*idim/), desc_a, info)
if (myidx(i) + int(idim,psb_lpk_)*idim <= m) &
& call psb_cdins(1_psb_ipk_, (/myidx(i)/), &
& (/myidx(i) + int(idim,psb_lpk_)*idim/), desc_a, info)
end do
call psb_cdasb(desc_a, info)
if (info /= psb_success_) then
write(psb_err_unit,*) iam, 'cdasb error:', info
call psb_abort(ctxt)
end if
nrow = desc_a%get_local_rows() ! owned
ncol = desc_a%get_local_cols() ! owned + halo
! ==================================================================
! 3. Allocate two D vectors (scratch) and fill owned entries
! ==================================================================
call psb_geall(v_baseline, desc_a, info)
call psb_geall(v_neighbor, desc_a, info)
call psb_geasb(v_baseline, desc_a, info, scratch=.true.)
call psb_geasb(v_neighbor, desc_a, info, scratch=.true.)
! Fill owned entries with the global index value
allocate(vals(ncol))
vals = dzero
do i = 1, nlr
vals(i) = real(myidx(i), psb_dpk_)
end do
call v_baseline%set_vect(vals)
call v_neighbor%set_vect(vals)
deallocate(vals)
! ==================================================================
! 4. Build the expected result for halo positions
! glob_col(j) = global index of local column j
! After halo exchange every position j should hold glob_col(j).
! ==================================================================
allocate(glob_col(ncol), expected(ncol))
glob_col = desc_a%get_global_indices(owned=.false.)
do i = 1, ncol
expected(i) = real(glob_col(i), psb_dpk_)
end do
! ==================================================================
! 5. Retrieve halo index list (same list used by both paths)
! ==================================================================
data_ = psb_comm_halo_
call desc_a%get_list_p(data_, d_vidx, totxch, idxr, idxs, info)
if (info /= psb_success_) then
write(psb_err_unit,*) iam, 'get_list_p error:', info
call psb_abort(ctxt)
end if
allocate(work(nrow))
work = dzero
! ==================================================================
! 6. Baseline halo exchange (Isend/Irecv in one call)
! ==================================================================
imode = IOR(psb_swap_send_, psb_swap_recv_)
call psi_swapdata(ctxt, desc_a%get_mpic(), imode, dzero, &
& v_baseline%v, d_vidx, totxch, idxs, idxr, work, info)
if (info /= psb_success_) then
write(psb_err_unit,*) iam, 'baseline swap error:', info
call psb_abort(ctxt)
end if
! ==================================================================
! 7. Neighbor topology halo exchange (start + wait)
! ==================================================================
call psi_swapdata(ctxt, desc_a%get_mpic(), psb_swap_start_, dzero, &
& v_neighbor%v, d_vidx, totxch, idxs, idxr, work, info)
if (info /= psb_success_) then
write(psb_err_unit,*) iam, 'neighbor start error:', info
call psb_abort(ctxt)
end if
call psi_swapdata(ctxt, desc_a%get_mpic(), psb_swap_wait_, dzero, &
& v_neighbor%v, d_vidx, totxch, idxs, idxr, work, info)
if (info /= psb_success_) then
write(psb_err_unit,*) iam, 'neighbor wait error:', info
call psb_abort(ctxt)
end if
! ==================================================================
! 8. Extract results and compare
! ==================================================================
res_bl = v_baseline%get_vect()
res_nb = v_neighbor%get_vect()
! ---- Test 1: cross-check baseline vs neighbor (all entries) ----
n_total = n_total + 1
err = maxval(abs(res_bl(1:ncol) - res_nb(1:ncol)))
call psb_amx(ctxt, err)
if (iam == 0) then
if (err < tol) then
write(psb_out_unit,'(" [PASS] cross-check baseline vs neighbor : err = ",es12.5)') err
n_pass = n_pass + 1
else
write(psb_out_unit,'(" [FAIL] cross-check baseline vs neighbor : err = ",es12.5)') err
end if
end if
! ---- Test 2: baseline absolute correctness (halo = global index) ----
n_total = n_total + 1
err = maxval(abs(res_bl(1:ncol) - expected(1:ncol)))
call psb_amx(ctxt, err)
if (iam == 0) then
if (err < tol) then
write(psb_out_unit,'(" [PASS] baseline absolute correctness : err = ",es12.5)') err
n_pass = n_pass + 1
else
write(psb_out_unit,'(" [FAIL] baseline absolute correctness : err = ",es12.5)') err
end if
end if
! ---- Test 3: neighbor absolute correctness (halo = global index) ----
n_total = n_total + 1
err = maxval(abs(res_nb(1:ncol) - expected(1:ncol)))
call psb_amx(ctxt, err)
if (iam == 0) then
if (err < tol) then
write(psb_out_unit,'(" [PASS] neighbor absolute correctness : err = ",es12.5)') err
n_pass = n_pass + 1
else
write(psb_out_unit,'(" [FAIL] neighbor absolute correctness : err = ",es12.5)') err
end if
end if
! ---- Test 4: repeat neighbor exchange (topology reuse) ----
! Reset halo entries to zero, run again, and check
do i = nrow+1, ncol
res_nb(i) = dzero
end do
call v_neighbor%set_vect(res_nb)
call psi_swapdata(ctxt, desc_a%get_mpic(), psb_swap_start_, dzero, &
& v_neighbor%v, d_vidx, totxch, idxs, idxr, work, info)
call psi_swapdata(ctxt, desc_a%get_mpic(), psb_swap_wait_, dzero, &
& v_neighbor%v, d_vidx, totxch, idxs, idxr, work, info)
res_nb = v_neighbor%get_vect()
n_total = n_total + 1
err = maxval(abs(res_nb(1:ncol) - expected(1:ncol)))
call psb_amx(ctxt, err)
if (iam == 0) then
if (err < tol) then
write(psb_out_unit,'(" [PASS] neighbor topology reuse : err = ",es12.5)') err
n_pass = n_pass + 1
else
write(psb_out_unit,'(" [FAIL] neighbor topology reuse : err = ",es12.5)') err
end if
end if
! ==================================================================
! 9. Summary
! ==================================================================
if (iam == 0) then
write(psb_out_unit,'("================================================")')
write(psb_out_unit,'(" Results: ",i0," / ",i0," tests passed")') n_pass, n_total
if (n_pass == n_total) then
write(psb_out_unit,'(" STATUS: ALL PASSED")')
else
write(psb_out_unit,'(" STATUS: SOME FAILURES")')
end if
write(psb_out_unit,'("================================================")')
end if
! ==================================================================
! 10. Cleanup
! ==================================================================
deallocate(res_bl, res_nb, expected, glob_col, work)
call psb_gefree(v_baseline, desc_a, info)
call psb_gefree(v_neighbor, desc_a, info)
call psb_cdfree(desc_a, info)
call psb_exit(ctxt)
end program test_halo_new