[UPDATE] Convert the CG test to the builder API and drop the redundant builder test

Rewrite psb_d_nest_cg_test to build the operator through the psb_d_nest_matrix
utility (init/ins/asb + get_owned_rows) instead of the low-level path, so no
per-field descriptor or l2g idiom appears in user-facing test code; x_exact=1
is set with x%set(done) rather than an l2g loop.

With this change psb_d_nest_cg_test fully subsumes psb_d_nest_builder_test
(same operator via the same builder, NONE plus DIAG/BJAC), so the latter is
removed.  The test suite is now glob (square matvec), rect (rectangular
matvec) and cg (builder + preconditioned CG).  Build hooks and README updated.

Author: Simone Staccone (Stack-1)
nested_matrix_type
Stack-1 2 weeks ago
parent e1d759d019
commit 8bd49c43b1

@ -26,7 +26,6 @@ file(MAKE_DIRECTORY ${EXEDIR})
set(SOURCES_D_NEST_GLOB_TEST psb_d_nest_glob_test.F90) set(SOURCES_D_NEST_GLOB_TEST psb_d_nest_glob_test.F90)
set(SOURCES_D_NEST_RECT_TEST psb_d_nest_rect_test.F90) set(SOURCES_D_NEST_RECT_TEST psb_d_nest_rect_test.F90)
set(SOURCES_D_NEST_CG_TEST psb_d_nest_cg_test.F90) set(SOURCES_D_NEST_CG_TEST psb_d_nest_cg_test.F90)
set(SOURCES_D_NEST_BUILDER_TEST psb_d_nest_builder_test.F90)
add_executable(psb_d_nest_glob_test ${SOURCES_D_NEST_GLOB_TEST}) add_executable(psb_d_nest_glob_test ${SOURCES_D_NEST_GLOB_TEST})
target_link_libraries(psb_d_nest_glob_test psblas::util psblas::linsolve psblas::prec psblas::ext psblas::base) target_link_libraries(psb_d_nest_glob_test psblas::util psblas::linsolve psblas::prec psblas::ext psblas::base)
@ -37,11 +36,8 @@ target_link_libraries(psb_d_nest_rect_test psblas::util psblas::linsolve psblas:
add_executable(psb_d_nest_cg_test ${SOURCES_D_NEST_CG_TEST}) add_executable(psb_d_nest_cg_test ${SOURCES_D_NEST_CG_TEST})
target_link_libraries(psb_d_nest_cg_test psblas::util psblas::linsolve psblas::prec psblas::ext psblas::base) target_link_libraries(psb_d_nest_cg_test psblas::util psblas::linsolve psblas::prec psblas::ext psblas::base)
add_executable(psb_d_nest_builder_test ${SOURCES_D_NEST_BUILDER_TEST})
target_link_libraries(psb_d_nest_builder_test psblas::util psblas::linsolve psblas::prec psblas::ext psblas::base)
# Set output directory for executables # Set output directory for executables
foreach(target psb_d_nest_glob_test psb_d_nest_rect_test psb_d_nest_cg_test psb_d_nest_builder_test) foreach(target psb_d_nest_glob_test psb_d_nest_rect_test psb_d_nest_cg_test)
set_target_properties(${target} PROPERTIES set_target_properties(${target} PROPERTIES
RUNTIME_OUTPUT_DIRECTORY ${EXEDIR} RUNTIME_OUTPUT_DIRECTORY ${EXEDIR}
) )

@ -16,7 +16,7 @@ FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG).
EXEDIR=./runs EXEDIR=./runs
all: runsd psb_d_nest_glob_test psb_d_nest_rect_test psb_d_nest_cg_test psb_d_nest_builder_test all: runsd psb_d_nest_glob_test psb_d_nest_rect_test psb_d_nest_cg_test
runsd: runsd:
(if test ! -d runs ; then mkdir runs; fi) (if test ! -d runs ; then mkdir runs; fi)
@ -33,13 +33,9 @@ psb_d_nest_cg_test: psb_d_nest_cg_test.o
$(FLINK) psb_d_nest_cg_test.o -o psb_d_nest_cg_test $(PSBLAS_LIB) $(LDLIBS) $(FLINK) psb_d_nest_cg_test.o -o psb_d_nest_cg_test $(PSBLAS_LIB) $(LDLIBS)
/bin/mv psb_d_nest_cg_test $(EXEDIR) /bin/mv psb_d_nest_cg_test $(EXEDIR)
psb_d_nest_builder_test: psb_d_nest_builder_test.o
$(FLINK) psb_d_nest_builder_test.o -o psb_d_nest_builder_test $(PSBLAS_LIB) $(LDLIBS)
/bin/mv psb_d_nest_builder_test $(EXEDIR)
clean: clean:
/bin/rm -f psb_d_nest_glob_test.o psb_d_nest_rect_test.o psb_d_nest_cg_test.o psb_d_nest_builder_test.o *$(.mod) \ /bin/rm -f psb_d_nest_glob_test.o psb_d_nest_rect_test.o psb_d_nest_cg_test.o *$(.mod) \
$(EXEDIR)/psb_d_nest_glob_test $(EXEDIR)/psb_d_nest_rect_test $(EXEDIR)/psb_d_nest_cg_test $(EXEDIR)/psb_d_nest_builder_test $(EXEDIR)/psb_d_nest_glob_test $(EXEDIR)/psb_d_nest_rect_test $(EXEDIR)/psb_d_nest_cg_test
verycleanlib: verycleanlib:
(cd ../..; make veryclean) (cd ../..; make veryclean)
lib: lib:

@ -196,7 +196,7 @@ for a block operator).
### 3.4 Low-level API (advanced) ### 3.4 Low-level API (advanced)
`psb_d_nest_matrix` is built on lower-level pieces, available directly (see `psb_d_nest_cg_test.F90` for an end-to-end example): `psb_d_nest_matrix` is built on lower-level pieces, available directly:
* `psb_cd_nest_compose(grid_desc, desc_glob, info)` — compose the per-field descriptors into the single global descriptor with the union halo. * `psb_cd_nest_compose(grid_desc, desc_glob, info)` — compose the per-field descriptors into the single global descriptor with the union halo.
* `psb_d_nest_base_setup(nest_op, block_storage, grid_desc, desc_glob, info)` — set up the `psb_d_nest_base_mat` operator (implements the local `csmv`, `get_diag`, `csgetrow`). * `psb_d_nest_base_setup(nest_op, block_storage, grid_desc, desc_glob, info)` — set up the `psb_d_nest_base_mat` operator (implements the local `csmv`, `get_diag`, `csgetrow`).
@ -211,8 +211,7 @@ A field-split interface (`psb_d_nest_get_block`, `psb_d_nest_get_field_desc`, `p
|------------------------------|----------------| |------------------------------|----------------|
| `psb_d_nest_glob_test` | Square 2×2 operator built with `psb_d_nest_matrix`; the nested `psb_spmm` is compared bit-for-bit against the same matrix assembled monolithically in CSR. | | `psb_d_nest_glob_test` | Square 2×2 operator built with `psb_d_nest_matrix`; the nested `psb_spmm` is compared bit-for-bit against the same matrix assembled monolithically in CSR. |
| `psb_d_nest_rect_test` | Same, with fields of different size (`nV = 2 nQ`) and genuinely **rectangular** off-diagonal blocks. | | `psb_d_nest_rect_test` | Same, with fields of different size (`nV = 2 nQ`) and genuinely **rectangular** off-diagonal blocks. |
| `psb_d_nest_cg_test` | Standard PSBLAS **CG** on an SPD, ill-conditioned operator (1D Laplacian reordered red-black), built on the **low-level path**, solved under every stock preconditioner (`NONE`, `DIAG`, `BJAC`/ILU(0)); requires convergence to machine precision for all of them, and that `DIAG` reproduces the `NONE` iteration count exactly (a bit-precise check of the nested `get_diag`, since the diagonal is the constant `2I`). | | `psb_d_nest_cg_test` | Standard PSBLAS **CG** on an SPD, ill-conditioned operator (1D Laplacian reordered red-black), solved under every stock preconditioner (`NONE`, `DIAG`, `BJAC`/ILU(0)); requires convergence to machine precision for all of them, and that `DIAG` reproduces the `NONE` iteration count exactly (a bit-precise check of the nested `get_diag`, since the diagonal is the constant `2I`). |
| `psb_d_nest_builder_test` | Same CG solve as above but built through the `psb_d_nest_matrix` utility (high-level path). |
All tests run both serially and in parallel, and the result is invariant with respect to the number of MPI processes. All tests run both serially and in parallel, and the result is invariant with respect to the number of MPI processes.
@ -231,7 +230,6 @@ make # builds the executables into ./runs
./runs/psb_d_nest_glob_test # serial ./runs/psb_d_nest_glob_test # serial
mpirun -np 4 ./runs/psb_d_nest_rect_test mpirun -np 4 ./runs/psb_d_nest_rect_test
mpirun -np 4 ./runs/psb_d_nest_cg_test mpirun -np 4 ./runs/psb_d_nest_cg_test
mpirun -np 4 ./runs/psb_d_nest_builder_test
``` ```
Each test prints a single `[PASS]` / `[FAIL]` line (printed by rank 0). Each test prints a single `[PASS]` / `[FAIL]` line (printed by rank 0).

@ -1,210 +0,0 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific prior written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! File: psb_d_nest_builder_test.F90
!
! Program: psb_d_nest_builder_test
! Author: Simone Staccone (Stack-1)
!
! Same operator as the low-level CG test (1D Laplacian reordered red-black, SPD
! and ill-conditioned) but built with the psb_d_nest_matrix utility: the user
! declares nested_matrix, gives the field sizes, inserts the block values and
! calls asb. All the setup (per-field descriptors, union halo, compose, setup,
! wrap) is handled by the utility. Solved with CG and checked against the
! exact solution.
!
! M = [ 2I C ] C(r,r) = -1 , C(r,r-1) = -1 (the Laplacian edges)
! [ C^T 2I ]
!
! Run: ./psb_d_nest_builder_test ; mpirun -np 4 ./psb_d_nest_builder_test
!
program psb_d_nest_builder_test
use psb_base_mod
use psb_prec_mod
use psb_linsolve_mod
use psb_d_nest_mod ! umbrella: includes psb_d_nest_matrix (builder)
implicit none
type(psb_ctxt_type) :: context
integer(psb_ipk_) :: my_rank, num_procs, info, i_local_row, entry_idx
integer(psb_ipk_) :: field1_local_rows, field2_local_rows
integer(psb_lpk_) :: field1_global_row, field2_global_row, field_size
type(psb_d_nest_matrix) :: nested_matrix ! the only object needed
type(psb_dprec_type) :: preconditioner
type(psb_d_vect_type) :: x_solution, rhs, x_exact
real(psb_dpk_) :: insert_value(1)
integer(psb_lpk_), allocatable :: entry_rows(:), entry_cols(:)
integer(psb_lpk_), allocatable :: field1_rows(:), field2_rows(:)
real(psb_dpk_), allocatable :: entry_vals(:)
real(psb_dpk_) :: stop_tol, final_residual, norm_x_exact, solution_error
integer(psb_ipk_) :: max_iter, n_iter, stop_criterion
real(psb_dpk_), parameter :: solution_tol = 1.0e-6_psb_dpk_
call psb_init(context)
call psb_info(context, my_rank, num_procs)
field_size = 512 ! global size of each field (N = 2*field_size)
stop_tol = 1.0e-9_psb_dpk_
max_iter = 4000
stop_criterion = 2
!---------------------------------------------------------------
! 1) create the nested operator: 2 fields of global size field_size
!---------------------------------------------------------------
call nested_matrix%init(context, [field_size, field_size], info)
if (info /= psb_success_) then
if (my_rank==0) write(*,*) 'FAIL init info=', info; goto 9999
end if
! rows owned by this process in each field
field1_rows = nested_matrix%get_owned_rows(1)
field2_rows = nested_matrix%get_owned_rows(2)
field1_local_rows = size(field1_rows)
field2_local_rows = size(field2_rows)
!---------------------------------------------------------------
! 2) insert the values, one block at a time (owned rows only)
!---------------------------------------------------------------
! block (1,1) = 2I
allocate(entry_rows(field1_local_rows), entry_cols(field1_local_rows), entry_vals(field1_local_rows))
do i_local_row = 1, field1_local_rows
field1_global_row = field1_rows(i_local_row)
entry_rows(i_local_row)=field1_global_row; entry_cols(i_local_row)=field1_global_row
entry_vals(i_local_row)=2.0_psb_dpk_
end do
call nested_matrix%ins(1, 1, field1_local_rows, entry_rows, entry_cols, entry_vals, info)
deallocate(entry_rows, entry_cols, entry_vals)
! block (2,2) = 2I
allocate(entry_rows(field2_local_rows), entry_cols(field2_local_rows), entry_vals(field2_local_rows))
do i_local_row = 1, field2_local_rows
field2_global_row = field2_rows(i_local_row)
entry_rows(i_local_row)=field2_global_row; entry_cols(i_local_row)=field2_global_row
entry_vals(i_local_row)=2.0_psb_dpk_
end do
call nested_matrix%ins(2, 2, field2_local_rows, entry_rows, entry_cols, entry_vals, info)
deallocate(entry_rows, entry_cols, entry_vals)
! block (1,2) = C : rows field1, cols field2 ; C(r,r)=-1, C(r,r-1)=-1
allocate(entry_rows(2*field1_local_rows), entry_cols(2*field1_local_rows), entry_vals(2*field1_local_rows))
entry_idx = 0
do i_local_row = 1, field1_local_rows
field1_global_row = field1_rows(i_local_row)
entry_idx = entry_idx + 1
entry_rows(entry_idx) = field1_global_row
entry_cols(entry_idx) = field1_global_row
entry_vals(entry_idx) = -1.0_psb_dpk_
if (field1_global_row > 1) then
entry_idx = entry_idx + 1
entry_rows(entry_idx) = field1_global_row
entry_cols(entry_idx) = field1_global_row - 1_psb_lpk_
entry_vals(entry_idx) = -1.0_psb_dpk_
end if
end do
call nested_matrix%ins(1, 2, entry_idx, entry_rows, entry_cols, entry_vals, info)
deallocate(entry_rows, entry_cols, entry_vals)
! block (2,1) = C^T : rows field2, cols field1 ; C^T(s,s)=-1, C^T(s,s+1)=-1
allocate(entry_rows(2*field2_local_rows), entry_cols(2*field2_local_rows), entry_vals(2*field2_local_rows))
entry_idx = 0
do i_local_row = 1, field2_local_rows
field2_global_row = field2_rows(i_local_row)
entry_idx = entry_idx + 1
entry_rows(entry_idx) = field2_global_row
entry_cols(entry_idx) = field2_global_row
entry_vals(entry_idx) = -1.0_psb_dpk_
if (field2_global_row < field_size) then
entry_idx = entry_idx + 1
entry_rows(entry_idx) = field2_global_row
entry_cols(entry_idx) = field2_global_row + 1_psb_lpk_
entry_vals(entry_idx) = -1.0_psb_dpk_
end if
end do
call nested_matrix%ins(2, 1, entry_idx, entry_rows, entry_cols, entry_vals, info)
deallocate(entry_rows, entry_cols, entry_vals)
!---------------------------------------------------------------
! 3) assemble: from here nested_matrix%a_glob / nested_matrix%desc_glob are ready for Krylov
!---------------------------------------------------------------
call nested_matrix%asb(info)
if (info /= psb_success_) then
if (my_rank==0) write(*,*) 'FAIL asb info=', info; goto 9999
end if
!---------------------------------------------------------------
! 4) consistent RHS x_exact=1, rhs = M*x_exact, then solve with standard CG
!---------------------------------------------------------------
call psb_geall(x_exact, nested_matrix%desc_glob, info)
do i_local_row = 1, nested_matrix%desc_glob%get_local_rows()
call nested_matrix%desc_glob%l2g(i_local_row, field1_global_row, info)
insert_value(1) = 1.0_psb_dpk_
call psb_geins(1, [field1_global_row], insert_value, x_exact, nested_matrix%desc_glob, info)
end do
call psb_geasb(x_exact, nested_matrix%desc_glob, info)
call psb_geall(rhs, nested_matrix%desc_glob, info); call psb_geasb(rhs, nested_matrix%desc_glob, info)
call psb_spmm(done, nested_matrix%a_glob, x_exact, dzero, rhs, nested_matrix%desc_glob, info)
norm_x_exact = psb_genrm2(x_exact, nested_matrix%desc_glob, info)
call preconditioner%init(context, 'NONE', info)
call preconditioner%build(nested_matrix%a_glob, nested_matrix%desc_glob, info)
call psb_geall(x_solution, nested_matrix%desc_glob, info); call psb_geasb(x_solution, nested_matrix%desc_glob, info)
call psb_krylov('CG', nested_matrix%a_glob, preconditioner, rhs, x_solution, stop_tol, nested_matrix%desc_glob, info, &
& itmax=max_iter, iter=n_iter, err=final_residual, istop=stop_criterion)
if (info /= psb_success_) then
if (my_rank==0) write(*,*) 'FAIL krylov info=', info; goto 9999
end if
call psb_geaxpby(-done, x_exact, done, x_solution, nested_matrix%desc_glob, info)
solution_error = psb_genrm2(x_solution, nested_matrix%desc_glob, info) / norm_x_exact
if (my_rank == 0) then
write(*,'(a,i0,a,i0)') ' np=', num_procs, ' N(global)=', 2*field_size
write(*,'(a,i0)') ' CG iterations = ', n_iter
write(*,'(a,es12.4)') ' CG relative residual = ', final_residual
write(*,'(a,es12.4)') ' ||x - x_exact||/||x_ex|| = ', solution_error
if ((n_iter < max_iter) .and. (solution_error <= solution_tol)) then
write(*,*) '[PASS] nested matrix built with the utility, solved with CG'
else
write(*,*) '[FAIL] tol ', solution_tol
end if
end if
call nested_matrix%free(info)
9999 continue
call psb_exit(context)
end program psb_d_nest_builder_test

@ -34,11 +34,12 @@
! Program: psb_d_nest_cg_test ! Program: psb_d_nest_cg_test
! Author: Simone Staccone (Stack-1) ! Author: Simone Staccone (Stack-1)
! !
! Solves a linear system with the GLOBAL nested operator using the standard ! Solves a linear system with the nested operator using the standard PSBLAS CG
! PSBLAS CG (psb_krylov('CG', ...)). This test builds the operator on the ! (psb_krylov('CG', ...)) under every stock one-level preconditioner, to show
! LOW-LEVEL path (per-field descriptors, blocks, compose, setup, wrap) to ! that the nested operator plugs into the PSBLAS preconditioning infrastructure:
! directly validate the machinery the psb_d_nest_matrix utility relies on; the ! NONE (operator only),
! same solve through the utility is in psb_d_nest_builder_test. ! DIAG (exercises the nested get_diag),
! BJAC (ILU(0), exercises the nested csgetrow through the ILU build).
! !
! CG needs a SYMMETRIC POSITIVE DEFINITE operator and, to stress the test ! CG needs a SYMMETRIC POSITIVE DEFINITE operator and, to stress the test
! (hundreds of matvecs), an ILL-CONDITIONED one. We use a real case: the 1D ! (hundreds of matvecs), an ILL-CONDITIONED one. We use a real case: the 1D
@ -54,12 +55,10 @@
! Laplacian up to a permutation: SPD but with lambda_min ~ (pi/m)^2 => cond ~ ! Laplacian up to a permutation: SPD but with lambda_min ~ (pi/m)^2 => cond ~
! N^2 => CG performs O(N) iterations that GROW with N. ! N^2 => CG performs O(N) iterations that GROW with N.
! !
! The system is solved under every stock PSBLAS preconditioner: NONE (operator ! The operator is built with the psb_d_nest_matrix utility. The test passes if
! only), DIAG (exercises the nested get_diag) and BJAC/ILU(0) (exercises the ! every solve converges to the exact solution and DIAG reproduces the NONE
! nested csgetrow through the ILU factorization). The test passes if every ! iteration count exactly (with the constant diagonal 2I, Jacobi is a pure
! solve converges to the exact solution and DIAG reproduces the NONE iteration ! rescaling, so any mismatch would expose a wrong nested get_diag).
! count exactly (with the constant diagonal 2I, Jacobi is a pure rescaling, so
! any mismatch would expose a wrong nested get_diag).
! !
! Run: ./psb_d_nest_cg_test ; mpirun -np 4 ./psb_d_nest_cg_test ! Run: ./psb_d_nest_cg_test ; mpirun -np 4 ./psb_d_nest_cg_test
! !
@ -68,43 +67,32 @@ program psb_d_nest_cg_test
use psb_util_mod use psb_util_mod
use psb_prec_mod use psb_prec_mod
use psb_linsolve_mod use psb_linsolve_mod
use psb_d_nest_mod use psb_d_nest_mod ! umbrella: includes psb_d_nest_matrix (builder)
implicit none implicit none
type(psb_ctxt_type) :: context type(psb_ctxt_type) :: context
integer(psb_ipk_) :: my_rank, num_procs, info, i_local_row, entry_idx, field_local_rows integer(psb_ipk_) :: my_rank, num_procs, info, i_local_row, entry_idx
integer(psb_lpk_) :: field1_global_row, field2_global_row, field_size integer(psb_ipk_) :: field1_local_rows, field2_local_rows
integer(psb_lpk_) :: field1_global_row, field2_global_row, field_size
! per-field descriptors + blocks type(psb_d_nest_matrix) :: nested_matrix
type(psb_desc_type) :: field1_desc, field2_desc type(psb_dprec_type) :: preconditioner
type(psb_dspmat_type) :: diag_block1, coupling_12, coupling_21, diag_block2 type(psb_d_vect_type) :: x_solution, rhs, x_exact
! nested storage + grid descriptor + composed global path integer(psb_lpk_), allocatable :: entry_rows(:), entry_cols(:)
type(psb_d_nest_sparse_mat) :: block_storage integer(psb_lpk_), allocatable :: field1_rows(:), field2_rows(:)
type(psb_desc_nest_type) :: grid_desc real(psb_dpk_), allocatable :: entry_vals(:)
type(psb_desc_type) :: desc_global
type(psb_d_nest_base_mat) :: nest_operator
type(psb_dspmat_type) :: global_operator
! preconditioner + vectors
type(psb_dprec_type) :: preconditioner
type(psb_d_vect_type) :: x_solution, rhs, x_exact
real(psb_dpk_) :: insert_value(1)
! global triplets for the coupling blocks
integer(psb_lpk_), allocatable :: entry_rows(:), entry_cols(:)
real(psb_dpk_), allocatable :: entry_vals(:)
! solver parameters ! solver parameters
real(psb_dpk_) :: diag_value, stop_tol, final_residual, norm_x_exact, solution_error real(psb_dpk_) :: diag_value, stop_tol, final_residual, norm_x_exact, solution_error
integer(psb_ipk_) :: max_iter, trace_level, n_iter, stop_criterion integer(psb_ipk_) :: max_iter, trace_level, n_iter, stop_criterion
real(psb_dpk_), parameter :: solution_tol = 1.0e-6_psb_dpk_ real(psb_dpk_), parameter :: solution_tol = 1.0e-6_psb_dpk_
! stock preconditioners to exercise on the nested operator ! stock preconditioners to exercise on the nested operator
integer(psb_ipk_), parameter :: n_precs = 3 integer(psb_ipk_), parameter :: n_precs = 3
character(len=6), parameter :: prec_names(n_precs) = ['NONE ', 'DIAG ', 'BJAC '] character(len=6), parameter :: prec_names(n_precs) = ['NONE ', 'DIAG ', 'BJAC ']
integer(psb_ipk_) :: i_prec, iter_none, iter_diag integer(psb_ipk_) :: i_prec, iter_none, iter_diag
logical :: all_passed logical :: all_passed
call psb_init(context) call psb_init(context)
call psb_info(context, my_rank, num_procs) call psb_info(context, my_rank, num_procs)
@ -117,63 +105,47 @@ program psb_d_nest_cg_test
stop_criterion = 2 ! stop on the relative residual stop_criterion = 2 ! stop on the relative residual
!--------------------------------------------------------------- !---------------------------------------------------------------
! 1) per-field descriptors: block distribution of field_size global rows ! 1) create the nested operator: 2 fields of global size field_size
! over num_procs processes (total size independent of num_procs)
!--------------------------------------------------------------- !---------------------------------------------------------------
field_local_rows = int(field_size / int(num_procs, psb_lpk_), psb_ipk_) call nested_matrix%init(context, [field_size, field_size], info)
if (int(my_rank, psb_lpk_) < mod(field_size, int(num_procs, psb_lpk_))) & if (info /= psb_success_) then
& field_local_rows = field_local_rows + 1 if (my_rank==0) write(*,*) 'FAIL: nested_matrix%init info=', info; goto 9999
call psb_cdall(context, field1_desc, info, nl=field_local_rows) end if
call psb_cdall(context, field2_desc, info, nl=field_local_rows) field1_rows = nested_matrix%get_owned_rows(1)
field2_rows = nested_matrix%get_owned_rows(2)
field1_local_rows = size(field1_rows)
field2_local_rows = size(field2_rows)
!--------------------------------------------------------------- !---------------------------------------------------------------
! 2) diagonal blocks A = B = diag*I (odd/even nodes of the red-black ! 2) insert the blocks (owned rows only)
! reordered Laplacian are not adjacent to each other)
!--------------------------------------------------------------- !---------------------------------------------------------------
call psb_spall(diag_block1, field1_desc, info, nnz=field1_desc%get_local_rows()) ! block (1,1) = diag*I
call psb_spall(diag_block2, field2_desc, info, nnz=field2_desc%get_local_rows()) allocate(entry_rows(field1_local_rows), entry_cols(field1_local_rows), entry_vals(field1_local_rows))
do i_local_row = 1, field1_local_rows
do i_local_row = 1, field1_desc%get_local_rows() field1_global_row = field1_rows(i_local_row)
call field1_desc%l2g(i_local_row, field1_global_row, info) entry_rows(i_local_row) = field1_global_row
insert_value(1) = diag_value entry_cols(i_local_row) = field1_global_row
call psb_spins(1,[field1_global_row],[field1_global_row],insert_value,diag_block1,field1_desc,info) entry_vals(i_local_row) = diag_value
end do
do i_local_row = 1, field2_desc%get_local_rows()
call field2_desc%l2g(i_local_row, field2_global_row, info)
insert_value(1) = diag_value
call psb_spins(1,[field2_global_row],[field2_global_row],insert_value,diag_block2,field2_desc,info)
end do end do
call nested_matrix%ins(1, 1, field1_local_rows, entry_rows, entry_cols, entry_vals, info)
deallocate(entry_rows, entry_cols, entry_vals)
!--------------------------------------------------------------- ! block (2,2) = diag*I
! 3) register, in the union halo, the cross-field columns of the coupling blocks allocate(entry_rows(field2_local_rows), entry_cols(field2_local_rows), entry_vals(field2_local_rows))
! C (row field1, col field2): columns {r, r-1} in field2 -> into field2_desc do i_local_row = 1, field2_local_rows
! C^T (row field2, col field1): columns {s, s+1} in field1 -> into field1_desc field2_global_row = field2_rows(i_local_row)
!--------------------------------------------------------------- entry_rows(i_local_row) = field2_global_row
do i_local_row = 1, field1_desc%get_local_rows() entry_cols(i_local_row) = field2_global_row
call field1_desc%l2g(i_local_row, field1_global_row, info) entry_vals(i_local_row) = diag_value
call psb_cdins(1, [field1_global_row], field2_desc, info)
if (field1_global_row > 1) call psb_cdins(1, [field1_global_row-1_psb_lpk_], field2_desc, info)
end do end do
do i_local_row = 1, field2_desc%get_local_rows() call nested_matrix%ins(2, 2, field2_local_rows, entry_rows, entry_cols, entry_vals, info)
call field2_desc%l2g(i_local_row, field2_global_row, info) deallocate(entry_rows, entry_cols, entry_vals)
call psb_cdins(1, [field2_global_row], field1_desc, info)
if (field2_global_row < field_size) call psb_cdins(1, [field2_global_row+1_psb_lpk_], field1_desc, info)
end do
call psb_cdasb(field1_desc, info)
call psb_cdasb(field2_desc, info)
call psb_spasb(diag_block1, field1_desc, info, dupl=psb_dupl_add_)
call psb_spasb(diag_block2, field2_desc, info, dupl=psb_dupl_add_)
!--------------------------------------------------------------- ! block (1,2) = C : rows field1, cols field2 ; C(r,r)=-1, C(r,r-1)=-1
! 4) coupling C (1,2): rows field1 (field1_desc), columns field2 (field2_desc) allocate(entry_rows(2*field1_local_rows), entry_cols(2*field1_local_rows), entry_vals(2*field1_local_rows))
! C(r,r) = -1 , C(r,r-1) = -1 (odd node 2r-1 -> even nodes 2r and 2r-2)
!---------------------------------------------------------------
allocate(entry_rows(2*field1_desc%get_local_rows()), entry_cols(2*field1_desc%get_local_rows()), &
& entry_vals(2*field1_desc%get_local_rows()))
entry_idx = 0 entry_idx = 0
do i_local_row = 1, field1_desc%get_local_rows() do i_local_row = 1, field1_local_rows
call field1_desc%l2g(i_local_row, field1_global_row, info) field1_global_row = field1_rows(i_local_row)
entry_idx = entry_idx + 1 entry_idx = entry_idx + 1
entry_rows(entry_idx) = field1_global_row entry_rows(entry_idx) = field1_global_row
entry_cols(entry_idx) = field1_global_row entry_cols(entry_idx) = field1_global_row
@ -185,19 +157,14 @@ program psb_d_nest_cg_test
entry_vals(entry_idx) = -1.0_psb_dpk_ entry_vals(entry_idx) = -1.0_psb_dpk_
end if end if
end do end do
call psb_d_nest_rect_block(coupling_12, entry_idx, entry_rows, entry_cols, entry_vals, field1_desc, field2_desc, info) call nested_matrix%ins(1, 2, entry_idx, entry_rows, entry_cols, entry_vals, info)
deallocate(entry_rows, entry_cols, entry_vals) deallocate(entry_rows, entry_cols, entry_vals)
!--------------------------------------------------------------- ! block (2,1) = C^T : rows field2, cols field1 ; C^T(s,s)=-1, C^T(s,s+1)=-1
! 5) coupling C^T (2,1) = exact transpose of C: allocate(entry_rows(2*field2_local_rows), entry_cols(2*field2_local_rows), entry_vals(2*field2_local_rows))
! rows field2 (field2_desc), columns field1 (field1_desc)
! C^T(s,s) = -1 , C^T(s,s+1) = -1 (even node 2s -> odd nodes 2s-1 and 2s+1)
!---------------------------------------------------------------
allocate(entry_rows(2*field2_desc%get_local_rows()), entry_cols(2*field2_desc%get_local_rows()), &
& entry_vals(2*field2_desc%get_local_rows()))
entry_idx = 0 entry_idx = 0
do i_local_row = 1, field2_desc%get_local_rows() do i_local_row = 1, field2_local_rows
call field2_desc%l2g(i_local_row, field2_global_row, info) field2_global_row = field2_rows(i_local_row)
entry_idx = entry_idx + 1 entry_idx = entry_idx + 1
entry_rows(entry_idx) = field2_global_row entry_rows(entry_idx) = field2_global_row
entry_cols(entry_idx) = field2_global_row entry_cols(entry_idx) = field2_global_row
@ -209,70 +176,34 @@ program psb_d_nest_cg_test
entry_vals(entry_idx) = -1.0_psb_dpk_ entry_vals(entry_idx) = -1.0_psb_dpk_
end if end if
end do end do
call psb_d_nest_rect_block(coupling_21, entry_idx, entry_rows, entry_cols, entry_vals, field2_desc, field1_desc, info) call nested_matrix%ins(2, 1, entry_idx, entry_rows, entry_cols, entry_vals, info)
deallocate(entry_rows, entry_cols, entry_vals) deallocate(entry_rows, entry_cols, entry_vals)
!--------------------------------------------------------------- !---------------------------------------------------------------
! 6) nested grid (all four blocks present) ! 3) assemble: nested_matrix%a_glob / nested_matrix%desc_glob are ready for Krylov
!--------------------------------------------------------------- !---------------------------------------------------------------
block_storage%nrblocks = 2 call nested_matrix%asb(info)
block_storage%ncblocks = 2
allocate(block_storage%mats(2,2))
call psb_move_alloc(diag_block1, block_storage%mats(1,1), info)
call psb_move_alloc(coupling_12, block_storage%mats(1,2), info)
call psb_move_alloc(coupling_21, block_storage%mats(2,1), info)
call psb_move_alloc(diag_block2, block_storage%mats(2,2), info)
grid_desc%nrblocks = 2
grid_desc%ncblocks = 2
allocate(grid_desc%descs(2,2))
call field1_desc%clone(grid_desc%descs(1,1), info)
call field2_desc%clone(grid_desc%descs(1,2), info)
call field1_desc%clone(grid_desc%descs(2,1), info)
call field2_desc%clone(grid_desc%descs(2,2), info)
!---------------------------------------------------------------
! 7) composed global operator (what CG will use as its matrix)
!---------------------------------------------------------------
call psb_cd_nest_compose(grid_desc, desc_global, info)
if (info /= psb_success_) then if (info /= psb_success_) then
if (my_rank == 0) write(*,*) 'FAIL: psb_cd_nest_compose info=', info if (my_rank==0) write(*,*) 'FAIL: nested_matrix%asb info=', info; goto 9999
goto 9999
end if end if
call psb_d_nest_base_setup(nest_operator, block_storage, grid_desc, desc_global, info)
if (info /= psb_success_) then
if (my_rank == 0) write(*,*) 'FAIL: psb_d_nest_base_setup info=', info
goto 9999
end if
allocate(global_operator%a, source=nest_operator)
call global_operator%set_nrows(desc_global%get_local_rows())
call global_operator%set_ncols(desc_global%get_local_cols())
call global_operator%set_asb()
!--------------------------------------------------------------- !---------------------------------------------------------------
! 8) consistent RHS: x_exact = 1, rhs = M * x_exact (via the nested operator) ! 4) consistent RHS: x_exact = 1, rhs = M * x_exact (via the nested operator)
!--------------------------------------------------------------- !---------------------------------------------------------------
call psb_geall(x_exact, desc_global, info) call psb_geall(x_exact, nested_matrix%desc_glob, info)
do i_local_row = 1, desc_global%get_local_rows() call psb_geasb(x_exact, nested_matrix%desc_glob, info)
call desc_global%l2g(i_local_row, field1_global_row, info) call x_exact%set(done) ! x_exact = 1 everywhere
insert_value(1) = 1.0_psb_dpk_
call psb_geins(1, [field1_global_row], insert_value, x_exact, desc_global, info)
end do
call psb_geasb(x_exact, desc_global, info)
call psb_geall(rhs, desc_global, info); call psb_geasb(rhs, desc_global, info) call psb_geall(rhs, nested_matrix%desc_glob, info); call psb_geasb(rhs, nested_matrix%desc_glob, info)
call psb_spmm(done, global_operator, x_exact, dzero, rhs, desc_global, info) call psb_spmm(done, nested_matrix%a_glob, x_exact, dzero, rhs, nested_matrix%desc_glob, info)
if (info /= psb_success_) then if (info /= psb_success_) then
if (my_rank == 0) write(*,*) 'FAIL: psb_spmm (RHS) info=', info if (my_rank == 0) write(*,*) 'FAIL: psb_spmm (RHS) info=', info
goto 9999 goto 9999
end if end if
norm_x_exact = psb_genrm2(x_exact, nested_matrix%desc_glob, info)
norm_x_exact = psb_genrm2(x_exact, desc_global, info)
!--------------------------------------------------------------- !---------------------------------------------------------------
! 9) solve with the standard PSBLAS CG under every stock preconditioner: ! 5) solve with the standard PSBLAS CG under every stock preconditioner
! NONE (operator only), DIAG (exercises the nested get_diag),
! BJAC/ILU(0) (exercises the nested csgetrow through the ILU build)
!--------------------------------------------------------------- !---------------------------------------------------------------
if (my_rank == 0) write(*,'(a,i0,a,i0)') ' np=', num_procs, ' N(global)=', 2*field_size if (my_rank == 0) write(*,'(a,i0,a,i0)') ' np=', num_procs, ' N(global)=', 2*field_size
all_passed = .true. all_passed = .true.
@ -280,14 +211,16 @@ program psb_d_nest_cg_test
iter_diag = -1 iter_diag = -1
do i_prec = 1, n_precs do i_prec = 1, n_precs
call preconditioner%init(context, trim(prec_names(i_prec)), info) call preconditioner%init(context, trim(prec_names(i_prec)), info)
call preconditioner%build(global_operator, desc_global, info) call preconditioner%build(nested_matrix%a_glob, nested_matrix%desc_glob, info)
if (info /= psb_success_) then if (info /= psb_success_) then
if (my_rank == 0) write(*,*) 'FAIL: prec%build (', trim(prec_names(i_prec)), ') info=', info if (my_rank == 0) write(*,*) 'FAIL: prec%build (', trim(prec_names(i_prec)), ') info=', info
all_passed = .false.; exit all_passed = .false.; exit
end if end if
call psb_geall(x_solution, desc_global, info); call psb_geasb(x_solution, desc_global, info) call psb_geall(x_solution, nested_matrix%desc_glob, info)
call psb_krylov('CG', global_operator, preconditioner, rhs, x_solution, stop_tol, desc_global, info, & call psb_geasb(x_solution, nested_matrix%desc_glob, info)
call psb_krylov('CG', nested_matrix%a_glob, preconditioner, rhs, x_solution, stop_tol, &
& nested_matrix%desc_glob, info, &
& itmax=max_iter, iter=n_iter, err=final_residual, itrace=trace_level, istop=stop_criterion) & itmax=max_iter, iter=n_iter, err=final_residual, itrace=trace_level, istop=stop_criterion)
if (info /= psb_success_) then if (info /= psb_success_) then
if (my_rank == 0) write(*,*) 'FAIL: psb_krylov(CG,', trim(prec_names(i_prec)), ') info=', info if (my_rank == 0) write(*,*) 'FAIL: psb_krylov(CG,', trim(prec_names(i_prec)), ') info=', info
@ -295,8 +228,8 @@ program psb_d_nest_cg_test
end if end if
! solution error: || x_solution - x_exact || / || x_exact || ! solution error: || x_solution - x_exact || / || x_exact ||
call psb_geaxpby(-done, x_exact, done, x_solution, desc_global, info) call psb_geaxpby(-done, x_exact, done, x_solution, nested_matrix%desc_glob, info)
solution_error = psb_genrm2(x_solution, desc_global, info) / norm_x_exact solution_error = psb_genrm2(x_solution, nested_matrix%desc_glob, info) / norm_x_exact
if (my_rank == 0) then if (my_rank == 0) then
write(*,'(a,a6,a,i6,a,es12.4,a,es12.4)') ' prec=', prec_names(i_prec), & write(*,'(a,a6,a,i6,a,es12.4,a,es12.4)') ' prec=', prec_names(i_prec), &
@ -307,27 +240,25 @@ program psb_d_nest_cg_test
if (trim(prec_names(i_prec)) == 'NONE') iter_none = n_iter if (trim(prec_names(i_prec)) == 'NONE') iter_none = n_iter
if (trim(prec_names(i_prec)) == 'DIAG') iter_diag = n_iter if (trim(prec_names(i_prec)) == 'DIAG') iter_diag = n_iter
call psb_gefree(x_solution, desc_global, info) call psb_gefree(x_solution, nested_matrix%desc_glob, info)
call preconditioner%free(info) call preconditioner%free(info)
end do end do
!--------------------------------------------------------------- !---------------------------------------------------------------
! 10) verdict: every preconditioner converges to the right solution. ! 6) verdict: every preconditioner converges to the right solution, and DIAG
! With the constant diagonal 2I, Jacobi is a pure rescaling, so DIAG ! reproduces the NONE iteration count exactly (Jacobi on the constant
! must reproduce the unpreconditioned iteration count EXACTLY: this is ! diagonal 2I is a pure rescaling -> exactness check of the nested get_diag)
! a bit-precise check that the nested get_diag returns exact values.
! (BJAC/ILU(0) on a red-black ordering drops all fill, so it cannot
! reduce the iteration count of this exact-convergence regime; its
! much smaller final residual shows the ILU factors are consistent.)
!--------------------------------------------------------------- !---------------------------------------------------------------
if (my_rank == 0) then if (my_rank == 0) then
if (all_passed .and. (iter_diag == iter_none)) then if (all_passed .and. (iter_diag == iter_none)) then
write(*,*) '[PASS] CG converges on the global nested operator with NONE/DIAG/BJAC' write(*,*) '[PASS] CG converges on the nested operator with NONE/DIAG/BJAC'
else else
write(*,*) '[FAIL] preconditioned CG on the nested operator (tol ', solution_tol, ')' write(*,*) '[FAIL] preconditioned CG on the nested operator (tol ', solution_tol, ')'
end if end if
end if end if
call nested_matrix%free(info)
9999 continue 9999 continue
call psb_exit(context) call psb_exit(context)

Loading…
Cancel
Save