Merge branch 'nested_matrix_type' into communication_v2

communication_v2
Stack-1 1 day ago
commit 4106f44331

1
.gitignore vendored

@ -8,6 +8,7 @@
*.out
*.err
*.csv
*.log
leonardo_comm_script.sh
# header files generated

@ -98,6 +98,7 @@ clean: cleanlib
# cleantest: run the `clean` target of every test sub-directory.
# Each sub-make is started with -C so no explicit `cd` is required.
cleantest:
	$(MAKE) -C test/fileread clean
	$(MAKE) -C test/pdegen clean
	$(MAKE) -C test/nested clean
	$(MAKE) -C test/util clean
cleanlib:

@ -567,6 +567,15 @@ set(PSB_base_source_files
modules/tools/psb_m_tools_a_mod.f90
modules/tools/psb_cd_tools_mod.F90
modules/tools/psb_d_tools_mod.F90
modules/tools/psb_cd_nest_tools_mod.F90
modules/tools/psb_s_nest_tools_mod.F90
modules/tools/psb_d_nest_tools_mod.F90
modules/tools/psb_c_nest_tools_mod.F90
modules/tools/psb_z_nest_tools_mod.F90
modules/tools/psb_s_nest_builder_mod.F90
modules/tools/psb_d_nest_builder_mod.F90
modules/tools/psb_c_nest_builder_mod.F90
modules/tools/psb_z_nest_builder_mod.F90
modules/tools/psb_c_tools_mod.F90
modules/tools/psb_e_tools_a_mod.f90
modules/tools/psb_i2_tools_a_mod.f90
@ -638,6 +647,19 @@ set(PSB_base_source_files
modules/desc/psb_hash_map_mod.F90
modules/desc/psb_glist_map_mod.F90
modules/psb_base_mod.f90
modules/desc/psb_desc_nest_mod.f90
modules/serial/psb_s_nest_mat_mod.f90
modules/serial/psb_d_nest_mat_mod.f90
modules/serial/psb_c_nest_mat_mod.f90
modules/serial/psb_z_nest_mat_mod.f90
modules/serial/psb_s_nest_base_mat_mod.F90
modules/serial/psb_d_nest_base_mat_mod.F90
modules/serial/psb_c_nest_base_mat_mod.F90
modules/serial/psb_z_nest_base_mat_mod.F90
modules/psb_s_nest_mod.f90
modules/psb_d_nest_mod.f90
modules/psb_c_nest_mod.f90
modules/psb_z_nest_mod.f90
)
foreach(file IN LISTS PSB_base_source_files)
list(APPEND base_source_files ${CMAKE_CURRENT_LIST_DIR}/${file})

@ -36,6 +36,7 @@ SERIAL_MODS=serial/psb_s_serial_mod.o serial/psb_d_serial_mod.o \
serial/psb_i2_base_vect_mod.o serial/psb_i2_vect_mod.o\
serial/psb_i_base_vect_mod.o serial/psb_i_vect_mod.o\
serial/psb_l_base_vect_mod.o serial/psb_l_vect_mod.o\
serial/psb_i2_base_vect_mod.o serial/psb_i2_vect_mod.o\
serial/psb_d_base_vect_mod.o serial/psb_d_vect_mod.o\
serial/psb_s_base_vect_mod.o serial/psb_s_vect_mod.o\
serial/psb_c_base_vect_mod.o serial/psb_c_vect_mod.o\
@ -82,7 +83,11 @@ SERIAL_MODS=serial/psb_s_serial_mod.o serial/psb_d_serial_mod.o \
serial/psb_s_base_mat_mod.o serial/psb_s_csr_mat_mod.o serial/psb_s_csc_mat_mod.o serial/psb_s_mat_mod.o \
serial/psb_d_base_mat_mod.o serial/psb_d_csr_mat_mod.o serial/psb_d_csc_mat_mod.o serial/psb_d_mat_mod.o \
serial/psb_c_base_mat_mod.o serial/psb_c_csr_mat_mod.o serial/psb_c_csc_mat_mod.o serial/psb_c_mat_mod.o \
serial/psb_z_base_mat_mod.o serial/psb_z_csr_mat_mod.o serial/psb_z_csc_mat_mod.o serial/psb_z_mat_mod.o
serial/psb_z_base_mat_mod.o serial/psb_z_csr_mat_mod.o serial/psb_z_csc_mat_mod.o serial/psb_z_mat_mod.o \
serial/psb_s_nest_mat_mod.o serial/psb_s_nest_base_mat_mod.o \
serial/psb_d_nest_mat_mod.o serial/psb_d_nest_base_mat_mod.o \
serial/psb_c_nest_mat_mod.o serial/psb_c_nest_base_mat_mod.o \
serial/psb_z_nest_mat_mod.o serial/psb_z_nest_base_mat_mod.o
#\
# serial/psb_ls_csr_mat_mod.o serial/psb_ld_csr_mat_mod.o serial/psb_lc_csr_mat_mod.o serial/psb_lz_csr_mat_mod.o
#\
@ -91,10 +96,15 @@ SERIAL_MODS=serial/psb_s_serial_mod.o serial/psb_d_serial_mod.o \
UTIL_MODS = desc/psb_desc_const_mod.o desc/psb_indx_map_mod.o\
desc/psb_gen_block_map_mod.o desc/psb_list_map_mod.o desc/psb_repl_map_mod.o\
desc/psb_glist_map_mod.o desc/psb_hash_map_mod.o desc/psb_hashval.o \
desc/psb_desc_mod.o auxil/psb_sort_mod.o \
desc/psb_desc_mod.o desc/psb_desc_nest_mod.o auxil/psb_sort_mod.o \
tools/psb_cd_tools_mod.o \
tools/psb_cd_nest_tools_mod.o \
tools/psb_i_tools_mod.o tools/psb_l_tools_mod.o \
tools/psb_s_tools_mod.o tools/psb_d_tools_mod.o\
tools/psb_s_nest_tools_mod.o tools/psb_d_nest_tools_mod.o \
tools/psb_c_nest_tools_mod.o tools/psb_z_nest_tools_mod.o \
tools/psb_s_nest_builder_mod.o tools/psb_d_nest_builder_mod.o \
tools/psb_c_nest_builder_mod.o tools/psb_z_nest_builder_mod.o \
tools/psb_c_tools_mod.o tools/psb_z_tools_mod.o \
tools/psb_i2_tools_a_mod.o tools/psb_m_tools_a_mod.o tools/psb_e_tools_a_mod.o \
tools/psb_s_tools_a_mod.o tools/psb_d_tools_a_mod.o\
@ -128,7 +138,7 @@ UTIL_MODS = desc/psb_desc_const_mod.o desc/psb_indx_map_mod.o\
MODULES=$(BASIC_MODS) $(SERIAL_MODS) $(UTIL_MODS)
OBJS = error.o psb_base_mod.o $(EXTRA_COBJS) cutil.o
OBJS = error.o psb_base_mod.o psb_s_nest_mod.o psb_d_nest_mod.o psb_c_nest_mod.o psb_z_nest_mod.o $(EXTRA_COBJS) cutil.o
MODDIR=../../modules
INCDIR=../../include
LIBDIR=../
@ -413,12 +423,55 @@ comm/psi_s_comm_a_mod.o comm/psi_d_comm_a_mod.o \
comm/psi_c_comm_a_mod.o comm/psi_z_comm_a_mod.o: desc/psb_desc_mod.o
# tools/psb_tools_mod.o must be rebuilt after any of the tools modules
# listed below (prerequisite-only rule; the recipe is supplied elsewhere).
tools/psb_tools_mod.o: \
    tools/psb_cd_tools_mod.o \
    tools/psb_s_tools_mod.o tools/psb_d_tools_mod.o \
    tools/psb_cd_nest_tools_mod.o \
    tools/psb_s_nest_tools_mod.o tools/psb_d_nest_tools_mod.o \
    tools/psb_c_nest_tools_mod.o tools/psb_z_nest_tools_mod.o \
    tools/psb_i_tools_mod.o tools/psb_l_tools_mod.o \
    tools/psb_c_tools_mod.o tools/psb_z_tools_mod.o \
    tools/psb_i2_tools_a_mod.o tools/psb_m_tools_a_mod.o \
    tools/psb_e_tools_a_mod.o \
    tools/psb_s_tools_a_mod.o tools/psb_d_tools_a_mod.o \
    tools/psb_c_tools_a_mod.o tools/psb_z_tools_a_mod.o
# Explicit compile rules for the nested (MATNEST) tools and builder modules.
# In every rule the .F90 source is the FIRST prerequisite, so the recipe can
# refer to it as $< and to the object file as $@.

# Nested descriptor tools
tools/psb_cd_nest_tools_mod.o: tools/psb_cd_nest_tools_mod.F90 \
    tools/psb_cd_tools_mod.o desc/psb_desc_nest_mod.o
	$(FC) $(FCOPT) $(FINCLUDES) $(FDEFINES) -c $< -o $@

# d variant
tools/psb_d_nest_tools_mod.o: tools/psb_d_nest_tools_mod.F90 \
    tools/psb_d_tools_mod.o \
    desc/psb_desc_nest_mod.o serial/psb_d_nest_mat_mod.o
	$(FC) $(FCOPT) $(FINCLUDES) $(FDEFINES) -c $< -o $@

# NOTE(review): unlike the s/c/z builders, the d builder also lists the
# s/c/z nested tools modules as prerequisites -- confirm this is intended.
tools/psb_d_nest_builder_mod.o: tools/psb_d_nest_builder_mod.F90 \
    tools/psb_cd_tools_mod.o \
    tools/psb_cd_nest_tools_mod.o \
    tools/psb_s_nest_tools_mod.o tools/psb_d_nest_tools_mod.o \
    tools/psb_c_nest_tools_mod.o tools/psb_z_nest_tools_mod.o \
    serial/psb_d_nest_base_mat_mod.o serial/psb_d_nest_mat_mod.o \
    desc/psb_desc_nest_mod.o
	$(FC) $(FCOPT) $(FINCLUDES) $(FDEFINES) -c $< -o $@

# s variant
tools/psb_s_nest_tools_mod.o: tools/psb_s_nest_tools_mod.F90 \
    tools/psb_s_tools_mod.o \
    desc/psb_desc_nest_mod.o serial/psb_s_nest_mat_mod.o
	$(FC) $(FCOPT) $(FINCLUDES) $(FDEFINES) -c $< -o $@

tools/psb_s_nest_builder_mod.o: tools/psb_s_nest_builder_mod.F90 \
    tools/psb_cd_tools_mod.o \
    tools/psb_cd_nest_tools_mod.o tools/psb_s_nest_tools_mod.o \
    serial/psb_s_nest_base_mat_mod.o serial/psb_s_nest_mat_mod.o \
    desc/psb_desc_nest_mod.o
	$(FC) $(FCOPT) $(FINCLUDES) $(FDEFINES) -c $< -o $@

# c variant
tools/psb_c_nest_tools_mod.o: tools/psb_c_nest_tools_mod.F90 \
    tools/psb_c_tools_mod.o \
    desc/psb_desc_nest_mod.o serial/psb_c_nest_mat_mod.o
	$(FC) $(FCOPT) $(FINCLUDES) $(FDEFINES) -c $< -o $@

tools/psb_c_nest_builder_mod.o: tools/psb_c_nest_builder_mod.F90 \
    tools/psb_cd_tools_mod.o \
    tools/psb_cd_nest_tools_mod.o tools/psb_c_nest_tools_mod.o \
    serial/psb_c_nest_base_mat_mod.o serial/psb_c_nest_mat_mod.o \
    desc/psb_desc_nest_mod.o
	$(FC) $(FCOPT) $(FINCLUDES) $(FDEFINES) -c $< -o $@

# z variant
tools/psb_z_nest_tools_mod.o: tools/psb_z_nest_tools_mod.F90 \
    tools/psb_z_tools_mod.o \
    desc/psb_desc_nest_mod.o serial/psb_z_nest_mat_mod.o
	$(FC) $(FCOPT) $(FINCLUDES) $(FDEFINES) -c $< -o $@

tools/psb_z_nest_builder_mod.o: tools/psb_z_nest_builder_mod.F90 \
    tools/psb_cd_tools_mod.o \
    tools/psb_cd_nest_tools_mod.o tools/psb_z_nest_tools_mod.o \
    serial/psb_z_nest_base_mat_mod.o serial/psb_z_nest_mat_mod.o \
    desc/psb_desc_nest_mod.o
	$(FC) $(FCOPT) $(FINCLUDES) $(FDEFINES) -c $< -o $@
tools/psb_cd_tools_mod.o tools/psb_i_tools_mod.o tools/psb_l_tools_mod.o \
tools/psb_s_tools_mod.o tools/psb_d_tools_mod.o \
@ -441,6 +494,48 @@ psblas/psb_z_psblas_mod.o: serial/psb_z_vect_mod.o serial/psb_z_mat_mod.o
psblas/psb_psblas_mod.o: psblas/psb_s_psblas_mod.o psblas/psb_c_psblas_mod.o psblas/psb_d_psblas_mod.o psblas/psb_z_psblas_mod.o
psblas/psb_s_psblas_mod.o psblas/psb_c_psblas_mod.o psblas/psb_d_psblas_mod.o psblas/psb_z_psblas_mod.o: serial/psb_mat_mod.o desc/psb_desc_mod.o
# --- nested mat/desc dependencies (MATNEST) ---
# Prerequisite-only rules: they only fix the compilation order of the
# nested modules. Rules are grouped per variant (s, d, c, z).

# Nested descriptor container
desc/psb_desc_nest_mod.o: desc/psb_desc_mod.o

# s variant
serial/psb_s_nest_mat_mod.o: serial/psb_s_mat_mod.o
serial/psb_s_nest_base_mat_mod.o: serial/psb_s_nest_mat_mod.o \
    desc/psb_desc_nest_mod.o serial/psb_s_base_mat_mod.o \
    serial/psb_s_mat_mod.o desc/psb_desc_mod.o \
    serial/psb_i_vect_mod.o serial/psb_s_base_vect_mod.o
psb_s_nest_mod.o: desc/psb_desc_nest_mod.o \
    serial/psb_s_nest_mat_mod.o serial/psb_s_nest_base_mat_mod.o \
    tools/psb_cd_nest_tools_mod.o tools/psb_s_nest_tools_mod.o \
    tools/psb_s_nest_builder_mod.o

# d variant
serial/psb_d_nest_mat_mod.o: serial/psb_d_mat_mod.o
serial/psb_d_nest_base_mat_mod.o: serial/psb_d_nest_mat_mod.o \
    desc/psb_desc_nest_mod.o serial/psb_d_base_mat_mod.o \
    serial/psb_d_mat_mod.o desc/psb_desc_mod.o \
    serial/psb_i_vect_mod.o serial/psb_d_base_vect_mod.o
psb_d_nest_mod.o: desc/psb_desc_nest_mod.o \
    serial/psb_d_nest_mat_mod.o serial/psb_d_nest_base_mat_mod.o \
    tools/psb_cd_nest_tools_mod.o tools/psb_d_nest_tools_mod.o \
    tools/psb_d_nest_builder_mod.o

# c variant
serial/psb_c_nest_mat_mod.o: serial/psb_c_mat_mod.o
serial/psb_c_nest_base_mat_mod.o: serial/psb_c_nest_mat_mod.o \
    desc/psb_desc_nest_mod.o serial/psb_c_base_mat_mod.o \
    serial/psb_c_mat_mod.o desc/psb_desc_mod.o \
    serial/psb_i_vect_mod.o serial/psb_c_base_vect_mod.o
psb_c_nest_mod.o: desc/psb_desc_nest_mod.o \
    serial/psb_c_nest_mat_mod.o serial/psb_c_nest_base_mat_mod.o \
    tools/psb_cd_nest_tools_mod.o tools/psb_c_nest_tools_mod.o \
    tools/psb_c_nest_builder_mod.o

# z variant
serial/psb_z_nest_mat_mod.o: serial/psb_z_mat_mod.o
serial/psb_z_nest_base_mat_mod.o: serial/psb_z_nest_mat_mod.o \
    desc/psb_desc_nest_mod.o serial/psb_z_base_mat_mod.o \
    serial/psb_z_mat_mod.o desc/psb_desc_mod.o \
    serial/psb_i_vect_mod.o serial/psb_z_base_vect_mod.o
psb_z_nest_mod.o: desc/psb_desc_nest_mod.o \
    serial/psb_z_nest_mat_mod.o serial/psb_z_nest_base_mat_mod.o \
    tools/psb_cd_nest_tools_mod.o tools/psb_z_nest_tools_mod.o \
    tools/psb_z_nest_builder_mod.o
psb_base_mod.o: $(MODULES)

@ -0,0 +1,161 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! module: psb_desc_nest_mod
! Author: Simone Staccone (Stack-1)
!
! Defines psb_desc_nest_type: a 2-D array of psb_desc_type objects,
! one per block matrix entry in an nrblocks x ncblocks block system.
!
!
module psb_desc_nest_mod
  use psb_desc_mod
  use psb_error_mod
  implicit none

  !
  ! psb_desc_nest_type
  !   nrblocks   : number of block rows
  !   ncblocks   : number of block columns
  !   descs(:,:) : descriptors, addressed as descs(block_row, block_col)
  !
  type :: psb_desc_nest_type
    integer(psb_ipk_) :: nrblocks = 0
    integer(psb_ipk_) :: ncblocks = 0
    type(psb_desc_type), allocatable :: descs(:,:)
  contains
    procedure :: get_nrblocks => psb_desc_nest_get_nrblocks
    procedure :: get_ncblocks => psb_desc_nest_get_ncblocks
    procedure :: get_desc     => psb_desc_nest_get_desc
    procedure :: is_valid     => psb_desc_nest_is_valid
    procedure :: sizeof       => psb_desc_nest_sizeof
    procedure :: free         => psb_desc_nest_free
  end type psb_desc_nest_type

contains

  ! get_nrblocks: return the stored number of block rows
  function psb_desc_nest_get_nrblocks(d) result(nb)
    class(psb_desc_nest_type), intent(in) :: d
    integer(psb_ipk_) :: nb
    nb = d%nrblocks
  end function psb_desc_nest_get_nrblocks

  ! get_ncblocks: return the stored number of block columns
  function psb_desc_nest_get_ncblocks(d) result(nb)
    class(psb_desc_nest_type), intent(in) :: d
    integer(psb_ipk_) :: nb
    nb = d%ncblocks
  end function psb_desc_nest_get_ncblocks

  ! get_desc: copy descriptor (i_block_row, j_block_col) into desc.
  !   info = 0  on success
  !   info = -1 (and an error is pushed on the error stack) when the
  !             descriptor array is not allocated or the block indices
  !             fall outside 1..nrblocks / 1..ncblocks
  subroutine psb_desc_nest_get_desc(d, i_block_row, j_block_col, desc, info)
    class(psb_desc_nest_type), intent(in) :: d
    integer(psb_ipk_), intent(in)         :: i_block_row, j_block_col
    type(psb_desc_type), intent(out)      :: desc
    integer(psb_ipk_), intent(out)        :: info
    character(len=64) :: name

    info = 0
    name = 'psb_desc_nest_get_desc'

    ! The block counts can be non-zero while descs is still unallocated;
    ! referencing descs in that state would be invalid.
    if (.not. allocated(d%descs)) then
      info = -1
      call psb_errpush(info, name, a_err='Descriptor array not allocated')
      return
    end if

    if (i_block_row < 1 .or. i_block_row > d%nrblocks .or. &
         & j_block_col < 1 .or. j_block_col > d%ncblocks) then
      info = -1
      call psb_errpush(info, name, a_err='Invalid block indices')
      return
    end if

    desc = d%descs(i_block_row,j_block_col)
  end subroutine psb_desc_nest_get_desc

  ! is_valid: true if the per-column descriptors used by the kernel are valid.
  !   The previous version only checked the diagonal descs(i,i), which is
  !   wrong for saddle-point systems where the (2,2) block (and hence its
  !   diagonal descriptor) is absent. The nested halo relies on the per-column
  !   descriptors descs(1,j) (all descs(i,j) for fixed j are equivalent), so we
  !   validate those instead of the diagonal.
  !   When a column descriptor is invalid an error naming that column is
  !   pushed on the error stack.
  function psb_desc_nest_is_valid(d) result(valid)
    class(psb_desc_nest_type), intent(in) :: d
    logical :: valid
    integer(psb_ipk_) :: j_block_col, info
    character(len=64) :: name
    character(len=20) :: colid

    name  = 'psb_desc_nest_is_valid'
    info  = 0
    valid = (d%nrblocks >= 1) .and. (d%ncblocks >= 1) .and. allocated(d%descs)
    if (.not. valid) return

    ! descs(1,1:ncblocks) is referenced below: make sure it exists.
    if (size(d%descs,1) < 1 .or. size(d%descs,2) < d%ncblocks) then
      valid = .false.
      return
    end if

    do j_block_col = 1, d%ncblocks
      if (.not. d%descs(1,j_block_col)%is_valid()) then
        valid = .false.
        info  = -1
        ! Format the column index; colid must be defined before it is
        ! concatenated into the error message.
        write(colid,'(i0)') j_block_col
        call psb_errpush(info, name, &
             & a_err='Invalid descriptor in column '//trim(colid))
        return
      end if
    end do
  end function psb_desc_nest_is_valid

  ! sizeof: sum of sizeof() over all sub-descriptors (0 if descs is
  !         not allocated)
  function psb_desc_nest_sizeof(d) result(total_bytes)
    class(psb_desc_nest_type), intent(in) :: d
    integer(psb_epk_) :: total_bytes
    integer(psb_ipk_) :: i_block_row, j_block_col

    total_bytes = 0_psb_epk_
    if (allocated(d%descs)) then
      do j_block_col = 1, d%ncblocks
        do i_block_row = 1, d%nrblocks
          total_bytes = total_bytes + d%descs(i_block_row,j_block_col)%sizeof()
        end do
      end do
    end if
  end function psb_desc_nest_sizeof

  ! free: release all sub-descriptors, deallocate descs and reset the
  !       block counts to zero. info returns the first non-zero status
  !       encountered (0 if everything succeeded).
  subroutine psb_desc_nest_free(d, info)
    class(psb_desc_nest_type), intent(inout) :: d
    integer(psb_ipk_), intent(out)           :: info
    integer(psb_ipk_) :: i_block_row, j_block_col, local_info

    info = 0
    if (allocated(d%descs)) then
      do j_block_col = 1, d%ncblocks
        do i_block_row = 1, d%nrblocks
          call d%descs(i_block_row,j_block_col)%free(local_info)
          if (local_info /= 0 .and. info == 0) info = local_info
        end do
      end do
      deallocate(d%descs, stat=local_info)
      if (local_info /= 0 .and. info == 0) info = local_info
    end if
    d%nrblocks = 0
    d%ncblocks = 0
  end subroutine psb_desc_nest_free

end module psb_desc_nest_mod

@ -0,0 +1,49 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! module: psb_c_nest_mod
! Author: Simone Staccone (Stack-1)
!
! Umbrella module for the nested (block-structured) single precision
! complex types. Users need only:
!
! use psb_c_nest_mod
!
! to access all three container types and their parallel operations.
!
module psb_c_nest_mod
  ! Re-exports every module of the nested "c" family through a single
  ! use statement; this module declares nothing of its own.

  ! grid descriptor (per-field, input to compose)
  use psb_desc_nest_mod
  ! block storage (psb_c_nest_sparse_mat)
  use psb_c_nest_mat_mod
  ! MATNEST operator + field-split interface
  use psb_c_nest_base_mat_mod
  ! psb_cd_nest_compose (global descriptor)
  use psb_cd_nest_tools_mod
  ! block assembly + psb_c_nest_rect_block
  use psb_c_nest_tools_mod
  ! psb_c_nest_matrix: init/ins/asb frontend
  use psb_c_nest_builder_mod
end module psb_c_nest_mod

@ -0,0 +1,49 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! module: psb_d_nest_mod
! Author: Simone Staccone (Stack-1)
!
! Umbrella module for the nested (block-structured) double precision
! real types. Users need only:
!
! use psb_d_nest_mod
!
! to access all three container types and their parallel operations.
!
module psb_d_nest_mod
  ! Re-exports every module of the nested "d" family through a single
  ! use statement; this module declares nothing of its own.

  ! grid descriptor (per-field, input to compose)
  use psb_desc_nest_mod
  ! block storage (psb_d_nest_sparse_mat)
  use psb_d_nest_mat_mod
  ! MATNEST operator + field-split interface
  use psb_d_nest_base_mat_mod
  ! psb_cd_nest_compose (global descriptor)
  use psb_cd_nest_tools_mod
  ! block assembly + psb_d_nest_rect_block
  use psb_d_nest_tools_mod
  ! psb_d_nest_matrix: init/ins/asb frontend
  use psb_d_nest_builder_mod
end module psb_d_nest_mod

@ -0,0 +1,49 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! module: psb_s_nest_mod
! Author: Simone Staccone (Stack-1)
!
! Umbrella module for the nested (block-structured) single precision
! real types. Users need only:
!
! use psb_s_nest_mod
!
! to access all three container types and their parallel operations.
!
module psb_s_nest_mod
  ! Re-exports every module of the nested "s" family through a single
  ! use statement; this module declares nothing of its own.

  ! grid descriptor (per-field, input to compose)
  use psb_desc_nest_mod
  ! block storage (psb_s_nest_sparse_mat)
  use psb_s_nest_mat_mod
  ! MATNEST operator + field-split interface
  use psb_s_nest_base_mat_mod
  ! psb_cd_nest_compose (global descriptor)
  use psb_cd_nest_tools_mod
  ! block assembly + psb_s_nest_rect_block
  use psb_s_nest_tools_mod
  ! psb_s_nest_matrix: init/ins/asb frontend
  use psb_s_nest_builder_mod
end module psb_s_nest_mod

@ -0,0 +1,49 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! module: psb_z_nest_mod
! Author: Simone Staccone (Stack-1)
!
! Umbrella module for the nested (block-structured) double precision
! complex types. Users need only:
!
! use psb_z_nest_mod
!
! to access all three container types and their parallel operations.
!
module psb_z_nest_mod
  ! Re-exports every module of the nested "z" family through a single
  ! use statement; this module declares nothing of its own.

  ! grid descriptor (per-field, input to compose)
  use psb_desc_nest_mod
  ! block storage (psb_z_nest_sparse_mat)
  use psb_z_nest_mat_mod
  ! MATNEST operator + field-split interface
  use psb_z_nest_base_mat_mod
  ! psb_cd_nest_compose (global descriptor)
  use psb_cd_nest_tools_mod
  ! block assembly + psb_z_nest_rect_block
  use psb_z_nest_tools_mod
  ! psb_z_nest_matrix: init/ins/asb frontend
  use psb_z_nest_builder_mod
end module psb_z_nest_mod

File diff suppressed because it is too large Load Diff

@ -0,0 +1,149 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! module: psb_c_nest_mat_mod
! Author: Simone Staccone (Stack-1)
!
! Defines psb_c_nest_sparse_mat: a block-structured distributed sparse
! matrix for single precision complex arithmetic.
!
! The matrix is stored as a 2-D array of psb_cspmat_type sub-matrices.
! Block presence is determined directly from the sub-matrix storage: a block
! (i,j) is present iff mats(i,j)%a is allocated (absent blocks contribute zero
! to any product). There is no separate presence flag array.
!
! Descriptor convention (current nested design)
! ---------------------------------------------
! Each matrix block (i,j) is associated with descs(i,j) from the
! corresponding psb_desc_nest_type. Nested tools (psb_spall_nest,
! psb_spins_nest, psb_spasb_nest, psb_spmm) consistently pass
! descs(i,j) together with mats(i,j).
!
! A block may be structurally absent (NULL/zero): this is represented by
! mats(i,j) left unbuilt (mats(i,j)%a not allocated). In that case the
! block contributes zero and is skipped by nested kernels.
!
! Descriptor storage is distinct from matrix presence: descriptors are
! typically defined for all block positions in descs(:,:), while actual
! matrix blocks may be present only on a subset.
!
! Reference examples in test/pdegen:
! * psb_c_pde_nest.full.F90 (A(2,2) left NULL, mats(2,2)%a not allocated)
! * psb_c_nest_tools.F90 and psb_c_pde_nest_full_tools.F90
! (2-D desc_nest%descs(i,j) used in nested allocation/assembly).
!
module psb_c_nest_mat_mod
  use psb_c_mat_mod
  implicit none

  !
  ! psb_c_nest_sparse_mat
  !   nrblocks  : number of block rows
  !   ncblocks  : number of block columns
  !   mats(:,:) : sub-matrices, addressed as mats(block_row, block_col);
  !               a block counts as present when its %a component is
  !               allocated
  !
  type :: psb_c_nest_sparse_mat
    integer(psb_ipk_) :: nrblocks = 0
    integer(psb_ipk_) :: ncblocks = 0
    type(psb_cspmat_type), allocatable :: mats(:,:)
  contains
    procedure :: get_nrblocks => psb_c_nest_mat_get_nrb
    procedure :: get_ncblocks => psb_c_nest_mat_get_ncb
    procedure :: has_block    => psb_c_nest_mat_has_block
    procedure :: sizeof       => psb_c_nest_mat_sizeof
    procedure :: free         => psb_c_nest_mat_free
  end type psb_c_nest_sparse_mat

contains

  ! get_nrblocks: stored number of block rows
  function psb_c_nest_mat_get_nrb(a) result(n)
    class(psb_c_nest_sparse_mat), intent(in) :: a
    integer(psb_ipk_) :: n
    n = a%nrblocks
  end function psb_c_nest_mat_get_nrb

  ! get_ncblocks: stored number of block columns
  function psb_c_nest_mat_get_ncb(a) result(n)
    class(psb_c_nest_sparse_mat), intent(in) :: a
    integer(psb_ipk_) :: n
    n = a%ncblocks
  end function psb_c_nest_mat_get_ncb

  ! has_block: .true. only when (i_block_row, j_block_col) lies inside
  !   1..nrblocks x 1..ncblocks, mats is allocated, and the addressed
  !   sub-matrix has its storage component %a allocated.
  !   Presence is read straight from the sub-matrix; there is no
  !   separate flag array.
  function psb_c_nest_mat_has_block(a, i_block_row, j_block_col) result(has)
    class(psb_c_nest_sparse_mat), intent(in) :: a
    integer(psb_ipk_), intent(in)            :: i_block_row, j_block_col
    logical :: has
    logical :: row_ok, col_ok

    has    = .false.
    row_ok = (1 <= i_block_row) .and. (i_block_row <= a%nrblocks)
    col_ok = (1 <= j_block_col) .and. (j_block_col <= a%ncblocks)
    if (row_ok .and. col_ok) then
      if (allocated(a%mats)) then
        has = allocated(a%mats(i_block_row, j_block_col)%a)
      end if
    end if
  end function psb_c_nest_mat_has_block

  ! sizeof: sum of sizeof() over the sub-matrices whose %a is allocated
  function psb_c_nest_mat_sizeof(a) result(total_bytes)
    class(psb_c_nest_sparse_mat), intent(in) :: a
    integer(psb_epk_) :: total_bytes
    integer(psb_ipk_) :: ib, jb

    total_bytes = 0_psb_epk_
    if (.not. allocated(a%mats)) return

    do jb = 1, a%ncblocks
      do ib = 1, a%nrblocks
        ! absent blocks contribute nothing
        if (.not. allocated(a%mats(ib, jb)%a)) cycle
        total_bytes = total_bytes + a%mats(ib, jb)%sizeof()
      end do
    end do
  end function psb_c_nest_mat_sizeof

  ! free: release every present sub-matrix, deallocate mats and reset
  !   the block counts. info carries the deallocate status (0 on success).
  subroutine psb_c_nest_mat_free(a, info)
    class(psb_c_nest_sparse_mat), intent(inout) :: a
    integer(psb_ipk_), intent(out)              :: info
    integer(psb_ipk_) :: ib, jb, ierr

    info = 0
    if (allocated(a%mats)) then
      do jb = 1, a%ncblocks
        do ib = 1, a%nrblocks
          if (.not. allocated(a%mats(ib, jb)%a)) cycle
          call a%mats(ib, jb)%free()
        end do
      end do
      deallocate(a%mats, stat=ierr)
      ! info is still 0 here, so a failed deallocate is what gets reported
      if (ierr /= 0) info = ierr
    end if
    a%nrblocks = 0
    a%ncblocks = 0
  end subroutine psb_c_nest_mat_free

end module psb_c_nest_mat_mod

File diff suppressed because it is too large Load Diff

@ -0,0 +1,149 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific prior written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! module: psb_d_nest_mat_mod
! Author: Simone Staccone (Stack-1)
!
! Defines psb_d_nest_sparse_mat: a block-structured distributed sparse
! matrix for double precision real arithmetic.
!
! The matrix is stored as a 2-D array of psb_dspmat_type sub-matrices.
! Block presence is determined directly from the sub-matrix storage: a block
! (i,j) is present iff mats(i,j)%a is allocated (absent blocks contribute zero
! to any product). There is no separate presence flag array.
!
! Descriptor convention (current nested design)
! ---------------------------------------------
! Each matrix block (i,j) is associated with descs(i,j) from the
! corresponding psb_desc_nest_type. Nested tools (psb_spall_nest,
! psb_spins_nest, psb_spasb_nest, psb_spmm) consistently pass
! descs(i,j) together with mats(i,j).
!
! A block may be structurally absent (NULL/zero): this is represented by
! mats(i,j) left unbuilt (mats(i,j)%a not allocated). In that case the
! block contributes zero and is skipped by nested kernels.
!
! Descriptor storage is distinct from matrix presence: descriptors are
! typically defined for all block positions in descs(:,:), while actual
! matrix blocks may be present only on a subset.
!
! Reference examples in test/pdegen:
! * psb_d_pde_nest.full.F90 (A(2,2) left NULL, mats(2,2)%a not allocated)
! * psb_d_nest_tools.F90 and psb_d_pde_nest_full_tools.F90
! (2-D desc_nest%descs(i,j) used in nested allocation/assembly).
!
module psb_d_nest_mat_mod
  use psb_d_mat_mod
  implicit none

  ! Block-structured sparse matrix: an nrblocks x ncblocks grid of
  ! psb_dspmat_type sub-matrices. A grid position is treated as present
  ! exactly when the storage component %a of its sub-matrix is allocated.
  type :: psb_d_nest_sparse_mat
    integer(psb_ipk_) :: nrblocks = 0     ! number of block rows
    integer(psb_ipk_) :: ncblocks = 0     ! number of block columns
    type(psb_dspmat_type), allocatable, dimension(:,:) :: mats
  contains
    procedure :: get_nrblocks => psb_d_nest_mat_get_nrb
    procedure :: get_ncblocks => psb_d_nest_mat_get_ncb
    procedure :: has_block => psb_d_nest_mat_has_block
    procedure :: sizeof => psb_d_nest_mat_sizeof
    procedure :: free => psb_d_nest_mat_free
  end type psb_d_nest_sparse_mat

contains

  ! Accessor: number of block rows (the nrblocks component).
  function psb_d_nest_mat_get_nrb(a) result(block_rows)
    class(psb_d_nest_sparse_mat), intent(in) :: a
    integer(psb_ipk_) :: block_rows

    block_rows = a%nrblocks
  end function psb_d_nest_mat_get_nrb

  ! Accessor: number of block columns (the ncblocks component).
  function psb_d_nest_mat_get_ncb(a) result(block_cols)
    class(psb_d_nest_sparse_mat), intent(in) :: a
    integer(psb_ipk_) :: block_cols

    block_cols = a%ncblocks
  end function psb_d_nest_mat_get_ncb

  ! Query whether grid position (i_block_row, j_block_col) holds a built
  ! sub-matrix. Returns .false. for indices outside 1..nrblocks /
  ! 1..ncblocks and when the grid is not allocated; otherwise returns the
  ! allocation status of the sub-matrix storage %a.
  function psb_d_nest_mat_has_block(a, i_block_row, j_block_col) result(is_present)
    class(psb_d_nest_sparse_mat), intent(in) :: a
    integer(psb_ipk_), intent(in) :: i_block_row, j_block_col
    logical :: is_present
    logical :: row_ok, col_ok

    is_present = .false.
    row_ok = (i_block_row >= 1) .and. (i_block_row <= a%nrblocks)
    col_ok = (j_block_col >= 1) .and. (j_block_col <= a%ncblocks)
    ! all operands are safe to evaluate without short-circuiting; the
    ! element access happens only once the three conditions hold
    if (row_ok .and. col_ok .and. allocated(a%mats)) then
      is_present = allocated(a%mats(i_block_row, j_block_col)%a)
    end if
  end function psb_d_nest_mat_has_block

  ! Sum of the sizeof() values of every sub-matrix whose storage is
  ! allocated; zero when the grid itself is not allocated.
  function psb_d_nest_mat_sizeof(a) result(total_bytes)
    class(psb_d_nest_sparse_mat), intent(in) :: a
    integer(psb_epk_) :: total_bytes
    integer(psb_ipk_) :: ib, jb

    total_bytes = 0_psb_epk_
    if (.not. allocated(a%mats)) return

    do jb = 1, a%ncblocks
      do ib = 1, a%nrblocks
        if (.not. allocated(a%mats(ib, jb)%a)) cycle
        total_bytes = total_bytes + a%mats(ib, jb)%sizeof()
      end do
    end do
  end function psb_d_nest_mat_sizeof

  ! Free every built sub-matrix, deallocate the grid and reset both block
  ! counts to zero. info is 0 on success, otherwise the status returned by
  ! the deallocation of the grid.
  subroutine psb_d_nest_mat_free(a, info)
    class(psb_d_nest_sparse_mat), intent(inout) :: a
    integer(psb_ipk_), intent(out) :: info
    integer(psb_ipk_) :: ib, jb, dealloc_stat

    dealloc_stat = 0
    if (allocated(a%mats)) then
      do jb = 1, a%ncblocks
        do ib = 1, a%nrblocks
          if (.not. allocated(a%mats(ib, jb)%a)) cycle
          call a%mats(ib, jb)%free()
        end do
      end do
      deallocate(a%mats, stat=dealloc_stat)
    end if
    info = dealloc_stat

    ! the counts are reset whether or not a grid was allocated
    a%nrblocks = 0
    a%ncblocks = 0
  end subroutine psb_d_nest_mat_free

end module psb_d_nest_mat_mod

File diff suppressed because it is too large Load Diff

@ -0,0 +1,149 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific prior written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! module: psb_s_nest_mat_mod
! Author: Simone Staccone (Stack-1)
!
! Defines psb_s_nest_sparse_mat: a block-structured distributed sparse
! matrix for single precision real arithmetic.
!
! The matrix is stored as a 2-D array of psb_sspmat_type sub-matrices.
! Block presence is determined directly from the sub-matrix storage: a block
! (i,j) is present iff mats(i,j)%a is allocated (absent blocks contribute zero
! to any product). There is no separate presence flag array.
!
! Descriptor convention (current nested design)
! ---------------------------------------------
! Each matrix block (i,j) is associated with descs(i,j) from the
! corresponding psb_desc_nest_type. Nested tools (psb_spall_nest,
! psb_spins_nest, psb_spasb_nest, psb_spmm) consistently pass
! descs(i,j) together with mats(i,j).
!
! A block may be structurally absent (NULL/zero): this is represented by
! mats(i,j) left unbuilt (mats(i,j)%a not allocated). In that case the
! block contributes zero and is skipped by nested kernels.
!
! Descriptor storage is distinct from matrix presence: descriptors are
! typically defined for all block positions in descs(:,:), while actual
! matrix blocks may be present only on a subset.
!
! Reference examples in test/pdegen:
! * psb_s_pde_nest.full.F90 (A(2,2) left NULL, mats(2,2)%a not allocated)
! * psb_s_nest_tools.F90 and psb_s_pde_nest_full_tools.F90
! (2-D desc_nest%descs(i,j) used in nested allocation/assembly).
!
module psb_s_nest_mat_mod
  use psb_s_mat_mod
  implicit none

  ! Block-structured sparse matrix: an nrblocks x ncblocks grid of
  ! psb_sspmat_type sub-matrices. A grid position is treated as present
  ! exactly when the storage component %a of its sub-matrix is allocated.
  type :: psb_s_nest_sparse_mat
    integer(psb_ipk_) :: nrblocks = 0     ! number of block rows
    integer(psb_ipk_) :: ncblocks = 0     ! number of block columns
    type(psb_sspmat_type), allocatable, dimension(:,:) :: mats
  contains
    procedure :: get_nrblocks => psb_s_nest_mat_get_nrb
    procedure :: get_ncblocks => psb_s_nest_mat_get_ncb
    procedure :: has_block => psb_s_nest_mat_has_block
    procedure :: sizeof => psb_s_nest_mat_sizeof
    procedure :: free => psb_s_nest_mat_free
  end type psb_s_nest_sparse_mat

contains

  ! Accessor: number of block rows (the nrblocks component).
  function psb_s_nest_mat_get_nrb(a) result(block_rows)
    class(psb_s_nest_sparse_mat), intent(in) :: a
    integer(psb_ipk_) :: block_rows

    block_rows = a%nrblocks
  end function psb_s_nest_mat_get_nrb

  ! Accessor: number of block columns (the ncblocks component).
  function psb_s_nest_mat_get_ncb(a) result(block_cols)
    class(psb_s_nest_sparse_mat), intent(in) :: a
    integer(psb_ipk_) :: block_cols

    block_cols = a%ncblocks
  end function psb_s_nest_mat_get_ncb

  ! Query whether grid position (i_block_row, j_block_col) holds a built
  ! sub-matrix. Returns .false. for indices outside 1..nrblocks /
  ! 1..ncblocks and when the grid is not allocated; otherwise returns the
  ! allocation status of the sub-matrix storage %a.
  function psb_s_nest_mat_has_block(a, i_block_row, j_block_col) result(is_present)
    class(psb_s_nest_sparse_mat), intent(in) :: a
    integer(psb_ipk_), intent(in) :: i_block_row, j_block_col
    logical :: is_present
    logical :: row_ok, col_ok

    is_present = .false.
    row_ok = (i_block_row >= 1) .and. (i_block_row <= a%nrblocks)
    col_ok = (j_block_col >= 1) .and. (j_block_col <= a%ncblocks)
    ! all operands are safe to evaluate without short-circuiting; the
    ! element access happens only once the three conditions hold
    if (row_ok .and. col_ok .and. allocated(a%mats)) then
      is_present = allocated(a%mats(i_block_row, j_block_col)%a)
    end if
  end function psb_s_nest_mat_has_block

  ! Sum of the sizeof() values of every sub-matrix whose storage is
  ! allocated; zero when the grid itself is not allocated.
  function psb_s_nest_mat_sizeof(a) result(total_bytes)
    class(psb_s_nest_sparse_mat), intent(in) :: a
    integer(psb_epk_) :: total_bytes
    integer(psb_ipk_) :: ib, jb

    total_bytes = 0_psb_epk_
    if (.not. allocated(a%mats)) return

    do jb = 1, a%ncblocks
      do ib = 1, a%nrblocks
        if (.not. allocated(a%mats(ib, jb)%a)) cycle
        total_bytes = total_bytes + a%mats(ib, jb)%sizeof()
      end do
    end do
  end function psb_s_nest_mat_sizeof

  ! Free every built sub-matrix, deallocate the grid and reset both block
  ! counts to zero. info is 0 on success, otherwise the status returned by
  ! the deallocation of the grid.
  subroutine psb_s_nest_mat_free(a, info)
    class(psb_s_nest_sparse_mat), intent(inout) :: a
    integer(psb_ipk_), intent(out) :: info
    integer(psb_ipk_) :: ib, jb, dealloc_stat

    dealloc_stat = 0
    if (allocated(a%mats)) then
      do jb = 1, a%ncblocks
        do ib = 1, a%nrblocks
          if (.not. allocated(a%mats(ib, jb)%a)) cycle
          call a%mats(ib, jb)%free()
        end do
      end do
      deallocate(a%mats, stat=dealloc_stat)
    end if
    info = dealloc_stat

    ! the counts are reset whether or not a grid was allocated
    a%nrblocks = 0
    a%ncblocks = 0
  end subroutine psb_s_nest_mat_free

end module psb_s_nest_mat_mod

File diff suppressed because it is too large Load Diff

@ -0,0 +1,149 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific prior written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! module: psb_z_nest_mat_mod
! Author: Simone Staccone (Stack-1)
!
! Defines psb_z_nest_sparse_mat: a block-structured distributed sparse
! matrix for double precision complex arithmetic.
!
! The matrix is stored as a 2-D array of psb_zspmat_type sub-matrices.
! Block presence is determined directly from the sub-matrix storage: a block
! (i,j) is present iff mats(i,j)%a is allocated (absent blocks contribute zero
! to any product). There is no separate presence flag array.
!
! Descriptor convention (current nested design)
! ---------------------------------------------
! Each matrix block (i,j) is associated with descs(i,j) from the
! corresponding psb_desc_nest_type. Nested tools (psb_spall_nest,
! psb_spins_nest, psb_spasb_nest, psb_spmm) consistently pass
! descs(i,j) together with mats(i,j).
!
! A block may be structurally absent (NULL/zero): this is represented by
! mats(i,j) left unbuilt (mats(i,j)%a not allocated). In that case the
! block contributes zero and is skipped by nested kernels.
!
! Descriptor storage is distinct from matrix presence: descriptors are
! typically defined for all block positions in descs(:,:), while actual
! matrix blocks may be present only on a subset.
!
! Reference examples in test/pdegen:
! * psb_z_pde_nest.full.F90 (A(2,2) left NULL, mats(2,2)%a not allocated)
! * psb_z_nest_tools.F90 and psb_z_pde_nest_full_tools.F90
! (2-D desc_nest%descs(i,j) used in nested allocation/assembly).
!
module psb_z_nest_mat_mod
  use psb_z_mat_mod
  implicit none

  ! Block-structured sparse matrix: an nrblocks x ncblocks grid of
  ! psb_zspmat_type sub-matrices. A grid position is treated as present
  ! exactly when the storage component %a of its sub-matrix is allocated.
  type :: psb_z_nest_sparse_mat
    integer(psb_ipk_) :: nrblocks = 0     ! number of block rows
    integer(psb_ipk_) :: ncblocks = 0     ! number of block columns
    type(psb_zspmat_type), allocatable, dimension(:,:) :: mats
  contains
    procedure :: get_nrblocks => psb_z_nest_mat_get_nrb
    procedure :: get_ncblocks => psb_z_nest_mat_get_ncb
    procedure :: has_block => psb_z_nest_mat_has_block
    procedure :: sizeof => psb_z_nest_mat_sizeof
    procedure :: free => psb_z_nest_mat_free
  end type psb_z_nest_sparse_mat

contains

  ! Accessor: number of block rows (the nrblocks component).
  function psb_z_nest_mat_get_nrb(a) result(block_rows)
    class(psb_z_nest_sparse_mat), intent(in) :: a
    integer(psb_ipk_) :: block_rows

    block_rows = a%nrblocks
  end function psb_z_nest_mat_get_nrb

  ! Accessor: number of block columns (the ncblocks component).
  function psb_z_nest_mat_get_ncb(a) result(block_cols)
    class(psb_z_nest_sparse_mat), intent(in) :: a
    integer(psb_ipk_) :: block_cols

    block_cols = a%ncblocks
  end function psb_z_nest_mat_get_ncb

  ! Query whether grid position (i_block_row, j_block_col) holds a built
  ! sub-matrix. Returns .false. for indices outside 1..nrblocks /
  ! 1..ncblocks and when the grid is not allocated; otherwise returns the
  ! allocation status of the sub-matrix storage %a.
  function psb_z_nest_mat_has_block(a, i_block_row, j_block_col) result(is_present)
    class(psb_z_nest_sparse_mat), intent(in) :: a
    integer(psb_ipk_), intent(in) :: i_block_row, j_block_col
    logical :: is_present
    logical :: row_ok, col_ok

    is_present = .false.
    row_ok = (i_block_row >= 1) .and. (i_block_row <= a%nrblocks)
    col_ok = (j_block_col >= 1) .and. (j_block_col <= a%ncblocks)
    ! all operands are safe to evaluate without short-circuiting; the
    ! element access happens only once the three conditions hold
    if (row_ok .and. col_ok .and. allocated(a%mats)) then
      is_present = allocated(a%mats(i_block_row, j_block_col)%a)
    end if
  end function psb_z_nest_mat_has_block

  ! Sum of the sizeof() values of every sub-matrix whose storage is
  ! allocated; zero when the grid itself is not allocated.
  function psb_z_nest_mat_sizeof(a) result(total_bytes)
    class(psb_z_nest_sparse_mat), intent(in) :: a
    integer(psb_epk_) :: total_bytes
    integer(psb_ipk_) :: ib, jb

    total_bytes = 0_psb_epk_
    if (.not. allocated(a%mats)) return

    do jb = 1, a%ncblocks
      do ib = 1, a%nrblocks
        if (.not. allocated(a%mats(ib, jb)%a)) cycle
        total_bytes = total_bytes + a%mats(ib, jb)%sizeof()
      end do
    end do
  end function psb_z_nest_mat_sizeof

  ! Free every built sub-matrix, deallocate the grid and reset both block
  ! counts to zero. info is 0 on success, otherwise the status returned by
  ! the deallocation of the grid.
  subroutine psb_z_nest_mat_free(a, info)
    class(psb_z_nest_sparse_mat), intent(inout) :: a
    integer(psb_ipk_), intent(out) :: info
    integer(psb_ipk_) :: ib, jb, dealloc_stat

    dealloc_stat = 0
    if (allocated(a%mats)) then
      do jb = 1, a%ncblocks
        do ib = 1, a%nrblocks
          if (.not. allocated(a%mats(ib, jb)%a)) cycle
          call a%mats(ib, jb)%free()
        end do
      end do
      deallocate(a%mats, stat=dealloc_stat)
    end if
    info = dealloc_stat

    ! the counts are reset whether or not a grid was allocated
    a%nrblocks = 0
    a%ncblocks = 0
  end subroutine psb_z_nest_mat_free

end module psb_z_nest_mat_mod

@ -0,0 +1,414 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific prior written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! File: psb_c_nest_builder_mod.F90
!
! Module: psb_c_nest_builder_mod
! Author: Simone Staccone (Stack-1)
!
! User-friendly frontend to build a nested (MATNEST) operator without manually
! managing per-field descriptors, the union halo, composition and setup.
!
! All the boilerplate (identical for every nested operator) is hidden behind a
! single type, psb_c_nest_matrix, with the usual PSBLAS init/ins/asb pattern:
!
! type(psb_c_nest_matrix) :: nested_matrix
! call nested_matrix%init(ctxt, [n1, n2], info) ! 2 fields of global size n1, n2
! call nested_matrix%ins(1,1, n, rows, cols, vals, info) ! values of block (1,1) = A
! call nested_matrix%ins(1,2, n, rows, cols, vals, info) ! values of block (1,2) = B^T
! call nested_matrix%ins(2,1, n, rows, cols, vals, info) ! values of block (2,1) = B
! ... ! (absent blocks = not inserted)
! call nested_matrix%asb(info) ! assemble: builds a_glob, desc_glob
!
! ! from here on nested_matrix%a_glob and nested_matrix%desc_glob are an
! ! ordinary distributed matrix/descriptor:
! call psb_geall(x, nested_matrix%desc_glob, info)
! call psb_krylov('CG', nested_matrix%a_glob, prec, b, x, eps, nested_matrix%desc_glob, info, ...)
!
! Indices: in ins(block_row, block_col, ...) the rows live in the index space of
! field block_row, the columns in the index space of field block_col (GLOBAL
! field indices, 1..field_size). Each process inserts only the rows it owns
! (PSBLAS convention). Off-diagonal blocks may be rectangular.
!
! NOTE: after asb the object holds consistent internal pointers (a_glob%a points
! to block_storage / grid_desc): do not copy/move the object after assembly.
!
module psb_c_nest_builder_mod
use psb_const_mod
use psb_error_mod, only : psb_errpush
use psb_penv_mod, only : psb_ctxt_type, psb_info
use psb_desc_mod, only : psb_desc_type
use psb_c_mat_mod, only : psb_cspmat_type
use psb_c_base_mat_mod, only : psb_c_base_sparse_mat
use psb_cd_tools_mod, only : psb_cdall, psb_cdins, psb_cdasb
use psb_desc_nest_mod, only : psb_desc_nest_type
use psb_c_nest_mat_mod, only : psb_c_nest_sparse_mat
use psb_c_nest_base_mat_mod, only : psb_c_nest_base_mat, psb_c_nest_base_setup
use psb_cd_nest_tools_mod, only : psb_cd_nest_compose
use psb_c_nest_tools_mod, only : psb_c_nest_rect_block
implicit none
! growing triplet buffer for a single block
type :: psb_c_nest_block_buffer
  ! Number of (row, col, value) triplets currently stored. The arrays
  ! below may be longer than this: only positions 1..n_entries are filled
  ! by block_buffer_append.
  integer(psb_ipk_) :: n_entries = 0
  integer(psb_lpk_), allocatable, dimension(:) :: entry_rows   ! row index of each triplet
  integer(psb_lpk_), allocatable, dimension(:) :: entry_cols   ! column index of each triplet
  complex(psb_spk_), allocatable, dimension(:) :: entry_vals   ! coefficient of each triplet
end type psb_c_nest_block_buffer
type :: psb_c_nest_matrix
  type(psb_ctxt_type) :: context            ! context handed to init
  integer(psb_ipk_) :: n_fields = 0         ! number of fields (grid is n_fields x n_fields)
  logical :: assembled = .false.            ! set to .true. at the end of asb

  ! --- state used while the operator is being built ---
  ! one descriptor per field
  type(psb_desc_type), allocatable, dimension(:) :: field_desc
  ! pending triplets of block (i,j), filled by ins
  type(psb_c_nest_block_buffer), allocatable, dimension(:,:) :: block_buffer

  ! --- results of asb (owned here; a_glob%a refers back to them) ---
  type(psb_c_nest_sparse_mat) :: block_storage   ! the assembled blocks
  type(psb_desc_nest_type) :: grid_desc          ! descriptor grid, descs(i,j)
  type(psb_cspmat_type) :: a_glob                ! matrix to hand to the Krylov solvers
  type(psb_desc_type) :: desc_glob               ! composed global descriptor
contains
  ! build / teardown life cycle
  procedure, pass(op) :: init => psb_c_nest_op_init
  procedure, pass(op) :: ins => psb_c_nest_op_ins
  procedure, pass(op) :: asb => psb_c_nest_op_asb
  procedure, pass(op) :: free => psb_c_nest_op_free
  ! convenience queries on the row distribution of a field, so callers
  ! need not touch field_desc(i) directly
  procedure, pass(op) :: get_owned_rows => psb_c_nest_op_get_owned_rows
  procedure, pass(op) :: get_owned_row_count => psb_c_nest_op_get_owned_row_count
end type psb_c_nest_matrix
private
public :: psb_c_nest_matrix
contains
! init: create one descriptor per field (block distribution from the global sizes)
subroutine psb_c_nest_op_init(op, context, field_sizes, info)
  ! Start building a nested operator with size(field_sizes) fields.
  !
  !   op          - operator being initialised
  !   context     - parallel context; stored in op and used for the
  !                 per-field descriptors
  !   field_sizes - global size of each field
  !   info        - psb_success_ on success; psb_err_alloc_dealloc_ when the
  !                 internal arrays cannot be allocated; otherwise the code
  !                 returned by psb_cdall
  !
  ! Each field gets its own descriptor created with psb_cdall(nl=...), where
  ! the local row count splits the global size evenly over the processes
  ! and the lowest ranks absorb the remainder, one extra row each.
  class(psb_c_nest_matrix), intent(inout) :: op
  type(psb_ctxt_type), intent(in) :: context
  integer(psb_lpk_), intent(in) :: field_sizes(:)
  integer(psb_ipk_), intent(out) :: info
  integer(psb_ipk_) :: my_rank, num_procs, field_count, k, local_rows
  integer(psb_lpk_) :: global_size, procs_l
  character(len=24) :: name

  name = 'psb_c_nest_op_init'
  info = psb_success_
  call psb_info(context, my_rank, num_procs)

  field_count = size(field_sizes)
  op%context = context
  op%n_fields = field_count
  op%assembled = .false.

  allocate(op%field_desc(field_count), &
       & op%block_buffer(field_count, field_count), stat=info)
  if (info /= 0) then
    info = psb_err_alloc_dealloc_
    call psb_errpush(info, name)
    return
  end if

  procs_l = int(num_procs, psb_lpk_)
  do k = 1, field_count
    global_size = field_sizes(k)
    ! even share, plus one row on the first mod(global_size, num_procs) ranks
    local_rows = int(global_size / procs_l, psb_ipk_)
    if (int(my_rank, psb_lpk_) < mod(global_size, procs_l)) then
      local_rows = local_rows + 1
    end if
    call psb_cdall(context, op%field_desc(k), info, nl=local_rows)
    if (info /= psb_success_) then
      call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdall')
      return
    end if
  end do
end subroutine psb_c_nest_op_init
! ins: accumulate the triplets into block (block_row,block_col) and register the
! columns (field block_col index space) into that descriptor's union halo
subroutine psb_c_nest_op_ins(op, block_row, block_col, n_entries, entry_rows, entry_cols, entry_vals, info)
  ! Queue n_entries triplets for block (block_row, block_col) and register
  ! their column indices in the descriptor of field block_col.
  !
  !   op                    - operator under construction (must not be assembled)
  !   block_row, block_col  - block coordinates, each in 1..op%n_fields
  !   n_entries             - number of triplets to take from the arrays;
  !                           a value <= 0 is a successful no-op
  !   entry_rows/cols/vals  - triplet data; only positions 1..n_entries are read
  !   info                  - psb_success_ on success; psb_err_invalid_input_
  !                           for an assembled or uninitialised operator, a
  !                           block index out of range, or n_entries larger
  !                           than any of the three arrays;
  !                           psb_err_alloc_dealloc_ if the buffer cannot
  !                           grow; otherwise the code returned by psb_cdins
  class(psb_c_nest_matrix), intent(inout) :: op
  integer(psb_ipk_), intent(in) :: block_row, block_col, n_entries
  integer(psb_lpk_), intent(in) :: entry_rows(:), entry_cols(:)
  complex(psb_spk_), intent(in) :: entry_vals(:)
  integer(psb_ipk_), intent(out) :: info
  character(len=24) :: name
  info = psb_success_
  name = 'psb_c_nest_op_ins'
  if (op%assembled) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='operator already assembled'); return
  end if
  if (block_row < 1 .or. block_row > op%n_fields .or. &
       & block_col < 1 .or. block_col > op%n_fields) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='block index out of range'); return
  end if
  ! n_fields can be non-zero while the storage is missing (init records
  ! n_fields before its allocation is known to have succeeded); refuse to
  ! touch unallocated buffers in that case.
  if ((.not. allocated(op%block_buffer)) .or. (.not. allocated(op%field_desc))) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='operator not initialized'); return
  end if
  if (n_entries <= 0) return
  ! Sections 1:n_entries of all three arrays are read below; reject a count
  ! that would run past the end of any of them.
  if (n_entries > size(entry_rows) .or. n_entries > size(entry_cols) .or. &
       & n_entries > size(entry_vals)) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='n_entries exceeds input array size'); return
  end if
  call block_buffer_append(op%block_buffer(block_row,block_col), n_entries, &
       & entry_rows, entry_cols, entry_vals, info)
  if (info /= psb_success_) then
    info = psb_err_alloc_dealloc_; call psb_errpush(info, name); return
  end if
  ! the columns of block (block_row,block_col) are indices of field
  ! block_col, so they are registered in that field's descriptor
  ! (this is done for diagonal blocks as well)
  call psb_cdins(n_entries, entry_cols(1:n_entries), op%field_desc(block_col), info)
  if (info /= psb_success_) then
    call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdins'); return
  end if
end subroutine psb_c_nest_op_ins
! asb: assemble the descriptors, build the blocks, compose the global
! descriptor, set up the operator and wrap it into a_glob.
! The optional type ('CSR'/'CSC'/'COO', default 'CSR') or mold (any
! class extending psb_c_base_sparse_mat, e.g. the psb_ext ELL/HLL or
! the psb_cuda device formats) selects the storage format of the blocks.
subroutine psb_c_nest_op_asb(op, info, type, mold)
  ! Assemble the nested operator from the triplets queued by ins.
  !
  ! Steps: assemble each field descriptor; build every block that received
  ! at least one triplet; clone the field descriptors into the descriptor
  ! grid; compose the global descriptor; set up the nested operator and
  ! store a copy of it in op%a_glob; release the triplet buffers.
  !
  !   op    - operator to assemble; declared target because the assembled
  !           operator keeps referring to op%block_storage / op%grid_desc
  !   info  - psb_success_ on success; psb_err_invalid_input_ when op is
  !           already assembled or was never initialised;
  !           psb_err_alloc_dealloc_ on allocation failure; otherwise the
  !           code returned by the failing callee
  !   type, mold - optional storage-format selectors, forwarded unchanged to
  !           psb_c_nest_rect_block (the default applied when both are
  !           absent is decided there)
  class(psb_c_nest_matrix), intent(inout), target :: op
  integer(psb_ipk_), intent(out) :: info
  character(len=*), intent(in), optional :: type
  class(psb_c_base_sparse_mat), intent(in), optional :: mold
  type(psb_c_nest_base_mat) :: nest_operator
  integer(psb_ipk_) :: n_fields, i_field, j_field
  character(len=24) :: name
  info = psb_success_
  name = 'psb_c_nest_op_asb'
  ! A second assembly could only fail later, when block_storage%mats is
  ! allocated again; report the misuse up front instead.
  if (op%assembled) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='operator already assembled'); return
  end if
  ! field_desc(:) and block_buffer(:,:) are indexed below
  if ((.not. allocated(op%field_desc)) .or. (.not. allocated(op%block_buffer))) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='operator not initialized'); return
  end if
  n_fields = op%n_fields
  ! 1) assemble the per-field descriptors (column indices were registered in ins)
  do i_field = 1, n_fields
    call psb_cdasb(op%field_desc(i_field), info)
    if (info /= psb_success_) then
      call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdasb'); return
    end if
  end do
  ! 2) build the local blocks from the triplets; blocks that never received
  !    an entry are left unbuilt
  op%block_storage%nrblocks = n_fields
  op%block_storage%ncblocks = n_fields
  allocate(op%block_storage%mats(n_fields,n_fields), stat=info)
  if (info /= 0) then
    info = psb_err_alloc_dealloc_; call psb_errpush(info, name); return
  end if
  do j_field = 1, n_fields
    do i_field = 1, n_fields
      if (op%block_buffer(i_field,j_field)%n_entries > 0) then
        call psb_c_nest_rect_block(op%block_storage%mats(i_field,j_field), &
             & op%block_buffer(i_field,j_field)%n_entries, &
             & op%block_buffer(i_field,j_field)%entry_rows, &
             & op%block_buffer(i_field,j_field)%entry_cols, &
             & op%block_buffer(i_field,j_field)%entry_vals, &
             & op%field_desc(i_field), op%field_desc(j_field), info, &
             & type=type, mold=mold)
        if (info /= psb_success_) then
          call psb_errpush(psb_err_from_subroutine_, name, a_err='rect_block'); return
        end if
      end if
    end do
  end do
  ! 3) descriptor grid: descs(i,j) = clone of the descriptor of field j
  op%grid_desc%nrblocks = n_fields
  op%grid_desc%ncblocks = n_fields
  allocate(op%grid_desc%descs(n_fields,n_fields), stat=info)
  if (info /= 0) then
    info = psb_err_alloc_dealloc_; call psb_errpush(info, name); return
  end if
  do j_field = 1, n_fields
    do i_field = 1, n_fields
      call op%field_desc(j_field)%clone(op%grid_desc%descs(i_field,j_field), info)
      ! check every clone: info is overwritten by the next iteration, so an
      ! unchecked failure would otherwise go unnoticed
      if (info /= psb_success_) then
        call psb_errpush(psb_err_from_subroutine_, name, a_err='desc clone'); return
      end if
    end do
  end do
  ! 4) composed global descriptor + operator setup
  call psb_cd_nest_compose(op%grid_desc, op%desc_glob, info)
  if (info /= psb_success_) then
    call psb_errpush(psb_err_from_subroutine_, name, a_err='cd_nest_compose'); return
  end if
  call psb_c_nest_base_setup(nest_operator, op%block_storage, op%grid_desc, op%desc_glob, info)
  if (info /= psb_success_) then
    call psb_errpush(psb_err_from_subroutine_, name, a_err='nest_base_setup'); return
  end if
  ! 5) wrap into the standard matrix object: a_glob%a becomes a copy of the
  !    local nest_operator
  allocate(op%a_glob%a, source=nest_operator, stat=info)
  if (info /= 0) then
    info = psb_err_alloc_dealloc_; call psb_errpush(info, name); return
  end if
  call op%a_glob%set_nrows(op%desc_glob%get_local_rows())
  call op%a_glob%set_ncols(op%desc_glob%get_local_cols())
  call op%a_glob%set_asb()
  ! 6) the triplet buffers are no longer needed
  do j_field = 1, n_fields
    do i_field = 1, n_fields
      call block_buffer_free(op%block_buffer(i_field,j_field))
    end do
  end do
  op%assembled = .true.
end subroutine psb_c_nest_op_asb
! free: release everything
subroutine psb_c_nest_op_free(op, info)
  ! Release everything owned by the operator and return it to the
  ! uninitialised state (n_fields = 0, assembled = .false.).
  !
  ! The teardown is best effort: every step runs even if an earlier one
  ! failed, and info reports the first failure seen (psb_success_ if none).
  !
  !   op   - operator to release
  !   info - psb_success_, psb_err_alloc_dealloc_ for a failed deallocate,
  !          or the code returned by the first failing free call
  class(psb_c_nest_matrix), intent(inout) :: op
  integer(psb_ipk_), intent(out) :: info
  integer(psb_ipk_) :: i_field, j_field, local_info
  info = psb_success_
  if (allocated(op%block_buffer)) then
    do j_field = 1, size(op%block_buffer,2)
      do i_field = 1, size(op%block_buffer,1)
        call block_buffer_free(op%block_buffer(i_field,j_field))
      end do
    end do
    deallocate(op%block_buffer, stat=local_info)
    if (local_info /= 0 .and. info == psb_success_) info = psb_err_alloc_dealloc_
  end if
  if (op%assembled) then
    ! a_glob refers to block_storage / grid_desc, so it is released first
    call op%a_glob%free()
    call op%desc_glob%free(local_info)
    if (local_info /= psb_success_ .and. info == psb_success_) info = local_info
    call op%grid_desc%free(local_info)
    if (local_info /= psb_success_ .and. info == psb_success_) info = local_info
  end if
  ! asb allocates block_storage%mats before it sets assembled, so the block
  ! storage is released unconditionally. Leaving it allocated would make
  ! the allocate in a later asb on this same object fail.
  call op%block_storage%free(local_info)
  if (local_info /= 0 .and. info == psb_success_) info = local_info
  if (allocated(op%field_desc)) then
    do i_field = 1, size(op%field_desc)
      call op%field_desc(i_field)%free(local_info)
      if (local_info /= psb_success_ .and. info == psb_success_) info = local_info
    end do
    deallocate(op%field_desc, stat=local_info)
    if (local_info /= 0 .and. info == psb_success_) info = psb_err_alloc_dealloc_
  end if
  op%n_fields = 0
  op%assembled = .false.
end subroutine psb_c_nest_op_free
! get_owned_rows: GLOBAL indices (in the field index space, 1..field size)
! of the rows of field i_field owned by this process. This is the list of
! rows the process is expected to insert through ins:
!
! my_rows = nested_matrix%get_owned_rows(1)
! do k = 1, size(my_rows)
! global_row = my_rows(k)
! ...
!
! An empty array is returned for an out-of-range field index.
function psb_c_nest_op_get_owned_rows(op, i_field) result(owned_global_rows)
  ! Return the result of get_global_indices(owned=.true.) for the
  ! descriptor of field i_field.
  !
  ! A zero-length array is returned when i_field is outside 1..op%n_fields
  ! or when the field descriptors have not been allocated.
  class(psb_c_nest_matrix), intent(in) :: op
  integer(psb_ipk_), intent(in) :: i_field
  integer(psb_lpk_), allocatable :: owned_global_rows(:)
  logical :: field_available

  field_available = (i_field >= 1) .and. (i_field <= op%n_fields) &
       & .and. allocated(op%field_desc)
  if (field_available) then
    owned_global_rows = op%field_desc(i_field)%get_global_indices(owned=.true.)
  else
    allocate(owned_global_rows(0))
  end if
end function psb_c_nest_op_get_owned_rows
! get_owned_row_count: how many rows of field i_field this process owns.
! Returns 0 when i_field is outside 1..n_fields or the field descriptors
! have not been allocated.
function psb_c_nest_op_get_owned_row_count(op, i_field) result(owned_row_count)
  class(psb_c_nest_matrix), intent(in) :: op
  integer(psb_ipk_), intent(in) :: i_field
  integer(psb_ipk_) :: owned_row_count
  logical :: field_is_valid

  field_is_valid = allocated(op%field_desc) .and. &
       &           (i_field >= 1) .and. (i_field <= op%n_fields)

  if (field_is_valid) then
    owned_row_count = op%field_desc(i_field)%get_local_rows()
  else
    owned_row_count = 0
  end if
end function psb_c_nest_op_get_owned_row_count
!-----------------------------------------------------------------
! private helpers: growing triplet buffer
!-----------------------------------------------------------------
! Append the first n_entries (row, col, value) triplets to a block buffer,
! growing its three storage arrays when needed.
!
!   buffer      (inout) triplet buffer to extend
!   n_entries   (in)    number of triplets taken from the entry_* arrays
!   entry_rows, entry_cols, entry_vals (in) triplets to append
!   info        (out)   psb_success_, or the allocation status returned by
!                       the capacity helpers (the buffer count is then
!                       left unchanged)
subroutine block_buffer_append(buffer, n_entries, entry_rows, entry_cols, entry_vals, info)
  type(psb_c_nest_block_buffer), intent(inout) :: buffer
  integer(psb_ipk_), intent(in) :: n_entries
  integer(psb_lpk_), intent(in) :: entry_rows(:), entry_cols(:)
  complex(psb_spk_), intent(in) :: entry_vals(:)
  integer(psb_ipk_), intent(out) :: info
  integer(psb_ipk_) :: first_slot, last_slot

  info = psb_success_
  first_slot = buffer%n_entries + 1
  last_slot  = buffer%n_entries + n_entries

  ! make room in all three arrays before touching any of them
  call ensure_capacity_lpk(buffer%entry_rows, last_slot, info)
  if (info /= 0) return
  call ensure_capacity_lpk(buffer%entry_cols, last_slot, info)
  if (info /= 0) return
  call ensure_capacity_val(buffer%entry_vals, last_slot, info)
  if (info /= 0) return

  buffer%entry_rows(first_slot:last_slot) = entry_rows(1:n_entries)
  buffer%entry_cols(first_slot:last_slot) = entry_cols(1:n_entries)
  buffer%entry_vals(first_slot:last_slot) = entry_vals(1:n_entries)
  buffer%n_entries = last_slot
end subroutine block_buffer_append
! Make sure an lpk_ index array can hold at least required_size elements.
! An unallocated array is created with max(required_size, 16) elements; an
! array that is too small is replaced by one of max(2*size, required_size)
! elements holding a copy of the old contents. info is the allocate status
! (0 on success); on failure the existing array is left untouched.
subroutine ensure_capacity_lpk(array, required_size, info)
  integer(psb_lpk_), allocatable, intent(inout) :: array(:)
  integer(psb_ipk_), intent(in) :: required_size
  integer(psb_ipk_), intent(out) :: info
  integer(psb_lpk_), allocatable :: bigger(:)
  integer(psb_ipk_) :: old_size

  info = 0
  if (allocated(array)) then
    old_size = size(array)
    if (old_size < required_size) then
      allocate(bigger(max(2*old_size, required_size)), stat=info)
      if (info == 0) then
        bigger(1:old_size) = array(1:old_size)
        ! hand the new storage over; the old one is released by move_alloc
        call move_alloc(bigger, array)
      end if
    end if
  else
    allocate(array(max(required_size,16)), stat=info)
  end if
end subroutine ensure_capacity_lpk
! Make sure a complex value array can hold at least required_size elements.
! An unallocated array is created with max(required_size, 16) elements; an
! array that is too small is replaced by one of max(2*size, required_size)
! elements holding a copy of the old contents. info is the allocate status
! (0 on success); on failure the existing array is left untouched.
subroutine ensure_capacity_val(array, required_size, info)
  complex(psb_spk_), allocatable, intent(inout) :: array(:)
  integer(psb_ipk_), intent(in) :: required_size
  integer(psb_ipk_), intent(out) :: info
  complex(psb_spk_), allocatable :: bigger(:)
  integer(psb_ipk_) :: old_size

  info = 0
  if (allocated(array)) then
    old_size = size(array)
    if (old_size < required_size) then
      allocate(bigger(max(2*old_size, required_size)), stat=info)
      if (info == 0) then
        bigger(1:old_size) = array(1:old_size)
        ! hand the new storage over; the old one is released by move_alloc
        call move_alloc(bigger, array)
      end if
    end if
  else
    allocate(array(max(required_size,16)), stat=info)
  end if
end subroutine ensure_capacity_val
! Release the storage of a triplet buffer and reset its entry count to 0.
! Safe to call on a buffer whose arrays were never allocated.
subroutine block_buffer_free(buffer)
  type(psb_c_nest_block_buffer), intent(inout) :: buffer

  buffer%n_entries = 0
  if (allocated(buffer%entry_vals)) then
    deallocate(buffer%entry_vals)
  end if
  if (allocated(buffer%entry_cols)) then
    deallocate(buffer%entry_cols)
  end if
  if (allocated(buffer%entry_rows)) then
    deallocate(buffer%entry_rows)
  end if
end subroutine block_buffer_free
end module psb_c_nest_builder_mod

@ -0,0 +1,364 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! Module: psb_c_nest_tools_mod
! Author: Simone Staccone (Stack-1)
!
! Nested-specific assembly wrappers for PSBLAS3 single precision complex matrix and vector routines
!
module psb_c_nest_tools_mod
use psb_const_mod, only : psb_ipk_, psb_lpk_, psb_spk_, psb_success_, psb_err_alloc_dealloc_, &
psb_err_invalid_input_, psb_err_from_subroutine_, &
psb_dupl_add_, psb_dupl_ovwrt_, psb_dupl_err_, psb_ctxt_type
use psb_error_mod, only : psb_errpush
use psb_c_tools_mod, only : psb_spall, psb_spins, psb_spasb, psb_spfree, psb_sprn, &
psb_geall, psb_geins, psb_geasb, psb_gefree
use psb_desc_nest_mod, only : psb_desc_nest_type
use psb_c_nest_mat_mod, only : psb_c_nest_sparse_mat
use psb_c_mat_mod, only : psb_cspmat_type
use psb_c_base_mat_mod, only : psb_c_coo_sparse_mat, psb_c_base_sparse_mat
use psb_desc_mod, only : psb_desc_type
implicit none
private
public :: psb_spall_nest, psb_spins_nest, psb_spasb_nest, psb_spfree_nest, psb_sprn_nest, &
psb_c_nest_rect_block
contains
! psb_spall_nest: allocate every block of a nested sparse matrix
!
! Takes the block shape (nrblocks x ncblocks) from desc_nest, allocates the
! mats array if needed and calls psb_spall on every block, so that all
! blocks are present afterwards. psb_spins_nest lazy-allocates individual
! blocks on first insertion; call psb_spall_nest instead when the full
! block structure is known up front.
!
! Arguments:
!   a_nest    (inout) nested matrix to allocate
!   desc_nest (in)    nested descriptor providing shape and block descriptors
!   info      (out)   psb_success_ or an error code
!   nnz       (in, optional) forwarded to psb_spall for every block
!
! Errors:
!   psb_err_invalid_input_  desc_nest has blocks but no descriptor array, or
!                           a_nest%mats is already allocated with a
!                           different shape (call psb_spfree_nest first)
!   psb_err_alloc_dealloc_  the mats array could not be allocated
!   status of psb_spall     when a block allocation fails
subroutine psb_spall_nest(a_nest, desc_nest, info, nnz)
  type(psb_c_nest_sparse_mat), intent(inout) :: a_nest
  type(psb_desc_nest_type), intent(in) :: desc_nest
  integer(psb_ipk_), intent(out) :: info
  integer(psb_ipk_), intent(in), optional :: nnz
  integer(psb_ipk_) :: i_block_row, j_block_col, local_info
  character(len=20) :: name

  info = psb_success_
  name = 'psb_spall_nest'

  ! the loops below index desc_nest%descs: refuse a descriptor that declares
  ! blocks but carries no descriptor array
  if ((desc_nest%nrblocks > 0) .and. (desc_nest%ncblocks > 0) .and. &
       & (.not. allocated(desc_nest%descs))) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='nested descriptor not allocated')
    return
  end if

  ! a leftover mats array of another shape would be indexed out of bounds
  if (allocated(a_nest%mats)) then
    if ((size(a_nest%mats,1) /= desc_nest%nrblocks) .or. &
         & (size(a_nest%mats,2) /= desc_nest%ncblocks)) then
      info = psb_err_invalid_input_
      call psb_errpush(info, name, a_err='a_nest already allocated with a different block shape')
      return
    end if
  end if

  a_nest%nrblocks = desc_nest%nrblocks
  a_nest%ncblocks = desc_nest%ncblocks

  if (.not. allocated(a_nest%mats)) then
    allocate(a_nest%mats(a_nest%nrblocks, a_nest%ncblocks), stat=info)
    if (info /= 0) then
      info = psb_err_alloc_dealloc_
      call psb_errpush(info, name)
      return
    end if
  end if

  do i_block_row = 1, a_nest%nrblocks
    do j_block_col = 1, a_nest%ncblocks
      local_info = psb_success_
      ! an absent optional nnz is forwarded as absent
      call psb_spall(a_nest%mats(i_block_row, j_block_col), &
           & desc_nest%descs(i_block_row, j_block_col), local_info, nnz=nnz)
      if (local_info /= psb_success_) then
        info = local_info
        call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_spall')
        return
      end if
    end do
  end do
end subroutine psb_spall_nest
! psb_spins_nest: insert entries into one block of a nested matrix
!
! Inserts n_entries (row, col, value) triplets into block
! (block_row, block_col). If that block has not been allocated yet (for
! instance because psb_spall_nest was not called) it is allocated here on
! first insertion. A call with n_entries == 0 returns immediately.
!
! Arguments:
!   block_row, block_col (in) block coordinates, checked against
!                             a_nest%nrblocks / a_nest%ncblocks
!   n_entries            (in) number of triplets
!   entry_rows, entry_cols, entry_vals (in) the triplets
!   a_nest               (inout) nested matrix
!   desc_nest            (inout) nested descriptor
!   info                 (out) psb_success_ or an error code
subroutine psb_spins_nest(block_row, block_col, n_entries, entry_rows, entry_cols, entry_vals, &
     & a_nest, desc_nest, info)
  integer(psb_ipk_), intent(in) :: block_row, block_col, n_entries
  integer(psb_lpk_), intent(in) :: entry_rows(:), entry_cols(:)
  complex(psb_spk_), intent(in) :: entry_vals(:)
  type(psb_c_nest_sparse_mat), intent(inout) :: a_nest
  type(psb_desc_nest_type), intent(inout) :: desc_nest
  integer(psb_ipk_), intent(out) :: info
  integer(psb_ipk_) :: initial_nnz
  logical :: block_in_range
  character(len=20) :: name

  name = 'psb_spins_nest'
  info = psb_success_
  if (n_entries == 0) return

  block_in_range = (block_row >= 1) .and. (block_row <= a_nest%nrblocks) .and. &
       &           (block_col >= 1) .and. (block_col <= a_nest%ncblocks)
  if (.not. block_in_range) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='invalid block indices')
    return
  end if

  ! the container itself may still be missing
  if (.not. allocated(a_nest%mats)) then
    allocate(a_nest%mats(a_nest%nrblocks, a_nest%ncblocks), stat=info)
    if (info /= 0) then
      info = psb_err_alloc_dealloc_
      call psb_errpush(info, name)
      return
    end if
  end if

  ! first insertion into this block: allocate it with some head-room
  ! (at least 10 entries, plus 50% of n_entries for later insertions)
  if (.not. allocated(a_nest%mats(block_row, block_col)%a)) then
    initial_nnz = max(n_entries, 10) + n_entries / 2
    call psb_spall(a_nest%mats(block_row, block_col), &
         & desc_nest%descs(block_row, block_col), info, nnz=initial_nnz)
    if (info /= psb_success_) then
      call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_spall')
      return
    end if
  end if

  call psb_spins(n_entries, entry_rows, entry_cols, entry_vals, &
       & a_nest%mats(block_row, block_col), desc_nest%descs(block_row, block_col), info)
  if (info /= psb_success_) then
    call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_spins')
  end if
end subroutine psb_spins_nest
! psb_spasb_nest: assemble every present block of a nested matrix
!
! Calls psb_spasb on all present (allocated) block matrices.
! Must be called after psb_cdasb_nest.
!
! Arguments:
!   a_nest    (inout) nested matrix
!   desc_nest (inout) nested descriptor
!   info      (out)   psb_success_, or the status of the first failing
!                     psb_spasb (assembly stops at that block)
!   dupl      (in, optional) duplicate handling, default psb_dupl_add_.
!             psb_dupl_add_, psb_dupl_ovwrt_ and psb_dupl_err_ are forwarded
!             to psb_spasb; for any other value psb_spasb is called without
!             dupl.
!
! A nested matrix whose mats array was never allocated has nothing to
! assemble: the routine returns psb_success_ without touching it.
subroutine psb_spasb_nest(a_nest, desc_nest, info, dupl)
  type(psb_c_nest_sparse_mat), intent(inout) :: a_nest
  type(psb_desc_nest_type), intent(inout) :: desc_nest
  integer(psb_ipk_), intent(out) :: info
  integer(psb_ipk_), intent(in), optional :: dupl
  integer(psb_ipk_) :: i_block_row, j_block_col, dupl_mode, local_info
  logical :: dupl_is_known
  character(len=20) :: name

  info = psb_success_
  name = 'psb_spasb_nest'

  ! same guard as psb_sprn_nest / psb_spfree_nest: without it the loops
  ! below would index an unallocated array
  if (.not. allocated(a_nest%mats)) return

  dupl_mode = psb_dupl_add_
  if (present(dupl)) dupl_mode = dupl
  dupl_is_known = (dupl_mode == psb_dupl_add_) .or. (dupl_mode == psb_dupl_ovwrt_) .or. &
       &          (dupl_mode == psb_dupl_err_)

  do i_block_row = 1, a_nest%nrblocks
    do j_block_col = 1, a_nest%ncblocks
      if (.not. allocated(a_nest%mats(i_block_row, j_block_col)%a)) cycle
      local_info = psb_success_
      if (dupl_is_known) then
        call psb_spasb(a_nest%mats(i_block_row, j_block_col), &
             & desc_nest%descs(i_block_row, j_block_col), local_info, dupl=dupl_mode)
      else
        ! unrecognised mode: let psb_spasb apply its own default
        call psb_spasb(a_nest%mats(i_block_row, j_block_col), &
             & desc_nest%descs(i_block_row, j_block_col), local_info)
      end if
      if (local_info /= psb_success_) then
        info = local_info
        call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_spasb')
        return
      end if
    end do
  end do
end subroutine psb_spasb_nest
! psb_spfree_nest: release a nested matrix
!
! Calls psb_spfree on every present block, deallocates the mats array and
! resets nrblocks/ncblocks to 0. All blocks are visited even after a
! failure; info reports the first failure only.
!
! Arguments:
!   a_nest    (inout) nested matrix to release
!   desc_nest (in)    nested descriptor (block descriptors are passed to
!                     psb_spfree)
!   info      (out)   psb_success_ or the first error met
subroutine psb_spfree_nest(a_nest, desc_nest, info)
  type(psb_c_nest_sparse_mat), intent(inout) :: a_nest
  type(psb_desc_nest_type), intent(in) :: desc_nest
  integer(psb_ipk_), intent(out) :: info
  integer(psb_ipk_) :: i_block_row, j_block_col, local_info
  character(len=20) :: name

  name = 'psb_spfree_nest'
  info = psb_success_

  if (allocated(a_nest%mats)) then
    do i_block_row = 1, a_nest%nrblocks
      do j_block_col = 1, a_nest%ncblocks
        ! absent blocks have nothing to release
        if (.not. allocated(a_nest%mats(i_block_row, j_block_col)%a)) cycle
        local_info = psb_success_
        call psb_spfree(a_nest%mats(i_block_row, j_block_col), &
             & desc_nest%descs(i_block_row, j_block_col), local_info)
        if ((local_info /= psb_success_) .and. (info == psb_success_)) then
          info = local_info
          call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_spfree')
        end if
      end do
    end do

    deallocate(a_nest%mats, stat=local_info)
    if ((local_info /= 0) .and. (info == psb_success_)) then
      info = psb_err_alloc_dealloc_
      call psb_errpush(info, name)
    end if
  end if

  a_nest%ncblocks = 0
  a_nest%nrblocks = 0
end subroutine psb_spfree_nest
! psb_sprn_nest: reinitialise every present block of a nested matrix
!
! Calls psb_sprn on every present block matrix, resetting it to the build
! state while preserving the sparsity pattern. All blocks are visited even
! after a failure; info reports the first failure only. Nothing is done
! when the mats array is not allocated.
!
! Arguments:
!   a_nest    (inout) nested matrix
!   desc_nest (in)    nested descriptor
!   info      (out)   psb_success_ or the first error met
!   clear     (in, optional) forwarded unchanged to psb_sprn
subroutine psb_sprn_nest(a_nest, desc_nest, info, clear)
  type(psb_c_nest_sparse_mat), intent(inout) :: a_nest
  type(psb_desc_nest_type), intent(in) :: desc_nest
  integer(psb_ipk_), intent(out) :: info
  logical, intent(in), optional :: clear
  integer(psb_ipk_) :: i_block_row, j_block_col, local_info
  character(len=20) :: name

  info = psb_success_
  name = 'psb_sprn_nest'
  if (.not. allocated(a_nest%mats)) return

  do i_block_row = 1, a_nest%nrblocks
    do j_block_col = 1, a_nest%ncblocks
      if (.not. allocated(a_nest%mats(i_block_row, j_block_col)%a)) cycle
      local_info = psb_success_
      ! an absent optional actual argument is seen as absent by psb_sprn,
      ! so no present(clear) branching is needed
      call psb_sprn(a_nest%mats(i_block_row, j_block_col), &
           & desc_nest%descs(i_block_row, j_block_col), local_info, clear=clear)
      if ((local_info /= psb_success_) .and. (info == psb_success_)) then
        info = local_info
        call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_sprn')
      end if
    end do
  end do
end subroutine psb_sprn_nest
! psb_c_nest_rect_block
!
! Build a local GENERAL (possibly rectangular) block A(i,j) of a nested
! operator, with rows in field i and columns in field j (field i /= field j,
! |field i| /= |field j| allowed). Rows are localized against the field-i
! (row) descriptor, columns against the field-j (column) descriptor which
! must already carry the union halo of column j (cdall + cdins(all column-j
! blocks' columns) + cdasb). The result is a CSR block of shape
! (field-i owned rows) x (field-j local cols incl. halo)
! consumable directly by the nested csmv (psb_c_nest_base_mat).
!
! A single-descriptor psb_spall/psb_spasb cannot express row-field /= col-field
! (it would force rows and columns into the same index space), hence the
! explicit COO build with separate row/column localization.
!
! Arguments (this process's local contribution):
! blk (out) the assembled block (CSR unless type/mold selects another format)
! nz number of local entries
! ia_glob(:) GLOBAL field-i row indices (owned by this process)
! ja_glob(:) GLOBAL field-j column indices
! val(:) values
! desc_row field-i descriptor (rows)
! desc_col field-j descriptor (columns, with union halo)
!
subroutine psb_c_nest_rect_block(blk, nz, ia_glob, ja_glob, val, desc_row, desc_col, info, type, mold)
  ! Additional arguments:
  !   info (out)           psb_success_ or an error code:
  !                        psb_err_invalid_input_ when nz is negative or
  !                        larger than any of ia_glob/ja_glob/val, when a row
  !                        does not localize into 1..(local rows of desc_row),
  !                        or when a column does not localize into
  !                        1..(local cols of desc_col)
  !   type (in, optional)  storage format name handed to cscnv
  !   mold (in, optional)  storage format mold handed to cscnv; takes
  !                        precedence over type when both are given
  ! Without type and mold the block is converted to 'CSR'.
  type(psb_cspmat_type), intent(out) :: blk
  integer(psb_ipk_), intent(in) :: nz
  integer(psb_lpk_), intent(in) :: ia_glob(:), ja_glob(:)
  complex(psb_spk_), intent(in) :: val(:)
  type(psb_desc_type), intent(in) :: desc_row, desc_col
  integer(psb_ipk_), intent(out) :: info
  character(len=*), intent(in), optional :: type   ! base storage format (default 'CSR')
  class(psb_c_base_sparse_mat), intent(in), optional :: mold   ! any format, e.g. psb_ext ELL/HLL
  type(psb_c_coo_sparse_mat) :: coo_block
  integer(psb_ipk_) :: k_entry, n_loc_rows, n_loc_cols, loc_row, loc_col
  character(len=24) :: name

  info = psb_success_
  name = 'psb_c_nest_rect_block'

  ! the loop below reads entries 1..nz of all three arrays: reject a count
  ! the arrays cannot satisfy instead of reading out of bounds
  if ((nz < 0) .or. (nz > size(ia_glob)) .or. (nz > size(ja_glob)) .or. (nz > size(val))) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='nz inconsistent with ia_glob/ja_glob/val sizes')
    return
  end if

  n_loc_rows = desc_row%get_local_rows()   ! owned rows of field i
  n_loc_cols = desc_col%get_local_cols()   ! field-j local cols (owned + halo)

  call coo_block%allocate(n_loc_rows, n_loc_cols, nz)

  ! localize rows and columns independently, each against its own descriptor
  do k_entry = 1, nz
    call desc_row%g2l(ia_glob(k_entry), loc_row, info)
    if (info /= 0 .or. loc_row < 1 .or. loc_row > n_loc_rows) then
      info = psb_err_invalid_input_
      call psb_errpush(info, name, a_err='row not owned / not localizable')
      return
    end if
    call desc_col%g2l(ja_glob(k_entry), loc_col, info)
    if (info /= 0 .or. loc_col < 1 .or. loc_col > n_loc_cols) then
      info = psb_err_invalid_input_
      call psb_errpush(info, name, a_err='column not in field-j descriptor (missing from union halo)')
      return
    end if
    coo_block%ia(k_entry)  = loc_row
    coo_block%ja(k_entry)  = loc_col
    coo_block%val(k_entry) = val(k_entry)
  end do

  ! duplicates are marked additive before the COO fix
  call coo_block%set_nzeros(nz)
  call coo_block%set_dupl(psb_dupl_add_)
  call coo_block%fix(info)
  if (info /= 0) then
    call psb_errpush(psb_err_from_subroutine_, name, a_err='coo fix')
    return
  end if

  call blk%mv_from(coo_block)

  ! final storage format: mold wins over type, CSR is the default
  if (present(mold)) then
    call blk%cscnv(info, mold=mold)
  else if (present(type)) then
    call blk%cscnv(info, type=type)
  else
    call blk%cscnv(info, type='CSR')
  end if
  if (info /= 0) then
    call psb_errpush(psb_err_from_subroutine_, name, a_err='cscnv')
    return
  end if
end subroutine psb_c_nest_rect_block
end module psb_c_nest_tools_mod

@ -0,0 +1,584 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! Module: psb_cd_nest_tools_mod
! Author: Simone Staccone (Stack-1)
!
! Nested-specific assembly wrappers for PSBLAS3 descriptor routines
!
module psb_cd_nest_tools_mod
use psb_const_mod, only : psb_ipk_, psb_lpk_, psb_success_, psb_err_alloc_dealloc_, &
psb_err_invalid_input_, psb_err_no_optional_arg_, psb_err_from_subroutine_, &
psb_ctxt_type
use psb_error_mod, only : psb_errpush
use psb_cd_tools_mod, only : psb_cdall, psb_cdasb, psb_cdins, psb_cdcpy, psb_cdprt
use psb_desc_nest_mod, only : psb_desc_nest_type
use psb_desc_mod, only : psb_desc_type
implicit none
private
public :: psb_cdall_nest, psb_cdins_nest, psb_cdins_nest_rc, &
psb_cdasb_nest, psb_cdfree_nest, psb_cdcpy_nest, psb_cdprt_nest, &
psb_cd_nest_compose
! Column-only form: (blk_j, nz, ja, desc_nest, info [,mask, lidx])
! Row+column form: (blk_i, blk_j, nz, ia, ja, desc_nest, info)
interface psb_cdins_nest
#if defined(PSB_IPK4) && defined(PSB_LPK8)
module procedure psb_cdins_nest_c
module procedure psb_cdins_nest_rc_sub
#endif
module procedure psb_lcdins_nest_c
module procedure psb_lcdins_nest_rc
end interface
! Row+column form: (blk_i, blk_j, nz, ia, ja, desc_nest, info)
interface psb_cdins_nest_rc
#if defined(PSB_IPK4) && defined(PSB_LPK8)
module procedure psb_cdins_nest_rc_sub
#endif
module procedure psb_lcdins_nest_rc
end interface
contains
! psb_cdall_nest: allocate a nested descriptor and all its block descriptors
!
! Allocates the (nrblocks x ncblocks) descriptor array and builds EVERY
! block descriptor with its own independent psb_cdall call, all of them
! with the same local row count nl.
!
! Arguments:
!   ctxt      - PSBLAS context
!   desc_nest - nested descriptor (output)
!   info      - error code (output)
!   nrblocks  - number of block rows (optional, default 2)
!   ncblocks  - number of block columns (optional, default 2)
!   nl        - local row count per process. Declared optional, but it is
!               required: psb_err_no_optional_arg_ is returned without it.
!
! Errors:
!   psb_err_no_optional_arg_  nl missing
!   psb_err_invalid_input_    negative nrblocks or ncblocks
!   psb_err_alloc_dealloc_    descriptor array allocation failed
!   status of psb_cdall       a block descriptor could not be built
subroutine psb_cdall_nest(ctxt, desc_nest, info, nrblocks, ncblocks, nl)
  type(psb_ctxt_type), intent(in) :: ctxt
  type(psb_desc_nest_type), intent(out) :: desc_nest
  integer(psb_ipk_), intent(out) :: info
  integer(psb_ipk_), intent(in), optional :: nrblocks, ncblocks, nl
  integer(psb_ipk_) :: i_block_row, j_block_col, n_block_rows, n_block_cols, local_rows
  character(len=20) :: name

  info = psb_success_
  name = 'psb_cdall_nest'

  ! Set default dimensions
  n_block_rows = 2
  n_block_cols = 2
  if (present(nrblocks)) n_block_rows = nrblocks
  if (present(ncblocks)) n_block_cols = ncblocks

  if (.not. present(nl)) then
    info = psb_err_no_optional_arg_
    call psb_errpush(info, name, a_err='nl (local row count)')
    return
  end if
  local_rows = nl

  ! a negative extent would silently allocate an empty array while storing
  ! a negative block count in the descriptor
  if ((n_block_rows < 0) .or. (n_block_cols < 0)) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='invalid block dimensions')
    return
  end if

  ! Allocate nested descriptor structure
  desc_nest%nrblocks = n_block_rows
  desc_nest%ncblocks = n_block_cols
  allocate(desc_nest%descs(n_block_rows, n_block_cols), stat=info)
  if (info /= 0) then
    info = psb_err_alloc_dealloc_
    call psb_errpush(info, name)
    return
  end if

  ! Build descriptors: each block gets its own independent psb_cdall.
  ! Cloning a build-state descriptor shares its base_desc pointer; when
  ! psb_cdasb_nest assembles both the original and the clone the shared
  ! base_desc is rebuilt twice, corrupting the global-to-local mapping of
  ! every block in that row. Independent allocations avoid this entirely.
  do i_block_row = 1, n_block_rows
    do j_block_col = 1, n_block_cols
      call psb_cdall(ctxt, desc_nest%descs(i_block_row, j_block_col), info, nl=local_rows)
      if (info /= psb_success_) then
        call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdall')
        return
      end if
    end do
  end do
end subroutine psb_cdall_nest
#if defined(PSB_IPK4) && defined(PSB_LPK8)
! psb_cdins_nest_rc_sub: row+col form with an ipk_ entry count (this variant
! exists only when ipk_ /= lpk_)
!
! Registers the (row, col) index pairs of n_entries entries into the
! descriptor of block (block_row, block_col). A call with n_entries == 0
! returns immediately.
subroutine psb_cdins_nest_rc_sub(block_row, block_col, n_entries, entry_rows, entry_cols, desc_nest, info)
  integer(psb_ipk_), intent(in) :: block_row, block_col, n_entries
  integer(psb_lpk_), intent(in) :: entry_rows(:), entry_cols(:)
  type(psb_desc_nest_type), intent(inout) :: desc_nest
  integer(psb_ipk_), intent(out) :: info
  logical :: block_in_range
  character(len=20) :: name

  name = 'psb_cdins_nest'
  info = psb_success_
  if (n_entries == 0) return

  block_in_range = (block_row >= 1) .and. (block_row <= desc_nest%nrblocks) .and. &
       &           (block_col >= 1) .and. (block_col <= desc_nest%ncblocks)
  if (.not. block_in_range) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='invalid block indices')
    return
  end if

  call psb_cdins(n_entries, entry_rows, entry_cols, &
       & desc_nest%descs(block_row, block_col), info)
  if (info /= psb_success_) then
    call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdins')
  end if
end subroutine psb_cdins_nest_rc_sub
! psb_cdins_nest_c: col-only form with an ipk_ entry count (this variant
! exists only when ipk_ /= lpk_)
!
! Registers n_entries global column indices entry_cols into the descriptor
! of every block of block column block_col (descs(i, block_col) for
! i = 1..nrblocks). mask and lidx are forwarded unchanged to psb_cdins.
! All row-blocks are visited even after a failure; info reports the first
! failure only. A call with n_entries == 0 returns immediately.
subroutine psb_cdins_nest_c(block_col, n_entries, entry_cols, desc_nest, info, mask, lidx)
  integer(psb_ipk_), intent(in) :: block_col, n_entries
  integer(psb_lpk_), intent(in) :: entry_cols(:)
  type(psb_desc_nest_type), intent(inout) :: desc_nest
  integer(psb_ipk_), intent(out) :: info
  logical, intent(in), optional, target :: mask(:)
  integer(psb_ipk_), intent(in), optional :: lidx(:)
  integer(psb_ipk_) :: i_block_row, local_info
  character(len=20) :: name

  info = psb_success_
  name = 'psb_cdins_nest'
  if (n_entries == 0) return

  if (block_col < 1 .or. block_col > desc_nest%ncblocks) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='invalid block column index')
    return
  end if

  do i_block_row = 1, desc_nest%nrblocks
    local_info = psb_success_
    ! absent optionals are forwarded as absent, so a single call covers all
    ! four presence combinations of mask / lidx
    call psb_cdins(n_entries, entry_cols, desc_nest%descs(i_block_row, block_col), &
         & local_info, mask=mask, lidx=lidx)
    if ((local_info /= psb_success_) .and. (info == psb_success_)) then
      info = local_info
      call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdins')
    end if
  end do
end subroutine psb_cdins_nest_c
#endif
! psb_lcdins_nest_rc: row+col form, lpk_ entry count
!
! Registers the (row, col) index pairs of n_entries entries into the
! descriptor of block (block_row, block_col). A call with n_entries == 0
! returns immediately.
!
! When entries in block (block_row, block_col) reference columns owned by
! other processes, use the col-only form afterwards to broadcast those
! column indices across all row-blocks in block column block_col.
subroutine psb_lcdins_nest_rc(block_row, block_col, n_entries, entry_rows, entry_cols, desc_nest, info)
  integer(psb_ipk_), intent(in) :: block_row, block_col
  integer(psb_lpk_), intent(in) :: n_entries, entry_rows(:), entry_cols(:)
  type(psb_desc_nest_type), intent(inout) :: desc_nest
  integer(psb_ipk_), intent(out) :: info
  logical :: block_in_range
  character(len=20) :: name

  name = 'psb_cdins_nest'
  info = psb_success_
  if (n_entries == 0) return

  block_in_range = (block_row >= 1) .and. (block_row <= desc_nest%nrblocks) .and. &
       &           (block_col >= 1) .and. (block_col <= desc_nest%ncblocks)
  if (.not. block_in_range) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='invalid block indices')
    return
  end if

  call psb_cdins(n_entries, entry_rows, entry_cols, &
       & desc_nest%descs(block_row, block_col), info)
  if (info /= psb_success_) then
    call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdins')
  end if
end subroutine psb_lcdins_nest_rc
! psb_lcdins_nest_c: col-only form, lpk_ entry count
!
! Registers n_entries global column indices entry_cols into the descriptor
! of every block of block column block_col (descs(i, block_col) for
! i = 1..nrblocks). mask and lidx are forwarded unchanged to psb_cdins.
! All row-blocks are visited even after a failure; info reports the first
! failure only. A call with n_entries == 0 returns immediately.
subroutine psb_lcdins_nest_c(block_col, n_entries, entry_cols, desc_nest, info, mask, lidx)
  integer(psb_ipk_), intent(in) :: block_col
  integer(psb_lpk_), intent(in) :: n_entries, entry_cols(:)
  type(psb_desc_nest_type), intent(inout) :: desc_nest
  integer(psb_ipk_), intent(out) :: info
  logical, intent(in), optional, target :: mask(:)
  integer(psb_ipk_), intent(in), optional :: lidx(:)
  integer(psb_ipk_) :: i_block_row, local_info
  character(len=20) :: name

  info = psb_success_
  name = 'psb_cdins_nest'
  if (n_entries == 0) return

  if (block_col < 1 .or. block_col > desc_nest%ncblocks) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='invalid block column index')
    return
  end if

  do i_block_row = 1, desc_nest%nrblocks
    local_info = psb_success_
    ! absent optionals are forwarded as absent, so a single call covers all
    ! four presence combinations of mask / lidx
    call psb_cdins(n_entries, entry_cols, desc_nest%descs(i_block_row, block_col), &
         & local_info, mask=mask, lidx=lidx)
    if ((local_info /= psb_success_) .and. (info == psb_success_)) then
      info = local_info
      call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdins')
    end if
  end do
end subroutine psb_lcdins_nest_c
! psb_cdasb_nest: Finalize all nested descriptors
!
! Runs psb_cdasb on each block descriptor of the nested structure, row by
! row, stopping at the first failure. Call it once all psb_cdins_nest
! insertions are done and before psb_spasb_nest.
!
! Arguments:
!   desc_nest - nested descriptor (input/output)
!   info      - error code (output)
subroutine psb_cdasb_nest(desc_nest, info)
  type(psb_desc_nest_type), intent(inout) :: desc_nest
  integer(psb_ipk_), intent(out) :: info
  integer(psb_ipk_) :: i_block_row, j_block_col
  character(len=20) :: name

  name = 'psb_cdasb_nest'
  info = psb_success_

  block_rows: do i_block_row = 1, desc_nest%nrblocks
    do j_block_col = 1, desc_nest%ncblocks
      call psb_cdasb(desc_nest%descs(i_block_row, j_block_col), info)
      ! leave both loops at the first failing block
      if (info /= psb_success_) exit block_rows
    end do
  end do block_rows

  if (info /= psb_success_) then
    call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdasb')
  end if
end subroutine psb_cdasb_nest
! psb_cdfree_nest: Free all nested descriptors
!
! Thin wrapper that delegates to the free method of the nested descriptor
! and records a failure on the PSBLAS error stack. It is the nested
! counterpart of psb_cdfree for a single psb_desc_type.
!
! Arguments:
!   desc_nest - nested descriptor (input/output)
!   info      - error code (output)
!
subroutine psb_cdfree_nest(desc_nest, info)
  type(psb_desc_nest_type), intent(inout) :: desc_nest
  integer(psb_ipk_), intent(out) :: info
  character(len=20) :: name

  name = 'psb_cdfree_nest'
  info = psb_success_

  call desc_nest%free(info)
  if (info == psb_success_) return

  call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_desc_nest_free')
end subroutine psb_cdfree_nest
! psb_cdcpy_nest: Deep copy (clone) a nested descriptor
!
! Gives desc_out the same block shape as desc_in, allocates its descriptor
! array and copies every block descriptor with psb_cdcpy, stopping at the
! first failure.
!
! Arguments:
!   desc_in  - source nested descriptor (declared inout: the clone may need
!              to read internal state)
!   desc_out - destination nested descriptor (output)
!   info     - error code (output)
subroutine psb_cdcpy_nest(desc_in, desc_out, info)
  type(psb_desc_nest_type), intent(inout) :: desc_in
  type(psb_desc_nest_type), intent(out) :: desc_out
  integer(psb_ipk_), intent(out) :: info
  integer(psb_ipk_) :: i_block_row, j_block_col
  character(len=20) :: name

  name = 'psb_cdcpy_nest'
  info = psb_success_

  desc_out%nrblocks = desc_in%nrblocks
  desc_out%ncblocks = desc_in%ncblocks
  allocate(desc_out%descs(desc_in%nrblocks, desc_in%ncblocks), stat=info)
  if (info /= 0) then
    info = psb_err_alloc_dealloc_
    call psb_errpush(info, name)
    return
  end if

  block_rows: do i_block_row = 1, desc_in%nrblocks
    do j_block_col = 1, desc_in%ncblocks
      call psb_cdcpy(desc_in%descs(i_block_row, j_block_col), &
           & desc_out%descs(i_block_row, j_block_col), info)
      ! leave both loops at the first failing copy
      if (info /= psb_success_) exit block_rows
    end do
  end do block_rows

  if (info /= psb_success_) then
    call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdcpy')
  end if
end subroutine psb_cdcpy_nest
! psb_cdprt_nest: Print all block descriptors (debugging)
!
! Loops over all (i,j) block descriptors in the nested structure, writes a
! 'Block (i,j):' line to iout and calls psb_cdprt on the block. All
! optional arguments are forwarded unchanged: an argument absent here is
! seen as absent by psb_cdprt.
!
! Arguments:
!   iout      - output unit
!   desc_nest - nested descriptor (input)
!   glob      - passed to psb_cdprt (optional)
!   short     - passed to psb_cdprt (optional)
!   verbosity - passed to psb_cdprt (optional)
subroutine psb_cdprt_nest(iout, desc_nest, glob, short, verbosity)
  integer(psb_ipk_), intent(in) :: iout
  type(psb_desc_nest_type), intent(in) :: desc_nest
  logical, intent(in), optional :: glob, short
  integer(psb_ipk_), intent(in), optional :: verbosity
  integer(psb_ipk_) :: i_block_row, j_block_col

  do i_block_row = 1, desc_nest%nrblocks
    do j_block_col = 1, desc_nest%ncblocks
      write(iout, '(a,i0,a,i0,a)') 'Block (', i_block_row, ',', j_block_col, '):'
      ! one call covers all eight presence combinations of the optionals
      call psb_cdprt(iout, desc_nest%descs(i_block_row,j_block_col), &
           & glob=glob, short=short, verbosity=verbosity)
    end do
  end do
end subroutine psb_cdprt_nest
! psb_cd_nest_compose (P1 / step 6a)
!
! Compose the per-field block descriptors into a SINGLE global psb_desc_type
! describing the whole nested operator. The global index space is the
! concatenation of the field spaces:
!
! global index = offset_k + (field-k global index), offset_k = sum_{m<k} n_m
!
! Each process owns its slice of every field; the global halo is the union of
! the per-field halos, each remapped by its field offset. Once composed, the
! nested operator can be presented to Krylov/AMG4PSBLAS as a standard
! distributed matrix/vector (MATNEST-style).
!
! Assumes a square block structure (nrblocks == ncblocks); field k is taken to
! be column k, whose distribution and halo are read from descs(1,k) (all
! descs(i,k) for fixed k share the same column space).
!
subroutine psb_cd_nest_compose(desc_grid, desc_global, info)
  ! Arguments:
  !   desc_grid   (in)  nested descriptor; must be allocated, square and hold
  !                     at least one block
  !   desc_global (out) composed global descriptor, assembled on success
  !   info        (out) psb_success_ or an error code:
  !                     psb_err_invalid_input_ for an unallocated, non-square
  !                     or empty/inconsistent nested descriptor,
  !                     psb_err_alloc_dealloc_ for a failed work allocation,
  !                     otherwise the status of the failing l2g / psb_cdall /
  !                     psb_cdins / psb_cdasb call
  type(psb_desc_nest_type), intent(in) :: desc_grid
  type(psb_desc_type), intent(out) :: desc_global
  integer(psb_ipk_), intent(out) :: info
  type(psb_ctxt_type) :: ctxt
  integer(psb_ipk_) :: n_fields, i_field, i_loc, n_owned, n_local, owned_count, halo_count
  integer(psb_lpk_) :: global_idx
  integer(psb_lpk_), allocatable :: field_offset(:), owned_global(:), halo_global(:)
  character(len=24) :: name

  info = psb_success_
  name = 'psb_cd_nest_compose'

  if (.not. allocated(desc_grid%descs)) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='nested descriptor not allocated')
    return
  end if
  if (desc_grid%nrblocks /= desc_grid%ncblocks) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='nested block structure must be square')
    return
  end if
  n_fields = desc_grid%ncblocks

  ! everything below reads descs(1,k), k = 1..n_fields: an allocated but
  ! empty (or too small) descriptor array would be indexed out of bounds
  if ((n_fields < 1) .or. (size(desc_grid%descs,1) < 1) .or. &
       & (size(desc_grid%descs,2) < n_fields)) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='nested descriptor is empty or inconsistent with its block counts')
    return
  end if

  ctxt = desc_grid%descs(1,1)%get_context()

  ! 1. field offsets in the global numbering
  allocate(field_offset(n_fields+1), stat=info)
  if (info /= 0) then
    info = psb_err_alloc_dealloc_; call psb_errpush(info, name); return
  end if
  field_offset(1) = 0
  do i_field = 1, n_fields
    field_offset(i_field+1) = field_offset(i_field) + desc_grid%descs(1,i_field)%get_global_rows()
  end do

  ! 2. local owned global indices: U_k { offset_k + l2g(owned of field k) }
  !    first pass counts, second pass fills
  owned_count = 0
  do i_field = 1, n_fields
    owned_count = owned_count + desc_grid%descs(1,i_field)%get_local_rows()
  end do
  allocate(owned_global(owned_count), stat=info)
  if (info /= 0) then
    info = psb_err_alloc_dealloc_; call psb_errpush(info, name); return
  end if
  owned_count = 0
  do i_field = 1, n_fields
    n_owned = desc_grid%descs(1,i_field)%get_local_rows()
    do i_loc = 1, n_owned
      call desc_grid%descs(1,i_field)%l2g(i_loc, global_idx, info)
      if (info /= 0) then
        call psb_errpush(psb_err_from_subroutine_, name, a_err='l2g'); return
      end if
      owned_count = owned_count + 1
      owned_global(owned_count) = field_offset(i_field) + global_idx
    end do
  end do

  ! 3. allocate the global descriptor with the concatenated ownership
  call psb_cdall(ctxt, desc_global, info, vl=owned_global)
  if (info /= 0) then
    call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdall'); return
  end if

  ! 4. global halo: U_k { offset_k + l2g(halo of field k) }
  !    field-k halo local indices are local_rows+1 .. local_cols
  halo_count = 0
  do i_field = 1, n_fields
    halo_count = halo_count + (desc_grid%descs(1,i_field)%get_local_cols() &
         &                   - desc_grid%descs(1,i_field)%get_local_rows())
  end do
  if (halo_count > 0) then
    allocate(halo_global(halo_count), stat=info)
    if (info /= 0) then
      info = psb_err_alloc_dealloc_; call psb_errpush(info, name); return
    end if
    halo_count = 0
    do i_field = 1, n_fields
      n_owned = desc_grid%descs(1,i_field)%get_local_rows()
      n_local = desc_grid%descs(1,i_field)%get_local_cols()
      do i_loc = n_owned + 1, n_local
        call desc_grid%descs(1,i_field)%l2g(i_loc, global_idx, info)
        if (info /= 0) then
          call psb_errpush(psb_err_from_subroutine_, name, a_err='l2g halo'); return
        end if
        halo_count = halo_count + 1
        halo_global(halo_count) = field_offset(i_field) + global_idx
      end do
    end do
    call psb_cdins(halo_count, halo_global, desc_global, info)
    if (info /= 0) then
      call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdins'); return
    end if
  end if

  ! 5. assemble: build the global halo communication schedule (union halo);
  !    done whether or not this process contributed halo indices
  call psb_cdasb(desc_global, info)
  if (info /= 0) then
    call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdasb'); return
  end if
end subroutine psb_cd_nest_compose
end module psb_cd_nest_tools_mod

@ -0,0 +1,414 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific prior written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! File: psb_d_nest_builder_mod.F90
!
! Module: psb_d_nest_builder_mod
! Author: Simone Staccone (Stack-1)
!
! User-friendly frontend to build a nested (MATNEST) operator without manually
! managing per-field descriptors, the union halo, composition and setup.
!
! All the boilerplate (identical for every nested operator) is hidden behind a
! single type, psb_d_nest_matrix, with the usual PSBLAS init/ins/asb pattern:
!
! type(psb_d_nest_matrix) :: nested_matrix
! call nested_matrix%init(ctxt, [n1, n2], info) ! 2 fields of global size n1, n2
! call nested_matrix%ins(1,1, n, rows, cols, vals, info) ! values of block (1,1) = A
! call nested_matrix%ins(1,2, n, rows, cols, vals, info) ! values of block (1,2) = B^T
! call nested_matrix%ins(2,1, n, rows, cols, vals, info) ! values of block (2,1) = B
! ... ! (absent blocks = not inserted)
! call nested_matrix%asb(info) ! assemble: builds a_glob, desc_glob
!
! ! from here on nested_matrix%a_glob and nested_matrix%desc_glob are an
! ! ordinary distributed matrix/descriptor:
! call psb_geall(x, nested_matrix%desc_glob, info)
! call psb_krylov('CG', nested_matrix%a_glob, prec, b, x, eps, nested_matrix%desc_glob, info, ...)
!
! Indices: in ins(block_row, block_col, ...) the rows live in the index space of
! field block_row, the columns in the index space of field block_col (GLOBAL
! field indices, 1..field_size). Each process inserts only the rows it owns
! (PSBLAS convention). Off-diagonal blocks may be rectangular.
!
! NOTE: after asb the object holds consistent internal pointers (a_glob%a points
! to block_storage / grid_desc): do not copy/move the object after assembly.
!
module psb_d_nest_builder_mod
use psb_const_mod
use psb_error_mod, only : psb_errpush
use psb_penv_mod, only : psb_ctxt_type, psb_info
use psb_desc_mod, only : psb_desc_type
use psb_d_mat_mod, only : psb_dspmat_type
use psb_d_base_mat_mod, only : psb_d_base_sparse_mat
use psb_cd_tools_mod, only : psb_cdall, psb_cdins, psb_cdasb
use psb_desc_nest_mod, only : psb_desc_nest_type
use psb_d_nest_mat_mod, only : psb_d_nest_sparse_mat
use psb_d_nest_base_mat_mod, only : psb_d_nest_base_mat, psb_d_nest_base_setup
use psb_cd_nest_tools_mod, only : psb_cd_nest_compose
use psb_d_nest_tools_mod, only : psb_d_nest_rect_block
implicit none
! Growing triplet buffer holding the entries inserted so far into a single
! block of the nested matrix.
! Only the first n_entries positions of each array are meaningful: the arrays
! are enlarged by block_buffer_append (at least doubling when they overflow),
! so their size is a capacity and may exceed n_entries.
type :: psb_d_nest_block_buffer
! number of triplets currently stored
integer(psb_ipk_) :: n_entries = 0
! row / column indices of the stored triplets, exactly as handed to ins
integer(psb_lpk_), allocatable :: entry_rows(:), entry_cols(:)
! values of the stored triplets
real(psb_dpk_), allocatable :: entry_vals(:)
end type psb_d_nest_block_buffer
! Builder object for a nested operator made of n_fields x n_fields blocks.
! Life cycle: init (one descriptor per field) -> ins (buffer triplets per
! block) -> asb (build blocks, global descriptor and a_glob) -> free.
type :: psb_d_nest_matrix
! parallel context, stored by init
type(psb_ctxt_type) :: context
! number of fields (block rows == block columns); 0 until init is called
integer(psb_ipk_) :: n_fields = 0
! set to .true. at the end of asb, reset by init and free; ins refuses to
! run once it is .true.
logical :: assembled = .false.
! construction state
type(psb_desc_type), allocatable :: field_desc(:) ! one descriptor per field
type(psb_d_nest_block_buffer), allocatable :: block_buffer(:,:) ! triplets per block (i,j)
! products (owned; the pointers in a_glob%a point in here)
! block_storage%mats(i,j) is built in asb from block_buffer(i,j); blocks that
! received no entries are left unbuilt
type(psb_d_nest_sparse_mat) :: block_storage
! grid_desc%descs(i,j) is a clone of field_desc(j), made in asb
type(psb_desc_nest_type) :: grid_desc
type(psb_dspmat_type) :: a_glob ! the matrix to hand to Krylov
type(psb_desc_type) :: desc_glob ! the global descriptor
contains
! build phases, in calling order, followed by the release routine
procedure, pass(op) :: init => psb_d_nest_op_init
procedure, pass(op) :: ins => psb_d_nest_op_ins
procedure, pass(op) :: asb => psb_d_nest_op_asb
procedure, pass(op) :: free => psb_d_nest_op_free
! user-friendly queries on the field row distribution (no descriptor
! jargon needed: these replace field_desc(i)%get_local_rows() / %l2g(...))
procedure, pass(op) :: get_owned_rows => psb_d_nest_op_get_owned_rows
procedure, pass(op) :: get_owned_row_count => psb_d_nest_op_get_owned_row_count
end type psb_d_nest_matrix
private
public :: psb_d_nest_matrix
contains
! init: prepare the builder for size(field_sizes) fields.
!
! Every field gets its own descriptor with a block row distribution: each
! process receives field_size/num_procs rows, and the processes whose rank is
! smaller than mod(field_size, num_procs) receive one extra row, so the local
! counts add up to the global field size.
!
! Arguments:
!   op          (inout) the builder; its context, n_fields, field_desc and
!                       block_buffer are (re)set here
!   context     (in)    parallel context
!   field_sizes (in)    global size of each field
!   info        (out)   psb_success_, psb_err_alloc_dealloc_ if the internal
!                       arrays cannot be allocated, or the code returned by
!                       a failing psb_cdall
subroutine psb_d_nest_op_init(op, context, field_sizes, info)
  class(psb_d_nest_matrix), intent(inout) :: op
  type(psb_ctxt_type), intent(in)         :: context
  integer(psb_lpk_), intent(in)           :: field_sizes(:)
  integer(psb_ipk_), intent(out)          :: info

  character(len=24) :: name
  integer(psb_ipk_) :: iam, np, nf, k, rows_here
  integer(psb_lpk_) :: np_wide, leftover

  name = 'psb_d_nest_op_init'
  info = psb_success_

  call psb_info(context, iam, np)
  nf = size(field_sizes)

  op%assembled = .false.
  op%n_fields  = nf
  op%context   = context

  allocate(op%field_desc(nf), op%block_buffer(nf,nf), stat=info)
  if (info /= 0) then
    info = psb_err_alloc_dealloc_
    call psb_errpush(info, name)
    return
  end if

  ! all the arithmetic on global sizes is carried out in the long kind
  np_wide = int(np, psb_lpk_)
  do k = 1, nf
    rows_here = int(field_sizes(k) / np_wide, psb_ipk_)
    leftover  = mod(field_sizes(k), np_wide)
    ! the first "leftover" ranks absorb the remainder, one row each
    if (int(iam, psb_lpk_) < leftover) rows_here = rows_here + 1

    call psb_cdall(context, op%field_desc(k), info, nl=rows_here)
    if (info /= psb_success_) then
      call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdall')
      return
    end if
  end do
end subroutine psb_d_nest_op_init
! ins: buffer n_entries triplets for block (block_row,block_col) and register
! their column indices in the descriptor of field block_col, so that the
! descriptor accumulates the union halo of all the blocks in that column
! (this is done for diagonal blocks as well).
!
! Arguments:
!   op                   (inout) the builder (must not be assembled yet)
!   block_row, block_col (in)    block coordinates, each in 1..n_fields
!   n_entries            (in)    number of triplets to take from the arrays;
!                                a value <= 0 is a successful no-op
!   entry_rows           (in)    row indices (index space of field block_row)
!   entry_cols           (in)    column indices (index space of field block_col)
!   entry_vals           (in)    values
!   info                 (out)   psb_success_; psb_err_invalid_input_ when the
!                                operator is already assembled, a block index
!                                is out of range or an array holds fewer than
!                                n_entries items; psb_err_alloc_dealloc_ when
!                                the buffer cannot grow; or the code returned
!                                by a failing psb_cdins
subroutine psb_d_nest_op_ins(op, block_row, block_col, n_entries, entry_rows, entry_cols, entry_vals, info)
  class(psb_d_nest_matrix), intent(inout) :: op
  integer(psb_ipk_), intent(in)  :: block_row, block_col, n_entries
  integer(psb_lpk_), intent(in)  :: entry_rows(:), entry_cols(:)
  real(psb_dpk_), intent(in)     :: entry_vals(:)
  integer(psb_ipk_), intent(out) :: info
  character(len=24) :: name

  info = psb_success_
  name = 'psb_d_nest_op_ins'

  if (op%assembled) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='operator already assembled'); return
  end if
  if (block_row < 1 .or. block_row > op%n_fields .or. &
       & block_col < 1 .or. block_col > op%n_fields) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='block index out of range'); return
  end if
  if (n_entries <= 0) return

  ! The sections entry_*(1:n_entries) are read below: refuse arrays that are
  ! too short instead of reading past their end.
  if (size(entry_rows) < n_entries .or. size(entry_cols) < n_entries .or. &
       & size(entry_vals) < n_entries) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='entry arrays shorter than n_entries'); return
  end if

  call block_buffer_append(op%block_buffer(block_row,block_col), n_entries, &
       & entry_rows, entry_cols, entry_vals, info)
  if (info /= psb_success_) then
    ! the helper reports raw allocation status: map it to the PSBLAS code
    info = psb_err_alloc_dealloc_; call psb_errpush(info, name); return
  end if

  ! the columns of block (block_row,block_col) live in field block_col ->
  ! register their indices into that descriptor's union halo
  ! (this also applies when block_col == block_row)
  call psb_cdins(n_entries, entry_cols(1:n_entries), op%field_desc(block_col), info)
  if (info /= psb_success_) then
    call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdins'); return
  end if
end subroutine psb_d_nest_op_ins
! asb: assemble the per-field descriptors, build the blocks, compose the
! global descriptor, set up the nested operator and wrap it into a_glob.
!
! Steps:
!   1) psb_cdasb on every field descriptor (carrying the union halo
!      accumulated by ins);
!   2) one local block per (i,j) that received entries, built from the
!      buffered triplets by psb_d_nest_rect_block;
!   3) descriptor grid: descs(i,j) is a clone of the descriptor of field j;
!   4) global descriptor (psb_cd_nest_compose) and operator setup
!      (psb_d_nest_base_setup);
!   5) a_glob%a is allocated as a copy of the operator and a_glob is sized
!      from desc_glob and marked assembled;
!   6) the triplet buffers are released.
!
! Arguments:
!   op   (inout) the builder; must be initialised and not yet assembled
!   info (out)   psb_success_; psb_err_invalid_input_ if the operator is
!                already assembled; psb_err_alloc_dealloc_ on allocation
!                failure; otherwise the code returned by the failing step
!   type (in, optional) storage format name forwarded to
!                psb_d_nest_rect_block ('CSR'/'CSC'/'COO', default 'CSR')
!   mold (in, optional) any class extending psb_d_base_sparse_mat, e.g. the
!                psb_ext ELL/HLL or the psb_cuda device formats; forwarded
!                to psb_d_nest_rect_block to select the block format
subroutine psb_d_nest_op_asb(op, info, type, mold)
  class(psb_d_nest_matrix), intent(inout), target :: op
  integer(psb_ipk_), intent(out) :: info
  character(len=*), intent(in), optional :: type
  class(psb_d_base_sparse_mat), intent(in), optional :: mold
  type(psb_d_nest_base_mat) :: nest_operator
  integer(psb_ipk_) :: n_fields, i_field, j_field
  character(len=24) :: name

  info = psb_success_
  name = 'psb_d_nest_op_asb'

  ! Same guard as ins: assembling twice would re-run psb_cdasb on assembled
  ! descriptors and then fail on the allocations below.
  if (op%assembled) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='operator already assembled'); return
  end if

  n_fields = op%n_fields

  ! 1) assemble the per-field descriptors (with the union halo accumulated in ins)
  do i_field = 1, n_fields
    call psb_cdasb(op%field_desc(i_field), info)
    if (info /= psb_success_) then
      call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdasb'); return
    end if
  end do

  ! 2) build the local blocks (generally rectangular) from the triplets
  op%block_storage%nrblocks = n_fields
  op%block_storage%ncblocks = n_fields
  allocate(op%block_storage%mats(n_fields,n_fields), stat=info)
  if (info /= 0) then
    info = psb_err_alloc_dealloc_; call psb_errpush(info, name); return
  end if
  do j_field = 1, n_fields
    do i_field = 1, n_fields
      ! blocks that never received entries are left unbuilt (absent)
      if (op%block_buffer(i_field,j_field)%n_entries > 0) then
        call psb_d_nest_rect_block(op%block_storage%mats(i_field,j_field), &
             & op%block_buffer(i_field,j_field)%n_entries, &
             & op%block_buffer(i_field,j_field)%entry_rows, &
             & op%block_buffer(i_field,j_field)%entry_cols, &
             & op%block_buffer(i_field,j_field)%entry_vals, &
             & op%field_desc(i_field), op%field_desc(j_field), info, &
             & type=type, mold=mold)
        if (info /= psb_success_) then
          call psb_errpush(psb_err_from_subroutine_, name, a_err='rect_block'); return
        end if
      end if
    end do
  end do

  ! 3) descriptor grid: descs(i,j) = descriptor of field j
  op%grid_desc%nrblocks = n_fields
  op%grid_desc%ncblocks = n_fields
  allocate(op%grid_desc%descs(n_fields,n_fields), stat=info)
  if (info /= 0) then
    info = psb_err_alloc_dealloc_; call psb_errpush(info, name); return
  end if
  do j_field = 1, n_fields
    do i_field = 1, n_fields
      call op%field_desc(j_field)%clone(op%grid_desc%descs(i_field,j_field), info)
      ! a failed clone must not be overwritten by the next iteration
      if (info /= psb_success_) then
        call psb_errpush(psb_err_from_subroutine_, name, a_err='desc clone'); return
      end if
    end do
  end do

  ! 4) composed global descriptor + operator setup
  call psb_cd_nest_compose(op%grid_desc, op%desc_glob, info)
  if (info /= psb_success_) then
    call psb_errpush(psb_err_from_subroutine_, name, a_err='cd_nest_compose'); return
  end if
  call psb_d_nest_base_setup(nest_operator, op%block_storage, op%grid_desc, op%desc_glob, info)
  if (info /= psb_success_) then
    call psb_errpush(psb_err_from_subroutine_, name, a_err='nest_base_setup'); return
  end if

  ! 5) wrap into the standard matrix object (the pointers keep pointing at op%*)
  allocate(op%a_glob%a, source=nest_operator, stat=info)
  if (info /= 0) then
    info = psb_err_alloc_dealloc_; call psb_errpush(info, name); return
  end if
  call op%a_glob%set_nrows(op%desc_glob%get_local_rows())
  call op%a_glob%set_ncols(op%desc_glob%get_local_cols())
  call op%a_glob%set_asb()

  ! 6) the triplet buffers are no longer needed
  do j_field = 1, n_fields
    do i_field = 1, n_fields
      call block_buffer_free(op%block_buffer(i_field,j_field))
    end do
  end do
  op%assembled = .true.
end subroutine psb_d_nest_op_asb
! free: release everything owned by the builder and bring it back to the
! "never initialised" state (n_fields = 0, assembled = .false.).
!
! Released, in this order: the triplet buffers; when the operator was
! assembled, a_glob, desc_glob and grid_desc; the block storage built by
! asb; the per-field descriptors.
! The block storage is released whenever it is allocated, i.e. also after
! an asb that failed half way, so that the object can go through init/asb
! again.
!
! Arguments:
!   op   (inout) the builder
!   info (out)   psb_success_, or the code of the FIRST failure met; the
!                remaining resources are released anyway
subroutine psb_d_nest_op_free(op, info)
  class(psb_d_nest_matrix), intent(inout) :: op
  integer(psb_ipk_), intent(out) :: info
  integer(psb_ipk_) :: i_field, j_field, local_info

  info = psb_success_

  if (allocated(op%block_buffer)) then
    do j_field = 1, size(op%block_buffer,2)
      do i_field = 1, size(op%block_buffer,1)
        call block_buffer_free(op%block_buffer(i_field,j_field))
      end do
    end do
    deallocate(op%block_buffer, stat=local_info)
    if (local_info /= 0 .and. info == psb_success_) info = psb_err_alloc_dealloc_
  end if

  if (op%assembled) then
    ! a_glob goes first: its operator refers to block_storage / grid_desc
    call op%a_glob%free()
    call op%desc_glob%free(local_info)
    if (local_info /= psb_success_ .and. info == psb_success_) info = local_info
    call op%grid_desc%free(local_info)
    if (local_info /= psb_success_ .and. info == psb_success_) info = local_info
  end if

  ! Blocks built by asb. NOTE(review): whether a_glob%free() already releases
  ! them through its internal pointers is not visible from this module; the
  ! allocated() guards keep this section safe in either case.
  if (allocated(op%block_storage%mats)) then
    do j_field = 1, size(op%block_storage%mats,2)
      do i_field = 1, size(op%block_storage%mats,1)
        if (allocated(op%block_storage%mats(i_field,j_field)%a)) &
             & call op%block_storage%mats(i_field,j_field)%free()
      end do
    end do
    deallocate(op%block_storage%mats, stat=local_info)
    if (local_info /= 0 .and. info == psb_success_) info = psb_err_alloc_dealloc_
  end if
  op%block_storage%nrblocks = 0
  op%block_storage%ncblocks = 0

  if (allocated(op%field_desc)) then
    do i_field = 1, size(op%field_desc)
      call op%field_desc(i_field)%free(local_info)
      if (local_info /= psb_success_ .and. info == psb_success_) info = local_info
    end do
    deallocate(op%field_desc, stat=local_info)
    if (local_info /= 0 .and. info == psb_success_) info = psb_err_alloc_dealloc_
  end if

  op%n_fields = 0
  op%assembled = .false.
end subroutine psb_d_nest_op_free
! get_owned_rows: list of the GLOBAL indices (field index space) of the rows
! of field i_field that belong to this process, as reported by the field
! descriptor (get_global_indices with owned=.true.). These are the rows the
! process is expected to feed to ins:
!
!   my_rows = nested_matrix%get_owned_rows(1)
!   do k = 1, size(my_rows)
!     global_row = my_rows(k)
!     ...
!
! A zero-sized array is returned when i_field is outside 1..n_fields or the
! builder holds no field descriptors.
function psb_d_nest_op_get_owned_rows(op, i_field) result(owned_global_rows)
  class(psb_d_nest_matrix), intent(in) :: op
  integer(psb_ipk_), intent(in)        :: i_field
  integer(psb_lpk_), allocatable       :: owned_global_rows(:)
  logical :: field_is_valid

  field_is_valid = allocated(op%field_desc) .and. &
       & (i_field >= 1) .and. (i_field <= op%n_fields)

  if (field_is_valid) then
    owned_global_rows = op%field_desc(i_field)%get_global_indices(owned=.true.)
  else
    allocate(owned_global_rows(0))
  end if
end function psb_d_nest_op_get_owned_rows
! get_owned_row_count: number of rows of field i_field owned by this process
! (get_local_rows of the field descriptor). Returns 0 when i_field is outside
! 1..n_fields or the builder holds no field descriptors.
function psb_d_nest_op_get_owned_row_count(op, i_field) result(owned_row_count)
  class(psb_d_nest_matrix), intent(in) :: op
  integer(psb_ipk_), intent(in)        :: i_field
  integer(psb_ipk_)                    :: owned_row_count

  if (allocated(op%field_desc) .and. (i_field >= 1) .and. (i_field <= op%n_fields)) then
    owned_row_count = op%field_desc(i_field)%get_local_rows()
  else
    owned_row_count = 0
  end if
end function psb_d_nest_op_get_owned_row_count
!-----------------------------------------------------------------
! private helpers: growing triplet buffer
!-----------------------------------------------------------------
! Append the first n_entries triplets of (entry_rows, entry_cols, entry_vals)
! at the end of buffer, enlarging its three arrays when needed.
! info is 0 on success, otherwise the allocation status of the array that
! could not be enlarged; in that case n_entries of the buffer is unchanged.
subroutine block_buffer_append(buffer, n_entries, entry_rows, entry_cols, entry_vals, info)
  type(psb_d_nest_block_buffer), intent(inout) :: buffer
  integer(psb_ipk_), intent(in)  :: n_entries
  integer(psb_lpk_), intent(in)  :: entry_rows(:), entry_cols(:)
  real(psb_dpk_), intent(in)     :: entry_vals(:)
  integer(psb_ipk_), intent(out) :: info
  integer(psb_ipk_) :: first_slot, last_slot

  info = psb_success_
  first_slot = buffer%n_entries + 1
  last_slot  = buffer%n_entries + n_entries

  ! make room in all three arrays; stop at the first failure
  call ensure_capacity_lpk(buffer%entry_rows, last_slot, info)
  if (info /= 0) return
  call ensure_capacity_lpk(buffer%entry_cols, last_slot, info)
  if (info /= 0) return
  call ensure_capacity_val(buffer%entry_vals, last_slot, info)
  if (info /= 0) return

  buffer%entry_vals(first_slot:last_slot) = entry_vals(1:n_entries)
  buffer%entry_cols(first_slot:last_slot) = entry_cols(1:n_entries)
  buffer%entry_rows(first_slot:last_slot) = entry_rows(1:n_entries)
  buffer%n_entries = last_slot
end subroutine block_buffer_append
! Make sure the integer(psb_lpk_) array can hold at least required_size
! items, preserving its current contents.
!   - not allocated: allocate max(required_size, 16) items;
!   - large enough:  nothing to do;
!   - too small:     move the contents into a new array of size
!                    max(2*capacity, required_size).
! info is 0 on success, otherwise the status returned by allocate (the
! array is then left untouched).
subroutine ensure_capacity_lpk(array, required_size, info)
  integer(psb_lpk_), allocatable, intent(inout) :: array(:)
  integer(psb_ipk_), intent(in)  :: required_size
  integer(psb_ipk_), intent(out) :: info
  ! Kind-qualified constants: every argument of MAX must have the same kind,
  ! which a bare literal does not guarantee when psb_ipk_ is not the default
  ! integer kind.
  integer(psb_ipk_), parameter :: min_capacity  = 16_psb_ipk_
  integer(psb_ipk_), parameter :: growth_factor = 2_psb_ipk_
  integer(psb_lpk_), allocatable :: grown(:)
  integer(psb_ipk_) :: capacity

  info = 0
  if (.not. allocated(array)) then
    allocate(array(max(required_size, min_capacity)), stat=info); return
  end if

  capacity = size(array)
  if (required_size <= capacity) return

  allocate(grown(max(growth_factor*capacity, required_size)), stat=info)
  if (info /= 0) return
  grown(1:capacity) = array(1:capacity)
  ! hand the new storage over to the caller's array; the old one is released
  call move_alloc(grown, array)
end subroutine ensure_capacity_lpk
! Make sure the real(psb_dpk_) array can hold at least required_size items,
! preserving its current contents.
!   - not allocated: allocate max(required_size, 16) items;
!   - large enough:  nothing to do;
!   - too small:     move the contents into a new array of size
!                    max(2*capacity, required_size).
! info is 0 on success, otherwise the status returned by allocate (the
! array is then left untouched).
subroutine ensure_capacity_val(array, required_size, info)
  real(psb_dpk_), allocatable, intent(inout) :: array(:)
  integer(psb_ipk_), intent(in)  :: required_size
  integer(psb_ipk_), intent(out) :: info
  ! Kind-qualified constants: every argument of MAX must have the same kind,
  ! which a bare literal does not guarantee when psb_ipk_ is not the default
  ! integer kind.
  integer(psb_ipk_), parameter :: min_capacity  = 16_psb_ipk_
  integer(psb_ipk_), parameter :: growth_factor = 2_psb_ipk_
  real(psb_dpk_), allocatable :: grown(:)
  integer(psb_ipk_) :: capacity

  info = 0
  if (.not. allocated(array)) then
    allocate(array(max(required_size, min_capacity)), stat=info); return
  end if

  capacity = size(array)
  if (required_size <= capacity) return

  allocate(grown(max(growth_factor*capacity, required_size)), stat=info)
  if (info /= 0) return
  grown(1:capacity) = array(1:capacity)
  ! hand the new storage over to the caller's array; the old one is released
  call move_alloc(grown, array)
end subroutine ensure_capacity_val
! Empty a triplet buffer: the entry count goes back to 0 and whichever of the
! three arrays is allocated gets released. Safe to call on a buffer that was
! never filled.
subroutine block_buffer_free(buffer)
  type(psb_d_nest_block_buffer), intent(inout) :: buffer

  buffer%n_entries = 0
  if (allocated(buffer%entry_vals)) deallocate(buffer%entry_vals)
  if (allocated(buffer%entry_cols)) deallocate(buffer%entry_cols)
  if (allocated(buffer%entry_rows)) deallocate(buffer%entry_rows)
end subroutine block_buffer_free
end module psb_d_nest_builder_mod

@ -0,0 +1,364 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! Module: psb_d_nest_tools_mod
! Author: Simone Staccone (Stack-1)
!
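! Nested-specific assembly wrappers around the PSBLAS3 double precision
! sparse-matrix routines (psb_spall, psb_spins, psb_spasb, psb_spfree,
! psb_sprn), plus psb_d_nest_rect_block, which builds a single, possibly
! rectangular, local block.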
!
module psb_d_nest_tools_mod
use psb_const_mod, only : psb_ipk_, psb_lpk_, psb_dpk_, psb_success_, psb_err_alloc_dealloc_, &
psb_err_invalid_input_, psb_err_from_subroutine_, &
psb_dupl_add_, psb_dupl_ovwrt_, psb_dupl_err_, psb_ctxt_type
use psb_error_mod, only : psb_errpush
use psb_d_tools_mod, only : psb_spall, psb_spins, psb_spasb, psb_spfree, psb_sprn, &
psb_geall, psb_geins, psb_geasb, psb_gefree
use psb_desc_nest_mod, only : psb_desc_nest_type
use psb_d_nest_mat_mod, only : psb_d_nest_sparse_mat
use psb_d_mat_mod, only : psb_dspmat_type
use psb_d_base_mat_mod, only : psb_d_coo_sparse_mat, psb_d_base_sparse_mat
use psb_desc_mod, only : psb_desc_type
implicit none
private
public :: psb_spall_nest, psb_spins_nest, psb_spasb_nest, psb_spfree_nest, psb_sprn_nest, &
psb_d_nest_rect_block
contains
! Runs psb_spall on every block of the (nrblocks x ncblocks) grid described
! by desc_nest, so that all blocks exist up front. psb_spins_nest allocates
! single blocks on first insertion; use psb_spall_nest instead when the full
! block structure is known in advance.
!
! Arguments:
!   a_nest    (inout) nested matrix; its block counts are copied from
!                     desc_nest and its mats array is allocated unless it
!                     already is
!   desc_nest (in)    descriptor grid, one descriptor per block
!   info      (out)   psb_success_, psb_err_alloc_dealloc_, or the code
!                     returned by the first failing psb_spall
!   nnz       (in, optional) forwarded to every psb_spall call
subroutine psb_spall_nest(a_nest, desc_nest, info, nnz)
  type(psb_d_nest_sparse_mat), intent(inout) :: a_nest
  type(psb_desc_nest_type), intent(in)       :: desc_nest
  integer(psb_ipk_), intent(out)             :: info
  integer(psb_ipk_), intent(in), optional    :: nnz
  integer(psb_ipk_) :: ib, jb, block_status
  character(len=20) :: name

  name = 'psb_spall_nest'
  info = psb_success_

  a_nest%nrblocks = desc_nest%nrblocks
  a_nest%ncblocks = desc_nest%ncblocks

  if (.not. allocated(a_nest%mats)) then
    allocate(a_nest%mats(a_nest%nrblocks, a_nest%ncblocks), stat=info)
    if (info /= 0) then
      info = psb_err_alloc_dealloc_
      call psb_errpush(info, name)
      return
    end if
  end if

  block_rows: do ib = 1, a_nest%nrblocks
    block_cols: do jb = 1, a_nest%ncblocks
      block_status = psb_success_
      if (.not. present(nnz)) then
        call psb_spall(a_nest%mats(ib, jb), desc_nest%descs(ib, jb), block_status)
      else
        call psb_spall(a_nest%mats(ib, jb), desc_nest%descs(ib, jb), block_status, nnz=nnz)
      end if
      if (block_status == psb_success_) cycle block_cols

      ! first failure: report it and give up
      info = block_status
      call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_spall')
      return
    end do block_cols
  end do block_rows
end subroutine psb_spall_nest
! Inserts n_entries triplets into block (block_row, block_col) of the nested
! matrix. When psb_spall_nest was not called first, the block grid and the
! target block are allocated lazily on first insertion; the grid shape is
! then taken from desc_nest if a_nest does not carry one yet.
!
! Arguments:
!   block_row, block_col (in)    block coordinates
!   n_entries            (in)    number of triplets; 0 is a successful no-op
!   entry_rows           (in)    row indices
!   entry_cols           (in)    column indices
!   entry_vals           (in)    values
!   a_nest               (inout) nested matrix
!   desc_nest            (inout) descriptor grid; descs(block_row,block_col)
!                                is the descriptor handed to psb_spall and
!                                psb_spins
!   info                 (out)   psb_success_; psb_err_invalid_input_ for
!                                block indices out of range or arrays
!                                shorter than n_entries;
!                                psb_err_alloc_dealloc_; or the code
!                                returned by psb_spall / psb_spins
subroutine psb_spins_nest(block_row, block_col, n_entries, entry_rows, entry_cols, entry_vals, a_nest, desc_nest, info)
  integer(psb_ipk_), intent(in)  :: block_row, block_col, n_entries
  integer(psb_lpk_), intent(in)  :: entry_rows(:), entry_cols(:)
  real(psb_dpk_), intent(in)     :: entry_vals(:)
  type(psb_d_nest_sparse_mat), intent(inout) :: a_nest
  type(psb_desc_nest_type), intent(inout)    :: desc_nest
  integer(psb_ipk_), intent(out) :: info
  integer(psb_ipk_) :: nnz_estimate
  character(len=20) :: name

  info = psb_success_
  name = 'psb_spins_nest'
  if (n_entries == 0) return

  ! Lazy path: with no block grid and no usable shape on a_nest, adopt the
  ! shape of the descriptor grid (what psb_spall_nest would have done);
  ! otherwise the range check below rejects every insertion.
  if (.not. allocated(a_nest%mats)) then
    if (a_nest%nrblocks <= 0 .or. a_nest%ncblocks <= 0) then
      a_nest%nrblocks = desc_nest%nrblocks
      a_nest%ncblocks = desc_nest%ncblocks
    end if
  end if

  if (block_row < 1 .or. block_row > a_nest%nrblocks .or. &
       block_col < 1 .or. block_col > a_nest%ncblocks) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='invalid block indices')
    return
  end if

  ! the arrays must provide at least the n_entries items being inserted
  if (size(entry_rows) < n_entries .or. size(entry_cols) < n_entries .or. &
       size(entry_vals) < n_entries) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='entry arrays shorter than n_entries')
    return
  end if

  if (.not. allocated(a_nest%mats)) then
    allocate(a_nest%mats(a_nest%nrblocks, a_nest%ncblocks), stat=info)
    if (info /= 0) then
      info = psb_err_alloc_dealloc_
      call psb_errpush(info, name)
      return
    end if
  end if

  if (.not. allocated(a_nest%mats(block_row, block_col)%a)) then
    ! Estimate nnz: use n_entries + 50% buffer for future insertions
    nnz_estimate = max(n_entries, 10) + n_entries / 2
    call psb_spall(a_nest%mats(block_row, block_col), &
         desc_nest%descs(block_row, block_col), info, nnz=nnz_estimate)
    if (info /= psb_success_) then
      call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_spall')
      return
    end if
  end if

  call psb_spins(n_entries, entry_rows, entry_cols, entry_vals, a_nest%mats(block_row, block_col), &
       desc_nest%descs(block_row, block_col), info)
  if (info /= psb_success_) &
       call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_spins')
end subroutine psb_spins_nest
! Assembles, through psb_spasb, every block of the nested matrix that has
! been allocated; blocks never touched are skipped.
! Must be called after psb_cdasb_nest.
!
! Arguments:
!   a_nest    (inout) nested matrix
!   desc_nest (inout) descriptor grid, one descriptor per block
!   info      (out)   psb_success_, or the code returned by the first
!                     failing psb_spasb (the remaining blocks are not
!                     processed)
!   dupl      (in, optional) duplicate handling, default psb_dupl_add_.
!                     psb_dupl_add_, psb_dupl_ovwrt_ and psb_dupl_err_ are
!                     forwarded to psb_spasb; with any other value psb_spasb
!                     is called without a dupl argument
subroutine psb_spasb_nest(a_nest, desc_nest, info, dupl)
  type(psb_d_nest_sparse_mat), intent(inout) :: a_nest
  type(psb_desc_nest_type), intent(inout)    :: desc_nest
  integer(psb_ipk_), intent(out)             :: info
  integer(psb_ipk_), intent(in), optional    :: dupl
  integer(psb_ipk_) :: ib, jb, dupl_mode, block_status
  logical           :: forward_dupl
  character(len=20) :: name

  name = 'psb_spasb_nest'
  info = psb_success_

  if (present(dupl)) then
    dupl_mode = dupl
  else
    dupl_mode = psb_dupl_add_
  end if
  ! only the three known policies are handed down explicitly
  forward_dupl = (dupl_mode == psb_dupl_add_) .or. (dupl_mode == psb_dupl_ovwrt_) &
       .or. (dupl_mode == psb_dupl_err_)

  block_rows: do ib = 1, a_nest%nrblocks
    block_cols: do jb = 1, a_nest%ncblocks
      if (.not. allocated(a_nest%mats(ib, jb)%a)) cycle block_cols

      block_status = psb_success_
      if (forward_dupl) then
        call psb_spasb(a_nest%mats(ib, jb), desc_nest%descs(ib, jb), &
             block_status, dupl=dupl_mode)
      else
        call psb_spasb(a_nest%mats(ib, jb), desc_nest%descs(ib, jb), &
             block_status)
      end if

      if (block_status /= psb_success_) then
        info = block_status
        call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_spasb')
        return
      end if
    end do block_cols
  end do block_rows
end subroutine psb_spasb_nest
! Releases the nested matrix: psb_spfree on every allocated block, then the
! mats array itself is deallocated and nrblocks/ncblocks are reset to 0.
! All blocks are visited even after a failure; info reports the first
! failure only (psb_success_ when there is none).
subroutine psb_spfree_nest(a_nest, desc_nest, info)
  type(psb_d_nest_sparse_mat), intent(inout) :: a_nest
  type(psb_desc_nest_type), intent(in)       :: desc_nest
  integer(psb_ipk_), intent(out)             :: info
  integer(psb_ipk_) :: ib, jb, status
  character(len=20) :: name

  name = 'psb_spfree_nest'
  info = psb_success_

  if (allocated(a_nest%mats)) then
    do ib = 1, a_nest%nrblocks
      do jb = 1, a_nest%ncblocks
        if (.not. allocated(a_nest%mats(ib, jb)%a)) cycle

        status = psb_success_
        call psb_spfree(a_nest%mats(ib, jb), desc_nest%descs(ib, jb), status)

        ! keep going, but remember (and push) only the first error
        if (status == psb_success_) cycle
        if (info /= psb_success_) cycle
        info = status
        call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_spfree')
      end do
    end do

    deallocate(a_nest%mats, stat=status)
    if (status /= 0 .and. info == psb_success_) then
      info = psb_err_alloc_dealloc_
      call psb_errpush(info, name)
    end if
  end if

  a_nest%ncblocks = 0
  a_nest%nrblocks = 0
end subroutine psb_spfree_nest
! Runs psb_sprn on every allocated block of the nested matrix, resetting it
! to the build state while preserving the sparsity pattern. Nothing is done
! when the block grid is not allocated.
! All blocks are visited even after a failure; info reports the first
! failure only. The optional clear flag is forwarded to psb_sprn when
! present.
subroutine psb_sprn_nest(a_nest, desc_nest, info, clear)
  type(psb_d_nest_sparse_mat), intent(inout) :: a_nest
  type(psb_desc_nest_type), intent(in)       :: desc_nest
  integer(psb_ipk_), intent(out)             :: info
  logical, intent(in), optional              :: clear
  integer(psb_ipk_) :: ib, jb, status
  character(len=20) :: name

  name = 'psb_sprn_nest'
  info = psb_success_

  if (allocated(a_nest%mats)) then
    do ib = 1, a_nest%nrblocks
      do jb = 1, a_nest%ncblocks
        if (.not. allocated(a_nest%mats(ib, jb)%a)) cycle

        status = psb_success_
        if (.not. present(clear)) then
          call psb_sprn(a_nest%mats(ib, jb), desc_nest%descs(ib, jb), status)
        else
          call psb_sprn(a_nest%mats(ib, jb), desc_nest%descs(ib, jb), status, clear=clear)
        end if

        ! keep going, but remember (and push) only the first error
        if (status == psb_success_) cycle
        if (info /= psb_success_) cycle
        info = status
        call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_sprn')
      end do
    end do
  end if
end subroutine psb_sprn_nest
! psb_d_nest_rect_block
!
! Build a local GENERAL (possibly rectangular) block A(i,j) of a nested
! operator, with rows in field i and columns in field j (field i /= field j,
! |field i| /= |field j| allowed). Rows are localized against the field-i
! (row) descriptor, columns against the field-j (column) descriptor which
! must already carry the union halo of column j (cdall + cdins(all column-j
! blocks' columns) + cdasb). The result is a block of shape
!    (field-i owned rows) x (field-j local cols incl. halo)
! stored in the format selected by mold / type (CSR when neither is given),
! consumable directly by the nested csmv (psb_d_nest_base_mat).
!
! A single-descriptor psb_spall/psb_spasb cannot express row-field /= col-field
! (it would force rows and columns into the same index space), hence the
! explicit COO build with separate row/column localization. Duplicate
! entries are handled with the psb_dupl_add_ policy.
!
! Arguments (this process's local contribution):
!   blk        (out) the assembled block
!   nz         number of local entries (>= 0, not larger than the arrays)
!   ia_glob(:) GLOBAL field-i row indices (owned by this process)
!   ja_glob(:) GLOBAL field-j column indices
!   val(:)     values
!   desc_row   field-i descriptor (rows)
!   desc_col   field-j descriptor (columns, with union halo)
!   info       (out) psb_success_; psb_err_invalid_input_ when nz is
!              negative or exceeds an array size, when a row does not map
!              to a local owned row, or when a column is not known to
!              desc_col; otherwise the code returned by the COO fix or by
!              the format conversion
!   type       (optional) name of the storage format (default 'CSR')
!   mold       (optional) format mold; takes precedence over type
!
subroutine psb_d_nest_rect_block(blk, nz, ia_glob, ja_glob, val, desc_row, desc_col, info, type, mold)
  type(psb_dspmat_type), intent(out) :: blk
  integer(psb_ipk_), intent(in)      :: nz
  integer(psb_lpk_), intent(in)      :: ia_glob(:), ja_glob(:)
  real(psb_dpk_), intent(in)         :: val(:)
  type(psb_desc_type), intent(in)    :: desc_row, desc_col
  integer(psb_ipk_), intent(out)     :: info
  character(len=*), intent(in), optional :: type ! base storage format (default 'CSR')
  class(psb_d_base_sparse_mat), intent(in), optional :: mold ! any format, e.g. psb_ext ELL/HLL
  type(psb_d_coo_sparse_mat) :: coo_block
  integer(psb_ipk_) :: k_entry, n_loc_rows, n_loc_cols, loc_row, loc_col
  character(len=24) :: name

  info = psb_success_
  name = 'psb_d_nest_rect_block'

  ! The loop below reads ia_glob/ja_glob/val(1:nz): refuse a count the
  ! arrays cannot satisfy instead of reading past their end.
  if (nz < 0 .or. size(ia_glob) < nz .or. size(ja_glob) < nz .or. size(val) < nz) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='nz inconsistent with triplet array sizes')
    return
  end if

  n_loc_rows = desc_row%get_local_rows() ! owned rows of field i
  n_loc_cols = desc_col%get_local_cols() ! field-j local cols (owned + halo)
  call coo_block%allocate(n_loc_rows, n_loc_cols, nz)

  do k_entry = 1, nz
    call desc_row%g2l(ia_glob(k_entry), loc_row, info)
    if (info /= 0 .or. loc_row < 1 .or. loc_row > n_loc_rows) then
      info = psb_err_invalid_input_
      call psb_errpush(info, name, a_err='row not owned / not localizable')
      return
    end if
    call desc_col%g2l(ja_glob(k_entry), loc_col, info)
    if (info /= 0 .or. loc_col < 1 .or. loc_col > n_loc_cols) then
      info = psb_err_invalid_input_
      call psb_errpush(info, name, a_err='column not in field-j descriptor (missing from union halo)')
      return
    end if
    coo_block%ia(k_entry) = loc_row
    coo_block%ja(k_entry) = loc_col
    coo_block%val(k_entry) = val(k_entry)
  end do

  call coo_block%set_nzeros(nz)
  call coo_block%set_dupl(psb_dupl_add_)
  call coo_block%fix(info)
  if (info /= 0) then
    call psb_errpush(psb_err_from_subroutine_, name, a_err='coo fix'); return
  end if

  ! hand the COO data over to blk, then convert to the requested format
  call blk%mv_from(coo_block)
  if (present(mold)) then
    call blk%cscnv(info, mold=mold)
  else if (present(type)) then
    call blk%cscnv(info, type=type)
  else
    call blk%cscnv(info, type='CSR')
  end if
  if (info /= 0) then
    call psb_errpush(psb_err_from_subroutine_, name, a_err='cscnv'); return
  end if
end subroutine psb_d_nest_rect_block
end module psb_d_nest_tools_mod

@ -0,0 +1,414 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific prior written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! File: psb_s_nest_builder_mod.F90
!
! Module: psb_s_nest_builder_mod
! Author: Simone Staccone (Stack-1)
!
! User-friendly frontend to build a nested (MATNEST) operator without manually
! managing per-field descriptors, the union halo, composition and setup.
!
! All the boilerplate (identical for every nested operator) is hidden behind a
! single type, psb_s_nest_matrix, with the usual PSBLAS init/ins/asb pattern:
!
! type(psb_s_nest_matrix) :: nested_matrix
! call nested_matrix%init(ctxt, [n1, n2], info) ! 2 fields of global size n1, n2
! call nested_matrix%ins(1,1, n, rows, cols, vals, info) ! values of block (1,1) = A
! call nested_matrix%ins(1,2, n, rows, cols, vals, info) ! values of block (1,2) = B^T
! call nested_matrix%ins(2,1, n, rows, cols, vals, info) ! values of block (2,1) = B
! ... ! (absent blocks = not inserted)
! call nested_matrix%asb(info) ! assemble: builds a_glob, desc_glob
!
! ! from here on nested_matrix%a_glob and nested_matrix%desc_glob are an
! ! ordinary distributed matrix/descriptor:
! call psb_geall(x, nested_matrix%desc_glob, info)
! call psb_krylov('CG', nested_matrix%a_glob, prec, b, x, eps, nested_matrix%desc_glob, info, ...)
!
! Indices: in ins(block_row, block_col, ...) the rows live in the index space of
! field block_row, the columns in the index space of field block_col (GLOBAL
! field indices, 1..field_size). Each process inserts only the rows it owns
! (PSBLAS convention). Off-diagonal blocks may be rectangular.
!
! NOTE: after asb the object holds consistent internal pointers (a_glob%a points
! to block_storage / grid_desc): do not copy/move the object after assembly.
!
module psb_s_nest_builder_mod
use psb_const_mod
use psb_error_mod, only : psb_errpush
use psb_penv_mod, only : psb_ctxt_type, psb_info
use psb_desc_mod, only : psb_desc_type
use psb_s_mat_mod, only : psb_sspmat_type
use psb_s_base_mat_mod, only : psb_s_base_sparse_mat
use psb_cd_tools_mod, only : psb_cdall, psb_cdins, psb_cdasb
use psb_desc_nest_mod, only : psb_desc_nest_type
use psb_s_nest_mat_mod, only : psb_s_nest_sparse_mat
use psb_s_nest_base_mat_mod, only : psb_s_nest_base_mat, psb_s_nest_base_setup
use psb_cd_nest_tools_mod, only : psb_cd_nest_compose
use psb_s_nest_tools_mod, only : psb_s_nest_rect_block
implicit none
! growing triplet buffer for a single block
type :: psb_s_nest_block_buffer
  ! Staging area holding the (row, column, value) triplets inserted so far
  ! into one block of the nested matrix. Only positions 1..n_entries of the
  ! three arrays are meaningful: the arrays are grown ahead of need, so
  ! their size is a capacity, not a count.
  integer(psb_ipk_) :: n_entries = 0                 ! number of stored triplets
  integer(psb_lpk_), allocatable :: entry_rows(:)    ! row index of each triplet
  integer(psb_lpk_), allocatable :: entry_cols(:)    ! column index of each triplet
  real(psb_spk_), allocatable :: entry_vals(:)       ! coefficient of each triplet
end type psb_s_nest_block_buffer
type :: psb_s_nest_matrix
! Builder and owner of a single precision nested (block) operator.
! Life cycle: init -> ins (repeated) -> asb -> use a_glob/desc_glob -> free.
! context: communication context stored by init.
type(psb_ctxt_type) :: context
! n_fields: number of fields (block rows == block columns); set by init
! from size(field_sizes), reset to 0 by free.
integer(psb_ipk_) :: n_fields = 0
! assembled: set to .true. at the end of a successful asb; ins refuses
! further insertions while it is set; free resets it.
logical :: assembled = .false.
! construction state
type(psb_desc_type), allocatable :: field_desc(:) ! one descriptor per field, allocated by init
type(psb_s_nest_block_buffer), allocatable :: block_buffer(:,:) ! triplets per block (i,j), emptied by asb
! products (owned; the pointers in a_glob%a point in here)
! block_storage: the n_fields x n_fields grid of local blocks built by asb;
! blocks that received no entries are left unbuilt.
type(psb_s_nest_sparse_mat) :: block_storage
! grid_desc: descriptor grid filled by asb, descs(i,j) being a clone of field_desc(j).
type(psb_desc_nest_type) :: grid_desc
type(psb_sspmat_type) :: a_glob ! the matrix to hand to Krylov (wraps the nested operator)
type(psb_desc_type) :: desc_glob ! the global descriptor composed from grid_desc
contains
procedure, pass(op) :: init => psb_s_nest_op_init
procedure, pass(op) :: ins => psb_s_nest_op_ins
procedure, pass(op) :: asb => psb_s_nest_op_asb
procedure, pass(op) :: free => psb_s_nest_op_free
! user-friendly queries on the field row distribution (no descriptor
! jargon needed: these wrap field_desc(i)%get_global_indices(owned=.true.)
! and field_desc(i)%get_local_rows())
procedure, pass(op) :: get_owned_rows => psb_s_nest_op_get_owned_rows
procedure, pass(op) :: get_owned_row_count => psb_s_nest_op_get_owned_row_count
end type psb_s_nest_matrix
private
public :: psb_s_nest_matrix
contains
! init: create one descriptor per field (block distribution from the global sizes)
subroutine psb_s_nest_op_init(op, context, field_sizes, info)
  ! Start building a nested operator: store the context, allocate one
  ! descriptor and one row of triplet buffers per field, and create each
  ! field descriptor with a contiguous block distribution of its rows.
  !
  ! Arguments:
  !   op           (inout) builder object; must not be already initialized
  !                        (call free first to reuse it)
  !   context      (in)    communication context
  !   field_sizes  (in)    global number of rows of each field (>= 0)
  !   info         (out)   psb_success_ on success; otherwise an error code,
  !                        which is also pushed on the PSBLAS error stack
  class(psb_s_nest_matrix), intent(inout) :: op
  type(psb_ctxt_type), intent(in) :: context
  integer(psb_lpk_), intent(in) :: field_sizes(:)
  integer(psb_ipk_), intent(out) :: info
  integer(psb_ipk_) :: my_rank, num_procs, n_fields, i_field, field_local_rows
  integer(psb_lpk_) :: field_global_size
  character(len=24) :: name

  info = psb_success_
  name = 'psb_s_nest_op_init'
  call psb_info(context, my_rank, num_procs)

  ! num_procs is used as a divisor and my_rank selects who gets a remainder
  ! row: reject values for which that arithmetic is meaningless
  ! (presumably what psb_info returns for a process outside the context).
  if ((num_procs < 1) .or. (my_rank < 0)) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='invalid context or process not in context'); return
  end if
  ! Re-initializing a live object would otherwise surface as a misleading
  ! allocation failure below.
  if (allocated(op%field_desc) .or. allocated(op%block_buffer)) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='operator already initialized: call free first'); return
  end if
  if (any(field_sizes < 0)) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='negative field size'); return
  end if

  n_fields = size(field_sizes)
  op%context = context
  op%n_fields = n_fields
  op%assembled = .false.
  allocate(op%field_desc(n_fields), op%block_buffer(n_fields,n_fields), stat=info)
  if (info /= 0) then
    info = psb_err_alloc_dealloc_; call psb_errpush(info, name); return
  end if

  do i_field = 1, n_fields
    field_global_size = field_sizes(i_field)
    ! Block distribution: every process gets floor(size/num_procs) rows and
    ! the first mod(size,num_procs) ranks get one more, so the local counts
    ! add up to the global size.
    field_local_rows = int(field_global_size / int(num_procs, psb_lpk_), psb_ipk_)
    if (int(my_rank, psb_lpk_) < mod(field_global_size, int(num_procs, psb_lpk_))) &
         & field_local_rows = field_local_rows + 1
    call psb_cdall(context, op%field_desc(i_field), info, nl=field_local_rows)
    if (info /= psb_success_) then
      call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdall'); return
    end if
  end do
end subroutine psb_s_nest_op_init
! ins: accumulate the triplets into block (block_row,block_col) and register the
! columns (field block_col index space) into that descriptor's union halo
subroutine psb_s_nest_op_ins(op, block_row, block_col, n_entries, entry_rows, entry_cols, entry_vals, info)
  ! Append n_entries triplets to the buffer of block (block_row, block_col)
  ! and register their column indices in the descriptor of field block_col.
  !
  ! Arguments:
  !   op          (inout) builder object, initialized and not yet assembled
  !   block_row   (in)    block row index, 1..n_fields
  !   block_col   (in)    block column index, 1..n_fields
  !   n_entries   (in)    number of triplets to take from the three arrays;
  !                       a value <= 0 is a successful no-op
  !   entry_rows  (in)    row indices (index space of field block_row)
  !   entry_cols  (in)    column indices (index space of field block_col)
  !   entry_vals  (in)    coefficients
  !   info        (out)   psb_success_ or an error code (also pushed on the
  !                       PSBLAS error stack)
  class(psb_s_nest_matrix), intent(inout) :: op
  integer(psb_ipk_), intent(in) :: block_row, block_col, n_entries
  integer(psb_lpk_), intent(in) :: entry_rows(:), entry_cols(:)
  real(psb_spk_), intent(in) :: entry_vals(:)
  integer(psb_ipk_), intent(out) :: info
  character(len=24) :: name

  info = psb_success_
  name = 'psb_s_nest_op_ins'
  if (op%assembled) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='operator already assembled'); return
  end if
  if (block_row < 1 .or. block_row > op%n_fields .or. &
       & block_col < 1 .or. block_col > op%n_fields) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='block index out of range'); return
  end if
  if (n_entries <= 0) return
  ! The sections (1:n_entries) taken below must exist in all three arrays;
  ! without this check a too-large count reads past the end of the input.
  if (n_entries > min(size(entry_rows), size(entry_cols), size(entry_vals))) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='n_entries exceeds the size of the input arrays'); return
  end if

  call block_buffer_append(op%block_buffer(block_row,block_col), n_entries, &
       & entry_rows, entry_cols, entry_vals, info)
  if (info /= psb_success_) then
    info = psb_err_alloc_dealloc_; call psb_errpush(info, name); return
  end if

  ! The columns of block (block_row,block_col) live in field block_col:
  ! register their indices into that descriptor's union halo
  ! (this also applies when block_col == block_row).
  call psb_cdins(n_entries, entry_cols(1:n_entries), op%field_desc(block_col), info)
  if (info /= psb_success_) then
    call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdins'); return
  end if
end subroutine psb_s_nest_op_ins
! asb: assemble the descriptors, build the blocks, compose the global
! descriptor, set up the operator and wrap it into a_glob.
! The optional type ('CSR'/'CSC'/'COO', default 'CSR') or mold (any
! class extending psb_s_base_sparse_mat, e.g. the psb_ext ELL/HLL or
! the psb_cuda device formats) selects the storage format of the blocks.
subroutine psb_s_nest_op_asb(op, info, type, mold)
  ! Assemble the nested operator from the triplets accumulated by ins:
  !   1) assemble every field descriptor,
  !   2) build one local block per non-empty triplet buffer,
  !   3) fill the descriptor grid with clones of the field descriptors,
  !   4) compose the global descriptor and set up the nested operator,
  !   5) wrap the operator into op%a_glob,
  !   6) release the triplet buffers.
  !
  ! Arguments:
  !   op    (inout) builder object; must be initialized and not yet
  !                 assembled. It is a target because the operator set up in
  !                 step 4 is handed op%block_storage, op%grid_desc and
  !                 op%desc_glob.
  !   info  (out)   psb_success_ or an error code (also pushed on the PSBLAS
  !                 error stack). On error the object is left partially
  !                 built and op%assembled stays .false.
  !   type  (in, optional) storage format name forwarded to the block builder
  !   mold  (in, optional) storage format mold forwarded to the block builder
  class(psb_s_nest_matrix), intent(inout), target :: op
  integer(psb_ipk_), intent(out) :: info
  character(len=*), intent(in), optional :: type
  class(psb_s_base_sparse_mat), intent(in), optional :: mold
  type(psb_s_nest_base_mat) :: nest_operator
  integer(psb_ipk_) :: n_fields, i_field, j_field
  character(len=24) :: name

  info = psb_success_
  name = 'psb_s_nest_op_asb'

  ! A second assembly would re-assemble the descriptors and re-allocate
  ! storage that a_glob is already using.
  if (op%assembled) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='operator already assembled'); return
  end if
  if (.not. (allocated(op%field_desc) .and. allocated(op%block_buffer))) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='operator not initialized'); return
  end if
  n_fields = op%n_fields

  ! 1) assemble the per-field descriptors (with the union halo accumulated in ins)
  do i_field = 1, n_fields
    call psb_cdasb(op%field_desc(i_field), info)
    if (info /= psb_success_) then
      call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdasb'); return
    end if
  end do

  ! 2) build the local blocks (generally rectangular) from the triplets.
  !    Blocks left over from an earlier life of this object are dropped
  !    first, otherwise the allocate below would fail.
  if (allocated(op%block_storage%mats)) then
    deallocate(op%block_storage%mats, stat=info)
    if (info /= 0) then
      info = psb_err_alloc_dealloc_; call psb_errpush(info, name); return
    end if
  end if
  op%block_storage%nrblocks = n_fields
  op%block_storage%ncblocks = n_fields
  allocate(op%block_storage%mats(n_fields,n_fields), stat=info)
  if (info /= 0) then
    info = psb_err_alloc_dealloc_; call psb_errpush(info, name); return
  end if
  do j_field = 1, n_fields
    do i_field = 1, n_fields
      if (op%block_buffer(i_field,j_field)%n_entries > 0) then
        ! absent type/mold are passed through as absent optionals
        call psb_s_nest_rect_block(op%block_storage%mats(i_field,j_field), &
             & op%block_buffer(i_field,j_field)%n_entries, &
             & op%block_buffer(i_field,j_field)%entry_rows, &
             & op%block_buffer(i_field,j_field)%entry_cols, &
             & op%block_buffer(i_field,j_field)%entry_vals, &
             & op%field_desc(i_field), op%field_desc(j_field), info, &
             & type=type, mold=mold)
        if (info /= psb_success_) then
          call psb_errpush(psb_err_from_subroutine_, name, a_err='rect_block'); return
        end if
      end if
    end do
  end do

  ! 3) descriptor grid: descs(i,j) = descriptor of field j
  op%grid_desc%nrblocks = n_fields
  op%grid_desc%ncblocks = n_fields
  allocate(op%grid_desc%descs(n_fields,n_fields), stat=info)
  if (info /= 0) then
    info = psb_err_alloc_dealloc_; call psb_errpush(info, name); return
  end if
  do j_field = 1, n_fields
    do i_field = 1, n_fields
      call op%field_desc(j_field)%clone(op%grid_desc%descs(i_field,j_field), info)
      ! a failed clone must not be overwritten by the next iteration
      if (info /= psb_success_) then
        call psb_errpush(psb_err_from_subroutine_, name, a_err='desc clone'); return
      end if
    end do
  end do

  ! 4) composed global descriptor + operator setup
  call psb_cd_nest_compose(op%grid_desc, op%desc_glob, info)
  if (info /= psb_success_) then
    call psb_errpush(psb_err_from_subroutine_, name, a_err='cd_nest_compose'); return
  end if
  call psb_s_nest_base_setup(nest_operator, op%block_storage, op%grid_desc, op%desc_glob, info)
  if (info /= psb_success_) then
    call psb_errpush(psb_err_from_subroutine_, name, a_err='nest_base_setup'); return
  end if

  ! 5) wrap into the standard matrix object (the pointers keep pointing at op%*)
  allocate(op%a_glob%a, source=nest_operator, stat=info)
  if (info /= 0) then
    info = psb_err_alloc_dealloc_; call psb_errpush(info, name); return
  end if
  call op%a_glob%set_nrows(op%desc_glob%get_local_rows())
  call op%a_glob%set_ncols(op%desc_glob%get_local_cols())
  call op%a_glob%set_asb()

  ! 6) the triplet buffers are no longer needed
  do j_field = 1, n_fields
    do i_field = 1, n_fields
      call block_buffer_free(op%block_buffer(i_field,j_field))
    end do
  end do
  op%assembled = .true.
end subroutine psb_s_nest_op_asb
! free: release everything
subroutine psb_s_nest_op_free(op, info)
  ! Release everything owned by the builder and bring it back to the
  ! pristine (uninitialized) state, so that init can be called again.
  !
  ! Arguments:
  !   op    (inout) builder object in any state (never initialized,
  !                 initialized, assembled)
  !   info  (out)   always psb_success_: the individual releases are
  !                 best-effort and their status codes are not propagated
  class(psb_s_nest_matrix), intent(inout) :: op
  integer(psb_ipk_), intent(out) :: info
  integer(psb_ipk_) :: i_field, j_field, local_info

  info = psb_success_

  ! triplet buffers (already emptied if asb completed)
  if (allocated(op%block_buffer)) then
    do j_field = 1, size(op%block_buffer,2)
      do i_field = 1, size(op%block_buffer,1)
        call block_buffer_free(op%block_buffer(i_field,j_field))
      end do
    end do
    deallocate(op%block_buffer, stat=local_info)
  end if

  ! assembled products; a_glob goes first because it refers to the others
  if (op%assembled) then
    call op%a_glob%free()
    call op%desc_glob%free(local_info)
    call op%grid_desc%free(local_info)
  end if

  ! Local blocks built by asb. They were previously never released, which
  ! leaked them and made the allocate in a later asb on this object fail.
  ! The check is independent of op%assembled so that blocks left behind by
  ! a failed asb are released too.
  if (allocated(op%block_storage%mats)) then
    do j_field = 1, size(op%block_storage%mats,2)
      do i_field = 1, size(op%block_storage%mats,1)
        if (allocated(op%block_storage%mats(i_field,j_field)%a)) &
             & call op%block_storage%mats(i_field,j_field)%free()
      end do
    end do
    deallocate(op%block_storage%mats, stat=local_info)
  end if
  op%block_storage%nrblocks = 0
  op%block_storage%ncblocks = 0

  ! per-field descriptors
  if (allocated(op%field_desc)) then
    do i_field = 1, size(op%field_desc)
      call op%field_desc(i_field)%free(local_info)
    end do
    deallocate(op%field_desc, stat=local_info)
  end if

  op%n_fields = 0
  op%assembled = .false.
end subroutine psb_s_nest_op_free
! get_owned_rows: GLOBAL indices (in the field index space, 1..field size)
! of the rows of field i_field owned by this process. This is the list of
! rows the process is expected to insert through ins:
!
! my_rows = nested_matrix%get_owned_rows(1)
! do k = 1, size(my_rows)
! global_row = my_rows(k)
! ...
!
! An empty array is returned for an out-of-range field index.
function psb_s_nest_op_get_owned_rows(op, i_field) result(owned_global_rows)
  ! Return the global indices of the rows of field i_field owned by the
  ! calling process, as reported by the field descriptor. A zero-size
  ! array is returned when the object has no field descriptors or i_field
  ! is outside 1..n_fields.
  class(psb_s_nest_matrix), intent(in) :: op
  integer(psb_ipk_), intent(in) :: i_field
  integer(psb_lpk_), allocatable :: owned_global_rows(:)
  logical :: field_exists

  field_exists = allocated(op%field_desc) .and. (i_field >= 1) .and. (i_field <= op%n_fields)
  if (field_exists) then
    owned_global_rows = op%field_desc(i_field)%get_global_indices(owned=.true.)
  else
    allocate(owned_global_rows(0))
  end if
end function psb_s_nest_op_get_owned_rows
! get_owned_row_count: how many rows of field i_field this process owns
function psb_s_nest_op_get_owned_row_count(op, i_field) result(owned_row_count)
  ! Number of local rows of field i_field according to its descriptor;
  ! 0 when the object has no field descriptors or i_field is outside
  ! 1..n_fields.
  class(psb_s_nest_matrix), intent(in) :: op
  integer(psb_ipk_), intent(in) :: i_field
  integer(psb_ipk_) :: owned_row_count
  logical :: field_exists

  field_exists = allocated(op%field_desc) .and. (i_field >= 1) .and. (i_field <= op%n_fields)
  if (field_exists) then
    owned_row_count = op%field_desc(i_field)%get_local_rows()
  else
    owned_row_count = 0
  end if
end function psb_s_nest_op_get_owned_row_count
!-----------------------------------------------------------------
! private helpers: growing triplet buffer
!-----------------------------------------------------------------
subroutine block_buffer_append(buffer, n_entries, entry_rows, entry_cols, entry_vals, info)
  ! Copy the first n_entries triplets of the input arrays to the end of
  ! the buffer, growing the three storage arrays first when needed.
  ! info is psb_success_ on success, or the non-zero status of the failed
  ! allocation; in that case the stored count is left unchanged.
  type(psb_s_nest_block_buffer), intent(inout) :: buffer
  integer(psb_ipk_), intent(in) :: n_entries
  integer(psb_lpk_), intent(in) :: entry_rows(:), entry_cols(:)
  real(psb_spk_), intent(in) :: entry_vals(:)
  integer(psb_ipk_), intent(out) :: info
  integer(psb_ipk_) :: first_new, last_new

  info = psb_success_
  first_new = buffer%n_entries + 1
  last_new  = buffer%n_entries + n_entries

  ! grow the arrays one after the other, stopping at the first failure
  call ensure_capacity_lpk(buffer%entry_rows, last_new, info)
  if (info == 0) call ensure_capacity_lpk(buffer%entry_cols, last_new, info)
  if (info == 0) call ensure_capacity_val(buffer%entry_vals, last_new, info)
  if (info /= 0) return

  buffer%entry_rows(first_new:last_new) = entry_rows(1:n_entries)
  buffer%entry_cols(first_new:last_new) = entry_cols(1:n_entries)
  buffer%entry_vals(first_new:last_new) = entry_vals(1:n_entries)
  buffer%n_entries = last_new
end subroutine block_buffer_append
subroutine ensure_capacity_lpk(array, required_size, info)
  ! Make sure the integer array can hold at least required_size elements.
  !  - not allocated: allocate max(required_size, 16) elements;
  !  - large enough:  nothing to do;
  !  - too small:     reallocate to max(2*size, required_size), preserving
  !                   the current contents.
  ! info is 0 on success, otherwise the stat of the failed allocate (the
  ! array is left untouched in that case).
  integer(psb_lpk_), allocatable, intent(inout) :: array(:)
  integer(psb_ipk_), intent(in) :: required_size
  integer(psb_ipk_), intent(out) :: info
  integer(psb_lpk_), allocatable :: enlarged(:)
  integer(psb_ipk_) :: old_size

  info = 0
  if (allocated(array)) then
    old_size = size(array)
    if (old_size < required_size) then
      allocate(enlarged(max(2*old_size, required_size)), stat=info)
      if (info == 0) then
        enlarged(1:old_size) = array(1:old_size)
        call move_alloc(enlarged, array)
      end if
    end if
  else
    allocate(array(max(required_size,16)), stat=info)
  end if
end subroutine ensure_capacity_lpk
subroutine ensure_capacity_val(array, required_size, info)
  ! Make sure the real array can hold at least required_size elements.
  !  - not allocated: allocate max(required_size, 16) elements;
  !  - large enough:  nothing to do;
  !  - too small:     reallocate to max(2*size, required_size), preserving
  !                   the current contents.
  ! info is 0 on success, otherwise the stat of the failed allocate (the
  ! array is left untouched in that case).
  real(psb_spk_), allocatable, intent(inout) :: array(:)
  integer(psb_ipk_), intent(in) :: required_size
  integer(psb_ipk_), intent(out) :: info
  real(psb_spk_), allocatable :: enlarged(:)
  integer(psb_ipk_) :: old_size

  info = 0
  if (allocated(array)) then
    old_size = size(array)
    if (old_size < required_size) then
      allocate(enlarged(max(2*old_size, required_size)), stat=info)
      if (info == 0) then
        enlarged(1:old_size) = array(1:old_size)
        call move_alloc(enlarged, array)
      end if
    end if
  else
    allocate(array(max(required_size,16)), stat=info)
  end if
end subroutine ensure_capacity_val
subroutine block_buffer_free(buffer)
  ! Empty a triplet buffer: reset the stored count and release whichever
  ! of the three storage arrays is currently allocated.
  type(psb_s_nest_block_buffer), intent(inout) :: buffer

  buffer%n_entries = 0
  if (allocated(buffer%entry_vals)) deallocate(buffer%entry_vals)
  if (allocated(buffer%entry_cols)) deallocate(buffer%entry_cols)
  if (allocated(buffer%entry_rows)) deallocate(buffer%entry_rows)
end subroutine block_buffer_free
end module psb_s_nest_builder_mod

@ -0,0 +1,364 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! Module: psb_s_nest_tools_mod
! Author: Simone Staccone (Stack-1)
!
! Nested-specific assembly wrappers for the PSBLAS3 single precision (real) sparse matrix routines
!
module psb_s_nest_tools_mod
use psb_const_mod, only : psb_ipk_, psb_lpk_, psb_spk_, psb_success_, psb_err_alloc_dealloc_, &
psb_err_invalid_input_, psb_err_from_subroutine_, &
psb_dupl_add_, psb_dupl_ovwrt_, psb_dupl_err_, psb_ctxt_type
use psb_error_mod, only : psb_errpush
use psb_s_tools_mod, only : psb_spall, psb_spins, psb_spasb, psb_spfree, psb_sprn, &
psb_geall, psb_geins, psb_geasb, psb_gefree
use psb_desc_nest_mod, only : psb_desc_nest_type
use psb_s_nest_mat_mod, only : psb_s_nest_sparse_mat
use psb_s_mat_mod, only : psb_sspmat_type
use psb_s_base_mat_mod, only : psb_s_coo_sparse_mat, psb_s_base_sparse_mat
use psb_desc_mod, only : psb_desc_type
implicit none
private
public :: psb_spall_nest, psb_spins_nest, psb_spasb_nest, psb_spfree_nest, psb_sprn_nest, &
psb_s_nest_rect_block
contains
! Allocates all (nrblocks x ncblocks) sparse matrix blocks
! and marks all as present. psb_spins_nest lazy-allocates individual
! blocks on first insertion; call psb_spall_nest instead when the
! full block structure is known up front.
subroutine psb_spall_nest(a_nest, desc_nest, info, nnz)
  ! Allocate (psb_spall) every block of the nested matrix, using the block
  ! grid dimensions of desc_nest and, for block (i,j), the descriptor
  ! desc_nest%descs(i,j).
  !
  ! Arguments:
  !   a_nest     (inout) nested matrix; its mats array is allocated here
  !                      unless it already is
  !   desc_nest  (in)    descriptor grid supplying nrblocks/ncblocks and
  !                      the per-block descriptors
  !   info       (out)   psb_success_ or an error code (also pushed on the
  !                      PSBLAS error stack); the first failing block
  !                      stops the sweep
  !   nnz        (in, optional) nonzero estimate forwarded to every psb_spall
  type(psb_s_nest_sparse_mat), intent(inout) :: a_nest
  type(psb_desc_nest_type), intent(in) :: desc_nest
  integer(psb_ipk_), intent(out) :: info
  integer(psb_ipk_), intent(in), optional :: nnz
  integer(psb_ipk_) :: i_block_row, j_block_col, local_info
  character(len=20) :: name

  info = psb_success_
  name = 'psb_spall_nest'
  a_nest%nrblocks = desc_nest%nrblocks
  a_nest%ncblocks = desc_nest%ncblocks

  if (allocated(a_nest%mats)) then
    ! A pre-existing block array is reused as is; it must at least cover
    ! the descriptor grid or the sweep below would index out of bounds.
    if (size(a_nest%mats,1) < a_nest%nrblocks .or. &
         & size(a_nest%mats,2) < a_nest%ncblocks) then
      info = psb_err_invalid_input_
      call psb_errpush(info, name, a_err='existing block array smaller than descriptor grid')
      return
    end if
  else
    allocate(a_nest%mats(a_nest%nrblocks, a_nest%ncblocks), stat=info)
    if (info /= 0) then
      info = psb_err_alloc_dealloc_
      call psb_errpush(info, name)
      return
    end if
  end if

  do i_block_row = 1, a_nest%nrblocks
    do j_block_col = 1, a_nest%ncblocks
      local_info = psb_success_
      ! an absent nnz is passed through as an absent optional
      call psb_spall(a_nest%mats(i_block_row, j_block_col), &
           & desc_nest%descs(i_block_row, j_block_col), local_info, nnz=nnz)
      if (local_info /= psb_success_) then
        info = local_info
        call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_spall')
        return
      end if
    end do
  end do
end subroutine psb_spall_nest
! Inserts n_entries entries into block (block_row, block_col) of the
! nested matrix. The block is lazy-allocated on first insertion if
! psb_spall_nest was not called first.
subroutine psb_spins_nest(block_row, block_col, n_entries, entry_rows, entry_cols, entry_vals, a_nest, desc_nest, info)
  ! Insert n_entries triplets into block (block_row, block_col) of the
  ! nested matrix through psb_spins, allocating the block array and/or the
  ! block itself on first use.
  !
  ! Arguments:
  !   block_row, block_col (in) block coordinates, 1-based
  !   n_entries   (in)    number of triplets; 0 is a successful no-op
  !   entry_rows  (in)    row indices, forwarded to psb_spins
  !   entry_cols  (in)    column indices, forwarded to psb_spins
  !   entry_vals  (in)    coefficients, forwarded to psb_spins
  !   a_nest      (inout) nested matrix
  !   desc_nest   (inout) descriptor grid; descs(block_row,block_col) is the
  !                       descriptor used for this block
  !   info        (out)   psb_success_ or an error code (also pushed on the
  !                       PSBLAS error stack)
  integer(psb_ipk_), intent(in) :: block_row, block_col, n_entries
  integer(psb_lpk_), intent(in) :: entry_rows(:), entry_cols(:)
  real(psb_spk_), intent(in) :: entry_vals(:)
  type(psb_s_nest_sparse_mat), intent(inout) :: a_nest
  type(psb_desc_nest_type), intent(inout) :: desc_nest
  integer(psb_ipk_), intent(out) :: info
  integer(psb_ipk_) :: nnz_estimate, n_block_rows, n_block_cols
  character(len=20) :: name

  info = psb_success_
  name = 'psb_spins_nest'
  if (n_entries == 0) return

  ! Block grid used for the range check. When nothing has sized a_nest yet
  ! (no block array and no positive dimensions) the grid is taken from the
  ! descriptor, as psb_spall_nest does; otherwise the first lazy insertion
  ! could never pass the range check.
  ! NOTE(review): assumes non-positive nrblocks/ncblocks mean "not set",
  ! which is the state psb_spfree_nest leaves behind - confirm the default.
  n_block_rows = a_nest%nrblocks
  n_block_cols = a_nest%ncblocks
  if ((.not. allocated(a_nest%mats)) .and. &
       & (n_block_rows <= 0 .or. n_block_cols <= 0)) then
    n_block_rows = desc_nest%nrblocks
    n_block_cols = desc_nest%ncblocks
  end if

  if (block_row < 1 .or. block_row > n_block_rows .or. &
       & block_col < 1 .or. block_col > n_block_cols) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='invalid block indices')
    return
  end if

  if (.not. allocated(a_nest%mats)) then
    allocate(a_nest%mats(n_block_rows, n_block_cols), stat=info)
    if (info /= 0) then
      info = psb_err_alloc_dealloc_
      call psb_errpush(info, name)
      return
    end if
    a_nest%nrblocks = n_block_rows
    a_nest%ncblocks = n_block_cols
  end if

  if (.not. allocated(a_nest%mats(block_row, block_col)%a)) then
    ! Size estimate for the new block: at least 10 entries, plus half of
    ! this batch as headroom for later insertions.
    nnz_estimate = max(n_entries, 10) + n_entries / 2
    call psb_spall(a_nest%mats(block_row, block_col), &
         & desc_nest%descs(block_row, block_col), info, nnz=nnz_estimate)
    if (info /= psb_success_) then
      call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_spall')
      return
    end if
  end if

  call psb_spins(n_entries, entry_rows, entry_cols, entry_vals, a_nest%mats(block_row, block_col), &
       & desc_nest%descs(block_row, block_col), info)
  if (info /= psb_success_) &
       & call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_spins')
end subroutine psb_spins_nest
! Calls psb_spasb on all present block matrices.
! Must be called after psb_cdasb_nest.
subroutine psb_spasb_nest(a_nest, desc_nest, info, dupl)
  ! Assemble (psb_spasb) every block of the nested matrix that has been
  ! allocated; blocks never allocated are skipped.
  !
  ! Arguments:
  !   a_nest     (inout) nested matrix; with no block array there is
  !                      nothing to assemble and the call succeeds
  !   desc_nest  (inout) descriptor grid; descs(i,j) is used for block (i,j)
  !   info       (out)   psb_success_ or an error code (also pushed on the
  !                      PSBLAS error stack); the first failing block
  !                      stops the sweep
  !   dupl       (in, optional) duplicate handling, default psb_dupl_add_.
  !                      A value other than psb_dupl_add_, psb_dupl_ovwrt_
  !                      or psb_dupl_err_ is not forwarded, so psb_spasb
  !                      applies its own default.
  type(psb_s_nest_sparse_mat), intent(inout) :: a_nest
  type(psb_desc_nest_type), intent(inout) :: desc_nest
  integer(psb_ipk_), intent(out) :: info
  integer(psb_ipk_), intent(in), optional :: dupl
  integer(psb_ipk_) :: i_block_row, j_block_col, dupl_mode, local_info
  logical :: forward_dupl
  character(len=20) :: name

  info = psb_success_
  name = 'psb_spasb_nest'
  ! Same guard as psb_sprn_nest / psb_spfree_nest: never index an
  ! unallocated block array.
  if (.not. allocated(a_nest%mats)) return

  dupl_mode = psb_dupl_add_
  if (present(dupl)) dupl_mode = dupl
  forward_dupl = (dupl_mode == psb_dupl_add_) .or. (dupl_mode == psb_dupl_ovwrt_) &
       & .or. (dupl_mode == psb_dupl_err_)

  do i_block_row = 1, a_nest%nrblocks
    do j_block_col = 1, a_nest%ncblocks
      if (.not. allocated(a_nest%mats(i_block_row, j_block_col)%a)) cycle
      local_info = psb_success_
      if (forward_dupl) then
        call psb_spasb(a_nest%mats(i_block_row, j_block_col), &
             & desc_nest%descs(i_block_row, j_block_col), local_info, dupl=dupl_mode)
      else
        call psb_spasb(a_nest%mats(i_block_row, j_block_col), &
             & desc_nest%descs(i_block_row, j_block_col), local_info)
      end if
      if (local_info /= psb_success_) then
        info = local_info
        call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_spasb')
        return
      end if
    end do
  end do
end subroutine psb_spasb_nest
! Calls psb_spfree on every present block, then deallocates the
! mats array and resets nrblocks/ncblocks to 0.
subroutine psb_spfree_nest(a_nest, desc_nest, info)
  ! Free (psb_spfree) every allocated block, release the block array and
  ! reset the block grid dimensions to 0. All blocks are visited even
  ! after a failure; info reports the first error encountered, which is
  ! also the only one pushed on the PSBLAS error stack.
  type(psb_s_nest_sparse_mat), intent(inout) :: a_nest
  type(psb_desc_nest_type), intent(in) :: desc_nest
  integer(psb_ipk_), intent(out) :: info
  integer(psb_ipk_) :: i_block_row, j_block_col, local_info
  character(len=20) :: name

  info = psb_success_
  name = 'psb_spfree_nest'

  release: if (allocated(a_nest%mats)) then
    do i_block_row = 1, a_nest%nrblocks
      do j_block_col = 1, a_nest%ncblocks
        if (.not. allocated(a_nest%mats(i_block_row, j_block_col)%a)) cycle
        local_info = psb_success_
        call psb_spfree(a_nest%mats(i_block_row, j_block_col), &
             & desc_nest%descs(i_block_row, j_block_col), local_info)
        if (local_info == psb_success_) cycle
        ! keep only the first error
        if (info /= psb_success_) cycle
        info = local_info
        call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_spfree')
      end do
    end do

    deallocate(a_nest%mats, stat=local_info)
    if (local_info /= 0) then
      if (info == psb_success_) then
        info = psb_err_alloc_dealloc_
        call psb_errpush(info, name)
      end if
    end if
  end if release

  a_nest%nrblocks = 0
  a_nest%ncblocks = 0
end subroutine psb_spfree_nest
! Calls psb_sprn on every present block matrix, resetting it to
! the build state while preserving the sparsity pattern.
subroutine psb_sprn_nest(a_nest, desc_nest, info, clear)
  ! Reinitialize (psb_sprn) every allocated block of the nested matrix.
  ! Nothing is done when the block array is not allocated. All blocks are
  ! visited even after a failure; info reports the first error
  ! encountered, which is also the only one pushed on the error stack.
  ! The optional clear flag is forwarded to psb_sprn when present.
  type(psb_s_nest_sparse_mat), intent(inout) :: a_nest
  type(psb_desc_nest_type), intent(in) :: desc_nest
  integer(psb_ipk_), intent(out) :: info
  logical, intent(in), optional :: clear
  integer(psb_ipk_) :: i_block_row, j_block_col, local_info
  character(len=20) :: name

  info = psb_success_
  name = 'psb_sprn_nest'
  if (.not. allocated(a_nest%mats)) return

  do i_block_row = 1, a_nest%nrblocks
    do j_block_col = 1, a_nest%ncblocks
      if (.not. allocated(a_nest%mats(i_block_row, j_block_col)%a)) cycle
      local_info = psb_success_
      ! an absent clear is passed through as an absent optional
      call psb_sprn(a_nest%mats(i_block_row, j_block_col), &
           & desc_nest%descs(i_block_row, j_block_col), local_info, clear=clear)
      if (local_info == psb_success_) cycle
      ! keep only the first error
      if (info /= psb_success_) cycle
      info = local_info
      call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_sprn')
    end do
  end do
end subroutine psb_sprn_nest
! psb_s_nest_rect_block
!
! Build a local GENERAL (possibly rectangular) block A(i,j) of a nested
! operator, with rows in field i and columns in field j (field i /= field j,
! |field i| /= |field j| allowed). Rows are localized against the field-i
! (row) descriptor, columns against the field-j (column) descriptor which
! must already carry the union halo of column j (cdall + cdins(all column-j
! blocks' columns) + cdasb). The result is a block (CSR by default, or the
! storage format selected through type/mold) of shape
! (field-i owned rows) x (field-j local cols incl. halo)
! consumable directly by the nested csmv (psb_s_nest_base_mat).
!
! A single-descriptor psb_spall/psb_spasb cannot express row-field /= col-field
! (it would force rows and columns into the same index space), hence the
! explicit COO build with separate row/column localization.
!
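! Arguments (this process's local contribution):
! blk (out) the assembled block (CSR unless type/mold say otherwise)
! nz number of local entries
! ia_glob(:) GLOBAL field-i row indices (owned by this process)
! ja_glob(:) GLOBAL field-j column indices
! val(:) values
! desc_row field-i descriptor (rows)
! desc_col field-j descriptor (columns, with union halo)
! info (out) psb_success_ or an error code
! type (optional) name of the storage format; ignored when mold is present
! mold (optional) storage format given as a mold object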
!
subroutine psb_s_nest_rect_block(blk, nz, ia_glob, ja_glob, val, desc_row, desc_col, info, type, mold)
  ! Build one local block from global-index triplets: rows are translated
  ! to local indices with desc_row, columns with desc_col, the triplets
  ! are stored in a COO matrix (duplicates added together by fix) and the
  ! result is converted to the requested storage format.
  !
  ! Arguments:
  !   blk       (out) the assembled block
  !   nz        (in)  number of triplets to use; must be between 0 and the
  !                   size of the shortest input array
  !   ia_glob   (in)  global row indices; each must map to a local row in
  !                   1..desc_row%get_local_rows()
  !   ja_glob   (in)  global column indices; each must map to a local
  !                   column in 1..desc_col%get_local_cols()
  !   val       (in)  coefficients
  !   desc_row  (in)  descriptor used to localize the rows
  !   desc_col  (in)  descriptor used to localize the columns
  !   info      (out) psb_success_ or an error code (also pushed on the
  !                   PSBLAS error stack)
  !   type      (in, optional) storage format name, used when mold is absent
  !   mold      (in, optional) storage format mold; takes precedence over
  !                   type. With neither, the block is converted to 'CSR'.
  type(psb_sspmat_type), intent(out) :: blk
  integer(psb_ipk_), intent(in) :: nz
  integer(psb_lpk_), intent(in) :: ia_glob(:), ja_glob(:)
  real(psb_spk_), intent(in) :: val(:)
  type(psb_desc_type), intent(in) :: desc_row, desc_col
  integer(psb_ipk_), intent(out) :: info
  character(len=*), intent(in), optional :: type ! base storage format (default 'CSR')
  class(psb_s_base_sparse_mat), intent(in), optional :: mold ! any format, e.g. psb_ext ELL/HLL
  type(psb_s_coo_sparse_mat) :: coo_block
  integer(psb_ipk_) :: k_entry, n_loc_rows, n_loc_cols, loc_row, loc_col
  character(len=24) :: name

  info = psb_success_
  name = 'psb_s_nest_rect_block'

  ! The loop below reads positions 1..nz of all three arrays: reject a
  ! count they cannot satisfy instead of reading out of bounds.
  if (nz < 0 .or. nz > min(size(ia_glob), size(ja_glob), size(val))) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='nz negative or larger than the input arrays')
    return
  end if

  n_loc_rows = desc_row%get_local_rows() ! owned rows of field i
  n_loc_cols = desc_col%get_local_cols() ! field-j local cols (owned + halo)
  call coo_block%allocate(n_loc_rows, n_loc_cols, nz)

  do k_entry = 1, nz
    call desc_row%g2l(ia_glob(k_entry), loc_row, info)
    if (info /= 0 .or. loc_row < 1 .or. loc_row > n_loc_rows) then
      info = psb_err_invalid_input_
      call psb_errpush(info, name, a_err='row not owned / not localizable')
      return
    end if
    call desc_col%g2l(ja_glob(k_entry), loc_col, info)
    if (info /= 0 .or. loc_col < 1 .or. loc_col > n_loc_cols) then
      info = psb_err_invalid_input_
      call psb_errpush(info, name, a_err='column not in field-j descriptor (missing from union halo)')
      return
    end if
    coo_block%ia(k_entry) = loc_row
    coo_block%ja(k_entry) = loc_col
    coo_block%val(k_entry) = val(k_entry)
  end do

  call coo_block%set_nzeros(nz)
  ! entries inserted more than once at the same position are summed
  call coo_block%set_dupl(psb_dupl_add_)
  call coo_block%fix(info)
  if (info /= 0) then
    call psb_errpush(psb_err_from_subroutine_, name, a_err='coo fix'); return
  end if

  call blk%mv_from(coo_block)
  if (present(mold)) then
    call blk%cscnv(info, mold=mold)
  else if (present(type)) then
    call blk%cscnv(info, type=type)
  else
    call blk%cscnv(info, type='CSR')
  end if
  if (info /= 0) then
    call psb_errpush(psb_err_from_subroutine_, name, a_err='cscnv'); return
  end if
end subroutine psb_s_nest_rect_block
end module psb_s_nest_tools_mod

@ -44,4 +44,6 @@ module psb_tools_mod
use psb_d_tools_mod
use psb_c_tools_mod
use psb_z_tools_mod
use psb_cd_nest_tools_mod
use psb_d_nest_tools_mod
end module psb_tools_mod

@ -0,0 +1,414 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific prior written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! File: psb_z_nest_builder_mod.F90
!
! Module: psb_z_nest_builder_mod
! Author: Simone Staccone (Stack-1)
!
! User-friendly frontend to build a nested (MATNEST) operator without manually
! managing per-field descriptors, the union halo, composition and setup.
!
! All the boilerplate (identical for every nested operator) is hidden behind a
! single type, psb_z_nest_matrix, with the usual PSBLAS init/ins/asb pattern:
!
! type(psb_z_nest_matrix) :: nested_matrix
! call nested_matrix%init(ctxt, [n1, n2], info) ! 2 fields of global size n1, n2
! call nested_matrix%ins(1,1, n, rows, cols, vals, info) ! values of block (1,1) = A
! call nested_matrix%ins(1,2, n, rows, cols, vals, info) ! values of block (1,2) = B^T
! call nested_matrix%ins(2,1, n, rows, cols, vals, info) ! values of block (2,1) = B
! ... ! (absent blocks = not inserted)
! call nested_matrix%asb(info) ! assemble: builds a_glob, desc_glob
!
! ! from here on nested_matrix%a_glob and nested_matrix%desc_glob are an
! ! ordinary distributed matrix/descriptor:
! call psb_geall(x, nested_matrix%desc_glob, info)
! call psb_krylov('CG', nested_matrix%a_glob, prec, b, x, eps, nested_matrix%desc_glob, info, ...)
!
! Indices: in ins(block_row, block_col, ...) the rows live in the index space of
! field block_row, the columns in the index space of field block_col (GLOBAL
! field indices, 1..field_size). Each process inserts only the rows it owns
! (PSBLAS convention). Off-diagonal blocks may be rectangular.
!
! NOTE: after asb the object holds consistent internal pointers (a_glob%a points
! to block_storage / grid_desc): do not copy/move the object after assembly.
!
module psb_z_nest_builder_mod
use psb_const_mod
use psb_error_mod, only : psb_errpush
use psb_penv_mod, only : psb_ctxt_type, psb_info
use psb_desc_mod, only : psb_desc_type
use psb_z_mat_mod, only : psb_zspmat_type
use psb_z_base_mat_mod, only : psb_z_base_sparse_mat
use psb_cd_tools_mod, only : psb_cdall, psb_cdins, psb_cdasb
use psb_desc_nest_mod, only : psb_desc_nest_type
use psb_z_nest_mat_mod, only : psb_z_nest_sparse_mat
use psb_z_nest_base_mat_mod, only : psb_z_nest_base_mat, psb_z_nest_base_setup
use psb_cd_nest_tools_mod, only : psb_cd_nest_compose
use psb_z_nest_tools_mod, only : psb_z_nest_rect_block
implicit none
! growing triplet buffer for a single block
type :: psb_z_nest_block_buffer
  ! Staging area holding the (row, column, value) triplets of one block of
  ! the double precision complex nested matrix.
  integer(psb_ipk_) :: n_entries = 0                 ! number of stored triplets
  integer(psb_lpk_), allocatable :: entry_rows(:)    ! row index of each triplet
  integer(psb_lpk_), allocatable :: entry_cols(:)    ! column index of each triplet
  complex(psb_dpk_), allocatable :: entry_vals(:)    ! coefficient of each triplet
end type psb_z_nest_block_buffer
type :: psb_z_nest_matrix
! Builder and owner of a double precision complex nested (block) operator,
! driven through the type-bound init / ins / asb / free procedures.
! context: communication context stored by init.
type(psb_ctxt_type) :: context
! n_fields: number of fields, set by init from size(field_sizes).
integer(psb_ipk_) :: n_fields = 0
! assembled: reset to .false. by init.
! NOTE(review): presumably set by asb, as in the single precision builder - confirm.
logical :: assembled = .false.
! construction state
type(psb_desc_type), allocatable :: field_desc(:) ! one descriptor per field, allocated by init
type(psb_z_nest_block_buffer), allocatable :: block_buffer(:,:) ! triplets per block (i,j), allocated by init
! products (owned; the pointers in a_glob%a point in here)
type(psb_z_nest_sparse_mat) :: block_storage
type(psb_desc_nest_type) :: grid_desc
type(psb_zspmat_type) :: a_glob ! the matrix to hand to Krylov
type(psb_desc_type) :: desc_glob ! the global descriptor
contains
procedure, pass(op) :: init => psb_z_nest_op_init
procedure, pass(op) :: ins => psb_z_nest_op_ins
procedure, pass(op) :: asb => psb_z_nest_op_asb
procedure, pass(op) :: free => psb_z_nest_op_free
! user-friendly queries on the field row distribution (no descriptor
! jargon needed: these replace field_desc(i)%get_local_rows() / %l2g(...))
procedure, pass(op) :: get_owned_rows => psb_z_nest_op_get_owned_rows
procedure, pass(op) :: get_owned_row_count => psb_z_nest_op_get_owned_row_count
end type psb_z_nest_matrix
private
public :: psb_z_nest_matrix
contains
! init: create one descriptor per field, distributing the rows of each field
! in contiguous chunks over the processes of the context.
!
! Arguments:
!   op           the builder to initialise
!   context      PSBLAS context the operator lives on
!   field_sizes  global number of rows of each field; size(field_sizes) is the
!                number of fields (the block grid is n_fields x n_fields)
!   info         psb_success_ on success;
!                psb_err_alloc_dealloc_ if the per-field storage cannot be
!                allocated (this includes calling init again without free);
!                the code returned by psb_cdall if a descriptor cannot be
!                created.
!
! The object state (context, n_fields, assembled) is updated only once the
! storage has been allocated, so a failed allocation leaves op untouched
! instead of advertising n_fields fields that have no buffers behind them.
subroutine psb_z_nest_op_init(op, context, field_sizes, info)
  class(psb_z_nest_matrix), intent(inout) :: op
  type(psb_ctxt_type), intent(in) :: context
  integer(psb_lpk_), intent(in) :: field_sizes(:)
  integer(psb_ipk_), intent(out) :: info
  integer(psb_ipk_) :: my_rank, num_procs, n_fields, i_field, field_local_rows
  integer(psb_lpk_) :: field_global_size
  character(len=24) :: name

  info = psb_success_
  name = 'psb_z_nest_op_init'
  call psb_info(context, my_rank, num_procs)
  n_fields = size(field_sizes)

  allocate(op%field_desc(n_fields), op%block_buffer(n_fields,n_fields), stat=info)
  if (info /= 0) then
    info = psb_err_alloc_dealloc_; call psb_errpush(info, name); return
  end if

  ! commit the new state only now that the storage exists
  op%context = context
  op%n_fields = n_fields
  op%assembled = .false.

  do i_field = 1, n_fields
    field_global_size = field_sizes(i_field)
    ! block distribution: every process gets floor(size/num_procs) rows and the
    ! first mod(size,num_procs) ranks get one more (total size invariant)
    field_local_rows = int(field_global_size / int(num_procs, psb_lpk_), psb_ipk_)
    if (int(my_rank, psb_lpk_) < mod(field_global_size, int(num_procs, psb_lpk_))) &
         & field_local_rows = field_local_rows + 1
    call psb_cdall(context, op%field_desc(i_field), info, nl=field_local_rows)
    if (info /= psb_success_) then
      call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdall'); return
    end if
  end do
end subroutine psb_z_nest_op_init
! ins: accumulate the triplets into block (block_row,block_col) and register the
! columns (field block_col index space) into that descriptor's union halo.
!
! Arguments:
!   op                     the builder (must be initialised, not yet assembled)
!   block_row, block_col   block coordinates, each in 1..n_fields
!   n_entries              number of triplets to insert; nothing is done and
!                          psb_success_ is returned when n_entries <= 0
!   entry_rows, entry_cols row / column indices of the triplets; only the
!                          first n_entries elements are read
!   entry_vals             values of the triplets (first n_entries elements)
!   info                   psb_success_ on success;
!                          psb_err_invalid_input_ if the operator is already
!                          assembled, a block index is out of range, the
!                          builder has no buffers, or an array holds fewer than
!                          n_entries elements;
!                          psb_err_alloc_dealloc_ if the buffer cannot grow;
!                          the code returned by psb_cdins otherwise.
subroutine psb_z_nest_op_ins(op, block_row, block_col, n_entries, entry_rows, entry_cols, entry_vals, info)
  class(psb_z_nest_matrix), intent(inout) :: op
  integer(psb_ipk_), intent(in) :: block_row, block_col, n_entries
  integer(psb_lpk_), intent(in) :: entry_rows(:), entry_cols(:)
  complex(psb_dpk_), intent(in) :: entry_vals(:)
  integer(psb_ipk_), intent(out) :: info
  character(len=24) :: name

  info = psb_success_
  name = 'psb_z_nest_op_ins'

  if (op%assembled) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='operator already assembled'); return
  end if
  if (block_row < 1 .or. block_row > op%n_fields .or. &
       & block_col < 1 .or. block_col > op%n_fields) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='block index out of range'); return
  end if
  ! n_fields may be positive while the storage is missing (e.g. after an
  ! allocation failure): never index unallocated arrays
  if (.not. (allocated(op%block_buffer) .and. allocated(op%field_desc))) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='operator not initialized'); return
  end if
  if (n_entries <= 0) return
  ! the sections (1:n_entries) taken below must exist in all three arrays
  if (size(entry_rows) < n_entries .or. size(entry_cols) < n_entries .or. &
       & size(entry_vals) < n_entries) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='fewer than n_entries triplets supplied'); return
  end if

  call block_buffer_append(op%block_buffer(block_row,block_col), n_entries, &
       & entry_rows, entry_cols, entry_vals, info)
  if (info /= psb_success_) then
    info = psb_err_alloc_dealloc_; call psb_errpush(info, name); return
  end if

  ! the columns of block (block_row,block_col) live in field block_col ->
  ! register their indices into that descriptor's union halo
  ! (this also applies when block_col == block_row)
  call psb_cdins(n_entries, entry_cols(1:n_entries), op%field_desc(block_col), info)
  if (info /= psb_success_) then
    call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdins'); return
  end if
end subroutine psb_z_nest_op_ins
! asb: assemble the descriptors, build the blocks, compose the global
! descriptor, set up the operator and wrap it into a_glob.
! The optional type ('CSR'/'CSC'/'COO', default 'CSR') or mold (any
! class extending psb_z_base_sparse_mat, e.g. the psb_ext ELL/HLL or
! the psb_cuda device formats) selects the storage format of the blocks.
!
! Arguments:
!   op    the builder; on success op%a_glob / op%desc_glob are ready to use
!         and op%assembled is .true.
!   info  psb_success_ on success;
!         psb_err_invalid_input_ if the operator is already assembled;
!         psb_err_alloc_dealloc_ on allocation failure;
!         otherwise the code returned by the failing step (psb_cdasb,
!         psb_z_nest_rect_block, descriptor clone, psb_cd_nest_compose,
!         psb_z_nest_base_setup).
!   type  optional storage format name, forwarded to psb_z_nest_rect_block
!   mold  optional storage format mold, forwarded to psb_z_nest_rect_block
!
! On failure the routine returns immediately: whatever was built so far is
! left in op and op%assembled stays .false.
subroutine psb_z_nest_op_asb(op, info, type, mold)
  class(psb_z_nest_matrix), intent(inout), target :: op
  integer(psb_ipk_), intent(out) :: info
  character(len=*), intent(in), optional :: type
  class(psb_z_base_sparse_mat), intent(in), optional :: mold
  type(psb_z_nest_base_mat) :: nest_operator
  integer(psb_ipk_) :: n_fields, i_field, j_field
  character(len=24) :: name

  info = psb_success_
  name = 'psb_z_nest_op_asb'

  ! assembling twice would re-allocate the products that a_glob points into
  if (op%assembled) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='operator already assembled'); return
  end if
  n_fields = op%n_fields

  ! 1) assemble the per-field descriptors (with the union halo accumulated in ins)
  do i_field = 1, n_fields
    call psb_cdasb(op%field_desc(i_field), info)
    if (info /= psb_success_) then
      call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdasb'); return
    end if
  end do

  ! 2) build the local blocks (generally rectangular) from the triplets;
  !    blocks that received no triplet are left unbuilt
  op%block_storage%nrblocks = n_fields
  op%block_storage%ncblocks = n_fields
  allocate(op%block_storage%mats(n_fields,n_fields), stat=info)
  if (info /= 0) then
    info = psb_err_alloc_dealloc_; call psb_errpush(info, name); return
  end if
  do j_field = 1, n_fields
    do i_field = 1, n_fields
      if (op%block_buffer(i_field,j_field)%n_entries > 0) then
        call psb_z_nest_rect_block(op%block_storage%mats(i_field,j_field), &
             & op%block_buffer(i_field,j_field)%n_entries, &
             & op%block_buffer(i_field,j_field)%entry_rows, &
             & op%block_buffer(i_field,j_field)%entry_cols, &
             & op%block_buffer(i_field,j_field)%entry_vals, &
             & op%field_desc(i_field), op%field_desc(j_field), info, &
             & type=type, mold=mold)
        if (info /= psb_success_) then
          call psb_errpush(psb_err_from_subroutine_, name, a_err='rect_block'); return
        end if
      end if
    end do
  end do

  ! 3) descriptor grid: descs(i,j) = descriptor of field j
  op%grid_desc%nrblocks = n_fields
  op%grid_desc%ncblocks = n_fields
  allocate(op%grid_desc%descs(n_fields,n_fields), stat=info)
  if (info /= 0) then
    info = psb_err_alloc_dealloc_; call psb_errpush(info, name); return
  end if
  do j_field = 1, n_fields
    do i_field = 1, n_fields
      call op%field_desc(j_field)%clone(op%grid_desc%descs(i_field,j_field), info)
      ! a failed clone must stop the assembly: info would otherwise be
      ! overwritten by the next call and the error silently lost
      if (info /= psb_success_) then
        call psb_errpush(psb_err_from_subroutine_, name, a_err='desc clone'); return
      end if
    end do
  end do

  ! 4) composed global descriptor + operator setup
  call psb_cd_nest_compose(op%grid_desc, op%desc_glob, info)
  if (info /= psb_success_) then
    call psb_errpush(psb_err_from_subroutine_, name, a_err='cd_nest_compose'); return
  end if
  call psb_z_nest_base_setup(nest_operator, op%block_storage, op%grid_desc, op%desc_glob, info)
  if (info /= psb_success_) then
    call psb_errpush(psb_err_from_subroutine_, name, a_err='nest_base_setup'); return
  end if

  ! 5) wrap into the standard matrix object (the pointers keep pointing at op%*)
  allocate(op%a_glob%a, source=nest_operator, stat=info)
  if (info /= 0) then
    info = psb_err_alloc_dealloc_; call psb_errpush(info, name); return
  end if
  call op%a_glob%set_nrows(op%desc_glob%get_local_rows())
  call op%a_glob%set_ncols(op%desc_glob%get_local_cols())
  call op%a_glob%set_asb()

  ! 6) the triplet buffers are no longer needed
  do j_field = 1, n_fields
    do i_field = 1, n_fields
      call block_buffer_free(op%block_buffer(i_field,j_field))
    end do
  end do
  op%assembled = .true.
end subroutine psb_z_nest_op_asb
! free: release everything held by the builder (triplet buffers, wrapped
! operator, global and grid descriptors, block storage, field descriptors)
! and bring it back to the un-initialised state.
!
! Arguments:
!   op    the builder to release; safe to call on a partially built or
!         never-initialised object
!   info  psb_success_ if every step succeeded, otherwise the code of the
!         FIRST step that failed (psb_err_alloc_dealloc_ for a failed
!         deallocate). Release is best effort: a failure does not stop the
!         remaining resources from being freed.
subroutine psb_z_nest_op_free(op, info)
  class(psb_z_nest_matrix), intent(inout) :: op
  integer(psb_ipk_), intent(out) :: info
  integer(psb_ipk_) :: i_field, j_field, local_info

  info = psb_success_

  if (allocated(op%block_buffer)) then
    do j_field = 1, size(op%block_buffer,2)
      do i_field = 1, size(op%block_buffer,1)
        call block_buffer_free(op%block_buffer(i_field,j_field))
      end do
    end do
    deallocate(op%block_buffer, stat=local_info)
    if (local_info /= 0 .and. info == psb_success_) info = psb_err_alloc_dealloc_
  end if

  if (op%assembled) then
    ! a_glob goes first: it holds pointers into block_storage / grid_desc
    call op%a_glob%free()
    local_info = psb_success_
    call op%desc_glob%free(local_info)
    if (local_info /= psb_success_ .and. info == psb_success_) info = local_info
    local_info = psb_success_
    call op%grid_desc%free(local_info)
    if (local_info /= psb_success_ .and. info == psb_success_) info = local_info
  end if

  ! the blocks built by asb are owned by the builder: release them too, else
  ! they outlive free and a later init + asb fails when re-allocating mats.
  ! Done whatever the value of assembled, so that an asb that failed half way
  ! is cleaned up as well.
  if (allocated(op%block_storage%mats)) then
    do j_field = 1, size(op%block_storage%mats,2)
      do i_field = 1, size(op%block_storage%mats,1)
        if (allocated(op%block_storage%mats(i_field,j_field)%a)) &
             & call op%block_storage%mats(i_field,j_field)%free()
      end do
    end do
    deallocate(op%block_storage%mats, stat=local_info)
    if (local_info /= 0 .and. info == psb_success_) info = psb_err_alloc_dealloc_
  end if
  op%block_storage%nrblocks = 0
  op%block_storage%ncblocks = 0

  if (allocated(op%field_desc)) then
    do i_field = 1, size(op%field_desc)
      local_info = psb_success_
      call op%field_desc(i_field)%free(local_info)
      if (local_info /= psb_success_ .and. info == psb_success_) info = local_info
    end do
    deallocate(op%field_desc, stat=local_info)
    if (local_info /= 0 .and. info == psb_success_) info = psb_err_alloc_dealloc_
  end if

  op%n_fields = 0
  op%assembled = .false.
end subroutine psb_z_nest_op_free
! get_owned_rows: list of the rows of field i_field owned by this process,
! expressed as GLOBAL indices in the index space of that field
! (1..field size). These are the rows the process is expected to insert
! through ins, e.g.
!
!   my_rows = nested_matrix%get_owned_rows(1)
!   do k = 1, size(my_rows)
!     global_row = my_rows(k)
!     ...
!
! A zero-sized array is returned when i_field is outside 1..n_fields or when
! the field descriptors have not been allocated.
function psb_z_nest_op_get_owned_rows(op, i_field) result(owned_global_rows)
  class(psb_z_nest_matrix), intent(in) :: op
  integer(psb_ipk_), intent(in) :: i_field
  integer(psb_lpk_), allocatable :: owned_global_rows(:)
  logical :: field_is_valid

  field_is_valid = allocated(op%field_desc) .and. &
       & (i_field >= 1) .and. (i_field <= op%n_fields)

  if (field_is_valid) then
    owned_global_rows = op%field_desc(i_field)%get_global_indices(owned=.true.)
  else
    allocate(owned_global_rows(0))
  end if
end function psb_z_nest_op_get_owned_rows
! get_owned_row_count: number of rows of field i_field owned by this process.
! Returns 0 when i_field is outside 1..n_fields or when the field descriptors
! have not been allocated.
function psb_z_nest_op_get_owned_row_count(op, i_field) result(owned_row_count)
  class(psb_z_nest_matrix), intent(in) :: op
  integer(psb_ipk_), intent(in) :: i_field
  integer(psb_ipk_) :: owned_row_count
  logical :: field_is_valid

  field_is_valid = allocated(op%field_desc) .and. &
       & (i_field >= 1) .and. (i_field <= op%n_fields)

  if (field_is_valid) then
    owned_row_count = op%field_desc(i_field)%get_local_rows()
  else
    owned_row_count = 0
  end if
end function psb_z_nest_op_get_owned_row_count
!-----------------------------------------------------------------
! private helpers: growing triplet buffer
!-----------------------------------------------------------------
! Append the first n_entries triplets of (entry_rows, entry_cols, entry_vals)
! to buffer, growing its arrays when needed.
! info is 0 on success, or the allocation status reported by the failing
! ensure_capacity_* call; in that case the valid count n_entries of the
! buffer is left unchanged.
subroutine block_buffer_append(buffer, n_entries, entry_rows, entry_cols, entry_vals, info)
  type(psb_z_nest_block_buffer), intent(inout) :: buffer
  integer(psb_ipk_), intent(in) :: n_entries
  integer(psb_lpk_), intent(in) :: entry_rows(:), entry_cols(:)
  complex(psb_dpk_), intent(in) :: entry_vals(:)
  integer(psb_ipk_), intent(out) :: info
  integer(psb_ipk_) :: old_count, new_count

  info = psb_success_
  old_count = buffer%n_entries
  new_count = old_count + n_entries

  ! make room in the three arrays, stopping at the first failure
  call ensure_capacity_lpk(buffer%entry_rows, new_count, info)
  if (info == 0) call ensure_capacity_lpk(buffer%entry_cols, new_count, info)
  if (info == 0) call ensure_capacity_val(buffer%entry_vals, new_count, info)
  if (info /= 0) return

  ! copy the new triplets right after the ones already stored
  buffer%entry_rows(old_count+1:new_count) = entry_rows(1:n_entries)
  buffer%entry_cols(old_count+1:new_count) = entry_cols(1:n_entries)
  buffer%entry_vals(old_count+1:new_count) = entry_vals(1:n_entries)
  buffer%n_entries = new_count
end subroutine block_buffer_append
! Make sure the integer(psb_lpk_) array can hold at least required_size
! elements, preserving its current contents.
!   - not allocated: allocate max(required_size, 16) elements;
!   - large enough : nothing to do;
!   - too small    : grow to max(2*current size, required_size).
! info is 0 on success, otherwise the stat of the failed allocate (the array
! is then left as it was).
subroutine ensure_capacity_lpk(array, required_size, info)
  integer(psb_lpk_), allocatable, intent(inout) :: array(:)
  integer(psb_ipk_), intent(in) :: required_size
  integer(psb_ipk_), intent(out) :: info
  integer(psb_lpk_), allocatable :: bigger(:)
  integer(psb_ipk_) :: old_size

  info = 0
  if (allocated(array)) then
    old_size = size(array)
    if (old_size < required_size) then
      allocate(bigger(max(2*old_size, required_size)), stat=info)
      if (info == 0) then
        bigger(1:old_size) = array(1:old_size)
        ! hand the enlarged storage over to array; bigger ends up deallocated
        call move_alloc(bigger, array)
      end if
    end if
  else
    allocate(array(max(required_size,16)), stat=info)
  end if
end subroutine ensure_capacity_lpk
! Make sure the complex(psb_dpk_) array can hold at least required_size
! elements, preserving its current contents.
!   - not allocated: allocate max(required_size, 16) elements;
!   - large enough : nothing to do;
!   - too small    : grow to max(2*current size, required_size).
! info is 0 on success, otherwise the stat of the failed allocate (the array
! is then left as it was).
subroutine ensure_capacity_val(array, required_size, info)
  complex(psb_dpk_), allocatable, intent(inout) :: array(:)
  integer(psb_ipk_), intent(in) :: required_size
  integer(psb_ipk_), intent(out) :: info
  complex(psb_dpk_), allocatable :: bigger(:)
  integer(psb_ipk_) :: old_size

  info = 0
  if (allocated(array)) then
    old_size = size(array)
    if (old_size < required_size) then
      allocate(bigger(max(2*old_size, required_size)), stat=info)
      if (info == 0) then
        bigger(1:old_size) = array(1:old_size)
        ! hand the enlarged storage over to array; bigger ends up deallocated
        call move_alloc(bigger, array)
      end if
    end if
  else
    allocate(array(max(required_size,16)), stat=info)
  end if
end subroutine ensure_capacity_val
! Reset a triplet buffer to the empty state: the valid count goes back to 0
! and whichever of the three arrays is allocated gets deallocated.
subroutine block_buffer_free(buffer)
  type(psb_z_nest_block_buffer), intent(inout) :: buffer

  buffer%n_entries = 0
  if (allocated(buffer%entry_vals)) deallocate(buffer%entry_vals)
  if (allocated(buffer%entry_cols)) deallocate(buffer%entry_cols)
  if (allocated(buffer%entry_rows)) deallocate(buffer%entry_rows)
end subroutine block_buffer_free
end module psb_z_nest_builder_mod

@ -0,0 +1,364 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! Module: psb_z_nest_tools_mod
! Author: Simone Staccone (Stack-1)
!
! Nested-specific assembly wrappers for the PSBLAS3 double precision complex sparse matrix routines
!
module psb_z_nest_tools_mod
use psb_const_mod, only : psb_ipk_, psb_lpk_, psb_dpk_, psb_success_, psb_err_alloc_dealloc_, &
psb_err_invalid_input_, psb_err_from_subroutine_, &
psb_dupl_add_, psb_dupl_ovwrt_, psb_dupl_err_, psb_ctxt_type
use psb_error_mod, only : psb_errpush
use psb_z_tools_mod, only : psb_spall, psb_spins, psb_spasb, psb_spfree, psb_sprn, &
psb_geall, psb_geins, psb_geasb, psb_gefree
use psb_desc_nest_mod, only : psb_desc_nest_type
use psb_z_nest_mat_mod, only : psb_z_nest_sparse_mat
use psb_z_mat_mod, only : psb_zspmat_type
use psb_z_base_mat_mod, only : psb_z_coo_sparse_mat, psb_z_base_sparse_mat
use psb_desc_mod, only : psb_desc_type
implicit none
private
public :: psb_spall_nest, psb_spins_nest, psb_spasb_nest, psb_spfree_nest, psb_sprn_nest, &
psb_z_nest_rect_block
contains
! psb_spall_nest: allocate every block of the (nrblocks x ncblocks) grid
! described by desc_nest, so that all of them are marked as present.
! psb_spins_nest allocates individual blocks lazily on first insertion; call
! psb_spall_nest instead when the full block structure is known up front.
!
! Arguments:
!   a_nest     nested matrix; its block counts are taken from desc_nest and
!              its mats array is allocated here unless it already is
!   desc_nest  grid of descriptors, one per block
!   info       psb_success_, psb_err_alloc_dealloc_ if mats cannot be
!              allocated, or the code returned by the first failing psb_spall
!   nnz        optional nonzero estimate forwarded to psb_spall for each block
subroutine psb_spall_nest(a_nest, desc_nest, info, nnz)
  type(psb_z_nest_sparse_mat), intent(inout) :: a_nest
  type(psb_desc_nest_type), intent(in) :: desc_nest
  integer(psb_ipk_), intent(out) :: info
  integer(psb_ipk_), intent(in), optional :: nnz
  integer(psb_ipk_) :: ib, jb, blk_info
  character(len=20) :: name

  name = 'psb_spall_nest'
  info = psb_success_

  a_nest%nrblocks = desc_nest%nrblocks
  a_nest%ncblocks = desc_nest%ncblocks

  if (.not. allocated(a_nest%mats)) then
    allocate(a_nest%mats(a_nest%nrblocks, a_nest%ncblocks), stat=info)
    if (info /= 0) then
      info = psb_err_alloc_dealloc_
      call psb_errpush(info, name)
      return
    end if
  end if

  block_rows: do ib = 1, a_nest%nrblocks
    block_cols: do jb = 1, a_nest%ncblocks
      blk_info = psb_success_
      if (.not. present(nnz)) then
        call psb_spall(a_nest%mats(ib, jb), desc_nest%descs(ib, jb), blk_info)
      else
        call psb_spall(a_nest%mats(ib, jb), desc_nest%descs(ib, jb), blk_info, nnz=nnz)
      end if
      if (blk_info == psb_success_) cycle block_cols
      ! stop at the first block that cannot be allocated
      info = blk_info
      call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_spall')
      return
    end do block_cols
  end do block_rows
end subroutine psb_spall_nest
! psb_spins_nest: insert n_entries triplets into block (block_row, block_col)
! of the nested matrix. The mats array and the target block are allocated
! lazily on first insertion if psb_spall_nest was not called first.
!
! Arguments:
!   block_row, block_col  block coordinates, checked against
!                         a_nest%nrblocks / a_nest%ncblocks
!   n_entries             number of triplets; the routine returns at once
!                         with psb_success_ when it is 0
!   entry_rows, entry_cols, entry_vals
!                         the triplets, forwarded as they are to psb_spins
!   a_nest                nested matrix receiving the entries
!   desc_nest             grid of descriptors; descs(block_row, block_col) is
!                         the one used for the block
!   info                  psb_success_, psb_err_invalid_input_ for bad block
!                         indices, psb_err_alloc_dealloc_ if mats cannot be
!                         allocated, or the code returned by psb_spall /
!                         psb_spins
subroutine psb_spins_nest(block_row, block_col, n_entries, entry_rows, entry_cols, entry_vals, a_nest, desc_nest, info)
  integer(psb_ipk_), intent(in) :: block_row, block_col, n_entries
  integer(psb_lpk_), intent(in) :: entry_rows(:), entry_cols(:)
  complex(psb_dpk_), intent(in) :: entry_vals(:)
  type(psb_z_nest_sparse_mat), intent(inout) :: a_nest
  type(psb_desc_nest_type), intent(inout) :: desc_nest
  integer(psb_ipk_), intent(out) :: info
  integer(psb_ipk_) :: initial_nnz
  logical :: row_in_range, col_in_range
  character(len=20) :: name

  name = 'psb_spins_nest'
  info = psb_success_
  if (n_entries == 0) return

  row_in_range = (block_row >= 1) .and. (block_row <= a_nest%nrblocks)
  col_in_range = (block_col >= 1) .and. (block_col <= a_nest%ncblocks)
  if (.not. (row_in_range .and. col_in_range)) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='invalid block indices')
    return
  end if

  ! lazy allocation of the block grid
  if (.not. allocated(a_nest%mats)) then
    allocate(a_nest%mats(a_nest%nrblocks, a_nest%ncblocks), stat=info)
    if (info /= 0) then
      info = psb_err_alloc_dealloc_
      call psb_errpush(info, name)
      return
    end if
  end if

  ! lazy allocation of the block itself
  if (.not. allocated(a_nest%mats(block_row, block_col)%a)) then
    ! initial size: at least 10 entries, plus half of n_entries as head-room
    ! for the insertions that will follow
    initial_nnz = max(n_entries, 10) + n_entries / 2
    call psb_spall(a_nest%mats(block_row, block_col), &
         & desc_nest%descs(block_row, block_col), info, nnz=initial_nnz)
    if (info /= psb_success_) then
      call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_spall')
      return
    end if
  end if

  call psb_spins(n_entries, entry_rows, entry_cols, entry_vals, &
       & a_nest%mats(block_row, block_col), &
       & desc_nest%descs(block_row, block_col), info)
  if (info /= psb_success_) then
    call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_spins')
  end if
end subroutine psb_spins_nest
! psb_spasb_nest: call psb_spasb on every present (allocated) block.
! Must be called after psb_cdasb_nest.
!
! Arguments:
!   a_nest     nested matrix to assemble; nothing is done (psb_success_) when
!              its mats array is not allocated, as in psb_sprn_nest
!   desc_nest  grid of descriptors, one per block
!   info       psb_success_, or the code returned by the first failing
!              psb_spasb (assembly stops there)
!   dupl       optional duplicate-handling mode, default psb_dupl_add_.
!              psb_dupl_add_, psb_dupl_ovwrt_ and psb_dupl_err_ are forwarded
!              to psb_spasb; for any other value psb_spasb is called without
!              dupl, i.e. with its own default.
subroutine psb_spasb_nest(a_nest, desc_nest, info, dupl)
  type(psb_z_nest_sparse_mat), intent(inout) :: a_nest
  type(psb_desc_nest_type), intent(inout) :: desc_nest
  integer(psb_ipk_), intent(out) :: info
  integer(psb_ipk_), intent(in), optional :: dupl
  integer(psb_ipk_) :: i_block_row, j_block_col, dupl_mode, local_info
  logical :: forward_dupl
  character(len=20) :: name

  info = psb_success_
  name = 'psb_spasb_nest'

  ! no block grid: nothing to assemble, and mats(i,j) must not be referenced
  if (.not. allocated(a_nest%mats)) return

  dupl_mode = psb_dupl_add_
  if (present(dupl)) dupl_mode = dupl
  ! only the three known modes are passed on
  forward_dupl = (dupl_mode == psb_dupl_add_) .or. &
       & (dupl_mode == psb_dupl_ovwrt_) .or. &
       & (dupl_mode == psb_dupl_err_)

  do i_block_row = 1, a_nest%nrblocks
    do j_block_col = 1, a_nest%ncblocks
      if (.not. allocated(a_nest%mats(i_block_row, j_block_col)%a)) cycle
      local_info = psb_success_
      if (forward_dupl) then
        call psb_spasb(a_nest%mats(i_block_row, j_block_col), &
             & desc_nest%descs(i_block_row, j_block_col), &
             & local_info, dupl=dupl_mode)
      else
        call psb_spasb(a_nest%mats(i_block_row, j_block_col), &
             & desc_nest%descs(i_block_row, j_block_col), &
             & local_info)
      end if
      if (local_info /= psb_success_) then
        info = local_info
        call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_spasb')
        return
      end if
    end do
  end do
end subroutine psb_spasb_nest
! psb_spfree_nest: call psb_spfree on every present (allocated) block, then
! deallocate the mats array and reset nrblocks / ncblocks to 0.
!
! Arguments:
!   a_nest     nested matrix to release
!   desc_nest  grid of descriptors, one per block
!   info       psb_success_, or the code of the FIRST failure met (a failed
!              deallocate is reported as psb_err_alloc_dealloc_); a failure
!              does not stop the remaining blocks from being released
subroutine psb_spfree_nest(a_nest, desc_nest, info)
  type(psb_z_nest_sparse_mat), intent(inout) :: a_nest
  type(psb_desc_nest_type), intent(in) :: desc_nest
  integer(psb_ipk_), intent(out) :: info
  integer(psb_ipk_) :: ib, jb, step_info
  character(len=20) :: name

  name = 'psb_spfree_nest'
  info = psb_success_

  if (allocated(a_nest%mats)) then
    do ib = 1, a_nest%nrblocks
      do jb = 1, a_nest%ncblocks
        if (.not. allocated(a_nest%mats(ib, jb)%a)) cycle
        step_info = psb_success_
        call psb_spfree(a_nest%mats(ib, jb), desc_nest%descs(ib, jb), step_info)
        if (step_info == psb_success_) cycle
        ! keep (and push) only the first failure, but go on releasing
        if (info /= psb_success_) cycle
        info = step_info
        call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_spfree')
      end do
    end do

    deallocate(a_nest%mats, stat=step_info)
    if (step_info /= 0) then
      if (info == psb_success_) then
        info = psb_err_alloc_dealloc_
        call psb_errpush(info, name)
      end if
    end if
  end if

  a_nest%nrblocks = 0
  a_nest%ncblocks = 0
end subroutine psb_spfree_nest
! psb_sprn_nest: call psb_sprn on every present (allocated) block, resetting
! it to the build state while preserving the sparsity pattern.
!
! Arguments:
!   a_nest     nested matrix to reinitialise; nothing is done when its mats
!              array is not allocated
!   desc_nest  grid of descriptors, one per block
!   info       psb_success_, or the code of the FIRST psb_sprn that failed
!              (the remaining blocks are still processed)
!   clear      optional flag forwarded to psb_sprn when present
subroutine psb_sprn_nest(a_nest, desc_nest, info, clear)
  type(psb_z_nest_sparse_mat), intent(inout) :: a_nest
  type(psb_desc_nest_type), intent(in) :: desc_nest
  integer(psb_ipk_), intent(out) :: info
  logical, intent(in), optional :: clear
  integer(psb_ipk_) :: ib, jb, step_info
  character(len=20) :: name

  name = 'psb_sprn_nest'
  info = psb_success_
  if (.not. allocated(a_nest%mats)) return

  do ib = 1, a_nest%nrblocks
    do jb = 1, a_nest%ncblocks
      if (.not. allocated(a_nest%mats(ib, jb)%a)) cycle
      step_info = psb_success_
      if (.not. present(clear)) then
        call psb_sprn(a_nest%mats(ib, jb), desc_nest%descs(ib, jb), step_info)
      else
        call psb_sprn(a_nest%mats(ib, jb), desc_nest%descs(ib, jb), step_info, clear=clear)
      end if
      if (step_info == psb_success_) cycle
      ! keep (and push) only the first failure, but go on with the other blocks
      if (info /= psb_success_) cycle
      info = step_info
      call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_sprn')
    end do
  end do
end subroutine psb_sprn_nest
! psb_z_nest_rect_block
!
! Build a local GENERAL (possibly rectangular) block A(i,j) of a nested
! operator, with rows in field i and columns in field j (field i /= field j,
! |field i| /= |field j| allowed). Rows are localized against the field-i
! (row) descriptor, columns against the field-j (column) descriptor which
! must already carry the union halo of column j (cdall + cdins(all column-j
! blocks' columns) + cdasb). The result is a block of shape
!    (field-i owned rows) x (field-j local cols incl. halo)
! stored in the format selected by type / mold (CSR when neither is given),
! consumable directly by the nested csmv (psb_z_nest_base_mat).
!
! A single-descriptor psb_spall/psb_spasb cannot express row-field /= col-field
! (it would force rows and columns into the same index space), hence the
! explicit COO build with separate row/column localization. Duplicate
! entries are summed (psb_dupl_add_) when the COO block is fixed.
!
! Arguments (this process's local contribution):
!   blk        (out) the assembled block
!   nz         number of local entries; must be >= 0 and not larger than the
!              size of ia_glob, ja_glob and val
!   ia_glob(:) GLOBAL field-i row indices (owned by this process)
!   ja_glob(:) GLOBAL field-j column indices
!   val(:)     values
!   desc_row   field-i descriptor (rows)
!   desc_col   field-j descriptor (columns, with union halo)
!   info       (out) psb_success_ on success; psb_err_invalid_input_ if nz is
!              inconsistent with the arrays, a row is not owned or a column
!              is not known to desc_col; otherwise the code returned by the
!              COO fix or by the format conversion
!   type       optional base storage format name (default 'CSR'); ignored
!              when mold is present
!   mold       optional storage format mold (any psb_z_base_sparse_mat
!              extension, e.g. psb_ext ELL/HLL)
!
subroutine psb_z_nest_rect_block(blk, nz, ia_glob, ja_glob, val, desc_row, desc_col, info, type, mold)
  type(psb_zspmat_type), intent(out) :: blk
  integer(psb_ipk_), intent(in) :: nz
  integer(psb_lpk_), intent(in) :: ia_glob(:), ja_glob(:)
  complex(psb_dpk_), intent(in) :: val(:)
  type(psb_desc_type), intent(in) :: desc_row, desc_col
  integer(psb_ipk_), intent(out) :: info
  character(len=*), intent(in), optional :: type   ! base storage format (default 'CSR')
  class(psb_z_base_sparse_mat), intent(in), optional :: mold ! any format, e.g. psb_ext ELL/HLL
  type(psb_z_coo_sparse_mat) :: coo_block
  integer(psb_ipk_) :: k_entry, n_loc_rows, n_loc_cols, loc_row, loc_col
  character(len=24) :: name

  info = psb_success_
  name = 'psb_z_nest_rect_block'

  ! the loop below reads elements 1..nz of the three arrays: reject a count
  ! they cannot satisfy instead of reading out of bounds
  if ((nz < 0) .or. (size(ia_glob) < nz) .or. (size(ja_glob) < nz) .or. &
       & (size(val) < nz)) then
    info = psb_err_invalid_input_
    call psb_errpush(info, name, a_err='nz inconsistent with the triplet arrays')
    return
  end if

  n_loc_rows = desc_row%get_local_rows()   ! owned rows of field i
  n_loc_cols = desc_col%get_local_cols()   ! field-j local cols (owned + halo)
  call coo_block%allocate(n_loc_rows, n_loc_cols, nz)

  ! global -> local translation, rows and columns each in their own index space
  do k_entry = 1, nz
    call desc_row%g2l(ia_glob(k_entry), loc_row, info)
    if (info /= 0 .or. loc_row < 1 .or. loc_row > n_loc_rows) then
      info = psb_err_invalid_input_
      call psb_errpush(info, name, a_err='row not owned / not localizable')
      return
    end if
    call desc_col%g2l(ja_glob(k_entry), loc_col, info)
    if (info /= 0 .or. loc_col < 1 .or. loc_col > n_loc_cols) then
      info = psb_err_invalid_input_
      call psb_errpush(info, name, a_err='column not in field-j descriptor (missing from union halo)')
      return
    end if
    coo_block%ia(k_entry) = loc_row
    coo_block%ja(k_entry) = loc_col
    coo_block%val(k_entry) = val(k_entry)
  end do

  ! sort the entries and sum the duplicates
  call coo_block%set_nzeros(nz)
  call coo_block%set_dupl(psb_dupl_add_)
  call coo_block%fix(info)
  if (info /= 0) then
    call psb_errpush(psb_err_from_subroutine_, name, a_err='coo fix'); return
  end if

  ! hand the COO data over to blk, then convert to the requested format
  ! (mold takes precedence over type)
  call blk%mv_from(coo_block)
  if (present(mold)) then
    call blk%cscnv(info, mold=mold)
  else if (present(type)) then
    call blk%cscnv(info, type=type)
  else
    call blk%cscnv(info, type='CSR')
  end if
  if (info /= 0) then
    call psb_errpush(psb_err_from_subroutine_, name, a_err='cscnv'); return
  end if
end subroutine psb_z_nest_rect_block
end module psb_z_nest_tools_mod

@ -1,586 +0,0 @@
Welcome to PSBLAS version: 3.9.1
This is the psb_geaxpby_test sample program
Number of processes used in this computation: 1
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y1_a1_b1.mtx 1/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y1_a1_b2.mtx 2/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y1_a1_b3.mtx 3/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y1_a2_b1.mtx 4/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y1_a2_b2.mtx 5/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y1_a2_b3.mtx 6/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y1_a3_b1.mtx 7/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y1_a3_b2.mtx 8/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y1_a3_b3.mtx 9/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y2_a1_b1.mtx 10/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y2_a1_b2.mtx 11/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y2_a1_b3.mtx 12/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y2_a2_b1.mtx 13/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y2_a2_b2.mtx 14/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y2_a2_b3.mtx 15/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y2_a3_b1.mtx 16/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y2_a3_b2.mtx 17/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y2_a3_b3.mtx 18/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y3_a1_b1.mtx 19/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y3_a1_b2.mtx 20/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y3_a1_b3.mtx 21/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y3_a2_b1.mtx 22/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y3_a2_b2.mtx 23/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y3_a2_b3.mtx 24/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y3_a3_b1.mtx 25/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y3_a3_b2.mtx 26/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y3_a3_b3.mtx 27/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y4_a1_b1.mtx 28/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y4_a1_b2.mtx 29/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y4_a1_b3.mtx 30/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y4_a2_b1.mtx 31/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y4_a2_b2.mtx 32/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y4_a2_b3.mtx 33/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y4_a3_b1.mtx 34/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y4_a3_b2.mtx 35/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x1_y4_a3_b3.mtx 36/144 [OK]
[2026-06-04 21:19:20] Generation geaxpby single precision result file serial/sol_x2_y1_a1_b1.mtx 37/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y1_a1_b2.mtx 38/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y1_a1_b3.mtx 39/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y1_a2_b1.mtx 40/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y1_a2_b2.mtx 41/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y1_a2_b3.mtx 42/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y1_a3_b1.mtx 43/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y1_a3_b2.mtx 44/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y1_a3_b3.mtx 45/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y2_a1_b1.mtx 46/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y2_a1_b2.mtx 47/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y2_a1_b3.mtx 48/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y2_a2_b1.mtx 49/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y2_a2_b2.mtx 50/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y2_a2_b3.mtx 51/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y2_a3_b1.mtx 52/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y2_a3_b2.mtx 53/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y2_a3_b3.mtx 54/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y3_a1_b1.mtx 55/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y3_a1_b2.mtx 56/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y3_a1_b3.mtx 57/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y3_a2_b1.mtx 58/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y3_a2_b2.mtx 59/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y3_a2_b3.mtx 60/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y3_a3_b1.mtx 61/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y3_a3_b2.mtx 62/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y3_a3_b3.mtx 63/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y4_a1_b1.mtx 64/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y4_a1_b2.mtx 65/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y4_a1_b3.mtx 66/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y4_a2_b1.mtx 67/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y4_a2_b2.mtx 68/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y4_a2_b3.mtx 69/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y4_a3_b1.mtx 70/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y4_a3_b2.mtx 71/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x2_y4_a3_b3.mtx 72/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x3_y1_a1_b1.mtx 73/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x3_y1_a1_b2.mtx 74/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x3_y1_a1_b3.mtx 75/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x3_y1_a2_b1.mtx 76/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x3_y1_a2_b2.mtx 77/144 [OK]
[2026-06-04 21:19:21] Generation geaxpby single precision result file serial/sol_x3_y1_a2_b3.mtx 78/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x3_y1_a3_b1.mtx 79/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x3_y1_a3_b2.mtx 80/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x3_y1_a3_b3.mtx 81/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x3_y2_a1_b1.mtx 82/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x3_y2_a1_b2.mtx 83/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x3_y2_a1_b3.mtx 84/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x3_y2_a2_b1.mtx 85/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x3_y2_a2_b2.mtx 86/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x3_y2_a2_b3.mtx 87/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x3_y2_a3_b1.mtx 88/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x3_y2_a3_b2.mtx 89/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x3_y2_a3_b3.mtx 90/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x3_y3_a1_b1.mtx 91/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x3_y3_a1_b2.mtx 92/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x3_y3_a1_b3.mtx 93/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x3_y3_a2_b1.mtx 94/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x3_y3_a2_b2.mtx 95/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x3_y3_a2_b3.mtx 96/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x3_y3_a3_b1.mtx 97/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x3_y3_a3_b2.mtx 98/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x3_y3_a3_b3.mtx 99/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x3_y4_a1_b1.mtx 100/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x3_y4_a1_b2.mtx 101/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x3_y4_a1_b3.mtx 102/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x3_y4_a2_b1.mtx 103/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x3_y4_a2_b2.mtx 104/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x3_y4_a2_b3.mtx 105/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x3_y4_a3_b1.mtx 106/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x3_y4_a3_b2.mtx 107/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x3_y4_a3_b3.mtx 108/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x4_y1_a1_b1.mtx 109/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x4_y1_a1_b2.mtx 110/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x4_y1_a1_b3.mtx 111/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x4_y1_a2_b1.mtx 112/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x4_y1_a2_b2.mtx 113/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x4_y1_a2_b3.mtx 114/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x4_y1_a3_b1.mtx 115/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x4_y1_a3_b2.mtx 116/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x4_y1_a3_b3.mtx 117/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x4_y2_a1_b1.mtx 118/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x4_y2_a1_b2.mtx 119/144 [OK]
[2026-06-04 21:19:22] Generation geaxpby single precision result file serial/sol_x4_y2_a1_b3.mtx 120/144 [OK]
[2026-06-04 21:19:23] Generation geaxpby single precision result file serial/sol_x4_y2_a2_b1.mtx 121/144 [OK]
[2026-06-04 21:19:23] Generation geaxpby single precision result file serial/sol_x4_y2_a2_b2.mtx 122/144 [OK]
[2026-06-04 21:19:23] Generation geaxpby single precision result file serial/sol_x4_y2_a2_b3.mtx 123/144 [OK]
[2026-06-04 21:19:23] Generation geaxpby single precision result file serial/sol_x4_y2_a3_b1.mtx 124/144 [OK]
[2026-06-04 21:19:23] Generation geaxpby single precision result file serial/sol_x4_y2_a3_b2.mtx 125/144 [OK]
[2026-06-04 21:19:23] Generation geaxpby single precision result file serial/sol_x4_y2_a3_b3.mtx 126/144 [OK]
[2026-06-04 21:19:23] Generation geaxpby single precision result file serial/sol_x4_y3_a1_b1.mtx 127/144 [OK]
[2026-06-04 21:19:23] Generation geaxpby single precision result file serial/sol_x4_y3_a1_b2.mtx 128/144 [OK]
[2026-06-04 21:19:23] Generation geaxpby single precision result file serial/sol_x4_y3_a1_b3.mtx 129/144 [OK]
[2026-06-04 21:19:23] Generation geaxpby single precision result file serial/sol_x4_y3_a2_b1.mtx 130/144 [OK]
[2026-06-04 21:19:23] Generation geaxpby single precision result file serial/sol_x4_y3_a2_b2.mtx 131/144 [OK]
[2026-06-04 21:19:23] Generation geaxpby single precision result file serial/sol_x4_y3_a2_b3.mtx 132/144 [OK]
[2026-06-04 21:19:23] Generation geaxpby single precision result file serial/sol_x4_y3_a3_b1.mtx 133/144 [OK]
[2026-06-04 21:19:23] Generation geaxpby single precision result file serial/sol_x4_y3_a3_b2.mtx 134/144 [OK]
[2026-06-04 21:19:23] Generation geaxpby single precision result file serial/sol_x4_y3_a3_b3.mtx 135/144 [OK]
[2026-06-04 21:19:23] Generation geaxpby single precision result file serial/sol_x4_y4_a1_b1.mtx 136/144 [OK]
[2026-06-04 21:19:23] Generation geaxpby single precision result file serial/sol_x4_y4_a1_b2.mtx 137/144 [OK]
[2026-06-04 21:19:23] Generation geaxpby single precision result file serial/sol_x4_y4_a1_b3.mtx 138/144 [OK]
[2026-06-04 21:19:23] Generation geaxpby single precision result file serial/sol_x4_y4_a2_b1.mtx 139/144 [OK]
[2026-06-04 21:19:23] Generation geaxpby single precision result file serial/sol_x4_y4_a2_b2.mtx 140/144 [OK]
[2026-06-04 21:19:23] Generation geaxpby single precision result file serial/sol_x4_y4_a2_b3.mtx 141/144 [OK]
[2026-06-04 21:19:23] Generation geaxpby single precision result file serial/sol_x4_y4_a3_b1.mtx 142/144 [OK]
[2026-06-04 21:19:23] Generation geaxpby single precision result file serial/sol_x4_y4_a3_b2.mtx 143/144 [OK]
[2026-06-04 21:19:23] Generation geaxpby single precision result file serial/sol_x4_y4_a3_b3.mtx 144/144 [OK]
[2026-06-04 21:19:23] Double precision check on file serial/sol_x1_y1_a1_b1.mtx 1/144 [OK]
[2026-06-04 21:19:23] Double precision check on file serial/sol_x1_y1_a1_b2.mtx 2/144 [OK]
[2026-06-04 21:19:23] Double precision check on file serial/sol_x1_y1_a1_b3.mtx 3/144 [OK]
[2026-06-04 21:19:23] Double precision check on file serial/sol_x1_y1_a2_b1.mtx 4/144 [OK]
[2026-06-04 21:19:23] Double precision check on file serial/sol_x1_y1_a2_b2.mtx 5/144 [OK]
[2026-06-04 21:19:23] Double precision check on file serial/sol_x1_y1_a2_b3.mtx 6/144 [OK]
[2026-06-04 21:19:23] Double precision check on file serial/sol_x1_y1_a3_b1.mtx 7/144 [OK]
[2026-06-04 21:19:23] Double precision check on file serial/sol_x1_y1_a3_b2.mtx 8/144 [OK]
[2026-06-04 21:19:23] Double precision check on file serial/sol_x1_y1_a3_b3.mtx 9/144 [OK]
[2026-06-04 21:19:23] Double precision check on file serial/sol_x1_y2_a1_b1.mtx 10/144 [OK]
[2026-06-04 21:19:23] Double precision check on file serial/sol_x1_y2_a1_b2.mtx 11/144 [OK]
[2026-06-04 21:19:23] Double precision check on file serial/sol_x1_y2_a1_b3.mtx 12/144 [OK]
[2026-06-04 21:19:23] Double precision check on file serial/sol_x1_y2_a2_b1.mtx 13/144 [OK]
[2026-06-04 21:19:23] Double precision check on file serial/sol_x1_y2_a2_b2.mtx 14/144 [OK]
[2026-06-04 21:19:23] Double precision check on file serial/sol_x1_y2_a2_b3.mtx 15/144 [OK]
[2026-06-04 21:19:23] Double precision check on file serial/sol_x1_y2_a3_b1.mtx 16/144 [OK]
[2026-06-04 21:19:23] Double precision check on file serial/sol_x1_y2_a3_b2.mtx 17/144 [OK]
[2026-06-04 21:19:23] Double precision check on file serial/sol_x1_y2_a3_b3.mtx 18/144 [OK]
[2026-06-04 21:19:23] Double precision check on file serial/sol_x1_y3_a1_b1.mtx 19/144 [OK]
[2026-06-04 21:19:23] Double precision check on file serial/sol_x1_y3_a1_b2.mtx 20/144 [OK]
[2026-06-04 21:19:23] Double precision check on file serial/sol_x1_y3_a1_b3.mtx 21/144 [OK]
[2026-06-04 21:19:23] Double precision check on file serial/sol_x1_y3_a2_b1.mtx 22/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x1_y3_a2_b2.mtx 23/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x1_y3_a2_b3.mtx 24/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x1_y3_a3_b1.mtx 25/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x1_y3_a3_b2.mtx 26/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x1_y3_a3_b3.mtx 27/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x1_y4_a1_b1.mtx 28/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x1_y4_a1_b2.mtx 29/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x1_y4_a1_b3.mtx 30/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x1_y4_a2_b1.mtx 31/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x1_y4_a2_b2.mtx 32/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x1_y4_a2_b3.mtx 33/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x1_y4_a3_b1.mtx 34/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x1_y4_a3_b2.mtx 35/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x1_y4_a3_b3.mtx 36/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y1_a1_b1.mtx 37/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y1_a1_b2.mtx 38/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y1_a1_b3.mtx 39/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y1_a2_b1.mtx 40/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y1_a2_b2.mtx 41/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y1_a2_b3.mtx 42/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y1_a3_b1.mtx 43/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y1_a3_b2.mtx 44/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y1_a3_b3.mtx 45/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y2_a1_b1.mtx 46/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y2_a1_b2.mtx 47/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y2_a1_b3.mtx 48/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y2_a2_b1.mtx 49/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y2_a2_b2.mtx 50/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y2_a2_b3.mtx 51/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y2_a3_b1.mtx 52/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y2_a3_b2.mtx 53/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y2_a3_b3.mtx 54/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y3_a1_b1.mtx 55/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y3_a1_b2.mtx 56/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y3_a1_b3.mtx 57/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y3_a2_b1.mtx 58/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y3_a2_b2.mtx 59/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y3_a2_b3.mtx 60/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y3_a3_b1.mtx 61/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y3_a3_b2.mtx 62/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y3_a3_b3.mtx 63/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y4_a1_b1.mtx 64/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y4_a1_b2.mtx 65/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y4_a1_b3.mtx 66/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y4_a2_b1.mtx 67/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y4_a2_b2.mtx 68/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y4_a2_b3.mtx 69/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y4_a3_b1.mtx 70/144 [OK]
[2026-06-04 21:19:24] Double precision check on file serial/sol_x2_y4_a3_b2.mtx 71/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x2_y4_a3_b3.mtx 72/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y1_a1_b1.mtx 73/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y1_a1_b2.mtx 74/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y1_a1_b3.mtx 75/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y1_a2_b1.mtx 76/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y1_a2_b2.mtx 77/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y1_a2_b3.mtx 78/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y1_a3_b1.mtx 79/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y1_a3_b2.mtx 80/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y1_a3_b3.mtx 81/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y2_a1_b1.mtx 82/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y2_a1_b2.mtx 83/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y2_a1_b3.mtx 84/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y2_a2_b1.mtx 85/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y2_a2_b2.mtx 86/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y2_a2_b3.mtx 87/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y2_a3_b1.mtx 88/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y2_a3_b2.mtx 89/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y2_a3_b3.mtx 90/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y3_a1_b1.mtx 91/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y3_a1_b2.mtx 92/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y3_a1_b3.mtx 93/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y3_a2_b1.mtx 94/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y3_a2_b2.mtx 95/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y3_a2_b3.mtx 96/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y3_a3_b1.mtx 97/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y3_a3_b2.mtx 98/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y3_a3_b3.mtx 99/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y4_a1_b1.mtx 100/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y4_a1_b2.mtx 101/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y4_a1_b3.mtx 102/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y4_a2_b1.mtx 103/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y4_a2_b2.mtx 104/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y4_a2_b3.mtx 105/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y4_a3_b1.mtx 106/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y4_a3_b2.mtx 107/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x3_y4_a3_b3.mtx 108/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x4_y1_a1_b1.mtx 109/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x4_y1_a1_b2.mtx 110/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x4_y1_a1_b3.mtx 111/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x4_y1_a2_b1.mtx 112/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x4_y1_a2_b2.mtx 113/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x4_y1_a2_b3.mtx 114/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x4_y1_a3_b1.mtx 115/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x4_y1_a3_b2.mtx 116/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x4_y1_a3_b3.mtx 117/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x4_y2_a1_b1.mtx 118/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x4_y2_a1_b2.mtx 119/144 [OK]
[2026-06-04 21:19:25] Double precision check on file serial/sol_x4_y2_a1_b3.mtx 120/144 [OK]
[2026-06-04 21:19:26] Double precision check on file serial/sol_x4_y2_a2_b1.mtx 121/144 [OK]
[2026-06-04 21:19:26] Double precision check on file serial/sol_x4_y2_a2_b2.mtx 122/144 [OK]
[2026-06-04 21:19:26] Double precision check on file serial/sol_x4_y2_a2_b3.mtx 123/144 [OK]
[2026-06-04 21:19:26] Double precision check on file serial/sol_x4_y2_a3_b1.mtx 124/144 [OK]
[2026-06-04 21:19:26] Double precision check on file serial/sol_x4_y2_a3_b2.mtx 125/144 [OK]
[2026-06-04 21:19:26] Double precision check on file serial/sol_x4_y2_a3_b3.mtx 126/144 [OK]
[2026-06-04 21:19:26] Double precision check on file serial/sol_x4_y3_a1_b1.mtx 127/144 [OK]
[2026-06-04 21:19:26] Double precision check on file serial/sol_x4_y3_a1_b2.mtx 128/144 [OK]
[2026-06-04 21:19:26] Double precision check on file serial/sol_x4_y3_a1_b3.mtx 129/144 [OK]
[2026-06-04 21:19:26] Double precision check on file serial/sol_x4_y3_a2_b1.mtx 130/144 [OK]
[2026-06-04 21:19:26] Double precision check on file serial/sol_x4_y3_a2_b2.mtx 131/144 [OK]
[2026-06-04 21:19:26] Double precision check on file serial/sol_x4_y3_a2_b3.mtx 132/144 [OK]
[2026-06-04 21:19:26] Double precision check on file serial/sol_x4_y3_a3_b1.mtx 133/144 [OK]
[2026-06-04 21:19:26] Double precision check on file serial/sol_x4_y3_a3_b2.mtx 134/144 [OK]
[2026-06-04 21:19:26] Double precision check on file serial/sol_x4_y3_a3_b3.mtx 135/144 [OK]
[2026-06-04 21:19:26] Double precision check on file serial/sol_x4_y4_a1_b1.mtx 136/144 [OK]
[2026-06-04 21:19:26] Double precision check on file serial/sol_x4_y4_a1_b2.mtx 137/144 [OK]
[2026-06-04 21:19:26] Double precision check on file serial/sol_x4_y4_a1_b3.mtx 138/144 [OK]
[2026-06-04 21:19:26] Double precision check on file serial/sol_x4_y4_a2_b1.mtx 139/144 [OK]
[2026-06-04 21:19:26] Double precision check on file serial/sol_x4_y4_a2_b2.mtx 140/144 [OK]
[2026-06-04 21:19:26] Double precision check on file serial/sol_x4_y4_a2_b3.mtx 141/144 [OK]
[2026-06-04 21:19:26] Double precision check on file serial/sol_x4_y4_a3_b1.mtx 142/144 [OK]
[2026-06-04 21:19:26] Double precision check on file serial/sol_x4_y4_a3_b2.mtx 143/144 [OK]
[2026-06-04 21:19:26] Double precision check on file serial/sol_x4_y4_a3_b3.mtx 144/144 [OK]
Welcome to PSBLAS version: 3.9.1
This is the psb_geaxpby_test sample program
Number of processes used in this computation: 4
[2026-06-04 21:19:26] Generation geaxpby single precision result file parallel/sol_x1_y1_a1_b1.mtx 1/144 [OK]
[2026-06-04 21:19:26] Generation geaxpby single precision result file parallel/sol_x1_y1_a1_b2.mtx 2/144 [OK]
[2026-06-04 21:19:26] Generation geaxpby single precision result file parallel/sol_x1_y1_a1_b3.mtx 3/144 [OK]
[2026-06-04 21:19:26] Generation geaxpby single precision result file parallel/sol_x1_y1_a2_b1.mtx 4/144 [OK]
[2026-06-04 21:19:26] Generation geaxpby single precision result file parallel/sol_x1_y1_a2_b2.mtx 5/144 [OK]
[2026-06-04 21:19:26] Generation geaxpby single precision result file parallel/sol_x1_y1_a2_b3.mtx 6/144 [OK]
[2026-06-04 21:19:26] Generation geaxpby single precision result file parallel/sol_x1_y1_a3_b1.mtx 7/144 [OK]
[2026-06-04 21:19:26] Generation geaxpby single precision result file parallel/sol_x1_y1_a3_b2.mtx 8/144 [OK]
[2026-06-04 21:19:26] Generation geaxpby single precision result file parallel/sol_x1_y1_a3_b3.mtx 9/144 [OK]
[2026-06-04 21:19:26] Generation geaxpby single precision result file parallel/sol_x1_y2_a1_b1.mtx 10/144 [OK]
[2026-06-04 21:19:26] Generation geaxpby single precision result file parallel/sol_x1_y2_a1_b2.mtx 11/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x1_y2_a1_b3.mtx 12/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x1_y2_a2_b1.mtx 13/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x1_y2_a2_b2.mtx 14/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x1_y2_a2_b3.mtx 15/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x1_y2_a3_b1.mtx 16/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x1_y2_a3_b2.mtx 17/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x1_y2_a3_b3.mtx 18/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x1_y3_a1_b1.mtx 19/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x1_y3_a1_b2.mtx 20/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x1_y3_a1_b3.mtx 21/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x1_y3_a2_b1.mtx 22/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x1_y3_a2_b2.mtx 23/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x1_y3_a2_b3.mtx 24/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x1_y3_a3_b1.mtx 25/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x1_y3_a3_b2.mtx 26/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x1_y3_a3_b3.mtx 27/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x1_y4_a1_b1.mtx 28/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x1_y4_a1_b2.mtx 29/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x1_y4_a1_b3.mtx 30/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x1_y4_a2_b1.mtx 31/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x1_y4_a2_b2.mtx 32/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x1_y4_a2_b3.mtx 33/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x1_y4_a3_b1.mtx 34/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x1_y4_a3_b2.mtx 35/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x1_y4_a3_b3.mtx 36/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x2_y1_a1_b1.mtx 37/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x2_y1_a1_b2.mtx 38/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x2_y1_a1_b3.mtx 39/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x2_y1_a2_b1.mtx 40/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x2_y1_a2_b2.mtx 41/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x2_y1_a2_b3.mtx 42/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x2_y1_a3_b1.mtx 43/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x2_y1_a3_b2.mtx 44/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x2_y1_a3_b3.mtx 45/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x2_y2_a1_b1.mtx 46/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x2_y2_a1_b2.mtx 47/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x2_y2_a1_b3.mtx 48/144 [OK]
[2026-06-04 21:19:27] Generation geaxpby single precision result file parallel/sol_x2_y2_a2_b1.mtx 49/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x2_y2_a2_b2.mtx 50/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x2_y2_a2_b3.mtx 51/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x2_y2_a3_b1.mtx 52/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x2_y2_a3_b2.mtx 53/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x2_y2_a3_b3.mtx 54/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x2_y3_a1_b1.mtx 55/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x2_y3_a1_b2.mtx 56/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x2_y3_a1_b3.mtx 57/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x2_y3_a2_b1.mtx 58/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x2_y3_a2_b2.mtx 59/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x2_y3_a2_b3.mtx 60/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x2_y3_a3_b1.mtx 61/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x2_y3_a3_b2.mtx 62/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x2_y3_a3_b3.mtx 63/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x2_y4_a1_b1.mtx 64/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x2_y4_a1_b2.mtx 65/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x2_y4_a1_b3.mtx 66/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x2_y4_a2_b1.mtx 67/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x2_y4_a2_b2.mtx 68/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x2_y4_a2_b3.mtx 69/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x2_y4_a3_b1.mtx 70/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x2_y4_a3_b2.mtx 71/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x2_y4_a3_b3.mtx 72/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x3_y1_a1_b1.mtx 73/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x3_y1_a1_b2.mtx 74/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x3_y1_a1_b3.mtx 75/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x3_y1_a2_b1.mtx 76/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x3_y1_a2_b2.mtx 77/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x3_y1_a2_b3.mtx 78/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x3_y1_a3_b1.mtx 79/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x3_y1_a3_b2.mtx 80/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x3_y1_a3_b3.mtx 81/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x3_y2_a1_b1.mtx 82/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x3_y2_a1_b2.mtx 83/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x3_y2_a1_b3.mtx 84/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x3_y2_a2_b1.mtx 85/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x3_y2_a2_b2.mtx 86/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x3_y2_a2_b3.mtx 87/144 [OK]
[2026-06-04 21:19:28] Generation geaxpby single precision result file parallel/sol_x3_y2_a3_b1.mtx 88/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x3_y2_a3_b2.mtx 89/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x3_y2_a3_b3.mtx 90/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x3_y3_a1_b1.mtx 91/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x3_y3_a1_b2.mtx 92/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x3_y3_a1_b3.mtx 93/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x3_y3_a2_b1.mtx 94/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x3_y3_a2_b2.mtx 95/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x3_y3_a2_b3.mtx 96/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x3_y3_a3_b1.mtx 97/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x3_y3_a3_b2.mtx 98/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x3_y3_a3_b3.mtx 99/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x3_y4_a1_b1.mtx 100/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x3_y4_a1_b2.mtx 101/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x3_y4_a1_b3.mtx 102/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x3_y4_a2_b1.mtx 103/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x3_y4_a2_b2.mtx 104/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x3_y4_a2_b3.mtx 105/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x3_y4_a3_b1.mtx 106/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x3_y4_a3_b2.mtx 107/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x3_y4_a3_b3.mtx 108/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x4_y1_a1_b1.mtx 109/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x4_y1_a1_b2.mtx 110/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x4_y1_a1_b3.mtx 111/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x4_y1_a2_b1.mtx 112/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x4_y1_a2_b2.mtx 113/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x4_y1_a2_b3.mtx 114/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x4_y1_a3_b1.mtx 115/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x4_y1_a3_b2.mtx 116/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x4_y1_a3_b3.mtx 117/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x4_y2_a1_b1.mtx 118/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x4_y2_a1_b2.mtx 119/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x4_y2_a1_b3.mtx 120/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x4_y2_a2_b1.mtx 121/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x4_y2_a2_b2.mtx 122/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x4_y2_a2_b3.mtx 123/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x4_y2_a3_b1.mtx 124/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x4_y2_a3_b2.mtx 125/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x4_y2_a3_b3.mtx 126/144 [OK]
[2026-06-04 21:19:29] Generation geaxpby single precision result file parallel/sol_x4_y3_a1_b1.mtx 127/144 [OK]
[2026-06-04 21:19:30] Generation geaxpby single precision result file parallel/sol_x4_y3_a1_b2.mtx 128/144 [OK]
[2026-06-04 21:19:30] Generation geaxpby single precision result file parallel/sol_x4_y3_a1_b3.mtx 129/144 [OK]
[2026-06-04 21:19:30] Generation geaxpby single precision result file parallel/sol_x4_y3_a2_b1.mtx 130/144 [OK]
[2026-06-04 21:19:30] Generation geaxpby single precision result file parallel/sol_x4_y3_a2_b2.mtx 131/144 [OK]
[2026-06-04 21:19:30] Generation geaxpby single precision result file parallel/sol_x4_y3_a2_b3.mtx 132/144 [OK]
[2026-06-04 21:19:30] Generation geaxpby single precision result file parallel/sol_x4_y3_a3_b1.mtx 133/144 [OK]
[2026-06-04 21:19:30] Generation geaxpby single precision result file parallel/sol_x4_y3_a3_b2.mtx 134/144 [OK]
[2026-06-04 21:19:30] Generation geaxpby single precision result file parallel/sol_x4_y3_a3_b3.mtx 135/144 [OK]
[2026-06-04 21:19:30] Generation geaxpby single precision result file parallel/sol_x4_y4_a1_b1.mtx 136/144 [OK]
[2026-06-04 21:19:30] Generation geaxpby single precision result file parallel/sol_x4_y4_a1_b2.mtx 137/144 [OK]
[2026-06-04 21:19:30] Generation geaxpby single precision result file parallel/sol_x4_y4_a1_b3.mtx 138/144 [OK]
[2026-06-04 21:19:30] Generation geaxpby single precision result file parallel/sol_x4_y4_a2_b1.mtx 139/144 [OK]
[2026-06-04 21:19:30] Generation geaxpby single precision result file parallel/sol_x4_y4_a2_b2.mtx 140/144 [OK]
[2026-06-04 21:19:30] Generation geaxpby single precision result file parallel/sol_x4_y4_a2_b3.mtx 141/144 [OK]
[2026-06-04 21:19:30] Generation geaxpby single precision result file parallel/sol_x4_y4_a3_b1.mtx 142/144 [OK]
[2026-06-04 21:19:30] Generation geaxpby single precision result file parallel/sol_x4_y4_a3_b2.mtx 143/144 [OK]
[2026-06-04 21:19:30] Generation geaxpby single precision result file parallel/sol_x4_y4_a3_b3.mtx 144/144 [OK]
[2026-06-04 21:19:30] Double precision check on file parallel/sol_x1_y1_a1_b1.mtx 1/144 [OK]
[2026-06-04 21:19:30] Double precision check on file parallel/sol_x1_y1_a1_b2.mtx 2/144 [OK]
[2026-06-04 21:19:30] Double precision check on file parallel/sol_x1_y1_a1_b3.mtx 3/144 [OK]
[2026-06-04 21:19:30] Double precision check on file parallel/sol_x1_y1_a2_b1.mtx 4/144 [OK]
[2026-06-04 21:19:30] Double precision check on file parallel/sol_x1_y1_a2_b2.mtx 5/144 [OK]
[2026-06-04 21:19:30] Double precision check on file parallel/sol_x1_y1_a2_b3.mtx 6/144 [OK]
[2026-06-04 21:19:30] Double precision check on file parallel/sol_x1_y1_a3_b1.mtx 7/144 [OK]
[2026-06-04 21:19:30] Double precision check on file parallel/sol_x1_y1_a3_b2.mtx 8/144 [OK]
[2026-06-04 21:19:30] Double precision check on file parallel/sol_x1_y1_a3_b3.mtx 9/144 [OK]
[2026-06-04 21:19:30] Double precision check on file parallel/sol_x1_y2_a1_b1.mtx 10/144 [OK]
[2026-06-04 21:19:30] Double precision check on file parallel/sol_x1_y2_a1_b2.mtx 11/144 [OK]
[2026-06-04 21:19:30] Double precision check on file parallel/sol_x1_y2_a1_b3.mtx 12/144 [OK]
[2026-06-04 21:19:30] Double precision check on file parallel/sol_x1_y2_a2_b1.mtx 13/144 [OK]
[2026-06-04 21:19:30] Double precision check on file parallel/sol_x1_y2_a2_b2.mtx 14/144 [OK]
[2026-06-04 21:19:30] Double precision check on file parallel/sol_x1_y2_a2_b3.mtx 15/144 [OK]
[2026-06-04 21:19:30] Double precision check on file parallel/sol_x1_y2_a3_b1.mtx 16/144 [OK]
[2026-06-04 21:19:30] Double precision check on file parallel/sol_x1_y2_a3_b2.mtx 17/144 [OK]
[2026-06-04 21:19:30] Double precision check on file parallel/sol_x1_y2_a3_b3.mtx 18/144 [OK]
[2026-06-04 21:19:30] Double precision check on file parallel/sol_x1_y3_a1_b1.mtx 19/144 [OK]
[2026-06-04 21:19:30] Double precision check on file parallel/sol_x1_y3_a1_b2.mtx 20/144 [OK]
[2026-06-04 21:19:30] Double precision check on file parallel/sol_x1_y3_a1_b3.mtx 21/144 [OK]
[2026-06-04 21:19:30] Double precision check on file parallel/sol_x1_y3_a2_b1.mtx 22/144 [OK]
[2026-06-04 21:19:30] Double precision check on file parallel/sol_x1_y3_a2_b2.mtx 23/144 [OK]
[2026-06-04 21:19:30] Double precision check on file parallel/sol_x1_y3_a2_b3.mtx 24/144 [OK]
[2026-06-04 21:19:30] Double precision check on file parallel/sol_x1_y3_a3_b1.mtx 25/144 [OK]
[2026-06-04 21:19:30] Double precision check on file parallel/sol_x1_y3_a3_b2.mtx 26/144 [OK]
[2026-06-04 21:19:30] Double precision check on file parallel/sol_x1_y3_a3_b3.mtx 27/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x1_y4_a1_b1.mtx 28/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x1_y4_a1_b2.mtx 29/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x1_y4_a1_b3.mtx 30/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x1_y4_a2_b1.mtx 31/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x1_y4_a2_b2.mtx 32/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x1_y4_a2_b3.mtx 33/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x1_y4_a3_b1.mtx 34/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x1_y4_a3_b2.mtx 35/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x1_y4_a3_b3.mtx 36/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y1_a1_b1.mtx 37/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y1_a1_b2.mtx 38/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y1_a1_b3.mtx 39/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y1_a2_b1.mtx 40/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y1_a2_b2.mtx 41/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y1_a2_b3.mtx 42/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y1_a3_b1.mtx 43/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y1_a3_b2.mtx 44/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y1_a3_b3.mtx 45/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y2_a1_b1.mtx 46/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y2_a1_b2.mtx 47/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y2_a1_b3.mtx 48/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y2_a2_b1.mtx 49/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y2_a2_b2.mtx 50/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y2_a2_b3.mtx 51/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y2_a3_b1.mtx 52/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y2_a3_b2.mtx 53/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y2_a3_b3.mtx 54/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y3_a1_b1.mtx 55/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y3_a1_b2.mtx 56/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y3_a1_b3.mtx 57/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y3_a2_b1.mtx 58/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y3_a2_b2.mtx 59/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y3_a2_b3.mtx 60/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y3_a3_b1.mtx 61/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y3_a3_b2.mtx 62/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y3_a3_b3.mtx 63/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y4_a1_b1.mtx 64/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y4_a1_b2.mtx 65/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y4_a1_b3.mtx 66/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y4_a2_b1.mtx 67/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y4_a2_b2.mtx 68/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y4_a2_b3.mtx 69/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y4_a3_b1.mtx 70/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y4_a3_b2.mtx 71/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x2_y4_a3_b3.mtx 72/144 [OK]
[2026-06-04 21:19:31] Double precision check on file parallel/sol_x3_y1_a1_b1.mtx 73/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y1_a1_b2.mtx 74/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y1_a1_b3.mtx 75/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y1_a2_b1.mtx 76/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y1_a2_b2.mtx 77/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y1_a2_b3.mtx 78/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y1_a3_b1.mtx 79/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y1_a3_b2.mtx 80/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y1_a3_b3.mtx 81/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y2_a1_b1.mtx 82/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y2_a1_b2.mtx 83/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y2_a1_b3.mtx 84/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y2_a2_b1.mtx 85/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y2_a2_b2.mtx 86/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y2_a2_b3.mtx 87/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y2_a3_b1.mtx 88/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y2_a3_b2.mtx 89/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y2_a3_b3.mtx 90/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y3_a1_b1.mtx 91/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y3_a1_b2.mtx 92/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y3_a1_b3.mtx 93/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y3_a2_b1.mtx 94/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y3_a2_b2.mtx 95/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y3_a2_b3.mtx 96/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y3_a3_b1.mtx 97/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y3_a3_b2.mtx 98/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y3_a3_b3.mtx 99/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y4_a1_b1.mtx 100/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y4_a1_b2.mtx 101/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y4_a1_b3.mtx 102/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y4_a2_b1.mtx 103/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y4_a2_b2.mtx 104/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y4_a2_b3.mtx 105/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y4_a3_b1.mtx 106/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y4_a3_b2.mtx 107/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x3_y4_a3_b3.mtx 108/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x4_y1_a1_b1.mtx 109/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x4_y1_a1_b2.mtx 110/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x4_y1_a1_b3.mtx 111/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x4_y1_a2_b1.mtx 112/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x4_y1_a2_b2.mtx 113/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x4_y1_a2_b3.mtx 114/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x4_y1_a3_b1.mtx 115/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x4_y1_a3_b2.mtx 116/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x4_y1_a3_b3.mtx 117/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x4_y2_a1_b1.mtx 118/144 [OK]
[2026-06-04 21:19:32] Double precision check on file parallel/sol_x4_y2_a1_b2.mtx 119/144 [OK]
[2026-06-04 21:19:33] Double precision check on file parallel/sol_x4_y2_a1_b3.mtx 120/144 [OK]
[2026-06-04 21:19:33] Double precision check on file parallel/sol_x4_y2_a2_b1.mtx 121/144 [OK]
[2026-06-04 21:19:33] Double precision check on file parallel/sol_x4_y2_a2_b2.mtx 122/144 [OK]
[2026-06-04 21:19:33] Double precision check on file parallel/sol_x4_y2_a2_b3.mtx 123/144 [OK]
[2026-06-04 21:19:33] Double precision check on file parallel/sol_x4_y2_a3_b1.mtx 124/144 [OK]
[2026-06-04 21:19:33] Double precision check on file parallel/sol_x4_y2_a3_b2.mtx 125/144 [OK]
[2026-06-04 21:19:33] Double precision check on file parallel/sol_x4_y2_a3_b3.mtx 126/144 [OK]
[2026-06-04 21:19:33] Double precision check on file parallel/sol_x4_y3_a1_b1.mtx 127/144 [OK]
[2026-06-04 21:19:33] Double precision check on file parallel/sol_x4_y3_a1_b2.mtx 128/144 [OK]
[2026-06-04 21:19:33] Double precision check on file parallel/sol_x4_y3_a1_b3.mtx 129/144 [OK]
[2026-06-04 21:19:33] Double precision check on file parallel/sol_x4_y3_a2_b1.mtx 130/144 [OK]
[2026-06-04 21:19:33] Double precision check on file parallel/sol_x4_y3_a2_b2.mtx 131/144 [OK]
[2026-06-04 21:19:33] Double precision check on file parallel/sol_x4_y3_a2_b3.mtx 132/144 [OK]
[2026-06-04 21:19:33] Double precision check on file parallel/sol_x4_y3_a3_b1.mtx 133/144 [OK]
[2026-06-04 21:19:33] Double precision check on file parallel/sol_x4_y3_a3_b2.mtx 134/144 [OK]
[2026-06-04 21:19:33] Double precision check on file parallel/sol_x4_y3_a3_b3.mtx 135/144 [OK]
[2026-06-04 21:19:33] Double precision check on file parallel/sol_x4_y4_a1_b1.mtx 136/144 [OK]
[2026-06-04 21:19:33] Double precision check on file parallel/sol_x4_y4_a1_b2.mtx 137/144 [OK]
[2026-06-04 21:19:33] Double precision check on file parallel/sol_x4_y4_a1_b3.mtx 138/144 [OK]
[2026-06-04 21:19:33] Double precision check on file parallel/sol_x4_y4_a2_b1.mtx 139/144 [OK]
[2026-06-04 21:19:33] Double precision check on file parallel/sol_x4_y4_a2_b2.mtx 140/144 [OK]
[2026-06-04 21:19:33] Double precision check on file parallel/sol_x4_y4_a2_b3.mtx 141/144 [OK]
[2026-06-04 21:19:33] Double precision check on file parallel/sol_x4_y4_a3_b1.mtx 142/144 [OK]
[2026-06-04 21:19:33] Double precision check on file parallel/sol_x4_y4_a3_b2.mtx 143/144 [OK]
[2026-06-04 21:19:33] Double precision check on file parallel/sol_x4_y4_a3_b3.mtx 144/144 [OK]

@ -1,402 +0,0 @@
[RUN] psb_gedot
[DATE] 2026-05-28 10:23:37
[RUN] psb_gedot
[DATE] 2026-05-28 10:26:58
[DIFF] serial/sol_x1_y1.mtx vs parallel/sol_x1_y1.mtx: 4 differences
[DIFF] serial/sol_x1_y2.mtx vs parallel/sol_x1_y2.mtx: 4 differences
[DIFF] serial/sol_x1_y3.mtx vs parallel/sol_x1_y3.mtx: 4 differences
[DIFF] serial/sol_x1_y4.mtx vs parallel/sol_x1_y4.mtx: 0 differences
[DIFF] serial/sol_x2_y1.mtx vs parallel/sol_x2_y1.mtx: 4 differences
[DIFF] serial/sol_x2_y2.mtx vs parallel/sol_x2_y2.mtx: 4 differences
[DIFF] serial/sol_x2_y3.mtx vs parallel/sol_x2_y3.mtx: 4 differences
[DIFF] serial/sol_x2_y4.mtx vs parallel/sol_x2_y4.mtx: 0 differences
[DIFF] serial/sol_x3_y1.mtx vs parallel/sol_x3_y1.mtx: 4 differences
[DIFF] serial/sol_x3_y2.mtx vs parallel/sol_x3_y2.mtx: 4 differences
[DIFF] serial/sol_x3_y3.mtx vs parallel/sol_x3_y3.mtx: 4 differences
[DIFF] serial/sol_x3_y4.mtx vs parallel/sol_x3_y4.mtx: 0 differences
[DIFF] serial/sol_x4_y1.mtx vs parallel/sol_x4_y1.mtx: 0 differences
[DIFF] serial/sol_x4_y2.mtx vs parallel/sol_x4_y2.mtx: 0 differences
[DIFF] serial/sol_x4_y3.mtx vs parallel/sol_x4_y3.mtx: 0 differences
[DIFF] serial/sol_x4_y4.mtx vs parallel/sol_x4_y4.mtx: 0 differences
[RUN] psb_gedot
[DATE] 2026-05-28 10:50:32
[DIFF] serial/sol_x1_y1.mtx vs parallel/sol_x1_y1.mtx: 4 differences
[DIFF] serial/sol_x1_y2.mtx vs parallel/sol_x1_y2.mtx: 4 differences
[DIFF] serial/sol_x1_y3.mtx vs parallel/sol_x1_y3.mtx: 4 differences
[DIFF] serial/sol_x1_y4.mtx vs parallel/sol_x1_y4.mtx: 0 differences
[DIFF] serial/sol_x2_y1.mtx vs parallel/sol_x2_y1.mtx: 4 differences
[DIFF] serial/sol_x2_y2.mtx vs parallel/sol_x2_y2.mtx: 4 differences
[DIFF] serial/sol_x2_y3.mtx vs parallel/sol_x2_y3.mtx: 4 differences
[DIFF] serial/sol_x2_y4.mtx vs parallel/sol_x2_y4.mtx: 0 differences
[DIFF] serial/sol_x3_y1.mtx vs parallel/sol_x3_y1.mtx: 4 differences
[DIFF] serial/sol_x3_y2.mtx vs parallel/sol_x3_y2.mtx: 4 differences
[DIFF] serial/sol_x3_y3.mtx vs parallel/sol_x3_y3.mtx: 4 differences
[DIFF] serial/sol_x3_y4.mtx vs parallel/sol_x3_y4.mtx: 0 differences
[DIFF] serial/sol_x4_y1.mtx vs parallel/sol_x4_y1.mtx: 0 differences
[DIFF] serial/sol_x4_y2.mtx vs parallel/sol_x4_y2.mtx: 0 differences
[DIFF] serial/sol_x4_y3.mtx vs parallel/sol_x4_y3.mtx: 0 differences
[DIFF] serial/sol_x4_y4.mtx vs parallel/sol_x4_y4.mtx: 0 differences
[RUN] psb_gedot
[DATE] 2026-05-28 15:14:11
[DIFF] serial/sol_x1_y1.mtx vs parallel/sol_x1_y1.mtx: 4 differences
[DIFF] serial/sol_x1_y2.mtx vs parallel/sol_x1_y2.mtx: 4 differences
[DIFF] serial/sol_x1_y3.mtx vs parallel/sol_x1_y3.mtx: 4 differences
[DIFF] serial/sol_x1_y4.mtx vs parallel/sol_x1_y4.mtx: 0 differences
[DIFF] serial/sol_x2_y1.mtx vs parallel/sol_x2_y1.mtx: 4 differences
[DIFF] serial/sol_x2_y2.mtx vs parallel/sol_x2_y2.mtx: 4 differences
[DIFF] serial/sol_x2_y3.mtx vs parallel/sol_x2_y3.mtx: 4 differences
[DIFF] serial/sol_x2_y4.mtx vs parallel/sol_x2_y4.mtx: 0 differences
[DIFF] serial/sol_x3_y1.mtx vs parallel/sol_x3_y1.mtx: 4 differences
[DIFF] serial/sol_x3_y2.mtx vs parallel/sol_x3_y2.mtx: 4 differences
[DIFF] serial/sol_x3_y3.mtx vs parallel/sol_x3_y3.mtx: 4 differences
[DIFF] serial/sol_x3_y4.mtx vs parallel/sol_x3_y4.mtx: 0 differences
[DIFF] serial/sol_x4_y1.mtx vs parallel/sol_x4_y1.mtx: 0 differences
[DIFF] serial/sol_x4_y2.mtx vs parallel/sol_x4_y2.mtx: 0 differences
[DIFF] serial/sol_x4_y3.mtx vs parallel/sol_x4_y3.mtx: 0 differences
[DIFF] serial/sol_x4_y4.mtx vs parallel/sol_x4_y4.mtx: 0 differences
[RUN] psb_gedot
[DATE] 2026-05-28 16:06:34
[DIFF] serial/sol_x1_y1.mtx vs parallel/sol_x1_y1.mtx: 4 differences
[DIFF] serial/sol_x1_y2.mtx vs parallel/sol_x1_y2.mtx: 4 differences
[DIFF] serial/sol_x1_y3.mtx vs parallel/sol_x1_y3.mtx: 4 differences
[DIFF] serial/sol_x1_y4.mtx vs parallel/sol_x1_y4.mtx: 0 differences
[DIFF] serial/sol_x2_y1.mtx vs parallel/sol_x2_y1.mtx: 4 differences
[DIFF] serial/sol_x2_y2.mtx vs parallel/sol_x2_y2.mtx: 4 differences
[DIFF] serial/sol_x2_y3.mtx vs parallel/sol_x2_y3.mtx: 4 differences
[DIFF] serial/sol_x2_y4.mtx vs parallel/sol_x2_y4.mtx: 0 differences
[DIFF] serial/sol_x3_y1.mtx vs parallel/sol_x3_y1.mtx: 4 differences
[DIFF] serial/sol_x3_y2.mtx vs parallel/sol_x3_y2.mtx: 4 differences
[DIFF] serial/sol_x3_y3.mtx vs parallel/sol_x3_y3.mtx: 4 differences
[DIFF] serial/sol_x3_y4.mtx vs parallel/sol_x3_y4.mtx: 0 differences
[DIFF] serial/sol_x4_y1.mtx vs parallel/sol_x4_y1.mtx: 0 differences
[DIFF] serial/sol_x4_y2.mtx vs parallel/sol_x4_y2.mtx: 0 differences
[DIFF] serial/sol_x4_y3.mtx vs parallel/sol_x4_y3.mtx: 0 differences
[DIFF] serial/sol_x4_y4.mtx vs parallel/sol_x4_y4.mtx: 0 differences
[RUN] psb_gedot
[DATE] 2026-05-28 17:38:00
[DIFF] serial/sol_x1_y1.mtx vs parallel/sol_x1_y1.mtx: differences
[DIFF] serial/sol_x1_y2.mtx vs parallel/sol_x1_y2.mtx: differences
[DIFF] serial/sol_x1_y3.mtx vs parallel/sol_x1_y3.mtx: differences
[DIFF] serial/sol_x1_y4.mtx vs parallel/sol_x1_y4.mtx: differences
[DIFF] serial/sol_x2_y1.mtx vs parallel/sol_x2_y1.mtx: differences
[DIFF] serial/sol_x2_y2.mtx vs parallel/sol_x2_y2.mtx: differences
[DIFF] serial/sol_x2_y3.mtx vs parallel/sol_x2_y3.mtx: differences
[DIFF] serial/sol_x2_y4.mtx vs parallel/sol_x2_y4.mtx: differences
[DIFF] serial/sol_x3_y1.mtx vs parallel/sol_x3_y1.mtx: differences
[DIFF] serial/sol_x3_y2.mtx vs parallel/sol_x3_y2.mtx: differences
[DIFF] serial/sol_x3_y3.mtx vs parallel/sol_x3_y3.mtx: differences
[DIFF] serial/sol_x3_y4.mtx vs parallel/sol_x3_y4.mtx: differences
[DIFF] serial/sol_x4_y1.mtx vs parallel/sol_x4_y1.mtx: differences
[DIFF] serial/sol_x4_y2.mtx vs parallel/sol_x4_y2.mtx: differences
[DIFF] serial/sol_x4_y3.mtx vs parallel/sol_x4_y3.mtx: differences
[DIFF] serial/sol_x4_y4.mtx vs parallel/sol_x4_y4.mtx: differences
[RUN] psb_gedot
[DATE] 2026-05-28 17:41:50
[DIFF] serial/sol_x1_y1.mtx vs parallel/sol_x1_y1.mtx: 1 differences
[DIFF] serial/sol_x1_y2.mtx vs parallel/sol_x1_y2.mtx: 1 differences
[DIFF] serial/sol_x1_y3.mtx vs parallel/sol_x1_y3.mtx: 0 differences
[DIFF] serial/sol_x1_y4.mtx vs parallel/sol_x1_y4.mtx: 0 differences
[DIFF] serial/sol_x2_y1.mtx vs parallel/sol_x2_y1.mtx: 1 differences
[DIFF] serial/sol_x2_y2.mtx vs parallel/sol_x2_y2.mtx: 1 differences
[DIFF] serial/sol_x2_y3.mtx vs parallel/sol_x2_y3.mtx: 0 differences
[DIFF] serial/sol_x2_y4.mtx vs parallel/sol_x2_y4.mtx: 0 differences
[DIFF] serial/sol_x3_y1.mtx vs parallel/sol_x3_y1.mtx: 1 differences
[DIFF] serial/sol_x3_y2.mtx vs parallel/sol_x3_y2.mtx: 1 differences
[DIFF] serial/sol_x3_y3.mtx vs parallel/sol_x3_y3.mtx: 1 differences
[DIFF] serial/sol_x3_y4.mtx vs parallel/sol_x3_y4.mtx: 0 differences
[DIFF] serial/sol_x4_y1.mtx vs parallel/sol_x4_y1.mtx: 0 differences
[DIFF] serial/sol_x4_y2.mtx vs parallel/sol_x4_y2.mtx: 0 differences
[DIFF] serial/sol_x4_y3.mtx vs parallel/sol_x4_y3.mtx: 0 differences
[DIFF] serial/sol_x4_y4.mtx vs parallel/sol_x4_y4.mtx: 0 differences
[RUN] psb_gedot
[DATE] 2026-05-29 08:14:58
[DIFF] serial/sol_x1_y1.mtx vs parallel/sol_x1_y1.mtx: 1 differences
[DIFF] serial/sol_x1_y2.mtx vs parallel/sol_x1_y2.mtx: 1 differences
[DIFF] serial/sol_x1_y3.mtx vs parallel/sol_x1_y3.mtx: 0 differences
[DIFF] serial/sol_x1_y4.mtx vs parallel/sol_x1_y4.mtx: 0 differences
[DIFF] serial/sol_x2_y1.mtx vs parallel/sol_x2_y1.mtx: 1 differences
[DIFF] serial/sol_x2_y2.mtx vs parallel/sol_x2_y2.mtx: 1 differences
[DIFF] serial/sol_x2_y3.mtx vs parallel/sol_x2_y3.mtx: 0 differences
[DIFF] serial/sol_x2_y4.mtx vs parallel/sol_x2_y4.mtx: 0 differences
[DIFF] serial/sol_x3_y1.mtx vs parallel/sol_x3_y1.mtx: 1 differences
[DIFF] serial/sol_x3_y2.mtx vs parallel/sol_x3_y2.mtx: 1 differences
[DIFF] serial/sol_x3_y3.mtx vs parallel/sol_x3_y3.mtx: 1 differences
[DIFF] serial/sol_x3_y4.mtx vs parallel/sol_x3_y4.mtx: 0 differences
[DIFF] serial/sol_x4_y1.mtx vs parallel/sol_x4_y1.mtx: 0 differences
[DIFF] serial/sol_x4_y2.mtx vs parallel/sol_x4_y2.mtx: 0 differences
[DIFF] serial/sol_x4_y3.mtx vs parallel/sol_x4_y3.mtx: 0 differences
[DIFF] serial/sol_x4_y4.mtx vs parallel/sol_x4_y4.mtx: 0 differences
[RUN] psb_gedot
[DATE] 2026-05-30 12:38:04
[DIFF] serial/sol_x1_y1.mtx vs parallel/sol_x1_y1.mtx: 1 differences
[DIFF] serial/sol_x1_y2.mtx vs parallel/sol_x1_y2.mtx: 1 differences
[DIFF] serial/sol_x1_y3.mtx vs parallel/sol_x1_y3.mtx: 0 differences
[DIFF] serial/sol_x1_y4.mtx vs parallel/sol_x1_y4.mtx: 0 differences
[DIFF] serial/sol_x2_y1.mtx vs parallel/sol_x2_y1.mtx: 1 differences
[DIFF] serial/sol_x2_y2.mtx vs parallel/sol_x2_y2.mtx: 1 differences
[DIFF] serial/sol_x2_y3.mtx vs parallel/sol_x2_y3.mtx: 0 differences
[DIFF] serial/sol_x2_y4.mtx vs parallel/sol_x2_y4.mtx: 0 differences
[DIFF] serial/sol_x3_y1.mtx vs parallel/sol_x3_y1.mtx: 1 differences
[DIFF] serial/sol_x3_y2.mtx vs parallel/sol_x3_y2.mtx: 1 differences
[DIFF] serial/sol_x3_y3.mtx vs parallel/sol_x3_y3.mtx: 1 differences
[DIFF] serial/sol_x3_y4.mtx vs parallel/sol_x3_y4.mtx: 0 differences
[DIFF] serial/sol_x4_y1.mtx vs parallel/sol_x4_y1.mtx: 0 differences
[DIFF] serial/sol_x4_y2.mtx vs parallel/sol_x4_y2.mtx: 0 differences
[DIFF] serial/sol_x4_y3.mtx vs parallel/sol_x4_y3.mtx: 0 differences
[DIFF] serial/sol_x4_y4.mtx vs parallel/sol_x4_y4.mtx: 0 differences
[RUN] psb_gedot
[DATE] 2026-05-30 12:40:04
[DIFF] serial/sol_x1_y1.mtx vs parallel/sol_x1_y1.mtx: 1 differences
[DIFF] serial/sol_x1_y2.mtx vs parallel/sol_x1_y2.mtx: 1 differences
[DIFF] serial/sol_x1_y3.mtx vs parallel/sol_x1_y3.mtx: 0 differences
[DIFF] serial/sol_x1_y4.mtx vs parallel/sol_x1_y4.mtx: 0 differences
[DIFF] serial/sol_x2_y1.mtx vs parallel/sol_x2_y1.mtx: 1 differences
[DIFF] serial/sol_x2_y2.mtx vs parallel/sol_x2_y2.mtx: 1 differences
[DIFF] serial/sol_x2_y3.mtx vs parallel/sol_x2_y3.mtx: 0 differences
[DIFF] serial/sol_x2_y4.mtx vs parallel/sol_x2_y4.mtx: 0 differences
[DIFF] serial/sol_x3_y1.mtx vs parallel/sol_x3_y1.mtx: 1 differences
[DIFF] serial/sol_x3_y2.mtx vs parallel/sol_x3_y2.mtx: 1 differences
[DIFF] serial/sol_x3_y3.mtx vs parallel/sol_x3_y3.mtx: 1 differences
[DIFF] serial/sol_x3_y4.mtx vs parallel/sol_x3_y4.mtx: 0 differences
[DIFF] serial/sol_x4_y1.mtx vs parallel/sol_x4_y1.mtx: 0 differences
[DIFF] serial/sol_x4_y2.mtx vs parallel/sol_x4_y2.mtx: 0 differences
[DIFF] serial/sol_x4_y3.mtx vs parallel/sol_x4_y3.mtx: 0 differences
[DIFF] serial/sol_x4_y4.mtx vs parallel/sol_x4_y4.mtx: 0 differences
[RUN] psb_gedot
[DATE] 2026-05-30 13:02:07
[DIFF] serial/sol_x1_y1.mtx vs parallel/sol_x1_y1.mtx: 1 differences
[DIFF] serial/sol_x1_y2.mtx vs parallel/sol_x1_y2.mtx: 1 differences
[DIFF] serial/sol_x1_y3.mtx vs parallel/sol_x1_y3.mtx: 0 differences
[DIFF] serial/sol_x1_y4.mtx vs parallel/sol_x1_y4.mtx: 0 differences
[DIFF] serial/sol_x2_y1.mtx vs parallel/sol_x2_y1.mtx: 1 differences
[DIFF] serial/sol_x2_y2.mtx vs parallel/sol_x2_y2.mtx: 1 differences
[DIFF] serial/sol_x2_y3.mtx vs parallel/sol_x2_y3.mtx: 0 differences
[DIFF] serial/sol_x2_y4.mtx vs parallel/sol_x2_y4.mtx: 0 differences
[DIFF] serial/sol_x3_y1.mtx vs parallel/sol_x3_y1.mtx: 1 differences
[DIFF] serial/sol_x3_y2.mtx vs parallel/sol_x3_y2.mtx: 1 differences
[DIFF] serial/sol_x3_y3.mtx vs parallel/sol_x3_y3.mtx: 1 differences
[DIFF] serial/sol_x3_y4.mtx vs parallel/sol_x3_y4.mtx: 0 differences
[DIFF] serial/sol_x4_y1.mtx vs parallel/sol_x4_y1.mtx: 0 differences
[DIFF] serial/sol_x4_y2.mtx vs parallel/sol_x4_y2.mtx: 0 differences
[DIFF] serial/sol_x4_y3.mtx vs parallel/sol_x4_y3.mtx: 0 differences
[DIFF] serial/sol_x4_y4.mtx vs parallel/sol_x4_y4.mtx: 0 differences
[RUN] psb_gedot
[DATE] 2026-06-03 13:16:42
[DIFF] serial/sol_x1_y1.mtx vs parallel/sol_x1_y1.mtx: 1 differences
[DIFF] serial/sol_x1_y2.mtx vs parallel/sol_x1_y2.mtx: 1 differences
[DIFF] serial/sol_x1_y3.mtx vs parallel/sol_x1_y3.mtx: 0 differences
[DIFF] serial/sol_x1_y4.mtx vs parallel/sol_x1_y4.mtx: 0 differences
[DIFF] serial/sol_x2_y1.mtx vs parallel/sol_x2_y1.mtx: 1 differences
[DIFF] serial/sol_x2_y2.mtx vs parallel/sol_x2_y2.mtx: 1 differences
[DIFF] serial/sol_x2_y3.mtx vs parallel/sol_x2_y3.mtx: 0 differences
[DIFF] serial/sol_x2_y4.mtx vs parallel/sol_x2_y4.mtx: 0 differences
[DIFF] serial/sol_x3_y1.mtx vs parallel/sol_x3_y1.mtx: 1 differences
[DIFF] serial/sol_x3_y2.mtx vs parallel/sol_x3_y2.mtx: 1 differences
[DIFF] serial/sol_x3_y3.mtx vs parallel/sol_x3_y3.mtx: 1 differences
[DIFF] serial/sol_x3_y4.mtx vs parallel/sol_x3_y4.mtx: 0 differences
[DIFF] serial/sol_x4_y1.mtx vs parallel/sol_x4_y1.mtx: 0 differences
[DIFF] serial/sol_x4_y2.mtx vs parallel/sol_x4_y2.mtx: 0 differences
[DIFF] serial/sol_x4_y3.mtx vs parallel/sol_x4_y3.mtx: 0 differences
[DIFF] serial/sol_x4_y4.mtx vs parallel/sol_x4_y4.mtx: 0 differences
[RUN] psb_gedot
[DATE] 2026-06-03 13:22:24
[DIFF] serial/sol_x1_y1.mtx vs parallel/sol_x1_y1.mtx: 1 differences
[DIFF] serial/sol_x1_y2.mtx vs parallel/sol_x1_y2.mtx: 1 differences
[DIFF] serial/sol_x1_y3.mtx vs parallel/sol_x1_y3.mtx: 0 differences
[DIFF] serial/sol_x1_y4.mtx vs parallel/sol_x1_y4.mtx: 0 differences
[DIFF] serial/sol_x2_y1.mtx vs parallel/sol_x2_y1.mtx: 1 differences
[DIFF] serial/sol_x2_y2.mtx vs parallel/sol_x2_y2.mtx: 1 differences
[DIFF] serial/sol_x2_y3.mtx vs parallel/sol_x2_y3.mtx: 0 differences
[DIFF] serial/sol_x2_y4.mtx vs parallel/sol_x2_y4.mtx: 0 differences
[DIFF] serial/sol_x3_y1.mtx vs parallel/sol_x3_y1.mtx: 1 differences
[DIFF] serial/sol_x3_y2.mtx vs parallel/sol_x3_y2.mtx: 1 differences
[DIFF] serial/sol_x3_y3.mtx vs parallel/sol_x3_y3.mtx: 1 differences
[DIFF] serial/sol_x3_y4.mtx vs parallel/sol_x3_y4.mtx: 0 differences
[DIFF] serial/sol_x4_y1.mtx vs parallel/sol_x4_y1.mtx: 0 differences
[DIFF] serial/sol_x4_y2.mtx vs parallel/sol_x4_y2.mtx: 0 differences
[DIFF] serial/sol_x4_y3.mtx vs parallel/sol_x4_y3.mtx: 0 differences
[DIFF] serial/sol_x4_y4.mtx vs parallel/sol_x4_y4.mtx: 0 differences
[RUN] psb_gedot
[DATE] 2026-06-03 14:22:44
[DIFF] serial/sol_x1_y1.mtx vs parallel/sol_x1_y1.mtx: 1 differences
[DIFF] serial/sol_x1_y2.mtx vs parallel/sol_x1_y2.mtx: 1 differences
[DIFF] serial/sol_x1_y3.mtx vs parallel/sol_x1_y3.mtx: 0 differences
[DIFF] serial/sol_x1_y4.mtx vs parallel/sol_x1_y4.mtx: 0 differences
[DIFF] serial/sol_x2_y1.mtx vs parallel/sol_x2_y1.mtx: 1 differences
[DIFF] serial/sol_x2_y2.mtx vs parallel/sol_x2_y2.mtx: 1 differences
[DIFF] serial/sol_x2_y3.mtx vs parallel/sol_x2_y3.mtx: 0 differences
[DIFF] serial/sol_x2_y4.mtx vs parallel/sol_x2_y4.mtx: 0 differences
[DIFF] serial/sol_x3_y1.mtx vs parallel/sol_x3_y1.mtx: 1 differences
[DIFF] serial/sol_x3_y2.mtx vs parallel/sol_x3_y2.mtx: 1 differences
[DIFF] serial/sol_x3_y3.mtx vs parallel/sol_x3_y3.mtx: 1 differences
[DIFF] serial/sol_x3_y4.mtx vs parallel/sol_x3_y4.mtx: 0 differences
[DIFF] serial/sol_x4_y1.mtx vs parallel/sol_x4_y1.mtx: 0 differences
[DIFF] serial/sol_x4_y2.mtx vs parallel/sol_x4_y2.mtx: 0 differences
[DIFF] serial/sol_x4_y3.mtx vs parallel/sol_x4_y3.mtx: 0 differences
[DIFF] serial/sol_x4_y4.mtx vs parallel/sol_x4_y4.mtx: 0 differences
[RUN] psb_gedot
[DATE] 2026-06-03 15:22:09
[DIFF] serial/sol_x1_y1.mtx vs parallel/sol_x1_y1.mtx: 1 differences
[DIFF] serial/sol_x1_y2.mtx vs parallel/sol_x1_y2.mtx: 1 differences
[DIFF] serial/sol_x1_y3.mtx vs parallel/sol_x1_y3.mtx: 0 differences
[DIFF] serial/sol_x1_y4.mtx vs parallel/sol_x1_y4.mtx: 0 differences
[DIFF] serial/sol_x2_y1.mtx vs parallel/sol_x2_y1.mtx: 1 differences
[DIFF] serial/sol_x2_y2.mtx vs parallel/sol_x2_y2.mtx: 1 differences
[DIFF] serial/sol_x2_y3.mtx vs parallel/sol_x2_y3.mtx: 0 differences
[DIFF] serial/sol_x2_y4.mtx vs parallel/sol_x2_y4.mtx: 0 differences
[DIFF] serial/sol_x3_y1.mtx vs parallel/sol_x3_y1.mtx: 1 differences
[DIFF] serial/sol_x3_y2.mtx vs parallel/sol_x3_y2.mtx: 1 differences
[DIFF] serial/sol_x3_y3.mtx vs parallel/sol_x3_y3.mtx: 1 differences
[DIFF] serial/sol_x3_y4.mtx vs parallel/sol_x3_y4.mtx: 0 differences
[DIFF] serial/sol_x4_y1.mtx vs parallel/sol_x4_y1.mtx: 0 differences
[DIFF] serial/sol_x4_y2.mtx vs parallel/sol_x4_y2.mtx: 0 differences
[DIFF] serial/sol_x4_y3.mtx vs parallel/sol_x4_y3.mtx: 0 differences
[DIFF] serial/sol_x4_y4.mtx vs parallel/sol_x4_y4.mtx: 0 differences
[RUN] psb_gedot
[DATE] 2026-06-03 15:29:48
[DIFF] serial/sol_x1_y1.mtx vs parallel/sol_x1_y1.mtx: 1 differences
[DIFF] serial/sol_x1_y2.mtx vs parallel/sol_x1_y2.mtx: 1 differences
[DIFF] serial/sol_x1_y3.mtx vs parallel/sol_x1_y3.mtx: 0 differences
[DIFF] serial/sol_x1_y4.mtx vs parallel/sol_x1_y4.mtx: 0 differences
[DIFF] serial/sol_x2_y1.mtx vs parallel/sol_x2_y1.mtx: 1 differences
[DIFF] serial/sol_x2_y2.mtx vs parallel/sol_x2_y2.mtx: 1 differences
[DIFF] serial/sol_x2_y3.mtx vs parallel/sol_x2_y3.mtx: 0 differences
[DIFF] serial/sol_x2_y4.mtx vs parallel/sol_x2_y4.mtx: 0 differences
[DIFF] serial/sol_x3_y1.mtx vs parallel/sol_x3_y1.mtx: 1 differences
[DIFF] serial/sol_x3_y2.mtx vs parallel/sol_x3_y2.mtx: 1 differences
[DIFF] serial/sol_x3_y3.mtx vs parallel/sol_x3_y3.mtx: 1 differences
[DIFF] serial/sol_x3_y4.mtx vs parallel/sol_x3_y4.mtx: 0 differences
[DIFF] serial/sol_x4_y1.mtx vs parallel/sol_x4_y1.mtx: 0 differences
[DIFF] serial/sol_x4_y2.mtx vs parallel/sol_x4_y2.mtx: 0 differences
[DIFF] serial/sol_x4_y3.mtx vs parallel/sol_x4_y3.mtx: 0 differences
[DIFF] serial/sol_x4_y4.mtx vs parallel/sol_x4_y4.mtx: 0 differences
[RUN] psb_gedot
[DATE] 2026-06-03 15:55:09
[DIFF] serial/sol_x1_y1.mtx vs parallel/sol_x1_y1.mtx: 1 differences
[DIFF] serial/sol_x1_y2.mtx vs parallel/sol_x1_y2.mtx: 1 differences
[DIFF] serial/sol_x1_y3.mtx vs parallel/sol_x1_y3.mtx: 0 differences
[DIFF] serial/sol_x1_y4.mtx vs parallel/sol_x1_y4.mtx: 0 differences
[DIFF] serial/sol_x2_y1.mtx vs parallel/sol_x2_y1.mtx: 1 differences
[DIFF] serial/sol_x2_y2.mtx vs parallel/sol_x2_y2.mtx: 1 differences
[DIFF] serial/sol_x2_y3.mtx vs parallel/sol_x2_y3.mtx: 0 differences
[DIFF] serial/sol_x2_y4.mtx vs parallel/sol_x2_y4.mtx: 0 differences
[DIFF] serial/sol_x3_y1.mtx vs parallel/sol_x3_y1.mtx: 1 differences
[DIFF] serial/sol_x3_y2.mtx vs parallel/sol_x3_y2.mtx: 1 differences
[DIFF] serial/sol_x3_y3.mtx vs parallel/sol_x3_y3.mtx: 1 differences
[DIFF] serial/sol_x3_y4.mtx vs parallel/sol_x3_y4.mtx: 0 differences
[DIFF] serial/sol_x4_y1.mtx vs parallel/sol_x4_y1.mtx: 0 differences
[DIFF] serial/sol_x4_y2.mtx vs parallel/sol_x4_y2.mtx: 0 differences
[DIFF] serial/sol_x4_y3.mtx vs parallel/sol_x4_y3.mtx: 0 differences
[DIFF] serial/sol_x4_y4.mtx vs parallel/sol_x4_y4.mtx: 0 differences
[RUN] psb_gedot
[DATE] 2026-06-03 15:57:37
[DIFF] serial/sol_x1_y1.mtx vs parallel/sol_x1_y1.mtx: 1 differences
[DIFF] serial/sol_x1_y2.mtx vs parallel/sol_x1_y2.mtx: 1 differences
[DIFF] serial/sol_x1_y3.mtx vs parallel/sol_x1_y3.mtx: 0 differences
[DIFF] serial/sol_x1_y4.mtx vs parallel/sol_x1_y4.mtx: 0 differences
[DIFF] serial/sol_x2_y1.mtx vs parallel/sol_x2_y1.mtx: 1 differences
[DIFF] serial/sol_x2_y2.mtx vs parallel/sol_x2_y2.mtx: 1 differences
[DIFF] serial/sol_x2_y3.mtx vs parallel/sol_x2_y3.mtx: 0 differences
[DIFF] serial/sol_x2_y4.mtx vs parallel/sol_x2_y4.mtx: 0 differences
[DIFF] serial/sol_x3_y1.mtx vs parallel/sol_x3_y1.mtx: 1 differences
[DIFF] serial/sol_x3_y2.mtx vs parallel/sol_x3_y2.mtx: 1 differences
[DIFF] serial/sol_x3_y3.mtx vs parallel/sol_x3_y3.mtx: 1 differences
[DIFF] serial/sol_x3_y4.mtx vs parallel/sol_x3_y4.mtx: 0 differences
[DIFF] serial/sol_x4_y1.mtx vs parallel/sol_x4_y1.mtx: 0 differences
[DIFF] serial/sol_x4_y2.mtx vs parallel/sol_x4_y2.mtx: 0 differences
[DIFF] serial/sol_x4_y3.mtx vs parallel/sol_x4_y3.mtx: 0 differences
[DIFF] serial/sol_x4_y4.mtx vs parallel/sol_x4_y4.mtx: 0 differences
[RUN] psb_gedot
[DATE] 2026-06-03 16:10:36
[DIFF] serial/sol_x1_y1.mtx vs parallel/sol_x1_y1.mtx: 0 differences
[DIFF] serial/sol_x1_y2.mtx vs parallel/sol_x1_y2.mtx: 0 differences
[DIFF] serial/sol_x1_y3.mtx vs parallel/sol_x1_y3.mtx: 0 differences
[DIFF] serial/sol_x1_y4.mtx vs parallel/sol_x1_y4.mtx: 0 differences
[DIFF] serial/sol_x2_y1.mtx vs parallel/sol_x2_y1.mtx: 0 differences
[DIFF] serial/sol_x2_y2.mtx vs parallel/sol_x2_y2.mtx: 0 differences
[DIFF] serial/sol_x2_y3.mtx vs parallel/sol_x2_y3.mtx: 0 differences
[DIFF] serial/sol_x2_y4.mtx vs parallel/sol_x2_y4.mtx: 0 differences
[DIFF] serial/sol_x3_y1.mtx vs parallel/sol_x3_y1.mtx: 0 differences
[DIFF] serial/sol_x3_y2.mtx vs parallel/sol_x3_y2.mtx: 0 differences
[DIFF] serial/sol_x3_y3.mtx vs parallel/sol_x3_y3.mtx: 0 differences
[DIFF] serial/sol_x3_y4.mtx vs parallel/sol_x3_y4.mtx: 0 differences
[DIFF] serial/sol_x4_y1.mtx vs parallel/sol_x4_y1.mtx: 0 differences
[DIFF] serial/sol_x4_y2.mtx vs parallel/sol_x4_y2.mtx: 0 differences
[DIFF] serial/sol_x4_y3.mtx vs parallel/sol_x4_y3.mtx: 0 differences
[DIFF] serial/sol_x4_y4.mtx vs parallel/sol_x4_y4.mtx: 0 differences
[RUN] psb_gedot
[DATE] 2026-06-04 10:45:28
[DIFF] serial/sol_x1_y1.mtx vs parallel/sol_x1_y1.mtx: 0 differences
[DIFF] serial/sol_x1_y2.mtx vs parallel/sol_x1_y2.mtx: 0 differences
[DIFF] serial/sol_x1_y3.mtx vs parallel/sol_x1_y3.mtx: 0 differences
[DIFF] serial/sol_x1_y4.mtx vs parallel/sol_x1_y4.mtx: 0 differences
[DIFF] serial/sol_x2_y1.mtx vs parallel/sol_x2_y1.mtx: 0 differences
[DIFF] serial/sol_x2_y2.mtx vs parallel/sol_x2_y2.mtx: 0 differences
[DIFF] serial/sol_x2_y3.mtx vs parallel/sol_x2_y3.mtx: 0 differences
[DIFF] serial/sol_x2_y4.mtx vs parallel/sol_x2_y4.mtx: 0 differences
[DIFF] serial/sol_x3_y1.mtx vs parallel/sol_x3_y1.mtx: 0 differences
[DIFF] serial/sol_x3_y2.mtx vs parallel/sol_x3_y2.mtx: 0 differences
[DIFF] serial/sol_x3_y3.mtx vs parallel/sol_x3_y3.mtx: 0 differences
[DIFF] serial/sol_x3_y4.mtx vs parallel/sol_x3_y4.mtx: 0 differences
[DIFF] serial/sol_x4_y1.mtx vs parallel/sol_x4_y1.mtx: 0 differences
[DIFF] serial/sol_x4_y2.mtx vs parallel/sol_x4_y2.mtx: 0 differences
[DIFF] serial/sol_x4_y3.mtx vs parallel/sol_x4_y3.mtx: 0 differences
[DIFF] serial/sol_x4_y4.mtx vs parallel/sol_x4_y4.mtx: 0 differences
[RUN] psb_gedot
[DATE] 2026-06-04 21:00:08
[DIFF] serial/sol_x1_y1.mtx vs parallel/sol_x1_y1.mtx: 0 differences
[DIFF] serial/sol_x1_y2.mtx vs parallel/sol_x1_y2.mtx: 0 differences
[DIFF] serial/sol_x1_y3.mtx vs parallel/sol_x1_y3.mtx: 0 differences
[DIFF] serial/sol_x1_y4.mtx vs parallel/sol_x1_y4.mtx: 0 differences
[DIFF] serial/sol_x2_y1.mtx vs parallel/sol_x2_y1.mtx: 0 differences
[DIFF] serial/sol_x2_y2.mtx vs parallel/sol_x2_y2.mtx: 0 differences
[DIFF] serial/sol_x2_y3.mtx vs parallel/sol_x2_y3.mtx: 0 differences
[DIFF] serial/sol_x2_y4.mtx vs parallel/sol_x2_y4.mtx: 0 differences
[DIFF] serial/sol_x3_y1.mtx vs parallel/sol_x3_y1.mtx: 0 differences
[DIFF] serial/sol_x3_y2.mtx vs parallel/sol_x3_y2.mtx: 0 differences
[DIFF] serial/sol_x3_y3.mtx vs parallel/sol_x3_y3.mtx: 0 differences
[DIFF] serial/sol_x3_y4.mtx vs parallel/sol_x3_y4.mtx: 0 differences
[DIFF] serial/sol_x4_y1.mtx vs parallel/sol_x4_y1.mtx: 0 differences
[DIFF] serial/sol_x4_y2.mtx vs parallel/sol_x4_y2.mtx: 0 differences
[DIFF] serial/sol_x4_y3.mtx vs parallel/sol_x4_y3.mtx: 0 differences
[DIFF] serial/sol_x4_y4.mtx vs parallel/sol_x4_y4.mtx: 0 differences
[RUN] psb_gedot
[DATE] 2026-06-04 21:19:38
[DIFF] serial/sol_x1_y1.mtx vs parallel/sol_x1_y1.mtx: 0 differences
[DIFF] serial/sol_x1_y2.mtx vs parallel/sol_x1_y2.mtx: 0 differences
[DIFF] serial/sol_x1_y3.mtx vs parallel/sol_x1_y3.mtx: 0 differences
[DIFF] serial/sol_x1_y4.mtx vs parallel/sol_x1_y4.mtx: 0 differences
[DIFF] serial/sol_x2_y1.mtx vs parallel/sol_x2_y1.mtx: 0 differences
[DIFF] serial/sol_x2_y2.mtx vs parallel/sol_x2_y2.mtx: 0 differences
[DIFF] serial/sol_x2_y3.mtx vs parallel/sol_x2_y3.mtx: 0 differences
[DIFF] serial/sol_x2_y4.mtx vs parallel/sol_x2_y4.mtx: 0 differences
[DIFF] serial/sol_x3_y1.mtx vs parallel/sol_x3_y1.mtx: 0 differences
[DIFF] serial/sol_x3_y2.mtx vs parallel/sol_x3_y2.mtx: 0 differences
[DIFF] serial/sol_x3_y3.mtx vs parallel/sol_x3_y3.mtx: 0 differences
[DIFF] serial/sol_x3_y4.mtx vs parallel/sol_x3_y4.mtx: 0 differences
[DIFF] serial/sol_x4_y1.mtx vs parallel/sol_x4_y1.mtx: 0 differences
[DIFF] serial/sol_x4_y2.mtx vs parallel/sol_x4_y2.mtx: 0 differences
[DIFF] serial/sol_x4_y3.mtx vs parallel/sol_x4_y3.mtx: 0 differences
[DIFF] serial/sol_x4_y4.mtx vs parallel/sol_x4_y4.mtx: 0 differences

@ -1,74 +0,0 @@
Welcome to PSBLAS version: 3.9.1
This is the psb_gedot_test sample program
Number of processes used in this computation: 1
[2026-06-04 21:19:38] Generation gedot single precision result file serial/sol_x1_y1.mtx 1/16 [OK]
[2026-06-04 21:19:38] Generation gedot single precision result file serial/sol_x1_y2.mtx 2/16 [OK]
[2026-06-04 21:19:38] Generation gedot single precision result file serial/sol_x1_y3.mtx 3/16 [OK]
[2026-06-04 21:19:38] Generation gedot single precision result file serial/sol_x1_y4.mtx 4/16 [OK]
[2026-06-04 21:19:38] Generation gedot single precision result file serial/sol_x2_y1.mtx 5/16 [OK]
[2026-06-04 21:19:38] Generation gedot single precision result file serial/sol_x2_y2.mtx 6/16 [OK]
[2026-06-04 21:19:38] Generation gedot single precision result file serial/sol_x2_y3.mtx 7/16 [OK]
[2026-06-04 21:19:38] Generation gedot single precision result file serial/sol_x2_y4.mtx 8/16 [OK]
[2026-06-04 21:19:38] Generation gedot single precision result file serial/sol_x3_y1.mtx 9/16 [OK]
[2026-06-04 21:19:38] Generation gedot single precision result file serial/sol_x3_y2.mtx 10/16 [OK]
[2026-06-04 21:19:38] Generation gedot single precision result file serial/sol_x3_y3.mtx 11/16 [OK]
[2026-06-04 21:19:38] Generation gedot single precision result file serial/sol_x3_y4.mtx 12/16 [OK]
[2026-06-04 21:19:38] Generation gedot single precision result file serial/sol_x4_y1.mtx 13/16 [OK]
[2026-06-04 21:19:38] Generation gedot single precision result file serial/sol_x4_y2.mtx 14/16 [OK]
[2026-06-04 21:19:38] Generation gedot single precision result file serial/sol_x4_y3.mtx 15/16 [OK]
[2026-06-04 21:19:38] Generation gedot single precision result file serial/sol_x4_y4.mtx 16/16 [OK]
[2026-06-04 21:19:38] Double precision check on file serial/sol_x1_y1.mtx 1/16 [OK]
[2026-06-04 21:19:38] Double precision check on file serial/sol_x1_y2.mtx 2/16 [OK]
[2026-06-04 21:19:38] Double precision check on file serial/sol_x1_y3.mtx 3/16 [OK]
[2026-06-04 21:19:38] Double precision check on file serial/sol_x1_y4.mtx 4/16 [OK]
[2026-06-04 21:19:38] Double precision check on file serial/sol_x2_y1.mtx 5/16 [OK]
[2026-06-04 21:19:38] Double precision check on file serial/sol_x2_y2.mtx 6/16 [OK]
[2026-06-04 21:19:39] Double precision check on file serial/sol_x2_y3.mtx 7/16 [OK]
[2026-06-04 21:19:39] Double precision check on file serial/sol_x2_y4.mtx 8/16 [OK]
[2026-06-04 21:19:39] Double precision check on file serial/sol_x3_y1.mtx 9/16 [OK]
[2026-06-04 21:19:39] Double precision check on file serial/sol_x3_y2.mtx 10/16 [OK]
[2026-06-04 21:19:39] Double precision check on file serial/sol_x3_y3.mtx 11/16 [OK]
[2026-06-04 21:19:39] Double precision check on file serial/sol_x3_y4.mtx 12/16 [OK]
[2026-06-04 21:19:39] Double precision check on file serial/sol_x4_y1.mtx 13/16 [OK]
[2026-06-04 21:19:39] Double precision check on file serial/sol_x4_y2.mtx 14/16 [OK]
[2026-06-04 21:19:39] Double precision check on file serial/sol_x4_y3.mtx 15/16 [OK]
[2026-06-04 21:19:39] Double precision check on file serial/sol_x4_y4.mtx 16/16 [OK]
Welcome to PSBLAS version: 3.9.1
This is the psb_gedot_test sample program
Number of processes used in this computation: 4
[2026-06-04 21:19:39] Generation gedot single precision result file parallel/sol_x1_y1.mtx 1/16 [OK]
[2026-06-04 21:19:39] Generation gedot single precision result file parallel/sol_x1_y2.mtx 2/16 [OK]
[2026-06-04 21:19:39] Generation gedot single precision result file parallel/sol_x1_y3.mtx 3/16 [OK]
[2026-06-04 21:19:39] Generation gedot single precision result file parallel/sol_x1_y4.mtx 4/16 [OK]
[2026-06-04 21:19:39] Generation gedot single precision result file parallel/sol_x2_y1.mtx 5/16 [OK]
[2026-06-04 21:19:39] Generation gedot single precision result file parallel/sol_x2_y2.mtx 6/16 [OK]
[2026-06-04 21:19:39] Generation gedot single precision result file parallel/sol_x2_y3.mtx 7/16 [OK]
[2026-06-04 21:19:39] Generation gedot single precision result file parallel/sol_x2_y4.mtx 8/16 [OK]
[2026-06-04 21:19:39] Generation gedot single precision result file parallel/sol_x3_y1.mtx 9/16 [OK]
[2026-06-04 21:19:39] Generation gedot single precision result file parallel/sol_x3_y2.mtx 10/16 [OK]
[2026-06-04 21:19:39] Generation gedot single precision result file parallel/sol_x3_y3.mtx 11/16 [OK]
[2026-06-04 21:19:39] Generation gedot single precision result file parallel/sol_x3_y4.mtx 12/16 [OK]
[2026-06-04 21:19:39] Generation gedot single precision result file parallel/sol_x4_y1.mtx 13/16 [OK]
[2026-06-04 21:19:39] Generation gedot single precision result file parallel/sol_x4_y2.mtx 14/16 [OK]
[2026-06-04 21:19:39] Generation gedot single precision result file parallel/sol_x4_y3.mtx 15/16 [OK]
[2026-06-04 21:19:39] Generation gedot single precision result file parallel/sol_x4_y4.mtx 16/16 [OK]
[2026-06-04 21:19:39] Double precision check on file parallel/sol_x1_y1.mtx 1/16 [OK]
[2026-06-04 21:19:39] Double precision check on file parallel/sol_x1_y2.mtx 2/16 [OK]
[2026-06-04 21:19:39] Double precision check on file parallel/sol_x1_y3.mtx 3/16 [OK]
[2026-06-04 21:19:39] Double precision check on file parallel/sol_x1_y4.mtx 4/16 [OK]
[2026-06-04 21:19:39] Double precision check on file parallel/sol_x2_y1.mtx 5/16 [OK]
[2026-06-04 21:19:39] Double precision check on file parallel/sol_x2_y2.mtx 6/16 [OK]
[2026-06-04 21:19:39] Double precision check on file parallel/sol_x2_y3.mtx 7/16 [OK]
[2026-06-04 21:19:39] Double precision check on file parallel/sol_x2_y4.mtx 8/16 [OK]
[2026-06-04 21:19:39] Double precision check on file parallel/sol_x3_y1.mtx 9/16 [OK]
[2026-06-04 21:19:39] Double precision check on file parallel/sol_x3_y2.mtx 10/16 [OK]
[2026-06-04 21:19:39] Double precision check on file parallel/sol_x3_y3.mtx 11/16 [OK]
[2026-06-04 21:19:39] Double precision check on file parallel/sol_x3_y4.mtx 12/16 [OK]
[2026-06-04 21:19:39] Double precision check on file parallel/sol_x4_y1.mtx 13/16 [OK]
[2026-06-04 21:19:39] Double precision check on file parallel/sol_x4_y2.mtx 14/16 [OK]
[2026-06-04 21:19:39] Double precision check on file parallel/sol_x4_y3.mtx 15/16 [OK]
[2026-06-04 21:19:39] Double precision check on file parallel/sol_x4_y4.mtx 16/16 [OK]

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1,44 @@
cmake_minimum_required(VERSION 3.10)
project(nested Fortran)

# The PSBLAS installation prefix is mandatory; stop at configure time with a
# usage hint when it was not passed on the command line.
if(NOT DEFINED PSBLAS_INSTALL_DIR)
  message(FATAL_ERROR "Please specify the path to the psblas installation directory using -DPSBLAS_INSTALL_DIR=<path>")
endif()

# Header, module and library directories, all derived from the install prefix.
set(INSTALLDIR "${PSBLAS_INSTALL_DIR}")
set(INCDIR "${INSTALLDIR}/include")
set(MODDIR "${INSTALLDIR}/modules")
set(LIBDIR "${INSTALLDIR}/lib")

# Locate the psblas package under the prefix and expose its headers and
# Fortran module files to the compiler.
find_package(psblas REQUIRED PATHS ${INSTALLDIR})
include_directories(${INCDIR} ${MODDIR})

# Executables are placed in runs/ next to the sources; make sure it exists.
set(EXEDIR "${CMAKE_CURRENT_SOURCE_DIR}/runs")
file(MAKE_DIRECTORY ${EXEDIR})

# Nested (block-structured / MATNEST) tests.
# Every test is one executable built from the single source <name>.F90,
# linked against the same PSBLAS libraries and emitted into EXEDIR.
foreach(nest_test psb_d_nest_glob_test psb_d_nest_rect_test psb_d_nest_cg_test)
  add_executable(${nest_test} ${nest_test}.F90)
  target_link_libraries(${nest_test} psblas::util psblas::linsolve psblas::prec psblas::ext psblas::base)
  set_target_properties(${nest_test} PROPERTIES
    RUNTIME_OUTPUT_DIRECTORY ${EXEDIR}
  )
endforeach()

@ -0,0 +1,42 @@
#
# Makefile for the nested (block-structured / MATNEST) matrix tests.
#
# Builds the three test programs and moves each executable into $(EXEDIR).
# Compiler, linker and flag variables (FLINK, FMFLAG, PSBLDLIBS, ...) as well
# as the rules turning .F90 sources into objects are expected to come from
# the included Make.inc.psblas.
#
INSTALLDIR=../..
INCDIR=$(INSTALLDIR)/include
MODDIR=$(INSTALLDIR)/modules/
include $(INCDIR)/Make.inc.psblas
#
# Libraries used
#
LIBDIR=$(INSTALLDIR)/lib
PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_ext -lpsb_base
LDLIBS= $(PSBLDLIBS)
#
# Compilers and such
#
CCOPT= -g
FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG).
EXEDIR=./runs

# None of these names is a file produced by its recipe.
.PHONY: all runsd clean verycleanlib lib

all: runsd psb_d_nest_glob_test psb_d_nest_rect_test psb_d_nest_cg_test

# Create the directory that receives the executables.
runsd:
	(if test ! -d $(EXEDIR) ; then mkdir -p $(EXEDIR); fi)

# Every link rule lists runsd as an order-only prerequisite: without it,
# building a single test when $(EXEDIR) does not exist yet would make the
# final mv rename the executable to a plain file called "runs".
psb_d_nest_glob_test: psb_d_nest_glob_test.o | runsd
	$(FLINK) psb_d_nest_glob_test.o -o psb_d_nest_glob_test $(PSBLAS_LIB) $(LDLIBS)
	/bin/mv psb_d_nest_glob_test $(EXEDIR)

psb_d_nest_rect_test: psb_d_nest_rect_test.o | runsd
	$(FLINK) psb_d_nest_rect_test.o -o psb_d_nest_rect_test $(PSBLAS_LIB) $(LDLIBS)
	/bin/mv psb_d_nest_rect_test $(EXEDIR)

psb_d_nest_cg_test: psb_d_nest_cg_test.o | runsd
	$(FLINK) psb_d_nest_cg_test.o -o psb_d_nest_cg_test $(PSBLAS_LIB) $(LDLIBS)
	/bin/mv psb_d_nest_cg_test $(EXEDIR)

# Remove objects, module files and the installed executables.
# NOTE(review): $(.mod) is presumably the module-file extension defined by
# Make.inc.psblas -- confirm.
clean:
	/bin/rm -f psb_d_nest_glob_test.o psb_d_nest_rect_test.o psb_d_nest_cg_test.o *$(.mod) \
	$(EXEDIR)/psb_d_nest_glob_test $(EXEDIR)/psb_d_nest_rect_test $(EXEDIR)/psb_d_nest_cg_test

# Recursive invocations go through $(MAKE) so that -j / -n and the other
# command-line flags are propagated to the top-level build.
verycleanlib:
	(cd ../..; $(MAKE) veryclean)

lib:
	(cd ../../; $(MAKE) library)

@ -0,0 +1,248 @@
# Nested (block-structured / MATNEST) matrices in PSBLAS
Author: Simone Staccone (Stack-1)
This directory contains the tests for the **nested matrix** support added to PSBLAS: a block-structured distributed operator
```
    [ A11  A12  ... ]
M = [ A21  A22  ... ]
    [ ...  ...      ]
```
whose blocks are kept as separate sparse matrices (one per field) but which presents itself to Krylov solvers and preconditioners as a **single ordinary distributed matrix**. It is the PSBLAS analogue of PETSc's `MATNEST`.
The motivating case is the **saddle-point** system
```
M = [ A  B^T ]
    [ B   0  ]
```
(symmetric indefinite, with the (2,2) block absent), but the implementation supports any square multi-field block operator with possibly **rectangular**
sub-blocks.
## 1. Concepts
* **Field** — a contiguous index space (e.g. velocity `V` and pressure `Q` in a saddle-point problem). Each field has its own `psb_desc_type` distribution.
* **Block (i,j)** — the sub-matrix coupling field `i` (rows) with field `j` (columns). It may be rectangular (different field sizes) and may be absent.
* **Global operator** — the blocks are concatenated into a single **square** operator `M` of size `sum(field_sizes)`, distributed over one **composed global descriptor** with a **union halo** (one halo exchange per matrix-vector product, covering all blocks of a given column field at once).
* **Rectangular blocks** — PSBLAS does not support rectangular *distributed* matrices, but it does support rectangular *local* CSR/COO matrices. The rectangular product therefore happens only in the **local** block `csmv`; the only object carrying a descriptor (and hence communication) is the global operator, which is always square.
The global operator (`a_glob`) and global descriptor (`desc_glob`) can be passed unchanged to `psb_spmm`, `psb_krylov`, and the standard preconditioners.
## 2. Quick start: `psb_d_nest_matrix`
The easy way to build a nested matrix is the `psb_d_nest_matrix` type (module `psb_d_nest_builder_mod`, re-exported by the umbrella `psb_d_nest_mod`), which follows the usual PSBLAS `init` / `ins` / `asb` pattern and hides all the descriptor / halo / compose / setup boilerplate:
```fortran
use psb_d_nest_mod
type(psb_d_nest_matrix) :: nested_matrix
integer(psb_lpk_) :: n1, n2
! 1) declare the field structure: 2 fields of global size n1, n2
call nested_matrix%init(ctxt, [n1, n2], info)
! 2) insert the block values, owned rows only (PSBLAS convention).
call nested_matrix%ins(1, 1, nz_A, iaA, jaA, valA, info) ! A = block (1,1)
call nested_matrix%ins(1, 2, nz_Bt, iaBt, jaBt, valBt, info) ! B^T = block (1,2)
call nested_matrix%ins(2, 1, nz_B, iaB, jaB, valB, info) ! B = block (2,1)
! (the (2,2) block is simply not inserted)
! 3) assemble: builds nested_matrix%a_glob and nested_matrix%desc_glob
call nested_matrix%asb(info)
! 4) from here on it is an ordinary distributed matrix/descriptor
call psb_geall(x, nested_matrix%desc_glob, info)
...
call prec%init(ctxt, 'BJAC', info)
call prec%build(nested_matrix%a_glob, nested_matrix%desc_glob, info)
call psb_krylov('CG', nested_matrix%a_glob, prec, b, x, eps, &
& nested_matrix%desc_glob, info, itmax=..., iter=..., err=...)
! 5) release
call nested_matrix%free(info)
```
## 3. User API reference
All of the public API is available through the umbrella module:
```fortran
use psb_d_nest_mod
```
### 3.1 `type(psb_d_nest_matrix)` — the nested matrix (recommended)
| Member | Meaning |
|--------|---------|
| `a_glob` | `type(psb_dspmat_type)` — the assembled global operator; pass it to `psb_spmm`, `psb_krylov`, `prec%build` |
| `desc_glob` | `type(psb_desc_type)` — the composed global descriptor; pass it wherever a descriptor is expected |
| `field_desc(i)` | `type(psb_desc_type)` — the descriptor of field `i` (advanced use; for the common queries see `get_owned_rows` below) |
| `n_fields` | number of fields |
To know which rows it must insert, a process asks the matrix directly — no
descriptor jargon needed:
```fortran
integer(psb_lpk_), allocatable :: my_rows(:)
my_rows = nested_matrix%get_owned_rows(1) ! global rows of field 1 owned here
do k = 1, size(my_rows)
global_row = my_rows(k)
... ! build the entries of this row
end do
```
| Query | Result |
|-------|--------|
| `nested_matrix%get_owned_rows(i_field)` | `integer(psb_lpk_), allocatable (:)` — the GLOBAL indices (in the field index space, 1..field size) of the rows of field `i_field` owned by this process |
| `nested_matrix%get_owned_row_count(i_field)` | `integer(psb_ipk_)` — how many rows of field `i_field` this process owns |
Methods (collective over the communicator unless noted):
#### `call nested_matrix%init(ctxt, field_sizes, info)`
Create the field structure. One descriptor per field is created with a block
row distribution; the total size is independent of the number of processes.
| Argument | Type | Intent | Meaning |
|----------|------|--------|---------|
| `ctxt` | `type(psb_ctxt_type)` | in | parallel context from `psb_init` |
| `field_sizes(:)` | `integer(psb_lpk_)` | in | global size of each field, e.g. `[n1, n2]` |
| `info` | `integer(psb_ipk_)` | out | return code, `psb_success_` on success |
#### `call nested_matrix%ins(block_row, block_col, n_entries, entry_rows, entry_cols, entry_vals, info)`
Insert a batch of coefficients into block `(block_row, block_col)`. May be
called any number of times per block, in any order, before `asb`. Each process
inserts only the rows it owns (PSBLAS convention); cross-field columns are
registered into the union halo automatically.
| Argument | Type | Intent | Meaning |
|----------|------|--------|---------|
| `block_row` | `integer(psb_ipk_)` | in | row-field index of the block (1..n_fields) |
| `block_col` | `integer(psb_ipk_)` | in | column-field index of the block (1..n_fields) |
| `n_entries` | `integer(psb_ipk_)` | in | number of triplets in this batch |
| `entry_rows(:)` | `integer(psb_lpk_)` | in | GLOBAL row indices in field `block_row` (1..field size) |
| `entry_cols(:)` | `integer(psb_lpk_)` | in | GLOBAL column indices in field `block_col` (1..field size) |
| `entry_vals(:)` | `real(psb_dpk_)` | in | coefficient values |
| `info` | `integer(psb_ipk_)` | out | return code |
#### `call nested_matrix%asb(info [, type] [, mold])`
Assemble: builds the per-field halos, the (possibly rectangular) local blocks,
the composed global descriptor `desc_glob` and the global operator `a_glob`.
After `asb` no further `ins` is allowed, and the object must not be
copied/moved (the operator holds internal pointers into it).
The optional arguments select the **storage format of the blocks**:
| Argument | Type | Meaning |
|----------|------|---------|
| `type` | `character(len=*)` | a base format name: `'CSR'` (default), `'CSC'`, `'COO'` |
| `mold` | `class(psb_d_base_sparse_mat)` | any format class, e.g. `psb_d_ell_sparse_mat` / `psb_d_hll_sparse_mat` from `psb_ext` |
The nested operator is format-agnostic: every operation delegates to the
blocks' own methods, so each block runs its native kernels.
#### `call nested_matrix%free(info)`
Release every internal object (blocks, descriptors, global operator).
### 3.2 Solvers and preconditioners
`a_glob` / `desc_glob` work with the standard PSBLAS infrastructure:
* **Krylov methods** — `psb_krylov('CG' | 'BICGSTAB' | 'GMRES' | ..., nested_matrix%a_glob, prec, b, x, eps, nested_matrix%desc_glob, info, ...)`. Remember that CG requires an SPD operator; a genuine saddle-point operator is indefinite and needs MINRES/GMRES.
* **Preconditioners** — all the stock PSBLAS one-level preconditioners can be built directly on the nested operator:
* `'NONE'` — identity;
* `'DIAG'` / `'JACOBI'` — diagonal scaling (served by the nested `get_diag`, which concatenates the diagonals of the diagonal blocks; absent blocks contribute zeros);
* `'BJAC'` — block Jacobi with ILU factorization of the local rows (served by the nested `csgetrow`, which extracts the local rows of the global operator across all blocks).
```fortran
call prec%init(ctxt, 'BJAC', info)
call prec%build(nested_matrix%a_glob, nested_matrix%desc_glob, info)
```
### 3.3 Implemented base-class contract
The nested operator (`psb_d_nest_base_mat`) implements the standard
`psb_d_base_sparse_mat` contract by delegation to the blocks, so it can be used
wherever an assembled PSBLAS matrix is expected:
* **Products** — `csmv` (also transposed, `trans='T'`), `csmm` (multi-RHS),
`vect_mv` (encapsulated vectors: gathers/scatters through the vectors' own
`gth`/`sct` and runs each block through its `vect_mv`, so device block
formats execute their device kernels).
* **Access/conversions** — `get_diag`, `csgetrow` (and `csget`/`csgetblk`
through the base generics), `cp_to_coo`/`mv_to_coo` (and `cscnv`, `csclip`,
`tril`/`triu`, ... through the base generics built on the COO route).
* **Reductions** — `rowsum`/`arwsum`, `colsum`/`aclsum`, `maxval`,
`spnmi` (infinity norm), `spnm1` (1-norm).
* **Mutation/bookkeeping** — `scal` (left/right) and `scals` (the operator is a
view: scaling acts on the blocks), `clone` (shares the blocks, re-owns the
private index maps), `mold`, `sizeof`, `free`, `get_nzeros`, `get_fmt`.
Intentionally **not** implemented (they fail with the standard "missing
override" error): `cp_from_coo`/`mv_from_coo` (a nested operator cannot be
built from a flat matrix without the field structure), `csput` (insertions go
to the blocks before assembly), `cssv`/`cssm` (a triangular solve is undefined
for a block operator).
### 3.4 Low-level API (advanced)
`psb_d_nest_matrix` is built on lower-level pieces, available directly:
* `psb_cd_nest_compose(grid_desc, desc_glob, info)` — compose the per-field descriptors into the single global descriptor with the union halo.
* `psb_d_nest_base_setup(nest_op, block_storage, grid_desc, desc_glob, info)` — set up the `psb_d_nest_base_mat` operator (implements the local `csmv`, `get_diag`, `csgetrow`).
* `psb_d_nest_rect_block(blk, nz, ia, ja, val, desc_row, desc_col, info)` — build a single (possibly rectangular) local block from global triplets, with rows localized against `desc_row` and columns against `desc_col`.
A field-split interface (`psb_d_nest_get_block`, `psb_d_nest_get_field_desc`, `psb_d_nest_restrict_field`, `psb_d_nest_prolong_field`, `psb_d_nest_apply_block`) is exposed on `psb_d_nest_base_mat` as the hook for a future block (field-split / Schur) preconditioner.
## 4. Tests
| Test | What it checks |
|------------------------------|----------------|
| `psb_d_nest_glob_test` | Square 2×2 operator built with `psb_d_nest_matrix`; the nested `psb_spmm` is compared against the same matrix assembled monolithically in CSR (the 2-norm of the difference must not exceed `1e-10`). |
| `psb_d_nest_rect_test` | Same, with fields of different size (`nV = 2 nQ`) and genuinely **rectangular** off-diagonal blocks. |
| `psb_d_nest_cg_test` | Standard PSBLAS **CG** on an SPD, ill-conditioned operator (1D Laplacian reordered red-black), solved under every stock preconditioner (`NONE`, `DIAG`, `BJAC`/ILU(0)); requires every solve to finish before the iteration limit with a relative solution error of at most `1e-6`, and that `DIAG` reproduces the `NONE` iteration count exactly (an exactness check of the nested `get_diag`, since the diagonal is the constant `2I`). |
All tests run both serially and in parallel, and the result is invariant with respect to the number of MPI processes.
### Build and run
The PSBLAS library must be built/installed first (from the repository root):
```sh
make # or the CMake build
```
Then, from this directory:
```sh
make # builds the executables into ./runs
./runs/psb_d_nest_glob_test # serial
mpirun -np 4 ./runs/psb_d_nest_rect_test
mpirun -np 4 ./runs/psb_d_nest_cg_test
```
Each test prints a single `[PASS]` / `[FAIL]` line (printed by rank 0).
## 5. Source files
Library (under `base/modules/`):
* `desc/psb_desc_nest_mod.f90` — `psb_desc_nest_type` (grid of per-field descriptors)
* `serial/psb_d_nest_mat_mod.f90` — `psb_d_nest_sparse_mat` (block storage)
* `serial/psb_d_nest_base_mat_mod.F90` — `psb_d_nest_base_mat` (the MATNEST operator: `csmv`, `get_diag`, `csgetrow`)
* `tools/psb_cd_nest_tools_mod.F90` — descriptor tools (`psb_cd_nest_compose`, ...)
* `tools/psb_d_nest_tools_mod.F90` — block tools (`psb_d_nest_rect_block`, ...)
* `tools/psb_d_nest_builder_mod.F90` — `psb_d_nest_matrix` frontend (init/ins/asb)
* `psb_d_nest_mod.f90` — umbrella module (`use psb_d_nest_mod`)

@ -0,0 +1,265 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific prior written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! File: psb_d_nest_cg_test.F90
!
! Program: psb_d_nest_cg_test
! Author: Simone Staccone (Stack-1)
!
! Solves a linear system with the nested operator using the standard PSBLAS CG
! (psb_krylov('CG', ...)) under every stock one-level preconditioner, to show
! that the nested operator plugs into the PSBLAS preconditioning infrastructure:
! NONE (operator only),
! DIAG (exercises the nested get_diag),
! BJAC (ILU(0), exercises the nested csgetrow through the ILU build).
!
! CG needs a SYMMETRIC POSITIVE DEFINITE operator and, to stress the test
! (hundreds of matvecs), an ILL-CONDITIONED one. We use a real case: the 1D
! Laplacian tridiag(-1, 2, -1) on m = 2*field_size nodes, REORDERED red-black
! (odd nodes -> field 1, even nodes -> field 2). Under this reordering the
! Laplacian becomes exactly
!
! M = [ 2I C ] C(r,r) = -1 , C(r,r-1) = -1 (the Laplacian edges)
! [ C^T 2I ] C^T = exact transpose
!
! (odd nodes are not adjacent to each other -> diagonal blocks = 2I; every -1
! edge of the Laplacian becomes the coupling C). M is therefore the 1D
! Laplacian up to a permutation: SPD but with lambda_min ~ (pi/m)^2 => cond ~
! N^2 => CG performs O(N) iterations that GROW with N.
!
! The operator is built with the psb_d_nest_matrix utility. The test passes if
! every solve converges to the exact solution and DIAG reproduces the NONE
! iteration count exactly (with the constant diagonal 2I, Jacobi is a pure
! rescaling, so any mismatch would expose a wrong nested get_diag).
!
! Run: ./psb_d_nest_cg_test ; mpirun -np 4 ./psb_d_nest_cg_test
!
program psb_d_nest_cg_test
  !
  ! Builds a 2x2 nested operator (1D Laplacian in red-black ordering) with
  ! psb_d_nest_matrix, then solves M x = M*1 with psb_krylov('CG') once per
  ! stock preconditioner (NONE, DIAG, BJAC).  Rank 0 prints one line per
  ! solve and a final [PASS]/[FAIL] verdict.
  !
  ! The status of the block insertions is accumulated and reduced over all
  ! processes before assembly, so that an insertion failure on any rank is
  ! reported and makes every rank leave together.
  !
  use psb_base_mod
  use psb_util_mod
  use psb_prec_mod
  use psb_linsolve_mod
  use psb_d_nest_mod      ! umbrella: includes psb_d_nest_matrix (builder)
  implicit none

  type(psb_ctxt_type)     :: context
  integer(psb_ipk_)       :: my_rank, num_procs, info, i_local_row, entry_idx
  integer(psb_ipk_)       :: insert_status   ! worst status seen by the ins calls
  integer(psb_ipk_)       :: field1_local_rows, field2_local_rows
  integer(psb_lpk_)       :: field1_global_row, field2_global_row, field_size
  type(psb_d_nest_matrix) :: nested_matrix
  type(psb_dprec_type)    :: preconditioner
  type(psb_d_vect_type)   :: x_solution, rhs, x_exact
  integer(psb_lpk_), allocatable :: entry_rows(:), entry_cols(:)
  integer(psb_lpk_), allocatable :: field1_rows(:), field2_rows(:)
  real(psb_dpk_), allocatable    :: entry_vals(:)
  ! solver parameters
  real(psb_dpk_)    :: diag_value, stop_tol, final_residual, norm_x_exact, solution_error
  integer(psb_ipk_) :: max_iter, trace_level, n_iter, stop_criterion
  real(psb_dpk_), parameter :: solution_tol = 1.0e-6_psb_dpk_
  ! stock preconditioners to exercise on the nested operator
  integer(psb_ipk_), parameter :: n_precs = 3
  character(len=6), parameter  :: prec_names(n_precs) = ['NONE ', 'DIAG ', 'BJAC ']
  integer(psb_ipk_) :: i_prec, iter_none, iter_diag
  logical           :: all_passed

  call psb_init(context)
  call psb_info(context, my_rank, num_procs)

  field_size     = 512             ! global rows per field (global N = 2*field_size)
  diag_value     = 2.0_psb_dpk_    ! Laplacian diagonal (diagonal blocks = diag*I)
  stop_tol       = 1.0e-9_psb_dpk_
  max_iter       = 4000
  trace_level    = 0
  stop_criterion = 2               ! stop on the relative residual

  !---------------------------------------------------------------
  ! 1) create the nested operator: 2 fields of global size field_size
  !---------------------------------------------------------------
  call nested_matrix%init(context, [field_size, field_size], info)
  if (info /= psb_success_) then
    if (my_rank==0) write(*,*) 'FAIL: nested_matrix%init info=', info
    goto 9999
  end if
  field1_rows = nested_matrix%get_owned_rows(1)
  field2_rows = nested_matrix%get_owned_rows(2)
  field1_local_rows = size(field1_rows)
  field2_local_rows = size(field2_rows)

  !---------------------------------------------------------------
  ! 2) insert the blocks (owned rows only)
  !---------------------------------------------------------------
  insert_status = psb_success_

  ! block (1,1) = diag*I
  allocate(entry_rows(field1_local_rows), entry_cols(field1_local_rows), &
       & entry_vals(field1_local_rows))
  do i_local_row = 1, field1_local_rows
    field1_global_row       = field1_rows(i_local_row)
    entry_rows(i_local_row) = field1_global_row
    entry_cols(i_local_row) = field1_global_row
    entry_vals(i_local_row) = diag_value
  end do
  call nested_matrix%ins(1, 1, field1_local_rows, entry_rows, entry_cols, entry_vals, info)
  insert_status = max(insert_status, abs(info))
  deallocate(entry_rows, entry_cols, entry_vals)

  ! block (2,2) = diag*I
  allocate(entry_rows(field2_local_rows), entry_cols(field2_local_rows), &
       & entry_vals(field2_local_rows))
  do i_local_row = 1, field2_local_rows
    field2_global_row       = field2_rows(i_local_row)
    entry_rows(i_local_row) = field2_global_row
    entry_cols(i_local_row) = field2_global_row
    entry_vals(i_local_row) = diag_value
  end do
  call nested_matrix%ins(2, 2, field2_local_rows, entry_rows, entry_cols, entry_vals, info)
  insert_status = max(insert_status, abs(info))
  deallocate(entry_rows, entry_cols, entry_vals)

  ! block (1,2) = C : rows field1, cols field2 ; C(r,r)=-1, C(r,r-1)=-1
  ! (at most two entries per owned row; entry_idx counts the ones actually set)
  allocate(entry_rows(2*field1_local_rows), entry_cols(2*field1_local_rows), &
       & entry_vals(2*field1_local_rows))
  entry_idx = 0
  do i_local_row = 1, field1_local_rows
    field1_global_row     = field1_rows(i_local_row)
    entry_idx             = entry_idx + 1
    entry_rows(entry_idx) = field1_global_row
    entry_cols(entry_idx) = field1_global_row
    entry_vals(entry_idx) = -1.0_psb_dpk_
    if (field1_global_row > 1) then
      entry_idx             = entry_idx + 1
      entry_rows(entry_idx) = field1_global_row
      entry_cols(entry_idx) = field1_global_row - 1_psb_lpk_
      entry_vals(entry_idx) = -1.0_psb_dpk_
    end if
  end do
  call nested_matrix%ins(1, 2, entry_idx, entry_rows, entry_cols, entry_vals, info)
  insert_status = max(insert_status, abs(info))
  deallocate(entry_rows, entry_cols, entry_vals)

  ! block (2,1) = C^T : rows field2, cols field1 ; C^T(s,s)=-1, C^T(s,s+1)=-1
  allocate(entry_rows(2*field2_local_rows), entry_cols(2*field2_local_rows), &
       & entry_vals(2*field2_local_rows))
  entry_idx = 0
  do i_local_row = 1, field2_local_rows
    field2_global_row     = field2_rows(i_local_row)
    entry_idx             = entry_idx + 1
    entry_rows(entry_idx) = field2_global_row
    entry_cols(entry_idx) = field2_global_row
    entry_vals(entry_idx) = -1.0_psb_dpk_
    if (field2_global_row < field_size) then
      entry_idx             = entry_idx + 1
      entry_rows(entry_idx) = field2_global_row
      entry_cols(entry_idx) = field2_global_row + 1_psb_lpk_
      entry_vals(entry_idx) = -1.0_psb_dpk_
    end if
  end do
  call nested_matrix%ins(2, 1, entry_idx, entry_rows, entry_cols, entry_vals, info)
  insert_status = max(insert_status, abs(info))
  deallocate(entry_rows, entry_cols, entry_vals)

  ! Agree on the insertion outcome across all processes: a failure on a
  ! single rank must make every rank skip the collective assembly below.
  call psb_max(context, insert_status)
  if (insert_status /= psb_success_) then
    if (my_rank==0) write(*,*) 'FAIL: nested_matrix%ins info=', insert_status
    goto 9999
  end if

  !---------------------------------------------------------------
  ! 3) assemble: nested_matrix%a_glob / nested_matrix%desc_glob are ready for Krylov
  !---------------------------------------------------------------
  call nested_matrix%asb(info)
  if (info /= psb_success_) then
    if (my_rank==0) write(*,*) 'FAIL: nested_matrix%asb info=', info
    goto 9999
  end if

  !---------------------------------------------------------------
  ! 4) consistent RHS: x_exact = 1, rhs = M * x_exact (via the nested operator)
  !---------------------------------------------------------------
  call psb_geall(x_exact, nested_matrix%desc_glob, info)
  call psb_geasb(x_exact, nested_matrix%desc_glob, info)
  call x_exact%set(done)                       ! x_exact = 1 everywhere
  call psb_geall(rhs, nested_matrix%desc_glob, info)
  call psb_geasb(rhs, nested_matrix%desc_glob, info)
  call psb_spmm(done, nested_matrix%a_glob, x_exact, dzero, rhs, nested_matrix%desc_glob, info)
  if (info /= psb_success_) then
    if (my_rank == 0) write(*,*) 'FAIL: psb_spmm (RHS) info=', info
    goto 9999
  end if
  norm_x_exact = psb_genrm2(x_exact, nested_matrix%desc_glob, info)

  !---------------------------------------------------------------
  ! 5) solve with the standard PSBLAS CG under every stock preconditioner
  !---------------------------------------------------------------
  if (my_rank == 0) write(*,'(a,i0,a,i0)') ' np=', num_procs, ' N(global)=', 2*field_size
  all_passed = .true.
  iter_none  = 0
  iter_diag  = -1     ! differs from iter_none until the DIAG solve has run
  do i_prec = 1, n_precs
    call preconditioner%init(context, trim(prec_names(i_prec)), info)
    call preconditioner%build(nested_matrix%a_glob, nested_matrix%desc_glob, info)
    if (info /= psb_success_) then
      if (my_rank == 0) write(*,*) 'FAIL: prec%build (', trim(prec_names(i_prec)), ') info=', info
      all_passed = .false.
      exit
    end if

    call psb_geall(x_solution, nested_matrix%desc_glob, info)
    call psb_geasb(x_solution, nested_matrix%desc_glob, info)
    call psb_krylov('CG', nested_matrix%a_glob, preconditioner, rhs, x_solution, stop_tol, &
         & nested_matrix%desc_glob, info, &
         & itmax=max_iter, iter=n_iter, err=final_residual, itrace=trace_level, istop=stop_criterion)
    if (info /= psb_success_) then
      if (my_rank == 0) write(*,*) 'FAIL: psb_krylov(CG,', trim(prec_names(i_prec)), ') info=', info
      all_passed = .false.
      exit
    end if

    ! solution error: || x_solution - x_exact || / || x_exact ||
    ! (x_solution is overwritten with the difference)
    call psb_geaxpby(-done, x_exact, done, x_solution, nested_matrix%desc_glob, info)
    solution_error = psb_genrm2(x_solution, nested_matrix%desc_glob, info) / norm_x_exact
    if (my_rank == 0) then
      write(*,'(a,a6,a,i6,a,es12.4,a,es12.4)') ' prec=', prec_names(i_prec), &
           & ' CG iterations=', n_iter, ' residual=', final_residual, &
           & ' ||x-x_ex||/||x_ex||=', solution_error
    end if
    if ((n_iter >= max_iter) .or. (solution_error > solution_tol)) all_passed = .false.
    if (trim(prec_names(i_prec)) == 'NONE') iter_none = n_iter
    if (trim(prec_names(i_prec)) == 'DIAG') iter_diag = n_iter

    call psb_gefree(x_solution, nested_matrix%desc_glob, info)
    call preconditioner%free(info)
  end do

  !---------------------------------------------------------------
  ! 6) verdict: every preconditioner converges to the right solution, and DIAG
  !    reproduces the NONE iteration count exactly (Jacobi on the constant
  !    diagonal 2I is a pure rescaling -> exactness check of the nested get_diag)
  !---------------------------------------------------------------
  if (my_rank == 0) then
    if (all_passed .and. (iter_diag == iter_none)) then
      write(*,*) '[PASS] CG converges on the nested operator with NONE/DIAG/BJAC'
    else
      write(*,*) '[FAIL] preconditioned CG on the nested operator (tol ', solution_tol, ')'
    end if
  end if

  ! release the vectors while their descriptor is still alive, then the operator
  call psb_gefree(x_exact, nested_matrix%desc_glob, info)
  call psb_gefree(rhs, nested_matrix%desc_glob, info)
  call nested_matrix%free(info)

9999 continue
  call psb_exit(context)
end program psb_d_nest_cg_test

@ -0,0 +1,226 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific prior written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! File: psb_d_nest_glob_test.F90
!
! Program: psb_d_nest_glob_test
! Author: Simone Staccone (Stack-1)
!
! Validates the "global nested operator" path built through the
! psb_d_nest_matrix utility (init/ins/asb): the user only supplies the field
! sizes and the block values, and obtains nested_matrix%a_glob /
! nested_matrix%desc_glob ready for psb_spmm. The result is compared against
! the SAME matrix assembled monolithically in CSR on the same global
! descriptor (oracle).
!
! 2x2 operator (fields of size field_size):
! [ A B^T ] A = tridiag(-1, 2, -1) (block 1,1)
! [ B 0 ] B^T = 0.5 * I (block 1,2)
! B = 0.3 * I (block 2,1)
! (2,2) absent
!
! Run: ./psb_d_nest_glob_test (serial)
! mpirun -np 4 ./psb_d_nest_glob_test
!
program psb_d_nest_glob_test
  !
  ! Builds a 2x2 nested operator with psb_d_nest_matrix (blocks stored in
  ! HLL), assembles the same matrix monolithically on the same global
  ! descriptor, applies both to x[g] = g with psb_spmm and compares the two
  ! results in the 2-norm.  Rank 0 prints the mismatch and a [PASS]/[FAIL]
  ! verdict.
  !
  ! The status of the block insertions is accumulated and reduced over all
  ! processes before assembly, so that an insertion failure on any rank is
  ! reported and makes every rank leave together.
  !
  use psb_base_mod
  use psb_util_mod
  use psb_d_nest_mod
  use psb_d_hll_mat_mod, only : psb_d_hll_sparse_mat   ! psb_ext format for the blocks
  implicit none

  type(psb_d_hll_sparse_mat) :: hll_mold
  type(psb_ctxt_type)     :: context
  integer(psb_ipk_)       :: my_rank, num_procs, info, i_local_row
  integer(psb_ipk_)       :: insert_status   ! worst status seen by the ins calls
  integer(psb_ipk_)       :: entry_idx, field1_local_rows, field2_local_rows
  integer(psb_lpk_)       :: global_row, global_col, field_size
  type(psb_d_nest_matrix) :: nested_matrix     ! the nested operator (init/ins/asb)
  type(psb_dspmat_type)   :: monolithic_ref    ! monolithic CSR oracle
  type(psb_d_vect_type)   :: x_vec, y_nested, y_monolithic
  integer(psb_lpk_), allocatable :: entry_rows(:), entry_cols(:)
  integer(psb_lpk_), allocatable :: field1_rows(:), field2_rows(:)
  real(psb_dpk_), allocatable    :: entry_vals(:)
  real(psb_dpk_)    :: insert_value(1)
  real(psb_dpk_)    :: mismatch_norm
  real(psb_dpk_), parameter :: tolerance = 1.0e-10_psb_dpk_

  call psb_init(context)
  call psb_info(context, my_rank, num_procs)

  field_size = 32     ! global size of each field

  !---------------------------------------------------------------
  ! 1) build the 2x2 nested operator through the utility
  !---------------------------------------------------------------
  call nested_matrix%init(context, [field_size, field_size], info)
  if (info /= psb_success_) then
    if (my_rank==0) write(*,*) 'FAIL: nested_matrix%init info=', info
    goto 9999
  end if
  field1_rows = nested_matrix%get_owned_rows(1)
  field2_rows = nested_matrix%get_owned_rows(2)
  field1_local_rows = size(field1_rows)
  field2_local_rows = size(field2_rows)

  !---------------------------------------------------------------
  ! 2) insert the block values (owned rows only)
  !---------------------------------------------------------------
  insert_status = psb_success_

  ! A = tridiag(-1,2,-1) -> block (1,1)
  ! (at most three entries per owned row; entry_idx counts the ones actually set)
  allocate(entry_rows(3*field1_local_rows), entry_cols(3*field1_local_rows), &
       & entry_vals(3*field1_local_rows))
  entry_idx = 0
  do i_local_row = 1, field1_local_rows
    global_row            = field1_rows(i_local_row)
    entry_idx             = entry_idx + 1
    entry_rows(entry_idx) = global_row
    entry_cols(entry_idx) = global_row
    entry_vals(entry_idx) = 2.0_psb_dpk_
    if (global_row > 1) then
      entry_idx             = entry_idx + 1
      entry_rows(entry_idx) = global_row
      entry_cols(entry_idx) = global_row - 1_psb_lpk_
      entry_vals(entry_idx) = -1.0_psb_dpk_
    end if
    if (global_row < field_size) then
      entry_idx             = entry_idx + 1
      entry_rows(entry_idx) = global_row
      entry_cols(entry_idx) = global_row + 1_psb_lpk_
      entry_vals(entry_idx) = -1.0_psb_dpk_
    end if
  end do
  call nested_matrix%ins(1, 1, entry_idx, entry_rows, entry_cols, entry_vals, info)
  insert_status = max(insert_status, abs(info))
  deallocate(entry_rows, entry_cols, entry_vals)

  ! B^T = 0.5 I -> block (1,2): rows in field 1, columns in field 2
  allocate(entry_rows(field1_local_rows), entry_cols(field1_local_rows), &
       & entry_vals(field1_local_rows))
  entry_idx = 0
  do i_local_row = 1, field1_local_rows
    global_row            = field1_rows(i_local_row)
    entry_idx             = entry_idx + 1
    entry_rows(entry_idx) = global_row
    entry_cols(entry_idx) = global_row
    entry_vals(entry_idx) = 0.5_psb_dpk_
  end do
  call nested_matrix%ins(1, 2, entry_idx, entry_rows, entry_cols, entry_vals, info)
  insert_status = max(insert_status, abs(info))
  deallocate(entry_rows, entry_cols, entry_vals)

  ! B = 0.3 I -> block (2,1): rows in field 2, columns in field 1
  allocate(entry_rows(field2_local_rows), entry_cols(field2_local_rows), &
       & entry_vals(field2_local_rows))
  entry_idx = 0
  do i_local_row = 1, field2_local_rows
    global_row            = field2_rows(i_local_row)
    entry_idx             = entry_idx + 1
    entry_rows(entry_idx) = global_row
    entry_cols(entry_idx) = global_row
    entry_vals(entry_idx) = 0.3_psb_dpk_
  end do
  call nested_matrix%ins(2, 1, entry_idx, entry_rows, entry_cols, entry_vals, info)
  insert_status = max(insert_status, abs(info))
  deallocate(entry_rows, entry_cols, entry_vals)

  ! Agree on the insertion outcome across all processes: a failure on a
  ! single rank must make every rank skip the collective assembly below.
  call psb_max(context, insert_status)
  if (insert_status /= psb_success_) then
    if (my_rank==0) write(*,*) 'FAIL: nested_matrix%ins info=', insert_status
    goto 9999
  end if

  ! assemble with the blocks stored in HLL (psb_ext format): exercises the
  ! configurable block storage and the format-agnostic nested matvec
  call nested_matrix%asb(info, mold=hll_mold)
  if (info /= psb_success_) then
    if (my_rank==0) write(*,*) 'FAIL: nested_matrix%asb info=', info
    goto 9999
  end if

  !---------------------------------------------------------------
  ! 3) monolithic oracle on nested_matrix%desc_glob (global offsets:
  !    field 1 -> g ; field 2 -> field_size + g)
  !---------------------------------------------------------------
  call psb_spall(monolithic_ref, nested_matrix%desc_glob, info, &
       & nnz=5*nested_matrix%desc_glob%get_local_rows())
  do i_local_row = 1, field1_local_rows                 ! field-1 rows
    global_row = field1_rows(i_local_row)
    insert_value(1) = 2.0_psb_dpk_
    call psb_spins(1,[global_row],[global_row],insert_value,monolithic_ref,nested_matrix%desc_glob,info)
    if (global_row > 1) then
      insert_value(1) = -1.0_psb_dpk_
      call psb_spins(1,[global_row],[global_row-1_psb_lpk_],insert_value,monolithic_ref,nested_matrix%desc_glob,info)
    end if
    if (global_row < field_size) then
      insert_value(1) = -1.0_psb_dpk_
      call psb_spins(1,[global_row],[global_row+1_psb_lpk_],insert_value,monolithic_ref,nested_matrix%desc_glob,info)
    end if
    global_col = field_size + global_row
    insert_value(1) = 0.5_psb_dpk_                       ! B^T
    call psb_spins(1,[global_row],[global_col],insert_value,monolithic_ref,nested_matrix%desc_glob,info)
  end do
  do i_local_row = 1, field2_local_rows                 ! field-2 rows
    global_row = field2_rows(i_local_row)
    global_col = global_row
    insert_value(1) = 0.3_psb_dpk_                       ! B
    call psb_spins(1,[field_size+global_row],[global_col],insert_value,monolithic_ref,nested_matrix%desc_glob,info)
  end do
  call psb_spasb(monolithic_ref, nested_matrix%desc_glob, info, dupl=psb_dupl_add_)

  !---------------------------------------------------------------
  ! 4) compare the two matrix-vector products on a distinct-valued x (x[g] = g)
  !---------------------------------------------------------------
  call psb_geall(x_vec, nested_matrix%desc_glob, info)
  do i_local_row = 1, nested_matrix%desc_glob%get_local_rows()
    call nested_matrix%desc_glob%l2g(i_local_row, global_row, info)
    insert_value(1) = real(global_row, psb_dpk_)
    call psb_geins(1, [global_row], insert_value, x_vec, nested_matrix%desc_glob, info)
  end do
  call psb_geasb(x_vec, nested_matrix%desc_glob, info)
  call psb_geall(y_nested, nested_matrix%desc_glob, info)
  call psb_geasb(y_nested, nested_matrix%desc_glob, info)
  call psb_geall(y_monolithic, nested_matrix%desc_glob, info)
  call psb_geasb(y_monolithic, nested_matrix%desc_glob, info)

  ! product through the nested operator
  call psb_spmm(done, nested_matrix%a_glob, x_vec, dzero, y_nested, nested_matrix%desc_glob, info)
  if (info /= psb_success_) then
    if (my_rank == 0) write(*,*) 'FAIL: psb_spmm (nested) info=', info
    goto 9999
  end if
  ! product through the monolithic oracle
  call psb_spmm(done, monolithic_ref, x_vec, dzero, y_monolithic, nested_matrix%desc_glob, info)
  if (info /= psb_success_) then
    if (my_rank == 0) write(*,*) 'FAIL: psb_spmm (monolithic) info=', info
    goto 9999
  end if

  ! y_monolithic <- y_nested - y_monolithic, then take its 2-norm
  call psb_geaxpby(done, y_nested, -done, y_monolithic, nested_matrix%desc_glob, info)
  mismatch_norm = psb_genrm2(y_monolithic, nested_matrix%desc_glob, info)

  if (my_rank == 0) then
    write(*,'(a,i0,a,i0)') ' np=', num_procs, ' N(field)=', field_size
    write(*,'(a,es12.4)') ' ||nested - monolithic||_2 = ', mismatch_norm
    if (mismatch_norm <= tolerance) then
      write(*,*) '[PASS] nested global operator matches monolithic CSR'
    else
      write(*,*) '[FAIL] mismatch above tolerance ', tolerance
    end if
  end if

  ! release vectors and oracle while their descriptor is still alive,
  ! then the nested operator itself
  call psb_gefree(x_vec, nested_matrix%desc_glob, info)
  call psb_gefree(y_nested, nested_matrix%desc_glob, info)
  call psb_gefree(y_monolithic, nested_matrix%desc_glob, info)
  call psb_spfree(monolithic_ref, nested_matrix%desc_glob, info)
  call nested_matrix%free(info)

9999 continue
  call psb_exit(context)
end program psb_d_nest_glob_test

@ -0,0 +1,222 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific prior written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! File: psb_d_nest_rect_test.F90
!
! Program: psb_d_nest_rect_test
! Author: Simone Staccone (Stack-1)
!
! Like psb_d_nest_glob_test but with fields of DIFFERENT size (|V| = 2|Q|) and
! GENUINELY RECTANGULAR off-diagonal blocks. The operator is built with the
! psb_d_nest_matrix utility (init/ins/asb): the user only inserts the values,
! while the cross-field halo registration and the construction of the
! rectangular local blocks are handled internally. Compared against the
! monolithic CSR oracle.
!
! [ A B^T ] A : V x V tridiag(-1,2,-1)
! [ B 0 ] B^T : V x Q rectangular (row r -> col mod(r-1,nQ)+1, val 0.5)
! B : Q x V rectangular (row q -> cols q and q+nQ, val 0.3)
! (2,2) absent
!
! Run: ./psb_d_nest_rect_test ; mpirun -np 4 ./psb_d_nest_rect_test
!
program psb_d_nest_rect_test
  use psb_base_mod
  use psb_util_mod
  use psb_d_nest_mod
  implicit none
  !
  ! Test driver for a 2x2 nested operator with fields of different size.
  !
  !   1) initialise a psb_d_nest_matrix on fields V (v_size) and Q (q_size);
  !   2) insert blocks (1,1), (1,2) and (2,1) on the rows owned by this
  !      process and assemble with type='CSC';
  !   3) build the same operator as a single sparse matrix on
  !      nested_matrix%desc_glob (the "oracle");
  !   4) apply both to x[g] = g and let rank 0 print the 2-norm of the
  !      difference together with a [PASS]/[FAIL] line.
  !
  ! Error handling:
  !   - the checks on init, asb and the nested psb_spmm report on rank 0 and
  !     leave through label 9999 (every rank that saw the error);
  !   - every other PSBLAS call is checked through require_success, which
  !     reports on the failing rank and aborts the run, so that the final
  !     comparison is never made on data produced by a failed step.
  !
  type(psb_ctxt_type)            :: context
  integer(psb_ipk_)              :: my_rank, num_procs, info, i_local_row
  integer(psb_ipk_)              :: entry_idx, v_local_rows, q_local_rows
  integer(psb_lpk_)              :: v_global_row, q_global_row, q_col, v_size, q_size
  type(psb_d_nest_matrix)        :: nested_matrix
  type(psb_dspmat_type)          :: monolithic_ref
  type(psb_d_vect_type)          :: x_vec, y_nested, y_monolithic
  real(psb_dpk_)                 :: insert_value(1)
  integer(psb_lpk_), allocatable :: entry_rows(:), entry_cols(:)
  integer(psb_lpk_), allocatable :: v_rows(:), q_rows(:)
  real(psb_dpk_), allocatable    :: entry_vals(:)
  real(psb_dpk_)                 :: mismatch_norm
  real(psb_dpk_), parameter      :: tolerance = 1.0e-10_psb_dpk_

  call psb_init(context)
  call psb_info(context, my_rank, num_procs)

  q_size = 16          ! global size of field Q
  v_size = 2*q_size    ! global size of field V (|V| = 2|Q|)

  !---------------------------------------------------------------
  ! 1) build the 2x2 nested operator (fields V, Q)
  !---------------------------------------------------------------
  call nested_matrix%init(context, [v_size, q_size], info)
  if (info /= psb_success_) then
    if (my_rank==0) write(*,*) 'FAIL: nested_matrix%init info=', info
    goto 9999
  end if

  ! Field-local global indices of the rows owned by this process.
  v_rows = nested_matrix%get_owned_rows(1)
  q_rows = nested_matrix%get_owned_rows(2)
  v_local_rows = size(v_rows)
  q_local_rows = size(q_rows)

  !---------------------------------------------------------------
  ! 2) insert the blocks (owned rows only)
  !---------------------------------------------------------------
  ! A = tridiag(-1,2,-1) -> block (1,1), V x V
  ! At most three entries per owned row; entry_idx counts the ones in use.
  allocate(entry_rows(3*v_local_rows), entry_cols(3*v_local_rows), &
       & entry_vals(3*v_local_rows))
  entry_idx = 0
  do i_local_row = 1, v_local_rows
    v_global_row = v_rows(i_local_row)
    entry_idx = entry_idx + 1
    entry_rows(entry_idx) = v_global_row
    entry_cols(entry_idx) = v_global_row
    entry_vals(entry_idx) = 2.0_psb_dpk_
    if (v_global_row > 1) then
      entry_idx = entry_idx + 1
      entry_rows(entry_idx) = v_global_row
      entry_cols(entry_idx) = v_global_row - 1_psb_lpk_
      entry_vals(entry_idx) = -1.0_psb_dpk_
    end if
    if (v_global_row < v_size) then
      entry_idx = entry_idx + 1
      entry_rows(entry_idx) = v_global_row
      entry_cols(entry_idx) = v_global_row + 1_psb_lpk_
      entry_vals(entry_idx) = -1.0_psb_dpk_
    end if
  end do
  call nested_matrix%ins(1, 1, entry_idx, entry_rows, entry_cols, entry_vals, info)
  call require_success('nested_matrix%ins block (1,1)')
  deallocate(entry_rows, entry_cols, entry_vals)

  ! B^T rectangular -> block (1,2), V x Q : row r -> col mod(r-1,nQ)+1, val 0.5
  allocate(entry_rows(v_local_rows), entry_cols(v_local_rows), &
       & entry_vals(v_local_rows))
  entry_idx = 0
  do i_local_row = 1, v_local_rows
    v_global_row = v_rows(i_local_row)
    entry_idx = entry_idx + 1
    entry_rows(entry_idx) = v_global_row
    entry_cols(entry_idx) = mod(v_global_row-1_psb_lpk_, q_size)+1
    entry_vals(entry_idx) = 0.5_psb_dpk_
  end do
  call nested_matrix%ins(1, 2, entry_idx, entry_rows, entry_cols, entry_vals, info)
  call require_success('nested_matrix%ins block (1,2)')
  deallocate(entry_rows, entry_cols, entry_vals)

  ! B rectangular -> block (2,1), Q x V : row q -> cols q and q+nQ, val 0.3
  allocate(entry_rows(2*q_local_rows), entry_cols(2*q_local_rows), &
       & entry_vals(2*q_local_rows))
  entry_idx = 0
  do i_local_row = 1, q_local_rows
    q_global_row = q_rows(i_local_row)
    entry_idx = entry_idx + 1
    entry_rows(entry_idx) = q_global_row
    entry_cols(entry_idx) = q_global_row
    entry_vals(entry_idx) = 0.3_psb_dpk_
    entry_idx = entry_idx + 1
    entry_rows(entry_idx) = q_global_row
    entry_cols(entry_idx) = q_global_row + q_size
    entry_vals(entry_idx) = 0.3_psb_dpk_
  end do
  call nested_matrix%ins(2, 1, entry_idx, entry_rows, entry_cols, entry_vals, info)
  call require_success('nested_matrix%ins block (2,1)')
  deallocate(entry_rows, entry_cols, entry_vals)

  ! assemble with the blocks stored in CSC instead of the CSR default:
  ! exercises the configurable block storage on a base format
  call nested_matrix%asb(info, type='CSC')
  if (info /= psb_success_) then
    if (my_rank==0) write(*,*) 'FAIL: nested_matrix%asb info=', info
    goto 9999
  end if

  !---------------------------------------------------------------
  ! 3) monolithic oracle on nested_matrix%desc_glob
  !    (global offsets: V -> g, Q -> v_size + g)
  !---------------------------------------------------------------
  call psb_spall(monolithic_ref, nested_matrix%desc_glob, info, &
       & nnz=6*nested_matrix%desc_glob%get_local_rows())
  call require_success('psb_spall (oracle)')

  do i_local_row = 1, v_local_rows                       ! V rows
    v_global_row = v_rows(i_local_row)

    insert_value(1) = 2.0_psb_dpk_
    call psb_spins(1, [v_global_row], [v_global_row], insert_value, &
         & monolithic_ref, nested_matrix%desc_glob, info)
    call require_success('psb_spins (oracle, A diagonal)')

    if (v_global_row > 1) then
      insert_value(1) = -1.0_psb_dpk_
      call psb_spins(1, [v_global_row], [v_global_row-1_psb_lpk_], insert_value, &
           & monolithic_ref, nested_matrix%desc_glob, info)
      call require_success('psb_spins (oracle, A sub-diagonal)')
    end if

    if (v_global_row < v_size) then
      insert_value(1) = -1.0_psb_dpk_
      call psb_spins(1, [v_global_row], [v_global_row+1_psb_lpk_], insert_value, &
           & monolithic_ref, nested_matrix%desc_glob, info)
      call require_success('psb_spins (oracle, A super-diagonal)')
    end if

    ! Q columns are shifted by v_size in the global numbering.
    q_col = v_size + (mod(v_global_row-1_psb_lpk_, q_size) + 1)
    insert_value(1) = 0.5_psb_dpk_                       ! B^T
    call psb_spins(1, [v_global_row], [q_col], insert_value, &
         & monolithic_ref, nested_matrix%desc_glob, info)
    call require_success('psb_spins (oracle, B^T)')
  end do

  do i_local_row = 1, q_local_rows                       ! Q rows
    q_global_row = q_rows(i_local_row)
    insert_value(1) = 0.3_psb_dpk_
    call psb_spins(1, [v_size+q_global_row], [q_global_row], insert_value, &
         & monolithic_ref, nested_matrix%desc_glob, info)            ! col q
    call require_success('psb_spins (oracle, B col q)')
    call psb_spins(1, [v_size+q_global_row], [q_global_row+q_size], insert_value, &
         & monolithic_ref, nested_matrix%desc_glob, info)            ! col q+nQ
    call require_success('psb_spins (oracle, B col q+nQ)')
  end do

  call psb_spasb(monolithic_ref, nested_matrix%desc_glob, info, dupl=psb_dupl_add_)
  call require_success('psb_spasb (oracle)')

  !---------------------------------------------------------------
  ! 4) compare the two matrix-vector products on x[g] = g
  !---------------------------------------------------------------
  call psb_geall(x_vec, nested_matrix%desc_glob, info)
  call require_success('psb_geall (x)')
  do i_local_row = 1, nested_matrix%desc_glob%get_local_rows()
    ! v_global_row is reused here as a generic global index on desc_glob.
    call nested_matrix%desc_glob%l2g(i_local_row, v_global_row, info)
    call require_success('desc_glob%l2g')
    insert_value(1) = real(v_global_row, psb_dpk_)
    call psb_geins(1, [v_global_row], insert_value, x_vec, nested_matrix%desc_glob, info)
    call require_success('psb_geins (x)')
  end do
  call psb_geasb(x_vec, nested_matrix%desc_glob, info)
  call require_success('psb_geasb (x)')

  call psb_geall(y_nested, nested_matrix%desc_glob, info)
  call require_success('psb_geall (y_nested)')
  call psb_geasb(y_nested, nested_matrix%desc_glob, info)
  call require_success('psb_geasb (y_nested)')

  call psb_geall(y_monolithic, nested_matrix%desc_glob, info)
  call require_success('psb_geall (y_monolithic)')
  call psb_geasb(y_monolithic, nested_matrix%desc_glob, info)
  call require_success('psb_geasb (y_monolithic)')

  ! y_nested = A_nested * x
  call psb_spmm(done, nested_matrix%a_glob, x_vec, dzero, y_nested, &
       & nested_matrix%desc_glob, info)
  if (info /= psb_success_) then
    if (my_rank==0) write(*,*) 'FAIL spmm nested info=', info
    goto 9999
  end if

  ! y_monolithic = A_oracle * x
  call psb_spmm(done, monolithic_ref, x_vec, dzero, y_monolithic, &
       & nested_matrix%desc_glob, info)
  call require_success('psb_spmm (oracle)')

  ! y_monolithic <- y_nested - y_monolithic, then take its 2-norm.
  call psb_geaxpby(done, y_nested, -done, y_monolithic, nested_matrix%desc_glob, info)
  call require_success('psb_geaxpby')
  mismatch_norm = psb_genrm2(y_monolithic, nested_matrix%desc_glob, info)
  ! Without this check a failed norm could still satisfy the tolerance test.
  call require_success('psb_genrm2')

  if (my_rank == 0) then
    write(*,'(a,i0,a,i0,a,i0)') ' np=', num_procs, ' |V|=', v_size, ' |Q|=', q_size
    write(*,'(a,es12.4)') ' ||nested - monolithic||_2 = ', mismatch_norm
    if (mismatch_norm <= tolerance) then
      write(*,*) '[PASS] rectangular nested operator matches monolithic CSR'
    else
      write(*,*) '[FAIL] mismatch above tolerance ', tolerance
    end if
  end if

  call nested_matrix%free(info)

9999 continue
  call psb_exit(context)

contains

  ! Stop the run if the most recent PSBLAS call left a non-success code in
  ! the host variable info.
  !
  !   step : short label of the call that was just made, used in the message.
  !
  ! The message is written by the rank that observed the failure (not only
  ! rank 0), and psb_abort is used instead of a jump to psb_exit so that the
  ! remaining ranks are not left waiting in a later parallel call.
  subroutine require_success(step)
    character(len=*), intent(in) :: step
    if (info /= psb_success_) then
      write(*,'(a,i0,3a,i0)') 'FAIL (rank ', my_rank, '): ', step, ' info=', info
      call psb_abort(context)
    end if
  end subroutine require_success

end program psb_d_nest_rect_test

@ -15,6 +15,10 @@ set(LIBDIR "${INSTALLDIR}/lib")
# Find the psblas package
find_package(psblas REQUIRED PATHS ${INSTALLDIR})
# Extra directory added to the linker search path for external libraries.
# Declared as a cache entry so it can be overridden per machine with
# -DEXTLIBDIR=<path> instead of editing this file; the default value is
# unchanged.
set(EXTLIBDIR "/home/fdurastante/psctoolkit/install/lib" CACHE PATH "Directory with external libraries to add to the link search path")
link_directories(${EXTLIBDIR})
# Include directories for the Fortran compiler
include_directories(${INCDIR} ${MODDIR})
@ -30,6 +34,7 @@ set(SOURCES_S_PDE3D psb_s_pde3d.F90)
set(SOURCES_D_PDE2D psb_d_pde2d.F90)
set(SOURCES_S_PDE2D psb_s_pde2d.F90)
# Create executables
add_executable(psb_d_pde3d ${SOURCES_D_PDE3D})
target_link_libraries(psb_d_pde3d psblas::util psblas::linsolve psblas::prec psblas::base)
@ -44,7 +49,7 @@ add_executable(psb_s_pde2d ${SOURCES_S_PDE2D})
target_link_libraries(psb_s_pde2d psblas::util psblas::linsolve psblas::prec psblas::base)
# Set output directory for executables
foreach(target psb_d_pde3d psb_s_pde3d psb_d_pde2d psb_s_pde2d)
foreach(target psb_d_pde3d psb_s_pde3d psb_d_pde2d psb_s_pde2d psb_dist1_pde2d psb_dist2_pde2d psb_dist3_pde2d)
set_target_properties(${target} PROPERTIES
RUNTIME_OUTPUT_DIRECTORY ${EXEDIR}
)

@ -5,8 +5,10 @@ include $(INCDIR)/Make.inc.psblas
#
# Libraries used
LIBDIR=$(INSTALLDIR)/lib
PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base
LDLIBS=$(PSBLDLIBS)
# BLASLIBDIR=/opt/share/sdk/intel/nvidia_hpc_sdk/Linux_x86_64/24.3/compilers/lib
# GKLIBDIR=/home/jalmerol/GKlib/build/Linux-x86_64
PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base
LDLIBS= $(PSBLDLIBS)
#
# Compilers and such
#
@ -38,10 +40,9 @@ psb_s_pde2d: psb_s_pde2d.o
$(FLINK) psb_s_pde2d.o -o psb_s_pde2d $(PSBLAS_LIB) $(LDLIBS)
/bin/mv psb_s_pde2d $(EXEDIR)
# clean: remove the object files listed below, the Fortran module files
# matching *$(.mod), and the four pde executables placed in $(EXEDIR).
# $(.mod) is not defined in this fragment -- presumably it comes from the
# included Make.inc.psblas; confirm.
# NOTE(review): the rm command appears twice with identical arguments; this
# looks like the old and new side of a whitespace-only change -- confirm that
# only one copy is present in the real Makefile.
clean:
/bin/rm -f psb_d_pde3d.o psb_d_oacc_pde3d.o psb_s_pde3d.o psb_d_pde2d.o psb_s_pde2d.o *$(.mod) \
$(EXEDIR)/psb_d_pde3d $(EXEDIR)/psb_s_pde3d $(EXEDIR)/psb_d_pde2d $(EXEDIR)/psb_s_pde2d
/bin/rm -f psb_d_pde3d.o psb_d_oacc_pde3d.o psb_s_pde3d.o psb_d_pde2d.o psb_s_pde2d.o *$(.mod) \
$(EXEDIR)/psb_d_pde3d $(EXEDIR)/psb_s_pde3d $(EXEDIR)/psb_d_pde2d $(EXEDIR)/psb_s_pde2d
# verycleanlib: run the "veryclean" target of the Makefile two directories up.
# $(MAKE) is used instead of a literal "make" so that command-line flags and
# the jobserver are passed to the sub-make; "&&" keeps make from running in
# the current directory if the cd fails.
verycleanlib:
	(cd ../.. && $(MAKE) veryclean)
lib:

Loading…
Cancel
Save