Add double-precision nested block-structured matrix/vector layer

nested_matrix_type
jalmerol 2 weeks ago committed by Stack-1
parent 164f393183
commit b8d41569fd

@ -567,6 +567,8 @@ set(PSB_base_source_files
modules/tools/psb_m_tools_a_mod.f90 modules/tools/psb_m_tools_a_mod.f90
modules/tools/psb_cd_tools_mod.F90 modules/tools/psb_cd_tools_mod.F90
modules/tools/psb_d_tools_mod.F90 modules/tools/psb_d_tools_mod.F90
modules/tools/psb_cd_nest_tools_mod.F90
modules/tools/psb_d_nest_tools_mod.F90
modules/tools/psb_c_tools_mod.F90 modules/tools/psb_c_tools_mod.F90
modules/tools/psb_e_tools_a_mod.f90 modules/tools/psb_e_tools_a_mod.f90
modules/tools/psb_i2_tools_a_mod.f90 modules/tools/psb_i2_tools_a_mod.f90
@ -638,6 +640,12 @@ set(PSB_base_source_files
modules/desc/psb_hash_map_mod.F90 modules/desc/psb_hash_map_mod.F90
modules/desc/psb_glist_map_mod.F90 modules/desc/psb_glist_map_mod.F90
modules/psb_base_mod.f90 modules/psb_base_mod.f90
modules/desc/psb_desc_nest_mod.f90
modules/serial/psb_d_nest_vect_mod.f90
modules/serial/psb_d_nest_mat_mod.f90
modules/comm/psb_d_nest_comm_mod.f90
modules/psblas/psb_d_nest_psblas_mod.f90
modules/psb_d_nest_mod.f90
) )
foreach(file IN LISTS PSB_base_source_files) foreach(file IN LISTS PSB_base_source_files)
list(APPEND base_source_files ${CMAKE_CURRENT_LIST_DIR}/${file}) list(APPEND base_source_files ${CMAKE_CURRENT_LIST_DIR}/${file})

@ -36,6 +36,7 @@ SERIAL_MODS=serial/psb_s_serial_mod.o serial/psb_d_serial_mod.o \
serial/psb_i2_base_vect_mod.o serial/psb_i2_vect_mod.o\ serial/psb_i2_base_vect_mod.o serial/psb_i2_vect_mod.o\
serial/psb_i_base_vect_mod.o serial/psb_i_vect_mod.o\ serial/psb_i_base_vect_mod.o serial/psb_i_vect_mod.o\
serial/psb_l_base_vect_mod.o serial/psb_l_vect_mod.o\ serial/psb_l_base_vect_mod.o serial/psb_l_vect_mod.o\
serial/psb_i2_base_vect_mod.o serial/psb_i2_vect_mod.o\
serial/psb_d_base_vect_mod.o serial/psb_d_vect_mod.o\ serial/psb_d_base_vect_mod.o serial/psb_d_vect_mod.o\
serial/psb_s_base_vect_mod.o serial/psb_s_vect_mod.o\ serial/psb_s_base_vect_mod.o serial/psb_s_vect_mod.o\
serial/psb_c_base_vect_mod.o serial/psb_c_vect_mod.o\ serial/psb_c_base_vect_mod.o serial/psb_c_vect_mod.o\
@ -82,7 +83,8 @@ SERIAL_MODS=serial/psb_s_serial_mod.o serial/psb_d_serial_mod.o \
serial/psb_s_base_mat_mod.o serial/psb_s_csr_mat_mod.o serial/psb_s_csc_mat_mod.o serial/psb_s_mat_mod.o \ serial/psb_s_base_mat_mod.o serial/psb_s_csr_mat_mod.o serial/psb_s_csc_mat_mod.o serial/psb_s_mat_mod.o \
serial/psb_d_base_mat_mod.o serial/psb_d_csr_mat_mod.o serial/psb_d_csc_mat_mod.o serial/psb_d_mat_mod.o \ serial/psb_d_base_mat_mod.o serial/psb_d_csr_mat_mod.o serial/psb_d_csc_mat_mod.o serial/psb_d_mat_mod.o \
serial/psb_c_base_mat_mod.o serial/psb_c_csr_mat_mod.o serial/psb_c_csc_mat_mod.o serial/psb_c_mat_mod.o \ serial/psb_c_base_mat_mod.o serial/psb_c_csr_mat_mod.o serial/psb_c_csc_mat_mod.o serial/psb_c_mat_mod.o \
serial/psb_z_base_mat_mod.o serial/psb_z_csr_mat_mod.o serial/psb_z_csc_mat_mod.o serial/psb_z_mat_mod.o serial/psb_z_base_mat_mod.o serial/psb_z_csr_mat_mod.o serial/psb_z_csc_mat_mod.o serial/psb_z_mat_mod.o \
serial/psb_d_nest_vect_mod.o serial/psb_d_nest_mat_mod.o
#\ #\
# serial/psb_ls_csr_mat_mod.o serial/psb_ld_csr_mat_mod.o serial/psb_lc_csr_mat_mod.o serial/psb_lz_csr_mat_mod.o # serial/psb_ls_csr_mat_mod.o serial/psb_ld_csr_mat_mod.o serial/psb_lc_csr_mat_mod.o serial/psb_lz_csr_mat_mod.o
#\ #\
@ -91,10 +93,12 @@ SERIAL_MODS=serial/psb_s_serial_mod.o serial/psb_d_serial_mod.o \
UTIL_MODS = desc/psb_desc_const_mod.o desc/psb_indx_map_mod.o\ UTIL_MODS = desc/psb_desc_const_mod.o desc/psb_indx_map_mod.o\
desc/psb_gen_block_map_mod.o desc/psb_list_map_mod.o desc/psb_repl_map_mod.o\ desc/psb_gen_block_map_mod.o desc/psb_list_map_mod.o desc/psb_repl_map_mod.o\
desc/psb_glist_map_mod.o desc/psb_hash_map_mod.o desc/psb_hashval.o \ desc/psb_glist_map_mod.o desc/psb_hash_map_mod.o desc/psb_hashval.o \
desc/psb_desc_mod.o auxil/psb_sort_mod.o \ desc/psb_desc_mod.o desc/psb_desc_nest_mod.o auxil/psb_sort_mod.o \
tools/psb_cd_tools_mod.o \ tools/psb_cd_tools_mod.o \
tools/psb_cd_nest_tools_mod.o \
tools/psb_i_tools_mod.o tools/psb_l_tools_mod.o \ tools/psb_i_tools_mod.o tools/psb_l_tools_mod.o \
tools/psb_s_tools_mod.o tools/psb_d_tools_mod.o\ tools/psb_s_tools_mod.o tools/psb_d_tools_mod.o\
tools/psb_d_nest_tools_mod.o \
tools/psb_c_tools_mod.o tools/psb_z_tools_mod.o \ tools/psb_c_tools_mod.o tools/psb_z_tools_mod.o \
tools/psb_i2_tools_a_mod.o tools/psb_m_tools_a_mod.o tools/psb_e_tools_a_mod.o \ tools/psb_i2_tools_a_mod.o tools/psb_m_tools_a_mod.o tools/psb_e_tools_a_mod.o \
tools/psb_s_tools_a_mod.o tools/psb_d_tools_a_mod.o\ tools/psb_s_tools_a_mod.o tools/psb_d_tools_a_mod.o\
@ -110,6 +114,7 @@ UTIL_MODS = desc/psb_desc_const_mod.o desc/psb_indx_map_mod.o\
comm/psb_s_comm_mod.o comm/psb_d_comm_mod.o\ comm/psb_s_comm_mod.o comm/psb_d_comm_mod.o\
comm/psb_c_comm_mod.o comm/psb_z_comm_mod.o \ comm/psb_c_comm_mod.o comm/psb_z_comm_mod.o \
comm/psb_i2_comm_a_mod.o \ comm/psb_i2_comm_a_mod.o \
comm/psb_d_nest_comm_mod.o \
comm/psb_m_comm_a_mod.o comm/psb_e_comm_a_mod.o \ comm/psb_m_comm_a_mod.o comm/psb_e_comm_a_mod.o \
comm/psb_s_comm_a_mod.o comm/psb_d_comm_a_mod.o\ comm/psb_s_comm_a_mod.o comm/psb_d_comm_a_mod.o\
comm/psb_c_comm_a_mod.o comm/psb_z_comm_a_mod.o \ comm/psb_c_comm_a_mod.o comm/psb_z_comm_a_mod.o \
@ -123,12 +128,13 @@ UTIL_MODS = desc/psb_desc_const_mod.o desc/psb_indx_map_mod.o\
psblas/psb_s_psblas_mod.o psblas/psb_c_psblas_mod.o \ psblas/psb_s_psblas_mod.o psblas/psb_c_psblas_mod.o \
psblas/psb_d_psblas_mod.o psblas/psb_z_psblas_mod.o \ psblas/psb_d_psblas_mod.o psblas/psb_z_psblas_mod.o \
psblas/psb_psblas_mod.o \ psblas/psb_psblas_mod.o \
psblas/psb_d_nest_psblas_mod.o \
psb_check_mod.o desc/psb_hash_mod.o psb_check_mod.o desc/psb_hash_mod.o
MODULES=$(BASIC_MODS) $(SERIAL_MODS) $(UTIL_MODS) MODULES=$(BASIC_MODS) $(SERIAL_MODS) $(UTIL_MODS)
OBJS = error.o psb_base_mod.o $(EXTRA_COBJS) cutil.o OBJS = error.o psb_base_mod.o psb_d_nest_mod.o $(EXTRA_COBJS) cutil.o
MODDIR=../../modules MODDIR=../../modules
INCDIR=../../include INCDIR=../../include
LIBDIR=../ LIBDIR=../
@ -413,12 +419,19 @@ comm/psi_s_comm_a_mod.o comm/psi_d_comm_a_mod.o \
comm/psi_c_comm_a_mod.o comm/psi_z_comm_a_mod.o: desc/psb_desc_mod.o comm/psi_c_comm_a_mod.o comm/psi_z_comm_a_mod.o: desc/psb_desc_mod.o
tools/psb_tools_mod.o: tools/psb_cd_tools_mod.o tools/psb_s_tools_mod.o tools/psb_d_tools_mod.o\ tools/psb_tools_mod.o: tools/psb_cd_tools_mod.o tools/psb_s_tools_mod.o tools/psb_d_tools_mod.o\
tools/psb_cd_nest_tools_mod.o tools/psb_d_nest_tools_mod.o \
tools/psb_i_tools_mod.o tools/psb_l_tools_mod.o \ tools/psb_i_tools_mod.o tools/psb_l_tools_mod.o \
tools/psb_c_tools_mod.o tools/psb_z_tools_mod.o \ tools/psb_c_tools_mod.o tools/psb_z_tools_mod.o \
tools/psb_i2_tools_a_mod.o tools/psb_m_tools_a_mod.o tools/psb_e_tools_a_mod.o \ tools/psb_i2_tools_a_mod.o tools/psb_m_tools_a_mod.o tools/psb_e_tools_a_mod.o \
tools/psb_s_tools_a_mod.o tools/psb_d_tools_a_mod.o\ tools/psb_s_tools_a_mod.o tools/psb_d_tools_a_mod.o\
tools/psb_c_tools_a_mod.o tools/psb_z_tools_a_mod.o tools/psb_c_tools_a_mod.o tools/psb_z_tools_a_mod.o
tools/psb_cd_nest_tools_mod.o: tools/psb_cd_nest_tools_mod.F90 tools/psb_cd_tools_mod.o desc/psb_desc_nest_mod.o
$(FC) $(FCOPT) $(FINCLUDES) $(FDEFINES) -c tools/psb_cd_nest_tools_mod.F90 -o tools/psb_cd_nest_tools_mod.o
tools/psb_d_nest_tools_mod.o: tools/psb_d_nest_tools_mod.F90 tools/psb_d_tools_mod.o \
desc/psb_desc_nest_mod.o serial/psb_d_nest_mat_mod.o serial/psb_d_nest_vect_mod.o
$(FC) $(FCOPT) $(FINCLUDES) $(FDEFINES) -c tools/psb_d_nest_tools_mod.F90 -o tools/psb_d_nest_tools_mod.o
tools/psb_cd_tools_mod.o tools/psb_i_tools_mod.o tools/psb_l_tools_mod.o \ tools/psb_cd_tools_mod.o tools/psb_i_tools_mod.o tools/psb_l_tools_mod.o \
tools/psb_s_tools_mod.o tools/psb_d_tools_mod.o \ tools/psb_s_tools_mod.o tools/psb_d_tools_mod.o \
@ -441,6 +454,26 @@ psblas/psb_z_psblas_mod.o: serial/psb_z_vect_mod.o serial/psb_z_mat_mod.o
psblas/psb_psblas_mod.o: psblas/psb_s_psblas_mod.o psblas/psb_c_psblas_mod.o psblas/psb_d_psblas_mod.o psblas/psb_z_psblas_mod.o psblas/psb_psblas_mod.o: psblas/psb_s_psblas_mod.o psblas/psb_c_psblas_mod.o psblas/psb_d_psblas_mod.o psblas/psb_z_psblas_mod.o
psblas/psb_s_psblas_mod.o psblas/psb_c_psblas_mod.o psblas/psb_d_psblas_mod.o psblas/psb_z_psblas_mod.o: serial/psb_mat_mod.o desc/psb_desc_mod.o psblas/psb_s_psblas_mod.o psblas/psb_c_psblas_mod.o psblas/psb_d_psblas_mod.o psblas/psb_z_psblas_mod.o: serial/psb_mat_mod.o desc/psb_desc_mod.o
# --- nested mat/vec/desc dependencies ---
desc/psb_desc_nest_mod.o: desc/psb_desc_mod.o
serial/psb_d_nest_vect_mod.o: serial/psb_d_vect_mod.o desc/psb_desc_mod.o
serial/psb_d_nest_mat_mod.o: serial/psb_d_mat_mod.o
comm/psb_d_nest_comm_mod.o: \
desc/psb_desc_nest_mod.o \
serial/psb_d_nest_vect_mod.o \
comm/psb_d_comm_mod.o
psblas/psb_d_nest_psblas_mod.o: \
desc/psb_desc_nest_mod.o \
serial/psb_d_nest_vect_mod.o \
serial/psb_d_nest_mat_mod.o \
serial/psb_d_mat_mod.o \
psblas/psb_d_psblas_mod.o \
comm/psb_d_nest_comm_mod.o
psb_d_nest_mod.o: \
desc/psb_desc_nest_mod.o \
serial/psb_d_nest_vect_mod.o \
serial/psb_d_nest_mat_mod.o \
psblas/psb_d_nest_psblas_mod.o
psb_base_mod.o: $(MODULES) psb_base_mod.o: $(MODULES)

@ -0,0 +1,93 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! module: psb_d_nest_comm_mod
!
! Communication operations for nested (block-structured) double precision
! real vectors.
!
! psb_d_nest_halo
! Halo exchange for all column blocks of a nested vector.
! Calls psb_halo(x(j), descs(1,j)) for each column block j.
! All descriptors descs(i,j) for fixed j are equivalent;
! Called once before block SpMM to populate ghost entries of x.
!
! psb_d_nest_ovrl
! Overlap update for all row blocks of a nested vector.
! Calls psb_ovrl(x(i), descs(i,i)) for each row block i using the
! diagonal descriptor.
! Called after operations that contribute to overlapping rows
! (e.g. FEM assembly).
!
module psb_d_nest_comm_mod
use psb_desc_nest_mod
use psb_d_nest_vect_mod
use psb_d_comm_mod, only : psb_halo, psb_ovrl
use psb_const_mod, only : psb_ipk_
implicit none
private
public :: psb_d_nest_halo, psb_d_nest_ovrl
contains
subroutine psb_d_nest_halo(xnest, descs, info, tran, mode, data)
type(psb_d_nest_vect_type), intent(inout) :: xnest
type(psb_desc_nest_type), intent(in) :: descs
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: tran
integer(psb_ipk_), optional, intent(in) :: mode, data
integer(psb_ipk_) :: j
info = 0
do j = 1, xnest%nblocks
call psb_halo(xnest%vects(j), descs%descs(1,j), info, tran=tran, mode=mode, data=data)
if (info /= 0) return
end do
end subroutine psb_d_nest_halo
subroutine psb_d_nest_ovrl(xnest, descs, info, update, mode)
type(psb_d_nest_vect_type), intent(inout) :: xnest
type(psb_desc_nest_type), intent(in) :: descs
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), optional, intent(in) :: update, mode
integer(psb_ipk_) :: i
info = 0
do i = 1, xnest%nblocks
call psb_ovrl(xnest%vects(i), descs%descs(i,i), info, update=update, mode=mode)
if (info /= 0) return
end do
end subroutine psb_d_nest_ovrl
end module psb_d_nest_comm_mod

@ -0,0 +1,141 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! module: psb_desc_nest_mod
!
! Defines psb_desc_nest_type: a 2-D array of psb_desc_type objects,
! one per block matrix entry in an nrblocks x ncblocks block system.
!
!
module psb_desc_nest_mod
use psb_desc_mod
implicit none
type :: psb_desc_nest_type
integer(psb_ipk_) :: nrblocks = 0
integer(psb_ipk_) :: ncblocks = 0
type(psb_desc_type), allocatable :: descs(:,:)
contains
procedure :: get_nrblocks => psb_desc_nest_get_nrblocks
procedure :: get_ncblocks => psb_desc_nest_get_ncblocks
procedure :: get_desc => psb_desc_nest_get_desc
procedure :: is_valid => psb_desc_nest_is_valid
procedure :: sizeof => psb_desc_nest_sizeof
procedure :: free => psb_desc_nest_free
end type psb_desc_nest_type
contains
! get_nrblocks / get_ncblocks
function psb_desc_nest_get_nrblocks(d) result(nb)
class(psb_desc_nest_type), intent(in) :: d
integer(psb_ipk_) :: nb
nb = d%nrblocks
end function psb_desc_nest_get_nrblocks
function psb_desc_nest_get_ncblocks(d) result(nb)
class(psb_desc_nest_type), intent(in) :: d
integer(psb_ipk_) :: nb
nb = d%ncblocks
end function psb_desc_nest_get_ncblocks
! get_desc: copy descriptor (i,j) into the output argument
subroutine psb_desc_nest_get_desc(d, i, j, desc, info)
class(psb_desc_nest_type), intent(in) :: d
integer(psb_ipk_), intent(in) :: i, j
type(psb_desc_type), intent(out):: desc
integer(psb_ipk_), intent(out):: info
info = 0
if (i < 1 .or. i > d%nrblocks .or. j < 1 .or. j > d%ncblocks) then
info = -1
return
end if
desc = d%descs(i,j)
end subroutine psb_desc_nest_get_desc
! is_valid: true if all diagonal sub-descriptors are valid
function psb_desc_nest_is_valid(d) result(valid)
class(psb_desc_nest_type), intent(in) :: d
logical :: valid
integer(psb_ipk_) :: i
valid = (d%nrblocks >= 1) .and. (d%ncblocks >= 1) .and. allocated(d%descs)
if (valid) then
do i = 1, min(d%nrblocks, d%ncblocks)
if (.not. d%descs(i,i)%is_valid()) then
valid = .false.
return
end if
end do
end if
end function psb_desc_nest_is_valid
! sizeof: total memory (bytes) of all sub-descriptors
function psb_desc_nest_sizeof(d) result(s)
class(psb_desc_nest_type), intent(in) :: d
integer(psb_epk_) :: s
integer(psb_ipk_) :: i, j
s = 0_psb_epk_
if (allocated(d%descs)) then
do j = 1, d%ncblocks
do i = 1, d%nrblocks
s = s + d%descs(i,j)%sizeof()
end do
end do
end if
end function psb_desc_nest_sizeof
! free: release all sub-descriptors and reset
subroutine psb_desc_nest_free(d, info)
class(psb_desc_nest_type), intent(inout) :: d
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, j, linfo
info = 0
if (allocated(d%descs)) then
do j = 1, d%ncblocks
do i = 1, d%nrblocks
call d%descs(i,j)%free(linfo)
if (linfo /= 0 .and. info == 0) info = linfo
end do
end do
deallocate(d%descs, stat=linfo)
if (linfo /= 0 .and. info == 0) info = linfo
end if
d%nrblocks = 0
d%ncblocks = 0
end subroutine psb_desc_nest_free
end module psb_desc_nest_mod

@ -0,0 +1,46 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! module: psb_d_nest_mod
!
! Umbrella module for the nested (block-structured) double precision
! real types. Users need only:
!
! use psb_d_nest_mod
!
! to access all three container types and their parallel operations.
!
module psb_d_nest_mod
use psb_desc_nest_mod
use psb_d_nest_vect_mod
use psb_d_nest_mat_mod
use psb_d_nest_psblas_mod
end module psb_d_nest_mod

@ -0,0 +1,627 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! module: psb_d_nest_psblas_mod
!
! Parallel BLAS operations for the nested (block-structured) double
! precision real types.
!
! psb_d_nest_spmm
! Computes y = alpha * A_nest * x + beta * y (block SpMV).
! Three-phase algorithm:
! Phase 1 scale y upfront:
! if beta == 0: zero all y(i)
! elif beta /= 1: y(i) = beta * y(i) for each block i
! Phase 2 single halo exchange per column block:
! call psb_d_nest_halo(xnest, descs)
! Populates ghost entries of x(j) using descs(1,j) for each j.
! All descs(i,j) for fixed j share the same column space, so
! one exchange covers all block-rows.
! Phase 3 local SpMM accumulation (no further communication):
! For each present block (i,j):
! y(i) += alpha * A(i,j) * x(j)
! (psb_spmm called with doswap=.false. to skip internal halo)
!
! psb_d_nest_geaxpby
! Computes y(i) = alpha * x(i) + beta * y(i) for each block i.
!
! psb_d_nest_genrm2
! Computes ||x||_2 = sqrt( sum_i ||x(i)||_2^2 ) with a single
! global reduction.
!
! psb_d_nest_genrm2s
! Subroutine form of psb_d_nest_genrm2 (result via intent(out) argument).
!
! psb_d_nest_gedot
! Computes dot(x,y) = sum_i dot(x(i), y(i)) with a single global
! reduction.
!
! psb_d_nest_geamax
! Computes ||x||_inf = max_i ||x(i)||_inf with a single global reduction.
!
! psb_d_nest_geasum
! Computes ||x||_1 = sum_i ||x(i)||_1 with a single global reduction.
!
! psb_d_nest_gemin
! Computes min(x) = min_i min(x(i)) with a single global reduction.
!
! psb_d_nest_minquotient
! Computes min(x/y) = min_i min(x(i)/y(i)) with a single global reduction.
!
! psb_d_nest_gemlt
! Computes y(i) = x(i) .* y(i) element-wise for each block i.
!
! psb_d_nest_gediv
! Computes y(i) = x(i) ./ y(i) element-wise for each block i.
!
! psb_d_nest_geinv
! Computes y(i) = 1 / x(i) element-wise for each block i.
!
! psb_d_nest_geabs
! Computes y(i) = |x(i)| element-wise for each block i.
!
! psb_d_nest_geaddconst
! Computes z(i) = x(i) + b for each block i (b is a scalar).
!
! psb_d_nest_gecmp
! Computes z(i) = cmp(x(i), c) for each block i (c is a scalar).
!
! psb_d_nest_mask
! Applies mask operation to each block i; t is .true. iff all blocks
! return .true.
!
! psb_d_nest_upd_xyz
! Applies psb_upd_xyz(alpha,beta,gamma,delta, x(i),y(i),z(i)) for each block i.
!
! psb_d_nest_spsm
! Block-diagonal triangular solve: applies psb_spsm to each diagonal
! block (i,i) of tnest independently.
!
module psb_d_nest_psblas_mod
use psb_desc_nest_mod
use psb_d_nest_vect_mod
use psb_d_nest_mat_mod
use psb_d_mat_mod, only : psb_dspmat_type, psb_csmm
use psb_d_psblas_mod, only : psb_spmm, psb_geaxpby, psb_genrm2, psb_gedot, &
& psb_geamax, psb_geasum, psb_gemin, psb_minquotient, &
& psb_gemlt, psb_gediv, psb_geinv, psb_geabs, psb_geaddconst, &
& psb_gecmp, psb_mask, psb_upd_xyz, psb_spsm
use psb_d_nest_comm_mod, only : psb_d_nest_halo, psb_d_nest_ovrl
use psb_penv_mod, only : psb_sum, psb_max, psb_min, psb_info
use psb_const_mod, only : psb_dpk_, psb_ipk_, psb_epk_, psb_ctxt_type, dzero, done
implicit none
private
public :: psb_d_nest_spmm, psb_d_nest_geaxpby, &
psb_d_nest_genrm2, psb_d_nest_genrm2s, psb_d_nest_gedot, &
psb_d_nest_geamax, psb_d_nest_geasum, psb_d_nest_gemin, &
psb_d_nest_minquotient, &
psb_d_nest_gemlt, psb_d_nest_gediv, psb_d_nest_geinv, &
psb_d_nest_halo, psb_d_nest_ovrl, &
psb_d_nest_geabs, psb_d_nest_geaddconst, psb_d_nest_gecmp, &
psb_d_nest_mask, psb_d_nest_upd_xyz, psb_d_nest_spsm
contains
! y = alpha * A_nest * x + beta * y
subroutine psb_d_nest_spmm(alpha, anest, xnest, beta, ynest, descs, info, trans)
real(psb_dpk_), intent(in) :: alpha, beta
type(psb_d_nest_sparse_mat), intent(in) :: anest
type(psb_d_nest_vect_type), intent(inout) :: xnest
type(psb_d_nest_vect_type), intent(inout) :: ynest
type(psb_desc_nest_type), intent(in) :: descs
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
integer(psb_ipk_) :: i, j
character :: trans_
info = 0
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (beta == dzero) then
do i = 1, anest%nrblocks
call ynest%vects(i)%zero()
end do
else if (beta /= done) then
do i = 1, anest%nrblocks
call ynest%vects(i)%scal(beta)
end do
end if
call psb_d_nest_halo(xnest, descs, info)
if (info /= 0) return
do i = 1, anest%nrblocks
do j = 1, anest%ncblocks
if (anest%has_block(i, j)) then
! y(i) += alpha * A(i,j) * x(j) (doswap=.false. skips internal halo, already done in Phase 2)
call psb_spmm(alpha, anest%mats(i,j), xnest%vects(j), &
& done, ynest%vects(i), descs%descs(i,j), info, trans=trans_, doswap=.false.)
if (info /= 0) return
end if
end do
end do
end subroutine psb_d_nest_spmm
! y(i) = alpha * x(i) + beta * y(i) for each block i
subroutine psb_d_nest_geaxpby(alpha, xnest, beta, ynest, descs, info)
real(psb_dpk_), intent(in) :: alpha, beta
type(psb_d_nest_vect_type), intent(inout) :: xnest
type(psb_d_nest_vect_type), intent(inout) :: ynest
type(psb_desc_nest_type), intent(in) :: descs
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
info = 0
do i = 1, xnest%nblocks
call psb_geaxpby(alpha, xnest%vects(i), beta, ynest%vects(i), &
& descs%descs(i,i), info)
if (info /= 0) return
end do
end subroutine psb_d_nest_geaxpby
! ||x||_2 = sqrt( sum_i ||x(i)||_2^2 )
! Uses a single global MPI_Allreduce across all blocks.
! global (optional, default .true.): if .false., skips MPI_Allreduce and returns
! the process-local partial norm; use when the caller manages the reduction itself.
function psb_d_nest_genrm2(xnest, descs, info, global) result(res)
type(psb_d_nest_vect_type), intent(inout) :: xnest
type(psb_desc_nest_type), intent(in) :: descs
integer(psb_ipk_), intent(out) :: info
logical, optional, intent(in) :: global
real(psb_dpk_) :: res
integer(psb_ipk_) :: i
real(psb_dpk_) :: loc_sum, blk_nrm
logical :: global_
type(psb_ctxt_type) :: ctxt
global_ = .true.
if (present(global)) global_ = global
info = 0
loc_sum = dzero
do i = 1, xnest%nblocks
! global=.false. returns local partial norm (sqrt of local partial sum)
blk_nrm = psb_genrm2(xnest%vects(i), descs%descs(i,i), info, global=.false.)
if (info /= 0) then
res = dzero
return
end if
loc_sum = loc_sum + blk_nrm * blk_nrm
end do
if (global_) then
ctxt = descs%descs(1,1)%get_context()
call psb_sum(ctxt, loc_sum)
end if
res = sqrt(loc_sum)
end function psb_d_nest_genrm2
! psb_d_nest_gedot
! dot(x, y) = sum_i dot(x(i), y(i))
! Uses a single global MPI_Allreduce across all blocks.
function psb_d_nest_gedot(xnest, ynest, descs, info, global) result(res)
type(psb_d_nest_vect_type), intent(inout) :: xnest
type(psb_d_nest_vect_type), intent(inout) :: ynest
type(psb_desc_nest_type), intent(in) :: descs
integer(psb_ipk_), intent(out) :: info
logical, optional, intent(in) :: global
real(psb_dpk_) :: res
integer(psb_ipk_) :: i
real(psb_dpk_) :: loc_sum, blk_dot
logical :: global_
type(psb_ctxt_type) :: ctxt
global_ = .true.
if (present(global)) global_ = global
info = 0
loc_sum = dzero
do i = 1, xnest%nblocks
blk_dot = psb_gedot(xnest%vects(i), ynest%vects(i), descs%descs(i,i), &
& info, global=.false.)
if (info /= 0) then
res = dzero
return
end if
loc_sum = loc_sum + blk_dot
end do
if (global_) then
ctxt = descs%descs(1,1)%get_context()
call psb_sum(ctxt, loc_sum)
end if
res = loc_sum
end function psb_d_nest_gedot
! Subroutine form: res = ||x||_2 (single global reduction)
subroutine psb_d_nest_genrm2s(res, xnest, descs, info, global)
real(psb_dpk_), intent(out) :: res
type(psb_d_nest_vect_type), intent(inout) :: xnest
type(psb_desc_nest_type), intent(in) :: descs
integer(psb_ipk_), intent(out) :: info
logical, optional, intent(in) :: global
res = psb_d_nest_genrm2(xnest, descs, info, global)
end subroutine psb_d_nest_genrm2s
! ||x||_inf = max_i ||x(i)||_inf (single global reduction)
function psb_d_nest_geamax(xnest, descs, info, global) result(res)
type(psb_d_nest_vect_type), intent(inout) :: xnest
type(psb_desc_nest_type), intent(in) :: descs
integer(psb_ipk_), intent(out) :: info
logical, optional, intent(in) :: global
real(psb_dpk_) :: res
integer(psb_ipk_) :: i
real(psb_dpk_) :: blk_val
logical :: global_
type(psb_ctxt_type) :: ctxt
global_ = .true.
if (present(global)) global_ = global
info = 0
res = dzero
do i = 1, xnest%nblocks
blk_val = psb_geamax(xnest%vects(i), descs%descs(i,i), info, global=.false.)
if (info /= 0) return
if (blk_val > res) res = blk_val
end do
if (global_) then
ctxt = descs%descs(1,1)%get_context()
call psb_max(ctxt, res)
end if
end function psb_d_nest_geamax
! ||x||_1 = sum_i ||x(i)||_1 (single global reduction)
! function psb_d_nest_geasum(xnest, descs, info, global) result(res)
! type(psb_d_nest_vect_type), intent(inout) :: xnest
! type(psb_desc_nest_type), intent(in) :: descs
! integer(psb_ipk_), intent(out) :: info
! logical, optional, intent(in) :: global
! real(psb_dpk_) :: res
! integer(psb_ipk_) :: i
! real(psb_dpk_) :: blk_val
! logical :: global_
! type(psb_ctxt_type) :: ctxt
! global_ = .true.
! if (present(global)) global_ = global
! info = 0
! res = dzero
! do i = 1, xnest%nblocks
! blk_val = psb_geasum(xnest%vects(i), descs%descs(i,i), info, global=.false.)
! if (info /= 0) return
! res = res + blk_val
! end do
! if (global_) then
! ctxt = descs%descs(1,1)%get_context()
! call psb_sum(ctxt, res)
! end if
! end function psb_d_nest_geasum
! ||x||_1 = sum_i ||x(i)||_1
function psb_d_nest_geasum(xnest, descs, info, global) result(res)
type(psb_d_nest_vect_type), intent(inout) :: xnest
type(psb_desc_nest_type), intent(in) :: descs
integer(psb_ipk_), intent(out) :: info
logical, optional, intent(in) :: global
real(psb_dpk_) :: res
integer(psb_ipk_) :: i
integer(psb_ipk_) :: nloc
real(psb_dpk_) :: blk_val
real(psb_dpk_), allocatable :: blk_vals(:)
logical :: global_
type(psb_ctxt_type) :: ctxt
global_ = .true.
if (present(global)) global_ = global
info = 0
res = dzero
do i = 1, xnest%nblocks
nloc = descs%descs(i,i)%get_local_rows()
blk_vals = xnest%vects(i)%get_vect(nloc)
if (size(blk_vals) > 0) then
blk_val = sum(abs(blk_vals))
else
blk_val = dzero
end if
res = res + blk_val
end do
if (global_) then
ctxt = descs%descs(1,1)%get_context()
call psb_sum(ctxt, res)
end if
end function psb_d_nest_geasum
! min(x) = min_i min(x(i))
function psb_d_nest_gemin(xnest, descs, info, global) result(res)
type(psb_d_nest_vect_type), intent(inout) :: xnest
type(psb_desc_nest_type), intent(in) :: descs
integer(psb_ipk_), intent(out) :: info
logical, optional, intent(in) :: global
real(psb_dpk_) :: res
integer(psb_ipk_) :: i
real(psb_dpk_) :: blk_val
logical :: global_
type(psb_ctxt_type) :: ctxt
global_ = .true.
if (present(global)) global_ = global
info = 0
res = huge(dzero)
do i = 1, xnest%nblocks
blk_val = psb_gemin(xnest%vects(i), descs%descs(i,i), info, global=.false.)
if (info /= 0) return
if (blk_val < res) res = blk_val
end do
if (global_) then
ctxt = descs%descs(1,1)%get_context()
call psb_min(ctxt, res)
end if
end function psb_d_nest_gemin
! min(x/y) = min_i min(x(i)/y(i)) (single global reduction)
function psb_d_nest_minquotient(xnest, ynest, descs, info, global) result(res)
type(psb_d_nest_vect_type), intent(inout) :: xnest
type(psb_d_nest_vect_type), intent(inout) :: ynest
type(psb_desc_nest_type), intent(in) :: descs
integer(psb_ipk_), intent(out) :: info
logical, optional, intent(in) :: global
real(psb_dpk_) :: res
integer(psb_ipk_) :: i
real(psb_dpk_) :: blk_val
logical :: global_
type(psb_ctxt_type) :: ctxt
global_ = .true.
if (present(global)) global_ = global
info = 0
res = huge(dzero)
do i = 1, xnest%nblocks
blk_val = psb_minquotient(xnest%vects(i), ynest%vects(i), &
& descs%descs(i,i), info, global=.false.)
if (info /= 0) return
if (blk_val < res) res = blk_val
end do
if (global_) then
ctxt = descs%descs(1,1)%get_context()
call psb_min(ctxt, res)
end if
end function psb_d_nest_minquotient
! y(i) = x(i) .* y(i) element-wise for each block i
subroutine psb_d_nest_gemlt(xnest, ynest, descs, info)
type(psb_d_nest_vect_type), intent(inout) :: xnest
type(psb_d_nest_vect_type), intent(inout) :: ynest
type(psb_desc_nest_type), intent(in) :: descs
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
info = 0
do i = 1, xnest%nblocks
call psb_gemlt(xnest%vects(i), ynest%vects(i), descs%descs(i,i), info)
if (info /= 0) return
end do
end subroutine psb_d_nest_gemlt
! y(i) = x(i) ./ y(i) element-wise for each block i
subroutine psb_d_nest_gediv(xnest, ynest, descs, info)
type(psb_d_nest_vect_type), intent(inout) :: xnest
type(psb_d_nest_vect_type), intent(inout) :: ynest
type(psb_desc_nest_type), intent(in) :: descs
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
info = 0
do i = 1, xnest%nblocks
call psb_gediv(xnest%vects(i), ynest%vects(i), descs%descs(i,i), info)
if (info /= 0) return
end do
end subroutine psb_d_nest_gediv
! y(i) = 1/x(i) element-wise for each block i
subroutine psb_d_nest_geinv(xnest, ynest, descs, info)
type(psb_d_nest_vect_type), intent(inout) :: xnest
type(psb_d_nest_vect_type), intent(inout) :: ynest
type(psb_desc_nest_type), intent(in) :: descs
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
info = 0
do i = 1, xnest%nblocks
call psb_geinv(xnest%vects(i), ynest%vects(i), descs%descs(i,i), info)
if (info /= 0) return
end do
end subroutine psb_d_nest_geinv
! y(i) = |x(i)| element-wise for each block i
subroutine psb_d_nest_geabs(xnest, ynest, descs, info)
type(psb_d_nest_vect_type), intent(inout) :: xnest
type(psb_d_nest_vect_type), intent(inout) :: ynest
type(psb_desc_nest_type), intent(in) :: descs
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
info = 0
do i = 1, xnest%nblocks
call psb_geabs(xnest%vects(i), ynest%vects(i), descs%descs(i,i), info)
if (info /= 0) return
end do
end subroutine psb_d_nest_geabs
! z(i) = x(i) + b for each block i (b is a scalar)
subroutine psb_d_nest_geaddconst(xnest, b, znest, descs, info)
type(psb_d_nest_vect_type), intent(inout) :: xnest
real(psb_dpk_), intent(in) :: b
type(psb_d_nest_vect_type), intent(inout) :: znest
type(psb_desc_nest_type), intent(in) :: descs
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
info = 0
do i = 1, xnest%nblocks
call psb_geaddconst(xnest%vects(i), b, znest%vects(i), descs%descs(i,i), info)
if (info /= 0) return
end do
end subroutine psb_d_nest_geaddconst
! z(i) = cmp(x(i), c) for each block i (c is a scalar)
subroutine psb_d_nest_gecmp(xnest, c, znest, descs, info)
type(psb_d_nest_vect_type), intent(inout) :: xnest
real(psb_dpk_), intent(in) :: c
type(psb_d_nest_vect_type), intent(inout) :: znest
type(psb_desc_nest_type), intent(in) :: descs
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
info = 0
do i = 1, xnest%nblocks
call psb_gecmp(xnest%vects(i), c, znest%vects(i), descs%descs(i,i), info)
if (info /= 0) return
end do
end subroutine psb_d_nest_gecmp
subroutine psb_d_nest_mask(cnest, xnest, mnest, t, descs, info)
type(psb_d_nest_vect_type), intent(inout) :: cnest
type(psb_d_nest_vect_type), intent(inout) :: xnest
type(psb_d_nest_vect_type), intent(inout) :: mnest
logical, intent(out) :: t
type(psb_desc_nest_type), intent(in) :: descs
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
logical :: t_dummy
real(psb_dpk_) :: mmax
info = 0
do i = 1, cnest%nblocks
! psb_mask(c, x, m, t, desc) semantics after the double-swap in
! d_vect_mask_v d_base_mask_v d_base_mask_a:
! first arg constraint-type selector in d_base_mask_a
! second arg value to test in d_base_mask_a
! So pass xnest (constraint types) first, cnest (values) second.
call psb_mask(xnest%vects(i), cnest%vects(i), mnest%vects(i), &
& t_dummy, descs%descs(i,i), info)
if (info /= 0) return
end do
! t = .true. iff no block has any violated entry (mnest=1).
mmax = psb_d_nest_geamax(mnest, descs, info)
t = (mmax < 0.5_psb_dpk_)
end subroutine psb_d_nest_mask
! Applies psb_upd_xyz(alpha,beta,gamma,delta,x(i),y(i),z(i))
! for each block i.
subroutine psb_d_nest_upd_xyz(alpha, beta, gamma, delta, xnest, ynest, znest, descs, info)
real(psb_dpk_), intent(in) :: alpha, beta, gamma, delta
type(psb_d_nest_vect_type), intent(inout) :: xnest
type(psb_d_nest_vect_type), intent(inout) :: ynest
type(psb_d_nest_vect_type), intent(inout) :: znest
type(psb_desc_nest_type), intent(in) :: descs
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
info = 0
do i = 1, xnest%nblocks
call psb_upd_xyz(alpha, beta, gamma, delta, &
& xnest%vects(i), ynest%vects(i), znest%vects(i), &
& descs%descs(i,i), info)
if (info /= 0) return
end do
end subroutine psb_d_nest_upd_xyz
! Block-diagonal triangular solve: applies psb_spsm to each
! diagonal block (i,i) of tnest independently.
! y(i) = alpha * T(i,i)^{-1} x(i) + beta * y(i)
subroutine psb_d_nest_spsm(alpha, tnest, xnest, beta, ynest, descs, info, &
& trans, scale, choice, work)
real(psb_dpk_), intent(in) :: alpha, beta
type(psb_d_nest_sparse_mat), intent(inout) :: tnest
type(psb_d_nest_vect_type), intent(inout) :: xnest
type(psb_d_nest_vect_type), intent(inout) :: ynest
type(psb_desc_nest_type), intent(in) :: descs
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans, scale
integer(psb_ipk_), optional, intent(in) :: choice
real(psb_dpk_), optional, intent(inout), target :: work(:)
integer(psb_ipk_) :: i
info = 0
do i = 1, tnest%nrblocks
if (.not. tnest%has_block(i, i)) then
! No diagonal block: treat as identity => y(i) = alpha*x(i) + beta*y(i)
call psb_geaxpby(alpha, xnest%vects(i), beta, ynest%vects(i), &
& descs%descs(i,i), info)
else
call psb_spsm(alpha, tnest%mats(i,i), xnest%vects(i), beta, ynest%vects(i), &
& descs%descs(i,i), info, trans=trans, scale=scale, &
& choice=choice, work=work)
end if
if (info /= 0) return
end do
end subroutine psb_d_nest_spsm
end module psb_d_nest_psblas_mod

@ -0,0 +1,149 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific without permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! module: psb_d_nest_mat_mod
!
! Defines psb_d_nest_sparse_mat: a block-structured distributed sparse
! matrix for double precision real arithmetic.
!
! The matrix is stored as a 2-D array of psb_dspmat_type sub-matrices.
! A companion logical array blk_present(i,j) flags which blocks are
! non-null (absent blocks contribute zero to any product).
!
! Descriptor convention (current nested design)
! ---------------------------------------------
! Each matrix block (i,j) is associated with descs(i,j) from the
! corresponding psb_desc_nest_type. Nested tools (psb_spall_nest,
! psb_spins_nest, psb_spasb_nest, psb_spmm) consistently pass
! descs(i,j) together with mats(i,j).
!
! A block may be structurally absent (NULL/zero): this is represented by
! blk_present(i,j)=.false. and mats(i,j) left unbuilt. In that case the
! block contributes zero and is skipped by nested kernels.
!
! Descriptor storage is distinct from matrix presence: descriptors are
! typically defined for all block positions in descs(:,:), while actual
! matrix blocks may be present only on a subset.
!
! Reference examples in test/pdegen:
! * psb_d_pde_nest.full.F90 (A(2,2) left NULL, blk_present(2,2)=.false.)
! * psb_d_nest_tools.F90 and psb_d_pde_nest_full_tools.F90
! (2-D desc_nest%descs(i,j) used in nested allocation/assembly).
!
module psb_d_nest_mat_mod
use psb_d_mat_mod
implicit none
type :: psb_d_nest_sparse_mat
integer(psb_ipk_) :: nrblocks = 0
integer(psb_ipk_) :: ncblocks = 0
type(psb_dspmat_type), allocatable :: mats(:,:)
logical, allocatable :: blk_present(:,:)
contains
procedure :: get_nrblocks => psb_d_nest_mat_get_nrb
procedure :: get_ncblocks => psb_d_nest_mat_get_ncb
procedure :: has_block => psb_d_nest_mat_has_block
procedure :: sizeof => psb_d_nest_mat_sizeof
procedure :: free => psb_d_nest_mat_free
end type psb_d_nest_sparse_mat
contains
! get_nrblocks / get_ncblocks
function psb_d_nest_mat_get_nrb(a) result(n)
class(psb_d_nest_sparse_mat), intent(in) :: a
integer(psb_ipk_) :: n
n = a%nrblocks
end function psb_d_nest_mat_get_nrb
function psb_d_nest_mat_get_ncb(a) result(n)
class(psb_d_nest_sparse_mat), intent(in) :: a
integer(psb_ipk_) :: n
n = a%ncblocks
end function psb_d_nest_mat_get_ncb
! has_block: return .true. if block (i,j) is non-null
function psb_d_nest_mat_has_block(a, i, j) result(hp)
class(psb_d_nest_sparse_mat), intent(in) :: a
integer(psb_ipk_), intent(in) :: i, j
logical :: hp
hp = .false.
if (i < 1 .or. i > a%nrblocks) return
if (j < 1 .or. j > a%ncblocks) return
if (.not. allocated(a%blk_present)) return
hp = a%blk_present(i, j)
end function psb_d_nest_mat_has_block
! sizeof: total storage across all allocated sub-matrices
function psb_d_nest_mat_sizeof(a) result(s)
class(psb_d_nest_sparse_mat), intent(in) :: a
integer(psb_epk_) :: s
integer(psb_ipk_) :: i, j
s = 0_psb_epk_
if (allocated(a%mats)) then
do j = 1, a%ncblocks
do i = 1, a%nrblocks
if (a%blk_present(i, j)) s = s + a%mats(i, j)%sizeof()
end do
end do
end if
end function psb_d_nest_mat_sizeof
! free: release all sub-matrices
subroutine psb_d_nest_mat_free(a, info)
class(psb_d_nest_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, j, linfo
info = 0
if (allocated(a%mats)) then
do j = 1, a%ncblocks
do i = 1, a%nrblocks
if (a%blk_present(i, j)) then
call a%mats(i, j)%free()
end if
end do
end do
deallocate(a%mats, stat=linfo)
if (linfo /= 0 .and. info == 0) info = linfo
end if
if (allocated(a%blk_present)) then
deallocate(a%blk_present, stat=linfo)
if (linfo /= 0 .and. info == 0) info = linfo
end if
a%nrblocks = 0
a%ncblocks = 0
end subroutine psb_d_nest_mat_free
end module psb_d_nest_mat_mod

@ -0,0 +1,109 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! module: psb_d_nest_vect_mod
!
! Defines psb_d_nest_vect_type: a block-structured distributed dense
! vector for double precision real arithmetic. Each sub-vector is a
! standard psb_d_vect_type assembled under its own descriptor.
!
! Parallel BLAS operations (nrm2, dot, axpby) are exposed as module
! subroutines/functions in psb_d_nest_psblas_mod so that they can
! exploit a single global reduction per call.
!
module psb_d_nest_vect_mod
use psb_d_vect_mod
use psb_desc_mod
implicit none
type :: psb_d_nest_vect_type
integer(psb_ipk_) :: nblocks = 0
type(psb_d_vect_type), allocatable :: vects(:)
contains
procedure :: get_nblocks => psb_d_nest_vect_get_nblocks
procedure :: zero => psb_d_nest_vect_zero
procedure :: sizeof => psb_d_nest_vect_sizeof
procedure :: free => psb_d_nest_vect_free
end type psb_d_nest_vect_type
contains
! get_nblocks
function psb_d_nest_vect_get_nblocks(x) result(nb)
class(psb_d_nest_vect_type), intent(in) :: x
integer(psb_ipk_) :: nb
nb = x%nblocks
end function psb_d_nest_vect_get_nblocks
! zero: set all sub-vectors to zero (local, no halo zeroing needed)
subroutine psb_d_nest_vect_zero(x)
class(psb_d_nest_vect_type), intent(inout) :: x
integer(psb_ipk_) :: i
if (allocated(x%vects)) then
do i = 1, x%nblocks
call x%vects(i)%zero()
end do
end if
end subroutine psb_d_nest_vect_zero
! sizeof: total bytes across all sub-vectors
function psb_d_nest_vect_sizeof(x) result(s)
class(psb_d_nest_vect_type), intent(in) :: x
integer(psb_epk_) :: s
integer(psb_ipk_) :: i
s = 0_psb_epk_
if (allocated(x%vects)) then
do i = 1, x%nblocks
s = s + x%vects(i)%sizeof()
end do
end if
end function psb_d_nest_vect_sizeof
! free: release all sub-vectors
subroutine psb_d_nest_vect_free(x, info)
class(psb_d_nest_vect_type), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, linfo
info = 0
if (allocated(x%vects)) then
do i = 1, x%nblocks
call x%vects(i)%free(linfo)
if (linfo /= 0 .and. info == 0) info = linfo
end do
deallocate(x%vects, stat=linfo)
if (linfo /= 0 .and. info == 0) info = linfo
end if
x%nblocks = 0
end subroutine psb_d_nest_vect_free
end module psb_d_nest_vect_mod

@ -0,0 +1,457 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! Nested-specific assembly wrappers for PSBLAS3 descriptor routines
!
module psb_cd_nest_tools_mod
use psb_const_mod, only : psb_ipk_, psb_lpk_, psb_success_, psb_err_alloc_dealloc_, &
psb_err_invalid_input_, psb_err_no_optional_arg_, psb_err_from_subroutine_, &
psb_ctxt_type
use psb_error_mod, only : psb_errpush
use psb_cd_tools_mod, only : psb_cdall, psb_cdasb, psb_cdins, psb_cdcpy, psb_cdprt
use psb_desc_nest_mod, only : psb_desc_nest_type
implicit none
private
public :: psb_cdall_nest, psb_cdins_nest, psb_cdins_nest_rc, &
psb_cdasb_nest, psb_cdfree_nest, psb_cdcpy_nest, psb_cdprt_nest
! Column-only form: (blk_j, nz, ja, desc_nest, info [,mask, lidx])
! Row+column form: (blk_i, blk_j, nz, ia, ja, desc_nest, info)
interface psb_cdins_nest
#if defined(PSB_IPK4) && defined(PSB_LPK8)
module procedure psb_cdins_nest_c
module procedure psb_cdins_nest_rc_sub
#endif
module procedure psb_lcdins_nest_c
module procedure psb_lcdins_nest_rc
end interface
! Row+column form: (blk_i, blk_j, nz, ia, ja, desc_nest, info)
interface psb_cdins_nest_rc
#if defined(PSB_IPK4) && defined(PSB_LPK8)
module procedure psb_cdins_nest_rc_sub
#endif
module procedure psb_lcdins_nest_rc
end interface
contains
! Allocates the nested descriptor structure and creates block
! descriptors. The first block of each row uses psb_cdall with
! the given local row count; subsequent blocks in the same row
! are clones of the first block (same row distribution).
!
! Arguments:
! ctxt - PSBLAS context
! desc_nest - nested descriptor (output)
! info - error code (output)
! nrblocks - number of block rows (optional, default 2)
! ncblocks - number of block columns (optional, default 2)
! nl - local row count per process (required for first blocks)
subroutine psb_cdall_nest(ctxt, desc_nest, info, nrblocks, ncblocks, nl)
type(psb_ctxt_type), intent(in) :: ctxt
type(psb_desc_nest_type), intent(out) :: desc_nest
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in), optional :: nrblocks, ncblocks, nl
integer(psb_ipk_) :: i, j, nr, nc, nl_
character(len=20) :: name
info = psb_success_
name = 'psb_cdall_nest'
! Set default dimensions
nr = 2
nc = 2
if (present(nrblocks)) nr = nrblocks
if (present(ncblocks)) nc = ncblocks
if (.not. present(nl)) then
info = psb_err_no_optional_arg_
call psb_errpush(info, name, a_err='nl (local row count)')
return
end if
nl_ = nl
! Allocate nested descriptor structure
desc_nest%nrblocks = nr
desc_nest%ncblocks = nc
allocate(desc_nest%descs(nr, nc), stat=info)
if (info /= 0) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
return
end if
! Build descriptors: each block gets its own independent psb_cdall.
! Cloning a build-state descriptor shares its base_desc pointer; when
! psb_cdasb_nest assembles both the original and the clone the shared
! base_desc is rebuilt twice, corrupting the global-to-local mapping of
! every block in that row. Independent allocations avoid this entirely.
do i = 1, nr
do j = 1, nc
call psb_cdall(ctxt, desc_nest%descs(i, j), info, nl=nl_)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_, name)
return
end if
end do
end do
end subroutine psb_cdall_nest
#if defined(PSB_IPK4) && defined(PSB_LPK8)
! psb_cdins_nest_rc_sub: row+col form, ipk_ nz only when ipk_ /= lpk_
subroutine psb_cdins_nest_rc_sub(blk_i, blk_j, nz, ia, ja, desc_nest, info)
integer(psb_ipk_), intent(in) :: blk_i, blk_j, nz
integer(psb_lpk_), intent(in) :: ia(:), ja(:)
type(psb_desc_nest_type), intent(inout) :: desc_nest
integer(psb_ipk_), intent(out) :: info
character(len=20) :: name
info = psb_success_
name = 'psb_cdins_nest'
if (nz == 0) return
if (blk_i < 1 .or. blk_i > desc_nest%nrblocks .or. &
blk_j < 1 .or. blk_j > desc_nest%ncblocks) then
info = psb_err_invalid_input_
call psb_errpush(info, name, a_err='invalid block indices')
return
end if
call psb_cdins(nz, ia, ja, desc_nest%descs(blk_i, blk_j), info)
if (info /= psb_success_) &
call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdins')
end subroutine psb_cdins_nest_rc_sub
! psb_cdins_nest_c: col-only form, ipk_ nz only when ipk_ /= lpk_
subroutine psb_cdins_nest_c(blk_j, nz, ja, desc_nest, info, mask, lidx)
integer(psb_ipk_), intent(in) :: blk_j, nz
integer(psb_lpk_), intent(in) :: ja(:)
type(psb_desc_nest_type), intent(inout) :: desc_nest
integer(psb_ipk_), intent(out) :: info
logical, intent(in), optional, target :: mask(:)
integer(psb_ipk_), intent(in), optional :: lidx(:)
integer(psb_ipk_) :: i, linfo
character(len=20) :: name
info = psb_success_
name = 'psb_cdins_nest'
if (nz == 0) return
if (blk_j < 1 .or. blk_j > desc_nest%ncblocks) then
info = psb_err_invalid_input_
call psb_errpush(info, name, a_err='invalid block column index')
return
end if
do i = 1, desc_nest%nrblocks
linfo = psb_success_
if (present(mask)) then
if (present(lidx)) then
call psb_cdins(nz, ja, desc_nest%descs(i, blk_j), linfo, mask=mask, lidx=lidx)
else
call psb_cdins(nz, ja, desc_nest%descs(i, blk_j), linfo, mask=mask)
end if
else
if (present(lidx)) then
call psb_cdins(nz, ja, desc_nest%descs(i, blk_j), linfo, lidx=lidx)
else
call psb_cdins(nz, ja, desc_nest%descs(i, blk_j), linfo)
end if
end if
if (linfo /= psb_success_ .and. info == psb_success_) then
info = linfo
call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdins')
end if
end do
end subroutine psb_cdins_nest_c
#endif
! psb_lcdins_nest_rc: row+col form, lpk_ nz
!
! When entries in block (blk_i, blk_j) reference columns owned by other
! processes, use the col-only form afterwards to broadcast those column
! indices across all row-blocks in block-col blk_j.
subroutine psb_lcdins_nest_rc(blk_i, blk_j, nz, ia, ja, desc_nest, info)
integer(psb_ipk_), intent(in) :: blk_i, blk_j
integer(psb_lpk_), intent(in) :: nz, ia(:), ja(:)
type(psb_desc_nest_type), intent(inout) :: desc_nest
integer(psb_ipk_), intent(out) :: info
character(len=20) :: name
info = psb_success_
name = 'psb_cdins_nest'
if (nz == 0) return
if (blk_i < 1 .or. blk_i > desc_nest%nrblocks .or. &
blk_j < 1 .or. blk_j > desc_nest%ncblocks) then
info = psb_err_invalid_input_
call psb_errpush(info, name, a_err='invalid block indices')
return
end if
call psb_cdins(nz, ia, ja, desc_nest%descs(blk_i, blk_j), info)
if (info /= psb_success_) &
call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdins')
end subroutine psb_lcdins_nest_rc
! psb_lcdins_nest_c: col-only form, lpk_ nz
!
! Registers nz global column indices ja into the descriptor for
! block column blk_j across all row-blocks (descs(i, blk_j) for
! i = 1..nrblocks). mask and lidx are forwarded to psb_cdins.
subroutine psb_lcdins_nest_c(blk_j, nz, ja, desc_nest, info, mask, lidx)
integer(psb_ipk_), intent(in) :: blk_j
integer(psb_lpk_), intent(in) :: nz, ja(:)
type(psb_desc_nest_type), intent(inout) :: desc_nest
integer(psb_ipk_), intent(out) :: info
logical, intent(in), optional, target :: mask(:)
integer(psb_ipk_), intent(in), optional :: lidx(:)
integer(psb_ipk_) :: i, linfo
character(len=20) :: name
info = psb_success_
name = 'psb_cdins_nest'
if (nz == 0) return
if (blk_j < 1 .or. blk_j > desc_nest%ncblocks) then
info = psb_err_invalid_input_
call psb_errpush(info, name, a_err='invalid block column index')
return
end if
do i = 1, desc_nest%nrblocks
linfo = psb_success_
if (present(mask)) then
if (present(lidx)) then
call psb_cdins(nz, ja, desc_nest%descs(i, blk_j), linfo, mask=mask, lidx=lidx)
else
call psb_cdins(nz, ja, desc_nest%descs(i, blk_j), linfo, mask=mask)
end if
else
if (present(lidx)) then
call psb_cdins(nz, ja, desc_nest%descs(i, blk_j), linfo, lidx=lidx)
else
call psb_cdins(nz, ja, desc_nest%descs(i, blk_j), linfo)
end if
end if
if (linfo /= psb_success_ .and. info == psb_success_) then
info = linfo
call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdins')
end if
end do
end subroutine psb_lcdins_nest_c
! psb_cdasb_nest: Finalize all nested descriptors
!
! Calls psb_cdasb on all block descriptors in the nested structure.
! This must be called after all psb_cdins_nest calls and
! before psb_spasb_nest.
!
! Arguments:
! desc_nest - nested descriptor (input/output)
! info - error code (output)
subroutine psb_cdasb_nest(desc_nest, info)
type(psb_desc_nest_type), intent(inout) :: desc_nest
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, j
character(len=20) :: name
info = psb_success_
name = 'psb_cdasb_nest'
do i = 1, desc_nest%nrblocks
do j = 1, desc_nest%ncblocks
call psb_cdasb(desc_nest%descs(i, j), info)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdasb')
return
end if
end do
end do
end subroutine psb_cdasb_nest
! psb_cdfree_nest: Free all nested descriptors
!
! Calls psb_cdfree on every block descriptor in the nested
! structure, then deallocates the descriptor array and resets
! nrblocks/ncblocks to 0. Mirrors what psb_cdfree does for a
! single psb_desc_type.
!
! Arguments:
! desc_nest - nested descriptor (input/output)
! info - error code (output)
!
subroutine psb_cdfree_nest(desc_nest, info)
type(psb_desc_nest_type), intent(inout) :: desc_nest
integer(psb_ipk_), intent(out) :: info
character(len=20) :: name
info = psb_success_
name = 'psb_cdfree_nest'
call desc_nest%free(info)
if (info /= psb_success_) &
call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_desc_nest_free')
end subroutine psb_cdfree_nest
! psb_cdcpy_nest: Deep copy (clone) a nested descriptor
!
! Allocates desc_out and clones each block descriptor from desc_in
! using psb_cdcpy, preserving the full row/column block structure.
!
! Arguments:
! desc_in - source nested descriptor (inout clone may need to read internal state)
! desc_out - destination nested descriptor (output)
! info - error code (output)
subroutine psb_cdcpy_nest(desc_in, desc_out, info)
type(psb_desc_nest_type), intent(inout) :: desc_in
type(psb_desc_nest_type), intent(out) :: desc_out
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, j
character(len=20) :: name
info = psb_success_
name = 'psb_cdcpy_nest'
desc_out%nrblocks = desc_in%nrblocks
desc_out%ncblocks = desc_in%ncblocks
allocate(desc_out%descs(desc_in%nrblocks, desc_in%ncblocks), stat=info)
if (info /= 0) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
return
end if
do i = 1, desc_in%nrblocks
do j = 1, desc_in%ncblocks
call psb_cdcpy(desc_in%descs(i, j), desc_out%descs(i, j), info)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdcpy')
return
end if
end do
end do
end subroutine psb_cdcpy_nest
! psb_cdprt_nest: Print all block descriptors (debugging)
!
! Loops over all (i,j) block descriptors in the nested structure
! and calls psb_cdprt on each, prefixing the output with the block
! index. All optional arguments are forwarded unchanged.
!
! Arguments:
! iout - output unit
! desc_nest - nested descriptor (input)
! glob - passed to psb_cdprt (optional)
! short - passed to psb_cdprt (optional)
! verbosity - passed to psb_cdprt (optional)
subroutine psb_cdprt_nest(iout, desc_nest, glob, short, verbosity)
integer(psb_ipk_), intent(in) :: iout
type(psb_desc_nest_type), intent(in) :: desc_nest
logical, intent(in), optional :: glob, short
integer(psb_ipk_), intent(in), optional :: verbosity
integer(psb_ipk_) :: i, j
do i = 1, desc_nest%nrblocks
do j = 1, desc_nest%ncblocks
write(iout, '(a,i0,a,i0,a)') 'Block (', i, ',', j, '):'
if (present(glob)) then
if (present(short)) then
if (present(verbosity)) then
call psb_cdprt(iout, desc_nest%descs(i,j), glob=glob, short=short, verbosity=verbosity)
else
call psb_cdprt(iout, desc_nest%descs(i,j), glob=glob, short=short)
end if
else
if (present(verbosity)) then
call psb_cdprt(iout, desc_nest%descs(i,j), glob=glob, verbosity=verbosity)
else
call psb_cdprt(iout, desc_nest%descs(i,j), glob=glob)
end if
end if
else
if (present(short)) then
if (present(verbosity)) then
call psb_cdprt(iout, desc_nest%descs(i,j), short=short, verbosity=verbosity)
else
call psb_cdprt(iout, desc_nest%descs(i,j), short=short)
end if
else
if (present(verbosity)) then
call psb_cdprt(iout, desc_nest%descs(i,j), verbosity=verbosity)
else
call psb_cdprt(iout, desc_nest%descs(i,j))
end if
end if
end if
end do
end do
end subroutine psb_cdprt_nest
end module psb_cd_nest_tools_mod

@ -0,0 +1,433 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! Nested-specific assembly wrappers for PSBLAS3 double precision matrix and vector routines
!
module psb_d_nest_tools_mod
use psb_const_mod, only : psb_ipk_, psb_lpk_, psb_dpk_, psb_success_, psb_err_alloc_dealloc_, &
psb_err_invalid_input_, psb_err_from_subroutine_, &
psb_dupl_add_, psb_dupl_ovwrt_, psb_dupl_err_, psb_ctxt_type
use psb_error_mod, only : psb_errpush
use psb_d_tools_mod, only : psb_spall, psb_spins, psb_spasb, psb_spfree, psb_sprn, &
psb_geall, psb_geins, psb_geasb, psb_gefree
use psb_desc_nest_mod, only : psb_desc_nest_type
use psb_d_nest_mat_mod, only : psb_d_nest_sparse_mat
use psb_d_nest_vect_mod, only : psb_d_nest_vect_type
implicit none
private
public :: psb_spall_nest, psb_spins_nest, psb_spasb_nest, psb_spfree_nest, psb_sprn_nest, &
psb_geall_nest, psb_geins_nest, psb_geasb_nest, psb_gefree_nest
contains
! Allocates all (nrblocks x ncblocks) sparse matrix blocks
! and marks all as present. psb_spins_nest lazy-allocates individual
! blocks on first insertion; call psb_spall_nest instead when the
! full block structure is known up front.
subroutine psb_spall_nest(a_nest, desc_nest, info, nnz)
type(psb_d_nest_sparse_mat), intent(inout) :: a_nest
type(psb_desc_nest_type), intent(in) :: desc_nest
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in), optional :: nnz
integer(psb_ipk_) :: i, j, linfo
character(len=20) :: name
info = psb_success_
name = 'psb_spall_nest'
a_nest%nrblocks = desc_nest%nrblocks
a_nest%ncblocks = desc_nest%ncblocks
if (.not. allocated(a_nest%mats)) then
allocate(a_nest%mats(a_nest%nrblocks, a_nest%ncblocks), stat=info)
if (info /= 0) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
return
end if
end if
if (.not. allocated(a_nest%blk_present)) then
allocate(a_nest%blk_present(a_nest%nrblocks, a_nest%ncblocks), stat=info)
if (info /= 0) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
return
end if
a_nest%blk_present = .false.
end if
do i = 1, a_nest%nrblocks
do j = 1, a_nest%ncblocks
linfo = psb_success_
if (present(nnz)) then
call psb_spall(a_nest%mats(i, j), desc_nest%descs(i, j), linfo, nnz=nnz)
else
call psb_spall(a_nest%mats(i, j), desc_nest%descs(i, j), linfo)
end if
if (linfo /= psb_success_) then
info = linfo
call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_spall')
return
end if
a_nest%blk_present(i, j) = .true.
end do
end do
end subroutine psb_spall_nest
! Inserts nz entries into block (blk_i, blk_j) of the nested matrix.
! The block is lazy-allocated on first insertion if psb_spall_nest
! was not called first.
subroutine psb_spins_nest(blk_i, blk_j, nz, ia, ja, val, a_nest, desc_nest, info)
integer(psb_ipk_), intent(in) :: blk_i, blk_j, nz
integer(psb_lpk_), intent(in) :: ia(:), ja(:)
real(psb_dpk_), intent(in) :: val(:)
type(psb_d_nest_sparse_mat), intent(inout) :: a_nest
type(psb_desc_nest_type), intent(inout) :: desc_nest
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nnz_est
character(len=20) :: name
info = psb_success_
name = 'psb_spins_nest'
if (nz == 0) return
if (blk_i < 1 .or. blk_i > a_nest%nrblocks .or. &
blk_j < 1 .or. blk_j > a_nest%ncblocks) then
info = psb_err_invalid_input_
call psb_errpush(info, name, a_err='invalid block indices')
return
end if
if (.not. allocated(a_nest%mats)) then
allocate(a_nest%mats(a_nest%nrblocks, a_nest%ncblocks), stat=info)
if (info /= 0) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
return
end if
allocate(a_nest%blk_present(a_nest%nrblocks, a_nest%ncblocks), stat=info)
if (info /= 0) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
return
end if
a_nest%blk_present = .false.
end if
if (.not. a_nest%blk_present(blk_i, blk_j)) then
! Estimate nnz: use nz + 50% buffer for future insertions
nnz_est = max(nz, 10) + nz / 2
call psb_spall(a_nest%mats(blk_i, blk_j), &
desc_nest%descs(blk_i, blk_j), info, nnz=nnz_est)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_spall')
return
end if
a_nest%blk_present(blk_i, blk_j) = .true.
end if
call psb_spins(nz, ia, ja, val, a_nest%mats(blk_i, blk_j), &
desc_nest%descs(blk_i, blk_j), info)
if (info /= psb_success_) &
call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_spins')
end subroutine psb_spins_nest
! Calls psb_spasb on all present block matrices.
! Must be called after psb_cdasb_nest.
subroutine psb_spasb_nest(a_nest, desc_nest, info, dupl)
type(psb_d_nest_sparse_mat), intent(inout) :: a_nest
type(psb_desc_nest_type), intent(inout) :: desc_nest
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in), optional :: dupl
integer(psb_ipk_) :: i, j, dupl_, linfo
character(len=20) :: name
info = psb_success_
name = 'psb_spasb_nest'
dupl_ = psb_dupl_add_
if (present(dupl)) dupl_ = dupl
do i = 1, a_nest%nrblocks
do j = 1, a_nest%ncblocks
if (a_nest%blk_present(i, j)) then
linfo = psb_success_
if (dupl_ == psb_dupl_add_) then
call psb_spasb(a_nest%mats(i, j), desc_nest%descs(i, j), linfo, &
dupl=psb_dupl_add_)
else if (dupl_ == psb_dupl_ovwrt_) then
call psb_spasb(a_nest%mats(i, j), desc_nest%descs(i, j), linfo, &
dupl=psb_dupl_ovwrt_)
else if (dupl_ == psb_dupl_err_) then
call psb_spasb(a_nest%mats(i, j), desc_nest%descs(i, j), linfo, &
dupl=psb_dupl_err_)
else
call psb_spasb(a_nest%mats(i, j), desc_nest%descs(i, j), linfo)
end if
if (linfo /= psb_success_) then
info = linfo
call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_spasb')
return
end if
end if
end do
end do
end subroutine psb_spasb_nest
! Calls psb_spfree on every present block, then deallocates the
! mats and blk_present arrays and resets nrblocks/ncblocks to 0.
subroutine psb_spfree_nest(a_nest, desc_nest, info)
type(psb_d_nest_sparse_mat), intent(inout) :: a_nest
type(psb_desc_nest_type), intent(in) :: desc_nest
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, j, linfo
character(len=20) :: name
info = psb_success_
name = 'psb_spfree_nest'
if (allocated(a_nest%mats)) then
do i = 1, a_nest%nrblocks
do j = 1, a_nest%ncblocks
if (allocated(a_nest%blk_present)) then
if (a_nest%blk_present(i, j)) then
linfo = psb_success_
call psb_spfree(a_nest%mats(i, j), desc_nest%descs(i, j), linfo)
if (linfo /= psb_success_ .and. info == psb_success_) then
info = linfo
call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_spfree')
end if
end if
end if
end do
end do
deallocate(a_nest%mats, stat=linfo)
if (linfo /= 0 .and. info == psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
end if
end if
if (allocated(a_nest%blk_present)) then
deallocate(a_nest%blk_present, stat=linfo)
if (linfo /= 0 .and. info == psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
end if
end if
a_nest%nrblocks = 0
a_nest%ncblocks = 0
end subroutine psb_spfree_nest
! Calls psb_sprn on every present block matrix, resetting it to
! the build state while preserving the sparsity pattern.
subroutine psb_sprn_nest(a_nest, desc_nest, info, clear)
type(psb_d_nest_sparse_mat), intent(inout) :: a_nest
type(psb_desc_nest_type), intent(in) :: desc_nest
integer(psb_ipk_), intent(out) :: info
logical, intent(in), optional :: clear
integer(psb_ipk_) :: i, j, linfo
character(len=20) :: name
info = psb_success_
name = 'psb_sprn_nest'
if (.not. allocated(a_nest%mats) .or. .not. allocated(a_nest%blk_present)) return
do i = 1, a_nest%nrblocks
do j = 1, a_nest%ncblocks
if (a_nest%blk_present(i, j)) then
linfo = psb_success_
if (present(clear)) then
call psb_sprn(a_nest%mats(i, j), desc_nest%descs(i, j), linfo, clear=clear)
else
call psb_sprn(a_nest%mats(i, j), desc_nest%descs(i, j), linfo)
end if
if (linfo /= psb_success_ .and. info == psb_success_) then
info = linfo
call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_sprn')
end if
end if
end do
end do
end subroutine psb_sprn_nest
! Allocates one sub-vector per block-row, using descs(i, 1) as
! the row descriptor for block i. Must be called before psb_geins_nest.
subroutine psb_geall_nest(x_nest, desc_nest, info)
type(psb_d_nest_vect_type), intent(out) :: x_nest
type(psb_desc_nest_type), intent(in) :: desc_nest
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, linfo
character(len=20) :: name
info = psb_success_
name = 'psb_geall_nest'
x_nest%nblocks = desc_nest%nrblocks
allocate(x_nest%vects(x_nest%nblocks), stat=info)
if (info /= 0) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
return
end if
do i = 1, x_nest%nblocks
linfo = psb_success_
call psb_geall(x_nest%vects(i), desc_nest%descs(i, 1), linfo)
if (linfo /= psb_success_) then
info = linfo
call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_geall')
return
end if
end do
end subroutine psb_geall_nest
! Inserts m entries into block blk_i of the nested vector.
subroutine psb_geins_nest(blk_i, m, irw, val, x_nest, desc_nest, info, local)
integer(psb_ipk_), intent(in) :: blk_i, m
integer(psb_lpk_), intent(in) :: irw(:)
real(psb_dpk_), intent(in) :: val(:)
type(psb_d_nest_vect_type), intent(inout) :: x_nest
type(psb_desc_nest_type), intent(in) :: desc_nest
integer(psb_ipk_), intent(out) :: info
logical, intent(in), optional :: local
character(len=20) :: name
info = psb_success_
name = 'psb_geins_nest'
if (m == 0) return
if (blk_i < 1 .or. blk_i > x_nest%nblocks) then
info = psb_err_invalid_input_
call psb_errpush(info, name, a_err='invalid block index')
return
end if
if (present(local)) then
call psb_geins(m, irw, val, x_nest%vects(blk_i), desc_nest%descs(blk_i, 1), info, &
local=local)
else
call psb_geins(m, irw, val, x_nest%vects(blk_i), desc_nest%descs(blk_i, 1), info)
end if
if (info /= psb_success_) &
call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_geins')
end subroutine psb_geins_nest
! Calls psb_geasb on every sub-vector.
! Must be called after psb_cdasb_nest and all psb_geins_nest calls.
subroutine psb_geasb_nest(x_nest, desc_nest, info)
type(psb_d_nest_vect_type), intent(inout) :: x_nest
type(psb_desc_nest_type), intent(in) :: desc_nest
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, linfo
character(len=20) :: name
info = psb_success_
name = 'psb_geasb_nest'
do i = 1, x_nest%nblocks
linfo = psb_success_
call psb_geasb(x_nest%vects(i), desc_nest%descs(i, 1), linfo)
if (linfo /= psb_success_ .and. info == psb_success_) then
info = linfo
call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_geasb')
end if
end do
end subroutine psb_geasb_nest
! Calls psb_gefree on every sub-vector, then deallocates the
! vects array and resets nblocks to 0.
subroutine psb_gefree_nest(x_nest, desc_nest, info)
type(psb_d_nest_vect_type), intent(inout) :: x_nest
type(psb_desc_nest_type), intent(in) :: desc_nest
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, linfo
character(len=20) :: name
info = psb_success_
name = 'psb_gefree_nest'
if (allocated(x_nest%vects)) then
do i = 1, x_nest%nblocks
linfo = psb_success_
call psb_gefree(x_nest%vects(i), desc_nest%descs(i, 1), linfo)
if (linfo /= psb_success_ .and. info == psb_success_) then
info = linfo
call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_gefree')
end if
end do
deallocate(x_nest%vects, stat=linfo)
if (linfo /= 0 .and. info == psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
end if
end if
x_nest%nblocks = 0
end subroutine psb_gefree_nest
end module psb_d_nest_tools_mod

@ -44,4 +44,6 @@ module psb_tools_mod
use psb_d_tools_mod use psb_d_tools_mod
use psb_c_tools_mod use psb_c_tools_mod
use psb_z_tools_mod use psb_z_tools_mod
use psb_cd_nest_tools_mod
use psb_d_nest_tools_mod
end module psb_tools_mod end module psb_tools_mod

@ -15,6 +15,10 @@ set(LIBDIR "${INSTALLDIR}/lib")
# Find the psblas package # Find the psblas package
find_package(psblas REQUIRED PATHS ${INSTALLDIR}) find_package(psblas REQUIRED PATHS ${INSTALLDIR})
set(EXTLIBDIR "/home/fdurastante/psctoolkit/install/lib")
link_directories(${EXTLIBDIR})
# Include directories for the Fortran compiler # Include directories for the Fortran compiler
include_directories(${INCDIR} ${MODDIR}) include_directories(${INCDIR} ${MODDIR})
@ -30,6 +34,9 @@ set(SOURCES_S_PDE3D psb_s_pde3d.F90)
set(SOURCES_D_PDE2D psb_d_pde2d.F90) set(SOURCES_D_PDE2D psb_d_pde2d.F90)
set(SOURCES_S_PDE2D psb_s_pde2d.F90) set(SOURCES_S_PDE2D psb_s_pde2d.F90)
set(SOURCES_D_PDE_NEST_PSBLAS psb_d_pde_nest_psblas.F90)
set(SOURCES_D_PDE_NEST_KRYLOV psb_d_pde_nest_krylov.F90)
# Create executables # Create executables
add_executable(psb_d_pde3d ${SOURCES_D_PDE3D}) add_executable(psb_d_pde3d ${SOURCES_D_PDE3D})
target_link_libraries(psb_d_pde3d psblas::util psblas::linsolve psblas::prec psblas::base) target_link_libraries(psb_d_pde3d psblas::util psblas::linsolve psblas::prec psblas::base)
@ -43,8 +50,14 @@ target_link_libraries(psb_d_pde2d psblas::util psblas::linsolve psblas::prec psb
add_executable(psb_s_pde2d ${SOURCES_S_PDE2D}) add_executable(psb_s_pde2d ${SOURCES_S_PDE2D})
target_link_libraries(psb_s_pde2d psblas::util psblas::linsolve psblas::prec psblas::base) target_link_libraries(psb_s_pde2d psblas::util psblas::linsolve psblas::prec psblas::base)
add_executable(psb_d_pde_nest_psblas ${SOURCES_D_PDE_NEST_PSBLAS})
target_link_libraries(psb_d_pde_nest_psblas psblas::util psblas::linsolve psblas::prec psblas::base)
add_executable(psb_d_pde_nest_krylov ${SOURCES_D_PDE_NEST_KRYLOV})
target_link_libraries(psb_d_pde_nest_krylov psblas::util psblas::linsolve psblas::prec psblas::base)
# Set output directory for executables # Set output directory for executables
foreach(target psb_d_pde3d psb_s_pde3d psb_d_pde2d psb_s_pde2d) foreach(target psb_d_pde3d psb_s_pde3d psb_d_pde2d psb_s_pde2d psb_dist1_pde2d psb_dist2_pde2d psb_dist3_pde2d psb_d_pde_nest_psblas psb_d_pde_nest_krylov)
set_target_properties(${target} PROPERTIES set_target_properties(${target} PROPERTIES
RUNTIME_OUTPUT_DIRECTORY ${EXEDIR} RUNTIME_OUTPUT_DIRECTORY ${EXEDIR}
) )

@ -5,8 +5,10 @@ include $(INCDIR)/Make.inc.psblas
# #
# Libraries used # Libraries used
LIBDIR=$(INSTALLDIR)/lib LIBDIR=$(INSTALLDIR)/lib
PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base # BLASLIBDIR=/opt/share/sdk/intel/nvidia_hpc_sdk/Linux_x86_64/24.3/compilers/lib
LDLIBS=$(PSBLDLIBS) # GKLIBDIR=/home/jalmerol/GKlib/build/Linux-x86_64
PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base
LDLIBS= $(PSBLDLIBS)
# #
# Compilers and such # Compilers and such
# #
@ -16,7 +18,7 @@ FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG).
EXEDIR=./runs EXEDIR=./runs
all: runsd psb_d_pde3d psb_s_pde3d psb_d_pde2d psb_s_pde2d all: runsd psb_d_pde3d psb_s_pde3d psb_d_pde2d psb_s_pde2d psb_d_pde_nest_psblas psb_d_pde_nest_krylov
runsd: runsd:
(if test ! -d runs ; then mkdir runs; fi) (if test ! -d runs ; then mkdir runs; fi)
@ -38,10 +40,17 @@ psb_s_pde2d: psb_s_pde2d.o
$(FLINK) psb_s_pde2d.o -o psb_s_pde2d $(PSBLAS_LIB) $(LDLIBS) $(FLINK) psb_s_pde2d.o -o psb_s_pde2d $(PSBLAS_LIB) $(LDLIBS)
/bin/mv psb_s_pde2d $(EXEDIR) /bin/mv psb_s_pde2d $(EXEDIR)
psb_d_pde_nest_psblas: psb_d_pde_nest_psblas.o
$(FLINK) psb_d_pde_nest_psblas.o -o psb_d_pde_nest_psblas $(PSBLAS_LIB) $(LDLIBS)
/bin/mv psb_d_pde_nest_psblas $(EXEDIR)
psb_d_pde_nest_krylov: psb_d_pde_nest_krylov.o
$(FLINK) psb_d_pde_nest_krylov.o -o psb_d_pde_nest_krylov $(PSBLAS_LIB) $(LDLIBS)
/bin/mv psb_d_pde_nest_krylov $(EXEDIR)
clean: clean:
/bin/rm -f psb_d_pde3d.o psb_d_oacc_pde3d.o psb_s_pde3d.o psb_d_pde2d.o psb_s_pde2d.o *$(.mod) \ /bin/rm -f psb_d_pde3d.o psb_d_oacc_pde3d.o psb_s_pde3d.o psb_d_pde2d.o psb_s_pde2d.o psb_d_pde_nest_psblas.o psb_d_pde_nest_krylov.o *$(.mod) \
$(EXEDIR)/psb_d_pde3d $(EXEDIR)/psb_s_pde3d $(EXEDIR)/psb_d_pde2d $(EXEDIR)/psb_s_pde2d $(EXEDIR)/psb_d_pde3d $(EXEDIR)/psb_s_pde3d $(EXEDIR)/psb_d_pde2d $(EXEDIR)/psb_s_pde2d $(EXEDIR)/psb_d_pde_nest_psblas $(EXEDIR)/psb_d_pde_nest_krylov
verycleanlib: verycleanlib:
(cd ../..; make veryclean) (cd ../..; make veryclean)
lib: lib:

@ -0,0 +1,672 @@
!
! Test code for all subroutines in psb_d_nest_psblas_mod.
! Extends psb_d_pde_nest_first.F90: copies the same 2x2 saddle-point
! setup (n=10 global DOFs per block), then exercises every operation
! exported by psb_d_nest_psblas_mod.
!
! Tested subroutines
! ------------------
! T01 psb_d_nest_spmm y = alpha*A*x + beta*y (block SpMV)
! T02 psb_d_nest_geaxpby y = alpha*x + beta*y
! T03 psb_d_nest_genrm2 ||x||_2 (function)
! T04 psb_d_nest_genrm2s ||x||_2 (subroutine)
! T05 psb_d_nest_gedot dot(x, y)
! T06 psb_d_nest_geamax ||x||_inf
! T07 psb_d_nest_geasum ||x||_1
! T08 psb_d_nest_gemin min(x)
! T09 psb_d_nest_minquotient min(x/y)
! T10 psb_d_nest_gemlt y = y * x (element-wise)
! T11 psb_d_nest_gediv x = x / y (element-wise; result in x)
! T12 psb_d_nest_geinv y = 1/x (result in y)
! T13 psb_d_nest_geabs y = |x| (result in y)
! T14 psb_d_nest_geaddconst z = x + b (result in z)
! T15 psb_d_nest_gecmp z(i)=1 if |x(i)|>=c, 0 otherwise
! T16 psb_d_nest_mask mask operation; t=.true. if all satisfied
! T17 psb_d_nest_upd_xyz y=alpha*x+beta*y; z=gamma*y_new+delta*z
!
program psb_d_pde_nest_psblas
use psb_base_mod
use psb_desc_nest_mod
use psb_d_nest_mod
use psb_d_nest_tools_mod, only : psb_geall_nest, psb_geasb_nest, psb_gefree_nest, &
& psb_geins_nest, psb_spall_nest, psb_spins_nest, &
& psb_spasb_nest
implicit none
!------------------------------------------------------------------
! Parallel context
!------------------------------------------------------------------
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: iam, np, info
!------------------------------------------------------------------
! Problem size
!------------------------------------------------------------------
integer(psb_ipk_), parameter :: n = 100
integer(psb_ipk_) :: nlr, iloc, i
integer(psb_lpk_) :: iglob
!------------------------------------------------------------------
! Per-block descriptors (identical layout as psb_d_pde_nest_first)
!------------------------------------------------------------------
type(psb_desc_type) :: desc1, desc2, desc3, desc4
!------------------------------------------------------------------
! Nested descriptor and nested sparse matrix
!------------------------------------------------------------------
type(psb_desc_nest_type) :: descs
type(psb_d_nest_sparse_mat) :: anest
!------------------------------------------------------------------
! Individual sparse matrices (A11 = Laplacian, A12 = I, A21 = I)
!------------------------------------------------------------------
type(psb_dspmat_type) :: a11, a12, a21
!------------------------------------------------------------------
! Work nested vectors (xnest, ynest, znest reused across tests)
!------------------------------------------------------------------
type(psb_d_nest_vect_type) :: xnest, ynest, znest
!------------------------------------------------------------------
! Insert buffers
!------------------------------------------------------------------
integer(psb_lpk_) :: grow(1)
real(psb_dpk_) :: gval(1)
!------------------------------------------------------------------
! Scalar results and expected values
!------------------------------------------------------------------
real(psb_dpk_) :: res, res2, expected
real(psb_dpk_), parameter :: tol = 1.0e-10_psb_dpk_
logical :: t_mask
!------------------------------------------------------------------
! Test pass/fail counter
!------------------------------------------------------------------
integer(psb_ipk_) :: npass, nfail
character(len=40) :: name = 'psb_d_pde_nest_psblas'
!==================================================================
! Initialise MPI
!==================================================================
call psb_init(ctxt)
call psb_info(ctxt, iam, np)
call psb_set_errverbosity(itwo)
npass = 0
nfail = 0
!==================================================================
! Build per-block descriptors
! Use exact block distribution so total rows = n exactly.
! Ceiling division (n+np-1)/np gives nlr*np > n phantom rows
! when np does not divide n evenly.
!==================================================================
nlr = n / np
if (iam < mod(n, np)) nlr = nlr + 1_psb_ipk_
nlr = max(1_psb_ipk_, nlr)
call psb_cdall(ctxt, desc1, info, nl=nlr)
call psb_cdall(ctxt, desc2, info, nl=nlr)
call desc1%clone(desc3, info)
!==================================================================
! A(1,1) = tridiagonal Laplacian
!==================================================================
call psb_spall(a11, desc1, info, nnz=3*desc1%get_local_rows())
do iloc = 1, desc1%get_local_rows()
call desc1%l2g(iloc, iglob, info)
grow(1)=iglob; gval(1)=2.0_psb_dpk_
call psb_spins(1,grow,grow,gval,a11,desc1,info)
if (iglob>1) then
grow(1)=iglob; gval(1)=-1.0_psb_dpk_
call psb_spins(1,grow,[iglob-1_psb_lpk_],gval,a11,desc1,info)
end if
if (iglob<n) then
grow(1)=iglob; gval(1)=-1.0_psb_dpk_
call psb_spins(1,grow,[iglob+1_psb_lpk_],gval,a11,desc1,info)
end if
end do
!==================================================================
! A(1,2) = Identity
!==================================================================
call psb_spall(a12, desc2, info, nnz=desc2%get_local_rows())
do iloc = 1, desc2%get_local_rows()
call desc2%l2g(iloc, iglob, info)
grow(1)=iglob; gval(1)=1.0_psb_dpk_
call psb_spins(1,grow,grow,gval,a12,desc2,info)
end do
!==================================================================
! A(2,1) = Identity
!==================================================================
call psb_spall(a21, desc3, info, nnz=desc3%get_local_rows())
do iloc = 1, desc3%get_local_rows()
call desc3%l2g(iloc, iglob, info)
grow(1)=iglob; gval(1)=1.0_psb_dpk_
call psb_spins(1,grow,grow,gval,a21,desc3,info)
end do
!------------------------------------------------------------------
! Finalise descriptors and assemble matrices
!------------------------------------------------------------------
call psb_cdasb(desc1, info)
call psb_cdasb(desc2, info)
call psb_cdasb(desc3, info)
call desc2%clone(desc4, info)
call psb_spasb(a11, desc1, info, dupl=psb_dupl_add_)
call psb_spasb(a12, desc2, info, dupl=psb_dupl_add_)
call psb_spasb(a21, desc3, info, dupl=psb_dupl_add_)
!==================================================================
! Assemble nested matrix
!==================================================================
anest%nrblocks = 2
anest%ncblocks = 2
allocate(anest%mats(2,2), anest%blk_present(2,2), stat=info)
if (info /= 0) goto 9999
anest%blk_present = .false.
call psb_move_alloc(a11, anest%mats(1,1), info)
call psb_move_alloc(a12, anest%mats(1,2), info)
call psb_move_alloc(a21, anest%mats(2,1), info)
anest%blk_present(1,1) = .true.
anest%blk_present(1,2) = .true.
anest%blk_present(2,1) = .true.
!==================================================================
! Assemble nested descriptor
!==================================================================
descs%nrblocks = 2
descs%ncblocks = 2
allocate(descs%descs(2,2), stat=info)
if (info /= 0) goto 9999
call desc1%clone(descs%descs(1,1), info)
call desc2%clone(descs%descs(1,2), info)
call desc3%clone(descs%descs(2,1), info)
call desc4%clone(descs%descs(2,2), info)
!==================================================================
! Allocate and assemble work nested vectors
!==================================================================
call psb_geall_nest(xnest, descs, info)
if (info /= psb_success_) goto 9999
call psb_geasb_nest(xnest, descs, info)
if (info /= psb_success_) goto 9999
call psb_geall_nest(ynest, descs, info)
if (info /= psb_success_) goto 9999
call psb_geasb_nest(ynest, descs, info)
if (info /= psb_success_) goto 9999
call psb_geall_nest(znest, descs, info)
if (info /= psb_success_) goto 9999
call psb_geasb_nest(znest, descs, info)
if (info /= psb_success_) goto 9999
!==================================================================
! T01: psb_d_nest_spmm
! x = all 1s; y = anest * x
! Block 1 (Laplacian * ones): interior rows give 0, boundary rows
! give 1 (first) or 1 (last), interior give 0.
! Block 2 (A21 * ones) = ones.
! Print both result blocks for visual inspection.
!==================================================================
if (iam == 0) write(*,'(/,a)') repeat('=',60)
if (iam == 0) write(*,'(a)') 'T01: psb_d_nest_spmm (y = anest * x, x = all-1s)'
call set_nest_val(xnest, done)
call ynest%zero()
call psb_d_nest_spmm(done, anest, xnest, dzero, ynest, descs, info)
if (info /= psb_success_) goto 9999
! call print_nest_vec(ynest, 'y = anest * [1,1]^T', iam, np, ctxt, descs)
! Expected values for y = anest * ones (ng=n ensures exactly n global rows):
! Block 1: row 1 and row n give Lap=1, I=1 => y1=2 (2 boundary rows)
! rows 2..n-1 give Lap=0, I=1 => y1=1 (n-2 interior rows)
! Block 2: I*ones = 1 for all n rows
! amax=2, gemin=1, geasum = (n+2) + n = 2n+2
res = psb_d_nest_geamax(ynest, descs, info)
call check('T01 spmm amax(y)=2', res, 2.0_psb_dpk_, tol, npass, nfail, iam)
res = psb_d_nest_gemin(ynest, descs, info)
call check('T01 spmm gemin(y)=1', res, done, tol, npass, nfail, iam)
res = psb_d_nest_geasum(ynest, descs, info)
expected = 2.0_psb_dpk_ * real(n, psb_dpk_) + 2.0_psb_dpk_
call check('T01 spmm geasum(y)=2n+2', res, expected, tol, npass, nfail, iam)
!==================================================================
! T02: psb_d_nest_geaxpby
! x = all 3s, y = all 2s
! y = 2*x + (-1)*y => y = 2*3 - 2 = 4 (all 4s)
!==================================================================
if (iam == 0) write(*,'(/,a)') repeat('=',60)
if (iam == 0) write(*,'(a)') 'T02: psb_d_nest_geaxpby (y = 2*x - y, x=3 y=2 => 4)'
call set_nest_val(xnest, 3.0_psb_dpk_)
call set_nest_val(ynest, 2.0_psb_dpk_)
call psb_d_nest_geaxpby(2.0_psb_dpk_, xnest, -done, ynest, descs, info)
if (info /= psb_success_) goto 9999
expected = 4.0_psb_dpk_
res = psb_d_nest_geamax(ynest, descs, info)
call check('T02 geaxpby amax(y)=4', res, expected, tol, npass, nfail, iam)
res = psb_d_nest_gemin(ynest, descs, info)
call check('T02 geaxpby amin(y)=4', res, expected, tol, npass, nfail, iam)
!==================================================================
! T03: psb_d_nest_genrm2
! x = all 1s => ||x||_2 = sqrt(2*n) = sqrt(20)
!==================================================================
if (iam == 0) write(*,'(/,a)') repeat('=',60)
if (iam == 0) write(*,'(a)') 'T03: psb_d_nest_genrm2 (x=1 => sqrt(2n))'
call set_nest_val(xnest, done)
res = psb_d_nest_genrm2(xnest, descs, info)
expected = sqrt(2.0_psb_dpk_ * real(n, psb_dpk_))
call check('T03 genrm2(ones)', res, expected, tol, npass, nfail, iam)
!==================================================================
! T04: psb_d_nest_genrm2s (subroutine form; result must equal T03)
!==================================================================
if (iam == 0) write(*,'(/,a)') repeat('=',60)
if (iam == 0) write(*,'(a)') 'T04: psb_d_nest_genrm2s (subroutine form of genrm2)'
call psb_d_nest_genrm2s(res2, xnest, descs, info)
call check('T04 genrm2s == genrm2', res2, res, tol, npass, nfail, iam)
!==================================================================
! T05: psb_d_nest_gedot
! x = all 1s, y = all 2s => dot = 2 * 2*n = 40
!==================================================================
if (iam == 0) write(*,'(/,a)') repeat('=',60)
if (iam == 0) write(*,'(a)') 'T05: psb_d_nest_gedot (x=1 y=2 => 2*2n=40)'
call set_nest_val(xnest, done)
call set_nest_val(ynest, 2.0_psb_dpk_)
res = psb_d_nest_gedot(xnest, ynest, descs, info)
expected = 2.0_psb_dpk_ * 2.0_psb_dpk_ * real(n, psb_dpk_)
call check('T05 gedot', res, expected, tol, npass, nfail, iam)
!==================================================================
! T06: psb_d_nest_geamax
! x = all 5s => ||x||_inf = 5
!==================================================================
if (iam == 0) write(*,'(/,a)') repeat('=',60)
if (iam == 0) write(*,'(a)') 'T06: psb_d_nest_geamax (x=5 => 5)'
call set_nest_val(xnest, 5.0_psb_dpk_)
res = psb_d_nest_geamax(xnest, descs, info)
expected = 5.0_psb_dpk_
call check('T06 geamax', res, expected, tol, npass, nfail, iam)
!==================================================================
! T07: psb_d_nest_geasum
! x = all 1s => ||x||_1 = 2*n = 20
!==================================================================
if (iam == 0) write(*,'(/,a)') repeat('=',60)
if (iam == 0) write(*,'(a)') 'T07: psb_d_nest_geasum (x=1 => 2n=20)'
call set_nest_val(xnest, done)
res = psb_d_nest_geasum(xnest, descs, info)
expected = 2.0_psb_dpk_ * real(n, psb_dpk_)
call check('T07 geasum', res, expected, tol, npass, nfail, iam)
!==================================================================
! T08: psb_d_nest_gemin
! x = all 7s => min = 7
!==================================================================
if (iam == 0) write(*,'(/,a)') repeat('=',60)
if (iam == 0) write(*,'(a)') 'T08: psb_d_nest_gemin (x=7 => 7)'
call set_nest_val(xnest, 7.0_psb_dpk_)
res = psb_d_nest_gemin(xnest, descs, info)
expected = 7.0_psb_dpk_
call check('T08 gemin', res, expected, tol, npass, nfail, iam)
!==================================================================
! T09: psb_d_nest_minquotient
! x = all 3s, y = all 6s => min(x/y) = 0.5
!==================================================================
if (iam == 0) write(*,'(/,a)') repeat('=',60)
if (iam == 0) write(*,'(a)') 'T09: psb_d_nest_minquotient (x=3 y=6 => 0.5)'
call set_nest_val(xnest, 3.0_psb_dpk_)
call set_nest_val(ynest, 6.0_psb_dpk_)
res = psb_d_nest_minquotient(xnest, ynest, descs, info)
expected = 0.5_psb_dpk_
call check('T09 minquotient', res, expected, tol, npass, nfail, iam)
!==================================================================
! T10: psb_d_nest_gemlt
! x = all 2s, y = all 4s => y = y * x = 8 (result in y)
!==================================================================
if (iam == 0) write(*,'(/,a)') repeat('=',60)
if (iam == 0) write(*,'(a)') 'T10: psb_d_nest_gemlt (x=2 y=4 => y=8)'
call set_nest_val(xnest, 2.0_psb_dpk_)
call set_nest_val(ynest, 4.0_psb_dpk_)
call psb_d_nest_gemlt(xnest, ynest, descs, info)
if (info /= psb_success_) goto 9999
expected = 8.0_psb_dpk_
res = psb_d_nest_geamax(ynest, descs, info)
call check('T10 gemlt amax(y)=8', res, expected, tol, npass, nfail, iam)
res = psb_d_nest_gemin(ynest, descs, info)
call check('T10 gemlt amin(y)=8', res, expected, tol, npass, nfail, iam)
!==================================================================
! T11: psb_d_nest_gediv
! x = all 6s, y = all 3s => x = x / y = 2 (result in x)
!==================================================================
if (iam == 0) write(*,'(/,a)') repeat('=',60)
if (iam == 0) write(*,'(a)') 'T11: psb_d_nest_gediv (x=6 y=3 => x=2)'
call set_nest_val(xnest, 6.0_psb_dpk_)
call set_nest_val(ynest, 3.0_psb_dpk_)
call psb_d_nest_gediv(xnest, ynest, descs, info)
if (info /= psb_success_) goto 9999
expected = 2.0_psb_dpk_
res = psb_d_nest_geamax(xnest, descs, info)
call check('T11 gediv amax(x)=2', res, expected, tol, npass, nfail, iam)
res = psb_d_nest_gemin(xnest, descs, info)
call check('T11 gediv amin(x)=2', res, expected, tol, npass, nfail, iam)
!==================================================================
! T12: psb_d_nest_geinv
! x = all 4s => y = 1/x = 0.25 (result in y)
!==================================================================
if (iam == 0) write(*,'(/,a)') repeat('=',60)
if (iam == 0) write(*,'(a)') 'T12: psb_d_nest_geinv (x=4 => y=0.25)'
call set_nest_val(xnest, 4.0_psb_dpk_)
call psb_d_nest_geinv(xnest, ynest, descs, info)
if (info /= psb_success_) goto 9999
expected = 0.25_psb_dpk_
res = psb_d_nest_geamax(ynest, descs, info)
call check('T12 geinv amax(y)=0.25', res, expected, tol, npass, nfail, iam)
res = psb_d_nest_gemin(ynest, descs, info)
call check('T12 geinv amin(y)=0.25', res, expected, tol, npass, nfail, iam)
!==================================================================
! T13: psb_d_nest_geabs
! x = all -3s => y = |x| = 3 (result in y)
!==================================================================
if (iam == 0) write(*,'(/,a)') repeat('=',60)
if (iam == 0) write(*,'(a)') 'T13: psb_d_nest_geabs (x=-3 => y=3)'
call set_nest_val(xnest, -3.0_psb_dpk_)
call psb_d_nest_geabs(xnest, ynest, descs, info)
if (info /= psb_success_) goto 9999
expected = 3.0_psb_dpk_
res = psb_d_nest_geamax(ynest, descs, info)
call check('T13 geabs amax(y)=3', res, expected, tol, npass, nfail, iam)
res = psb_d_nest_gemin(ynest, descs, info)
call check('T13 geabs amin(y)=3', res, expected, tol, npass, nfail, iam)
!==================================================================
! T14: psb_d_nest_geaddconst
! x = all 2s, b = 7.0 => z = x + 7 = 9 (result in z)
!==================================================================
if (iam == 0) write(*,'(/,a)') repeat('=',60)
if (iam == 0) write(*,'(a)') 'T14: psb_d_nest_geaddconst (x=2 b=7 => z=9)'
call set_nest_val(xnest, 2.0_psb_dpk_)
call psb_d_nest_geaddconst(xnest, 7.0_psb_dpk_, znest, descs, info)
if (info /= psb_success_) goto 9999
expected = 9.0_psb_dpk_
res = psb_d_nest_geamax(znest, descs, info)
call check('T14 geaddconst amax(z)=9', res, expected, tol, npass, nfail, iam)
res = psb_d_nest_gemin(znest, descs, info)
call check('T14 geaddconst amin(z)=9', res, expected, tol, npass, nfail, iam)
!==================================================================
! T15a: psb_d_nest_gecmp entries satisfy threshold
! x = all 3s, c = 2.0 => z(i)=1 (since |3| >= 2)
!==================================================================
if (iam == 0) write(*,'(/,a)') repeat('=',60)
if (iam == 0) write(*,'(a)') 'T15a: psb_d_nest_gecmp (x=3 c=2 => z=1)'
call set_nest_val(xnest, 3.0_psb_dpk_)
call psb_d_nest_gecmp(xnest, 2.0_psb_dpk_, znest, descs, info)
if (info /= psb_success_) goto 9999
expected = done
res = psb_d_nest_geamax(znest, descs, info)
call check('T15a gecmp amax(z)=1', res, expected, tol, npass, nfail, iam)
res = psb_d_nest_gemin(znest, descs, info)
call check('T15a gecmp amin(z)=1', res, expected, tol, npass, nfail, iam)
!==================================================================
! T15b: psb_d_nest_gecmp entries do not satisfy threshold
! x = all 1s, c = 2.0 => z(i)=0 (since |1| < 2)
!==================================================================
if (iam == 0) write(*,'(/,a)') repeat('=',60)
if (iam == 0) write(*,'(a)') 'T15b: psb_d_nest_gecmp (x=1 c=2 => z=0)'
call set_nest_val(xnest, done)
call psb_d_nest_gecmp(xnest, 2.0_psb_dpk_, znest, descs, info)
if (info /= psb_success_) goto 9999
expected = dzero
res = psb_d_nest_geamax(znest, descs, info)
call check('T15b gecmp amax(z)=0', res, expected, tol, npass, nfail, iam)
!==================================================================
! T16: psb_d_nest_mask
! Semantics: mask(c, x, m, t)
! c = values to test (first arg)
! x = constraint-type indicators (second arg):
! 2 => satisfied if c(i) > 0
! 1 => satisfied if c(i) >= 0
! -1 => satisfied if c(i) <= 0
! -2 => satisfied if c(i) < 0
! m = output mask (0=satisfied, 1=violated)
! t = .true. iff all entries satisfied
!
! Case: c = all +3 (positive), x = all 2 (check > 0) => m=0 t=T
!==================================================================
if (iam == 0) write(*,'(/,a)') repeat('=',60)
if (iam == 0) write(*,'(a)') 'T16: psb_d_nest_mask (c=3 x=2 => m=0 t=.true.)'
call set_nest_val(xnest, 3.0_psb_dpk_) ! values to test
call set_nest_val(ynest, 2.0_psb_dpk_) ! constraint indicators (type 2: check > 0)
call psb_d_nest_mask(xnest, ynest, znest, t_mask, descs, info)
if (info /= psb_success_) goto 9999
if (iam == 0) then
if (t_mask) then
write(*,'(a)') ' T16 mask: t=.true. PASS (all constraints satisfied)'
npass = npass + 1
else
write(*,'(a)') ' T16 mask: t=.false. FAIL (expected .true.)'
nfail = nfail + 1
end if
end if
!------------------------------------------------------------------
! T16b: c = all -3 (negative), x = all 2 (check > 0) => m=1 t=F
!------------------------------------------------------------------
if (iam == 0) write(*,'(a)') 'T16b: psb_d_nest_mask (c=-3 x=2 => m=1 t=.false.)'
call set_nest_val(xnest, -3.0_psb_dpk_) ! values (negative)
call set_nest_val(ynest, 2.0_psb_dpk_) ! indicators (type 2: check > 0)
call psb_d_nest_mask(xnest, ynest, znest, t_mask, descs, info)
if (info /= psb_success_) goto 9999
if (iam == 0) then
if (.not. t_mask) then
write(*,'(a)') ' T16b mask: t=.false. PASS (all constraints violated)'
npass = npass + 1
else
write(*,'(a)') ' T16b mask: t=.true. FAIL (expected .false.)'
nfail = nfail + 1
end if
end if
!==================================================================
! T17: psb_d_nest_upd_xyz
! Computes: y_new = alpha*x + beta*y
! z_new = gamma*y_new + delta*z
!
! x=1, y=2, z=3, alpha=2, beta=3, gamma=4, delta=5
! => y_new = 2*1 + 3*2 = 8
! => z_new = 4*8 + 5*3 = 47
!==================================================================
if (iam == 0) write(*,'(/,a)') repeat('=',60)
if (iam == 0) write(*,'(a)') &
'T17: psb_d_nest_upd_xyz (x=1 y=2 z=3 a=2 b=3 g=4 d=5 => y=8 z=47)'
call set_nest_val(xnest, done)
call set_nest_val(ynest, 2.0_psb_dpk_)
call set_nest_val(znest, 3.0_psb_dpk_)
call psb_d_nest_upd_xyz(2.0_psb_dpk_, 3.0_psb_dpk_, &
& 4.0_psb_dpk_, 5.0_psb_dpk_, &
& xnest, ynest, znest, descs, info)
if (info /= psb_success_) goto 9999
expected = 8.0_psb_dpk_
res = psb_d_nest_geamax(ynest, descs, info)
call check('T17 upd_xyz amax(y)=8', res, expected, tol, npass, nfail, iam)
res = psb_d_nest_gemin(ynest, descs, info)
call check('T17 upd_xyz amin(y)=8', res, expected, tol, npass, nfail, iam)
expected = 47.0_psb_dpk_
res = psb_d_nest_geamax(znest, descs, info)
call check('T17 upd_xyz amax(z)=47', res, expected, tol, npass, nfail, iam)
res = psb_d_nest_gemin(znest, descs, info)
call check('T17 upd_xyz amin(z)=47', res, expected, tol, npass, nfail, iam)
!==================================================================
! Summary
!==================================================================
if (iam == 0) then
write(*,'(/,a)') repeat('=',60)
write(*,'(a,i0,a,i0,a)') &
' RESULTS: ', npass, ' passed, ', nfail, ' failed'
write(*,'(a)') repeat('=',60)
end if
!==================================================================
! Clean up
!==================================================================
call psb_gefree_nest(xnest, descs, info)
call psb_gefree_nest(ynest, descs, info)
call psb_gefree_nest(znest, descs, info)
call psb_cdfree(desc1, info)
call psb_cdfree(desc2, info)
call psb_cdfree(desc3, info)
call psb_cdfree(desc4, info)
call psb_exit(ctxt)
stop
9999 continue
write(psb_err_unit,*) trim(name), ': error info=', info, ' rank=', iam
call psb_error(ctxt)
call psb_exit(ctxt)
stop
contains
!------------------------------------------------------------------
! Set every local entry of every block to val
!------------------------------------------------------------------
subroutine set_nest_val(v, val)
use psb_base_mod
type(psb_d_nest_vect_type), intent(inout) :: v
real(psb_dpk_), intent(in) :: val
integer(psb_ipk_) :: k, linfo
linfo = 0
do k = 1, v%nblocks
call v%vects(k)%set(val, linfo)
end do
end subroutine set_nest_val
!------------------------------------------------------------------
! Scalar pass/fail check with tolerance
!------------------------------------------------------------------
subroutine check(label, got, expected, tol, np_, nf_, myrank)
use psb_base_mod
character(len=*), intent(in) :: label
real(psb_dpk_), intent(in) :: got, expected, tol
integer(psb_ipk_), intent(inout) :: np_, nf_
integer(psb_ipk_), intent(in) :: myrank
if (myrank /= 0) return
if (abs(got - expected) <= tol * max(done, abs(expected))) then
write(*,'(2x,a,a,f16.10,a,f16.10)') &
'PASS ', trim(label)//' got=', got, ' exp=', expected
np_ = np_ + 1
else
write(*,'(2x,a,a,f16.10,a,f16.10)') &
'FAIL ', trim(label)//' got=', got, ' exp=', expected
nf_ = nf_ + 1
end if
end subroutine check
!------------------------------------------------------------------
! Print every block of a nested vector (one rank at a time).
! Each process flushes stdout before the barrier so that buffered
! output does not bleed into the next process's print window.
!------------------------------------------------------------------
subroutine print_nest_vec(v, label, myrank, nprocs, myctxt, ds)
use psb_base_mod
use iso_fortran_env, only: output_unit
type(psb_d_nest_vect_type), intent(inout) :: v
character(len=*), intent(in) :: label
integer(psb_ipk_), intent(in) :: myrank, nprocs
type(psb_ctxt_type), intent(in) :: myctxt
type(psb_desc_nest_type), intent(in) :: ds
integer(psb_ipk_) :: blk, ip, k, nr, linfo
real(psb_dpk_), allocatable :: vals(:)
do blk = 1, v%nblocks
nr = ds%descs(blk,blk)%get_local_rows()
do ip = 0, nprocs-1
call psb_barrier(myctxt)
if (myrank == ip) then
write(*,'(a,a,a,i0,a)') ' [', trim(label), '] block ', blk, ':'
linfo = 0
allocate(vals(nr), stat=linfo)
if (linfo == 0) vals = v%vects(blk)%get_vect()
do k = 1, nr
write(*,'(4x,i4,f14.6)') k, vals(k)
end do
deallocate(vals)
flush(output_unit)
end if
end do
call psb_barrier(myctxt)
end do
end subroutine print_nest_vec
end program psb_d_pde_nest_psblas
Loading…
Cancel
Save