diff --git a/base/CMakeLists.txt b/base/CMakeLists.txt index 7d03c62bb..5b01ebf76 100644 --- a/base/CMakeLists.txt +++ b/base/CMakeLists.txt @@ -567,6 +567,8 @@ set(PSB_base_source_files modules/tools/psb_m_tools_a_mod.f90 modules/tools/psb_cd_tools_mod.F90 modules/tools/psb_d_tools_mod.F90 + modules/tools/psb_cd_nest_tools_mod.F90 + modules/tools/psb_d_nest_tools_mod.F90 modules/tools/psb_c_tools_mod.F90 modules/tools/psb_e_tools_a_mod.f90 modules/tools/psb_i2_tools_a_mod.f90 @@ -638,6 +640,12 @@ set(PSB_base_source_files modules/desc/psb_hash_map_mod.F90 modules/desc/psb_glist_map_mod.F90 modules/psb_base_mod.f90 + modules/desc/psb_desc_nest_mod.f90 + modules/serial/psb_d_nest_vect_mod.f90 + modules/serial/psb_d_nest_mat_mod.f90 + modules/comm/psb_d_nest_comm_mod.f90 + modules/psblas/psb_d_nest_psblas_mod.f90 + modules/psb_d_nest_mod.f90 ) foreach(file IN LISTS PSB_base_source_files) list(APPEND base_source_files ${CMAKE_CURRENT_LIST_DIR}/${file}) diff --git a/base/modules/Makefile b/base/modules/Makefile index 353f64211..bc8ecfbe8 100644 --- a/base/modules/Makefile +++ b/base/modules/Makefile @@ -36,6 +36,7 @@ SERIAL_MODS=serial/psb_s_serial_mod.o serial/psb_d_serial_mod.o \ serial/psb_i2_base_vect_mod.o serial/psb_i2_vect_mod.o\ serial/psb_i_base_vect_mod.o serial/psb_i_vect_mod.o\ serial/psb_l_base_vect_mod.o serial/psb_l_vect_mod.o\ + serial/psb_i2_base_vect_mod.o serial/psb_i2_vect_mod.o\ serial/psb_d_base_vect_mod.o serial/psb_d_vect_mod.o\ serial/psb_s_base_vect_mod.o serial/psb_s_vect_mod.o\ serial/psb_c_base_vect_mod.o serial/psb_c_vect_mod.o\ @@ -82,7 +83,8 @@ SERIAL_MODS=serial/psb_s_serial_mod.o serial/psb_d_serial_mod.o \ serial/psb_s_base_mat_mod.o serial/psb_s_csr_mat_mod.o serial/psb_s_csc_mat_mod.o serial/psb_s_mat_mod.o \ serial/psb_d_base_mat_mod.o serial/psb_d_csr_mat_mod.o serial/psb_d_csc_mat_mod.o serial/psb_d_mat_mod.o \ serial/psb_c_base_mat_mod.o serial/psb_c_csr_mat_mod.o serial/psb_c_csc_mat_mod.o serial/psb_c_mat_mod.o \ - serial/psb_z_base_mat_mod.o serial/psb_z_csr_mat_mod.o serial/psb_z_csc_mat_mod.o serial/psb_z_mat_mod.o + serial/psb_z_base_mat_mod.o serial/psb_z_csr_mat_mod.o serial/psb_z_csc_mat_mod.o serial/psb_z_mat_mod.o \ + serial/psb_d_nest_vect_mod.o serial/psb_d_nest_mat_mod.o #\ # serial/psb_ls_csr_mat_mod.o serial/psb_ld_csr_mat_mod.o serial/psb_lc_csr_mat_mod.o serial/psb_lz_csr_mat_mod.o #\ @@ -91,10 +93,12 @@ SERIAL_MODS=serial/psb_s_serial_mod.o serial/psb_d_serial_mod.o \ UTIL_MODS = desc/psb_desc_const_mod.o desc/psb_indx_map_mod.o\ desc/psb_gen_block_map_mod.o desc/psb_list_map_mod.o desc/psb_repl_map_mod.o\ desc/psb_glist_map_mod.o desc/psb_hash_map_mod.o desc/psb_hashval.o \ - desc/psb_desc_mod.o auxil/psb_sort_mod.o \ + desc/psb_desc_mod.o desc/psb_desc_nest_mod.o auxil/psb_sort_mod.o \ tools/psb_cd_tools_mod.o \ + tools/psb_cd_nest_tools_mod.o \ tools/psb_i_tools_mod.o tools/psb_l_tools_mod.o \ tools/psb_s_tools_mod.o tools/psb_d_tools_mod.o\ + tools/psb_d_nest_tools_mod.o \ tools/psb_c_tools_mod.o tools/psb_z_tools_mod.o \ tools/psb_i2_tools_a_mod.o tools/psb_m_tools_a_mod.o tools/psb_e_tools_a_mod.o \ tools/psb_s_tools_a_mod.o tools/psb_d_tools_a_mod.o\ @@ -110,6 +114,7 @@ UTIL_MODS = desc/psb_desc_const_mod.o desc/psb_indx_map_mod.o\ comm/psb_s_comm_mod.o comm/psb_d_comm_mod.o\ comm/psb_c_comm_mod.o comm/psb_z_comm_mod.o \ comm/psb_i2_comm_a_mod.o \ + comm/psb_d_nest_comm_mod.o \ comm/psb_m_comm_a_mod.o comm/psb_e_comm_a_mod.o \ comm/psb_s_comm_a_mod.o comm/psb_d_comm_a_mod.o\ comm/psb_c_comm_a_mod.o comm/psb_z_comm_a_mod.o \ @@ -123,12 +128,13 @@ UTIL_MODS = desc/psb_desc_const_mod.o desc/psb_indx_map_mod.o\ psblas/psb_s_psblas_mod.o psblas/psb_c_psblas_mod.o \ psblas/psb_d_psblas_mod.o psblas/psb_z_psblas_mod.o \ psblas/psb_psblas_mod.o \ + psblas/psb_d_nest_psblas_mod.o \ psb_check_mod.o desc/psb_hash_mod.o MODULES=$(BASIC_MODS) $(SERIAL_MODS) $(UTIL_MODS) -OBJS = error.o psb_base_mod.o $(EXTRA_COBJS) cutil.o +OBJS = error.o psb_base_mod.o psb_d_nest_mod.o $(EXTRA_COBJS) cutil.o MODDIR=../../modules INCDIR=../../include LIBDIR=../ @@ -413,12 +419,19 @@ comm/psi_s_comm_a_mod.o comm/psi_d_comm_a_mod.o \ comm/psi_c_comm_a_mod.o comm/psi_z_comm_a_mod.o: desc/psb_desc_mod.o tools/psb_tools_mod.o: tools/psb_cd_tools_mod.o tools/psb_s_tools_mod.o tools/psb_d_tools_mod.o\ + tools/psb_cd_nest_tools_mod.o tools/psb_d_nest_tools_mod.o \ tools/psb_i_tools_mod.o tools/psb_l_tools_mod.o \ tools/psb_c_tools_mod.o tools/psb_z_tools_mod.o \ tools/psb_i2_tools_a_mod.o tools/psb_m_tools_a_mod.o tools/psb_e_tools_a_mod.o \ tools/psb_s_tools_a_mod.o tools/psb_d_tools_a_mod.o\ tools/psb_c_tools_a_mod.o tools/psb_z_tools_a_mod.o +tools/psb_cd_nest_tools_mod.o: tools/psb_cd_nest_tools_mod.F90 tools/psb_cd_tools_mod.o desc/psb_desc_nest_mod.o + $(FC) $(FCOPT) $(FINCLUDES) $(FDEFINES) -c tools/psb_cd_nest_tools_mod.F90 -o tools/psb_cd_nest_tools_mod.o + +tools/psb_d_nest_tools_mod.o: tools/psb_d_nest_tools_mod.F90 tools/psb_d_tools_mod.o \ + desc/psb_desc_nest_mod.o serial/psb_d_nest_mat_mod.o serial/psb_d_nest_vect_mod.o + $(FC) $(FCOPT) $(FINCLUDES) $(FDEFINES) -c tools/psb_d_nest_tools_mod.F90 -o tools/psb_d_nest_tools_mod.o tools/psb_cd_tools_mod.o tools/psb_i_tools_mod.o tools/psb_l_tools_mod.o \ tools/psb_s_tools_mod.o tools/psb_d_tools_mod.o \ @@ -441,6 +454,26 @@ psblas/psb_z_psblas_mod.o: serial/psb_z_vect_mod.o serial/psb_z_mat_mod.o psblas/psb_psblas_mod.o: psblas/psb_s_psblas_mod.o psblas/psb_c_psblas_mod.o psblas/psb_d_psblas_mod.o psblas/psb_z_psblas_mod.o psblas/psb_s_psblas_mod.o psblas/psb_c_psblas_mod.o psblas/psb_d_psblas_mod.o psblas/psb_z_psblas_mod.o: serial/psb_mat_mod.o desc/psb_desc_mod.o +# --- nested mat/vec/desc dependencies --- +desc/psb_desc_nest_mod.o: desc/psb_desc_mod.o +serial/psb_d_nest_vect_mod.o: serial/psb_d_vect_mod.o desc/psb_desc_mod.o +serial/psb_d_nest_mat_mod.o: serial/psb_d_mat_mod.o +comm/psb_d_nest_comm_mod.o: \ + desc/psb_desc_nest_mod.o \ + serial/psb_d_nest_vect_mod.o \ + comm/psb_d_comm_mod.o +psblas/psb_d_nest_psblas_mod.o: \ + desc/psb_desc_nest_mod.o \ + serial/psb_d_nest_vect_mod.o \ + serial/psb_d_nest_mat_mod.o \ + serial/psb_d_mat_mod.o \ + psblas/psb_d_psblas_mod.o \ + comm/psb_d_nest_comm_mod.o +psb_d_nest_mod.o: \ + desc/psb_desc_nest_mod.o \ + serial/psb_d_nest_vect_mod.o \ + serial/psb_d_nest_mat_mod.o \ + psblas/psb_d_nest_psblas_mod.o psb_base_mod.o: $(MODULES) diff --git a/base/modules/comm/psb_d_nest_comm_mod.f90 b/base/modules/comm/psb_d_nest_comm_mod.f90 new file mode 100644 index 000000000..cef64ebf7 --- /dev/null +++ b/base/modules/comm/psb_d_nest_comm_mod.f90 @@ -0,0 +1,93 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! module: psb_d_nest_comm_mod +! +! Communication operations for nested (block-structured) double precision +! real vectors. +! +! psb_d_nest_halo +! Halo exchange for all column blocks of a nested vector. +! Calls psb_halo(x(j), descs(1,j)) for each column block j. +! All descriptors descs(i,j) for fixed j are equivalent; +! Called once before block SpMM to populate ghost entries of x. +! +! psb_d_nest_ovrl +! Overlap update for all row blocks of a nested vector. +! Calls psb_ovrl(x(i), descs(i,i)) for each row block i using the +! diagonal descriptor. +! Called after operations that contribute to overlapping rows +! (e.g. FEM assembly). +! +module psb_d_nest_comm_mod + use psb_desc_nest_mod + use psb_d_nest_vect_mod + use psb_d_comm_mod, only : psb_halo, psb_ovrl + use psb_const_mod, only : psb_ipk_ + implicit none + + private + public :: psb_d_nest_halo, psb_d_nest_ovrl + +contains + + subroutine psb_d_nest_halo(xnest, descs, info, tran, mode, data) + type(psb_d_nest_vect_type), intent(inout) :: xnest + type(psb_desc_nest_type), intent(in) :: descs + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: tran + integer(psb_ipk_), optional, intent(in) :: mode, data + + integer(psb_ipk_) :: j + + info = 0 + do j = 1, xnest%nblocks + call psb_halo(xnest%vects(j), descs%descs(1,j), info, tran=tran, mode=mode, data=data) + if (info /= 0) return + end do + end subroutine psb_d_nest_halo + + subroutine psb_d_nest_ovrl(xnest, descs, info, update, mode) + type(psb_d_nest_vect_type), intent(inout) :: xnest + type(psb_desc_nest_type), intent(in) :: descs + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), optional, intent(in) :: update, mode + + integer(psb_ipk_) :: i + + info = 0 + do i = 1, xnest%nblocks + call psb_ovrl(xnest%vects(i), descs%descs(i,i), info, update=update, mode=mode) + if (info /= 0) return + end do + end subroutine psb_d_nest_ovrl + +end module psb_d_nest_comm_mod diff --git a/base/modules/desc/psb_desc_nest_mod.f90 b/base/modules/desc/psb_desc_nest_mod.f90 new file mode 100644 index 000000000..5260285bf --- /dev/null +++ b/base/modules/desc/psb_desc_nest_mod.f90 @@ -0,0 +1,141 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! module: psb_desc_nest_mod +! +! Defines psb_desc_nest_type: a 2-D array of psb_desc_type objects, +! one per block matrix entry in an nrblocks x ncblocks block system. +! +! +module psb_desc_nest_mod + use psb_desc_mod + implicit none + + type :: psb_desc_nest_type + integer(psb_ipk_) :: nrblocks = 0 + integer(psb_ipk_) :: ncblocks = 0 + type(psb_desc_type), allocatable :: descs(:,:) + contains + procedure :: get_nrblocks => psb_desc_nest_get_nrblocks + procedure :: get_ncblocks => psb_desc_nest_get_ncblocks + procedure :: get_desc => psb_desc_nest_get_desc + procedure :: is_valid => psb_desc_nest_is_valid + procedure :: sizeof => psb_desc_nest_sizeof + procedure :: free => psb_desc_nest_free + end type psb_desc_nest_type + +contains + + + ! get_nrblocks / get_ncblocks + function psb_desc_nest_get_nrblocks(d) result(nb) + class(psb_desc_nest_type), intent(in) :: d + integer(psb_ipk_) :: nb + nb = d%nrblocks + end function psb_desc_nest_get_nrblocks + + function psb_desc_nest_get_ncblocks(d) result(nb) + class(psb_desc_nest_type), intent(in) :: d + integer(psb_ipk_) :: nb + nb = d%ncblocks + end function psb_desc_nest_get_ncblocks + + ! get_desc: copy descriptor (i,j) into the output argument + subroutine psb_desc_nest_get_desc(d, i, j, desc, info) + class(psb_desc_nest_type), intent(in) :: d + integer(psb_ipk_), intent(in) :: i, j + type(psb_desc_type), intent(out):: desc + integer(psb_ipk_), intent(out):: info + + info = 0 + if (i < 1 .or. i > d%nrblocks .or. j < 1 .or. j > d%ncblocks) then + info = -1 + return + end if + desc = d%descs(i,j) + end subroutine psb_desc_nest_get_desc + + ! is_valid: true if all diagonal sub-descriptors are valid + function psb_desc_nest_is_valid(d) result(valid) + class(psb_desc_nest_type), intent(in) :: d + logical :: valid + integer(psb_ipk_) :: i + + valid = (d%nrblocks >= 1) .and. (d%ncblocks >= 1) .and. allocated(d%descs) + if (valid) then + do i = 1, min(d%nrblocks, d%ncblocks) + if (.not. d%descs(i,i)%is_valid()) then + valid = .false. + return + end if + end do + end if + end function psb_desc_nest_is_valid + + ! sizeof: total memory (bytes) of all sub-descriptors + function psb_desc_nest_sizeof(d) result(s) + class(psb_desc_nest_type), intent(in) :: d + integer(psb_epk_) :: s + integer(psb_ipk_) :: i, j + + s = 0_psb_epk_ + if (allocated(d%descs)) then + do j = 1, d%ncblocks + do i = 1, d%nrblocks + s = s + d%descs(i,j)%sizeof() + end do + end do + end if + end function psb_desc_nest_sizeof + + ! free: release all sub-descriptors and reset + subroutine psb_desc_nest_free(d, info) + class(psb_desc_nest_type), intent(inout) :: d + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, j, linfo + + info = 0 + if (allocated(d%descs)) then + do j = 1, d%ncblocks + do i = 1, d%nrblocks + call d%descs(i,j)%free(linfo) + if (linfo /= 0 .and. info == 0) info = linfo + end do + end do + deallocate(d%descs, stat=linfo) + if (linfo /= 0 .and. info == 0) info = linfo + end if + d%nrblocks = 0 + d%ncblocks = 0 + end subroutine psb_desc_nest_free + +end module psb_desc_nest_mod diff --git a/base/modules/psb_d_nest_mod.f90 b/base/modules/psb_d_nest_mod.f90 new file mode 100644 index 000000000..93f4432c1 --- /dev/null +++ b/base/modules/psb_d_nest_mod.f90 @@ -0,0 +1,46 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! module: psb_d_nest_mod +! +! Umbrella module for the nested (block-structured) double precision +! real types. Users need only: +! +! use psb_d_nest_mod +! +! to access all three container types and their parallel operations. +! +module psb_d_nest_mod + use psb_desc_nest_mod + use psb_d_nest_vect_mod + use psb_d_nest_mat_mod + use psb_d_nest_psblas_mod +end module psb_d_nest_mod diff --git a/base/modules/psblas/psb_d_nest_psblas_mod.f90 b/base/modules/psblas/psb_d_nest_psblas_mod.f90 new file mode 100644 index 000000000..9a5a62f3a --- /dev/null +++ b/base/modules/psblas/psb_d_nest_psblas_mod.f90 @@ -0,0 +1,627 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! module: psb_d_nest_psblas_mod +! +! Parallel BLAS operations for the nested (block-structured) double +! precision real types. +! +! psb_d_nest_spmm +! Computes y = alpha * A_nest * x + beta * y (block SpMV). +! Three-phase algorithm: +! Phase 1 — scale y upfront: +! if beta == 0: zero all y(i) +! elif beta /= 1: y(i) = beta * y(i) for each block i +! Phase 2 — single halo exchange per column block: +! call psb_d_nest_halo(xnest, descs) +! Populates ghost entries of x(j) using descs(1,j) for each j. +! All descs(i,j) for fixed j share the same column space, so +! one exchange covers all block-rows. +! Phase 3 — local SpMM accumulation (no further communication): +! For each present block (i,j): +! y(i) += alpha * A(i,j) * x(j) +! (psb_spmm called with doswap=.false. to skip internal halo) +! +! psb_d_nest_geaxpby +! Computes y(i) = alpha * x(i) + beta * y(i) for each block i. +! +! psb_d_nest_genrm2 +! Computes ||x||_2 = sqrt( sum_i ||x(i)||_2^2 ) with a single +! global reduction. +! +! psb_d_nest_genrm2s +! Subroutine form of psb_d_nest_genrm2 (result via intent(out) argument). +! +! psb_d_nest_gedot +! Computes dot(x,y) = sum_i dot(x(i), y(i)) with a single global +! reduction. +! +! psb_d_nest_geamax +! Computes ||x||_inf = max_i ||x(i)||_inf with a single global reduction. +! +! psb_d_nest_geasum +! Computes ||x||_1 = sum_i ||x(i)||_1 with a single global reduction. +! +! psb_d_nest_gemin +! Computes min(x) = min_i min(x(i)) with a single global reduction. +! +! psb_d_nest_minquotient +! Computes min(x/y) = min_i min(x(i)/y(i)) with a single global reduction. +! +! psb_d_nest_gemlt +! Computes y(i) = x(i) .* y(i) element-wise for each block i. +! +! psb_d_nest_gediv +! Computes y(i) = x(i) ./ y(i) element-wise for each block i. +! +! psb_d_nest_geinv +! Computes y(i) = 1 / x(i) element-wise for each block i. +! +! psb_d_nest_geabs +! Computes y(i) = |x(i)| element-wise for each block i. +! +! psb_d_nest_geaddconst +! Computes z(i) = x(i) + b for each block i (b is a scalar). +! +! psb_d_nest_gecmp +! Computes z(i) = cmp(x(i), c) for each block i (c is a scalar). +! +! psb_d_nest_mask +! Applies mask operation to each block i; t is .true. iff all blocks +! return .true. +! +! psb_d_nest_upd_xyz +! Applies psb_upd_xyz(alpha,beta,gamma,delta, x(i),y(i),z(i)) for each block i. +! +! psb_d_nest_spsm +! Block-diagonal triangular solve: applies psb_spsm to each diagonal +! block (i,i) of tnest independently. +! +module psb_d_nest_psblas_mod + use psb_desc_nest_mod + use psb_d_nest_vect_mod + use psb_d_nest_mat_mod + use psb_d_mat_mod, only : psb_dspmat_type, psb_csmm + use psb_d_psblas_mod, only : psb_spmm, psb_geaxpby, psb_genrm2, psb_gedot, & + & psb_geamax, psb_geasum, psb_gemin, psb_minquotient, & + & psb_gemlt, psb_gediv, psb_geinv, psb_geabs, psb_geaddconst, & + & psb_gecmp, psb_mask, psb_upd_xyz, psb_spsm + use psb_d_nest_comm_mod, only : psb_d_nest_halo, psb_d_nest_ovrl + use psb_penv_mod, only : psb_sum, psb_max, psb_min, psb_info + use psb_const_mod, only : psb_dpk_, psb_ipk_, psb_epk_, psb_ctxt_type, dzero, done + implicit none + + private + public :: psb_d_nest_spmm, psb_d_nest_geaxpby, & + psb_d_nest_genrm2, psb_d_nest_genrm2s, psb_d_nest_gedot, & + psb_d_nest_geamax, psb_d_nest_geasum, psb_d_nest_gemin, & + psb_d_nest_minquotient, & + psb_d_nest_gemlt, psb_d_nest_gediv, psb_d_nest_geinv, & + psb_d_nest_halo, psb_d_nest_ovrl, & + psb_d_nest_geabs, psb_d_nest_geaddconst, psb_d_nest_gecmp, & + psb_d_nest_mask, psb_d_nest_upd_xyz, psb_d_nest_spsm + +contains + + ! y = alpha * A_nest * x + beta * y + subroutine psb_d_nest_spmm(alpha, anest, xnest, beta, ynest, descs, info, trans) + + real(psb_dpk_), intent(in) :: alpha, beta + type(psb_d_nest_sparse_mat), intent(in) :: anest + type(psb_d_nest_vect_type), intent(inout) :: xnest + type(psb_d_nest_vect_type), intent(inout) :: ynest + type(psb_desc_nest_type), intent(in) :: descs + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: i, j + character :: trans_ + + info = 0 + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (beta == dzero) then + do i = 1, anest%nrblocks + call ynest%vects(i)%zero() + end do + else if (beta /= done) then + do i = 1, anest%nrblocks + call ynest%vects(i)%scal(beta) + end do + end if + + call psb_d_nest_halo(xnest, descs, info) + if (info /= 0) return + + do i = 1, anest%nrblocks + do j = 1, anest%ncblocks + if (anest%has_block(i, j)) then + ! y(i) += alpha * A(i,j) * x(j) (doswap=.false. skips internal halo, already done in Phase 2) + call psb_spmm(alpha, anest%mats(i,j), xnest%vects(j), & + & done, ynest%vects(i), descs%descs(i,j), info, trans=trans_, doswap=.false.) + if (info /= 0) return + end if + end do + end do + end subroutine psb_d_nest_spmm + + ! y(i) = alpha * x(i) + beta * y(i) for each block i + subroutine psb_d_nest_geaxpby(alpha, xnest, beta, ynest, descs, info) + real(psb_dpk_), intent(in) :: alpha, beta + type(psb_d_nest_vect_type), intent(inout) :: xnest + type(psb_d_nest_vect_type), intent(inout) :: ynest + type(psb_desc_nest_type), intent(in) :: descs + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + do i = 1, xnest%nblocks + call psb_geaxpby(alpha, xnest%vects(i), beta, ynest%vects(i), & + & descs%descs(i,i), info) + if (info /= 0) return + end do + end subroutine psb_d_nest_geaxpby + + ! ||x||_2 = sqrt( sum_i ||x(i)||_2^2 ) + ! Uses a single global MPI_Allreduce across all blocks. + ! global (optional, default .true.): if .false., skips MPI_Allreduce and returns + ! the process-local partial norm; use when the caller manages the reduction itself. + + function psb_d_nest_genrm2(xnest, descs, info, global) result(res) + type(psb_d_nest_vect_type), intent(inout) :: xnest + type(psb_desc_nest_type), intent(in) :: descs + integer(psb_ipk_), intent(out) :: info + logical, optional, intent(in) :: global + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i + real(psb_dpk_) :: loc_sum, blk_nrm + logical :: global_ + type(psb_ctxt_type) :: ctxt + + global_ = .true. + if (present(global)) global_ = global + + info = 0 + loc_sum = dzero + do i = 1, xnest%nblocks + ! global=.false. returns local partial norm (sqrt of local partial sum) + blk_nrm = psb_genrm2(xnest%vects(i), descs%descs(i,i), info, global=.false.) + if (info /= 0) then + res = dzero + return + end if + loc_sum = loc_sum + blk_nrm * blk_nrm + end do + + if (global_) then + ctxt = descs%descs(1,1)%get_context() + call psb_sum(ctxt, loc_sum) + end if + + res = sqrt(loc_sum) + end function psb_d_nest_genrm2 + + ! psb_d_nest_gedot + ! dot(x, y) = sum_i dot(x(i), y(i)) + ! Uses a single global MPI_Allreduce across all blocks. + function psb_d_nest_gedot(xnest, ynest, descs, info, global) result(res) + type(psb_d_nest_vect_type), intent(inout) :: xnest + type(psb_d_nest_vect_type), intent(inout) :: ynest + type(psb_desc_nest_type), intent(in) :: descs + integer(psb_ipk_), intent(out) :: info + logical, optional, intent(in) :: global + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i + real(psb_dpk_) :: loc_sum, blk_dot + logical :: global_ + type(psb_ctxt_type) :: ctxt + + global_ = .true. + if (present(global)) global_ = global + + info = 0 + loc_sum = dzero + do i = 1, xnest%nblocks + blk_dot = psb_gedot(xnest%vects(i), ynest%vects(i), descs%descs(i,i), & + & info, global=.false.) + if (info /= 0) then + res = dzero + return + end if + loc_sum = loc_sum + blk_dot + end do + + if (global_) then + ctxt = descs%descs(1,1)%get_context() + call psb_sum(ctxt, loc_sum) + end if + + res = loc_sum + end function psb_d_nest_gedot + + ! Subroutine form: res = ||x||_2 (single global reduction) + subroutine psb_d_nest_genrm2s(res, xnest, descs, info, global) + real(psb_dpk_), intent(out) :: res + type(psb_d_nest_vect_type), intent(inout) :: xnest + type(psb_desc_nest_type), intent(in) :: descs + integer(psb_ipk_), intent(out) :: info + logical, optional, intent(in) :: global + + res = psb_d_nest_genrm2(xnest, descs, info, global) + end subroutine psb_d_nest_genrm2s + + ! ||x||_inf = max_i ||x(i)||_inf (single global reduction) + function psb_d_nest_geamax(xnest, descs, info, global) result(res) + type(psb_d_nest_vect_type), intent(inout) :: xnest + type(psb_desc_nest_type), intent(in) :: descs + integer(psb_ipk_), intent(out) :: info + logical, optional, intent(in) :: global + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i + real(psb_dpk_) :: blk_val + logical :: global_ + type(psb_ctxt_type) :: ctxt + + global_ = .true. + if (present(global)) global_ = global + + info = 0 + res = dzero + do i = 1, xnest%nblocks + blk_val = psb_geamax(xnest%vects(i), descs%descs(i,i), info, global=.false.) + if (info /= 0) return + if (blk_val > res) res = blk_val + end do + + if (global_) then + ctxt = descs%descs(1,1)%get_context() + call psb_max(ctxt, res) + end if + end function psb_d_nest_geamax + + ! ||x||_1 = sum_i ||x(i)||_1 (single global reduction) + ! function psb_d_nest_geasum(xnest, descs, info, global) result(res) + ! type(psb_d_nest_vect_type), intent(inout) :: xnest + ! type(psb_desc_nest_type), intent(in) :: descs + ! integer(psb_ipk_), intent(out) :: info + ! logical, optional, intent(in) :: global + ! real(psb_dpk_) :: res + + ! integer(psb_ipk_) :: i + ! real(psb_dpk_) :: blk_val + ! logical :: global_ + ! type(psb_ctxt_type) :: ctxt + + ! global_ = .true. + ! if (present(global)) global_ = global + + ! info = 0 + ! res = dzero + ! do i = 1, xnest%nblocks + ! blk_val = psb_geasum(xnest%vects(i), descs%descs(i,i), info, global=.false.) + ! if (info /= 0) return + ! res = res + blk_val + ! end do + + ! if (global_) then + ! ctxt = descs%descs(1,1)%get_context() + ! call psb_sum(ctxt, res) + ! end if + ! end function psb_d_nest_geasum + + ! ||x||_1 = sum_i ||x(i)||_1 + function psb_d_nest_geasum(xnest, descs, info, global) result(res) + type(psb_d_nest_vect_type), intent(inout) :: xnest + type(psb_desc_nest_type), intent(in) :: descs + integer(psb_ipk_), intent(out) :: info + logical, optional, intent(in) :: global + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: nloc + real(psb_dpk_) :: blk_val + real(psb_dpk_), allocatable :: blk_vals(:) + logical :: global_ + type(psb_ctxt_type) :: ctxt + + global_ = .true. + if (present(global)) global_ = global + + info = 0 + res = dzero + do i = 1, xnest%nblocks + nloc = descs%descs(i,i)%get_local_rows() + blk_vals = xnest%vects(i)%get_vect(nloc) + if (size(blk_vals) > 0) then + blk_val = sum(abs(blk_vals)) + else + blk_val = dzero + end if + res = res + blk_val + end do + + if (global_) then + ctxt = descs%descs(1,1)%get_context() + call psb_sum(ctxt, res) + end if + end function psb_d_nest_geasum + + ! min(x) = min_i min(x(i)) + function psb_d_nest_gemin(xnest, descs, info, global) result(res) + type(psb_d_nest_vect_type), intent(inout) :: xnest + type(psb_desc_nest_type), intent(in) :: descs + integer(psb_ipk_), intent(out) :: info + logical, optional, intent(in) :: global + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i + real(psb_dpk_) :: blk_val + logical :: global_ + type(psb_ctxt_type) :: ctxt + + global_ = .true. + if (present(global)) global_ = global + + info = 0 + res = huge(dzero) + do i = 1, xnest%nblocks + blk_val = psb_gemin(xnest%vects(i), descs%descs(i,i), info, global=.false.) + if (info /= 0) return + if (blk_val < res) res = blk_val + end do + + if (global_) then + ctxt = descs%descs(1,1)%get_context() + call psb_min(ctxt, res) + end if + end function psb_d_nest_gemin + + ! min(x/y) = min_i min(x(i)/y(i)) (single global reduction) + function psb_d_nest_minquotient(xnest, ynest, descs, info, global) result(res) + type(psb_d_nest_vect_type), intent(inout) :: xnest + type(psb_d_nest_vect_type), intent(inout) :: ynest + type(psb_desc_nest_type), intent(in) :: descs + integer(psb_ipk_), intent(out) :: info + logical, optional, intent(in) :: global + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i + real(psb_dpk_) :: blk_val + logical :: global_ + type(psb_ctxt_type) :: ctxt + + global_ = .true. + if (present(global)) global_ = global + + info = 0 + res = huge(dzero) + do i = 1, xnest%nblocks + blk_val = psb_minquotient(xnest%vects(i), ynest%vects(i), & + & descs%descs(i,i), info, global=.false.) + if (info /= 0) return + if (blk_val < res) res = blk_val + end do + + if (global_) then + ctxt = descs%descs(1,1)%get_context() + call psb_min(ctxt, res) + end if + end function psb_d_nest_minquotient + + ! y(i) = x(i) .* y(i) element-wise for each block i + subroutine psb_d_nest_gemlt(xnest, ynest, descs, info) + type(psb_d_nest_vect_type), intent(inout) :: xnest + type(psb_d_nest_vect_type), intent(inout) :: ynest + type(psb_desc_nest_type), intent(in) :: descs + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + do i = 1, xnest%nblocks + call psb_gemlt(xnest%vects(i), ynest%vects(i), descs%descs(i,i), info) + if (info /= 0) return + end do + end subroutine psb_d_nest_gemlt + + ! y(i) = x(i) ./ y(i) element-wise for each block i + subroutine psb_d_nest_gediv(xnest, ynest, descs, info) + type(psb_d_nest_vect_type), intent(inout) :: xnest + type(psb_d_nest_vect_type), intent(inout) :: ynest + type(psb_desc_nest_type), intent(in) :: descs + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + do i = 1, xnest%nblocks + call psb_gediv(xnest%vects(i), ynest%vects(i), descs%descs(i,i), info) + if (info /= 0) return + end do + end subroutine psb_d_nest_gediv + + ! y(i) = 1/x(i) element-wise for each block i + subroutine psb_d_nest_geinv(xnest, ynest, descs, info) + type(psb_d_nest_vect_type), intent(inout) :: xnest + type(psb_d_nest_vect_type), intent(inout) :: ynest + type(psb_desc_nest_type), intent(in) :: descs + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + do i = 1, xnest%nblocks + call psb_geinv(xnest%vects(i), ynest%vects(i), descs%descs(i,i), info) + if (info /= 0) return + end do + end subroutine psb_d_nest_geinv + + ! y(i) = |x(i)| element-wise for each block i + subroutine psb_d_nest_geabs(xnest, ynest, descs, info) + type(psb_d_nest_vect_type), intent(inout) :: xnest + type(psb_d_nest_vect_type), intent(inout) :: ynest + type(psb_desc_nest_type), intent(in) :: descs + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + do i = 1, xnest%nblocks + call psb_geabs(xnest%vects(i), ynest%vects(i), descs%descs(i,i), info) + if (info /= 0) return + end do + end subroutine psb_d_nest_geabs + + ! z(i) = x(i) + b for each block i (b is a scalar) + subroutine psb_d_nest_geaddconst(xnest, b, znest, descs, info) + type(psb_d_nest_vect_type), intent(inout) :: xnest + real(psb_dpk_), intent(in) :: b + type(psb_d_nest_vect_type), intent(inout) :: znest + type(psb_desc_nest_type), intent(in) :: descs + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + do i = 1, xnest%nblocks + call psb_geaddconst(xnest%vects(i), b, znest%vects(i), descs%descs(i,i), info) + if (info /= 0) return + end do + end subroutine psb_d_nest_geaddconst + + ! z(i) = cmp(x(i), c) for each block i (c is a scalar) + subroutine psb_d_nest_gecmp(xnest, c, znest, descs, info) + type(psb_d_nest_vect_type), intent(inout) :: xnest + real(psb_dpk_), intent(in) :: c + type(psb_d_nest_vect_type), intent(inout) :: znest + type(psb_desc_nest_type), intent(in) :: descs + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + do i = 1, xnest%nblocks + call psb_gecmp(xnest%vects(i), c, znest%vects(i), descs%descs(i,i), info) + if (info /= 0) return + end do + end subroutine psb_d_nest_gecmp + + subroutine psb_d_nest_mask(cnest, xnest, mnest, t, descs, info) + type(psb_d_nest_vect_type), intent(inout) :: cnest + type(psb_d_nest_vect_type), intent(inout) :: xnest + type(psb_d_nest_vect_type), intent(inout) :: mnest + logical, intent(out) :: t + type(psb_desc_nest_type), intent(in) :: descs + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + logical :: t_dummy + real(psb_dpk_) :: mmax + + info = 0 + do i = 1, cnest%nblocks + ! psb_mask(c, x, m, t, desc) semantics after the double-swap in + ! d_vect_mask_v → d_base_mask_v → d_base_mask_a: + ! first arg → constraint-type selector in d_base_mask_a + ! second arg → value to test in d_base_mask_a + ! So pass xnest (constraint types) first, cnest (values) second. + call psb_mask(xnest%vects(i), cnest%vects(i), mnest%vects(i), & + & t_dummy, descs%descs(i,i), info) + if (info /= 0) return + end do + ! t = .true. iff no block has any violated entry (mnest=1). + mmax = psb_d_nest_geamax(mnest, descs, info) + t = (mmax < 0.5_psb_dpk_) + end subroutine psb_d_nest_mask + + + ! Applies psb_upd_xyz(alpha,beta,gamma,delta,x(i),y(i),z(i)) + ! for each block i. + subroutine psb_d_nest_upd_xyz(alpha, beta, gamma, delta, xnest, ynest, znest, descs, info) + real(psb_dpk_), intent(in) :: alpha, beta, gamma, delta + type(psb_d_nest_vect_type), intent(inout) :: xnest + type(psb_d_nest_vect_type), intent(inout) :: ynest + type(psb_d_nest_vect_type), intent(inout) :: znest + type(psb_desc_nest_type), intent(in) :: descs + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + do i = 1, xnest%nblocks + call psb_upd_xyz(alpha, beta, gamma, delta, & + & xnest%vects(i), ynest%vects(i), znest%vects(i), & + & descs%descs(i,i), info) + if (info /= 0) return + end do + end subroutine psb_d_nest_upd_xyz + + ! Block-diagonal triangular solve: applies psb_spsm to each + ! diagonal block (i,i) of tnest independently. + ! y(i) = alpha * T(i,i)^{-1} x(i) + beta * y(i) + subroutine psb_d_nest_spsm(alpha, tnest, xnest, beta, ynest, descs, info, & + & trans, scale, choice, work) + real(psb_dpk_), intent(in) :: alpha, beta + type(psb_d_nest_sparse_mat), intent(inout) :: tnest + type(psb_d_nest_vect_type), intent(inout) :: xnest + type(psb_d_nest_vect_type), intent(inout) :: ynest + type(psb_desc_nest_type), intent(in) :: descs + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans, scale + integer(psb_ipk_), optional, intent(in) :: choice + real(psb_dpk_), optional, intent(inout), target :: work(:) + + integer(psb_ipk_) :: i + + info = 0 + do i = 1, tnest%nrblocks + if (.not. tnest%has_block(i, i)) then + ! No diagonal block: treat as identity => y(i) = alpha*x(i) + beta*y(i) + call psb_geaxpby(alpha, xnest%vects(i), beta, ynest%vects(i), & + & descs%descs(i,i), info) + else + call psb_spsm(alpha, tnest%mats(i,i), xnest%vects(i), beta, ynest%vects(i), & + & descs%descs(i,i), info, trans=trans, scale=scale, & + & choice=choice, work=work) + end if + if (info /= 0) return + end do + end subroutine psb_d_nest_spsm + +end module psb_d_nest_psblas_mod diff --git a/base/modules/serial/psb_d_nest_mat_mod.f90 b/base/modules/serial/psb_d_nest_mat_mod.f90 new file mode 100644 index 000000000..bfcb0c966 --- /dev/null +++ b/base/modules/serial/psb_d_nest_mat_mod.f90 @@ -0,0 +1,149 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific without permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! module: psb_d_nest_mat_mod +! +! Defines psb_d_nest_sparse_mat: a block-structured distributed sparse +! matrix for double precision real arithmetic. +! +! The matrix is stored as a 2-D array of psb_dspmat_type sub-matrices. +! A companion logical array blk_present(i,j) flags which blocks are +! non-null (absent blocks contribute zero to any product). +! +! Descriptor convention (current nested design) +! --------------------------------------------- +! Each matrix block (i,j) is associated with descs(i,j) from the +! corresponding psb_desc_nest_type. Nested tools (psb_spall_nest, +! psb_spins_nest, psb_spasb_nest, psb_spmm) consistently pass +! descs(i,j) together with mats(i,j). +! +! A block may be structurally absent (NULL/zero): this is represented by +! blk_present(i,j)=.false. and mats(i,j) left unbuilt. In that case the +! block contributes zero and is skipped by nested kernels. +! +! Descriptor storage is distinct from matrix presence: descriptors are +! typically defined for all block positions in descs(:,:), while actual +! matrix blocks may be present only on a subset. +! +! Reference examples in test/pdegen: +! * psb_d_pde_nest.full.F90 (A(2,2) left NULL, blk_present(2,2)=.false.) +! * psb_d_nest_tools.F90 and psb_d_pde_nest_full_tools.F90 +! (2-D desc_nest%descs(i,j) used in nested allocation/assembly). +! +module psb_d_nest_mat_mod + use psb_d_mat_mod + implicit none + + type :: psb_d_nest_sparse_mat + integer(psb_ipk_) :: nrblocks = 0 + integer(psb_ipk_) :: ncblocks = 0 + type(psb_dspmat_type), allocatable :: mats(:,:) + logical, allocatable :: blk_present(:,:) + contains + procedure :: get_nrblocks => psb_d_nest_mat_get_nrb + procedure :: get_ncblocks => psb_d_nest_mat_get_ncb + procedure :: has_block => psb_d_nest_mat_has_block + procedure :: sizeof => psb_d_nest_mat_sizeof + procedure :: free => psb_d_nest_mat_free + end type psb_d_nest_sparse_mat + +contains + + ! get_nrblocks / get_ncblocks + function psb_d_nest_mat_get_nrb(a) result(n) + class(psb_d_nest_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: n + n = a%nrblocks + end function psb_d_nest_mat_get_nrb + + function psb_d_nest_mat_get_ncb(a) result(n) + class(psb_d_nest_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: n + n = a%ncblocks + end function psb_d_nest_mat_get_ncb + + ! has_block: return .true. if block (i,j) is non-null + function psb_d_nest_mat_has_block(a, i, j) result(hp) + class(psb_d_nest_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: i, j + logical :: hp + + hp = .false. + if (i < 1 .or. i > a%nrblocks) return + if (j < 1 .or. j > a%ncblocks) return + if (.not. allocated(a%blk_present)) return + hp = a%blk_present(i, j) + end function psb_d_nest_mat_has_block + + ! sizeof: total storage across all allocated sub-matrices + function psb_d_nest_mat_sizeof(a) result(s) + class(psb_d_nest_sparse_mat), intent(in) :: a + integer(psb_epk_) :: s + integer(psb_ipk_) :: i, j + + s = 0_psb_epk_ + if (allocated(a%mats)) then + do j = 1, a%ncblocks + do i = 1, a%nrblocks + if (a%blk_present(i, j)) s = s + a%mats(i, j)%sizeof() + end do + end do + end if + end function psb_d_nest_mat_sizeof + + ! free: release all sub-matrices + subroutine psb_d_nest_mat_free(a, info) + class(psb_d_nest_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, j, linfo + + info = 0 + if (allocated(a%mats)) then + do j = 1, a%ncblocks + do i = 1, a%nrblocks + if (a%blk_present(i, j)) then + call a%mats(i, j)%free() + end if + end do + end do + deallocate(a%mats, stat=linfo) + if (linfo /= 0 .and. info == 0) info = linfo + end if + if (allocated(a%blk_present)) then + deallocate(a%blk_present, stat=linfo) + if (linfo /= 0 .and. info == 0) info = linfo + end if + a%nrblocks = 0 + a%ncblocks = 0 + end subroutine psb_d_nest_mat_free + +end module psb_d_nest_mat_mod diff --git a/base/modules/serial/psb_d_nest_vect_mod.f90 b/base/modules/serial/psb_d_nest_vect_mod.f90 new file mode 100644 index 000000000..c90693607 --- /dev/null +++ b/base/modules/serial/psb_d_nest_vect_mod.f90 @@ -0,0 +1,109 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! module: psb_d_nest_vect_mod +! +! Defines psb_d_nest_vect_type: a block-structured distributed dense +! vector for double precision real arithmetic. Each sub-vector is a +! standard psb_d_vect_type assembled under its own descriptor. +! +! Parallel BLAS operations (nrm2, dot, axpby) are exposed as module +! subroutines/functions in psb_d_nest_psblas_mod so that they can +! exploit a single global reduction per call. +! +module psb_d_nest_vect_mod + use psb_d_vect_mod + use psb_desc_mod + implicit none + + type :: psb_d_nest_vect_type + integer(psb_ipk_) :: nblocks = 0 + type(psb_d_vect_type), allocatable :: vects(:) + contains + procedure :: get_nblocks => psb_d_nest_vect_get_nblocks + procedure :: zero => psb_d_nest_vect_zero + procedure :: sizeof => psb_d_nest_vect_sizeof + procedure :: free => psb_d_nest_vect_free + end type psb_d_nest_vect_type + +contains + + ! get_nblocks + function psb_d_nest_vect_get_nblocks(x) result(nb) + class(psb_d_nest_vect_type), intent(in) :: x + integer(psb_ipk_) :: nb + nb = x%nblocks + end function psb_d_nest_vect_get_nblocks + + ! zero: set all sub-vectors to zero (local, no halo zeroing needed) + subroutine psb_d_nest_vect_zero(x) + class(psb_d_nest_vect_type), intent(inout) :: x + integer(psb_ipk_) :: i + if (allocated(x%vects)) then + do i = 1, x%nblocks + call x%vects(i)%zero() + end do + end if + end subroutine psb_d_nest_vect_zero + + ! sizeof: total bytes across all sub-vectors + function psb_d_nest_vect_sizeof(x) result(s) + class(psb_d_nest_vect_type), intent(in) :: x + integer(psb_epk_) :: s + integer(psb_ipk_) :: i + s = 0_psb_epk_ + if (allocated(x%vects)) then + do i = 1, x%nblocks + s = s + x%vects(i)%sizeof() + end do + end if + end function psb_d_nest_vect_sizeof + + ! free: release all sub-vectors + subroutine psb_d_nest_vect_free(x, info) + class(psb_d_nest_vect_type), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, linfo + + info = 0 + if (allocated(x%vects)) then + do i = 1, x%nblocks + call x%vects(i)%free(linfo) + if (linfo /= 0 .and. info == 0) info = linfo + end do + deallocate(x%vects, stat=linfo) + if (linfo /= 0 .and. info == 0) info = linfo + end if + x%nblocks = 0 + end subroutine psb_d_nest_vect_free + +end module psb_d_nest_vect_mod diff --git a/base/modules/tools/psb_cd_nest_tools_mod.F90 b/base/modules/tools/psb_cd_nest_tools_mod.F90 new file mode 100644 index 000000000..78454ec32 --- /dev/null +++ b/base/modules/tools/psb_cd_nest_tools_mod.F90 @@ -0,0 +1,457 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! Nested-specific assembly wrappers for PSBLAS3 — descriptor routines +! + +module psb_cd_nest_tools_mod + use psb_const_mod, only : psb_ipk_, psb_lpk_, psb_success_, psb_err_alloc_dealloc_, & + psb_err_invalid_input_, psb_err_no_optional_arg_, psb_err_from_subroutine_, & + psb_ctxt_type + use psb_error_mod, only : psb_errpush + use psb_cd_tools_mod, only : psb_cdall, psb_cdasb, psb_cdins, psb_cdcpy, psb_cdprt + use psb_desc_nest_mod, only : psb_desc_nest_type + implicit none + + private + + public :: psb_cdall_nest, psb_cdins_nest, psb_cdins_nest_rc, & + psb_cdasb_nest, psb_cdfree_nest, psb_cdcpy_nest, psb_cdprt_nest + + ! Column-only form: (blk_j, nz, ja, desc_nest, info [,mask, lidx]) + ! Row+column form: (blk_i, blk_j, nz, ia, ja, desc_nest, info) + interface psb_cdins_nest +#if defined(PSB_IPK4) && defined(PSB_LPK8) + module procedure psb_cdins_nest_c + module procedure psb_cdins_nest_rc_sub +#endif + module procedure psb_lcdins_nest_c + module procedure psb_lcdins_nest_rc + end interface + + ! Row+column form: (blk_i, blk_j, nz, ia, ja, desc_nest, info) + interface psb_cdins_nest_rc +#if defined(PSB_IPK4) && defined(PSB_LPK8) + module procedure psb_cdins_nest_rc_sub +#endif + module procedure psb_lcdins_nest_rc + end interface + +contains + + ! Allocates the nested descriptor structure and creates block + ! descriptors. The first block of each row uses psb_cdall with + ! the given local row count; subsequent blocks in the same row + ! are clones of the first block (same row distribution). + ! + ! Arguments: + ! ctxt - PSBLAS context + ! desc_nest - nested descriptor (output) + ! info - error code (output) + ! nrblocks - number of block rows (optional, default 2) + ! ncblocks - number of block columns (optional, default 2) + ! nl - local row count per process (required for first blocks) + + subroutine psb_cdall_nest(ctxt, desc_nest, info, nrblocks, ncblocks, nl) + type(psb_ctxt_type), intent(in) :: ctxt + type(psb_desc_nest_type), intent(out) :: desc_nest + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nrblocks, ncblocks, nl + + integer(psb_ipk_) :: i, j, nr, nc, nl_ + character(len=20) :: name + + info = psb_success_ + name = 'psb_cdall_nest' + + ! Set default dimensions + nr = 2 + nc = 2 + if (present(nrblocks)) nr = nrblocks + if (present(ncblocks)) nc = ncblocks + + if (.not. present(nl)) then + info = psb_err_no_optional_arg_ + call psb_errpush(info, name, a_err='nl (local row count)') + return + end if + nl_ = nl + + ! Allocate nested descriptor structure + desc_nest%nrblocks = nr + desc_nest%ncblocks = nc + allocate(desc_nest%descs(nr, nc), stat=info) + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + return + end if + + ! Build descriptors: each block gets its own independent psb_cdall. + ! Cloning a build-state descriptor shares its base_desc pointer; when + ! psb_cdasb_nest assembles both the original and the clone the shared + ! base_desc is rebuilt twice, corrupting the global-to-local mapping of + ! every block in that row. Independent allocations avoid this entirely. + do i = 1, nr + do j = 1, nc + call psb_cdall(ctxt, desc_nest%descs(i, j), info, nl=nl_) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_, name) + return + end if + end do + end do + + end subroutine psb_cdall_nest + + +#if defined(PSB_IPK4) && defined(PSB_LPK8) + ! psb_cdins_nest_rc_sub: row+col form, ipk_ nz — only when ipk_ /= lpk_ + subroutine psb_cdins_nest_rc_sub(blk_i, blk_j, nz, ia, ja, desc_nest, info) + integer(psb_ipk_), intent(in) :: blk_i, blk_j, nz + integer(psb_lpk_), intent(in) :: ia(:), ja(:) + type(psb_desc_nest_type), intent(inout) :: desc_nest + integer(psb_ipk_), intent(out) :: info + + character(len=20) :: name + + info = psb_success_ + name = 'psb_cdins_nest' + + if (nz == 0) return + + if (blk_i < 1 .or. blk_i > desc_nest%nrblocks .or. & + blk_j < 1 .or. blk_j > desc_nest%ncblocks) then + info = psb_err_invalid_input_ + call psb_errpush(info, name, a_err='invalid block indices') + return + end if + + call psb_cdins(nz, ia, ja, desc_nest%descs(blk_i, blk_j), info) + if (info /= psb_success_) & + call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdins') + + end subroutine psb_cdins_nest_rc_sub + + + ! psb_cdins_nest_c: col-only form, ipk_ nz — only when ipk_ /= lpk_ + subroutine psb_cdins_nest_c(blk_j, nz, ja, desc_nest, info, mask, lidx) + integer(psb_ipk_), intent(in) :: blk_j, nz + integer(psb_lpk_), intent(in) :: ja(:) + type(psb_desc_nest_type), intent(inout) :: desc_nest + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional, target :: mask(:) + integer(psb_ipk_), intent(in), optional :: lidx(:) + + integer(psb_ipk_) :: i, linfo + character(len=20) :: name + + info = psb_success_ + name = 'psb_cdins_nest' + + if (nz == 0) return + + if (blk_j < 1 .or. blk_j > desc_nest%ncblocks) then + info = psb_err_invalid_input_ + call psb_errpush(info, name, a_err='invalid block column index') + return + end if + + do i = 1, desc_nest%nrblocks + linfo = psb_success_ + if (present(mask)) then + if (present(lidx)) then + call psb_cdins(nz, ja, desc_nest%descs(i, blk_j), linfo, mask=mask, lidx=lidx) + else + call psb_cdins(nz, ja, desc_nest%descs(i, blk_j), linfo, mask=mask) + end if + else + if (present(lidx)) then + call psb_cdins(nz, ja, desc_nest%descs(i, blk_j), linfo, lidx=lidx) + else + call psb_cdins(nz, ja, desc_nest%descs(i, blk_j), linfo) + end if + end if + if (linfo /= psb_success_ .and. info == psb_success_) then + info = linfo + call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdins') + end if + end do + + end subroutine psb_cdins_nest_c +#endif + + + ! psb_lcdins_nest_rc: row+col form, lpk_ nz + ! + ! When entries in block (blk_i, blk_j) reference columns owned by other + ! processes, use the col-only form afterwards to broadcast those column + ! indices across all row-blocks in block-col blk_j. + subroutine psb_lcdins_nest_rc(blk_i, blk_j, nz, ia, ja, desc_nest, info) + integer(psb_ipk_), intent(in) :: blk_i, blk_j + integer(psb_lpk_), intent(in) :: nz, ia(:), ja(:) + type(psb_desc_nest_type), intent(inout) :: desc_nest + integer(psb_ipk_), intent(out) :: info + + character(len=20) :: name + + info = psb_success_ + name = 'psb_cdins_nest' + + if (nz == 0) return + + if (blk_i < 1 .or. blk_i > desc_nest%nrblocks .or. & + blk_j < 1 .or. blk_j > desc_nest%ncblocks) then + info = psb_err_invalid_input_ + call psb_errpush(info, name, a_err='invalid block indices') + return + end if + + call psb_cdins(nz, ia, ja, desc_nest%descs(blk_i, blk_j), info) + if (info /= psb_success_) & + call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdins') + + end subroutine psb_lcdins_nest_rc + + + ! psb_lcdins_nest_c: col-only form, lpk_ nz + ! + ! Registers nz global column indices ja into the descriptor for + ! block column blk_j across all row-blocks (descs(i, blk_j) for + ! i = 1..nrblocks). mask and lidx are forwarded to psb_cdins. + subroutine psb_lcdins_nest_c(blk_j, nz, ja, desc_nest, info, mask, lidx) + integer(psb_ipk_), intent(in) :: blk_j + integer(psb_lpk_), intent(in) :: nz, ja(:) + type(psb_desc_nest_type), intent(inout) :: desc_nest + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional, target :: mask(:) + integer(psb_ipk_), intent(in), optional :: lidx(:) + + integer(psb_ipk_) :: i, linfo + character(len=20) :: name + + info = psb_success_ + name = 'psb_cdins_nest' + + if (nz == 0) return + + if (blk_j < 1 .or. blk_j > desc_nest%ncblocks) then + info = psb_err_invalid_input_ + call psb_errpush(info, name, a_err='invalid block column index') + return + end if + + do i = 1, desc_nest%nrblocks + linfo = psb_success_ + if (present(mask)) then + if (present(lidx)) then + call psb_cdins(nz, ja, desc_nest%descs(i, blk_j), linfo, mask=mask, lidx=lidx) + else + call psb_cdins(nz, ja, desc_nest%descs(i, blk_j), linfo, mask=mask) + end if + else + if (present(lidx)) then + call psb_cdins(nz, ja, desc_nest%descs(i, blk_j), linfo, lidx=lidx) + else + call psb_cdins(nz, ja, desc_nest%descs(i, blk_j), linfo) + end if + end if + if (linfo /= psb_success_ .and. info == psb_success_) then + info = linfo + call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdins') + end if + end do + + end subroutine psb_lcdins_nest_c + + + ! psb_cdasb_nest: Finalize all nested descriptors + ! + ! Calls psb_cdasb on all block descriptors in the nested structure. + ! This must be called after all psb_cdins_nest calls and + ! before psb_spasb_nest. + ! + ! Arguments: + ! desc_nest - nested descriptor (input/output) + ! info - error code (output) + + subroutine psb_cdasb_nest(desc_nest, info) + type(psb_desc_nest_type), intent(inout) :: desc_nest + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, j + character(len=20) :: name + + info = psb_success_ + name = 'psb_cdasb_nest' + + do i = 1, desc_nest%nrblocks + do j = 1, desc_nest%ncblocks + call psb_cdasb(desc_nest%descs(i, j), info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdasb') + return + end if + end do + end do + + end subroutine psb_cdasb_nest + + + ! psb_cdfree_nest: Free all nested descriptors + ! + ! Calls psb_cdfree on every block descriptor in the nested + ! structure, then deallocates the descriptor array and resets + ! nrblocks/ncblocks to 0. Mirrors what psb_cdfree does for a + ! single psb_desc_type. + ! + ! Arguments: + ! desc_nest - nested descriptor (input/output) + ! info - error code (output) + ! + subroutine psb_cdfree_nest(desc_nest, info) + type(psb_desc_nest_type), intent(inout) :: desc_nest + integer(psb_ipk_), intent(out) :: info + + character(len=20) :: name + + info = psb_success_ + name = 'psb_cdfree_nest' + + call desc_nest%free(info) + if (info /= psb_success_) & + call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_desc_nest_free') + + end subroutine psb_cdfree_nest + + + ! psb_cdcpy_nest: Deep copy (clone) a nested descriptor + ! + ! Allocates desc_out and clones each block descriptor from desc_in + ! using psb_cdcpy, preserving the full row/column block structure. + ! + ! Arguments: + ! desc_in - source nested descriptor (inout — clone may need to read internal state) + ! desc_out - destination nested descriptor (output) + ! info - error code (output) + + subroutine psb_cdcpy_nest(desc_in, desc_out, info) + type(psb_desc_nest_type), intent(inout) :: desc_in + type(psb_desc_nest_type), intent(out) :: desc_out + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, j + character(len=20) :: name + + info = psb_success_ + name = 'psb_cdcpy_nest' + + desc_out%nrblocks = desc_in%nrblocks + desc_out%ncblocks = desc_in%ncblocks + allocate(desc_out%descs(desc_in%nrblocks, desc_in%ncblocks), stat=info) + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + return + end if + + do i = 1, desc_in%nrblocks + do j = 1, desc_in%ncblocks + call psb_cdcpy(desc_in%descs(i, j), desc_out%descs(i, j), info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_cdcpy') + return + end if + end do + end do + + end subroutine psb_cdcpy_nest + + + ! psb_cdprt_nest: Print all block descriptors (debugging) + ! + ! Loops over all (i,j) block descriptors in the nested structure + ! and calls psb_cdprt on each, prefixing the output with the block + ! index. All optional arguments are forwarded unchanged. + ! + ! Arguments: + ! iout - output unit + ! desc_nest - nested descriptor (input) + ! glob - passed to psb_cdprt (optional) + ! short - passed to psb_cdprt (optional) + ! verbosity - passed to psb_cdprt (optional) + + subroutine psb_cdprt_nest(iout, desc_nest, glob, short, verbosity) + integer(psb_ipk_), intent(in) :: iout + type(psb_desc_nest_type), intent(in) :: desc_nest + logical, intent(in), optional :: glob, short + integer(psb_ipk_), intent(in), optional :: verbosity + + integer(psb_ipk_) :: i, j + + do i = 1, desc_nest%nrblocks + do j = 1, desc_nest%ncblocks + write(iout, '(a,i0,a,i0,a)') 'Block (', i, ',', j, '):' + if (present(glob)) then + if (present(short)) then + if (present(verbosity)) then + call psb_cdprt(iout, desc_nest%descs(i,j), glob=glob, short=short, verbosity=verbosity) + else + call psb_cdprt(iout, desc_nest%descs(i,j), glob=glob, short=short) + end if + else + if (present(verbosity)) then + call psb_cdprt(iout, desc_nest%descs(i,j), glob=glob, verbosity=verbosity) + else + call psb_cdprt(iout, desc_nest%descs(i,j), glob=glob) + end if + end if + else + if (present(short)) then + if (present(verbosity)) then + call psb_cdprt(iout, desc_nest%descs(i,j), short=short, verbosity=verbosity) + else + call psb_cdprt(iout, desc_nest%descs(i,j), short=short) + end if + else + if (present(verbosity)) then + call psb_cdprt(iout, desc_nest%descs(i,j), verbosity=verbosity) + else + call psb_cdprt(iout, desc_nest%descs(i,j)) + end if + end if + end if + end do + end do + + end subroutine psb_cdprt_nest + +end module psb_cd_nest_tools_mod diff --git a/base/modules/tools/psb_d_nest_tools_mod.F90 b/base/modules/tools/psb_d_nest_tools_mod.F90 new file mode 100644 index 000000000..687a46c2d --- /dev/null +++ b/base/modules/tools/psb_d_nest_tools_mod.F90 @@ -0,0 +1,433 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! Nested-specific assembly wrappers for PSBLAS3 — double precision matrix and vector routines +! + +module psb_d_nest_tools_mod + use psb_const_mod, only : psb_ipk_, psb_lpk_, psb_dpk_, psb_success_, psb_err_alloc_dealloc_, & + psb_err_invalid_input_, psb_err_from_subroutine_, & + psb_dupl_add_, psb_dupl_ovwrt_, psb_dupl_err_, psb_ctxt_type + use psb_error_mod, only : psb_errpush + use psb_d_tools_mod, only : psb_spall, psb_spins, psb_spasb, psb_spfree, psb_sprn, & + psb_geall, psb_geins, psb_geasb, psb_gefree + use psb_desc_nest_mod, only : psb_desc_nest_type + use psb_d_nest_mat_mod, only : psb_d_nest_sparse_mat + use psb_d_nest_vect_mod, only : psb_d_nest_vect_type + implicit none + + private + + public :: psb_spall_nest, psb_spins_nest, psb_spasb_nest, psb_spfree_nest, psb_sprn_nest, & + psb_geall_nest, psb_geins_nest, psb_geasb_nest, psb_gefree_nest + +contains + + ! Allocates all (nrblocks x ncblocks) sparse matrix blocks + ! and marks all as present. psb_spins_nest lazy-allocates individual + ! blocks on first insertion; call psb_spall_nest instead when the + ! full block structure is known up front. + + subroutine psb_spall_nest(a_nest, desc_nest, info, nnz) + type(psb_d_nest_sparse_mat), intent(inout) :: a_nest + type(psb_desc_nest_type), intent(in) :: desc_nest + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nnz + + integer(psb_ipk_) :: i, j, linfo + character(len=20) :: name + + info = psb_success_ + name = 'psb_spall_nest' + + a_nest%nrblocks = desc_nest%nrblocks + a_nest%ncblocks = desc_nest%ncblocks + + if (.not. allocated(a_nest%mats)) then + allocate(a_nest%mats(a_nest%nrblocks, a_nest%ncblocks), stat=info) + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + return + end if + end if + + if (.not. allocated(a_nest%blk_present)) then + allocate(a_nest%blk_present(a_nest%nrblocks, a_nest%ncblocks), stat=info) + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + return + end if + a_nest%blk_present = .false. + end if + + do i = 1, a_nest%nrblocks + do j = 1, a_nest%ncblocks + linfo = psb_success_ + if (present(nnz)) then + call psb_spall(a_nest%mats(i, j), desc_nest%descs(i, j), linfo, nnz=nnz) + else + call psb_spall(a_nest%mats(i, j), desc_nest%descs(i, j), linfo) + end if + if (linfo /= psb_success_) then + info = linfo + call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_spall') + return + end if + a_nest%blk_present(i, j) = .true. + end do + end do + + end subroutine psb_spall_nest + + + ! Inserts nz entries into block (blk_i, blk_j) of the nested matrix. + ! The block is lazy-allocated on first insertion if psb_spall_nest + ! was not called first. + + subroutine psb_spins_nest(blk_i, blk_j, nz, ia, ja, val, a_nest, desc_nest, info) + integer(psb_ipk_), intent(in) :: blk_i, blk_j, nz + integer(psb_lpk_), intent(in) :: ia(:), ja(:) + real(psb_dpk_), intent(in) :: val(:) + type(psb_d_nest_sparse_mat), intent(inout) :: a_nest + type(psb_desc_nest_type), intent(inout) :: desc_nest + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: nnz_est + character(len=20) :: name + + info = psb_success_ + name = 'psb_spins_nest' + + if (nz == 0) return + + if (blk_i < 1 .or. blk_i > a_nest%nrblocks .or. & + blk_j < 1 .or. blk_j > a_nest%ncblocks) then + info = psb_err_invalid_input_ + call psb_errpush(info, name, a_err='invalid block indices') + return + end if + + if (.not. allocated(a_nest%mats)) then + allocate(a_nest%mats(a_nest%nrblocks, a_nest%ncblocks), stat=info) + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + return + end if + allocate(a_nest%blk_present(a_nest%nrblocks, a_nest%ncblocks), stat=info) + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + return + end if + a_nest%blk_present = .false. + end if + + if (.not. a_nest%blk_present(blk_i, blk_j)) then + ! Estimate nnz: use nz + 50% buffer for future insertions + nnz_est = max(nz, 10) + nz / 2 + call psb_spall(a_nest%mats(blk_i, blk_j), & + desc_nest%descs(blk_i, blk_j), info, nnz=nnz_est) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_spall') + return + end if + a_nest%blk_present(blk_i, blk_j) = .true. + end if + + call psb_spins(nz, ia, ja, val, a_nest%mats(blk_i, blk_j), & + desc_nest%descs(blk_i, blk_j), info) + if (info /= psb_success_) & + call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_spins') + + end subroutine psb_spins_nest + + ! Calls psb_spasb on all present block matrices. + ! Must be called after psb_cdasb_nest. + + subroutine psb_spasb_nest(a_nest, desc_nest, info, dupl) + type(psb_d_nest_sparse_mat), intent(inout) :: a_nest + type(psb_desc_nest_type), intent(inout) :: desc_nest + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: dupl + + integer(psb_ipk_) :: i, j, dupl_, linfo + character(len=20) :: name + + info = psb_success_ + name = 'psb_spasb_nest' + dupl_ = psb_dupl_add_ + if (present(dupl)) dupl_ = dupl + + do i = 1, a_nest%nrblocks + do j = 1, a_nest%ncblocks + if (a_nest%blk_present(i, j)) then + linfo = psb_success_ + if (dupl_ == psb_dupl_add_) then + call psb_spasb(a_nest%mats(i, j), desc_nest%descs(i, j), linfo, & + dupl=psb_dupl_add_) + else if (dupl_ == psb_dupl_ovwrt_) then + call psb_spasb(a_nest%mats(i, j), desc_nest%descs(i, j), linfo, & + dupl=psb_dupl_ovwrt_) + else if (dupl_ == psb_dupl_err_) then + call psb_spasb(a_nest%mats(i, j), desc_nest%descs(i, j), linfo, & + dupl=psb_dupl_err_) + else + call psb_spasb(a_nest%mats(i, j), desc_nest%descs(i, j), linfo) + end if + if (linfo /= psb_success_) then + info = linfo + call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_spasb') + return + end if + end if + end do + end do + + end subroutine psb_spasb_nest + + ! Calls psb_spfree on every present block, then deallocates the + ! mats and blk_present arrays and resets nrblocks/ncblocks to 0. + + subroutine psb_spfree_nest(a_nest, desc_nest, info) + type(psb_d_nest_sparse_mat), intent(inout) :: a_nest + type(psb_desc_nest_type), intent(in) :: desc_nest + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, j, linfo + character(len=20) :: name + + info = psb_success_ + name = 'psb_spfree_nest' + + if (allocated(a_nest%mats)) then + do i = 1, a_nest%nrblocks + do j = 1, a_nest%ncblocks + if (allocated(a_nest%blk_present)) then + if (a_nest%blk_present(i, j)) then + linfo = psb_success_ + call psb_spfree(a_nest%mats(i, j), desc_nest%descs(i, j), linfo) + if (linfo /= psb_success_ .and. info == psb_success_) then + info = linfo + call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_spfree') + end if + end if + end if + end do + end do + deallocate(a_nest%mats, stat=linfo) + if (linfo /= 0 .and. info == psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + end if + end if + + if (allocated(a_nest%blk_present)) then + deallocate(a_nest%blk_present, stat=linfo) + if (linfo /= 0 .and. info == psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + end if + end if + + a_nest%nrblocks = 0 + a_nest%ncblocks = 0 + + end subroutine psb_spfree_nest + + ! Calls psb_sprn on every present block matrix, resetting it to + ! the build state while preserving the sparsity pattern. + + subroutine psb_sprn_nest(a_nest, desc_nest, info, clear) + type(psb_d_nest_sparse_mat), intent(inout) :: a_nest + type(psb_desc_nest_type), intent(in) :: desc_nest + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: clear + + integer(psb_ipk_) :: i, j, linfo + character(len=20) :: name + + info = psb_success_ + name = 'psb_sprn_nest' + + if (.not. allocated(a_nest%mats) .or. .not. allocated(a_nest%blk_present)) return + + do i = 1, a_nest%nrblocks + do j = 1, a_nest%ncblocks + if (a_nest%blk_present(i, j)) then + linfo = psb_success_ + if (present(clear)) then + call psb_sprn(a_nest%mats(i, j), desc_nest%descs(i, j), linfo, clear=clear) + else + call psb_sprn(a_nest%mats(i, j), desc_nest%descs(i, j), linfo) + end if + if (linfo /= psb_success_ .and. info == psb_success_) then + info = linfo + call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_sprn') + end if + end if + end do + end do + + end subroutine psb_sprn_nest + + + ! Allocates one sub-vector per block-row, using descs(i, 1) as + ! the row descriptor for block i. Must be called before psb_geins_nest. + + subroutine psb_geall_nest(x_nest, desc_nest, info) + type(psb_d_nest_vect_type), intent(out) :: x_nest + type(psb_desc_nest_type), intent(in) :: desc_nest + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, linfo + character(len=20) :: name + + info = psb_success_ + name = 'psb_geall_nest' + + x_nest%nblocks = desc_nest%nrblocks + allocate(x_nest%vects(x_nest%nblocks), stat=info) + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + return + end if + + do i = 1, x_nest%nblocks + linfo = psb_success_ + call psb_geall(x_nest%vects(i), desc_nest%descs(i, 1), linfo) + if (linfo /= psb_success_) then + info = linfo + call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_geall') + return + end if + end do + + end subroutine psb_geall_nest + + ! Inserts m entries into block blk_i of the nested vector. + + subroutine psb_geins_nest(blk_i, m, irw, val, x_nest, desc_nest, info, local) + integer(psb_ipk_), intent(in) :: blk_i, m + integer(psb_lpk_), intent(in) :: irw(:) + real(psb_dpk_), intent(in) :: val(:) + type(psb_d_nest_vect_type), intent(inout) :: x_nest + type(psb_desc_nest_type), intent(in) :: desc_nest + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: local + + character(len=20) :: name + + info = psb_success_ + name = 'psb_geins_nest' + + if (m == 0) return + + if (blk_i < 1 .or. blk_i > x_nest%nblocks) then + info = psb_err_invalid_input_ + call psb_errpush(info, name, a_err='invalid block index') + return + end if + + if (present(local)) then + call psb_geins(m, irw, val, x_nest%vects(blk_i), desc_nest%descs(blk_i, 1), info, & + local=local) + else + call psb_geins(m, irw, val, x_nest%vects(blk_i), desc_nest%descs(blk_i, 1), info) + end if + if (info /= psb_success_) & + call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_geins') + + end subroutine psb_geins_nest + + ! Calls psb_geasb on every sub-vector. + ! Must be called after psb_cdasb_nest and all psb_geins_nest calls. + + subroutine psb_geasb_nest(x_nest, desc_nest, info) + type(psb_d_nest_vect_type), intent(inout) :: x_nest + type(psb_desc_nest_type), intent(in) :: desc_nest + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, linfo + character(len=20) :: name + + info = psb_success_ + name = 'psb_geasb_nest' + + do i = 1, x_nest%nblocks + linfo = psb_success_ + call psb_geasb(x_nest%vects(i), desc_nest%descs(i, 1), linfo) + if (linfo /= psb_success_ .and. info == psb_success_) then + info = linfo + call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_geasb') + end if + end do + + end subroutine psb_geasb_nest + + ! Calls psb_gefree on every sub-vector, then deallocates the + ! vects array and resets nblocks to 0. + + subroutine psb_gefree_nest(x_nest, desc_nest, info) + type(psb_d_nest_vect_type), intent(inout) :: x_nest + type(psb_desc_nest_type), intent(in) :: desc_nest + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, linfo + character(len=20) :: name + + info = psb_success_ + name = 'psb_gefree_nest' + + if (allocated(x_nest%vects)) then + do i = 1, x_nest%nblocks + linfo = psb_success_ + call psb_gefree(x_nest%vects(i), desc_nest%descs(i, 1), linfo) + if (linfo /= psb_success_ .and. info == psb_success_) then + info = linfo + call psb_errpush(psb_err_from_subroutine_, name, a_err='psb_gefree') + end if + end do + deallocate(x_nest%vects, stat=linfo) + if (linfo /= 0 .and. info == psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + end if + end if + + x_nest%nblocks = 0 + + end subroutine psb_gefree_nest + +end module psb_d_nest_tools_mod diff --git a/base/modules/tools/psb_tools_mod.f90 b/base/modules/tools/psb_tools_mod.f90 index ca2020ee7..8ac664feb 100644 --- a/base/modules/tools/psb_tools_mod.f90 +++ b/base/modules/tools/psb_tools_mod.f90 @@ -44,4 +44,6 @@ module psb_tools_mod use psb_d_tools_mod use psb_c_tools_mod use psb_z_tools_mod + use psb_cd_nest_tools_mod + use psb_d_nest_tools_mod end module psb_tools_mod diff --git a/test/pdegen/CMakeLists.txt b/test/pdegen/CMakeLists.txt index 58d586e3d..7c7e891bf 100644 --- a/test/pdegen/CMakeLists.txt +++ b/test/pdegen/CMakeLists.txt @@ -15,6 +15,10 @@ set(LIBDIR "${INSTALLDIR}/lib") # Find the psblas package find_package(psblas REQUIRED PATHS ${INSTALLDIR}) +set(EXTLIBDIR "/home/fdurastante/psctoolkit/install/lib") + +link_directories(${EXTLIBDIR}) + # Include directories for the Fortran compiler include_directories(${INCDIR} ${MODDIR}) @@ -30,6 +34,9 @@ set(SOURCES_S_PDE3D psb_s_pde3d.F90) set(SOURCES_D_PDE2D psb_d_pde2d.F90) set(SOURCES_S_PDE2D psb_s_pde2d.F90) +set(SOURCES_D_PDE_NEST_PSBLAS psb_d_pde_nest_psblas.F90) +set(SOURCES_D_PDE_NEST_KRYLOV psb_d_pde_nest_krylov.F90) + # Create executables add_executable(psb_d_pde3d ${SOURCES_D_PDE3D}) target_link_libraries(psb_d_pde3d psblas::util psblas::linsolve psblas::prec psblas::base) @@ -43,8 +50,14 @@ target_link_libraries(psb_d_pde2d psblas::util psblas::linsolve psblas::prec psb add_executable(psb_s_pde2d ${SOURCES_S_PDE2D}) target_link_libraries(psb_s_pde2d psblas::util psblas::linsolve psblas::prec psblas::base) +add_executable(psb_d_pde_nest_psblas ${SOURCES_D_PDE_NEST_PSBLAS}) +target_link_libraries(psb_d_pde_nest_psblas psblas::util psblas::linsolve psblas::prec psblas::base) + +add_executable(psb_d_pde_nest_krylov ${SOURCES_D_PDE_NEST_KRYLOV}) +target_link_libraries(psb_d_pde_nest_krylov psblas::util psblas::linsolve psblas::prec psblas::base) + # Set output directory for executables -foreach(target psb_d_pde3d psb_s_pde3d psb_d_pde2d psb_s_pde2d) +foreach(target psb_d_pde3d psb_s_pde3d psb_d_pde2d psb_s_pde2d psb_dist1_pde2d psb_dist2_pde2d psb_dist3_pde2d psb_d_pde_nest_psblas psb_d_pde_nest_krylov) set_target_properties(${target} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${EXEDIR} ) diff --git a/test/pdegen/Makefile b/test/pdegen/Makefile index ecbda5d16..d084f4541 100644 --- a/test/pdegen/Makefile +++ b/test/pdegen/Makefile @@ -5,8 +5,10 @@ include $(INCDIR)/Make.inc.psblas # # Libraries used LIBDIR=$(INSTALLDIR)/lib -PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base -LDLIBS=$(PSBLDLIBS) +# BLASLIBDIR=/opt/share/sdk/intel/nvidia_hpc_sdk/Linux_x86_64/24.3/compilers/lib +# GKLIBDIR=/home/jalmerol/GKlib/build/Linux-x86_64 +PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base +LDLIBS= $(PSBLDLIBS) # # Compilers and such # @@ -16,7 +18,7 @@ FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG). EXEDIR=./runs -all: runsd psb_d_pde3d psb_s_pde3d psb_d_pde2d psb_s_pde2d +all: runsd psb_d_pde3d psb_s_pde3d psb_d_pde2d psb_s_pde2d psb_d_pde_nest_psblas psb_d_pde_nest_krylov runsd: (if test ! -d runs ; then mkdir runs; fi) @@ -38,10 +40,17 @@ psb_s_pde2d: psb_s_pde2d.o $(FLINK) psb_s_pde2d.o -o psb_s_pde2d $(PSBLAS_LIB) $(LDLIBS) /bin/mv psb_s_pde2d $(EXEDIR) +psb_d_pde_nest_psblas: psb_d_pde_nest_psblas.o + $(FLINK) psb_d_pde_nest_psblas.o -o psb_d_pde_nest_psblas $(PSBLAS_LIB) $(LDLIBS) + /bin/mv psb_d_pde_nest_psblas $(EXEDIR) + +psb_d_pde_nest_krylov: psb_d_pde_nest_krylov.o + $(FLINK) psb_d_pde_nest_krylov.o -o psb_d_pde_nest_krylov $(PSBLAS_LIB) $(LDLIBS) + /bin/mv psb_d_pde_nest_krylov $(EXEDIR) clean: - /bin/rm -f psb_d_pde3d.o psb_d_oacc_pde3d.o psb_s_pde3d.o psb_d_pde2d.o psb_s_pde2d.o *$(.mod) \ - $(EXEDIR)/psb_d_pde3d $(EXEDIR)/psb_s_pde3d $(EXEDIR)/psb_d_pde2d $(EXEDIR)/psb_s_pde2d + /bin/rm -f psb_d_pde3d.o psb_d_oacc_pde3d.o psb_s_pde3d.o psb_d_pde2d.o psb_s_pde2d.o psb_d_pde_nest_psblas.o psb_d_pde_nest_krylov.o *$(.mod) \ + $(EXEDIR)/psb_d_pde3d $(EXEDIR)/psb_s_pde3d $(EXEDIR)/psb_d_pde2d $(EXEDIR)/psb_s_pde2d $(EXEDIR)/psb_d_pde_nest_psblas $(EXEDIR)/psb_d_pde_nest_krylov verycleanlib: (cd ../..; make veryclean) lib: diff --git a/test/pdegen/psb_d_pde_nest_psblas.F90 b/test/pdegen/psb_d_pde_nest_psblas.F90 new file mode 100644 index 000000000..28074cb02 --- /dev/null +++ b/test/pdegen/psb_d_pde_nest_psblas.F90 @@ -0,0 +1,672 @@ +! +! Test code for all subroutines in psb_d_nest_psblas_mod. +! Extends psb_d_pde_nest_first.F90: copies the same 2x2 saddle-point +! setup (n=10 global DOFs per block), then exercises every operation +! exported by psb_d_nest_psblas_mod. +! +! Tested subroutines +! ------------------ +! T01 psb_d_nest_spmm y = alpha*A*x + beta*y (block SpMV) +! T02 psb_d_nest_geaxpby y = alpha*x + beta*y +! T03 psb_d_nest_genrm2 ||x||_2 (function) +! T04 psb_d_nest_genrm2s ||x||_2 (subroutine) +! T05 psb_d_nest_gedot dot(x, y) +! T06 psb_d_nest_geamax ||x||_inf +! T07 psb_d_nest_geasum ||x||_1 +! T08 psb_d_nest_gemin min(x) +! T09 psb_d_nest_minquotient min(x/y) +! T10 psb_d_nest_gemlt y = y * x (element-wise) +! T11 psb_d_nest_gediv x = x / y (element-wise; result in x) +! T12 psb_d_nest_geinv y = 1/x (result in y) +! T13 psb_d_nest_geabs y = |x| (result in y) +! T14 psb_d_nest_geaddconst z = x + b (result in z) +! T15 psb_d_nest_gecmp z(i)=1 if |x(i)|>=c, 0 otherwise +! T16 psb_d_nest_mask mask operation; t=.true. if all satisfied +! T17 psb_d_nest_upd_xyz y=alpha*x+beta*y; z=gamma*y_new+delta*z +! +program psb_d_pde_nest_psblas + use psb_base_mod + use psb_desc_nest_mod + use psb_d_nest_mod + use psb_d_nest_tools_mod, only : psb_geall_nest, psb_geasb_nest, psb_gefree_nest, & + & psb_geins_nest, psb_spall_nest, psb_spins_nest, & + & psb_spasb_nest + implicit none + + !------------------------------------------------------------------ + ! Parallel context + !------------------------------------------------------------------ + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: iam, np, info + + !------------------------------------------------------------------ + ! Problem size + !------------------------------------------------------------------ + integer(psb_ipk_), parameter :: n = 100 + integer(psb_ipk_) :: nlr, iloc, i + integer(psb_lpk_) :: iglob + + !------------------------------------------------------------------ + ! Per-block descriptors (identical layout as psb_d_pde_nest_first) + !------------------------------------------------------------------ + type(psb_desc_type) :: desc1, desc2, desc3, desc4 + + !------------------------------------------------------------------ + ! Nested descriptor and nested sparse matrix + !------------------------------------------------------------------ + type(psb_desc_nest_type) :: descs + type(psb_d_nest_sparse_mat) :: anest + + !------------------------------------------------------------------ + ! Individual sparse matrices (A11 = Laplacian, A12 = I, A21 = I) + !------------------------------------------------------------------ + type(psb_dspmat_type) :: a11, a12, a21 + + !------------------------------------------------------------------ + ! Work nested vectors (xnest, ynest, znest reused across tests) + !------------------------------------------------------------------ + type(psb_d_nest_vect_type) :: xnest, ynest, znest + + !------------------------------------------------------------------ + ! Insert buffers + !------------------------------------------------------------------ + integer(psb_lpk_) :: grow(1) + real(psb_dpk_) :: gval(1) + + !------------------------------------------------------------------ + ! Scalar results and expected values + !------------------------------------------------------------------ + real(psb_dpk_) :: res, res2, expected + real(psb_dpk_), parameter :: tol = 1.0e-10_psb_dpk_ + logical :: t_mask + + !------------------------------------------------------------------ + ! Test pass/fail counter + !------------------------------------------------------------------ + integer(psb_ipk_) :: npass, nfail + + character(len=40) :: name = 'psb_d_pde_nest_psblas' + + !================================================================== + ! Initialise MPI + !================================================================== + call psb_init(ctxt) + call psb_info(ctxt, iam, np) + call psb_set_errverbosity(itwo) + + npass = 0 + nfail = 0 + + !================================================================== + ! Build per-block descriptors + ! Use exact block distribution so total rows = n exactly. + ! Ceiling division (n+np-1)/np gives nlr*np > n phantom rows + ! when np does not divide n evenly. + !================================================================== + nlr = n / np + if (iam < mod(n, np)) nlr = nlr + 1_psb_ipk_ + nlr = max(1_psb_ipk_, nlr) + call psb_cdall(ctxt, desc1, info, nl=nlr) + call psb_cdall(ctxt, desc2, info, nl=nlr) + call desc1%clone(desc3, info) + + !================================================================== + ! A(1,1) = tridiagonal Laplacian + !================================================================== + call psb_spall(a11, desc1, info, nnz=3*desc1%get_local_rows()) + do iloc = 1, desc1%get_local_rows() + call desc1%l2g(iloc, iglob, info) + grow(1)=iglob; gval(1)=2.0_psb_dpk_ + call psb_spins(1,grow,grow,gval,a11,desc1,info) + if (iglob>1) then + grow(1)=iglob; gval(1)=-1.0_psb_dpk_ + call psb_spins(1,grow,[iglob-1_psb_lpk_],gval,a11,desc1,info) + end if + if (iglob y1=2 (2 boundary rows) + ! rows 2..n-1 give Lap=0, I=1 => y1=1 (n-2 interior rows) + ! Block 2: I*ones = 1 for all n rows + ! amax=2, gemin=1, geasum = (n+2) + n = 2n+2 + res = psb_d_nest_geamax(ynest, descs, info) + call check('T01 spmm amax(y)=2', res, 2.0_psb_dpk_, tol, npass, nfail, iam) + res = psb_d_nest_gemin(ynest, descs, info) + call check('T01 spmm gemin(y)=1', res, done, tol, npass, nfail, iam) + res = psb_d_nest_geasum(ynest, descs, info) + expected = 2.0_psb_dpk_ * real(n, psb_dpk_) + 2.0_psb_dpk_ + call check('T01 spmm geasum(y)=2n+2', res, expected, tol, npass, nfail, iam) + + !================================================================== + ! T02: psb_d_nest_geaxpby + ! x = all 3s, y = all 2s + ! y = 2*x + (-1)*y => y = 2*3 - 2 = 4 (all 4s) + !================================================================== + if (iam == 0) write(*,'(/,a)') repeat('=',60) + if (iam == 0) write(*,'(a)') 'T02: psb_d_nest_geaxpby (y = 2*x - y, x=3 y=2 => 4)' + + call set_nest_val(xnest, 3.0_psb_dpk_) + call set_nest_val(ynest, 2.0_psb_dpk_) + + call psb_d_nest_geaxpby(2.0_psb_dpk_, xnest, -done, ynest, descs, info) + if (info /= psb_success_) goto 9999 + + expected = 4.0_psb_dpk_ + res = psb_d_nest_geamax(ynest, descs, info) + call check('T02 geaxpby amax(y)=4', res, expected, tol, npass, nfail, iam) + res = psb_d_nest_gemin(ynest, descs, info) + call check('T02 geaxpby amin(y)=4', res, expected, tol, npass, nfail, iam) + + !================================================================== + ! T03: psb_d_nest_genrm2 + ! x = all 1s => ||x||_2 = sqrt(2*n) = sqrt(20) + !================================================================== + if (iam == 0) write(*,'(/,a)') repeat('=',60) + if (iam == 0) write(*,'(a)') 'T03: psb_d_nest_genrm2 (x=1 => sqrt(2n))' + + call set_nest_val(xnest, done) + + res = psb_d_nest_genrm2(xnest, descs, info) + expected = sqrt(2.0_psb_dpk_ * real(n, psb_dpk_)) + call check('T03 genrm2(ones)', res, expected, tol, npass, nfail, iam) + + !================================================================== + ! T04: psb_d_nest_genrm2s (subroutine form; result must equal T03) + !================================================================== + if (iam == 0) write(*,'(/,a)') repeat('=',60) + if (iam == 0) write(*,'(a)') 'T04: psb_d_nest_genrm2s (subroutine form of genrm2)' + + call psb_d_nest_genrm2s(res2, xnest, descs, info) + call check('T04 genrm2s == genrm2', res2, res, tol, npass, nfail, iam) + + !================================================================== + ! T05: psb_d_nest_gedot + ! x = all 1s, y = all 2s => dot = 2 * 2*n = 40 + !================================================================== + if (iam == 0) write(*,'(/,a)') repeat('=',60) + if (iam == 0) write(*,'(a)') 'T05: psb_d_nest_gedot (x=1 y=2 => 2*2n=40)' + + call set_nest_val(xnest, done) + call set_nest_val(ynest, 2.0_psb_dpk_) + + res = psb_d_nest_gedot(xnest, ynest, descs, info) + expected = 2.0_psb_dpk_ * 2.0_psb_dpk_ * real(n, psb_dpk_) + call check('T05 gedot', res, expected, tol, npass, nfail, iam) + + !================================================================== + ! T06: psb_d_nest_geamax + ! x = all 5s => ||x||_inf = 5 + !================================================================== + if (iam == 0) write(*,'(/,a)') repeat('=',60) + if (iam == 0) write(*,'(a)') 'T06: psb_d_nest_geamax (x=5 => 5)' + + call set_nest_val(xnest, 5.0_psb_dpk_) + + res = psb_d_nest_geamax(xnest, descs, info) + expected = 5.0_psb_dpk_ + call check('T06 geamax', res, expected, tol, npass, nfail, iam) + + !================================================================== + ! T07: psb_d_nest_geasum + ! x = all 1s => ||x||_1 = 2*n = 20 + !================================================================== + if (iam == 0) write(*,'(/,a)') repeat('=',60) + if (iam == 0) write(*,'(a)') 'T07: psb_d_nest_geasum (x=1 => 2n=20)' + + call set_nest_val(xnest, done) + + res = psb_d_nest_geasum(xnest, descs, info) + expected = 2.0_psb_dpk_ * real(n, psb_dpk_) + call check('T07 geasum', res, expected, tol, npass, nfail, iam) + + !================================================================== + ! T08: psb_d_nest_gemin + ! x = all 7s => min = 7 + !================================================================== + if (iam == 0) write(*,'(/,a)') repeat('=',60) + if (iam == 0) write(*,'(a)') 'T08: psb_d_nest_gemin (x=7 => 7)' + + call set_nest_val(xnest, 7.0_psb_dpk_) + + res = psb_d_nest_gemin(xnest, descs, info) + expected = 7.0_psb_dpk_ + call check('T08 gemin', res, expected, tol, npass, nfail, iam) + + !================================================================== + ! T09: psb_d_nest_minquotient + ! x = all 3s, y = all 6s => min(x/y) = 0.5 + !================================================================== + if (iam == 0) write(*,'(/,a)') repeat('=',60) + if (iam == 0) write(*,'(a)') 'T09: psb_d_nest_minquotient (x=3 y=6 => 0.5)' + + call set_nest_val(xnest, 3.0_psb_dpk_) + call set_nest_val(ynest, 6.0_psb_dpk_) + + res = psb_d_nest_minquotient(xnest, ynest, descs, info) + expected = 0.5_psb_dpk_ + call check('T09 minquotient', res, expected, tol, npass, nfail, iam) + + !================================================================== + ! T10: psb_d_nest_gemlt + ! x = all 2s, y = all 4s => y = y * x = 8 (result in y) + !================================================================== + if (iam == 0) write(*,'(/,a)') repeat('=',60) + if (iam == 0) write(*,'(a)') 'T10: psb_d_nest_gemlt (x=2 y=4 => y=8)' + + call set_nest_val(xnest, 2.0_psb_dpk_) + call set_nest_val(ynest, 4.0_psb_dpk_) + + call psb_d_nest_gemlt(xnest, ynest, descs, info) + if (info /= psb_success_) goto 9999 + + expected = 8.0_psb_dpk_ + res = psb_d_nest_geamax(ynest, descs, info) + call check('T10 gemlt amax(y)=8', res, expected, tol, npass, nfail, iam) + res = psb_d_nest_gemin(ynest, descs, info) + call check('T10 gemlt amin(y)=8', res, expected, tol, npass, nfail, iam) + + !================================================================== + ! T11: psb_d_nest_gediv + ! x = all 6s, y = all 3s => x = x / y = 2 (result in x) + !================================================================== + if (iam == 0) write(*,'(/,a)') repeat('=',60) + if (iam == 0) write(*,'(a)') 'T11: psb_d_nest_gediv (x=6 y=3 => x=2)' + + call set_nest_val(xnest, 6.0_psb_dpk_) + call set_nest_val(ynest, 3.0_psb_dpk_) + + call psb_d_nest_gediv(xnest, ynest, descs, info) + if (info /= psb_success_) goto 9999 + + expected = 2.0_psb_dpk_ + res = psb_d_nest_geamax(xnest, descs, info) + call check('T11 gediv amax(x)=2', res, expected, tol, npass, nfail, iam) + res = psb_d_nest_gemin(xnest, descs, info) + call check('T11 gediv amin(x)=2', res, expected, tol, npass, nfail, iam) + + !================================================================== + ! T12: psb_d_nest_geinv + ! x = all 4s => y = 1/x = 0.25 (result in y) + !================================================================== + if (iam == 0) write(*,'(/,a)') repeat('=',60) + if (iam == 0) write(*,'(a)') 'T12: psb_d_nest_geinv (x=4 => y=0.25)' + + call set_nest_val(xnest, 4.0_psb_dpk_) + + call psb_d_nest_geinv(xnest, ynest, descs, info) + if (info /= psb_success_) goto 9999 + + expected = 0.25_psb_dpk_ + res = psb_d_nest_geamax(ynest, descs, info) + call check('T12 geinv amax(y)=0.25', res, expected, tol, npass, nfail, iam) + res = psb_d_nest_gemin(ynest, descs, info) + call check('T12 geinv amin(y)=0.25', res, expected, tol, npass, nfail, iam) + + !================================================================== + ! T13: psb_d_nest_geabs + ! x = all -3s => y = |x| = 3 (result in y) + !================================================================== + if (iam == 0) write(*,'(/,a)') repeat('=',60) + if (iam == 0) write(*,'(a)') 'T13: psb_d_nest_geabs (x=-3 => y=3)' + + call set_nest_val(xnest, -3.0_psb_dpk_) + + call psb_d_nest_geabs(xnest, ynest, descs, info) + if (info /= psb_success_) goto 9999 + + expected = 3.0_psb_dpk_ + res = psb_d_nest_geamax(ynest, descs, info) + call check('T13 geabs amax(y)=3', res, expected, tol, npass, nfail, iam) + res = psb_d_nest_gemin(ynest, descs, info) + call check('T13 geabs amin(y)=3', res, expected, tol, npass, nfail, iam) + + !================================================================== + ! T14: psb_d_nest_geaddconst + ! x = all 2s, b = 7.0 => z = x + 7 = 9 (result in z) + !================================================================== + if (iam == 0) write(*,'(/,a)') repeat('=',60) + if (iam == 0) write(*,'(a)') 'T14: psb_d_nest_geaddconst (x=2 b=7 => z=9)' + + call set_nest_val(xnest, 2.0_psb_dpk_) + + call psb_d_nest_geaddconst(xnest, 7.0_psb_dpk_, znest, descs, info) + if (info /= psb_success_) goto 9999 + + expected = 9.0_psb_dpk_ + res = psb_d_nest_geamax(znest, descs, info) + call check('T14 geaddconst amax(z)=9', res, expected, tol, npass, nfail, iam) + res = psb_d_nest_gemin(znest, descs, info) + call check('T14 geaddconst amin(z)=9', res, expected, tol, npass, nfail, iam) + + !================================================================== + ! T15a: psb_d_nest_gecmp — entries satisfy threshold + ! x = all 3s, c = 2.0 => z(i)=1 (since |3| >= 2) + !================================================================== + if (iam == 0) write(*,'(/,a)') repeat('=',60) + if (iam == 0) write(*,'(a)') 'T15a: psb_d_nest_gecmp (x=3 c=2 => z=1)' + + call set_nest_val(xnest, 3.0_psb_dpk_) + + call psb_d_nest_gecmp(xnest, 2.0_psb_dpk_, znest, descs, info) + if (info /= psb_success_) goto 9999 + + expected = done + res = psb_d_nest_geamax(znest, descs, info) + call check('T15a gecmp amax(z)=1', res, expected, tol, npass, nfail, iam) + res = psb_d_nest_gemin(znest, descs, info) + call check('T15a gecmp amin(z)=1', res, expected, tol, npass, nfail, iam) + + !================================================================== + ! T15b: psb_d_nest_gecmp — entries do not satisfy threshold + ! x = all 1s, c = 2.0 => z(i)=0 (since |1| < 2) + !================================================================== + if (iam == 0) write(*,'(/,a)') repeat('=',60) + if (iam == 0) write(*,'(a)') 'T15b: psb_d_nest_gecmp (x=1 c=2 => z=0)' + + call set_nest_val(xnest, done) + + call psb_d_nest_gecmp(xnest, 2.0_psb_dpk_, znest, descs, info) + if (info /= psb_success_) goto 9999 + + expected = dzero + res = psb_d_nest_geamax(znest, descs, info) + call check('T15b gecmp amax(z)=0', res, expected, tol, npass, nfail, iam) + + !================================================================== + ! T16: psb_d_nest_mask + ! Semantics: mask(c, x, m, t) + ! c = values to test (first arg) + ! x = constraint-type indicators (second arg): + ! 2 => satisfied if c(i) > 0 + ! 1 => satisfied if c(i) >= 0 + ! -1 => satisfied if c(i) <= 0 + ! -2 => satisfied if c(i) < 0 + ! m = output mask (0=satisfied, 1=violated) + ! t = .true. iff all entries satisfied + ! + ! Case: c = all +3 (positive), x = all 2 (check > 0) => m=0 t=T + !================================================================== + if (iam == 0) write(*,'(/,a)') repeat('=',60) + if (iam == 0) write(*,'(a)') 'T16: psb_d_nest_mask (c=3 x=2 => m=0 t=.true.)' + + call set_nest_val(xnest, 3.0_psb_dpk_) ! values to test + call set_nest_val(ynest, 2.0_psb_dpk_) ! constraint indicators (type 2: check > 0) + + call psb_d_nest_mask(xnest, ynest, znest, t_mask, descs, info) + if (info /= psb_success_) goto 9999 + + if (iam == 0) then + if (t_mask) then + write(*,'(a)') ' T16 mask: t=.true. PASS (all constraints satisfied)' + npass = npass + 1 + else + write(*,'(a)') ' T16 mask: t=.false. FAIL (expected .true.)' + nfail = nfail + 1 + end if + end if + + !------------------------------------------------------------------ + ! T16b: c = all -3 (negative), x = all 2 (check > 0) => m=1 t=F + !------------------------------------------------------------------ + if (iam == 0) write(*,'(a)') 'T16b: psb_d_nest_mask (c=-3 x=2 => m=1 t=.false.)' + + call set_nest_val(xnest, -3.0_psb_dpk_) ! values (negative) + call set_nest_val(ynest, 2.0_psb_dpk_) ! indicators (type 2: check > 0) + + call psb_d_nest_mask(xnest, ynest, znest, t_mask, descs, info) + if (info /= psb_success_) goto 9999 + + if (iam == 0) then + if (.not. t_mask) then + write(*,'(a)') ' T16b mask: t=.false. PASS (all constraints violated)' + npass = npass + 1 + else + write(*,'(a)') ' T16b mask: t=.true. FAIL (expected .false.)' + nfail = nfail + 1 + end if + end if + + !================================================================== + ! T17: psb_d_nest_upd_xyz + ! Computes: y_new = alpha*x + beta*y + ! z_new = gamma*y_new + delta*z + ! + ! x=1, y=2, z=3, alpha=2, beta=3, gamma=4, delta=5 + ! => y_new = 2*1 + 3*2 = 8 + ! => z_new = 4*8 + 5*3 = 47 + !================================================================== + if (iam == 0) write(*,'(/,a)') repeat('=',60) + if (iam == 0) write(*,'(a)') & + 'T17: psb_d_nest_upd_xyz (x=1 y=2 z=3 a=2 b=3 g=4 d=5 => y=8 z=47)' + + call set_nest_val(xnest, done) + call set_nest_val(ynest, 2.0_psb_dpk_) + call set_nest_val(znest, 3.0_psb_dpk_) + + call psb_d_nest_upd_xyz(2.0_psb_dpk_, 3.0_psb_dpk_, & + & 4.0_psb_dpk_, 5.0_psb_dpk_, & + & xnest, ynest, znest, descs, info) + if (info /= psb_success_) goto 9999 + + expected = 8.0_psb_dpk_ + res = psb_d_nest_geamax(ynest, descs, info) + call check('T17 upd_xyz amax(y)=8', res, expected, tol, npass, nfail, iam) + res = psb_d_nest_gemin(ynest, descs, info) + call check('T17 upd_xyz amin(y)=8', res, expected, tol, npass, nfail, iam) + + expected = 47.0_psb_dpk_ + res = psb_d_nest_geamax(znest, descs, info) + call check('T17 upd_xyz amax(z)=47', res, expected, tol, npass, nfail, iam) + res = psb_d_nest_gemin(znest, descs, info) + call check('T17 upd_xyz amin(z)=47', res, expected, tol, npass, nfail, iam) + + !================================================================== + ! Summary + !================================================================== + if (iam == 0) then + write(*,'(/,a)') repeat('=',60) + write(*,'(a,i0,a,i0,a)') & + ' RESULTS: ', npass, ' passed, ', nfail, ' failed' + write(*,'(a)') repeat('=',60) + end if + + !================================================================== + ! Clean up + !================================================================== + call psb_gefree_nest(xnest, descs, info) + call psb_gefree_nest(ynest, descs, info) + call psb_gefree_nest(znest, descs, info) + + call psb_cdfree(desc1, info) + call psb_cdfree(desc2, info) + call psb_cdfree(desc3, info) + call psb_cdfree(desc4, info) + + call psb_exit(ctxt) + stop + +9999 continue + write(psb_err_unit,*) trim(name), ': error info=', info, ' rank=', iam + call psb_error(ctxt) + call psb_exit(ctxt) + stop + +contains + + !------------------------------------------------------------------ + ! Set every local entry of every block to val + !------------------------------------------------------------------ + subroutine set_nest_val(v, val) + use psb_base_mod + type(psb_d_nest_vect_type), intent(inout) :: v + real(psb_dpk_), intent(in) :: val + integer(psb_ipk_) :: k, linfo + linfo = 0 + do k = 1, v%nblocks + call v%vects(k)%set(val, linfo) + end do + end subroutine set_nest_val + + !------------------------------------------------------------------ + ! Scalar pass/fail check with tolerance + !------------------------------------------------------------------ + subroutine check(label, got, expected, tol, np_, nf_, myrank) + use psb_base_mod + character(len=*), intent(in) :: label + real(psb_dpk_), intent(in) :: got, expected, tol + integer(psb_ipk_), intent(inout) :: np_, nf_ + integer(psb_ipk_), intent(in) :: myrank + + if (myrank /= 0) return + if (abs(got - expected) <= tol * max(done, abs(expected))) then + write(*,'(2x,a,a,f16.10,a,f16.10)') & + 'PASS ', trim(label)//' got=', got, ' exp=', expected + np_ = np_ + 1 + else + write(*,'(2x,a,a,f16.10,a,f16.10)') & + 'FAIL ', trim(label)//' got=', got, ' exp=', expected + nf_ = nf_ + 1 + end if + end subroutine check + + !------------------------------------------------------------------ + ! Print every block of a nested vector (one rank at a time). + ! Each process flushes stdout before the barrier so that buffered + ! output does not bleed into the next process's print window. + !------------------------------------------------------------------ + subroutine print_nest_vec(v, label, myrank, nprocs, myctxt, ds) + use psb_base_mod + use iso_fortran_env, only: output_unit + type(psb_d_nest_vect_type), intent(inout) :: v + character(len=*), intent(in) :: label + integer(psb_ipk_), intent(in) :: myrank, nprocs + type(psb_ctxt_type), intent(in) :: myctxt + type(psb_desc_nest_type), intent(in) :: ds + + integer(psb_ipk_) :: blk, ip, k, nr, linfo + real(psb_dpk_), allocatable :: vals(:) + + do blk = 1, v%nblocks + nr = ds%descs(blk,blk)%get_local_rows() + do ip = 0, nprocs-1 + call psb_barrier(myctxt) + if (myrank == ip) then + write(*,'(a,a,a,i0,a)') ' [', trim(label), '] block ', blk, ':' + linfo = 0 + allocate(vals(nr), stat=linfo) + if (linfo == 0) vals = v%vects(blk)%get_vect() + do k = 1, nr + write(*,'(4x,i4,f14.6)') k, vals(k) + end do + deallocate(vals) + flush(output_unit) + end if + end do + call psb_barrier(myctxt) + end do + end subroutine print_nest_vec + +end program psb_d_pde_nest_psblas