From e766e9b2fbe524d8a8acd30393190e078deaed02 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Sun, 9 Jun 2019 19:49:23 +0100 Subject: [PATCH 01/21] Implementation of CSRD. To be tested. --- base/modules/serial/psb_c_csr_mat_mod.f90 | 156 ++++ base/modules/serial/psb_d_csr_mat_mod.f90 | 156 ++++ base/modules/serial/psb_s_csr_mat_mod.f90 | 156 ++++ base/modules/serial/psb_z_csr_mat_mod.f90 | 156 ++++ base/serial/impl/psb_c_base_mat_impl.F90 | 49 +- base/serial/impl/psb_c_csr_impl.f90 | 1013 ++++++++++++++++++++- base/serial/impl/psb_d_base_mat_impl.F90 | 49 +- base/serial/impl/psb_d_csr_impl.f90 | 1013 ++++++++++++++++++++- base/serial/impl/psb_s_base_mat_impl.F90 | 49 +- base/serial/impl/psb_s_csr_impl.f90 | 1013 ++++++++++++++++++++- base/serial/impl/psb_z_base_mat_impl.F90 | 49 +- base/serial/impl/psb_z_csr_impl.f90 | 1013 ++++++++++++++++++++- 12 files changed, 4812 insertions(+), 60 deletions(-) diff --git a/base/modules/serial/psb_c_csr_mat_mod.f90 b/base/modules/serial/psb_c_csr_mat_mod.f90 index 7452d5aa..0cbe5218 100644 --- a/base/modules/serial/psb_c_csr_mat_mod.f90 +++ b/base/modules/serial/psb_c_csr_mat_mod.f90 @@ -596,8 +596,120 @@ module psb_c_csr_mat_mod integer(psb_ipk_), intent(out) :: info end subroutine psb_c_csr_scals end interface + + type, extends(psb_c_csr_sparse_mat) :: psb_c_csrd_sparse_mat + + !> Pointers to diagonal entries + integer(psb_ipk_), allocatable :: diagp(:) + + contains + procedure, nopass :: get_fmt => c_csrd_get_fmt + procedure, pass(a) :: sizeof => c_csrd_sizeof + procedure, pass(a) :: inner_cssm => psb_c_csrd_cssm + procedure, pass(a) :: inner_cssv => psb_c_csrd_cssv + procedure, pass(a) :: trmv => psb_c_csrd_trmv + !procedure, pass(a) :: reallocate_nz => psb_c_csrd_reallocate_nz + !procedure, pass(a) :: allocate_mnnz => psb_c_csrd_allocate_mnnz +!!$ procedure, pass(a) :: cp_to_coo => psb_c_cp_csrd_to_coo + procedure, pass(a) :: cp_from_coo => psb_c_cp_csrd_from_coo +!!$ procedure, pass(a) :: cp_to_fmt => psb_c_cp_csrd_to_fmt +!!$ procedure, pass(a) :: cp_from_fmt => psb_c_cp_csrd_from_fmt +!!$ procedure, pass(a) :: mv_to_coo => psb_c_mv_csrd_to_coo +!!$ procedure, pass(a) :: mv_from_coo => psb_c_mv_csrd_from_coo +!!$ procedure, pass(a) :: mv_to_fmt => psb_c_mv_csrd_to_fmt +!!$ procedure, pass(a) :: mv_from_fmt => psb_c_mv_csrd_from_fmt + procedure, pass(a) :: clean_zeros => psb_c_csrd_clean_zeros + procedure, pass(a) :: get_diag => psb_c_csrd_get_diag + !procedure, pass(a) :: reinit => psb_c_csrd_reinit + procedure, pass(a) :: trim => psb_c_csrd_trim + procedure, pass(a) :: free => c_csrd_free + procedure, pass(a) :: mold => psb_c_csrd_mold + + end type psb_c_csrd_sparse_mat + + interface + subroutine psb_c_csrd_clean_zeros(a, info) + import :: psb_ipk_, psb_c_csrd_sparse_mat, psb_spk_ + implicit none + class(psb_c_csrd_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_csrd_clean_zeros + end interface + + interface + subroutine psb_c_csrd_get_diag(a,d,info) + import :: psb_ipk_, psb_c_csrd_sparse_mat, psb_spk_ + implicit none + class(psb_c_csrd_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_csrd_get_diag + end interface + + interface + subroutine psb_c_cp_csrd_from_coo(a,b,info) + import :: psb_ipk_, psb_c_csrd_sparse_mat, psb_c_coo_sparse_mat, psb_spk_ + implicit none + class(psb_c_csrd_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_csrd_from_coo + end interface + + interface + subroutine psb_c_csrd_trim(a) + import :: psb_ipk_, psb_c_csrd_sparse_mat, psb_spk_ + implicit none + class(psb_c_csrd_sparse_mat), intent(inout) :: a + end subroutine psb_c_csrd_trim + end interface + interface + subroutine psb_c_csrd_mold(a,b,info) + import :: psb_ipk_, psb_c_csrd_sparse_mat, psb_c_base_sparse_mat, psb_spk_ + implicit none + class(psb_c_csrd_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_csrd_mold + end interface + + interface + subroutine psb_c_csrd_trmv(alpha,a,x,beta,y,info,uplo,diag) + import :: psb_ipk_, psb_c_csrd_sparse_mat, psb_spk_ + implicit none + class(psb_c_csrd_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: uplo, diag + end subroutine psb_c_csrd_trmv + end interface + interface + subroutine psb_c_csrd_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_c_csrd_sparse_mat, psb_spk_ + implicit none + class(psb_c_csrd_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_csrd_cssm + end interface + + interface + subroutine psb_c_csrd_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_c_csrd_sparse_mat, psb_spk_ + implicit none + class(psb_c_csrd_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_csrd_cssv + end interface + contains @@ -717,4 +829,48 @@ contains end subroutine c_csr_free + ! == =================================== + ! + ! + ! + ! CSRD specific versions + ! + ! + ! + ! + ! + ! == =================================== + + function c_csrd_sizeof(a) result(res) + implicit none + class(psb_c_csrd_sparse_mat), intent(in) :: a + integer(psb_long_int_k_) :: res + res = 8 + res = res + (2*psb_sizeof_sp) * psb_size(a%val) + res = res + psb_sizeof_int * psb_size(a%irp) + res = res + psb_sizeof_int * psb_size(a%ja) + res = res + psb_sizeof_int * psb_size(a%diagp) + + end function c_csrd_sizeof + + function c_csrd_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRD' + end function c_csrd_get_fmt + + + subroutine c_csrd_free(a) + implicit none + + class(psb_c_csrd_sparse_mat), intent(inout) :: a + + if (allocated(a%diagp)) deallocate(a%diagp) + call a%psb_c_csr_sparse_mat%free() + + return + + end subroutine c_csrd_free + + end module psb_c_csr_mat_mod diff --git a/base/modules/serial/psb_d_csr_mat_mod.f90 b/base/modules/serial/psb_d_csr_mat_mod.f90 index ae5ddfd3..8b573652 100644 --- a/base/modules/serial/psb_d_csr_mat_mod.f90 +++ b/base/modules/serial/psb_d_csr_mat_mod.f90 @@ -596,8 +596,120 @@ module psb_d_csr_mat_mod integer(psb_ipk_), intent(out) :: info end subroutine psb_d_csr_scals end interface + + type, extends(psb_d_csr_sparse_mat) :: psb_d_csrd_sparse_mat + + !> Pointers to diagonal entries + integer(psb_ipk_), allocatable :: diagp(:) + + contains + procedure, nopass :: get_fmt => d_csrd_get_fmt + procedure, pass(a) :: sizeof => d_csrd_sizeof + procedure, pass(a) :: inner_cssm => psb_d_csrd_cssm + procedure, pass(a) :: inner_cssv => psb_d_csrd_cssv + procedure, pass(a) :: trmv => psb_d_csrd_trmv + !procedure, pass(a) :: reallocate_nz => psb_d_csrd_reallocate_nz + !procedure, pass(a) :: allocate_mnnz => psb_d_csrd_allocate_mnnz +!!$ procedure, pass(a) :: cp_to_coo => psb_d_cp_csrd_to_coo + procedure, pass(a) :: cp_from_coo => psb_d_cp_csrd_from_coo +!!$ procedure, pass(a) :: cp_to_fmt => psb_d_cp_csrd_to_fmt +!!$ procedure, pass(a) :: cp_from_fmt => psb_d_cp_csrd_from_fmt +!!$ procedure, pass(a) :: mv_to_coo => psb_d_mv_csrd_to_coo +!!$ procedure, pass(a) :: mv_from_coo => psb_d_mv_csrd_from_coo +!!$ procedure, pass(a) :: mv_to_fmt => psb_d_mv_csrd_to_fmt +!!$ procedure, pass(a) :: mv_from_fmt => psb_d_mv_csrd_from_fmt + procedure, pass(a) :: clean_zeros => psb_d_csrd_clean_zeros + procedure, pass(a) :: get_diag => psb_d_csrd_get_diag + !procedure, pass(a) :: reinit => psb_d_csrd_reinit + procedure, pass(a) :: trim => psb_d_csrd_trim + procedure, pass(a) :: free => d_csrd_free + procedure, pass(a) :: mold => psb_d_csrd_mold + + end type psb_d_csrd_sparse_mat + + interface + subroutine psb_d_csrd_clean_zeros(a, info) + import :: psb_ipk_, psb_d_csrd_sparse_mat, psb_dpk_ + implicit none + class(psb_d_csrd_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_csrd_clean_zeros + end interface + + interface + subroutine psb_d_csrd_get_diag(a,d,info) + import :: psb_ipk_, psb_d_csrd_sparse_mat, psb_dpk_ + implicit none + class(psb_d_csrd_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_csrd_get_diag + end interface + + interface + subroutine psb_d_cp_csrd_from_coo(a,b,info) + import :: psb_ipk_, psb_d_csrd_sparse_mat, psb_d_coo_sparse_mat, psb_dpk_ + implicit none + class(psb_d_csrd_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_csrd_from_coo + end interface + + interface + subroutine psb_d_csrd_trim(a) + import :: psb_ipk_, psb_d_csrd_sparse_mat, psb_dpk_ + implicit none + class(psb_d_csrd_sparse_mat), intent(inout) :: a + end subroutine psb_d_csrd_trim + end interface + interface + subroutine psb_d_csrd_mold(a,b,info) + import :: psb_ipk_, psb_d_csrd_sparse_mat, psb_d_base_sparse_mat, psb_dpk_ + implicit none + class(psb_d_csrd_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_csrd_mold + end interface + + interface + subroutine psb_d_csrd_trmv(alpha,a,x,beta,y,info,uplo,diag) + import :: psb_ipk_, psb_d_csrd_sparse_mat, psb_dpk_ + implicit none + class(psb_d_csrd_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: uplo, diag + end subroutine psb_d_csrd_trmv + end interface + interface + subroutine psb_d_csrd_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_d_csrd_sparse_mat, psb_dpk_ + implicit none + class(psb_d_csrd_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_csrd_cssm + end interface + + interface + subroutine psb_d_csrd_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_d_csrd_sparse_mat, psb_dpk_ + implicit none + class(psb_d_csrd_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_csrd_cssv + end interface + contains @@ -717,4 +829,48 @@ contains end subroutine d_csr_free + ! == =================================== + ! + ! + ! + ! CSRD specific versions + ! + ! + ! + ! + ! + ! == =================================== + + function d_csrd_sizeof(a) result(res) + implicit none + class(psb_d_csrd_sparse_mat), intent(in) :: a + integer(psb_long_int_k_) :: res + res = 8 + res = res + psb_sizeof_dp * psb_size(a%val) + res = res + psb_sizeof_int * psb_size(a%irp) + res = res + psb_sizeof_int * psb_size(a%ja) + res = res + psb_sizeof_int * psb_size(a%diagp) + + end function d_csrd_sizeof + + function d_csrd_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRD' + end function d_csrd_get_fmt + + + subroutine d_csrd_free(a) + implicit none + + class(psb_d_csrd_sparse_mat), intent(inout) :: a + + if (allocated(a%diagp)) deallocate(a%diagp) + call a%psb_d_csr_sparse_mat%free() + + return + + end subroutine d_csrd_free + + end module psb_d_csr_mat_mod diff --git a/base/modules/serial/psb_s_csr_mat_mod.f90 b/base/modules/serial/psb_s_csr_mat_mod.f90 index e48fb7c1..2432481c 100644 --- a/base/modules/serial/psb_s_csr_mat_mod.f90 +++ b/base/modules/serial/psb_s_csr_mat_mod.f90 @@ -596,8 +596,120 @@ module psb_s_csr_mat_mod integer(psb_ipk_), intent(out) :: info end subroutine psb_s_csr_scals end interface + + type, extends(psb_s_csr_sparse_mat) :: psb_s_csrd_sparse_mat + + !> Pointers to diagonal entries + integer(psb_ipk_), allocatable :: diagp(:) + + contains + procedure, nopass :: get_fmt => s_csrd_get_fmt + procedure, pass(a) :: sizeof => s_csrd_sizeof + procedure, pass(a) :: inner_cssm => psb_s_csrd_cssm + procedure, pass(a) :: inner_cssv => psb_s_csrd_cssv + procedure, pass(a) :: trmv => psb_s_csrd_trmv + !procedure, pass(a) :: reallocate_nz => psb_s_csrd_reallocate_nz + !procedure, pass(a) :: allocate_mnnz => psb_s_csrd_allocate_mnnz +!!$ procedure, pass(a) :: cp_to_coo => psb_s_cp_csrd_to_coo + procedure, pass(a) :: cp_from_coo => psb_s_cp_csrd_from_coo +!!$ procedure, pass(a) :: cp_to_fmt => psb_s_cp_csrd_to_fmt +!!$ procedure, pass(a) :: cp_from_fmt => psb_s_cp_csrd_from_fmt +!!$ procedure, pass(a) :: mv_to_coo => psb_s_mv_csrd_to_coo +!!$ procedure, pass(a) :: mv_from_coo => psb_s_mv_csrd_from_coo +!!$ procedure, pass(a) :: mv_to_fmt => psb_s_mv_csrd_to_fmt +!!$ procedure, pass(a) :: mv_from_fmt => psb_s_mv_csrd_from_fmt + procedure, pass(a) :: clean_zeros => psb_s_csrd_clean_zeros + procedure, pass(a) :: get_diag => psb_s_csrd_get_diag + !procedure, pass(a) :: reinit => psb_s_csrd_reinit + procedure, pass(a) :: trim => psb_s_csrd_trim + procedure, pass(a) :: free => s_csrd_free + procedure, pass(a) :: mold => psb_s_csrd_mold + + end type psb_s_csrd_sparse_mat + + interface + subroutine psb_s_csrd_clean_zeros(a, info) + import :: psb_ipk_, psb_s_csrd_sparse_mat, psb_spk_ + implicit none + class(psb_s_csrd_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_csrd_clean_zeros + end interface + + interface + subroutine psb_s_csrd_get_diag(a,d,info) + import :: psb_ipk_, psb_s_csrd_sparse_mat, psb_spk_ + implicit none + class(psb_s_csrd_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_csrd_get_diag + end interface + + interface + subroutine psb_s_cp_csrd_from_coo(a,b,info) + import :: psb_ipk_, psb_s_csrd_sparse_mat, psb_s_coo_sparse_mat, psb_spk_ + implicit none + class(psb_s_csrd_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_csrd_from_coo + end interface + + interface + subroutine psb_s_csrd_trim(a) + import :: psb_ipk_, psb_s_csrd_sparse_mat, psb_spk_ + implicit none + class(psb_s_csrd_sparse_mat), intent(inout) :: a + end subroutine psb_s_csrd_trim + end interface + interface + subroutine psb_s_csrd_mold(a,b,info) + import :: psb_ipk_, psb_s_csrd_sparse_mat, psb_s_base_sparse_mat, psb_spk_ + implicit none + class(psb_s_csrd_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_csrd_mold + end interface + + interface + subroutine psb_s_csrd_trmv(alpha,a,x,beta,y,info,uplo,diag) + import :: psb_ipk_, psb_s_csrd_sparse_mat, psb_spk_ + implicit none + class(psb_s_csrd_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: uplo, diag + end subroutine psb_s_csrd_trmv + end interface + interface + subroutine psb_s_csrd_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_s_csrd_sparse_mat, psb_spk_ + implicit none + class(psb_s_csrd_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_csrd_cssm + end interface + + interface + subroutine psb_s_csrd_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_s_csrd_sparse_mat, psb_spk_ + implicit none + class(psb_s_csrd_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_csrd_cssv + end interface + contains @@ -717,4 +829,48 @@ contains end subroutine s_csr_free + ! == =================================== + ! + ! + ! + ! CSRD specific versions + ! + ! + ! + ! + ! + ! == =================================== + + function s_csrd_sizeof(a) result(res) + implicit none + class(psb_s_csrd_sparse_mat), intent(in) :: a + integer(psb_long_int_k_) :: res + res = 8 + res = res + psb_sizeof_sp * psb_size(a%val) + res = res + psb_sizeof_int * psb_size(a%irp) + res = res + psb_sizeof_int * psb_size(a%ja) + res = res + psb_sizeof_int * psb_size(a%diagp) + + end function s_csrd_sizeof + + function s_csrd_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRD' + end function s_csrd_get_fmt + + + subroutine s_csrd_free(a) + implicit none + + class(psb_s_csrd_sparse_mat), intent(inout) :: a + + if (allocated(a%diagp)) deallocate(a%diagp) + call a%psb_s_csr_sparse_mat%free() + + return + + end subroutine s_csrd_free + + end module psb_s_csr_mat_mod diff --git a/base/modules/serial/psb_z_csr_mat_mod.f90 b/base/modules/serial/psb_z_csr_mat_mod.f90 index 8312d960..341dc77a 100644 --- a/base/modules/serial/psb_z_csr_mat_mod.f90 +++ b/base/modules/serial/psb_z_csr_mat_mod.f90 @@ -596,8 +596,120 @@ module psb_z_csr_mat_mod integer(psb_ipk_), intent(out) :: info end subroutine psb_z_csr_scals end interface + + type, extends(psb_z_csr_sparse_mat) :: psb_z_csrd_sparse_mat + + !> Pointers to diagonal entries + integer(psb_ipk_), allocatable :: diagp(:) + + contains + procedure, nopass :: get_fmt => z_csrd_get_fmt + procedure, pass(a) :: sizeof => z_csrd_sizeof + procedure, pass(a) :: inner_cssm => psb_z_csrd_cssm + procedure, pass(a) :: inner_cssv => psb_z_csrd_cssv + procedure, pass(a) :: trmv => psb_z_csrd_trmv + !procedure, pass(a) :: reallocate_nz => psb_z_csrd_reallocate_nz + !procedure, pass(a) :: allocate_mnnz => psb_z_csrd_allocate_mnnz +!!$ procedure, pass(a) :: cp_to_coo => psb_z_cp_csrd_to_coo + procedure, pass(a) :: cp_from_coo => psb_z_cp_csrd_from_coo +!!$ procedure, pass(a) :: cp_to_fmt => psb_z_cp_csrd_to_fmt +!!$ procedure, pass(a) :: cp_from_fmt => psb_z_cp_csrd_from_fmt +!!$ procedure, pass(a) :: mv_to_coo => psb_z_mv_csrd_to_coo +!!$ procedure, pass(a) :: mv_from_coo => psb_z_mv_csrd_from_coo +!!$ procedure, pass(a) :: mv_to_fmt => psb_z_mv_csrd_to_fmt +!!$ procedure, pass(a) :: mv_from_fmt => psb_z_mv_csrd_from_fmt + procedure, pass(a) :: clean_zeros => psb_z_csrd_clean_zeros + procedure, pass(a) :: get_diag => psb_z_csrd_get_diag + !procedure, pass(a) :: reinit => psb_z_csrd_reinit + procedure, pass(a) :: trim => psb_z_csrd_trim + procedure, pass(a) :: free => z_csrd_free + procedure, pass(a) :: mold => psb_z_csrd_mold + + end type psb_z_csrd_sparse_mat + + interface + subroutine psb_z_csrd_clean_zeros(a, info) + import :: psb_ipk_, psb_z_csrd_sparse_mat, psb_dpk_ + implicit none + class(psb_z_csrd_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_csrd_clean_zeros + end interface + + interface + subroutine psb_z_csrd_get_diag(a,d,info) + import :: psb_ipk_, psb_z_csrd_sparse_mat, psb_dpk_ + implicit none + class(psb_z_csrd_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_csrd_get_diag + end interface + + interface + subroutine psb_z_cp_csrd_from_coo(a,b,info) + import :: psb_ipk_, psb_z_csrd_sparse_mat, psb_z_coo_sparse_mat, psb_dpk_ + implicit none + class(psb_z_csrd_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_csrd_from_coo + end interface + + interface + subroutine psb_z_csrd_trim(a) + import :: psb_ipk_, psb_z_csrd_sparse_mat, psb_dpk_ + implicit none + class(psb_z_csrd_sparse_mat), intent(inout) :: a + end subroutine psb_z_csrd_trim + end interface + interface + subroutine psb_z_csrd_mold(a,b,info) + import :: psb_ipk_, psb_z_csrd_sparse_mat, psb_z_base_sparse_mat, psb_dpk_ + implicit none + class(psb_z_csrd_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_csrd_mold + end interface + + interface + subroutine psb_z_csrd_trmv(alpha,a,x,beta,y,info,uplo,diag) + import :: psb_ipk_, psb_z_csrd_sparse_mat, psb_dpk_ + implicit none + class(psb_z_csrd_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: uplo, diag + end subroutine psb_z_csrd_trmv + end interface + interface + subroutine psb_z_csrd_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_z_csrd_sparse_mat, psb_dpk_ + implicit none + class(psb_z_csrd_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_csrd_cssm + end interface + + interface + subroutine psb_z_csrd_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_z_csrd_sparse_mat, psb_dpk_ + implicit none + class(psb_z_csrd_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_csrd_cssv + end interface + contains @@ -717,4 +829,48 @@ contains end subroutine z_csr_free + ! == =================================== + ! + ! + ! + ! CSRD specific versions + ! + ! + ! + ! + ! + ! == =================================== + + function z_csrd_sizeof(a) result(res) + implicit none + class(psb_z_csrd_sparse_mat), intent(in) :: a + integer(psb_long_int_k_) :: res + res = 8 + res = res + (2*psb_sizeof_dp) * psb_size(a%val) + res = res + psb_sizeof_int * psb_size(a%irp) + res = res + psb_sizeof_int * psb_size(a%ja) + res = res + psb_sizeof_int * psb_size(a%diagp) + + end function z_csrd_sizeof + + function z_csrd_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRD' + end function z_csrd_get_fmt + + + subroutine z_csrd_free(a) + implicit none + + class(psb_z_csrd_sparse_mat), intent(inout) :: a + + if (allocated(a%diagp)) deallocate(a%diagp) + call a%psb_z_csr_sparse_mat%free() + + return + + end subroutine z_csrd_free + + end module psb_z_csr_mat_mod diff --git a/base/serial/impl/psb_c_base_mat_impl.F90 b/base/serial/impl/psb_c_base_mat_impl.F90 index 0ced6dda..ac9ac82b 100644 --- a/base/serial/impl/psb_c_base_mat_impl.F90 +++ b/base/serial/impl/psb_c_base_mat_impl.F90 @@ -1184,17 +1184,28 @@ subroutine psb_c_base_csmm(alpha,a,x,beta,y,info,trans) integer(psb_ipk_) :: err_act integer(psb_ipk_) :: ierr(5) - character(len=20) :: name='c_base_csmm' + integer(psb_ipk_) :: j,nc + character(len=20) :: name='c_base_csmm' logical, parameter :: debug=.false. call psb_erractionsave(err_act) - ! This is the base version. If we get here - ! it means the derived class is incomplete, - ! so we throw an error. - info = psb_err_missing_override_method_ - call psb_errpush(info,name,a_err=a%get_fmt()) + ! This is the base version. + ! It's a very inefficient implementation, + ! but it's only a fallback, if multivectors + ! are important you are supposed to implement it + ! explicitly in the derived class. + info = psb_success_ + nc = min(size(x,2),size(y,2)) + do j=1,nc + call a%spmm(alpha,x(j,:),beta,y(:,j),info,trans) + if (info /= psb_success_) goto 9999 + end do - call psb_error_handler(err_act) + call psb_erractionrestore(err_act) + + return + +9999 call psb_error_handler(err_act) end subroutine psb_c_base_csmm @@ -1266,17 +1277,29 @@ subroutine psb_c_base_inner_cssm(alpha,a,x,beta,y,info,trans) integer(psb_ipk_) :: err_act integer(psb_ipk_) :: ierr(5) + integer(psb_ipk_) :: j, nc character(len=20) :: name='c_base_inner_cssm' logical, parameter :: debug=.false. call psb_erractionsave(err_act) - ! This is the base version. If we get here - ! it means the derived class is incomplete, - ! so we throw an error. - info = psb_err_missing_override_method_ - call psb_errpush(info,name,a_err=a%get_fmt()) + ! This is the base version. + ! It's a very inefficient implementation, + ! but it's only a fallback, if multivectors + ! are important you are supposed to implement it + ! explicitly in the derived class. + info = psb_success_ + nc = min(size(x,2),size(y,2)) + do j=1,nc + call a%spsm(alpha,x(j,:),beta,y(:,j),info,trans) + if (info /= psb_success_) goto 9999 + end do + + call psb_erractionrestore(err_act) + + return + +9999 call psb_error_handler(err_act) - call psb_error_handler(err_act) end subroutine psb_c_base_inner_cssm diff --git a/base/serial/impl/psb_c_csr_impl.f90 b/base/serial/impl/psb_c_csr_impl.f90 index e248c5e1..138d2963 100644 --- a/base/serial/impl/psb_c_csr_impl.f90 +++ b/base/serial/impl/psb_c_csr_impl.f90 @@ -999,8 +999,6 @@ contains end subroutine psb_c_csr_cssv - - subroutine psb_c_csr_cssm(alpha,a,x,beta,y,info,trans) use psb_error_mod use psb_string_mod @@ -3473,3 +3471,1014 @@ contains end subroutine csr_spspmm end subroutine psb_ccsrspspmm + + +! == =================================== +! +! +! +! CSRD specific versions +! +! +! +! +! +! == =================================== + +subroutine psb_c_csrd_clean_zeros(a, info) + use psb_error_mod + use psb_c_csr_mat_mod, psb_protect_name => psb_c_csrd_clean_zeros + implicit none + class(psb_c_csrd_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: i, j, k, nr + integer(psb_ipk_), allocatable :: ilrp(:) + + info = 0 + call a%sync() + nr = a%get_nrows() + ilrp = a%irp(:) + a%irp(1) = 1 + j = a%irp(1) + do i=1, nr + a%diagp(i) = 0 + do k = ilrp(i), ilrp(i+1) -1 + if ((a%ja(j)==i).or.(a%val(k) /= czero)) then + a%val(j) = a%val(k) + a%ja(j) = a%ja(k) + if (a%ja(j) == i) a%diagp(i) = j + j = j + 1 + end if + end do + a%irp(i+1) = j + end do + call a%trim() + call a%set_host() +end subroutine psb_c_csrd_clean_zeros + + +subroutine psb_c_csrd_get_diag(a,d,info) + use psb_error_mod + use psb_const_mod + use psb_c_csr_mat_mod, psb_protect_name => psb_c_csrd_get_diag + implicit none + class(psb_c_csrd_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act, mnm, i, j, k + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = size(d); + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + + + if (a%is_unit()) then + d(1:mnm) = cone + else + do i=1, mnm + if (a%diagp(i) >0) d(i) = a%val(a%diagp(i)) + end do + end if + do i=mnm+1,size(d) + d(i) = czero + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_csrd_get_diag + +subroutine psb_c_cp_csrd_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_c_base_mat_mod + use psb_c_csr_mat_mod, psb_protect_name => psb_c_cp_csrd_from_coo + implicit none + + class(psb_c_csrd_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + type(psb_c_coo_sparse_mat) :: tmp + integer(psb_ipk_), allocatable :: itemp(:) + !locals + logical :: nodiag + integer(psb_ipk_) :: nza, nzt, nr, nc, i,j,k,ip,irw, err_act, ncl + integer(psb_ipk_), Parameter :: maxtry=8 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name='c_cp_csrd_from_coo' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + + call a%psb_c_csr_sparse_mat%cp_from_coo(b,info) + + if (info /= 0) return + nr = a%get_nrows() + call psb_realloc(nr,a%diagp,info) + if (info /= 0) return + ! + call srch_diag(a,nodiag) + if (nodiag) then + ! + ! This means at least one of the rows does not + ! have a diagonal entry. Add explicit zeros and fix. + ! + call a%cp_to_coo(tmp,info) + nzt = tmp%get_nzeros() + call tmp%reallocate(nzt+nr) + do k=1,nr + tmp%ia(nzt+k) = k + tmp%ja(nzt+k) = k + tmp%val(nzt+k) = czero + end do + call tmp%set_nzeros(nzt+nr) + call tmp%fix(info) + call a%mv_from_coo(tmp,info) + call srch_diag(a,nodiag) + ! At this point, nodiag should not happen + if (nodiag) then + write(0,*) 'Nodiag here?' + info = -1 + end if + end if + +contains + subroutine srch_diag(a,nodiag) + implicit none + class(psb_c_csrd_sparse_mat), intent(inout) :: a + logical, intent(out) :: nodiag + integer(psb_ipk_) :: nr, i,j,k + + nr = a%get_nrows() + nodiag = .false. + outer: do i=1, nr + a%diagp(i) = -1 + inner: do k=a%irp(i),a%irp(i+1)-1 + if (a%ja(k) == i) then + a%diagp(i) = k + exit inner + end if + end do inner + if (a%diagp(i) < 1) then + nodiag = .true. + exit outer + end if + end do outer + return + end subroutine srch_diag +end subroutine psb_c_cp_csrd_from_coo + +subroutine psb_c_csrd_trim(a) + use psb_realloc_mod + use psb_error_mod + use psb_c_csr_mat_mod, psb_protect_name => psb_c_csrd_trim + implicit none + class(psb_c_csrd_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: err_act, info, nz, m + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + call a%psb_c_csr_sparse_mat%trim() + m = a%get_nrows() + call psb_realloc(m,a%diagp,info) + + if (info /= psb_success_) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_csrd_trim + + +subroutine psb_c_csrd_mold(a,b,info) + use psb_c_csr_mat_mod, psb_protect_name => psb_c_csrd_mold + use psb_error_mod + implicit none + class(psb_c_csrd_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='csrd_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_csrd_sparse_mat :: b, stat=info) + + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_csrd_mold + +subroutine psb_c_csrd_trmv(alpha,a,x,beta,y,info,uplo,diag) + use psb_c_csr_mat_mod, psb_protect_name => psb_c_csrd_trmv + use psb_error_mod + implicit none + class(psb_c_csrd_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: uplo, diag + + integer(psb_ipk_) :: i, m, n + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='c_csrd_trmv' + character :: uplo_, diag_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(uplo)) then + uplo_ = uplo + else + uplo_ = 'L' + end if + if (present(diag)) then + diag_ = diag + else + diag_ = 'D' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + n = a%get_ncols() + m = a%get_nrows() + + if (size(x,1) psb_c_csrd_cssm + implicit none + class(psb_c_csrd_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + + integer(psb_ipk_) :: jc, nc + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='c_csrd_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + nc = min(size(x,2) , size(y,2)) + + do jc = 1, nc + call a%inner_cssv(alpha,x(:,jc),beta,y(:,jc),info,trans) + if (info /= 0) exit + end do + + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='inner_trmv') + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_c_csrd_cssm + + +subroutine psb_c_csrd_cssv(alpha,a,x,beta,y,info,trans) + use psb_error_mod + use psb_string_mod + use psb_c_csr_mat_mod, psb_protect_name => psb_c_csrd_cssv + implicit none + class(psb_c_csrd_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_spk_) :: acc + complex(psb_spk_), allocatable :: tmp(:) + logical :: tra,ctra + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='c_csrd_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle())) then + !info = psb_err_invalid_mat_state_ + !call psb_errpush(info,name) + !goto 9999 + ! + ! The whole point of CSRD is to fake a triangle + ! "on the fly" (this is really for the sake of + ! saving memory in Gauss-Seidel lower/upper). + ! + ! + end if + + if (size(x) psb_d_csrd_clean_zeros + implicit none + class(psb_d_csrd_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: i, j, k, nr + integer(psb_ipk_), allocatable :: ilrp(:) + + info = 0 + call a%sync() + nr = a%get_nrows() + ilrp = a%irp(:) + a%irp(1) = 1 + j = a%irp(1) + do i=1, nr + a%diagp(i) = 0 + do k = ilrp(i), ilrp(i+1) -1 + if ((a%ja(j)==i).or.(a%val(k) /= dzero)) then + a%val(j) = a%val(k) + a%ja(j) = a%ja(k) + if (a%ja(j) == i) a%diagp(i) = j + j = j + 1 + end if + end do + a%irp(i+1) = j + end do + call a%trim() + call a%set_host() +end subroutine psb_d_csrd_clean_zeros + + +subroutine psb_d_csrd_get_diag(a,d,info) + use psb_error_mod + use psb_const_mod + use psb_d_csr_mat_mod, psb_protect_name => psb_d_csrd_get_diag + implicit none + class(psb_d_csrd_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act, mnm, i, j, k + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = size(d); + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + + + if (a%is_unit()) then + d(1:mnm) = done + else + do i=1, mnm + if (a%diagp(i) >0) d(i) = a%val(a%diagp(i)) + end do + end if + do i=mnm+1,size(d) + d(i) = dzero + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_csrd_get_diag + +subroutine psb_d_cp_csrd_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_d_base_mat_mod + use psb_d_csr_mat_mod, psb_protect_name => psb_d_cp_csrd_from_coo + implicit none + + class(psb_d_csrd_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + type(psb_d_coo_sparse_mat) :: tmp + integer(psb_ipk_), allocatable :: itemp(:) + !locals + logical :: nodiag + integer(psb_ipk_) :: nza, nzt, nr, nc, i,j,k,ip,irw, err_act, ncl + integer(psb_ipk_), Parameter :: maxtry=8 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name='d_cp_csrd_from_coo' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + + call a%psb_d_csr_sparse_mat%cp_from_coo(b,info) + + if (info /= 0) return + nr = a%get_nrows() + call psb_realloc(nr,a%diagp,info) + if (info /= 0) return + ! + call srch_diag(a,nodiag) + if (nodiag) then + ! + ! This means at least one of the rows does not + ! have a diagonal entry. Add explicit zeros and fix. + ! + call a%cp_to_coo(tmp,info) + nzt = tmp%get_nzeros() + call tmp%reallocate(nzt+nr) + do k=1,nr + tmp%ia(nzt+k) = k + tmp%ja(nzt+k) = k + tmp%val(nzt+k) = dzero + end do + call tmp%set_nzeros(nzt+nr) + call tmp%fix(info) + call a%mv_from_coo(tmp,info) + call srch_diag(a,nodiag) + ! At this point, nodiag should not happen + if (nodiag) then + write(0,*) 'Nodiag here?' + info = -1 + end if + end if + +contains + subroutine srch_diag(a,nodiag) + implicit none + class(psb_d_csrd_sparse_mat), intent(inout) :: a + logical, intent(out) :: nodiag + integer(psb_ipk_) :: nr, i,j,k + + nr = a%get_nrows() + nodiag = .false. + outer: do i=1, nr + a%diagp(i) = -1 + inner: do k=a%irp(i),a%irp(i+1)-1 + if (a%ja(k) == i) then + a%diagp(i) = k + exit inner + end if + end do inner + if (a%diagp(i) < 1) then + nodiag = .true. + exit outer + end if + end do outer + return + end subroutine srch_diag +end subroutine psb_d_cp_csrd_from_coo + +subroutine psb_d_csrd_trim(a) + use psb_realloc_mod + use psb_error_mod + use psb_d_csr_mat_mod, psb_protect_name => psb_d_csrd_trim + implicit none + class(psb_d_csrd_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: err_act, info, nz, m + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + call a%psb_d_csr_sparse_mat%trim() + m = a%get_nrows() + call psb_realloc(m,a%diagp,info) + + if (info /= psb_success_) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_csrd_trim + + +subroutine psb_d_csrd_mold(a,b,info) + use psb_d_csr_mat_mod, psb_protect_name => psb_d_csrd_mold + use psb_error_mod + implicit none + class(psb_d_csrd_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='csrd_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_csrd_sparse_mat :: b, stat=info) + + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_csrd_mold + +subroutine psb_d_csrd_trmv(alpha,a,x,beta,y,info,uplo,diag) + use psb_d_csr_mat_mod, psb_protect_name => psb_d_csrd_trmv + use psb_error_mod + implicit none + class(psb_d_csrd_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: uplo, diag + + integer(psb_ipk_) :: i, m, n + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='d_csrd_trmv' + character :: uplo_, diag_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(uplo)) then + uplo_ = uplo + else + uplo_ = 'L' + end if + if (present(diag)) then + diag_ = diag + else + diag_ = 'D' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + n = a%get_ncols() + m = a%get_nrows() + + if (size(x,1) psb_d_csrd_cssm + implicit none + class(psb_d_csrd_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + + integer(psb_ipk_) :: jc, nc + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='d_csrd_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + nc = min(size(x,2) , size(y,2)) + + do jc = 1, nc + call a%inner_cssv(alpha,x(:,jc),beta,y(:,jc),info,trans) + if (info /= 0) exit + end do + + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='inner_trmv') + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_d_csrd_cssm + + +subroutine psb_d_csrd_cssv(alpha,a,x,beta,y,info,trans) + use psb_error_mod + use psb_string_mod + use psb_d_csr_mat_mod, psb_protect_name => psb_d_csrd_cssv + implicit none + class(psb_d_csrd_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + real(psb_dpk_), allocatable :: tmp(:) + logical :: tra,ctra + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='d_csrd_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle())) then + !info = psb_err_invalid_mat_state_ + !call psb_errpush(info,name) + !goto 9999 + ! + ! The whole point of CSRD is to fake a triangle + ! "on the fly" (this is really for the sake of + ! saving memory in Gauss-Seidel lower/upper). + ! + ! + end if + + if (size(x) psb_s_csrd_clean_zeros + implicit none + class(psb_s_csrd_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: i, j, k, nr + integer(psb_ipk_), allocatable :: ilrp(:) + + info = 0 + call a%sync() + nr = a%get_nrows() + ilrp = a%irp(:) + a%irp(1) = 1 + j = a%irp(1) + do i=1, nr + a%diagp(i) = 0 + do k = ilrp(i), ilrp(i+1) -1 + if ((a%ja(j)==i).or.(a%val(k) /= szero)) then + a%val(j) = a%val(k) + a%ja(j) = a%ja(k) + if (a%ja(j) == i) a%diagp(i) = j + j = j + 1 + end if + end do + a%irp(i+1) = j + end do + call a%trim() + call a%set_host() +end subroutine psb_s_csrd_clean_zeros + + +subroutine psb_s_csrd_get_diag(a,d,info) + use psb_error_mod + use psb_const_mod + use psb_s_csr_mat_mod, psb_protect_name => psb_s_csrd_get_diag + implicit none + class(psb_s_csrd_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act, mnm, i, j, k + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = size(d); + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + + + if (a%is_unit()) then + d(1:mnm) = sone + else + do i=1, mnm + if (a%diagp(i) >0) d(i) = a%val(a%diagp(i)) + end do + end if + do i=mnm+1,size(d) + d(i) = szero + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_csrd_get_diag + +subroutine psb_s_cp_csrd_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_s_base_mat_mod + use psb_s_csr_mat_mod, psb_protect_name => psb_s_cp_csrd_from_coo + implicit none + + class(psb_s_csrd_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + type(psb_s_coo_sparse_mat) :: tmp + integer(psb_ipk_), allocatable :: itemp(:) + !locals + logical :: nodiag + integer(psb_ipk_) :: nza, nzt, nr, nc, i,j,k,ip,irw, err_act, ncl + integer(psb_ipk_), Parameter :: maxtry=8 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name='s_cp_csrd_from_coo' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + + call a%psb_s_csr_sparse_mat%cp_from_coo(b,info) + + if (info /= 0) return + nr = a%get_nrows() + call psb_realloc(nr,a%diagp,info) + if (info /= 0) return + ! + call srch_diag(a,nodiag) + if (nodiag) then + ! + ! This means at least one of the rows does not + ! have a diagonal entry. Add explicit zeros and fix. + ! + call a%cp_to_coo(tmp,info) + nzt = tmp%get_nzeros() + call tmp%reallocate(nzt+nr) + do k=1,nr + tmp%ia(nzt+k) = k + tmp%ja(nzt+k) = k + tmp%val(nzt+k) = szero + end do + call tmp%set_nzeros(nzt+nr) + call tmp%fix(info) + call a%mv_from_coo(tmp,info) + call srch_diag(a,nodiag) + ! At this point, nodiag should not happen + if (nodiag) then + write(0,*) 'Nodiag here?' + info = -1 + end if + end if + +contains + subroutine srch_diag(a,nodiag) + implicit none + class(psb_s_csrd_sparse_mat), intent(inout) :: a + logical, intent(out) :: nodiag + integer(psb_ipk_) :: nr, i,j,k + + nr = a%get_nrows() + nodiag = .false. + outer: do i=1, nr + a%diagp(i) = -1 + inner: do k=a%irp(i),a%irp(i+1)-1 + if (a%ja(k) == i) then + a%diagp(i) = k + exit inner + end if + end do inner + if (a%diagp(i) < 1) then + nodiag = .true. + exit outer + end if + end do outer + return + end subroutine srch_diag +end subroutine psb_s_cp_csrd_from_coo + +subroutine psb_s_csrd_trim(a) + use psb_realloc_mod + use psb_error_mod + use psb_s_csr_mat_mod, psb_protect_name => psb_s_csrd_trim + implicit none + class(psb_s_csrd_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: err_act, info, nz, m + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + call a%psb_s_csr_sparse_mat%trim() + m = a%get_nrows() + call psb_realloc(m,a%diagp,info) + + if (info /= psb_success_) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_csrd_trim + + +subroutine psb_s_csrd_mold(a,b,info) + use psb_s_csr_mat_mod, psb_protect_name => psb_s_csrd_mold + use psb_error_mod + implicit none + class(psb_s_csrd_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='csrd_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_csrd_sparse_mat :: b, stat=info) + + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_csrd_mold + +subroutine psb_s_csrd_trmv(alpha,a,x,beta,y,info,uplo,diag) + use psb_s_csr_mat_mod, psb_protect_name => psb_s_csrd_trmv + use psb_error_mod + implicit none + class(psb_s_csrd_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: uplo, diag + + integer(psb_ipk_) :: i, m, n + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='s_csrd_trmv' + character :: uplo_, diag_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(uplo)) then + uplo_ = uplo + else + uplo_ = 'L' + end if + if (present(diag)) then + diag_ = diag + else + diag_ = 'D' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + n = a%get_ncols() + m = a%get_nrows() + + if (size(x,1) psb_s_csrd_cssm + implicit none + class(psb_s_csrd_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + + integer(psb_ipk_) :: jc, nc + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='s_csrd_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + nc = min(size(x,2) , size(y,2)) + + do jc = 1, nc + call a%inner_cssv(alpha,x(:,jc),beta,y(:,jc),info,trans) + if (info /= 0) exit + end do + + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='inner_trmv') + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_s_csrd_cssm + + +subroutine psb_s_csrd_cssv(alpha,a,x,beta,y,info,trans) + use psb_error_mod + use psb_string_mod + use psb_s_csr_mat_mod, psb_protect_name => psb_s_csrd_cssv + implicit none + class(psb_s_csrd_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_spk_) :: acc + real(psb_spk_), allocatable :: tmp(:) + logical :: tra,ctra + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='s_csrd_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle())) then + !info = psb_err_invalid_mat_state_ + !call psb_errpush(info,name) + !goto 9999 + ! + ! The whole point of CSRD is to fake a triangle + ! "on the fly" (this is really for the sake of + ! saving memory in Gauss-Seidel lower/upper). + ! + ! + end if + + if (size(x) psb_z_csrd_clean_zeros + implicit none + class(psb_z_csrd_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: i, j, k, nr + integer(psb_ipk_), allocatable :: ilrp(:) + + info = 0 + call a%sync() + nr = a%get_nrows() + ilrp = a%irp(:) + a%irp(1) = 1 + j = a%irp(1) + do i=1, nr + a%diagp(i) = 0 + do k = ilrp(i), ilrp(i+1) -1 + if ((a%ja(j)==i).or.(a%val(k) /= zzero)) then + a%val(j) = a%val(k) + a%ja(j) = a%ja(k) + if (a%ja(j) == i) a%diagp(i) = j + j = j + 1 + end if + end do + a%irp(i+1) = j + end do + call a%trim() + call a%set_host() +end subroutine psb_z_csrd_clean_zeros + + +subroutine psb_z_csrd_get_diag(a,d,info) + use psb_error_mod + use psb_const_mod + use psb_z_csr_mat_mod, psb_protect_name => psb_z_csrd_get_diag + implicit none + class(psb_z_csrd_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act, mnm, i, j, k + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = size(d); + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + + + if (a%is_unit()) then + d(1:mnm) = zone + else + do i=1, mnm + if (a%diagp(i) >0) d(i) = a%val(a%diagp(i)) + end do + end if + do i=mnm+1,size(d) + d(i) = zzero + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_csrd_get_diag + +subroutine psb_z_cp_csrd_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_z_base_mat_mod + use psb_z_csr_mat_mod, psb_protect_name => psb_z_cp_csrd_from_coo + implicit none + + class(psb_z_csrd_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + type(psb_z_coo_sparse_mat) :: tmp + integer(psb_ipk_), allocatable :: itemp(:) + !locals + logical :: nodiag + integer(psb_ipk_) :: nza, nzt, nr, nc, i,j,k,ip,irw, err_act, ncl + integer(psb_ipk_), Parameter :: maxtry=8 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name='z_cp_csrd_from_coo' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + + call a%psb_z_csr_sparse_mat%cp_from_coo(b,info) + + if (info /= 0) return + nr = a%get_nrows() + call psb_realloc(nr,a%diagp,info) + if (info /= 0) return + ! + call srch_diag(a,nodiag) + if (nodiag) then + ! + ! This means at least one of the rows does not + ! have a diagonal entry. Add explicit zeros and fix. + ! + call a%cp_to_coo(tmp,info) + nzt = tmp%get_nzeros() + call tmp%reallocate(nzt+nr) + do k=1,nr + tmp%ia(nzt+k) = k + tmp%ja(nzt+k) = k + tmp%val(nzt+k) = zzero + end do + call tmp%set_nzeros(nzt+nr) + call tmp%fix(info) + call a%mv_from_coo(tmp,info) + call srch_diag(a,nodiag) + ! At this point, nodiag should not happen + if (nodiag) then + write(0,*) 'Nodiag here?' + info = -1 + end if + end if + +contains + subroutine srch_diag(a,nodiag) + implicit none + class(psb_z_csrd_sparse_mat), intent(inout) :: a + logical, intent(out) :: nodiag + integer(psb_ipk_) :: nr, i,j,k + + nr = a%get_nrows() + nodiag = .false. + outer: do i=1, nr + a%diagp(i) = -1 + inner: do k=a%irp(i),a%irp(i+1)-1 + if (a%ja(k) == i) then + a%diagp(i) = k + exit inner + end if + end do inner + if (a%diagp(i) < 1) then + nodiag = .true. + exit outer + end if + end do outer + return + end subroutine srch_diag +end subroutine psb_z_cp_csrd_from_coo + +subroutine psb_z_csrd_trim(a) + use psb_realloc_mod + use psb_error_mod + use psb_z_csr_mat_mod, psb_protect_name => psb_z_csrd_trim + implicit none + class(psb_z_csrd_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: err_act, info, nz, m + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + call a%psb_z_csr_sparse_mat%trim() + m = a%get_nrows() + call psb_realloc(m,a%diagp,info) + + if (info /= psb_success_) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_csrd_trim + + +subroutine psb_z_csrd_mold(a,b,info) + use psb_z_csr_mat_mod, psb_protect_name => psb_z_csrd_mold + use psb_error_mod + implicit none + class(psb_z_csrd_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='csrd_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_csrd_sparse_mat :: b, stat=info) + + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_csrd_mold + +subroutine psb_z_csrd_trmv(alpha,a,x,beta,y,info,uplo,diag) + use psb_z_csr_mat_mod, psb_protect_name => psb_z_csrd_trmv + use psb_error_mod + implicit none + class(psb_z_csrd_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: uplo, diag + + integer(psb_ipk_) :: i, m, n + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='z_csrd_trmv' + character :: uplo_, diag_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(uplo)) then + uplo_ = uplo + else + uplo_ = 'L' + end if + if (present(diag)) then + diag_ = diag + else + diag_ = 'D' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + n = a%get_ncols() + m = a%get_nrows() + + if (size(x,1) psb_z_csrd_cssm + implicit none + class(psb_z_csrd_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + + integer(psb_ipk_) :: jc, nc + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='z_csrd_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + nc = min(size(x,2) , size(y,2)) + + do jc = 1, nc + call a%inner_cssv(alpha,x(:,jc),beta,y(:,jc),info,trans) + if (info /= 0) exit + end do + + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='inner_trmv') + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_z_csrd_cssm + + +subroutine psb_z_csrd_cssv(alpha,a,x,beta,y,info,trans) + use psb_error_mod + use psb_string_mod + use psb_z_csr_mat_mod, psb_protect_name => psb_z_csrd_cssv + implicit none + class(psb_z_csrd_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_dpk_) :: acc + complex(psb_dpk_), allocatable :: tmp(:) + logical :: tra,ctra + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='z_csrd_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle())) then + !info = psb_err_invalid_mat_state_ + !call psb_errpush(info,name) + !goto 9999 + ! + ! The whole point of CSRD is to fake a triangle + ! "on the fly" (this is really for the sake of + ! saving memory in Gauss-Seidel lower/upper). + ! + ! + end if + + if (size(x) Date: Wed, 12 Jun 2019 13:54:33 +0100 Subject: [PATCH 02/21] Fix bad setup of halo_owner in denum_block --- base/tools/psb_cd_renum_block.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/tools/psb_cd_renum_block.F90 b/base/tools/psb_cd_renum_block.F90 index ea6f940b..ddfa4545 100644 --- a/base/tools/psb_cd_renum_block.F90 +++ b/base/tools/psb_cd_renum_block.F90 @@ -142,7 +142,7 @@ subroutine psb_cd_renum_block(desc_in, desc_out, info) write(debug_unit,*) me,' ',trim(name),': Done g2l_ins ',gidx(:),':',lidx(:),' :',reflidx(:) end if - if (info == psb_success_) call desc_in%indxmap%fnd_owner(gidx(n_row+1:n_col),hproc,info) + if (info == psb_success_) call blck_map%fnd_owner(gidx(n_row+1:n_col),hproc,info) if (info == psb_success_) call blck_map%set_halo_owner(hproc,info) if (info == 0) call blck_map%asb(info) From 122bfdf7f813c5626205cdf7a483d195c837b40e Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Fri, 14 Jun 2019 16:44:54 +0100 Subject: [PATCH 03/21] New second descriptor for gather. --- base/comm/psb_cspgather.F90 | 31 ++++++++++++++++++---------- base/comm/psb_dspgather.F90 | 31 ++++++++++++++++++---------- base/comm/psb_sspgather.F90 | 31 ++++++++++++++++++---------- base/comm/psb_zspgather.F90 | 31 ++++++++++++++++++---------- base/modules/comm/psb_c_comm_mod.f90 | 15 +++++++------- base/modules/comm/psb_d_comm_mod.f90 | 15 +++++++------- base/modules/comm/psb_s_comm_mod.f90 | 15 +++++++------- base/modules/comm/psb_z_comm_mod.f90 | 15 +++++++------- 8 files changed, 112 insertions(+), 72 deletions(-) diff --git a/base/comm/psb_cspgather.F90 b/base/comm/psb_cspgather.F90 index 00155a58..be1ddbe0 100644 --- a/base/comm/psb_cspgather.F90 +++ b/base/comm/psb_cspgather.F90 @@ -30,7 +30,7 @@ ! ! ! File: psb_cspgather.f90 -subroutine psb_csp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keeploc) +subroutine psb_csp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keeploc,desc_c) use psb_desc_mod use psb_error_mod use psb_penv_mod @@ -43,14 +43,17 @@ subroutine psb_csp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep #ifdef MPI_H include 'mpif.h' #endif - type(psb_cspmat_type), intent(inout) :: loca - type(psb_cspmat_type), intent(inout) :: globa - type(psb_desc_type), intent(in) :: desc_a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), intent(in), optional :: root, dupl - logical, intent(in), optional :: keepnum,keeploc - - type(psb_c_coo_sparse_mat) :: loc_coo, glob_coo + type(psb_cspmat_type), intent(inout) :: loca + type(psb_cspmat_type), intent(inout) :: globa + type(psb_desc_type), intent(in), target :: desc_a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: root, dupl + logical, intent(in), optional :: keepnum,keeploc + type(psb_desc_type), intent(in), optional, target :: desc_c + ! + type(psb_c_coo_sparse_mat) :: loc_coo, glob_coo + type(psb_desc_type), pointer :: p_desc_c + integer(psb_ipk_) :: err_act, dupl_, nrg, ncg, nzg integer(psb_ipk_) :: ip,naggrm1,naggrp1, i, j, k, nzl logical :: keepnum_, keeploc_ @@ -80,11 +83,17 @@ subroutine psb_csp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep else keeploc_ = .true. end if + if (present(desc_c)) then + p_desc_c => desc_c + else + p_desc_c => desc_a + end if + call globa%free() if (keepnum_) then nrg = desc_a%get_global_rows() - ncg = desc_a%get_global_rows() + ncg = p_desc_c%get_global_cols() allocate(nzbr(np), idisp(np),stat=info) if (info /= psb_success_) then @@ -102,7 +111,7 @@ subroutine psb_csp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep end if nzl = loc_coo%get_nzeros() call psb_loc_to_glob(loc_coo%ia(1:nzl),desc_a,info,iact='I') - call psb_loc_to_glob(loc_coo%ja(1:nzl),desc_a,info,iact='I') + call psb_loc_to_glob(loc_coo%ja(1:nzl),p_desc_c,info,iact='I') nzbr(:) = 0 nzbr(me+1) = nzl call psb_sum(ictxt,nzbr(1:np)) diff --git a/base/comm/psb_dspgather.F90 b/base/comm/psb_dspgather.F90 index 0d373fdf..59d0d080 100644 --- a/base/comm/psb_dspgather.F90 +++ b/base/comm/psb_dspgather.F90 @@ -30,7 +30,7 @@ ! ! ! File: psb_dspgather.f90 -subroutine psb_dsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keeploc) +subroutine psb_dsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keeploc,desc_c) use psb_desc_mod use psb_error_mod use psb_penv_mod @@ -43,14 +43,17 @@ subroutine psb_dsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep #ifdef MPI_H include 'mpif.h' #endif - type(psb_dspmat_type), intent(inout) :: loca - type(psb_dspmat_type), intent(inout) :: globa - type(psb_desc_type), intent(in) :: desc_a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), intent(in), optional :: root, dupl - logical, intent(in), optional :: keepnum,keeploc - - type(psb_d_coo_sparse_mat) :: loc_coo, glob_coo + type(psb_dspmat_type), intent(inout) :: loca + type(psb_dspmat_type), intent(inout) :: globa + type(psb_desc_type), intent(in), target :: desc_a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: root, dupl + logical, intent(in), optional :: keepnum,keeploc + type(psb_desc_type), intent(in), optional, target :: desc_c + ! + type(psb_d_coo_sparse_mat) :: loc_coo, glob_coo + type(psb_desc_type), pointer :: p_desc_c + integer(psb_ipk_) :: err_act, dupl_, nrg, ncg, nzg integer(psb_ipk_) :: ip,naggrm1,naggrp1, i, j, k, nzl logical :: keepnum_, keeploc_ @@ -80,11 +83,17 @@ subroutine psb_dsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep else keeploc_ = .true. end if + if (present(desc_c)) then + p_desc_c => desc_c + else + p_desc_c => desc_a + end if + call globa%free() if (keepnum_) then nrg = desc_a%get_global_rows() - ncg = desc_a%get_global_rows() + ncg = p_desc_c%get_global_cols() allocate(nzbr(np), idisp(np),stat=info) if (info /= psb_success_) then @@ -102,7 +111,7 @@ subroutine psb_dsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep end if nzl = loc_coo%get_nzeros() call psb_loc_to_glob(loc_coo%ia(1:nzl),desc_a,info,iact='I') - call psb_loc_to_glob(loc_coo%ja(1:nzl),desc_a,info,iact='I') + call psb_loc_to_glob(loc_coo%ja(1:nzl),p_desc_c,info,iact='I') nzbr(:) = 0 nzbr(me+1) = nzl call psb_sum(ictxt,nzbr(1:np)) diff --git a/base/comm/psb_sspgather.F90 b/base/comm/psb_sspgather.F90 index f0f828bd..b974a2e7 100644 --- a/base/comm/psb_sspgather.F90 +++ b/base/comm/psb_sspgather.F90 @@ -30,7 +30,7 @@ ! ! ! File: psb_sspgather.f90 -subroutine psb_ssp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keeploc) +subroutine psb_ssp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keeploc,desc_c) use psb_desc_mod use psb_error_mod use psb_penv_mod @@ -43,14 +43,17 @@ subroutine psb_ssp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep #ifdef MPI_H include 'mpif.h' #endif - type(psb_sspmat_type), intent(inout) :: loca - type(psb_sspmat_type), intent(inout) :: globa - type(psb_desc_type), intent(in) :: desc_a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), intent(in), optional :: root, dupl - logical, intent(in), optional :: keepnum,keeploc - - type(psb_s_coo_sparse_mat) :: loc_coo, glob_coo + type(psb_sspmat_type), intent(inout) :: loca + type(psb_sspmat_type), intent(inout) :: globa + type(psb_desc_type), intent(in), target :: desc_a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: root, dupl + logical, intent(in), optional :: keepnum,keeploc + type(psb_desc_type), intent(in), optional, target :: desc_c + ! + type(psb_s_coo_sparse_mat) :: loc_coo, glob_coo + type(psb_desc_type), pointer :: p_desc_c + integer(psb_ipk_) :: err_act, dupl_, nrg, ncg, nzg integer(psb_ipk_) :: ip,naggrm1,naggrp1, i, j, k, nzl logical :: keepnum_, keeploc_ @@ -80,11 +83,17 @@ subroutine psb_ssp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep else keeploc_ = .true. end if + if (present(desc_c)) then + p_desc_c => desc_c + else + p_desc_c => desc_a + end if + call globa%free() if (keepnum_) then nrg = desc_a%get_global_rows() - ncg = desc_a%get_global_rows() + ncg = p_desc_c%get_global_cols() allocate(nzbr(np), idisp(np),stat=info) if (info /= psb_success_) then @@ -102,7 +111,7 @@ subroutine psb_ssp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep end if nzl = loc_coo%get_nzeros() call psb_loc_to_glob(loc_coo%ia(1:nzl),desc_a,info,iact='I') - call psb_loc_to_glob(loc_coo%ja(1:nzl),desc_a,info,iact='I') + call psb_loc_to_glob(loc_coo%ja(1:nzl),p_desc_c,info,iact='I') nzbr(:) = 0 nzbr(me+1) = nzl call psb_sum(ictxt,nzbr(1:np)) diff --git a/base/comm/psb_zspgather.F90 b/base/comm/psb_zspgather.F90 index 0505420d..7bcb72e7 100644 --- a/base/comm/psb_zspgather.F90 +++ b/base/comm/psb_zspgather.F90 @@ -30,7 +30,7 @@ ! ! ! File: psb_zspgather.f90 -subroutine psb_zsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keeploc) +subroutine psb_zsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keeploc,desc_c) use psb_desc_mod use psb_error_mod use psb_penv_mod @@ -43,14 +43,17 @@ subroutine psb_zsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep #ifdef MPI_H include 'mpif.h' #endif - type(psb_zspmat_type), intent(inout) :: loca - type(psb_zspmat_type), intent(inout) :: globa - type(psb_desc_type), intent(in) :: desc_a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), intent(in), optional :: root, dupl - logical, intent(in), optional :: keepnum,keeploc - - type(psb_z_coo_sparse_mat) :: loc_coo, glob_coo + type(psb_zspmat_type), intent(inout) :: loca + type(psb_zspmat_type), intent(inout) :: globa + type(psb_desc_type), intent(in), target :: desc_a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: root, dupl + logical, intent(in), optional :: keepnum,keeploc + type(psb_desc_type), intent(in), optional, target :: desc_c + ! + type(psb_z_coo_sparse_mat) :: loc_coo, glob_coo + type(psb_desc_type), pointer :: p_desc_c + integer(psb_ipk_) :: err_act, dupl_, nrg, ncg, nzg integer(psb_ipk_) :: ip,naggrm1,naggrp1, i, j, k, nzl logical :: keepnum_, keeploc_ @@ -80,11 +83,17 @@ subroutine psb_zsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep else keeploc_ = .true. end if + if (present(desc_c)) then + p_desc_c => desc_c + else + p_desc_c => desc_a + end if + call globa%free() if (keepnum_) then nrg = desc_a%get_global_rows() - ncg = desc_a%get_global_rows() + ncg = p_desc_c%get_global_cols() allocate(nzbr(np), idisp(np),stat=info) if (info /= psb_success_) then @@ -102,7 +111,7 @@ subroutine psb_zsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep end if nzl = loc_coo%get_nzeros() call psb_loc_to_glob(loc_coo%ia(1:nzl),desc_a,info,iact='I') - call psb_loc_to_glob(loc_coo%ja(1:nzl),desc_a,info,iact='I') + call psb_loc_to_glob(loc_coo%ja(1:nzl),p_desc_c,info,iact='I') nzbr(:) = 0 nzbr(me+1) = nzl call psb_sum(ictxt,nzbr(1:np)) diff --git a/base/modules/comm/psb_c_comm_mod.f90 b/base/modules/comm/psb_c_comm_mod.f90 index e14d6673..f2a9f72f 100644 --- a/base/modules/comm/psb_c_comm_mod.f90 +++ b/base/modules/comm/psb_c_comm_mod.f90 @@ -151,15 +151,16 @@ module psb_c_comm_mod end interface psb_scatter interface psb_gather - subroutine psb_csp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keeploc) + subroutine psb_csp_allgather(globa, loca, desc_a, info, root,dupl,keepnum,keeploc,desc_c) import implicit none - type(psb_cspmat_type), intent(inout) :: loca - type(psb_cspmat_type), intent(out) :: globa - type(psb_desc_type), intent(in) :: desc_a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), intent(in), optional :: root,dupl - logical, intent(in), optional :: keepnum,keeploc + type(psb_cspmat_type), intent(inout) :: loca + type(psb_cspmat_type), intent(out) :: globa + type(psb_desc_type), intent(in), target :: desc_a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: root,dupl + logical, intent(in), optional :: keepnum,keeploc + type(psb_desc_type), intent(in), optional, target :: desc_c end subroutine psb_csp_allgather subroutine psb_cgatherm(globx, locx, desc_a, info, root) import diff --git a/base/modules/comm/psb_d_comm_mod.f90 b/base/modules/comm/psb_d_comm_mod.f90 index 7c532dad..5e3a4607 100644 --- a/base/modules/comm/psb_d_comm_mod.f90 +++ b/base/modules/comm/psb_d_comm_mod.f90 @@ -151,15 +151,16 @@ module psb_d_comm_mod end interface psb_scatter interface psb_gather - subroutine psb_dsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keeploc) + subroutine psb_dsp_allgather(globa, loca, desc_a, info, root,dupl,keepnum,keeploc,desc_c) import implicit none - type(psb_dspmat_type), intent(inout) :: loca - type(psb_dspmat_type), intent(out) :: globa - type(psb_desc_type), intent(in) :: desc_a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), intent(in), optional :: root,dupl - logical, intent(in), optional :: keepnum,keeploc + type(psb_dspmat_type), intent(inout) :: loca + type(psb_dspmat_type), intent(out) :: globa + type(psb_desc_type), intent(in), target :: desc_a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: root,dupl + logical, intent(in), optional :: keepnum,keeploc + type(psb_desc_type), intent(in), optional, target :: desc_c end subroutine psb_dsp_allgather subroutine psb_dgatherm(globx, locx, desc_a, info, root) import diff --git a/base/modules/comm/psb_s_comm_mod.f90 b/base/modules/comm/psb_s_comm_mod.f90 index 82c848b7..3c9c31c5 100644 --- a/base/modules/comm/psb_s_comm_mod.f90 +++ b/base/modules/comm/psb_s_comm_mod.f90 @@ -151,15 +151,16 @@ module psb_s_comm_mod end interface psb_scatter interface psb_gather - subroutine psb_ssp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keeploc) + subroutine psb_ssp_allgather(globa, loca, desc_a, info, root,dupl,keepnum,keeploc,desc_c) import implicit none - type(psb_sspmat_type), intent(inout) :: loca - type(psb_sspmat_type), intent(out) :: globa - type(psb_desc_type), intent(in) :: desc_a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), intent(in), optional :: root,dupl - logical, intent(in), optional :: keepnum,keeploc + type(psb_sspmat_type), intent(inout) :: loca + type(psb_sspmat_type), intent(out) :: globa + type(psb_desc_type), intent(in), target :: desc_a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: root,dupl + logical, intent(in), optional :: keepnum,keeploc + type(psb_desc_type), intent(in), optional, target :: desc_c end subroutine psb_ssp_allgather subroutine psb_sgatherm(globx, locx, desc_a, info, root) import diff --git a/base/modules/comm/psb_z_comm_mod.f90 b/base/modules/comm/psb_z_comm_mod.f90 index e4a6e9ea..822b6897 100644 --- a/base/modules/comm/psb_z_comm_mod.f90 +++ b/base/modules/comm/psb_z_comm_mod.f90 @@ -151,15 +151,16 @@ module psb_z_comm_mod end interface psb_scatter interface psb_gather - subroutine psb_zsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keeploc) + subroutine psb_zsp_allgather(globa, loca, desc_a, info, root,dupl,keepnum,keeploc,desc_c) import implicit none - type(psb_zspmat_type), intent(inout) :: loca - type(psb_zspmat_type), intent(out) :: globa - type(psb_desc_type), intent(in) :: desc_a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), intent(in), optional :: root,dupl - logical, intent(in), optional :: keepnum,keeploc + type(psb_zspmat_type), intent(inout) :: loca + type(psb_zspmat_type), intent(out) :: globa + type(psb_desc_type), intent(in), target :: desc_a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: root,dupl + logical, intent(in), optional :: keepnum,keeploc + type(psb_desc_type), intent(in), optional, target :: desc_c end subroutine psb_zsp_allgather subroutine psb_zgatherm(globx, locx, desc_a, info, root) import From 8695f9bcdd94b064ed171b69ce2a4ed99c006158 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Mon, 17 Jun 2019 08:45:08 +0100 Subject: [PATCH 04/21] Make ADD default action for replicated entries. --- base/modules/psb_const_mod.F90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/base/modules/psb_const_mod.F90 b/base/modules/psb_const_mod.F90 index 50e90da2..92d0a5ac 100644 --- a/base/modules/psb_const_mod.F90 +++ b/base/modules/psb_const_mod.F90 @@ -154,10 +154,10 @@ module psb_const_mod ! Duplicate coefficients handling ! These are usually set while calling spcnv as one of its ! optional arugments. - integer(psb_ipk_), parameter :: psb_dupl_ovwrt_ = 0 - integer(psb_ipk_), parameter :: psb_dupl_add_ = 1 + integer(psb_ipk_), parameter :: psb_dupl_add_ = 0 + integer(psb_ipk_), parameter :: psb_dupl_ovwrt_ = 1 integer(psb_ipk_), parameter :: psb_dupl_err_ = 2 - integer(psb_ipk_), parameter :: psb_dupl_def_ = psb_dupl_ovwrt_ + integer(psb_ipk_), parameter :: psb_dupl_def_ = psb_dupl_add_ ! Matrix update mode integer(psb_ipk_), parameter :: psb_upd_srch_ = 98764 integer(psb_ipk_), parameter :: psb_upd_perm_ = 98765 From 160b987ed1f455adeeab05a59aa3bb6fd5a987de Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 25 Jun 2019 15:09:19 +0100 Subject: [PATCH 05/21] Reduce memory footprint in csrspspmm. --- base/serial/impl/psb_c_csr_impl.f90 | 2 +- base/serial/impl/psb_d_csr_impl.f90 | 2 +- base/serial/impl/psb_s_csr_impl.f90 | 2 +- base/serial/impl/psb_z_csr_impl.f90 | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/base/serial/impl/psb_c_csr_impl.f90 b/base/serial/impl/psb_c_csr_impl.f90 index 138d2963..1490718d 100644 --- a/base/serial/impl/psb_c_csr_impl.f90 +++ b/base/serial/impl/psb_c_csr_impl.f90 @@ -3384,7 +3384,7 @@ subroutine psb_ccsrspspmm(a,b,c,info) ! Estimate number of nonzeros on output. nza = a%get_nzeros() nzb = b%get_nzeros() - nzc = 2*(nza+nzb) + nzc = int(1.25*(nza+nzb)) call c%allocate(ma,nb,nzc) call csr_spspmm(a,b,c,info) diff --git a/base/serial/impl/psb_d_csr_impl.f90 b/base/serial/impl/psb_d_csr_impl.f90 index 6631cc42..523c1501 100644 --- a/base/serial/impl/psb_d_csr_impl.f90 +++ b/base/serial/impl/psb_d_csr_impl.f90 @@ -3384,7 +3384,7 @@ subroutine psb_dcsrspspmm(a,b,c,info) ! Estimate number of nonzeros on output. nza = a%get_nzeros() nzb = b%get_nzeros() - nzc = 2*(nza+nzb) + nzc = int(1.25*(nza+nzb)) call c%allocate(ma,nb,nzc) call csr_spspmm(a,b,c,info) diff --git a/base/serial/impl/psb_s_csr_impl.f90 b/base/serial/impl/psb_s_csr_impl.f90 index c4fb5e1a..75a6c0cc 100644 --- a/base/serial/impl/psb_s_csr_impl.f90 +++ b/base/serial/impl/psb_s_csr_impl.f90 @@ -3384,7 +3384,7 @@ subroutine psb_scsrspspmm(a,b,c,info) ! Estimate number of nonzeros on output. nza = a%get_nzeros() nzb = b%get_nzeros() - nzc = 2*(nza+nzb) + nzc = int(1.25*(nza+nzb)) call c%allocate(ma,nb,nzc) call csr_spspmm(a,b,c,info) diff --git a/base/serial/impl/psb_z_csr_impl.f90 b/base/serial/impl/psb_z_csr_impl.f90 index 3a2768d6..6a691d4f 100644 --- a/base/serial/impl/psb_z_csr_impl.f90 +++ b/base/serial/impl/psb_z_csr_impl.f90 @@ -3384,7 +3384,7 @@ subroutine psb_zcsrspspmm(a,b,c,info) ! Estimate number of nonzeros on output. nza = a%get_nzeros() nzb = b%get_nzeros() - nzc = 2*(nza+nzb) + nzc = int(1.25*(nza+nzb)) call c%allocate(ma,nb,nzc) call csr_spspmm(a,b,c,info) From b8251d67ce391f1b723c91bccfb9e68767923229 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 25 Jun 2019 15:44:54 +0100 Subject: [PATCH 06/21] Do not use aux memory in spsm_vect. --- base/psblas/psb_cspsm.f90 | 35 +++++------------------------------ base/psblas/psb_dspsm.f90 | 35 +++++------------------------------ base/psblas/psb_sspsm.f90 | 35 +++++------------------------------ base/psblas/psb_zspsm.f90 | 35 +++++------------------------------ 4 files changed, 20 insertions(+), 120 deletions(-) diff --git a/base/psblas/psb_cspsm.f90 b/base/psblas/psb_cspsm.f90 index 5abb6746..21b75cec 100644 --- a/base/psblas/psb_cspsm.f90 +++ b/base/psblas/psb_cspsm.f90 @@ -565,7 +565,7 @@ subroutine psb_cspsv_vect(alpha,a,x,beta,y,desc_a,info,& character :: lscale integer(psb_ipk_), parameter :: nb=4 - complex(psb_spk_),pointer :: iwork(:), xp(:), yp(:) + complex(psb_spk_) :: iwork(1) character :: itrans character(len=20) :: name, ch_err logical :: aliw @@ -636,32 +636,10 @@ subroutine psb_cspsv_vect(alpha,a,x,beta,y,desc_a,info,& goto 9999 end if - iwork => null() - ! check for presence/size of a work area - liwork= 2*ncol - - if (present(work)) then - if (size(work) >= liwork) then - aliw =.false. - else - aliw=.true. - endif - else - aliw=.true. - end if - - if (aliw) then - allocate(iwork(liwork),stat=info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_realloc' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - else - iwork => work - endif - + ! With encapsulated vectors the inner routines + ! do not need the work area, hence liwork is + ! a simple array with 1 entry to keep the interface + ! happy. Might remove it entirely in the future. iwork(1)=0.d0 ! Perform local triangular system solve @@ -682,7 +660,6 @@ subroutine psb_cspsv_vect(alpha,a,x,beta,y,desc_a,info,& call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& & cone,y%v,desc_a,iwork,info,data=psb_comm_ovr_) - if (info == psb_success_) call psi_ovrl_upd(y%v,desc_a,choice_,info) if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='Inner updates') @@ -690,8 +667,6 @@ subroutine psb_cspsv_vect(alpha,a,x,beta,y,desc_a,info,& end if end if - if (aliw) deallocate(iwork) - call psb_erractionrestore(err_act) return diff --git a/base/psblas/psb_dspsm.f90 b/base/psblas/psb_dspsm.f90 index 79e97663..2165f361 100644 --- a/base/psblas/psb_dspsm.f90 +++ b/base/psblas/psb_dspsm.f90 @@ -565,7 +565,7 @@ subroutine psb_dspsv_vect(alpha,a,x,beta,y,desc_a,info,& character :: lscale integer(psb_ipk_), parameter :: nb=4 - real(psb_dpk_),pointer :: iwork(:), xp(:), yp(:) + real(psb_dpk_) :: iwork(1) character :: itrans character(len=20) :: name, ch_err logical :: aliw @@ -636,32 +636,10 @@ subroutine psb_dspsv_vect(alpha,a,x,beta,y,desc_a,info,& goto 9999 end if - iwork => null() - ! check for presence/size of a work area - liwork= 2*ncol - - if (present(work)) then - if (size(work) >= liwork) then - aliw =.false. - else - aliw=.true. - endif - else - aliw=.true. - end if - - if (aliw) then - allocate(iwork(liwork),stat=info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_realloc' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - else - iwork => work - endif - + ! With encapsulated vectors the inner routines + ! do not need the work area, hence liwork is + ! a simple array with 1 entry to keep the interface + ! happy. Might remove it entirely in the future. iwork(1)=0.d0 ! Perform local triangular system solve @@ -682,7 +660,6 @@ subroutine psb_dspsv_vect(alpha,a,x,beta,y,desc_a,info,& call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& & done,y%v,desc_a,iwork,info,data=psb_comm_ovr_) - if (info == psb_success_) call psi_ovrl_upd(y%v,desc_a,choice_,info) if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='Inner updates') @@ -690,8 +667,6 @@ subroutine psb_dspsv_vect(alpha,a,x,beta,y,desc_a,info,& end if end if - if (aliw) deallocate(iwork) - call psb_erractionrestore(err_act) return diff --git a/base/psblas/psb_sspsm.f90 b/base/psblas/psb_sspsm.f90 index 1cf436a5..2d6fa2ac 100644 --- a/base/psblas/psb_sspsm.f90 +++ b/base/psblas/psb_sspsm.f90 @@ -565,7 +565,7 @@ subroutine psb_sspsv_vect(alpha,a,x,beta,y,desc_a,info,& character :: lscale integer(psb_ipk_), parameter :: nb=4 - real(psb_spk_),pointer :: iwork(:), xp(:), yp(:) + real(psb_spk_) :: iwork(1) character :: itrans character(len=20) :: name, ch_err logical :: aliw @@ -636,32 +636,10 @@ subroutine psb_sspsv_vect(alpha,a,x,beta,y,desc_a,info,& goto 9999 end if - iwork => null() - ! check for presence/size of a work area - liwork= 2*ncol - - if (present(work)) then - if (size(work) >= liwork) then - aliw =.false. - else - aliw=.true. - endif - else - aliw=.true. - end if - - if (aliw) then - allocate(iwork(liwork),stat=info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_realloc' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - else - iwork => work - endif - + ! With encapsulated vectors the inner routines + ! do not need the work area, hence liwork is + ! a simple array with 1 entry to keep the interface + ! happy. Might remove it entirely in the future. iwork(1)=0.d0 ! Perform local triangular system solve @@ -682,7 +660,6 @@ subroutine psb_sspsv_vect(alpha,a,x,beta,y,desc_a,info,& call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& & sone,y%v,desc_a,iwork,info,data=psb_comm_ovr_) - if (info == psb_success_) call psi_ovrl_upd(y%v,desc_a,choice_,info) if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='Inner updates') @@ -690,8 +667,6 @@ subroutine psb_sspsv_vect(alpha,a,x,beta,y,desc_a,info,& end if end if - if (aliw) deallocate(iwork) - call psb_erractionrestore(err_act) return diff --git a/base/psblas/psb_zspsm.f90 b/base/psblas/psb_zspsm.f90 index ea01bdbf..73bb9da9 100644 --- a/base/psblas/psb_zspsm.f90 +++ b/base/psblas/psb_zspsm.f90 @@ -565,7 +565,7 @@ subroutine psb_zspsv_vect(alpha,a,x,beta,y,desc_a,info,& character :: lscale integer(psb_ipk_), parameter :: nb=4 - complex(psb_dpk_),pointer :: iwork(:), xp(:), yp(:) + complex(psb_dpk_) :: iwork(1) character :: itrans character(len=20) :: name, ch_err logical :: aliw @@ -636,32 +636,10 @@ subroutine psb_zspsv_vect(alpha,a,x,beta,y,desc_a,info,& goto 9999 end if - iwork => null() - ! check for presence/size of a work area - liwork= 2*ncol - - if (present(work)) then - if (size(work) >= liwork) then - aliw =.false. - else - aliw=.true. - endif - else - aliw=.true. - end if - - if (aliw) then - allocate(iwork(liwork),stat=info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_realloc' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - else - iwork => work - endif - + ! With encapsulated vectors the inner routines + ! do not need the work area, hence liwork is + ! a simple array with 1 entry to keep the interface + ! happy. Might remove it entirely in the future. iwork(1)=0.d0 ! Perform local triangular system solve @@ -682,7 +660,6 @@ subroutine psb_zspsv_vect(alpha,a,x,beta,y,desc_a,info,& call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& & zone,y%v,desc_a,iwork,info,data=psb_comm_ovr_) - if (info == psb_success_) call psi_ovrl_upd(y%v,desc_a,choice_,info) if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='Inner updates') @@ -690,8 +667,6 @@ subroutine psb_zspsv_vect(alpha,a,x,beta,y,desc_a,info,& end if end if - if (aliw) deallocate(iwork) - call psb_erractionrestore(err_act) return From 5ae90a0daec9893d03bb192ed8db2f70093b1e73 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Fri, 28 Jun 2019 16:24:25 +0100 Subject: [PATCH 07/21] Add argument to cdall to force HASH. --- base/modules/tools/psb_cd_tools_mod.f90 | 7 +-- base/tools/psb_cd_inloc.f90 | 13 ++++-- base/tools/psb_cdall.f90 | 59 ++++++++++++++----------- 3 files changed, 47 insertions(+), 32 deletions(-) diff --git a/base/modules/tools/psb_cd_tools_mod.f90 b/base/modules/tools/psb_cd_tools_mod.f90 index e2fee54a..a9a3be36 100644 --- a/base/modules/tools/psb_cd_tools_mod.f90 +++ b/base/modules/tools/psb_cd_tools_mod.f90 @@ -169,16 +169,17 @@ module psb_cd_tools_mod interface psb_cdall subroutine psb_cdall(ictxt, desc, info,mg,ng,parts,vg,vl,flag,nl,repl,& - & globalcheck,lidx) + & globalcheck,lidx,usehash) import :: psb_ipk_, psb_desc_type, psb_parts implicit None procedure(psb_parts) :: parts integer(psb_ipk_), intent(in) :: mg,ng,ictxt, vg(:), vl(:),nl,lidx(:) integer(psb_ipk_), intent(in) :: flag - logical, intent(in) :: repl, globalcheck + logical, intent(in) :: repl, globalcheck, usehash integer(psb_ipk_), intent(out) :: info type(psb_desc_type), intent(out) :: desc - optional :: mg,ng,parts,vg,vl,flag,nl,repl, globalcheck,lidx + optional :: mg, ng, parts, vg, vl, flag, nl, repl, globalcheck, lidx, usehash + end subroutine psb_cdall end interface diff --git a/base/tools/psb_cd_inloc.f90 b/base/tools/psb_cd_inloc.f90 index 6ea184d1..de80f527 100644 --- a/base/tools/psb_cd_inloc.f90 +++ b/base/tools/psb_cd_inloc.f90 @@ -41,7 +41,7 @@ ! ictxt - integer. The communication context. ! desc - type(psb_desc_type). The communication descriptor. ! info - integer. Eventually returns an error code -subroutine psb_cd_inloc(v, ictxt, desc, info, globalcheck,idx) +subroutine psb_cd_inloc(v, ictxt, desc, info, globalcheck,idx,usehash) use psb_base_mod use psi_mod use psb_repl_map_mod @@ -52,7 +52,7 @@ subroutine psb_cd_inloc(v, ictxt, desc, info, globalcheck,idx) integer(psb_ipk_), intent(in) :: ictxt, v(:) integer(psb_ipk_), intent(out) :: info type(psb_desc_type), intent(out) :: desc - logical, intent(in), optional :: globalcheck + logical, intent(in), optional :: globalcheck,usehash integer(psb_ipk_), intent(in), optional :: idx(:) !locals @@ -65,7 +65,7 @@ subroutine psb_cd_inloc(v, ictxt, desc, info, globalcheck,idx) & nov(:), ov_idx(:,:), ix(:) integer(psb_ipk_) :: debug_level, debug_unit integer(psb_mpik_) :: iictxt - logical :: check_, islarge + logical :: check_, islarge, usehash_ character(len=20) :: name if(psb_get_errstatus() /= 0) return @@ -92,6 +92,11 @@ subroutine psb_cd_inloc(v, ictxt, desc, info, globalcheck,idx) else check_ = .false. end if + if (present(usehash)) then + usehash_ = usehash + else + usehash_ = .false. + end if n = m @@ -351,7 +356,7 @@ subroutine psb_cd_inloc(v, ictxt, desc, info, globalcheck,idx) if (np == 1) then allocate(psb_repl_map :: desc%indxmap, stat=info) else - if (islarge) then + if (islarge.or.usehash_) then allocate(psb_hash_map :: desc%indxmap, stat=info) else allocate(psb_list_map :: desc%indxmap, stat=info) diff --git a/base/tools/psb_cdall.f90 b/base/tools/psb_cdall.f90 index 46aa9d44..557fea2c 100644 --- a/base/tools/psb_cdall.f90 +++ b/base/tools/psb_cdall.f90 @@ -1,4 +1,4 @@ -subroutine psb_cdall(ictxt, desc, info,mg,ng,parts,vg,vl,flag,nl,repl, globalcheck,lidx) +subroutine psb_cdall(ictxt, desc, info,mg,ng,parts,vg,vl,flag,nl,repl,globalcheck,lidx,usehash) use psb_desc_mod use psb_serial_mod use psb_const_mod @@ -7,48 +7,49 @@ subroutine psb_cdall(ictxt, desc, info,mg,ng,parts,vg,vl,flag,nl,repl, globalche use psb_cd_tools_mod, psb_protect_name => psb_cdall use psi_mod implicit None - procedure(psb_parts) :: parts - integer(psb_ipk_), intent(in) :: mg,ng,ictxt, vg(:), vl(:),nl,lidx(:) - integer(psb_ipk_), intent(in) :: flag - logical, intent(in) :: repl, globalcheck - integer(psb_ipk_), intent(out) :: info + procedure(psb_parts) :: parts + integer(psb_ipk_), intent(in) :: mg,ng,ictxt, vg(:), vl(:),nl,lidx(:) + integer(psb_ipk_), intent(in) :: flag + logical, intent(in) :: repl, globalcheck,usehash + integer(psb_ipk_), intent(out) :: info type(psb_desc_type), intent(out) :: desc - optional :: mg,ng,parts,vg,vl,flag,nl,repl, globalcheck,lidx + optional :: mg,ng,parts,vg,vl,flag,nl,repl, globalcheck,lidx, usehash interface subroutine psb_cdals(m, n, parts, ictxt, desc, info) use psb_desc_mod - procedure(psb_parts) :: parts - integer(psb_ipk_), intent(in) :: m,n,ictxt - Type(psb_desc_type), intent(out) :: desc - integer(psb_ipk_), intent(out) :: info + procedure(psb_parts) :: parts + integer(psb_ipk_), intent(in) :: m,n,ictxt + Type(psb_desc_type), intent(out) :: desc + integer(psb_ipk_), intent(out) :: info end subroutine psb_cdals subroutine psb_cdalv(v, ictxt, desc, info, flag) use psb_desc_mod - integer(psb_ipk_), intent(in) :: ictxt, v(:) - integer(psb_ipk_), intent(in), optional :: flag - integer(psb_ipk_), intent(out) :: info - Type(psb_desc_type), intent(out) :: desc + integer(psb_ipk_), intent(in) :: ictxt, v(:) + integer(psb_ipk_), intent(in), optional :: flag + integer(psb_ipk_), intent(out) :: info + Type(psb_desc_type), intent(out) :: desc end subroutine psb_cdalv - subroutine psb_cd_inloc(v, ictxt, desc, info, globalcheck,idx) + subroutine psb_cd_inloc(v, ictxt, desc, info, globalcheck,idx, usehash) use psb_desc_mod implicit None - integer(psb_ipk_), intent(in) :: ictxt, v(:) - integer(psb_ipk_), intent(out) :: info - type(psb_desc_type), intent(out) :: desc - logical, intent(in), optional :: globalcheck - integer(psb_ipk_), intent(in), optional :: idx(:) + integer(psb_ipk_), intent(in) :: ictxt, v(:) + integer(psb_ipk_), intent(out) :: info + type(psb_desc_type), intent(out) :: desc + logical, intent(in), optional :: globalcheck, usehash + integer(psb_ipk_), intent(in), optional :: idx(:) end subroutine psb_cd_inloc subroutine psb_cdrep(m, ictxt, desc,info) use psb_desc_mod - integer(psb_ipk_), intent(in) :: m,ictxt + integer(psb_ipk_), intent(in) :: m,ictxt Type(psb_desc_type), intent(out) :: desc - integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(out) :: info end subroutine psb_cdrep end interface - character(len=20) :: name - integer(psb_ipk_) :: err_act, n_, flag_, i, me, np, nlp, nnv, lr + character(len=20) :: name + integer(psb_ipk_) :: err_act, n_, flag_, i, me, np, nlp, nnv, lr + logical :: usehash_ integer(psb_ipk_), allocatable :: itmpsz(:) integer(psb_mpik_) :: iictxt @@ -130,6 +131,14 @@ subroutine psb_cdall(ictxt, desc, info,mg,ng,parts,vg,vl,flag,nl,repl, globalche else if (present(nl)) then + if (present(usehash)) then + usehash_ = usehash + else + usehash_ = .false. + end if + if (usehash_) then + write(0,*) 'Fix usehash_ implementationt ' + end if if (np == 1) then allocate(psb_repl_map :: desc%indxmap, stat=info) From fcc8a8811203479a11cb3f1c421acdfed1a1850b Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Mon, 1 Jul 2019 22:24:32 +0100 Subject: [PATCH 08/21] Limit reallocation to NR or NC --- base/serial/impl/psb_c_csc_impl.f90 | 2 +- base/serial/impl/psb_c_csr_impl.f90 | 4 ++-- base/serial/impl/psb_d_csc_impl.f90 | 2 +- base/serial/impl/psb_d_csr_impl.f90 | 4 ++-- base/serial/impl/psb_s_csc_impl.f90 | 2 +- base/serial/impl/psb_s_csr_impl.f90 | 4 ++-- base/serial/impl/psb_z_csc_impl.f90 | 2 +- base/serial/impl/psb_z_csr_impl.f90 | 4 ++-- 8 files changed, 12 insertions(+), 12 deletions(-) diff --git a/base/serial/impl/psb_c_csc_impl.f90 b/base/serial/impl/psb_c_csc_impl.f90 index 23e8f4a8..d9483937 100644 --- a/base/serial/impl/psb_c_csc_impl.f90 +++ b/base/serial/impl/psb_c_csc_impl.f90 @@ -2298,7 +2298,7 @@ subroutine psb_c_mv_csc_from_coo(a,b,info) call move_alloc(b%ja,itemp) call move_alloc(b%ia,a%ia) call move_alloc(b%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%icp,info) + call psb_realloc(nc+1,a%icp,info) call b%free() a%icp(:) = 0 diff --git a/base/serial/impl/psb_c_csr_impl.f90 b/base/serial/impl/psb_c_csr_impl.f90 index 1490718d..f137b30d 100644 --- a/base/serial/impl/psb_c_csr_impl.f90 +++ b/base/serial/impl/psb_c_csr_impl.f90 @@ -2960,7 +2960,7 @@ subroutine psb_c_cp_csr_from_coo(a,b,info) call move_alloc(tmp%ia,itemp) call move_alloc(tmp%ja,a%ja) call move_alloc(tmp%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%irp,info) + call psb_realloc(nr+1,a%irp,info) call tmp%free() else @@ -3129,7 +3129,7 @@ subroutine psb_c_mv_csr_from_coo(a,b,info) call move_alloc(b%ia,itemp) call move_alloc(b%ja,a%ja) call move_alloc(b%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%irp,info) + call psb_realloc(nr+1,a%irp,info) call b%free() diff --git a/base/serial/impl/psb_d_csc_impl.f90 b/base/serial/impl/psb_d_csc_impl.f90 index ef68164d..25085229 100644 --- a/base/serial/impl/psb_d_csc_impl.f90 +++ b/base/serial/impl/psb_d_csc_impl.f90 @@ -2298,7 +2298,7 @@ subroutine psb_d_mv_csc_from_coo(a,b,info) call move_alloc(b%ja,itemp) call move_alloc(b%ia,a%ia) call move_alloc(b%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%icp,info) + call psb_realloc(nc+1,a%icp,info) call b%free() a%icp(:) = 0 diff --git a/base/serial/impl/psb_d_csr_impl.f90 b/base/serial/impl/psb_d_csr_impl.f90 index 523c1501..c7882d89 100644 --- a/base/serial/impl/psb_d_csr_impl.f90 +++ b/base/serial/impl/psb_d_csr_impl.f90 @@ -2960,7 +2960,7 @@ subroutine psb_d_cp_csr_from_coo(a,b,info) call move_alloc(tmp%ia,itemp) call move_alloc(tmp%ja,a%ja) call move_alloc(tmp%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%irp,info) + call psb_realloc(nr+1,a%irp,info) call tmp%free() else @@ -3129,7 +3129,7 @@ subroutine psb_d_mv_csr_from_coo(a,b,info) call move_alloc(b%ia,itemp) call move_alloc(b%ja,a%ja) call move_alloc(b%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%irp,info) + call psb_realloc(nr+1,a%irp,info) call b%free() diff --git a/base/serial/impl/psb_s_csc_impl.f90 b/base/serial/impl/psb_s_csc_impl.f90 index 6318db9d..6eb0d830 100644 --- a/base/serial/impl/psb_s_csc_impl.f90 +++ b/base/serial/impl/psb_s_csc_impl.f90 @@ -2298,7 +2298,7 @@ subroutine psb_s_mv_csc_from_coo(a,b,info) call move_alloc(b%ja,itemp) call move_alloc(b%ia,a%ia) call move_alloc(b%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%icp,info) + call psb_realloc(nc+1,a%icp,info) call b%free() a%icp(:) = 0 diff --git a/base/serial/impl/psb_s_csr_impl.f90 b/base/serial/impl/psb_s_csr_impl.f90 index 75a6c0cc..da7bfd87 100644 --- a/base/serial/impl/psb_s_csr_impl.f90 +++ b/base/serial/impl/psb_s_csr_impl.f90 @@ -2960,7 +2960,7 @@ subroutine psb_s_cp_csr_from_coo(a,b,info) call move_alloc(tmp%ia,itemp) call move_alloc(tmp%ja,a%ja) call move_alloc(tmp%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%irp,info) + call psb_realloc(nr+1,a%irp,info) call tmp%free() else @@ -3129,7 +3129,7 @@ subroutine psb_s_mv_csr_from_coo(a,b,info) call move_alloc(b%ia,itemp) call move_alloc(b%ja,a%ja) call move_alloc(b%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%irp,info) + call psb_realloc(nr+1,a%irp,info) call b%free() diff --git a/base/serial/impl/psb_z_csc_impl.f90 b/base/serial/impl/psb_z_csc_impl.f90 index bd14f8a3..2d0fa913 100644 --- a/base/serial/impl/psb_z_csc_impl.f90 +++ b/base/serial/impl/psb_z_csc_impl.f90 @@ -2298,7 +2298,7 @@ subroutine psb_z_mv_csc_from_coo(a,b,info) call move_alloc(b%ja,itemp) call move_alloc(b%ia,a%ia) call move_alloc(b%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%icp,info) + call psb_realloc(nc+1,a%icp,info) call b%free() a%icp(:) = 0 diff --git a/base/serial/impl/psb_z_csr_impl.f90 b/base/serial/impl/psb_z_csr_impl.f90 index 6a691d4f..31fb166a 100644 --- a/base/serial/impl/psb_z_csr_impl.f90 +++ b/base/serial/impl/psb_z_csr_impl.f90 @@ -2960,7 +2960,7 @@ subroutine psb_z_cp_csr_from_coo(a,b,info) call move_alloc(tmp%ia,itemp) call move_alloc(tmp%ja,a%ja) call move_alloc(tmp%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%irp,info) + call psb_realloc(nr+1,a%irp,info) call tmp%free() else @@ -3129,7 +3129,7 @@ subroutine psb_z_mv_csr_from_coo(a,b,info) call move_alloc(b%ia,itemp) call move_alloc(b%ja,a%ja) call move_alloc(b%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%irp,info) + call psb_realloc(nr+1,a%irp,info) call b%free() From 2bd9e4ffa6eb66211ce761f4d8d69542ae590945 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Thu, 4 Jul 2019 13:38:59 +0100 Subject: [PATCH 09/21] Print_timers: check for allocated timers. --- base/modules/psb_timers_mod.f90 | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/base/modules/psb_timers_mod.f90 b/base/modules/psb_timers_mod.f90 index 8119d24d..1fe11162 100644 --- a/base/modules/psb_timers_mod.f90 +++ b/base/modules/psb_timers_mod.f90 @@ -127,17 +127,19 @@ contains end if if (global_) then - allocate(ptimers(timer_entries_,size(timers,2)),stat=info) - if (info /= 0) then - write(0,*) 'Error while trying to allocate temporary ',info - call psb_abort(ictxt) - end if - ptimers = timers - call psb_max(ictxt,ptimers) - if (me == psb_root_) then - do i=idxmin_, idxmax_ - call print_timer(me, ptimers(:,i), timers_descr(i), iout) - end do + if (allocated(timers)) then + allocate(ptimers(timer_entries_,size(timers,2)),stat=info) + if (info /= 0) then + write(0,*) 'Error while trying to allocate temporary ',info + call psb_abort(ictxt) + end if + ptimers = timers + call psb_max(ictxt,ptimers) + if (me == psb_root_) then + do i=idxmin_, idxmax_ + call print_timer(me, ptimers(:,i), timers_descr(i), iout) + end do + end if end if else if ((proc_ == -1).or.(me==proc_)) then From 8eee384a400eec2b14540ce235658fd2ece44d3f Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Thu, 4 Jul 2019 13:39:28 +0100 Subject: [PATCH 10/21] Take care in reallocate size in CSR/CSC. --- base/serial/impl/psb_c_csc_impl.f90 | 3 +-- base/serial/impl/psb_c_csr_impl.f90 | 5 ++--- base/serial/impl/psb_d_csc_impl.f90 | 3 +-- base/serial/impl/psb_d_csr_impl.f90 | 5 ++--- base/serial/impl/psb_s_csc_impl.f90 | 3 +-- base/serial/impl/psb_s_csr_impl.f90 | 5 ++--- base/serial/impl/psb_z_csc_impl.f90 | 3 +-- base/serial/impl/psb_z_csr_impl.f90 | 5 ++--- 8 files changed, 12 insertions(+), 20 deletions(-) diff --git a/base/serial/impl/psb_c_csc_impl.f90 b/base/serial/impl/psb_c_csc_impl.f90 index d9483937..6491636e 100644 --- a/base/serial/impl/psb_c_csc_impl.f90 +++ b/base/serial/impl/psb_c_csc_impl.f90 @@ -2570,8 +2570,7 @@ subroutine psb_c_csc_reallocate_nz(nz,a) call psb_realloc(max(nz,ione),a%ia,info) if (info == psb_success_) call psb_realloc(max(nz,ione),a%val,info) - if (info == psb_success_) call psb_realloc(max(nz,a%get_nrows()+1,& - & a%get_ncols()+1), a%icp,info) + if (info == psb_success_) call psb_realloc(a%get_ncols()+1, a%icp,info) if (info /= psb_success_) then call psb_errpush(psb_err_alloc_dealloc_,name) goto 9999 diff --git a/base/serial/impl/psb_c_csr_impl.f90 b/base/serial/impl/psb_c_csr_impl.f90 index f137b30d..4e820d73 100644 --- a/base/serial/impl/psb_c_csr_impl.f90 +++ b/base/serial/impl/psb_c_csr_impl.f90 @@ -1710,8 +1710,7 @@ subroutine psb_c_csr_reallocate_nz(nz,a) call psb_realloc(max(nz,ione),a%ja,info) if (info == psb_success_) call psb_realloc(max(nz,ione),a%val,info) - if (info == psb_success_) call psb_realloc(& - & max(nz,a%get_nrows()+1,a%get_ncols()+1),a%irp,info) + if (info == psb_success_) call psb_realloc(a%get_nrows()+1,a%irp,info) if (info /= psb_success_) then call psb_errpush(psb_err_alloc_dealloc_,name) goto 9999 @@ -2978,7 +2977,7 @@ subroutine psb_c_cp_csr_from_coo(a,b,info) call psb_safe_ab_cpy(b%ia,itemp,info) if (info == psb_success_) call psb_safe_ab_cpy(b%ja,a%ja,info) if (info == psb_success_) call psb_safe_ab_cpy(b%val,a%val,info) - if (info == psb_success_) call psb_realloc(max(nr+1,nc+1),a%irp,info) + if (info == psb_success_) call psb_realloc(nr+1,a%irp,info) endif diff --git a/base/serial/impl/psb_d_csc_impl.f90 b/base/serial/impl/psb_d_csc_impl.f90 index 25085229..39fb8ff5 100644 --- a/base/serial/impl/psb_d_csc_impl.f90 +++ b/base/serial/impl/psb_d_csc_impl.f90 @@ -2570,8 +2570,7 @@ subroutine psb_d_csc_reallocate_nz(nz,a) call psb_realloc(max(nz,ione),a%ia,info) if (info == psb_success_) call psb_realloc(max(nz,ione),a%val,info) - if (info == psb_success_) call psb_realloc(max(nz,a%get_nrows()+1,& - & a%get_ncols()+1), a%icp,info) + if (info == psb_success_) call psb_realloc(a%get_ncols()+1, a%icp,info) if (info /= psb_success_) then call psb_errpush(psb_err_alloc_dealloc_,name) goto 9999 diff --git a/base/serial/impl/psb_d_csr_impl.f90 b/base/serial/impl/psb_d_csr_impl.f90 index c7882d89..85cd1970 100644 --- a/base/serial/impl/psb_d_csr_impl.f90 +++ b/base/serial/impl/psb_d_csr_impl.f90 @@ -1710,8 +1710,7 @@ subroutine psb_d_csr_reallocate_nz(nz,a) call psb_realloc(max(nz,ione),a%ja,info) if (info == psb_success_) call psb_realloc(max(nz,ione),a%val,info) - if (info == psb_success_) call psb_realloc(& - & max(nz,a%get_nrows()+1,a%get_ncols()+1),a%irp,info) + if (info == psb_success_) call psb_realloc(a%get_nrows()+1,a%irp,info) if (info /= psb_success_) then call psb_errpush(psb_err_alloc_dealloc_,name) goto 9999 @@ -2978,7 +2977,7 @@ subroutine psb_d_cp_csr_from_coo(a,b,info) call psb_safe_ab_cpy(b%ia,itemp,info) if (info == psb_success_) call psb_safe_ab_cpy(b%ja,a%ja,info) if (info == psb_success_) call psb_safe_ab_cpy(b%val,a%val,info) - if (info == psb_success_) call psb_realloc(max(nr+1,nc+1),a%irp,info) + if (info == psb_success_) call psb_realloc(nr+1,a%irp,info) endif diff --git a/base/serial/impl/psb_s_csc_impl.f90 b/base/serial/impl/psb_s_csc_impl.f90 index 6eb0d830..a28f3f76 100644 --- a/base/serial/impl/psb_s_csc_impl.f90 +++ b/base/serial/impl/psb_s_csc_impl.f90 @@ -2570,8 +2570,7 @@ subroutine psb_s_csc_reallocate_nz(nz,a) call psb_realloc(max(nz,ione),a%ia,info) if (info == psb_success_) call psb_realloc(max(nz,ione),a%val,info) - if (info == psb_success_) call psb_realloc(max(nz,a%get_nrows()+1,& - & a%get_ncols()+1), a%icp,info) + if (info == psb_success_) call psb_realloc(a%get_ncols()+1, a%icp,info) if (info /= psb_success_) then call psb_errpush(psb_err_alloc_dealloc_,name) goto 9999 diff --git a/base/serial/impl/psb_s_csr_impl.f90 b/base/serial/impl/psb_s_csr_impl.f90 index da7bfd87..19a765ff 100644 --- a/base/serial/impl/psb_s_csr_impl.f90 +++ b/base/serial/impl/psb_s_csr_impl.f90 @@ -1710,8 +1710,7 @@ subroutine psb_s_csr_reallocate_nz(nz,a) call psb_realloc(max(nz,ione),a%ja,info) if (info == psb_success_) call psb_realloc(max(nz,ione),a%val,info) - if (info == psb_success_) call psb_realloc(& - & max(nz,a%get_nrows()+1,a%get_ncols()+1),a%irp,info) + if (info == psb_success_) call psb_realloc(a%get_nrows()+1,a%irp,info) if (info /= psb_success_) then call psb_errpush(psb_err_alloc_dealloc_,name) goto 9999 @@ -2978,7 +2977,7 @@ subroutine psb_s_cp_csr_from_coo(a,b,info) call psb_safe_ab_cpy(b%ia,itemp,info) if (info == psb_success_) call psb_safe_ab_cpy(b%ja,a%ja,info) if (info == psb_success_) call psb_safe_ab_cpy(b%val,a%val,info) - if (info == psb_success_) call psb_realloc(max(nr+1,nc+1),a%irp,info) + if (info == psb_success_) call psb_realloc(nr+1,a%irp,info) endif diff --git a/base/serial/impl/psb_z_csc_impl.f90 b/base/serial/impl/psb_z_csc_impl.f90 index 2d0fa913..149d3f54 100644 --- a/base/serial/impl/psb_z_csc_impl.f90 +++ b/base/serial/impl/psb_z_csc_impl.f90 @@ -2570,8 +2570,7 @@ subroutine psb_z_csc_reallocate_nz(nz,a) call psb_realloc(max(nz,ione),a%ia,info) if (info == psb_success_) call psb_realloc(max(nz,ione),a%val,info) - if (info == psb_success_) call psb_realloc(max(nz,a%get_nrows()+1,& - & a%get_ncols()+1), a%icp,info) + if (info == psb_success_) call psb_realloc(a%get_ncols()+1, a%icp,info) if (info /= psb_success_) then call psb_errpush(psb_err_alloc_dealloc_,name) goto 9999 diff --git a/base/serial/impl/psb_z_csr_impl.f90 b/base/serial/impl/psb_z_csr_impl.f90 index 31fb166a..f6c3bcc1 100644 --- a/base/serial/impl/psb_z_csr_impl.f90 +++ b/base/serial/impl/psb_z_csr_impl.f90 @@ -1710,8 +1710,7 @@ subroutine psb_z_csr_reallocate_nz(nz,a) call psb_realloc(max(nz,ione),a%ja,info) if (info == psb_success_) call psb_realloc(max(nz,ione),a%val,info) - if (info == psb_success_) call psb_realloc(& - & max(nz,a%get_nrows()+1,a%get_ncols()+1),a%irp,info) + if (info == psb_success_) call psb_realloc(a%get_nrows()+1,a%irp,info) if (info /= psb_success_) then call psb_errpush(psb_err_alloc_dealloc_,name) goto 9999 @@ -2978,7 +2977,7 @@ subroutine psb_z_cp_csr_from_coo(a,b,info) call psb_safe_ab_cpy(b%ia,itemp,info) if (info == psb_success_) call psb_safe_ab_cpy(b%ja,a%ja,info) if (info == psb_success_) call psb_safe_ab_cpy(b%val,a%val,info) - if (info == psb_success_) call psb_realloc(max(nr+1,nc+1),a%irp,info) + if (info == psb_success_) call psb_realloc(nr+1,a%irp,info) endif From 486c0338c5ee8bd029dd895b8013601960a4aef1 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Mon, 8 Jul 2019 12:59:27 +0100 Subject: [PATCH 11/21] Minimize memory requirements in borderline cases --- base/serial/impl/psb_c_coo_impl.f90 | 23 ++++++++++++++++++----- base/serial/impl/psb_d_coo_impl.f90 | 23 ++++++++++++++++++----- base/serial/impl/psb_s_coo_impl.f90 | 23 ++++++++++++++++++----- base/serial/impl/psb_z_coo_impl.f90 | 23 ++++++++++++++++++----- 4 files changed, 72 insertions(+), 20 deletions(-) diff --git a/base/serial/impl/psb_c_coo_impl.f90 b/base/serial/impl/psb_c_coo_impl.f90 index 695e3583..3d0db0b3 100644 --- a/base/serial/impl/psb_c_coo_impl.f90 +++ b/base/serial/impl/psb_c_coo_impl.f90 @@ -3416,21 +3416,26 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) dupl_ = dupl - - allocate(iaux(max(nr,nc,nzin)+2),stat=info) + allocate(iaux(nzin+2),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) goto 9999 end if - - allocate(ias(nzin),jas(nzin),vs(nzin),ix2(max(nr,nc,nzin)+2), stat=info) - use_buffers = (info == 0) select case(idir_) case(psb_row_major_) ! Row major order + + if (nr <= nzin) then + ! Avoid strange situations with large indices + allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info) + use_buffers = (info == 0) + else + use_buffers = .false. + end if + if (use_buffers) then if (.not.( (ia(1) < 1).or.(ia(1)> nr)) ) then iaux(:) = 0 @@ -3752,6 +3757,14 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) case(psb_col_major_) + if (nc <= nzin) then + ! Avoid strange situations with large indices + allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info) + use_buffers = (info == 0) + else + use_buffers = .false. + end if + if (use_buffers) then iaux(:) = 0 if (.not.( (ja(1) < 1).or.(ja(1)> nc)) ) then diff --git a/base/serial/impl/psb_d_coo_impl.f90 b/base/serial/impl/psb_d_coo_impl.f90 index 47c0b107..89aa8b97 100644 --- a/base/serial/impl/psb_d_coo_impl.f90 +++ b/base/serial/impl/psb_d_coo_impl.f90 @@ -3416,21 +3416,26 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) dupl_ = dupl - - allocate(iaux(max(nr,nc,nzin)+2),stat=info) + allocate(iaux(nzin+2),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) goto 9999 end if - - allocate(ias(nzin),jas(nzin),vs(nzin),ix2(max(nr,nc,nzin)+2), stat=info) - use_buffers = (info == 0) select case(idir_) case(psb_row_major_) ! Row major order + + if (nr <= nzin) then + ! Avoid strange situations with large indices + allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info) + use_buffers = (info == 0) + else + use_buffers = .false. + end if + if (use_buffers) then if (.not.( (ia(1) < 1).or.(ia(1)> nr)) ) then iaux(:) = 0 @@ -3752,6 +3757,14 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) case(psb_col_major_) + if (nc <= nzin) then + ! Avoid strange situations with large indices + allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info) + use_buffers = (info == 0) + else + use_buffers = .false. + end if + if (use_buffers) then iaux(:) = 0 if (.not.( (ja(1) < 1).or.(ja(1)> nc)) ) then diff --git a/base/serial/impl/psb_s_coo_impl.f90 b/base/serial/impl/psb_s_coo_impl.f90 index b80a8a99..d327e12e 100644 --- a/base/serial/impl/psb_s_coo_impl.f90 +++ b/base/serial/impl/psb_s_coo_impl.f90 @@ -3416,21 +3416,26 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) dupl_ = dupl - - allocate(iaux(max(nr,nc,nzin)+2),stat=info) + allocate(iaux(nzin+2),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) goto 9999 end if - - allocate(ias(nzin),jas(nzin),vs(nzin),ix2(max(nr,nc,nzin)+2), stat=info) - use_buffers = (info == 0) select case(idir_) case(psb_row_major_) ! Row major order + + if (nr <= nzin) then + ! Avoid strange situations with large indices + allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info) + use_buffers = (info == 0) + else + use_buffers = .false. + end if + if (use_buffers) then if (.not.( (ia(1) < 1).or.(ia(1)> nr)) ) then iaux(:) = 0 @@ -3752,6 +3757,14 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) case(psb_col_major_) + if (nc <= nzin) then + ! Avoid strange situations with large indices + allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info) + use_buffers = (info == 0) + else + use_buffers = .false. + end if + if (use_buffers) then iaux(:) = 0 if (.not.( (ja(1) < 1).or.(ja(1)> nc)) ) then diff --git a/base/serial/impl/psb_z_coo_impl.f90 b/base/serial/impl/psb_z_coo_impl.f90 index e1883e03..d5ee9740 100644 --- a/base/serial/impl/psb_z_coo_impl.f90 +++ b/base/serial/impl/psb_z_coo_impl.f90 @@ -3416,21 +3416,26 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) dupl_ = dupl - - allocate(iaux(max(nr,nc,nzin)+2),stat=info) + allocate(iaux(nzin+2),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) goto 9999 end if - - allocate(ias(nzin),jas(nzin),vs(nzin),ix2(max(nr,nc,nzin)+2), stat=info) - use_buffers = (info == 0) select case(idir_) case(psb_row_major_) ! Row major order + + if (nr <= nzin) then + ! Avoid strange situations with large indices + allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info) + use_buffers = (info == 0) + else + use_buffers = .false. + end if + if (use_buffers) then if (.not.( (ia(1) < 1).or.(ia(1)> nr)) ) then iaux(:) = 0 @@ -3752,6 +3757,14 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) case(psb_col_major_) + if (nc <= nzin) then + ! Avoid strange situations with large indices + allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info) + use_buffers = (info == 0) + else + use_buffers = .false. + end if + if (use_buffers) then iaux(:) = 0 if (.not.( (ja(1) < 1).or.(ja(1)> nc)) ) then From 849f198b61595dc17ffb475637a21ff2d37df6b4 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Mon, 8 Jul 2019 15:28:51 +0100 Subject: [PATCH 12/21] *** empty log message *** --- cbind/test/pargen/ppdec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cbind/test/pargen/ppdec.c b/cbind/test/pargen/ppdec.c index 32c5b600..2e803505 100644 --- a/cbind/test/pargen/ppdec.c +++ b/cbind/test/pargen/ppdec.c @@ -131,7 +131,7 @@ int matgen(int ictxt, int ng,int idim,int vg[],psb_c_dspmat *ah,psb_c_descriptor info = 0; psb_c_info(ictxt,&iam,&np); - deltah = (double) 1.0/(idim+2); + deltah = (double) 1.0/(idim+1); sqdeltah = deltah*deltah; deltah2 = 2.0* deltah; psb_c_set_index_base(0); From 5e03cacdec1012140278185def4667b6a00ab1fc Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Fri, 19 Jul 2019 14:37:36 +0100 Subject: [PATCH 13/21] New SCAN collective, only for SUM right now. New arg in CDALL to force HASH over GEN_BLOCK --- base/modules/psi_reduce_mod.F90 | 63 +++++++++++++++++++++++++++++++++ base/tools/psb_cdall.f90 | 49 +++++++++++++------------ 2 files changed, 90 insertions(+), 22 deletions(-) diff --git a/base/modules/psi_reduce_mod.F90 b/base/modules/psi_reduce_mod.F90 index 597d8f8e..8322df55 100644 --- a/base/modules/psi_reduce_mod.F90 +++ b/base/modules/psi_reduce_mod.F90 @@ -182,6 +182,14 @@ module psi_reduce_mod end interface #endif + interface psb_scan_sum + module procedure psb_iscan_sums + end interface psb_scan_sum + + interface psb_exscan_sum + module procedure psb_iexscan_sums + end interface psb_exscan_sum + contains @@ -5586,4 +5594,59 @@ contains end subroutine psb_d_nrm2v_ic #endif + + + ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + ! + ! SCAN + ! + ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + subroutine psb_iscan_sums(ictxt,dat) +#ifdef MPI_MOD + use mpi +#endif + implicit none +#ifdef MPI_H + include 'mpif.h' +#endif + integer(psb_mpik_), intent(in) :: ictxt + integer(psb_ipk_), intent(inout) :: dat + integer(psb_ipk_) :: dat_ + integer(psb_mpik_) :: iam, np, info + integer(psb_ipk_) :: iinfo + + +#if !defined(SERIAL_MPI) + call psb_info(ictxt,iam,np) + call mpi_scan(dat,dat_,1,psb_mpi_ipk_integer,mpi_sum,ictxt,info) + dat = dat_ +#endif + end subroutine psb_iscan_sums + + + subroutine psb_iexscan_sums(ictxt,dat) +#ifdef MPI_MOD + use mpi +#endif + implicit none +#ifdef MPI_H + include 'mpif.h' +#endif + integer(psb_mpik_), intent(in) :: ictxt + integer(psb_ipk_), intent(inout) :: dat + integer(psb_ipk_) :: dat_ + integer(psb_mpik_) :: iam, np, info + integer(psb_ipk_) :: iinfo + + +#if !defined(SERIAL_MPI) + call psb_info(ictxt,iam,np) + call mpi_scan(dat,dat_,1,psb_mpi_ipk_integer,mpi_sum,ictxt,info) + dat = dat_ +#else + dat = 0 +#endif + end subroutine psb_iexscan_sums + end module psi_reduce_mod diff --git a/base/tools/psb_cdall.f90 b/base/tools/psb_cdall.f90 index 557fea2c..bebf51bb 100644 --- a/base/tools/psb_cdall.f90 +++ b/base/tools/psb_cdall.f90 @@ -50,7 +50,7 @@ subroutine psb_cdall(ictxt, desc, info,mg,ng,parts,vg,vl,flag,nl,repl,globalchec character(len=20) :: name integer(psb_ipk_) :: err_act, n_, flag_, i, me, np, nlp, nnv, lr logical :: usehash_ - integer(psb_ipk_), allocatable :: itmpsz(:) + integer(psb_ipk_), allocatable :: itmpv(:), lvl(:) integer(psb_mpik_) :: iictxt @@ -136,35 +136,40 @@ subroutine psb_cdall(ictxt, desc, info,mg,ng,parts,vg,vl,flag,nl,repl,globalchec else usehash_ = .false. end if - if (usehash_) then - write(0,*) 'Fix usehash_ implementationt ' - end if - if (np == 1) then - allocate(psb_repl_map :: desc%indxmap, stat=info) + if (usehash_) then + nlp = nl + call psb_exscan_sum(ictxt,nlp) + lvl = [ (i,i=1,nl) ] + nlp + call psb_cd_inloc(lvl(1:nl),ictxt,desc,info, globalcheck=.false.) + else - allocate(psb_gen_block_map :: desc%indxmap, stat=info) - end if - if (info == psb_success_) then - select type(aa => desc%indxmap) - type is (psb_repl_map) - call aa%repl_map_init(iictxt,nl,info) - type is (psb_gen_block_map) - call aa%gen_block_map_init(iictxt,nl,info) - class default - ! This cannot happen - info = psb_err_internal_error_ - goto 9999 - end select + if (np == 1) then + allocate(psb_repl_map :: desc%indxmap, stat=info) + else + allocate(psb_gen_block_map :: desc%indxmap, stat=info) + end if + if (info == psb_success_) then + select type(aa => desc%indxmap) + type is (psb_repl_map) + call aa%repl_map_init(iictxt,nl,info) + type is (psb_gen_block_map) + call aa%gen_block_map_init(iictxt,nl,info) + class default + ! This cannot happen + info = psb_err_internal_error_ + goto 9999 + end select + end if end if - call psb_realloc(1,itmpsz, info) + call psb_realloc(1,itmpv, info) if (info /= 0) then write(0,*) 'Error reallocating itmspz' goto 9999 end if - itmpsz(:) = -1 - call psi_bld_tmpovrl(itmpsz,desc,info) + itmpv(:) = -1 + call psi_bld_tmpovrl(itmpv,desc,info) endif From 8fd9f70626dd403146771e354df300103dfdcca9 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Fri, 19 Jul 2019 14:37:49 +0100 Subject: [PATCH 14/21] csrd has xt_tri. --- base/modules/serial/psb_c_csr_mat_mod.f90 | 12 ++++++++++++ base/modules/serial/psb_d_csr_mat_mod.f90 | 12 ++++++++++++ base/modules/serial/psb_s_csr_mat_mod.f90 | 12 ++++++++++++ base/modules/serial/psb_z_csr_mat_mod.f90 | 12 ++++++++++++ 4 files changed, 48 insertions(+) diff --git a/base/modules/serial/psb_c_csr_mat_mod.f90 b/base/modules/serial/psb_c_csr_mat_mod.f90 index 0cbe5218..e9ac3247 100644 --- a/base/modules/serial/psb_c_csr_mat_mod.f90 +++ b/base/modules/serial/psb_c_csr_mat_mod.f90 @@ -624,6 +624,7 @@ module psb_c_csr_mat_mod procedure, pass(a) :: trim => psb_c_csrd_trim procedure, pass(a) :: free => c_csrd_free procedure, pass(a) :: mold => psb_c_csrd_mold + procedure, nopass :: has_xt_tri => c_csrd_has_xt_tri end type psb_c_csrd_sparse_mat @@ -872,5 +873,16 @@ contains end subroutine c_csrd_free + ! + ! has_xt_tri: does the current type support + ! extended triangle operations? + ! + function c_csrd_has_xt_tri() result(res) + implicit none + logical :: res + + res = .true. + end function c_csrd_has_xt_tri + end module psb_c_csr_mat_mod diff --git a/base/modules/serial/psb_d_csr_mat_mod.f90 b/base/modules/serial/psb_d_csr_mat_mod.f90 index 8b573652..cac76757 100644 --- a/base/modules/serial/psb_d_csr_mat_mod.f90 +++ b/base/modules/serial/psb_d_csr_mat_mod.f90 @@ -624,6 +624,7 @@ module psb_d_csr_mat_mod procedure, pass(a) :: trim => psb_d_csrd_trim procedure, pass(a) :: free => d_csrd_free procedure, pass(a) :: mold => psb_d_csrd_mold + procedure, nopass :: has_xt_tri => d_csrd_has_xt_tri end type psb_d_csrd_sparse_mat @@ -872,5 +873,16 @@ contains end subroutine d_csrd_free + ! + ! has_xt_tri: does the current type support + ! extended triangle operations? + ! + function d_csrd_has_xt_tri() result(res) + implicit none + logical :: res + + res = .true. + end function d_csrd_has_xt_tri + end module psb_d_csr_mat_mod diff --git a/base/modules/serial/psb_s_csr_mat_mod.f90 b/base/modules/serial/psb_s_csr_mat_mod.f90 index 2432481c..a6c40cd7 100644 --- a/base/modules/serial/psb_s_csr_mat_mod.f90 +++ b/base/modules/serial/psb_s_csr_mat_mod.f90 @@ -624,6 +624,7 @@ module psb_s_csr_mat_mod procedure, pass(a) :: trim => psb_s_csrd_trim procedure, pass(a) :: free => s_csrd_free procedure, pass(a) :: mold => psb_s_csrd_mold + procedure, nopass :: has_xt_tri => s_csrd_has_xt_tri end type psb_s_csrd_sparse_mat @@ -872,5 +873,16 @@ contains end subroutine s_csrd_free + ! + ! has_xt_tri: does the current type support + ! extended triangle operations? + ! + function s_csrd_has_xt_tri() result(res) + implicit none + logical :: res + + res = .true. + end function s_csrd_has_xt_tri + end module psb_s_csr_mat_mod diff --git a/base/modules/serial/psb_z_csr_mat_mod.f90 b/base/modules/serial/psb_z_csr_mat_mod.f90 index 341dc77a..c42e6c5f 100644 --- a/base/modules/serial/psb_z_csr_mat_mod.f90 +++ b/base/modules/serial/psb_z_csr_mat_mod.f90 @@ -624,6 +624,7 @@ module psb_z_csr_mat_mod procedure, pass(a) :: trim => psb_z_csrd_trim procedure, pass(a) :: free => z_csrd_free procedure, pass(a) :: mold => psb_z_csrd_mold + procedure, nopass :: has_xt_tri => z_csrd_has_xt_tri end type psb_z_csrd_sparse_mat @@ -872,5 +873,16 @@ contains end subroutine z_csrd_free + ! + ! has_xt_tri: does the current type support + ! extended triangle operations? + ! + function z_csrd_has_xt_tri() result(res) + implicit none + logical :: res + + res = .true. + end function z_csrd_has_xt_tri + end module psb_z_csr_mat_mod From 0460968f5d6f9678765c6a84b906c853ca4efc31 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Fri, 19 Jul 2019 14:38:02 +0100 Subject: [PATCH 15/21] Partial fix for ROOT in SPGATHER. --- base/comm/psb_cspgather.F90 | 7 ++++++- base/comm/psb_dspgather.F90 | 7 ++++++- base/comm/psb_sspgather.F90 | 7 ++++++- base/comm/psb_zspgather.F90 | 7 ++++++- 4 files changed, 24 insertions(+), 4 deletions(-) diff --git a/base/comm/psb_cspgather.F90 b/base/comm/psb_cspgather.F90 index be1ddbe0..c4ea2728 100644 --- a/base/comm/psb_cspgather.F90 +++ b/base/comm/psb_cspgather.F90 @@ -57,7 +57,7 @@ subroutine psb_csp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep integer(psb_ipk_) :: err_act, dupl_, nrg, ncg, nzg integer(psb_ipk_) :: ip,naggrm1,naggrp1, i, j, k, nzl logical :: keepnum_, keeploc_ - integer(psb_mpik_) :: ictxt,np,me + integer(psb_mpik_) :: ictxt,np,me, root_ integer(psb_mpik_) :: icomm, minfo, ndx integer(psb_mpik_), allocatable :: nzbr(:), idisp(:) integer(psb_ipk_) :: ierr(5) @@ -88,6 +88,11 @@ subroutine psb_csp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep else p_desc_c => desc_a end if + if (present(root)) then + root_ = root + else + root_ = -1 + end if call globa%free() diff --git a/base/comm/psb_dspgather.F90 b/base/comm/psb_dspgather.F90 index 59d0d080..f928d822 100644 --- a/base/comm/psb_dspgather.F90 +++ b/base/comm/psb_dspgather.F90 @@ -57,7 +57,7 @@ subroutine psb_dsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep integer(psb_ipk_) :: err_act, dupl_, nrg, ncg, nzg integer(psb_ipk_) :: ip,naggrm1,naggrp1, i, j, k, nzl logical :: keepnum_, keeploc_ - integer(psb_mpik_) :: ictxt,np,me + integer(psb_mpik_) :: ictxt,np,me, root_ integer(psb_mpik_) :: icomm, minfo, ndx integer(psb_mpik_), allocatable :: nzbr(:), idisp(:) integer(psb_ipk_) :: ierr(5) @@ -88,6 +88,11 @@ subroutine psb_dsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep else p_desc_c => desc_a end if + if (present(root)) then + root_ = root + else + root_ = -1 + end if call globa%free() diff --git a/base/comm/psb_sspgather.F90 b/base/comm/psb_sspgather.F90 index b974a2e7..e09eb26b 100644 --- a/base/comm/psb_sspgather.F90 +++ b/base/comm/psb_sspgather.F90 @@ -57,7 +57,7 @@ subroutine psb_ssp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep integer(psb_ipk_) :: err_act, dupl_, nrg, ncg, nzg integer(psb_ipk_) :: ip,naggrm1,naggrp1, i, j, k, nzl logical :: keepnum_, keeploc_ - integer(psb_mpik_) :: ictxt,np,me + integer(psb_mpik_) :: ictxt,np,me, root_ integer(psb_mpik_) :: icomm, minfo, ndx integer(psb_mpik_), allocatable :: nzbr(:), idisp(:) integer(psb_ipk_) :: ierr(5) @@ -88,6 +88,11 @@ subroutine psb_ssp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep else p_desc_c => desc_a end if + if (present(root)) then + root_ = root + else + root_ = -1 + end if call globa%free() diff --git a/base/comm/psb_zspgather.F90 b/base/comm/psb_zspgather.F90 index 7bcb72e7..090b568d 100644 --- a/base/comm/psb_zspgather.F90 +++ b/base/comm/psb_zspgather.F90 @@ -57,7 +57,7 @@ subroutine psb_zsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep integer(psb_ipk_) :: err_act, dupl_, nrg, ncg, nzg integer(psb_ipk_) :: ip,naggrm1,naggrp1, i, j, k, nzl logical :: keepnum_, keeploc_ - integer(psb_mpik_) :: ictxt,np,me + integer(psb_mpik_) :: ictxt,np,me, root_ integer(psb_mpik_) :: icomm, minfo, ndx integer(psb_mpik_), allocatable :: nzbr(:), idisp(:) integer(psb_ipk_) :: ierr(5) @@ -88,6 +88,11 @@ subroutine psb_zsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep else p_desc_c => desc_a end if + if (present(root)) then + root_ = root + else + root_ = -1 + end if call globa%free() From ffb6a6dd68e12eb0d08003b693338ce7e84e40d4 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Mon, 22 Jul 2019 17:16:10 +0100 Subject: [PATCH 16/21] Fix handling of SYMmetric --- base/modules/serial/psb_base_mat_mod.f90 | 3 +++ 1 file changed, 3 insertions(+) diff --git a/base/modules/serial/psb_base_mat_mod.f90 b/base/modules/serial/psb_base_mat_mod.f90 index 9d6eb75c..cf87a4e7 100644 --- a/base/modules/serial/psb_base_mat_mod.f90 +++ b/base/modules/serial/psb_base_mat_mod.f90 @@ -770,6 +770,7 @@ contains b%state = a%state b%duplicate = a%duplicate b%triangle = a%triangle + b%symmetric = a%symmetric b%unitd = a%unitd b%upper = .not.a%upper b%sorted = .false. @@ -789,6 +790,7 @@ contains b%state = a%state b%duplicate = a%duplicate b%triangle = a%triangle + b%symmetric = a%symmetric b%unitd = a%unitd b%upper = .not.a%upper b%sorted = .false. @@ -808,6 +810,7 @@ contains a%state = a%state a%duplicate = a%duplicate a%triangle = a%triangle + a%symmetric = a%symmetric a%unitd = a%unitd a%upper = .not.a%upper a%sorted = .false. From e0c40e042ea97c2a6898e634a55e09fd58d1042c Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Mon, 29 Jul 2019 14:17:32 +0100 Subject: [PATCH 17/21] SAVE module variables for timers. --- base/modules/psb_timers_mod.f90 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/base/modules/psb_timers_mod.f90 b/base/modules/psb_timers_mod.f90 index 1fe11162..5b2a8054 100644 --- a/base/modules/psb_timers_mod.f90 +++ b/base/modules/psb_timers_mod.f90 @@ -63,7 +63,8 @@ module psb_timers_mod type(psb_string_item), allocatable :: timers_descr(:) logical :: wanted(timer_entries_) type(psb_string_item) :: entries_descr(timer_entries_) - + save :: nsamples, timers, timers_descr, wanted, entries_descr + interface psb_realloc module procedure psb_string_item_realloc end interface psb_realloc From f79104a6d15babe765bba0c9d7b161e3d3eefd53 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Fri, 2 Aug 2019 09:35:34 +0100 Subject: [PATCH 18/21] Fix return statement in FCG --- krylov/psb_cfcg.F90 | 2 +- krylov/psb_dfcg.F90 | 2 +- krylov/psb_sfcg.F90 | 2 +- krylov/psb_zfcg.F90 | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/krylov/psb_cfcg.F90 b/krylov/psb_cfcg.F90 index 1f2d895d..09ab8127 100644 --- a/krylov/psb_cfcg.F90 +++ b/krylov/psb_cfcg.F90 @@ -305,7 +305,7 @@ subroutine psb_cfcg_vect(a,prec,b,x,eps,desc_a,info,& call psb_end_conv(methdname,itx ,desc_a,stopdat,info,derr,iter) if (present(err)) err = derr - + return 9999 continue call psb_erractionrestore(err_act) if (err_act.eq.psb_act_abort_) then diff --git a/krylov/psb_dfcg.F90 b/krylov/psb_dfcg.F90 index bdb336d5..6f664238 100644 --- a/krylov/psb_dfcg.F90 +++ b/krylov/psb_dfcg.F90 @@ -305,7 +305,7 @@ subroutine psb_dfcg_vect(a,prec,b,x,eps,desc_a,info,& call psb_end_conv(methdname,itx ,desc_a,stopdat,info,derr,iter) if (present(err)) err = derr - + return 9999 continue call psb_erractionrestore(err_act) if (err_act.eq.psb_act_abort_) then diff --git a/krylov/psb_sfcg.F90 b/krylov/psb_sfcg.F90 index 5b4e6957..c5b883cc 100644 --- a/krylov/psb_sfcg.F90 +++ b/krylov/psb_sfcg.F90 @@ -305,7 +305,7 @@ subroutine psb_sfcg_vect(a,prec,b,x,eps,desc_a,info,& call psb_end_conv(methdname,itx ,desc_a,stopdat,info,derr,iter) if (present(err)) err = derr - + return 9999 continue call psb_erractionrestore(err_act) if (err_act.eq.psb_act_abort_) then diff --git a/krylov/psb_zfcg.F90 b/krylov/psb_zfcg.F90 index cb9289bd..ca856f6b 100644 --- a/krylov/psb_zfcg.F90 +++ b/krylov/psb_zfcg.F90 @@ -305,7 +305,7 @@ subroutine psb_zfcg_vect(a,prec,b,x,eps,desc_a,info,& call psb_end_conv(methdname,itx ,desc_a,stopdat,info,derr,iter) if (present(err)) err = derr - + return 9999 continue call psb_erractionrestore(err_act) if (err_act.eq.psb_act_abort_) then From 0309f00ada48df9fdfd8ca628264487450f5365b Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Wed, 7 Aug 2019 16:22:57 +0100 Subject: [PATCH 19/21] New psb_par_spspmm method for parallel triple matrix product. --- base/modules/tools/psb_c_tools_mod.f90 | 15 ++- base/modules/tools/psb_d_tools_mod.f90 | 15 ++- base/modules/tools/psb_s_tools_mod.f90 | 15 ++- base/modules/tools/psb_z_tools_mod.f90 | 15 ++- base/tools/Makefile | 5 +- base/tools/psb_c_par_csr_spspmm.f90 | 156 +++++++++++++++++++++++++ base/tools/psb_d_par_csr_spspmm.f90 | 156 +++++++++++++++++++++++++ base/tools/psb_s_par_csr_spspmm.f90 | 156 +++++++++++++++++++++++++ base/tools/psb_z_par_csr_spspmm.f90 | 156 +++++++++++++++++++++++++ 9 files changed, 684 insertions(+), 5 deletions(-) create mode 100644 base/tools/psb_c_par_csr_spspmm.f90 create mode 100644 base/tools/psb_d_par_csr_spspmm.f90 create mode 100644 base/tools/psb_s_par_csr_spspmm.f90 create mode 100644 base/tools/psb_z_par_csr_spspmm.f90 diff --git a/base/modules/tools/psb_c_tools_mod.f90 b/base/modules/tools/psb_c_tools_mod.f90 index 2259cfaa..7dfa1585 100644 --- a/base/modules/tools/psb_c_tools_mod.f90 +++ b/base/modules/tools/psb_c_tools_mod.f90 @@ -363,7 +363,6 @@ Module psb_c_tools_mod end subroutine psb_cspins_2desc end interface - interface psb_sprn subroutine psb_csprn(a, desc_a,info,clear) import @@ -374,5 +373,19 @@ Module psb_c_tools_mod logical, intent(in), optional :: clear end subroutine psb_csprn end interface + + interface psb_par_spspmm + subroutine psb_c_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) + import :: psb_c_csr_sparse_mat, psb_desc_type, psb_ipk_ + Implicit None + type(psb_c_csr_sparse_mat),intent(in) :: acsr + type(psb_c_csr_sparse_mat),intent(inout) :: bcsr + type(psb_c_csr_sparse_mat),intent(out) :: ccsr + type(psb_desc_type),intent(in) :: desc_a + type(psb_desc_type),intent(inout) :: desc_c + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: data + End Subroutine psb_c_par_csr_spspmm + end interface psb_par_spspmm end module psb_c_tools_mod diff --git a/base/modules/tools/psb_d_tools_mod.f90 b/base/modules/tools/psb_d_tools_mod.f90 index 8f0e4824..94e9762c 100644 --- a/base/modules/tools/psb_d_tools_mod.f90 +++ b/base/modules/tools/psb_d_tools_mod.f90 @@ -363,7 +363,6 @@ Module psb_d_tools_mod end subroutine psb_dspins_2desc end interface - interface psb_sprn subroutine psb_dsprn(a, desc_a,info,clear) import @@ -374,5 +373,19 @@ Module psb_d_tools_mod logical, intent(in), optional :: clear end subroutine psb_dsprn end interface + + interface psb_par_spspmm + subroutine psb_d_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) + import :: psb_d_csr_sparse_mat, psb_desc_type, psb_ipk_ + Implicit None + type(psb_d_csr_sparse_mat),intent(in) :: acsr + type(psb_d_csr_sparse_mat),intent(inout) :: bcsr + type(psb_d_csr_sparse_mat),intent(out) :: ccsr + type(psb_desc_type),intent(in) :: desc_a + type(psb_desc_type),intent(inout) :: desc_c + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: data + End Subroutine psb_d_par_csr_spspmm + end interface psb_par_spspmm end module psb_d_tools_mod diff --git a/base/modules/tools/psb_s_tools_mod.f90 b/base/modules/tools/psb_s_tools_mod.f90 index a02e4629..6ba4fd5b 100644 --- a/base/modules/tools/psb_s_tools_mod.f90 +++ b/base/modules/tools/psb_s_tools_mod.f90 @@ -363,7 +363,6 @@ Module psb_s_tools_mod end subroutine psb_sspins_2desc end interface - interface psb_sprn subroutine psb_ssprn(a, desc_a,info,clear) import @@ -374,5 +373,19 @@ Module psb_s_tools_mod logical, intent(in), optional :: clear end subroutine psb_ssprn end interface + + interface psb_par_spspmm + subroutine psb_s_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) + import :: psb_s_csr_sparse_mat, psb_desc_type, psb_ipk_ + Implicit None + type(psb_s_csr_sparse_mat),intent(in) :: acsr + type(psb_s_csr_sparse_mat),intent(inout) :: bcsr + type(psb_s_csr_sparse_mat),intent(out) :: ccsr + type(psb_desc_type),intent(in) :: desc_a + type(psb_desc_type),intent(inout) :: desc_c + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: data + End Subroutine psb_s_par_csr_spspmm + end interface psb_par_spspmm end module psb_s_tools_mod diff --git a/base/modules/tools/psb_z_tools_mod.f90 b/base/modules/tools/psb_z_tools_mod.f90 index a3d2bed1..b5dca25a 100644 --- a/base/modules/tools/psb_z_tools_mod.f90 +++ b/base/modules/tools/psb_z_tools_mod.f90 @@ -363,7 +363,6 @@ Module psb_z_tools_mod end subroutine psb_zspins_2desc end interface - interface psb_sprn subroutine psb_zsprn(a, desc_a,info,clear) import @@ -374,5 +373,19 @@ Module psb_z_tools_mod logical, intent(in), optional :: clear end subroutine psb_zsprn end interface + + interface psb_par_spspmm + subroutine psb_z_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) + import :: psb_z_csr_sparse_mat, psb_desc_type, psb_ipk_ + Implicit None + type(psb_z_csr_sparse_mat),intent(in) :: acsr + type(psb_z_csr_sparse_mat),intent(inout) :: bcsr + type(psb_z_csr_sparse_mat),intent(out) :: ccsr + type(psb_desc_type),intent(in) :: desc_a + type(psb_desc_type),intent(inout) :: desc_c + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: data + End Subroutine psb_z_par_csr_spspmm + end interface psb_par_spspmm end module psb_z_tools_mod diff --git a/base/tools/Makefile b/base/tools/Makefile index fc3e7411..12cf0099 100644 --- a/base/tools/Makefile +++ b/base/tools/Makefile @@ -19,7 +19,10 @@ FOBJS = psb_sallc.o psb_sasb.o \ psb_cspalloc.o psb_cspasb.o psb_cspfree.o\ psb_callc.o psb_casb.o psb_cfree.o psb_cins.o \ psb_cspins.o psb_csprn.o psb_cd_set_bld.o \ - psb_s_map.o psb_d_map.o psb_c_map.o psb_z_map.o + psb_s_map.o psb_d_map.o psb_c_map.o psb_z_map.o \ + psb_s_par_csr_spspmm.o psb_d_par_csr_spspmm.o \ + psb_c_par_csr_spspmm.o psb_z_par_csr_spspmm.o + MPFOBJS = psb_icdasb.o psb_ssphalo.o psb_dsphalo.o psb_csphalo.o psb_zsphalo.o \ psb_dcdbldext.o psb_zcdbldext.o psb_scdbldext.o psb_ccdbldext.o diff --git a/base/tools/psb_c_par_csr_spspmm.f90 b/base/tools/psb_c_par_csr_spspmm.f90 new file mode 100644 index 00000000..ba4cdb7a --- /dev/null +++ b/base/tools/psb_c_par_csr_spspmm.f90 @@ -0,0 +1,156 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: psb_c_par_csr_spspmm.f90 +! +! Subroutine: psb_c_par_csr_spspmm +! Version: complex +! +! This routine computes a parallel product of two sparse matrices +! +! C = A * B +! +! where all the matrices are stored in CSR. On input and output the matrices +! are stored with column indices in local numbering, but inermediate quantities +! are in global numbering because gathering the halo of B to multiply it +! by A implies a potential enlargement of the support. +! Also, B may have a column index space different from its row index space, +! which is obviously the same as the column space of A. +! +! +! Arguments: +! acsr - type(psb_c_csr_sparse_mat), input. +! The sparse matrix structure A +! desc_a - type(psb_desc_type), input. +! The communication descriptor of the column space of A +! bcsr - type(psb_c_csr_sparse_mat), input/output. +! The sparse matrix structure B, gets row-extended on output +! ccsr - type(psb_c_csr_sparse_mat), output +! The sparse matrix structure C +! desc_c - type(psb_desc_type), input/output. +! The communication descriptor of the column space of B +! +! info - integer, output. +! Error code. +! + +Subroutine psb_c_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) + use psb_base_mod, psb_protect_name => psb_c_par_csr_spspmm + Implicit None + + type(psb_c_csr_sparse_mat),intent(in) :: acsr + type(psb_c_csr_sparse_mat),intent(inout) :: bcsr + type(psb_c_csr_sparse_mat),intent(out) :: ccsr + type(psb_desc_type),intent(in) :: desc_a + type(psb_desc_type),intent(inout) :: desc_c + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: data + ! ...local scalars.... + integer(psb_ipk_) :: ictxt, np,me + integer(psb_ipk_) :: ncol, nnz + type(psb_c_csr_sparse_mat) :: tcsr1 + logical :: update_desc_c + integer(psb_ipk_) :: debug_level, debug_unit, err_act + character(len=20) :: name, ch_err + + if(psb_get_errstatus() /= 0) return + info=psb_success_ + name='psb_c_p_csr_spspmm' + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_ ; goto 9999 + end if + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + ictxt = desc_a%get_context() + + call psb_info(ictxt, me, np) + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),': Start' + + update_desc_c = desc_c%is_bld() + + ! + ! This is a bit tricky. + ! DESC_A is the descriptor of (the columns of) A, and therefore + ! of the rows of B; the columns of B, in the intended usage, span + ! a different space for which we have DESC_C. + ! We are gathering the halo rows of B to multiply by A; + ! now, the columns of B would ideally be kept in + ! global numbering, so that we can call this repeatedly to accumulate + ! the product of multiple operators, and convert to local numbering + ! at the last possible moment. However, this would imply calling + ! the serial SPSPMM with a matrix B with the GLOBAL number of columns + ! and this could be very expensive in memory. The solution is to keep B + ! in local numbering, so that only columns really appearing count, but to + ! expand the descriptor when gathering the halo, because by performing + ! the products we are extending the support of the operator; hence + ! this routine is intended to be called with a temporary descriptor + ! DESC_C which is in the BUILD state, to allow for such expansion + ! across multiple products. + ! The caller will at some later point finalize the descriptor DESC_C. + ! + + ncol = desc_a%get_local_cols() + call psb_sphalo(bcsr,desc_a,tcsr1,info,& + & colcnv=.true.,rowscale=.true.,outcol_glob=.true.,col_desc=desc_c,data=data) + nnz = tcsr1%get_nzeros() + if (update_desc_c) then + call desc_c%indxmap%g2lip_ins(tcsr1%ja(1:nnz),info) + else + call desc_c%indxmap%g2lip(tcsr1%ja(1:nnz),info) + end if + if (info == psb_success_) call psb_rwextd(ncol,bcsr,info,b=tcsr1) + if (info == psb_success_) call tcsr1%free() + if(info /= psb_success_) then + call psb_errpush(psb_err_internal_error_,name,a_err='Extend am3') + goto 9999 + end if + call bcsr%set_ncols(desc_c%get_local_cols()) + + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & 'starting spspmm 3' + if (debug_level >= psb_debug_outer_) write(debug_unit,*) me,' ',trim(name),& + & 'starting spspmm ',acsr%get_nrows(),acsr%get_ncols(),bcsr%get_nrows(),bcsr%get_ncols() + call psb_spspmm(acsr,bcsr,ccsr,info) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ictxt,err_act) + + return + +End Subroutine psb_c_par_csr_spspmm diff --git a/base/tools/psb_d_par_csr_spspmm.f90 b/base/tools/psb_d_par_csr_spspmm.f90 new file mode 100644 index 00000000..3d13de7d --- /dev/null +++ b/base/tools/psb_d_par_csr_spspmm.f90 @@ -0,0 +1,156 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: psb_d_par_csr_spspmm.f90 +! +! Subroutine: psb_d_par_csr_spspmm +! Version: real +! +! This routine computes a parallel product of two sparse matrices +! +! C = A * B +! +! where all the matrices are stored in CSR. On input and output the matrices +! are stored with column indices in local numbering, but inermediate quantities +! are in global numbering because gathering the halo of B to multiply it +! by A implies a potential enlargement of the support. +! Also, B may have a column index space different from its row index space, +! which is obviously the same as the column space of A. +! +! +! Arguments: +! acsr - type(psb_d_csr_sparse_mat), input. +! The sparse matrix structure A +! desc_a - type(psb_desc_type), input. +! The communication descriptor of the column space of A +! bcsr - type(psb_d_csr_sparse_mat), input/output. +! The sparse matrix structure B, gets row-extended on output +! ccsr - type(psb_d_csr_sparse_mat), output +! The sparse matrix structure C +! desc_c - type(psb_desc_type), input/output. +! The communication descriptor of the column space of B +! +! info - integer, output. +! Error code. +! + +Subroutine psb_d_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) + use psb_base_mod, psb_protect_name => psb_d_par_csr_spspmm + Implicit None + + type(psb_d_csr_sparse_mat),intent(in) :: acsr + type(psb_d_csr_sparse_mat),intent(inout) :: bcsr + type(psb_d_csr_sparse_mat),intent(out) :: ccsr + type(psb_desc_type),intent(in) :: desc_a + type(psb_desc_type),intent(inout) :: desc_c + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: data + ! ...local scalars.... + integer(psb_ipk_) :: ictxt, np,me + integer(psb_ipk_) :: ncol, nnz + type(psb_d_csr_sparse_mat) :: tcsr1 + logical :: update_desc_c + integer(psb_ipk_) :: debug_level, debug_unit, err_act + character(len=20) :: name, ch_err + + if(psb_get_errstatus() /= 0) return + info=psb_success_ + name='psb_d_p_csr_spspmm' + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_ ; goto 9999 + end if + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + ictxt = desc_a%get_context() + + call psb_info(ictxt, me, np) + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),': Start' + + update_desc_c = desc_c%is_bld() + + ! + ! This is a bit tricky. + ! DESC_A is the descriptor of (the columns of) A, and therefore + ! of the rows of B; the columns of B, in the intended usage, span + ! a different space for which we have DESC_C. + ! We are gathering the halo rows of B to multiply by A; + ! now, the columns of B would ideally be kept in + ! global numbering, so that we can call this repeatedly to accumulate + ! the product of multiple operators, and convert to local numbering + ! at the last possible moment. However, this would imply calling + ! the serial SPSPMM with a matrix B with the GLOBAL number of columns + ! and this could be very expensive in memory. The solution is to keep B + ! in local numbering, so that only columns really appearing count, but to + ! expand the descriptor when gathering the halo, because by performing + ! the products we are extending the support of the operator; hence + ! this routine is intended to be called with a temporary descriptor + ! DESC_C which is in the BUILD state, to allow for such expansion + ! across multiple products. + ! The caller will at some later point finalize the descriptor DESC_C. + ! + + ncol = desc_a%get_local_cols() + call psb_sphalo(bcsr,desc_a,tcsr1,info,& + & colcnv=.true.,rowscale=.true.,outcol_glob=.true.,col_desc=desc_c,data=data) + nnz = tcsr1%get_nzeros() + if (update_desc_c) then + call desc_c%indxmap%g2lip_ins(tcsr1%ja(1:nnz),info) + else + call desc_c%indxmap%g2lip(tcsr1%ja(1:nnz),info) + end if + if (info == psb_success_) call psb_rwextd(ncol,bcsr,info,b=tcsr1) + if (info == psb_success_) call tcsr1%free() + if(info /= psb_success_) then + call psb_errpush(psb_err_internal_error_,name,a_err='Extend am3') + goto 9999 + end if + call bcsr%set_ncols(desc_c%get_local_cols()) + + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & 'starting spspmm 3' + if (debug_level >= psb_debug_outer_) write(debug_unit,*) me,' ',trim(name),& + & 'starting spspmm ',acsr%get_nrows(),acsr%get_ncols(),bcsr%get_nrows(),bcsr%get_ncols() + call psb_spspmm(acsr,bcsr,ccsr,info) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ictxt,err_act) + + return + +End Subroutine psb_d_par_csr_spspmm diff --git a/base/tools/psb_s_par_csr_spspmm.f90 b/base/tools/psb_s_par_csr_spspmm.f90 new file mode 100644 index 00000000..30530b41 --- /dev/null +++ b/base/tools/psb_s_par_csr_spspmm.f90 @@ -0,0 +1,156 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: psb_s_par_csr_spspmm.f90 +! +! Subroutine: psb_s_par_csr_spspmm +! Version: real +! +! This routine computes a parallel product of two sparse matrices +! +! C = A * B +! +! where all the matrices are stored in CSR. On input and output the matrices +! are stored with column indices in local numbering, but inermediate quantities +! are in global numbering because gathering the halo of B to multiply it +! by A implies a potential enlargement of the support. +! Also, B may have a column index space different from its row index space, +! which is obviously the same as the column space of A. +! +! +! Arguments: +! acsr - type(psb_s_csr_sparse_mat), input. +! The sparse matrix structure A +! desc_a - type(psb_desc_type), input. +! The communication descriptor of the column space of A +! bcsr - type(psb_s_csr_sparse_mat), input/output. +! The sparse matrix structure B, gets row-extended on output +! ccsr - type(psb_s_csr_sparse_mat), output +! The sparse matrix structure C +! desc_c - type(psb_desc_type), input/output. +! The communication descriptor of the column space of B +! +! info - integer, output. +! Error code. +! + +Subroutine psb_s_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) + use psb_base_mod, psb_protect_name => psb_s_par_csr_spspmm + Implicit None + + type(psb_s_csr_sparse_mat),intent(in) :: acsr + type(psb_s_csr_sparse_mat),intent(inout) :: bcsr + type(psb_s_csr_sparse_mat),intent(out) :: ccsr + type(psb_desc_type),intent(in) :: desc_a + type(psb_desc_type),intent(inout) :: desc_c + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: data + ! ...local scalars.... + integer(psb_ipk_) :: ictxt, np,me + integer(psb_ipk_) :: ncol, nnz + type(psb_s_csr_sparse_mat) :: tcsr1 + logical :: update_desc_c + integer(psb_ipk_) :: debug_level, debug_unit, err_act + character(len=20) :: name, ch_err + + if(psb_get_errstatus() /= 0) return + info=psb_success_ + name='psb_s_p_csr_spspmm' + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_ ; goto 9999 + end if + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + ictxt = desc_a%get_context() + + call psb_info(ictxt, me, np) + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),': Start' + + update_desc_c = desc_c%is_bld() + + ! + ! This is a bit tricky. + ! DESC_A is the descriptor of (the columns of) A, and therefore + ! of the rows of B; the columns of B, in the intended usage, span + ! a different space for which we have DESC_C. + ! We are gathering the halo rows of B to multiply by A; + ! now, the columns of B would ideally be kept in + ! global numbering, so that we can call this repeatedly to accumulate + ! the product of multiple operators, and convert to local numbering + ! at the last possible moment. However, this would imply calling + ! the serial SPSPMM with a matrix B with the GLOBAL number of columns + ! and this could be very expensive in memory. The solution is to keep B + ! in local numbering, so that only columns really appearing count, but to + ! expand the descriptor when gathering the halo, because by performing + ! the products we are extending the support of the operator; hence + ! this routine is intended to be called with a temporary descriptor + ! DESC_C which is in the BUILD state, to allow for such expansion + ! across multiple products. + ! The caller will at some later point finalize the descriptor DESC_C. + ! + + ncol = desc_a%get_local_cols() + call psb_sphalo(bcsr,desc_a,tcsr1,info,& + & colcnv=.true.,rowscale=.true.,outcol_glob=.true.,col_desc=desc_c,data=data) + nnz = tcsr1%get_nzeros() + if (update_desc_c) then + call desc_c%indxmap%g2lip_ins(tcsr1%ja(1:nnz),info) + else + call desc_c%indxmap%g2lip(tcsr1%ja(1:nnz),info) + end if + if (info == psb_success_) call psb_rwextd(ncol,bcsr,info,b=tcsr1) + if (info == psb_success_) call tcsr1%free() + if(info /= psb_success_) then + call psb_errpush(psb_err_internal_error_,name,a_err='Extend am3') + goto 9999 + end if + call bcsr%set_ncols(desc_c%get_local_cols()) + + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & 'starting spspmm 3' + if (debug_level >= psb_debug_outer_) write(debug_unit,*) me,' ',trim(name),& + & 'starting spspmm ',acsr%get_nrows(),acsr%get_ncols(),bcsr%get_nrows(),bcsr%get_ncols() + call psb_spspmm(acsr,bcsr,ccsr,info) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ictxt,err_act) + + return + +End Subroutine psb_s_par_csr_spspmm diff --git a/base/tools/psb_z_par_csr_spspmm.f90 b/base/tools/psb_z_par_csr_spspmm.f90 new file mode 100644 index 00000000..4ec9adab --- /dev/null +++ b/base/tools/psb_z_par_csr_spspmm.f90 @@ -0,0 +1,156 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: psb_z_par_csr_spspmm.f90 +! +! Subroutine: psb_z_par_csr_spspmm +! Version: complex +! +! This routine computes a parallel product of two sparse matrices +! +! C = A * B +! +! where all the matrices are stored in CSR. On input and output the matrices +! are stored with column indices in local numbering, but inermediate quantities +! are in global numbering because gathering the halo of B to multiply it +! by A implies a potential enlargement of the support. +! Also, B may have a column index space different from its row index space, +! which is obviously the same as the column space of A. +! +! +! Arguments: +! acsr - type(psb_z_csr_sparse_mat), input. +! The sparse matrix structure A +! desc_a - type(psb_desc_type), input. +! The communication descriptor of the column space of A +! bcsr - type(psb_z_csr_sparse_mat), input/output. +! The sparse matrix structure B, gets row-extended on output +! ccsr - type(psb_z_csr_sparse_mat), output +! The sparse matrix structure C +! desc_c - type(psb_desc_type), input/output. +! The communication descriptor of the column space of B +! +! info - integer, output. +! Error code. +! + +Subroutine psb_z_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) + use psb_base_mod, psb_protect_name => psb_z_par_csr_spspmm + Implicit None + + type(psb_z_csr_sparse_mat),intent(in) :: acsr + type(psb_z_csr_sparse_mat),intent(inout) :: bcsr + type(psb_z_csr_sparse_mat),intent(out) :: ccsr + type(psb_desc_type),intent(in) :: desc_a + type(psb_desc_type),intent(inout) :: desc_c + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: data + ! ...local scalars.... + integer(psb_ipk_) :: ictxt, np,me + integer(psb_ipk_) :: ncol, nnz + type(psb_z_csr_sparse_mat) :: tcsr1 + logical :: update_desc_c + integer(psb_ipk_) :: debug_level, debug_unit, err_act + character(len=20) :: name, ch_err + + if(psb_get_errstatus() /= 0) return + info=psb_success_ + name='psb_z_p_csr_spspmm' + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_ ; goto 9999 + end if + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + ictxt = desc_a%get_context() + + call psb_info(ictxt, me, np) + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),': Start' + + update_desc_c = desc_c%is_bld() + + ! + ! This is a bit tricky. + ! DESC_A is the descriptor of (the columns of) A, and therefore + ! of the rows of B; the columns of B, in the intended usage, span + ! a different space for which we have DESC_C. + ! We are gathering the halo rows of B to multiply by A; + ! now, the columns of B would ideally be kept in + ! global numbering, so that we can call this repeatedly to accumulate + ! the product of multiple operators, and convert to local numbering + ! at the last possible moment. However, this would imply calling + ! the serial SPSPMM with a matrix B with the GLOBAL number of columns + ! and this could be very expensive in memory. The solution is to keep B + ! in local numbering, so that only columns really appearing count, but to + ! expand the descriptor when gathering the halo, because by performing + ! the products we are extending the support of the operator; hence + ! this routine is intended to be called with a temporary descriptor + ! DESC_C which is in the BUILD state, to allow for such expansion + ! across multiple products. + ! The caller will at some later point finalize the descriptor DESC_C. + ! + + ncol = desc_a%get_local_cols() + call psb_sphalo(bcsr,desc_a,tcsr1,info,& + & colcnv=.true.,rowscale=.true.,outcol_glob=.true.,col_desc=desc_c,data=data) + nnz = tcsr1%get_nzeros() + if (update_desc_c) then + call desc_c%indxmap%g2lip_ins(tcsr1%ja(1:nnz),info) + else + call desc_c%indxmap%g2lip(tcsr1%ja(1:nnz),info) + end if + if (info == psb_success_) call psb_rwextd(ncol,bcsr,info,b=tcsr1) + if (info == psb_success_) call tcsr1%free() + if(info /= psb_success_) then + call psb_errpush(psb_err_internal_error_,name,a_err='Extend am3') + goto 9999 + end if + call bcsr%set_ncols(desc_c%get_local_cols()) + + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & 'starting spspmm 3' + if (debug_level >= psb_debug_outer_) write(debug_unit,*) me,' ',trim(name),& + & 'starting spspmm ',acsr%get_nrows(),acsr%get_ncols(),bcsr%get_nrows(),bcsr%get_ncols() + call psb_spspmm(acsr,bcsr,ccsr,info) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ictxt,err_act) + + return + +End Subroutine psb_z_par_csr_spspmm From 2478dc55ba6df752b96fe5179d6ae247ea9dc9d6 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Fri, 23 Aug 2019 14:30:01 +0100 Subject: [PATCH 20/21] New name for map_aggr_ parm. --- base/modules/comm/psb_base_linmap_mod.f90 | 41 +++++++++++++++++------ base/modules/desc/psb_desc_const_mod.f90 | 4 +-- base/tools/psb_c_map.f90 | 14 ++++---- base/tools/psb_d_map.f90 | 14 ++++---- base/tools/psb_s_map.f90 | 14 ++++---- base/tools/psb_z_map.f90 | 14 ++++---- 6 files changed, 61 insertions(+), 40 deletions(-) diff --git a/base/modules/comm/psb_base_linmap_mod.f90 b/base/modules/comm/psb_base_linmap_mod.f90 index 864d87e9..23f1b95f 100644 --- a/base/modules/comm/psb_base_linmap_mod.f90 +++ b/base/modules/comm/psb_base_linmap_mod.f90 @@ -47,13 +47,15 @@ module psb_base_linmap_mod type(psb_desc_type), pointer :: p_desc_U=>null(), p_desc_V=>null() type(psb_desc_type) :: desc_U, desc_V contains - procedure, pass(map) :: sizeof => base_map_sizeof - procedure, pass(map) :: is_ok => base_is_ok - procedure, pass(map) :: is_asb => base_is_asb - procedure, pass(map) :: get_kind => base_get_kind - procedure, pass(map) :: set_kind => base_set_kind - procedure, pass(map) :: free => base_free - procedure, pass(map) :: clone => base_clone + procedure, pass(map) :: sizeof => base_map_sizeof + procedure, pass(map) :: is_ok => base_is_ok + procedure, pass(map) :: is_asb => base_is_asb + procedure, pass(map) :: get_kind => base_get_kind + procedure, pass(map) :: set_kind => base_set_kind + procedure, pass(map) :: is_dec_aggr => base_is_dec_aggr + procedure, pass(map) :: is_gen_linear => base_is_gen_linear + procedure, pass(map) :: free => base_free + procedure, pass(map) :: clone => base_clone end type psb_base_linmap_type @@ -62,7 +64,8 @@ module psb_base_linmap_mod end interface private :: base_map_sizeof, base_is_ok, base_is_asb,& - & base_get_kind, base_set_kind, base_free, base_clone + & base_get_kind, base_set_kind, base_free, base_clone,& + & base_is_dec_aggr, base_is_gen_linear contains @@ -93,7 +96,7 @@ contains res = .false. select case(map%get_kind()) - case (psb_map_aggr_) + case (psb_map_dec_aggr_) if (.not.associated(map%p_desc_U)) return if (.not.associated(map%p_desc_V)) return res = map%p_desc_U%is_ok().and.map%p_desc_V%is_ok() @@ -111,7 +114,7 @@ contains res = .false. select case(map%get_kind()) - case (psb_map_aggr_) + case (psb_map_dec_aggr_) if (.not.associated(map%p_desc_U)) return if (.not.associated(map%p_desc_V)) return res = map%p_desc_U%is_asb().and.map%p_desc_V%is_asb() @@ -121,6 +124,24 @@ contains end function base_is_asb + function base_is_dec_aggr(map) result(res) + use psb_desc_mod + implicit none + class(psb_base_linmap_type), intent(in) :: map + logical :: res + + res = (map%get_kind() == psb_map_dec_aggr_) + end function base_is_dec_aggr + + function base_is_gen_linear(map) result(res) + use psb_desc_mod + implicit none + class(psb_base_linmap_type), intent(in) :: map + logical :: res + + res = (map%get_kind() == psb_map_gen_linear_) + end function base_is_gen_linear + function base_map_sizeof(map) result(val) use psb_desc_mod implicit none diff --git a/base/modules/desc/psb_desc_const_mod.f90 b/base/modules/desc/psb_desc_const_mod.f90 index 26d633fb..210aa6db 100644 --- a/base/modules/desc/psb_desc_const_mod.f90 +++ b/base/modules/desc/psb_desc_const_mod.f90 @@ -55,8 +55,8 @@ module psb_desc_const_mod ! Types of mapping between descriptors. integer(psb_ipk_), parameter :: psb_map_xhal_ = 123 integer(psb_ipk_), parameter :: psb_map_asov_ = psb_map_xhal_+1 - integer(psb_ipk_), parameter :: psb_map_aggr_ = psb_map_asov_+1 - integer(psb_ipk_), parameter :: psb_map_gen_linear_ = psb_map_aggr_+1 + integer(psb_ipk_), parameter :: psb_map_dec_aggr_ = psb_map_asov_+1 + integer(psb_ipk_), parameter :: psb_map_gen_linear_ = psb_map_dec_aggr_+1 integer(psb_ipk_), parameter :: psb_ovt_xhal_ = psb_map_xhal_, psb_ovt_asov_=psb_map_asov_ ! diff --git a/base/tools/psb_c_map.f90 b/base/tools/psb_c_map.f90 index afbf189a..4c9d70cd 100644 --- a/base/tools/psb_c_map.f90 +++ b/base/tools/psb_c_map.f90 @@ -64,7 +64,7 @@ subroutine psb_c_map_U2V_a(alpha,x,beta,y,map,info,work) map_kind = map%get_kind() select case(map_kind) - case(psb_map_aggr_) + case(psb_map_dec_aggr_) ictxt = map%p_desc_V%get_context() nr2 = map%p_desc_V%get_global_rows() @@ -104,7 +104,7 @@ subroutine psb_c_map_U2V_a(alpha,x,beta,y,map,info,work) case default write(psb_err_unit,*) trim(name),' Invalid descriptor input', & - & map_kind, psb_map_aggr_, psb_map_gen_linear_ + & map_kind, psb_map_dec_aggr_, psb_map_gen_linear_ info = 1 return end select @@ -138,7 +138,7 @@ subroutine psb_c_map_U2V_v(alpha,x,beta,y,map,info,work,vtx,vty) map_kind = map%get_kind() select case(map_kind) - case(psb_map_aggr_) + case(psb_map_dec_aggr_) ictxt = map%p_desc_V%get_context() call psb_info(ictxt,iam,np) @@ -205,7 +205,7 @@ subroutine psb_c_map_U2V_v(alpha,x,beta,y,map,info,work,vtx,vty) case default write(psb_err_unit,*) trim(name),' Invalid descriptor input', & - & map_kind, psb_map_aggr_, psb_map_gen_linear_ + & map_kind, psb_map_dec_aggr_, psb_map_gen_linear_ info = 1 return end select @@ -246,7 +246,7 @@ subroutine psb_c_map_V2U_a(alpha,x,beta,y,map,info,work) map_kind = map%get_kind() select case(map_kind) - case(psb_map_aggr_) + case(psb_map_dec_aggr_) ictxt = map%p_desc_U%get_context() nr2 = map%p_desc_U%get_global_rows() @@ -319,7 +319,7 @@ subroutine psb_c_map_V2U_v(alpha,x,beta,y,map,info,work,vtx,vty) map_kind = map%get_kind() select case(map_kind) - case(psb_map_aggr_) + case(psb_map_dec_aggr_) ictxt = map%p_desc_U%get_context() call psb_info(ictxt,iam,np) @@ -408,7 +408,7 @@ function psb_c_linmap(map_kind,desc_U, desc_V, mat_U2V, mat_V2U,iaggr,naggr) & info = psb_success_ select case(map_kind) - case (psb_map_aggr_) + case (psb_map_dec_aggr_) ! OK if (psb_is_ok_desc(desc_U)) then diff --git a/base/tools/psb_d_map.f90 b/base/tools/psb_d_map.f90 index 6e0430f5..1ca84f67 100644 --- a/base/tools/psb_d_map.f90 +++ b/base/tools/psb_d_map.f90 @@ -64,7 +64,7 @@ subroutine psb_d_map_U2V_a(alpha,x,beta,y,map,info,work) map_kind = map%get_kind() select case(map_kind) - case(psb_map_aggr_) + case(psb_map_dec_aggr_) ictxt = map%p_desc_V%get_context() nr2 = map%p_desc_V%get_global_rows() @@ -104,7 +104,7 @@ subroutine psb_d_map_U2V_a(alpha,x,beta,y,map,info,work) case default write(psb_err_unit,*) trim(name),' Invalid descriptor input', & - & map_kind, psb_map_aggr_, psb_map_gen_linear_ + & map_kind, psb_map_dec_aggr_, psb_map_gen_linear_ info = 1 return end select @@ -138,7 +138,7 @@ subroutine psb_d_map_U2V_v(alpha,x,beta,y,map,info,work,vtx,vty) map_kind = map%get_kind() select case(map_kind) - case(psb_map_aggr_) + case(psb_map_dec_aggr_) ictxt = map%p_desc_V%get_context() call psb_info(ictxt,iam,np) @@ -205,7 +205,7 @@ subroutine psb_d_map_U2V_v(alpha,x,beta,y,map,info,work,vtx,vty) case default write(psb_err_unit,*) trim(name),' Invalid descriptor input', & - & map_kind, psb_map_aggr_, psb_map_gen_linear_ + & map_kind, psb_map_dec_aggr_, psb_map_gen_linear_ info = 1 return end select @@ -246,7 +246,7 @@ subroutine psb_d_map_V2U_a(alpha,x,beta,y,map,info,work) map_kind = map%get_kind() select case(map_kind) - case(psb_map_aggr_) + case(psb_map_dec_aggr_) ictxt = map%p_desc_U%get_context() nr2 = map%p_desc_U%get_global_rows() @@ -319,7 +319,7 @@ subroutine psb_d_map_V2U_v(alpha,x,beta,y,map,info,work,vtx,vty) map_kind = map%get_kind() select case(map_kind) - case(psb_map_aggr_) + case(psb_map_dec_aggr_) ictxt = map%p_desc_U%get_context() call psb_info(ictxt,iam,np) @@ -408,7 +408,7 @@ function psb_d_linmap(map_kind,desc_U, desc_V, mat_U2V, mat_V2U,iaggr,naggr) & info = psb_success_ select case(map_kind) - case (psb_map_aggr_) + case (psb_map_dec_aggr_) ! OK if (psb_is_ok_desc(desc_U)) then diff --git a/base/tools/psb_s_map.f90 b/base/tools/psb_s_map.f90 index 323025c2..4de9ecc8 100644 --- a/base/tools/psb_s_map.f90 +++ b/base/tools/psb_s_map.f90 @@ -64,7 +64,7 @@ subroutine psb_s_map_U2V_a(alpha,x,beta,y,map,info,work) map_kind = map%get_kind() select case(map_kind) - case(psb_map_aggr_) + case(psb_map_dec_aggr_) ictxt = map%p_desc_V%get_context() nr2 = map%p_desc_V%get_global_rows() @@ -104,7 +104,7 @@ subroutine psb_s_map_U2V_a(alpha,x,beta,y,map,info,work) case default write(psb_err_unit,*) trim(name),' Invalid descriptor input', & - & map_kind, psb_map_aggr_, psb_map_gen_linear_ + & map_kind, psb_map_dec_aggr_, psb_map_gen_linear_ info = 1 return end select @@ -138,7 +138,7 @@ subroutine psb_s_map_U2V_v(alpha,x,beta,y,map,info,work,vtx,vty) map_kind = map%get_kind() select case(map_kind) - case(psb_map_aggr_) + case(psb_map_dec_aggr_) ictxt = map%p_desc_V%get_context() call psb_info(ictxt,iam,np) @@ -205,7 +205,7 @@ subroutine psb_s_map_U2V_v(alpha,x,beta,y,map,info,work,vtx,vty) case default write(psb_err_unit,*) trim(name),' Invalid descriptor input', & - & map_kind, psb_map_aggr_, psb_map_gen_linear_ + & map_kind, psb_map_dec_aggr_, psb_map_gen_linear_ info = 1 return end select @@ -246,7 +246,7 @@ subroutine psb_s_map_V2U_a(alpha,x,beta,y,map,info,work) map_kind = map%get_kind() select case(map_kind) - case(psb_map_aggr_) + case(psb_map_dec_aggr_) ictxt = map%p_desc_U%get_context() nr2 = map%p_desc_U%get_global_rows() @@ -319,7 +319,7 @@ subroutine psb_s_map_V2U_v(alpha,x,beta,y,map,info,work,vtx,vty) map_kind = map%get_kind() select case(map_kind) - case(psb_map_aggr_) + case(psb_map_dec_aggr_) ictxt = map%p_desc_U%get_context() call psb_info(ictxt,iam,np) @@ -408,7 +408,7 @@ function psb_s_linmap(map_kind,desc_U, desc_V, mat_U2V, mat_V2U,iaggr,naggr) & info = psb_success_ select case(map_kind) - case (psb_map_aggr_) + case (psb_map_dec_aggr_) ! OK if (psb_is_ok_desc(desc_U)) then diff --git a/base/tools/psb_z_map.f90 b/base/tools/psb_z_map.f90 index fe53ba7f..a63f38d7 100644 --- a/base/tools/psb_z_map.f90 +++ b/base/tools/psb_z_map.f90 @@ -64,7 +64,7 @@ subroutine psb_z_map_U2V_a(alpha,x,beta,y,map,info,work) map_kind = map%get_kind() select case(map_kind) - case(psb_map_aggr_) + case(psb_map_dec_aggr_) ictxt = map%p_desc_V%get_context() nr2 = map%p_desc_V%get_global_rows() @@ -104,7 +104,7 @@ subroutine psb_z_map_U2V_a(alpha,x,beta,y,map,info,work) case default write(psb_err_unit,*) trim(name),' Invalid descriptor input', & - & map_kind, psb_map_aggr_, psb_map_gen_linear_ + & map_kind, psb_map_dec_aggr_, psb_map_gen_linear_ info = 1 return end select @@ -138,7 +138,7 @@ subroutine psb_z_map_U2V_v(alpha,x,beta,y,map,info,work,vtx,vty) map_kind = map%get_kind() select case(map_kind) - case(psb_map_aggr_) + case(psb_map_dec_aggr_) ictxt = map%p_desc_V%get_context() call psb_info(ictxt,iam,np) @@ -205,7 +205,7 @@ subroutine psb_z_map_U2V_v(alpha,x,beta,y,map,info,work,vtx,vty) case default write(psb_err_unit,*) trim(name),' Invalid descriptor input', & - & map_kind, psb_map_aggr_, psb_map_gen_linear_ + & map_kind, psb_map_dec_aggr_, psb_map_gen_linear_ info = 1 return end select @@ -246,7 +246,7 @@ subroutine psb_z_map_V2U_a(alpha,x,beta,y,map,info,work) map_kind = map%get_kind() select case(map_kind) - case(psb_map_aggr_) + case(psb_map_dec_aggr_) ictxt = map%p_desc_U%get_context() nr2 = map%p_desc_U%get_global_rows() @@ -319,7 +319,7 @@ subroutine psb_z_map_V2U_v(alpha,x,beta,y,map,info,work,vtx,vty) map_kind = map%get_kind() select case(map_kind) - case(psb_map_aggr_) + case(psb_map_dec_aggr_) ictxt = map%p_desc_U%get_context() call psb_info(ictxt,iam,np) @@ -408,7 +408,7 @@ function psb_z_linmap(map_kind,desc_U, desc_V, mat_U2V, mat_V2U,iaggr,naggr) & info = psb_success_ select case(map_kind) - case (psb_map_aggr_) + case (psb_map_dec_aggr_) ! OK if (psb_is_ok_desc(desc_U)) then From acfbc29fb72972e16020c0922df312066ed5ef89 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Fri, 23 Aug 2019 14:49:32 +0100 Subject: [PATCH 21/21] Pretty-printing --- base/modules/tools/psb_cd_tools_mod.f90 | 1 - 1 file changed, 1 deletion(-) diff --git a/base/modules/tools/psb_cd_tools_mod.f90 b/base/modules/tools/psb_cd_tools_mod.f90 index a9a3be36..317a40c0 100644 --- a/base/modules/tools/psb_cd_tools_mod.f90 +++ b/base/modules/tools/psb_cd_tools_mod.f90 @@ -179,7 +179,6 @@ module psb_cd_tools_mod integer(psb_ipk_), intent(out) :: info type(psb_desc_type), intent(out) :: desc optional :: mg, ng, parts, vg, vl, flag, nl, repl, globalcheck, lidx, usehash - end subroutine psb_cdall end interface