From 6987582c30432afc65f6b6988e650349a9400a07 Mon Sep 17 00:00:00 2001 From: gabrielequatrana Date: Sat, 9 Mar 2024 19:22:40 +0100 Subject: [PATCH] Done SERIAL --- .gitignore | 3 + base/comm/psb_dscatter.F90 | 68 ++++ base/modules/comm/psb_d_comm_mod.f90 | 10 + base/modules/psblas/psb_d_psblas_mod.F90 | 89 ++++- base/modules/serial/psb_d_base_mat_mod.F90 | 92 +++-- base/modules/serial/psb_d_base_vect_mod.F90 | 202 +++++++---- base/modules/serial/psb_d_mat_mod.F90 | 11 + base/modules/serial/psb_d_vect_mod.F90 | 337 +++++++++-------- base/modules/tools/psb_d_tools_mod.F90 | 26 ++ base/psblas/Makefile | 2 +- base/psblas/psb_damax.f90 | 98 +++++ base/psblas/psb_daxpby.f90 | 185 ++++++++++ base/psblas/psb_ddot.f90 | 280 +++++++++++++- base/psblas/psb_dnrm2.f90 | 106 ++++++ base/psblas/psb_dqrfact.f90 | 76 ++++ base/psblas/psb_dspmm.f90 | 228 ++++++++++++ base/serial/impl/psb_d_base_mat_impl.F90 | 20 + base/serial/impl/psb_d_mat_impl.F90 | 42 +++ base/tools/psb_dallc.f90 | 141 +++++++ base/tools/psb_dasb.f90 | 103 +++++- base/tools/psb_dfree.f90 | 52 +++ krylov/Makefile | 2 +- krylov/psb_dbgmres.f90 | 383 ++++++++++++++++++++ krylov/psb_dkrylov.f90 | 128 +++++++ krylov/psb_krylov_mod.f90 | 21 ++ prec/psb_d_base_prec_mod.f90 | 44 ++- prec/psb_d_prec_type.f90 | 43 ++- test/block_krylov/Makefile | 40 ++ test/block_krylov/getp.f90 | 169 +++++++++ test/block_krylov/psb_dbf_sample.f90 | 265 ++++++++++++++ 30 files changed, 3012 insertions(+), 254 deletions(-) create mode 100644 base/psblas/psb_dqrfact.f90 create mode 100644 krylov/psb_dbgmres.f90 create mode 100644 test/block_krylov/Makefile create mode 100644 test/block_krylov/getp.f90 create mode 100644 test/block_krylov/psb_dbf_sample.f90 diff --git a/.gitignore b/.gitignore index 7227f784..82b9c1a0 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,6 @@ autom4te.cache # the executable from tests runs +#IDE +.vscode/ + diff --git a/base/comm/psb_dscatter.F90 b/base/comm/psb_dscatter.F90 index 3465333b..a6d09a67 100644 --- a/base/comm/psb_dscatter.F90 +++ b/base/comm/psb_dscatter.F90 @@ -98,3 +98,71 @@ subroutine psb_dscatter_vect(globx, locx, desc_a, info, root, mold) return end subroutine psb_dscatter_vect + +! Subroutine: psb_dscatter_multivect +! This subroutine scatters a global vector locally owned by one process +! into pieces that are local to all the processes. +! +! Arguments: +! globx - real,dimension(:,:) The global matrix to scatter. +! locx - type(psb_d_multivect_type) The local piece of the distributed matrix. +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Error code. +! iroot - integer(optional). The process that owns the global matrix. +! If -1 all the processes have a copy. +! Default -1 +subroutine psb_dscatter_multivect(globx, locx, desc_a, info, root, mold) + use psb_base_mod, psb_protect_name => psb_dscatter_multivect + implicit none + type(psb_d_multivect_type), intent(inout) :: locx + real(psb_dpk_), intent(in) :: globx(:,:) + type(psb_desc_type), intent(in) :: desc_a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: root + class(psb_d_base_multivect_type), intent(in), optional :: mold + + ! locals + type(psb_ctxt_type) :: ctxt + integer(psb_mpk_) :: np, me, icomm, myrank, rootrank + integer(psb_ipk_) :: ierr(5), err_act, m, n, i, j, idx, nrow, iglobx, jglobx,& + & ilocx, jlocx, lda_locx, lda_globx, k, pos, ilx, jlx + real(psb_dpk_), allocatable :: vlocx(:,:) + character(len=20) :: name, ch_err + integer(psb_ipk_) :: debug_level, debug_unit + + name='psb_scatter_multivect' + info=psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_ ; goto 9999 + end if + ctxt=desc_a%get_context() + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + + ! check on blacs grid + call psb_info(ctxt, me, np) + if (np == -1) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + + if (info == psb_success_) call psb_scatter(globx, vlocx, desc_a, info, root=root) + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_scatterv') + goto 9999 + endif + + call locx%bld(vlocx,mold=mold) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + +end subroutine psb_dscatter_multivect diff --git a/base/modules/comm/psb_d_comm_mod.f90 b/base/modules/comm/psb_d_comm_mod.f90 index 013c76e2..45bd2367 100644 --- a/base/modules/comm/psb_d_comm_mod.f90 +++ b/base/modules/comm/psb_d_comm_mod.f90 @@ -92,6 +92,16 @@ module psb_d_comm_mod integer(psb_ipk_), intent(in), optional :: root class(psb_d_base_vect_type), intent(in), optional :: mold end subroutine psb_dscatter_vect + subroutine psb_dscatter_multivect(globx, locx, desc_a, info, root, mold) + import + implicit none + type(psb_d_multivect_type), intent(inout) :: locx + real(psb_dpk_), intent(in) :: globx(:,:) + type(psb_desc_type), intent(in) :: desc_a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: root + class(psb_d_base_multivect_type), intent(in), optional :: mold + end subroutine psb_dscatter_multivect end interface psb_scatter interface psb_gather diff --git a/base/modules/psblas/psb_d_psblas_mod.F90 b/base/modules/psblas/psb_d_psblas_mod.F90 index e4988387..6ea1d499 100644 --- a/base/modules/psblas/psb_d_psblas_mod.F90 +++ b/base/modules/psblas/psb_d_psblas_mod.F90 @@ -32,6 +32,7 @@ module psb_d_psblas_mod use psb_desc_mod, only : psb_desc_type, psb_dpk_, psb_ipk_, psb_lpk_ use psb_d_vect_mod, only : psb_d_vect_type + use psb_d_multivect_mod, only: psb_d_multivect_type use psb_d_mat_mod, only : psb_dspmat_type interface psb_gedot @@ -44,6 +45,27 @@ module psb_d_psblas_mod integer(psb_ipk_), intent(out) :: info logical, intent(in), optional :: global end function psb_ddot_vect + function psb_ddot_multivect(x, y, desc_a,info,t,global) result(res) + import :: psb_desc_type, psb_dpk_, psb_ipk_, & + & psb_d_multivect_type, psb_dspmat_type + real(psb_dpk_), allocatable :: res(:,:) + type(psb_d_multivect_type), intent(inout) :: x, y + logical, optional, intent(in) :: t + type(psb_desc_type), intent(in) :: desc_a + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: global + end function psb_ddot_multivect + function psb_ddot_multivect_1(x, y, desc_a,info,t,global) result(res) + import :: psb_desc_type, psb_dpk_, psb_ipk_, & + & psb_d_multivect_type, psb_dspmat_type + real(psb_dpk_), allocatable :: res(:,:) + type(psb_d_multivect_type), intent(inout) :: x + real(psb_dpk_), intent(in) :: y(:,:) + logical, optional, intent(in) :: t + type(psb_desc_type), intent(in) :: desc_a + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: global + end function psb_ddot_multivect_1 function psb_ddotv(x, y, desc_a,info,global) import :: psb_desc_type, psb_dpk_, psb_ipk_, & & psb_d_vect_type, psb_dspmat_type @@ -98,6 +120,24 @@ module psb_d_psblas_mod type(psb_desc_type), intent (in) :: desc_a integer(psb_ipk_), intent(out) :: info end subroutine psb_daxpby_vect + subroutine psb_daxpby_multivect(alpha, x, beta, y, desc_a, info) + import :: psb_desc_type, psb_dpk_, psb_ipk_, & + & psb_d_multivect_type, psb_dspmat_type + type(psb_d_multivect_type), intent (inout) :: x + type(psb_d_multivect_type), intent (inout) :: y + real(psb_dpk_), intent (in) :: alpha, beta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_daxpby_multivect + subroutine psb_daxpby_multivect_1(alpha, x, beta, y, desc_a, info) + import :: psb_desc_type, psb_dpk_, psb_ipk_, & + & psb_d_multivect_type, psb_dspmat_type + real(psb_dpk_), intent(in) :: x(:,:) + type(psb_d_multivect_type), intent (inout) :: y + real(psb_dpk_), intent (in) :: alpha, beta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_daxpby_multivect_1 subroutine psb_daxpby_vect_out(alpha, x, beta, y,& & z, desc_a, info) import :: psb_desc_type, psb_dpk_, psb_ipk_, & @@ -143,6 +183,17 @@ module psb_d_psblas_mod end subroutine psb_daxpby end interface + interface psb_geqrfact + function psb_dqrfact(x, desc_a, info) result(res) + import :: psb_desc_type, psb_dpk_, psb_ipk_, & + & psb_d_multivect_type, psb_dspmat_type + real(psb_dpk_), allocatable :: res(:,:) + type(psb_d_multivect_type), intent (inout) :: x + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + end function psb_dqrfact + end interface + interface psb_geamax function psb_damax(x, desc_a, info, jx,global) import :: psb_desc_type, psb_dpk_, psb_ipk_, & @@ -172,14 +223,23 @@ module psb_d_psblas_mod integer(psb_ipk_), intent(out) :: info logical, intent(in), optional :: global end function psb_damax_vect + function psb_damax_multivect(x, desc_a, info, global) result(res) + import :: psb_desc_type, psb_dpk_, psb_ipk_, & + & psb_d_multivect_type, psb_dspmat_type + real(psb_dpk_) :: res + type(psb_d_multivect_type), intent (inout) :: x + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: global + end function psb_damax_multivect end interface #if ! defined(HAVE_BUGGY_GENERICS) interface psb_genrmi - procedure psb_damax, psb_damaxv, psb_damax_vect + procedure psb_damax, psb_damaxv, psb_damax_vect, psb_damax_multivect end interface interface psb_normi - procedure psb_damax, psb_damaxv, psb_damax_vect + procedure psb_damax, psb_damaxv, psb_damax_vect, psb_damax_multivect end interface #endif @@ -330,11 +390,20 @@ module psb_d_psblas_mod logical, intent(in), optional :: global type(psb_d_vect_type), intent (inout), optional :: aux end function psb_dnrm2_weightmask_vect + function psb_dnrm2_multivect(x, desc_a, info,global) result(res) + import :: psb_desc_type, psb_dpk_, psb_ipk_, & + & psb_d_multivect_type, psb_dspmat_type + real(psb_dpk_) :: res + type(psb_d_multivect_type), intent (inout) :: x + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: global + end function psb_dnrm2_multivect end interface #if ! defined(HAVE_BUGGY_GENERICS) interface psb_norm2 - procedure psb_dnrm2, psb_dnrm2v, psb_dnrm2_vect, psb_dnrm2_weight_vect, psb_dnrm2_weightmask_vect + procedure psb_dnrm2, psb_dnrm2v, psb_dnrm2_vect, psb_dnrm2_weight_vect, psb_dnrm2_weightmask_vect, psb_dnrm2_multivect end interface #endif @@ -431,6 +500,20 @@ module psb_d_psblas_mod logical, optional, intent(in) :: doswap integer(psb_ipk_), intent(out) :: info end subroutine psb_dspmv_vect + subroutine psb_dspmv_multivect(alpha, a, x, beta, y,& + & desc_a, info, trans, work,doswap) + import :: psb_desc_type, psb_dpk_, psb_ipk_, & + & psb_d_multivect_type, psb_dspmat_type + type(psb_dspmat_type), intent(in) :: a + type(psb_d_multivect_type), intent(inout) :: x + type(psb_d_multivect_type), intent(inout) :: y + real(psb_dpk_), intent(in) :: alpha, beta + type(psb_desc_type), intent(in) :: desc_a + character, optional, intent(in) :: trans + real(psb_dpk_), optional, intent(inout),target :: work(:) + logical, optional, intent(in) :: doswap + integer(psb_ipk_), intent(out) :: info + end subroutine psb_dspmv_multivect end interface interface psb_spsm diff --git a/base/modules/serial/psb_d_base_mat_mod.F90 b/base/modules/serial/psb_d_base_mat_mod.F90 index 5f4c76df..1e06d0c4 100644 --- a/base/modules/serial/psb_d_base_mat_mod.F90 +++ b/base/modules/serial/psb_d_base_mat_mod.F90 @@ -35,6 +35,7 @@ module psb_d_base_mat_mod use psb_base_mat_mod use psb_d_base_vect_mod + use psb_d_base_multivect_mod !> \namespace psb_base_mod \class psb_d_base_sparse_mat @@ -103,33 +104,34 @@ module psb_d_base_mat_mod ! ! Computational methods: defined here but not implemented. ! - procedure, pass(a) :: vect_mv => psb_d_base_vect_mv - procedure, pass(a) :: csmv => psb_d_base_csmv - procedure, pass(a) :: csmm => psb_d_base_csmm - generic, public :: spmm => csmm, csmv, vect_mv - procedure, pass(a) :: in_vect_sv => psb_d_base_inner_vect_sv - procedure, pass(a) :: inner_cssv => psb_d_base_inner_cssv - procedure, pass(a) :: inner_cssm => psb_d_base_inner_cssm - generic, public :: inner_spsm => inner_cssm, inner_cssv, in_vect_sv - procedure, pass(a) :: vect_cssv => psb_d_base_vect_cssv - procedure, pass(a) :: cssv => psb_d_base_cssv - procedure, pass(a) :: cssm => psb_d_base_cssm - generic, public :: spsm => cssm, cssv, vect_cssv - procedure, pass(a) :: scals => psb_d_base_scals - procedure, pass(a) :: scalv => psb_d_base_scal - generic, public :: scal => scals, scalv - procedure, pass(a) :: maxval => psb_d_base_maxval - procedure, pass(a) :: spnmi => psb_d_base_csnmi - procedure, pass(a) :: spnm1 => psb_d_base_csnm1 - procedure, pass(a) :: rowsum => psb_d_base_rowsum - procedure, pass(a) :: arwsum => psb_d_base_arwsum - procedure, pass(a) :: colsum => psb_d_base_colsum - procedure, pass(a) :: aclsum => psb_d_base_aclsum - procedure, pass(a) :: scalpid => psb_d_base_scalplusidentity - procedure, pass(a) :: spaxpby => psb_d_base_spaxpby - procedure, pass(a) :: cmpval => psb_d_base_cmpval - procedure, pass(a) :: cmpmat => psb_d_base_cmpmat - generic, public :: spcmp => cmpval, cmpmat + procedure, pass(a) :: vect_mv => psb_d_base_vect_mv + procedure, pass(a) :: multivect_mv => psb_d_base_multivect_mv + procedure, pass(a) :: csmv => psb_d_base_csmv + procedure, pass(a) :: csmm => psb_d_base_csmm + generic, public :: spmm => csmm, csmv, vect_mv, multivect_mv + procedure, pass(a) :: in_vect_sv => psb_d_base_inner_vect_sv + procedure, pass(a) :: inner_cssv => psb_d_base_inner_cssv + procedure, pass(a) :: inner_cssm => psb_d_base_inner_cssm + generic, public :: inner_spsm => inner_cssm, inner_cssv, in_vect_sv + procedure, pass(a) :: vect_cssv => psb_d_base_vect_cssv + procedure, pass(a) :: cssv => psb_d_base_cssv + procedure, pass(a) :: cssm => psb_d_base_cssm + generic, public :: spsm => cssm, cssv, vect_cssv + procedure, pass(a) :: scals => psb_d_base_scals + procedure, pass(a) :: scalv => psb_d_base_scal + generic, public :: scal => scals, scalv + procedure, pass(a) :: maxval => psb_d_base_maxval + procedure, pass(a) :: spnmi => psb_d_base_csnmi + procedure, pass(a) :: spnm1 => psb_d_base_csnm1 + procedure, pass(a) :: rowsum => psb_d_base_rowsum + procedure, pass(a) :: arwsum => psb_d_base_arwsum + procedure, pass(a) :: colsum => psb_d_base_colsum + procedure, pass(a) :: aclsum => psb_d_base_aclsum + procedure, pass(a) :: scalpid => psb_d_base_scalplusidentity + procedure, pass(a) :: spaxpby => psb_d_base_spaxpby + procedure, pass(a) :: cmpval => psb_d_base_cmpval + procedure, pass(a) :: cmpmat => psb_d_base_cmpmat + generic, public :: spcmp => cmpval, cmpmat end type psb_d_base_sparse_mat private :: d_base_mat_sync, d_base_mat_is_host, d_base_mat_is_dev, & @@ -1239,6 +1241,42 @@ module psb_d_base_mat_mod end subroutine psb_d_base_vect_mv end interface + !> Function multivect_mv: + !! \memberof psb_d_base_sparse_mat + !! \brief Product by an encapsulated array type(psb_d_multivect_type) + !! + !! Compute + !! Y = alpha*op(A)*X + beta*Y + !! Usually the unwrapping of the encapsulated multivector is done + !! here, so that all the derived classes need only the + !! versions with the standard arrays. + !! Must be overridden explicitly in case of non standard memory + !! management; an example would be external memory allocation + !! in attached processors such as GPUs. + !! + !! + !! \param alpha Scaling factor for Ax + !! \param A the input sparse matrix + !! \param x the input X + !! \param beta Scaling factor for y + !! \param y the input/output Y + !! \param info return code + !! \param trans [N] Whether to use A (N), its transpose (T) + !! or its conjugate transpose (C) + !! + ! + interface + subroutine psb_d_base_multivect_mv(alpha,a,x,beta,y,info,trans) + import + class(psb_d_base_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_multivect_type), intent(inout) :: x + class(psb_d_base_multivect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_base_multivect_mv + end interface + ! !> Function cssm: !! \memberof psb_d_base_sparse_mat diff --git a/base/modules/serial/psb_d_base_vect_mod.F90 b/base/modules/serial/psb_d_base_vect_mod.F90 index 87f5b0e4..707ad9ba 100644 --- a/base/modules/serial/psb_d_base_vect_mod.F90 +++ b/base/modules/serial/psb_d_base_vect_mod.F90 @@ -2230,9 +2230,9 @@ module psb_d_base_multivect_mod procedure, pass(x) :: set_host => d_base_mlv_set_host procedure, pass(x) :: set_dev => d_base_mlv_set_dev procedure, pass(x) :: set_sync => d_base_mlv_set_sync - ! ! Basic info + ! procedure, pass(x) :: get_nrows => d_base_mlv_get_nrows procedure, pass(x) :: get_ncols => d_base_mlv_get_ncols procedure, pass(x) :: sizeof => d_base_mlv_sizeof @@ -2245,7 +2245,6 @@ module psb_d_base_multivect_mod procedure, pass(x) :: set_scal => d_base_mlv_set_scal procedure, pass(x) :: set_vect => d_base_mlv_set_vect generic, public :: set => set_vect, set_scal - ! ! Dot product and AXPBY ! @@ -2279,7 +2278,7 @@ module psb_d_base_multivect_mod procedure, pass(x) :: absval1 => d_base_mlv_absval1 procedure, pass(x) :: absval2 => d_base_mlv_absval2 generic, public :: absval => absval1, absval2 - + procedure, pass(x) :: qr_fact => d_base_mlv_qr_fact ! ! These are for handling gather/scatter in new ! comm internals implementation. @@ -2291,7 +2290,6 @@ module psb_d_base_multivect_mod procedure, pass(x) :: free_buffer => d_base_mlv_free_buffer procedure, pass(x) :: new_comid => d_base_mlv_new_comid procedure, pass(x) :: free_comid => d_base_mlv_free_comid - ! ! Gather/scatter. These are needed for MPI interfacing. ! May have to be reworked. @@ -2807,22 +2805,29 @@ contains ! Dot products ! ! - !> Function base_mlv_dot_v + !> Function base_mlv_dot_v !! \memberof psb_d_base_multivect_type - !! \brief Dot product by another base_mlv_vector - !! \param n Number of entries to be considered - !! \param y The other (base_mlv_vect) to be multiplied by + !! \brief Dot product by another base_mlv_vector + !! \param y The other (base_mlv_vect) to be multiplied by + !! \param res Result matrix + !! \param t If true, x is transposed !! - function d_base_mlv_dot_v(n,x,y) result(res) + subroutine d_base_mlv_dot_v(x,y,res,t) implicit none class(psb_d_base_multivect_type), intent(inout) :: x, y - integer(psb_ipk_), intent(in) :: n - real(psb_dpk_), allocatable :: res(:) - real(psb_dpk_), external :: ddot - integer(psb_ipk_) :: j,nc + real(psb_dpk_), intent(inout) :: res(:,:) + logical, optional, intent(in) :: t + logical :: t_ + external :: dgemm + integer(psb_ipk_) :: x_m, x_n, y_m, y_n + + if (present(t)) then + t_ = t + else + t_ = .false. + end if if (x%is_dev()) call x%sync() - res = dzero ! ! Note: this is the base implementation. ! When we get here, we are sure that X is of @@ -2830,47 +2835,65 @@ contains ! If Y is not, throw the burden on it, implicitly ! calling dot_a ! + x_m = x%get_nrows() + x_n = x%get_ncols() + y_m = y%get_nrows() + y_n = y%get_ncols() select type(yy => y) type is (psb_d_base_multivect_type) if (y%is_dev()) call y%sync() - nc = min(psb_size(x%v,2_psb_ipk_),psb_size(y%v,2_psb_ipk_)) - allocate(res(nc)) - do j=1,nc - res(j) = ddot(n,x%v(:,j),1,y%v(:,j),1) - end do - class default - res = y%dot(n,x%v) + if (t_) then + call dgemm('T','N',x_n,y_n,x_n,done,x%v,x_n,y%v,y_m,dzero,res,x_n) + else + call dgemm('N','N',x_m,y_n,x_n,done,x%v,x_m,y%v,y_m,dzero,res,x_m) + end if + class default + call x%dot(y%v,res,t) end select - end function d_base_mlv_dot_v + end subroutine d_base_mlv_dot_v ! ! Base workhorse is good old BLAS1 ! ! - !> Function base_mlv_dot_a - !! \memberof psb_d_base_multivect_type - !! \brief Dot product by a normal array - !! \param n Number of entries to be considered - !! \param y(:) The array to be multiplied by + !> Function base_mlv_dot_a + !! \memberof psb_d_base_multivect_type + !! \brief Dot product by a normal array + !! \param n Number of entries to be considered + !! \param y(:,:) The array to be multiplied by + !! \param res Result matrix + !! \param t If true, x is transposed !! - function d_base_mlv_dot_a(n,x,y) result(res) - implicit none + subroutine d_base_mlv_dot_a(x,y,res,t) class(psb_d_base_multivect_type), intent(inout) :: x - real(psb_dpk_), intent(in) :: y(:,:) - integer(psb_ipk_), intent(in) :: n - real(psb_dpk_), allocatable :: res(:) - real(psb_dpk_), external :: ddot - integer(psb_ipk_) :: j,nc + real(psb_dpk_), intent(in) :: y(:,:) + real(psb_dpk_), intent(inout) :: res(:,:) + logical, optional, intent(in) :: t + logical :: t_ + external :: dgemm + integer(psb_ipk_) :: x_m, x_n, y_m, y_n + + if (present(t)) then + t_ = t + else + t_ = .false. + end if if (x%is_dev()) call x%sync() - nc = min(psb_size(x%v,2_psb_ipk_),size(y,2_psb_ipk_)) - allocate(res(nc)) - do j=1,nc - res(j) = ddot(n,x%v(:,j),1,y(:,j),1) - end do - end function d_base_mlv_dot_a + x_m = x%get_nrows() + x_n = x%get_ncols() + y_m = size(y,dim=1) + y_n = size(y,dim=2) + + if (t_) then + call dgemm('T','N',x_n,y_n,x_n,done,x%v,x_n,y,y_m,dzero,res,x_n) + else + call dgemm('N','N',x_m,y_n,x_n,done,x%v,x_m,y,y_m,dzero,res,x_m) + end if + + end subroutine d_base_mlv_dot_a ! ! AXPBY is invoked via Y, hence the structure below. @@ -2920,7 +2943,7 @@ contains !! \brief AXPBY by a normal array y=alpha*x+beta*y !! \param m Number of entries to be considered !! \param alpha scalar alpha - !! \param x(:) The array to be added + !! \param x(:,:) The array to be added !! \param beta scalar alpha !! \param info return code !! @@ -3192,21 +3215,18 @@ contains !> Function base_mlv_nrm2 !! \memberof psb_d_base_multivect_type !! \brief 2-norm |x(1:n)|_2 - !! \param n how many entries to consider - function d_base_mlv_nrm2(n,x) result(res) + !! \param nc how many entries to consider + function d_base_mlv_nrm2(nc,x) result(res) implicit none class(psb_d_base_multivect_type), intent(inout) :: x - integer(psb_ipk_), intent(in) :: n - real(psb_dpk_), allocatable :: res(:) - real(psb_dpk_), external :: dnrm2 - integer(psb_ipk_) :: j, nc + integer(psb_ipk_), intent(in) :: nc + real(psb_dpk_), allocatable :: res + real(psb_dpk_), external :: dnrm2 + integer(psb_ipk_) :: j, nr if (x%is_dev()) call x%sync() - nc = psb_size(x%v,2_psb_ipk_) - allocate(res(nc)) - do j=1,nc - res(j) = dnrm2(n,x%v(:,j),1) - end do + nr = x%get_nrows() + res = dnrm2(nc*nr,x%v,1) end function d_base_mlv_nrm2 @@ -3214,19 +3234,19 @@ contains !> Function base_mlv_amax !! \memberof psb_d_base_multivect_type !! \brief infinity-norm |x(1:n)|_\infty - !! \param n how many entries to consider - function d_base_mlv_amax(n,x) result(res) + !! \param nc how many entries to consider + function d_base_mlv_amax(nc,x) result(res) implicit none class(psb_d_base_multivect_type), intent(inout) :: x - integer(psb_ipk_), intent(in) :: n - real(psb_dpk_), allocatable :: res(:) - integer(psb_ipk_) :: j, nc + integer(psb_ipk_), intent(in) :: nc + real(psb_dpk_) :: res + integer(psb_ipk_) :: i, nr if (x%is_dev()) call x%sync() - nc = psb_size(x%v,2_psb_ipk_) - allocate(res(nc)) - do j=1,nc - res(j) = maxval(abs(x%v(1:n,j))) + nr = x%get_nrows() + res = 0 + do i=1,nr + res = max(res,sum(abs(x%v(i,1:nc)))) end do end function d_base_mlv_amax @@ -3235,19 +3255,19 @@ contains !> Function base_mlv_asum !! \memberof psb_d_base_multivect_type !! \brief 1-norm |x(1:n)|_1 - !! \param n how many entries to consider - function d_base_mlv_asum(n,x) result(res) + !! \param nc how many entries to consider + function d_base_mlv_asum(nc,x) result(res) implicit none class(psb_d_base_multivect_type), intent(inout) :: x - integer(psb_ipk_), intent(in) :: n - real(psb_dpk_), allocatable :: res(:) - integer(psb_ipk_) :: j, nc + integer(psb_ipk_), intent(in) :: nc + real(psb_dpk_), allocatable :: res + integer(psb_ipk_) :: j, nr if (x%is_dev()) call x%sync() - nc = psb_size(x%v,2_psb_ipk_) - allocate(res(nc)) + nr = x%get_nrows() + res = 0 do j=1,nc - res(j) = sum(abs(x%v(1:n,j))) + res = max(res,sum(abs(x%v(1:nr,j)))) end do end function d_base_mlv_asum @@ -3285,6 +3305,50 @@ contains end subroutine d_base_mlv_absval2 + ! + ! QR factorization + ! + !> Function base_mlv_qr_fact + !! \memberof psb_d_base_multivect_type + !! \param res Result array + !! \param info Return code + !! \brief QR factorization + ! + subroutine d_base_mlv_qr_fact(x, res, info) + implicit none + class(psb_d_base_multivect_type), intent(inout) :: x + real(psb_dpk_), intent(inout) :: res(:,:) + real(psb_dpk_), allocatable :: tau(:), work(:) + integer(psb_ipk_) :: i, j, m, n, lda, lwork + integer(psb_ipk_), intent(out) :: info + external :: dgeqrf, dorgqr + + if (x%is_dev()) call x%sync() + + ! Initialize params + m = x%get_nrows() + n = x%get_ncols() + lda = m + lwork = n + allocate(tau(n), work(lwork)) + + ! Perform QR factorization + call dgeqrf(m, n, x%v, lda, tau, work, lwork, info) + + ! Set R + res = x%v(1:n,1:n) + do i = 2, n + do j = 1, i-1 + res(i,j) = dzero + enddo + enddo + + ! Generate Q matrix + call dorgqr(m, n, n, x%v, lda, tau, work, lwork, info) + + deallocate(tau, work) + + end subroutine d_base_mlv_qr_fact function d_base_mlv_use_buffer() result(res) implicit none diff --git a/base/modules/serial/psb_d_mat_mod.F90 b/base/modules/serial/psb_d_mat_mod.F90 index 8f967ce1..9817bda3 100644 --- a/base/modules/serial/psb_d_mat_mod.F90 +++ b/base/modules/serial/psb_d_mat_mod.F90 @@ -230,6 +230,7 @@ module psb_d_mat_mod procedure, pass(a) :: colsum => psb_d_colsum procedure, pass(a) :: aclsum => psb_d_aclsum procedure, pass(a) :: csmv_v => psb_d_csmv_vect + procedure, pass(a) :: csmv_mv => psb_d_csmv_multivect procedure, pass(a) :: csmv => psb_d_csmv procedure, pass(a) :: csmm => psb_d_csmm generic, public :: spmm => csmm, csmv, csmv_v @@ -1069,6 +1070,16 @@ module psb_d_mat_mod integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans end subroutine psb_d_csmv_vect + subroutine psb_d_csmv_multivect(alpha,a,x,beta,y,info,trans) + use psb_d_multivect_mod, only : psb_d_multivect_type + import :: psb_ipk_, psb_lpk_, psb_dspmat_type, psb_dpk_ + class(psb_dspmat_type), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + type(psb_d_multivect_type), intent(inout) :: x + type(psb_d_multivect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_csmv_multivect end interface interface psb_cssm diff --git a/base/modules/serial/psb_d_vect_mod.F90 b/base/modules/serial/psb_d_vect_mod.F90 index 88fa3262..4647c2e9 100644 --- a/base/modules/serial/psb_d_vect_mod.F90 +++ b/base/modules/serial/psb_d_vect_mod.F90 @@ -1345,44 +1345,60 @@ module psb_d_multivect_mod integer(psb_ipk_) :: dupl = psb_dupl_add_ real(psb_dpk_), allocatable :: rmtv(:,:) contains - procedure, pass(x) :: get_nrows => d_vect_get_nrows - procedure, pass(x) :: get_ncols => d_vect_get_ncols - procedure, pass(x) :: sizeof => d_vect_sizeof - procedure, pass(x) :: get_fmt => d_vect_get_fmt - procedure, pass(x) :: is_remote_build => d_mvect_is_remote_build - procedure, pass(x) :: set_remote_build => d_mvect_set_remote_build - procedure, pass(x) :: get_dupl => d_mvect_get_dupl - procedure, pass(x) :: set_dupl => d_mvect_set_dupl - + ! + ! Constructors/allocators + ! + procedure, pass(x) :: bld_x => d_vect_bld_x + procedure, pass(x) :: bld_n => d_vect_bld_n + generic, public :: bld => bld_x, bld_n procedure, pass(x) :: all => d_vect_all - procedure, pass(x) :: reall => d_vect_reall + ! + ! Insert/set. Assembly and free. + ! Assembly does almost nothing here, but is important + ! in derived classes. + ! + procedure, pass(x) :: ins => d_vect_ins procedure, pass(x) :: zero => d_vect_zero procedure, pass(x) :: asb => d_vect_asb - procedure, pass(x) :: sync => d_vect_sync procedure, pass(x) :: free => d_vect_free - procedure, pass(x) :: ins => d_vect_ins - procedure, pass(x) :: bld_x => d_vect_bld_x - procedure, pass(x) :: bld_n => d_vect_bld_n - generic, public :: bld => bld_x, bld_n + ! + ! Sync: centerpiece of handling of external storage. + ! Any derived class having extra storage upon sync + ! will guarantee that both fortran/host side and + ! external side contain the same data. The base + ! version is only a placeholder. + ! + procedure, pass(x) :: sync => d_vect_sync + ! + ! Basic info + ! + procedure, pass(x) :: get_nrows => d_vect_get_nrows + procedure, pass(x) :: get_ncols => d_vect_get_ncols + procedure, pass(x) :: sizeof => d_vect_sizeof + procedure, pass(x) :: get_fmt => d_vect_get_fmt + procedure, pass(x) :: get_nrmv => d_vect_get_nrmv + procedure, pass(x) :: set_nrmv => d_vect_set_nrmv + ! + ! Set/get data from/to an external array; also + ! overload assignment. + ! procedure, pass(x) :: get_vect => d_vect_get_vect - procedure, pass(x) :: cnv => d_vect_cnv procedure, pass(x) :: set_scal => d_vect_set_scal procedure, pass(x) :: set_vect => d_vect_set_vect generic, public :: set => set_vect, set_scal - procedure, pass(x) :: clone => d_vect_clone - procedure, pass(x) :: gthab => d_vect_gthab - procedure, pass(x) :: gthzv => d_vect_gthzv - procedure, pass(x) :: gthzv_x => d_vect_gthzv_x - generic, public :: gth => gthab, gthzv - procedure, pass(y) :: sctb => d_vect_sctb - procedure, pass(y) :: sctb_x => d_vect_sctb_x - generic, public :: sct => sctb, sctb_x -!!$ procedure, pass(x) :: dot_v => d_vect_dot_v -!!$ procedure, pass(x) :: dot_a => d_vect_dot_a -!!$ generic, public :: dot => dot_v, dot_a -!!$ procedure, pass(y) :: axpby_v => d_vect_axpby_v -!!$ procedure, pass(y) :: axpby_a => d_vect_axpby_a -!!$ generic, public :: axpby => axpby_v, axpby_a + ! + ! Dot product and AXPBY + ! + procedure, pass(x) :: dot_v => d_vect_dot_v + procedure, pass(x) :: dot_a => d_vect_dot_a + generic, public :: dot => dot_v, dot_a + procedure, pass(y) :: axpby_v => d_vect_axpby_v + procedure, pass(y) :: axpby_a => d_vect_axpby_a + generic, public :: axpby => axpby_v, axpby_a + ! + ! MultiVector by vector/multivector multiplication. Need all variants + ! to handle multiple requirements from preconditioners + ! !!$ procedure, pass(y) :: mlt_v => d_vect_mlt_v !!$ procedure, pass(y) :: mlt_a => d_vect_mlt_a !!$ procedure, pass(z) :: mlt_a_2 => d_vect_mlt_a_2 @@ -1391,10 +1407,35 @@ module psb_d_multivect_mod !!$ procedure, pass(z) :: mlt_av => d_vect_mlt_av !!$ generic, public :: mlt => mlt_v, mlt_a, mlt_a_2,& !!$ & mlt_v_2, mlt_av, mlt_va + ! + ! Scaling and norms + ! !!$ procedure, pass(x) :: scal => d_vect_scal -!!$ procedure, pass(x) :: nrm2 => d_vect_nrm2 -!!$ procedure, pass(x) :: amax => d_vect_amax -!!$ procedure, pass(x) :: asum => d_vect_asum + procedure, pass(x) :: nrm2 => d_vect_nrm2 + procedure, pass(x) :: amax => d_vect_amax + procedure, pass(x) :: asum => d_vect_asum + procedure, pass(x) :: qr_fact => d_vect_qr_fact + ! + ! Gather/scatter. These are needed for MPI interfacing. + ! May have to be reworked. + ! + procedure, pass(x) :: gthab => d_vect_gthab + procedure, pass(x) :: gthzv => d_vect_gthzv + procedure, pass(x) :: gthzv_x => d_vect_gthzv_x + generic, public :: gth => gthab, gthzv + procedure, pass(y) :: sctb => d_vect_sctb + procedure, pass(y) :: sctb_x => d_vect_sctb_x + generic, public :: sct => sctb, sctb_x + ! + ! Other procedures + ! + procedure, pass(x) :: is_remote_build => d_mvect_is_remote_build + procedure, pass(x) :: set_remote_build => d_mvect_set_remote_build + procedure, pass(x) :: get_dupl => d_mvect_get_dupl + procedure, pass(x) :: set_dupl => d_mvect_set_dupl + procedure, pass(x) :: reall => d_vect_reall + procedure, pass(x) :: cnv => d_vect_cnv + procedure, pass(x) :: clone => d_vect_clone end type psb_d_multivect_type public :: psb_d_multivect, psb_d_multivect_type,& @@ -1417,9 +1458,7 @@ module psb_d_multivect_mod module procedure psb_d_get_multivect_default end interface psb_get_multivect_default - contains - function d_mvect_get_dupl(x) result(res) implicit none @@ -1440,7 +1479,6 @@ contains end if end subroutine d_mvect_set_dupl - function d_mvect_is_remote_build(x) result(res) implicit none class(psb_d_multivect_type), intent(in) :: x @@ -1458,8 +1496,7 @@ contains else x%remote_build = psb_matbld_remote_ end if - end subroutine d_mvect_set_remote_build - + end subroutine d_mvect_set_remote_build subroutine psb_d_set_multivect_default(v) implicit none @@ -1481,7 +1518,6 @@ contains end function psb_d_get_multivect_default - function psb_d_get_base_multivect_default() result(res) implicit none class(psb_d_base_multivect_type), pointer :: res @@ -1494,7 +1530,6 @@ contains end function psb_d_get_base_multivect_default - subroutine d_vect_clone(x,y,info) implicit none class(psb_d_multivect_type), intent(inout) :: x @@ -1526,7 +1561,6 @@ contains end subroutine d_vect_bld_x - subroutine d_vect_bld_n(x,m,n,mold) integer(psb_ipk_), intent(in) :: m,n class(psb_d_multivect_type), intent(out) :: x @@ -1571,7 +1605,6 @@ contains end subroutine d_vect_set_vect - function constructor(x) result(this) real(psb_dpk_) :: x(:,:) type(psb_d_multivect_type) :: this @@ -1582,7 +1615,6 @@ contains end function constructor - function size_const(m,n) result(this) integer(psb_ipk_), intent(in) :: m,n type(psb_d_multivect_type) :: this @@ -1625,6 +1657,20 @@ contains if (allocated(x%v)) res = x%v%get_fmt() end function d_vect_get_fmt + function d_vect_get_nrmv(x) result(res) + implicit none + class(psb_d_multivect_type), intent(in) :: x + integer(psb_ipk_) :: res + res = x%nrmv + end function d_vect_get_nrmv + + subroutine d_vect_set_nrmv(x,val) + implicit none + class(psb_d_multivect_type), intent(inout) :: x + integer(psb_ipk_), intent(in) :: val + x%nrmv = val + end subroutine d_vect_set_nrmv + subroutine d_vect_all(m,n, x, info, mold) implicit none @@ -1785,7 +1831,6 @@ contains end subroutine d_vect_ins - subroutine d_vect_cnv(x,mold) class(psb_d_multivect_type), intent(inout) :: x class(psb_d_base_multivect_type), intent(in), optional :: mold @@ -1805,64 +1850,60 @@ contains call move_alloc(tmp,x%v) end subroutine d_vect_cnv + subroutine d_vect_dot_v(x,y,res,t) + implicit none + class(psb_d_multivect_type), intent(inout) :: x, y + real(psb_dpk_), intent(inout) :: res(:,:) + logical, optional, intent(in) :: t + + if (allocated(x%v).and.allocated(y%v)) & + & call x%v%dot(y%v,res,t) + + end subroutine d_vect_dot_v + + subroutine d_vect_dot_a(x,y,res,t) + implicit none + class(psb_d_multivect_type), intent(inout) :: x + real(psb_dpk_), intent(in) :: y(:,:) + real(psb_dpk_), intent(inout) :: res(:,:) + logical, optional, intent(in) :: t + + if (allocated(x%v)) & + & call x%v%dot(y,res,t) + + end subroutine d_vect_dot_a + + subroutine d_vect_axpby_v(m,alpha, x, beta, y, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_d_multivect_type), intent(inout) :: x + class(psb_d_multivect_type), intent(inout) :: y + real(psb_dpk_), intent (in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + + if (allocated(x%v).and.allocated(y%v)) then + call y%v%axpby(m,alpha,x%v,beta,info) + else + info = psb_err_invalid_vect_state_ + end if + + end subroutine d_vect_axpby_v + + subroutine d_vect_axpby_a(m,alpha, x, beta, y, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_dpk_), intent(in) :: x(:,:) + class(psb_d_multivect_type), intent(inout) :: y + real(psb_dpk_), intent (in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + + if (allocated(y%v)) & + & call y%v%axpby(m,alpha,x,beta,info) + + end subroutine d_vect_axpby_a -!!$ function d_vect_dot_v(n,x,y) result(res) -!!$ implicit none -!!$ class(psb_d_multivect_type), intent(inout) :: x, y -!!$ integer(psb_ipk_), intent(in) :: n -!!$ real(psb_dpk_) :: res -!!$ -!!$ res = dzero -!!$ if (allocated(x%v).and.allocated(y%v)) & -!!$ & res = x%v%dot(n,y%v) -!!$ -!!$ end function d_vect_dot_v -!!$ -!!$ function d_vect_dot_a(n,x,y) result(res) -!!$ implicit none -!!$ class(psb_d_multivect_type), intent(inout) :: x -!!$ real(psb_dpk_), intent(in) :: y(:) -!!$ integer(psb_ipk_), intent(in) :: n -!!$ real(psb_dpk_) :: res -!!$ -!!$ res = dzero -!!$ if (allocated(x%v)) & -!!$ & res = x%v%dot(n,y) -!!$ -!!$ end function d_vect_dot_a -!!$ -!!$ subroutine d_vect_axpby_v(m,alpha, x, beta, y, info) -!!$ use psi_serial_mod -!!$ implicit none -!!$ integer(psb_ipk_), intent(in) :: m -!!$ class(psb_d_multivect_type), intent(inout) :: x -!!$ class(psb_d_multivect_type), intent(inout) :: y -!!$ real(psb_dpk_), intent (in) :: alpha, beta -!!$ integer(psb_ipk_), intent(out) :: info -!!$ -!!$ if (allocated(x%v).and.allocated(y%v)) then -!!$ call y%v%axpby(m,alpha,x%v,beta,info) -!!$ else -!!$ info = psb_err_invalid_vect_state_ -!!$ end if -!!$ -!!$ end subroutine d_vect_axpby_v -!!$ -!!$ subroutine d_vect_axpby_a(m,alpha, x, beta, y, info) -!!$ use psi_serial_mod -!!$ implicit none -!!$ integer(psb_ipk_), intent(in) :: m -!!$ real(psb_dpk_), intent(in) :: x(:) -!!$ class(psb_d_multivect_type), intent(inout) :: y -!!$ real(psb_dpk_), intent (in) :: alpha, beta -!!$ integer(psb_ipk_), intent(out) :: info -!!$ -!!$ if (allocated(y%v)) & -!!$ & call y%v%axpby(m,alpha,x,beta,info) -!!$ -!!$ end subroutine d_vect_axpby_a -!!$ -!!$ !!$ subroutine d_vect_mlt_v(x, y, info) !!$ use psi_serial_mod !!$ implicit none @@ -1972,46 +2013,58 @@ contains !!$ end subroutine d_vect_scal !!$ !!$ -!!$ function d_vect_nrm2(n,x) result(res) -!!$ implicit none -!!$ class(psb_d_multivect_type), intent(inout) :: x -!!$ integer(psb_ipk_), intent(in) :: n -!!$ real(psb_dpk_) :: res -!!$ -!!$ if (allocated(x%v)) then -!!$ res = x%v%nrm2(n) -!!$ else -!!$ res = dzero -!!$ end if -!!$ -!!$ end function d_vect_nrm2 -!!$ -!!$ function d_vect_amax(n,x) result(res) -!!$ implicit none -!!$ class(psb_d_multivect_type), intent(inout) :: x -!!$ integer(psb_ipk_), intent(in) :: n -!!$ real(psb_dpk_) :: res -!!$ -!!$ if (allocated(x%v)) then -!!$ res = x%v%amax(n) -!!$ else -!!$ res = dzero -!!$ end if -!!$ -!!$ end function d_vect_amax -!!$ -!!$ function d_vect_asum(n,x) result(res) -!!$ implicit none -!!$ class(psb_d_multivect_type), intent(inout) :: x -!!$ integer(psb_ipk_), intent(in) :: n -!!$ real(psb_dpk_) :: res -!!$ -!!$ if (allocated(x%v)) then -!!$ res = x%v%asum(n) -!!$ else -!!$ res = dzero -!!$ end if -!!$ -!!$ end function d_vect_asum + + function d_vect_nrm2(nc,x) result(res) + implicit none + class(psb_d_multivect_type), intent(inout) :: x + integer(psb_ipk_), intent(in) :: nc + real(psb_dpk_), allocatable :: res + + if (allocated(x%v)) then + res = x%v%nrm2(nc) + else + res = dzero + end if + + end function d_vect_nrm2 + + function d_vect_amax(nc,x) result(res) + implicit none + class(psb_d_multivect_type), intent(inout) :: x + integer(psb_ipk_), intent(in) :: nc + real(psb_dpk_) :: res + + if (allocated(x%v)) then + res = x%v%amax(nc) + else + res = dzero + end if + + end function d_vect_amax + + function d_vect_asum(nc,x) result(res) + implicit none + class(psb_d_multivect_type), intent(inout) :: x + integer(psb_ipk_), intent(in) :: nc + real(psb_dpk_) :: res + + if (allocated(x%v)) then + res = x%v%asum(nc) + else + res = dzero + end if + + end function d_vect_asum + + subroutine d_vect_qr_fact(x, res, info) + implicit none + class(psb_d_multivect_type), intent(inout) :: x + real(psb_dpk_), intent(inout) :: res(:,:) + integer(psb_ipk_), intent(out) :: info + + if (allocated(x%v)) then + call x%v%qr_fact(res, info) + endif + end subroutine d_vect_qr_fact end module psb_d_multivect_mod diff --git a/base/modules/tools/psb_d_tools_mod.F90 b/base/modules/tools/psb_d_tools_mod.F90 index 30e45d53..77840e13 100644 --- a/base/modules/tools/psb_d_tools_mod.F90 +++ b/base/modules/tools/psb_d_tools_mod.F90 @@ -66,6 +66,15 @@ Module psb_d_tools_mod integer(psb_ipk_), optional, intent(in) :: n integer(psb_ipk_), optional, intent(in) :: dupl, bldmode end subroutine psb_dalloc_multivect + subroutine psb_dalloc_multivect_r2(x, desc_a,info,m,n,lb, dupl, bldmode) + import + implicit none + type(psb_d_multivect_type), intent(out) :: x(:) + type(psb_desc_type), intent(in) :: desc_a + integer(psb_ipk_),intent(out) :: info + integer(psb_ipk_), optional, intent(in) :: m, n, lb + integer(psb_ipk_), optional, intent(in) :: dupl, bldmode + end subroutine psb_dalloc_multivect_r2 end interface @@ -98,6 +107,16 @@ Module psb_d_tools_mod logical, intent(in), optional :: scratch integer(psb_ipk_), optional, intent(in) :: n end subroutine psb_dasb_multivect + subroutine psb_dasb_multivect_r2(x, desc_a, info,mold, scratch, n) + import + implicit none + type(psb_desc_type), intent(in) :: desc_a + type(psb_d_multivect_type), intent(inout) :: x(:) + integer(psb_ipk_), intent(out) :: info + class(psb_d_base_multivect_type), intent(in), optional :: mold + logical, intent(in), optional :: scratch + integer(psb_ipk_), optional, intent(in) :: n + end subroutine psb_dasb_multivect_r2 end interface interface psb_gefree @@ -122,6 +141,13 @@ Module psb_d_tools_mod type(psb_d_multivect_type), intent(inout) :: x integer(psb_ipk_), intent(out) :: info end subroutine psb_dfree_multivect + subroutine psb_dfree_multivect_r2(x, desc_a, info) + import + implicit none + type(psb_desc_type), intent(in) :: desc_a + type(psb_d_multivect_type), allocatable, intent(inout) :: x(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_dfree_multivect_r2 end interface diff --git a/base/psblas/Makefile b/base/psblas/Makefile index 7d03f1f0..386742d3 100644 --- a/base/psblas/Makefile +++ b/base/psblas/Makefile @@ -1,7 +1,7 @@ include ../../Make.inc #FCOPT=-O2 -OBJS= psb_ddot.o psb_damax.o psb_dasum.o psb_daxpby.o\ +OBJS= psb_ddot.o psb_damax.o psb_dasum.o psb_daxpby.o psb_dqrfact.o\ psb_dnrm2.o psb_dnrmi.o psb_dspmm.o psb_dspsm.o\ psb_sspnrm1.o psb_dspnrm1.o psb_cspnrm1.o psb_zspnrm1.o \ psb_zamax.o psb_zasum.o psb_zaxpby.o psb_zdot.o \ diff --git a/base/psblas/psb_damax.f90 b/base/psblas/psb_damax.f90 index 490d4ffe..a36cad94 100644 --- a/base/psblas/psb_damax.f90 +++ b/base/psblas/psb_damax.f90 @@ -354,6 +354,104 @@ function psb_damax_vect(x, desc_a, info,global) result(res) end function psb_damax_vect +! Function: psb_damax_multivect +! Computes the maximum absolute value of X. +! +! normi := max(abs(X(i)) +! +! Arguments: +! x - type(psb_d_multivect_type) The input vector. +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! +function psb_damax_multivect(x, desc_a, info, global) result(res) + use psb_penv_mod + use psb_serial_mod + use psb_desc_mod + use psb_check_mod + use psb_error_mod + use psb_d_multivect_mod + implicit none + + real(psb_dpk_) :: res + type(psb_d_multivect_type), intent (inout) :: x + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: global + + ! locals + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np, me, err_act, iix, jjx + integer(psb_lpk_) :: ix, jx, m, n + logical :: global_ + character(len=20) :: name, ch_err + + name='psb_damaxmv' + info=psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_ ; goto 9999 + end if + + ctxt=desc_a%get_context() + + call psb_info(ctxt, me, np) + if (np == -1) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (present(global)) then + global_ = global + else + global_ = .true. + end if + + ix = 1 + jx = 1 + + m = x%get_nrows() + n = x%get_ncols() + + call psb_chkvect(m,n,x%get_nrows(),ix,jx,desc_a,info,iix,jjx) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if (iix /= 1) then + info=psb_err_ix_n1_iy_n1_unsupported_ + call psb_errpush(info,name) + goto 9999 + end if + + ! compute local max + if ((desc_a%get_local_rows() > 0).and.(m /= 0)) then + res = x%amax(x%get_ncols()) + else + res = dzero + end if + + ! compute global max + if (global_) call psb_amx(ctxt, res) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + +end function psb_damax_multivect !!$ !!$ Parallel Sparse BLAS version 3.5 diff --git a/base/psblas/psb_daxpby.f90 b/base/psblas/psb_daxpby.f90 index c386f8f2..9ef7d5b2 100644 --- a/base/psblas/psb_daxpby.f90 +++ b/base/psblas/psb_daxpby.f90 @@ -130,6 +130,191 @@ subroutine psb_daxpby_vect(alpha, x, beta, y,& end subroutine psb_daxpby_vect +! +! Subroutine: psb_daxpby_multivect +! Adds one distributed multivector to another, +! +! Y := beta * Y + alpha * X +! +! Arguments: +! alpha - real,input The scalar used to multiply each component of X +! x - type(psb_d_multivect_type) The input multivector containing the entries of X +! beta - real,input The scalar used to multiply each component of Y +! y - type(psb_d_multivect_type) The input/output multivector Y +! desc_a - type(psb_desc_type) The communication descriptor. +! info - integer Return code +! +! Note: from a functional point of view, X is input, but here +! it's declared INOUT because of the sync() methods. +! +subroutine psb_daxpby_multivect(alpha, x, beta, y, desc_a, info) + use psb_base_mod, psb_protect_name => psb_daxpby_multivect + implicit none + type(psb_d_multivect_type), intent (inout) :: x + type(psb_d_multivect_type), intent (inout) :: y + real(psb_dpk_), intent (in) :: alpha, beta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + + ! locals + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np, me, err_act, iix, jjx, iiy, jjy + integer(psb_lpk_) :: ix, ijx, iy, ijy, x_m, x_n, y_m, y_n + character(len=20) :: name, ch_err + + name='psb_dgeaxpby' + if (psb_errstatus_fatal()) return + info=psb_success_ + call psb_erractionsave(err_act) + + ctxt=desc_a%get_context() + + call psb_info(ctxt, me, np) + if (np == -ione) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(y%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + ix = ione + ijx = ione + + iy = ione + ijy = ione + + x_m = x%get_nrows() + x_n = x%get_ncols() + + y_m = y%get_nrows() + y_n = y%get_ncols() + + ! check vector correctness + call psb_chkvect(x_m,x_n,x%get_nrows(),ix,ijx,desc_a,info,iix,jjx) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 1' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + call psb_chkvect(y_m,y_n,y%get_nrows(),iy,ijy,desc_a,info,iiy,jjy) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 2' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if ((iix /= ione).or.(iiy /= ione)) then + info=psb_err_ix_n1_iy_n1_unsupported_ + call psb_errpush(info,name) + end if + + if(desc_a%get_local_rows() > 0) then + call y%axpby(desc_a%get_local_rows(),alpha,x,beta,info) + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + +end subroutine psb_daxpby_multivect + +! +! Subroutine: psb_daxpby_multivect +! Adds one distributed multivector to another, +! +! Y := beta * Y + alpha * X +! +! Arguments: +! alpha - real,input The scalar used to multiply each component of X +! x - real(psb_dpk_)(:,:) The input multivector containing the entries of X +! beta - real,input The scalar used to multiply each component of Y +! y - type(psb_d_multivect_type) The input/output multivector Y +! desc_a - type(psb_desc_type) The communication descriptor. +! info - integer Return code +! +! Note: from a functional point of view, X is input, but here +! it's declared INOUT because of the sync() methods. +! +subroutine psb_daxpby_multivect_1(alpha, x, beta, y, desc_a, info) + use psb_base_mod, psb_protect_name => psb_daxpby_multivect_1 + implicit none + real(psb_dpk_), intent(in) :: x(:,:) + type(psb_d_multivect_type), intent (inout) :: y + real(psb_dpk_), intent (in) :: alpha, beta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + + ! locals + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np, me, err_act, iiy, jjy + integer(psb_lpk_) :: iy, ijy, y_m, y_n + character(len=20) :: name, ch_err + + name='psb_dgeaxpby' + if (psb_errstatus_fatal()) return + info=psb_success_ + call psb_erractionsave(err_act) + + ctxt=desc_a%get_context() + + call psb_info(ctxt, me, np) + if (np == -ione) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(y%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + iy = ione + ijy = ione + + y_m = y%get_nrows() + y_n = y%get_ncols() + + call psb_chkvect(y_m,y_n,y%get_nrows(),iy,ijy,desc_a,info,iiy,jjy) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 2' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if (iiy /= ione) then + info=psb_err_ix_n1_iy_n1_unsupported_ + call psb_errpush(info,name) + end if + + if(desc_a%get_local_rows() > 0) then + call y%axpby(desc_a%get_local_rows(),alpha,x,beta,info) + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + +end subroutine psb_daxpby_multivect_1 + ! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 diff --git a/base/psblas/psb_ddot.f90 b/base/psblas/psb_ddot.f90 index 633c7549..586a1935 100644 --- a/base/psblas/psb_ddot.f90 +++ b/base/psblas/psb_ddot.f90 @@ -57,7 +57,7 @@ function psb_ddot_vect(x, y, desc_a,info,global) result(res) use psb_d_vect_mod use psb_d_psblas_mod, psb_protect_name => psb_ddot_vect implicit none - real(psb_dpk_) :: res + real(psb_dpk_) :: res type(psb_d_vect_type), intent(inout) :: x, y type(psb_desc_type), intent(in) :: desc_a integer(psb_ipk_), intent(out) :: info @@ -158,6 +158,284 @@ function psb_ddot_vect(x, y, desc_a,info,global) result(res) end function psb_ddot_vect ! +! Function: psb_ddot_multivect +! psb_ddot computes the dot product of two distributed vectors, +! +! dot := ( X )**C * ( Y ) +! +! +! Arguments: +! x - type(psb_d_multivect_type) The input vector containing the entries of sub( X ). +! y - type(psb_d_multivect_type) The input vector containing the entries of sub( Y ). +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! t - logical(optional) Whether x is transposed, default: .false. +! global - logical(optional) Whether to perform the global sum, default: .true. +! +! Note: from a functional point of view, X and Y are input, but here +! they are declared INOUT because of the sync() methods. +! +! +function psb_ddot_multivect(x, y, desc_a,info,t,global) result(res) + use psb_desc_mod + use psb_d_base_mat_mod + use psb_check_mod + use psb_error_mod + use psb_penv_mod + use psb_d_vect_mod + use psb_d_psblas_mod, psb_protect_name => psb_ddot_multivect + implicit none + real(psb_dpk_), allocatable :: res(:,:) + type(psb_d_multivect_type), intent(inout) :: x, y + type(psb_desc_type), intent(in) :: desc_a + integer(psb_ipk_), intent(out) :: info + logical, optional, intent(in) :: t + logical, intent(in), optional :: global + + ! locals + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np, me, idx, ndm,& + & err_act, iix, jjx, iiy, jjy, i, nr + integer(psb_lpk_) :: ix, ijx, iy, ijy, x_m, x_n, y_m, y_n + logical :: global_, t_ + character(len=20) :: name, ch_err + + name='psb_ddot_multivect' + info=psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_ ; goto 9999 + end if + + ctxt=desc_a%get_context() + call psb_info(ctxt, me, np) + if (np == -ione) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(y%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (present(t)) then + t_ = t + else + t_ = .false. + end if + + if (present(global)) then + global_ = global + else + global_ = .true. + end if + + ix = ione + ijx = ione + + iy = ione + ijy = ione + + x_m = x%get_nrows() + x_n = x%get_ncols() + + y_m = y%get_nrows() + y_n = y%get_ncols() + + ! check vector correctness + call psb_chkvect(x_m,x_n,x%get_nrows(),ix,ijx,desc_a,info,iix,jjx) + if (info == psb_success_) & + & call psb_chkvect(y_m,y_n,y%get_nrows(),iy,ijy,desc_a,info,iiy,jjy) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if ((iix /= ione).or.(iiy /= ione)) then + info=psb_err_ix_n1_iy_n1_unsupported_ + call psb_errpush(info,name) + goto 9999 + end if + + nr = desc_a%get_local_rows() + if(nr > 0) then + + if (t_) then + allocate(res(x_n,y_n)) + else + allocate(res(x_m,y_n)) + endif + + call x%dot(y,res,t_) + ! TODO adjust dot_local because overlapped elements are computed more than once + ! if (size(desc_a%ovrlap_elem,1)>0) then + ! if (x%v%is_dev()) call x%sync() + ! if (y%v%is_dev()) call y%sync() + ! do i=1,size(desc_a%ovrlap_elem,1) + ! idx = desc_a%ovrlap_elem(i,1) + ! ndm = desc_a%ovrlap_elem(i,2) + ! res = res - (real(ndm-1)/real(ndm))*(x%v%v(idx)*y%v%v(idx)) + ! end do + ! end if + else + res = dzero + end if + + ! compute global sum + if (global_) call psb_sum(ctxt, res) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + +end function psb_ddot_multivect +! +! Function: psb_ddot_multivect_1 +! psb_ddot computes the dot product of two distributed vectors, +! +! dot := ( X )**C * ( Y ) +! +! +! Arguments: +! x - type(psb_d_multivect_type) The input vector containing the entries of sub( X ). +! y - real(psb_dpk_)(:,:) The input vector containing the entries of sub( Y ). +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! t - logical(optional) Whether x is transposed, default: .false. +! global - logical(optional) Whether to perform the global sum, default: .true. +! +! Note: from a functional point of view, X and Y are input, but here +! they are declared INOUT because of the sync() methods. +! +! +function psb_ddot_multivect_1(x, y, desc_a,info,t,global) result(res) + use psb_desc_mod + use psb_d_base_mat_mod + use psb_check_mod + use psb_error_mod + use psb_penv_mod + use psb_d_vect_mod + use psb_d_psblas_mod, psb_protect_name => psb_ddot_multivect_1 + implicit none + real(psb_dpk_), allocatable :: res(:,:) + type(psb_d_multivect_type), intent(inout) :: x + real(psb_dpk_), intent(in) :: y(:,:) + type(psb_desc_type), intent(in) :: desc_a + integer(psb_ipk_), intent(out) :: info + logical, optional, intent(in) :: t + logical, intent(in), optional :: global + + ! locals + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np, me, idx, ndm,& + & err_act, iix, jjx, i, nr + integer(psb_lpk_) :: ix, ijx, iy, ijy, x_m, x_n, y_n + logical :: global_, t_ + character(len=20) :: name, ch_err + + name='psb_ddot_multivect' + info=psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_ ; goto 9999 + end if + + ctxt=desc_a%get_context() + call psb_info(ctxt, me, np) + if (np == -ione) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (present(t)) then + t_ = t + else + t_ = .false. + end if + + if (present(global)) then + global_ = global + else + global_ = .true. + end if + + ix = ione + ijx = ione + + x_m = x%get_nrows() + x_n = x%get_ncols() + + y_n = size(y,dim=2) + + ! check vector correctness + call psb_chkvect(x_m,x_n,x%get_nrows(),ix,ijx,desc_a,info,iix,jjx) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if (iix /= ione) then + info=psb_err_ix_n1_iy_n1_unsupported_ + call psb_errpush(info,name) + goto 9999 + end if + + nr = desc_a%get_local_rows() + if(nr > 0) then + + if (t_) then + allocate(res(x_n,y_n)) + else + allocate(res(x_m,y_n)) + endif + + call x%dot(y,res,t_) + ! TODO adjust dot_local because overlapped elements are computed more than once + ! if (size(desc_a%ovrlap_elem,1)>0) then + ! if (x%v%is_dev()) call x%sync() + ! if (y%v%is_dev()) call y%sync() + ! do i=1,size(desc_a%ovrlap_elem,1) + ! idx = desc_a%ovrlap_elem(i,1) + ! ndm = desc_a%ovrlap_elem(i,2) + ! res = res - (real(ndm-1)/real(ndm))*(x%v%v(idx)*y%v%v(idx)) + ! end do + ! end if + else + res = dzero + end if + + ! compute global sum + if (global_) call psb_sum(ctxt, res) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + +end function psb_ddot_multivect_1 +! ! Function: psb_ddot ! psb_ddot computes the dot product of two distributed vectors, ! diff --git a/base/psblas/psb_dnrm2.f90 b/base/psblas/psb_dnrm2.f90 index 7ebe9439..a3a9c129 100644 --- a/base/psblas/psb_dnrm2.f90 +++ b/base/psblas/psb_dnrm2.f90 @@ -373,6 +373,112 @@ function psb_dnrm2_vect(x, desc_a, info,global) result(res) return end function psb_dnrm2_vect +! Function: psb_dnrm2_multivect +! Computes the norm2 of a distributed multivector, +! +! norm2 := sqrt ( X**C * X) +! +! Arguments: +! x - type(psb_d_multivect_type) The input vector containing the entries of X. +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! global - logical(optional) Whether to perform the global reduction, default: .true. +! +function psb_dnrm2_multivect(x, desc_a, info,global) result(res) + use psb_desc_mod + use psb_check_mod + use psb_error_mod + use psb_penv_mod + use psb_d_multivect_mod + implicit none + + real(psb_dpk_) :: res + type(psb_d_multivect_type), intent (inout) :: x + type(psb_desc_type), intent(in) :: desc_a + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: global + + ! locals + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np, me, err_act, iix, jjx + integer(psb_lpk_) :: ix, jx, m, n + logical :: global_ + character(len=20) :: name, ch_err + + name='psb_dnrm2mv' + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_ ; goto 9999 + end if + info=psb_success_ + + ctxt=desc_a%get_context() + + call psb_info(ctxt, me, np) + if (np == -1) then + info=psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (present(global)) then + global_ = global + else + global_ = .true. + end if + + ix = 1 + jx = 1 + + m = x%get_nrows() + n = x%get_ncols() + + call psb_chkvect(m,n,x%get_nrows(),ix,jx,desc_a,info,iix,jjx) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect' + call psb_errpush(info,name,a_err=ch_err) + end if + + if (iix /= 1) then + info=psb_err_ix_n1_iy_n1_unsupported_ + call psb_errpush(info,name) + goto 9999 + end if + + if (desc_a%get_local_rows() > 0) then + res = x%nrm2(x%get_ncols()) + ! TODO adjust because overlapped elements are computed more than once + ! if (size(desc_a%ovrlap_elem,1)>0) then + ! if (x%is_dev()) call x%sync() + ! do i=1,size(desc_a%ovrlap_elem,1) + ! idx = desc_a%ovrlap_elem(i,1) + ! ndm = desc_a%ovrlap_elem(i,2) + ! dd = dble(ndm-1)/dble(ndm) + ! res = res * sqrt(done - dd*(abs(x%v%v(idx))/res)**2) + ! end do + ! end if + else + res = dzero + end if + + if (global_) call psb_nrm2(ctxt,res) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + +end function psb_dnrm2_multivect + ! Function: psb_dnrm2_weight_vect ! Computes the weighted norm2 of a distributed vector, ! diff --git a/base/psblas/psb_dqrfact.f90 b/base/psblas/psb_dqrfact.f90 new file mode 100644 index 00000000..721f9d4e --- /dev/null +++ b/base/psblas/psb_dqrfact.f90 @@ -0,0 +1,76 @@ +! +! Subroutine: psb_dqrfact +! Computes QR factorization of a multivector +! +! Arguments: +! x - type(psb_d_multivect_type) The input multivector containing the entries of X +! desc_a - type(psb_desc_type) The communication descriptor. +! info - integer Return code +! +! Note: from a functional point of view, X is input, but here +! it's declared INOUT because of the sync() methods. +! +function psb_dqrfact(x, desc_a, info) result(res) + use psb_base_mod, psb_protect_name => psb_dqrfact + implicit none + real(psb_dpk_), allocatable :: res(:,:) + type(psb_d_multivect_type), intent(inout) :: x + type(psb_desc_type), intent(in) :: desc_a + integer(psb_ipk_), intent(out) :: info + + ! locals + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np, me, err_act, iix, jjx + integer(psb_lpk_) :: ix, ijx, x_m, x_n + character(len=20) :: name, ch_err + + name='psb_dgqrfact' + if (psb_errstatus_fatal()) return + info=psb_success_ + call psb_erractionsave(err_act) + + ctxt=desc_a%get_context() + + call psb_info(ctxt, me, np) + if (np == -ione) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + ix = ione + ijx = ione + + x_m = x%get_nrows() + x_n = x%get_ncols() + + call psb_chkvect(x_m,x_n,x%get_nrows(),ix,ijx,desc_a,info,iix,jjx) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if (iix /= ione) then + info=psb_err_ix_n1_iy_n1_unsupported_ + call psb_errpush(info,name) + end if + + if(desc_a%get_local_rows() > 0) then + allocate(res(x_n,x_n)) + call x%qr_fact(res, info) + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return +end function psb_dqrfact diff --git a/base/psblas/psb_dspmm.f90 b/base/psblas/psb_dspmm.f90 index a006c7e9..a0365426 100644 --- a/base/psblas/psb_dspmm.f90 +++ b/base/psblas/psb_dspmm.f90 @@ -261,6 +261,234 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,& return end subroutine psb_dspmv_vect ! +! Subroutine: psb_dspm_vect +! Performs one of the distributed matrix-vector operations +! +! Y := alpha * Pr * A * Pc * X + beta * Y, or +! +! Y := alpha * Pr * A' * Pr * X + beta * Y, +! +! alpha and beta are scalars, X and Y are distributed +! vectors and A is a M-by-N distributed matrix. +! +! Arguments: +! alpha - real The scalar alpha. +! a - type(psb_dspmat_type). The sparse matrix containing A. +! x - type(psb_d_vect_type) The input vector containing the entries of ( X ). +! beta - real The scalar beta. +! y - type(psb_d_vect_type) The input vector containing the entries of ( Y ). +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! trans - character(optional). Whether A or A'. Default: 'N' +! work(:) - real,(optional). Working area. +! doswap - logical(optional). Whether to performe halo updates. +! +subroutine psb_dspmv_multivect(alpha, a, x, beta, y, desc_a, info, trans, work,doswap) + use psb_base_mod, psb_protect_name => psb_dspmv_multivect + use psi_mod + implicit none + + real(psb_dpk_), intent(in) :: alpha, beta + type(psb_d_multivect_type), intent(inout) :: x + type(psb_d_multivect_type), intent(inout) :: y + type(psb_dspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + integer(psb_ipk_), intent(out) :: info + real(psb_dpk_), optional, target, intent(inout) :: work(:) + character, intent(in), optional :: trans + logical, intent(in), optional :: doswap + + ! locals + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np, me,& + & err_act, iix, jjx, iia, jja, nrow, ncol, lldx, lldy, & + & liwork, iiy, jjy, ib, ip, idx + integer(psb_lpk_) :: ix, ijx, iy, ijy, m, n, ia, ja + integer(psb_ipk_), parameter :: nb=4 + real(psb_dpk_), pointer :: iwork(:), xp(:), yp(:) + real(psb_dpk_), allocatable :: xvsave(:,:) + character :: trans_ + character(len=20) :: name, ch_err + logical :: aliw, doswap_ + integer(psb_ipk_) :: debug_level, debug_unit + + name='psb_dspmv' + info=psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_ ; goto 9999 + end if + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + ctxt=desc_a%get_context() + call psb_info(ctxt, me, np) + if (np == -1) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(y%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (present(doswap)) then + doswap_ = doswap + else + doswap_ = .true. + endif + + if (present(trans)) then + trans_ = psb_toupper(trans) + else + trans_ = 'N' + endif + if ( (trans_ == 'N').or.(trans_ == 'T')& + & .or.(trans_ == 'C')) then + else + info = psb_err_iarg_invalid_value_ + call psb_errpush(info,name) + goto 9999 + end if + + m = desc_a%get_global_rows() + n = desc_a%get_global_cols() + nrow = desc_a%get_local_rows() + ncol = desc_a%get_local_cols() + lldx = x%get_nrows() + lldy = y%get_nrows() + if ((info == 0).and.(lldx null() + ! check for presence/size of a work area + liwork= 2*ncol + + if (present(work)) then + if (size(work) >= liwork) then + aliw =.false. + else + aliw=.true. + endif + else + aliw=.true. + end if + + if (aliw) then + allocate(iwork(liwork),stat=info) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='Allocate' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + else + iwork => work + endif + + if (debug_level >= psb_debug_comp_) & + & write(debug_unit,*) me,' ',trim(name),' Allocated work ', info + + if (trans_ == 'N') then + ! Matrix is not transposed + + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + + call psb_csmm(alpha,a,x,beta,y,info) + + if(info /= psb_success_) then + info = psb_err_from_subroutine_non_ + call psb_errpush(info,name) + goto 9999 + end if + + else + ! Matrix is transposed + + ! + ! Non-empty overlap, need a buffer to hold + ! the entries updated with average operator. + ! Why the average? because in this way they will contribute + ! with a proper scale factor (1/np) to the overall product. + ! + call psi_ovrl_save(x%v,xvsave,desc_a,info) + if (info == psb_success_) call psi_ovrl_upd(x%v,desc_a,psb_avg_,info) + + if (beta /= dzero) call y%set(dzero) + ! local Matrix-vector product + if (info == psb_success_) call psb_csmm(alpha,a,x,beta,y,info,trans=trans_) + + if (debug_level >= psb_debug_comp_) & + & write(debug_unit,*) me,' ',trim(name),' csmm ', info + + if (info == psb_success_) call psi_ovrl_restore(x%v,xvsave,desc_a,info) + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + ch_err='psb_csmm' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if (doswap_) then + call psi_swaptran(ior(psb_swap_send_,psb_swap_recv_),& + & done,y%v,desc_a,iwork,info) + if (info == psb_success_) call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & done,y%v,desc_a,iwork,info,data=psb_comm_ovr_) + + if (debug_level >= psb_debug_comp_) & + & write(debug_unit,*) me,' ',trim(name),' swaptran ', info + if(info /= psb_success_) then + info = psb_err_from_subroutine_ + ch_err='PSI_SwapTran' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + end if + + end if + + if (aliw) deallocate(iwork,stat=info) + if (debug_level >= psb_debug_comp_) & + & write(debug_unit,*) me,' ',trim(name),' deallocat ',aliw, info + if(info /= psb_success_) then + info = psb_err_from_subroutine_ + ch_err='Deallocate iwork' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + nullify(iwork) + + call psb_erractionrestore(err_act) + if (debug_level >= psb_debug_comp_) then + call psb_barrier(ctxt) + write(debug_unit,*) me,' ',trim(name),' Returning ' + endif + return + +9999 call psb_error_handler(ctxt,err_act) + + return +end subroutine psb_dspmv_multivect +! ! Subroutine: psb_dspmm ! Performs one of the distributed matrix-vector operations ! diff --git a/base/serial/impl/psb_d_base_mat_impl.F90 b/base/serial/impl/psb_d_base_mat_impl.F90 index 69112529..89cb6063 100644 --- a/base/serial/impl/psb_d_base_mat_impl.F90 +++ b/base/serial/impl/psb_d_base_mat_impl.F90 @@ -2012,6 +2012,26 @@ subroutine psb_d_base_vect_mv(alpha,a,x,beta,y,info,trans) call y%set_host() end subroutine psb_d_base_vect_mv +subroutine psb_d_base_multivect_mv(alpha,a,x,beta,y,info,trans) + use psb_error_mod + use psb_const_mod + use psb_d_base_mat_mod, psb_protect_name => psb_d_base_multivect_mv + implicit none + class(psb_d_base_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_multivect_type), intent(inout) :: x + class(psb_d_base_multivect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + ! For the time being we just throw everything back + ! onto the normal routines. + call x%sync() + call y%sync() + call a%spmm(alpha,x%v,beta,y%v,info,trans) + call y%set_host() +end subroutine psb_d_base_multivect_mv + subroutine psb_d_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) use psb_d_base_mat_mod, psb_protect_name => psb_d_base_vect_cssv use psb_d_base_vect_mod diff --git a/base/serial/impl/psb_d_mat_impl.F90 b/base/serial/impl/psb_d_mat_impl.F90 index 2a6fb9a5..4cdadced 100644 --- a/base/serial/impl/psb_d_mat_impl.F90 +++ b/base/serial/impl/psb_d_mat_impl.F90 @@ -2062,7 +2062,49 @@ subroutine psb_d_csmv_vect(alpha,a,x,beta,y,info,trans) end subroutine psb_d_csmv_vect +subroutine psb_d_csmv_multivect(alpha,a,x,beta,y,info,trans) + use psb_error_mod + use psb_d_multivect_mod + use psb_d_mat_mod, psb_protect_name => psb_d_csmv_multivect + implicit none + class(psb_dspmat_type), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + type(psb_d_multivect_type), intent(inout) :: x + type(psb_d_multivect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + integer(psb_ipk_) :: err_act + character(len=20) :: name='psb_csmv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (.not.allocated(a%a)) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(y%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + call a%a%spmm(alpha,x%v,beta,y%v,info,trans) + if (info /= psb_success_) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_d_csmv_multivect subroutine psb_d_cssm(alpha,a,x,beta,y,info,trans,scale,d) use psb_error_mod diff --git a/base/tools/psb_dallc.f90 b/base/tools/psb_dallc.f90 index 108e2000..52f979fc 100644 --- a/base/tools/psb_dallc.f90 +++ b/base/tools/psb_dallc.f90 @@ -378,3 +378,144 @@ subroutine psb_dalloc_multivect(x, desc_a,info,n, dupl, bldmode) return end subroutine psb_dalloc_multivect +! +! Function: psb_dalloc_multivect_r2 +! Allocates a vector of dense multivectors for PSBLAS routines. +! The descriptor may be in either the build or assembled state. +! +! Arguments: +! x - the multivector to be allocated. +! desc_a - the communication descriptor. +! info - Return code +! m - optional number of columns. +! n - optional number of columns of each multivector. +! lb - optional lower bound on column indices +! +subroutine psb_dalloc_multivect_r2(x, desc_a,info,m,n,lb, dupl, bldmode) + use psb_base_mod, psb_protect_name => psb_dalloc_multivect_r2 + use psi_mod + implicit none + + !....parameters... + type(psb_d_multivect_type), allocatable, intent(out) :: x(:) + type(psb_desc_type), intent(in) :: desc_a + integer(psb_ipk_),intent(out) :: info + integer(psb_ipk_), optional, intent(in) :: m,n,lb + integer(psb_ipk_), optional, intent(in) :: dupl, bldmode + + !locals + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np,me,nr,i,err_act, m_, n_, lb_ + integer(psb_ipk_) :: dupl_, bldmode_, nrmt_ + integer(psb_ipk_) :: exch(1) + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info=psb_success_ + if (psb_errstatus_fatal()) return + name='psb_geall' + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + ctxt=desc_a%get_context() + + call psb_info(ctxt, me, np) + ! ....verify blacs grid correctness.. + if (np == -1) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + + !... check m and n parameters.... + if (.not.desc_a%is_ok()) then + info = psb_err_invalid_cd_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (present(m)) then + m_ = m + else + m_ = 1 + endif + if (present(n)) then + n_ = n + else + n_ = 1 + endif + if (present(lb)) then + lb_ = lb + else + lb_ = 1 + endif + + !global check on n parameters + if (me == psb_root_) then + exch(1)=n_ + call psb_bcast(ctxt,exch(1),root=psb_root_) + else + call psb_bcast(ctxt,exch(1),root=psb_root_) + if (exch(1) /= n_) then + info=psb_err_parm_differs_among_procs_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + endif + ! As this is a rank-1 array, optional parameter N is actually ignored. + + !....allocate x ..... + if (desc_a%is_asb().or.desc_a%is_upd()) then + nr = max(1,desc_a%get_local_cols()) + else if (desc_a%is_bld()) then + nr = max(1,desc_a%get_local_rows()) + else + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='Invalid desc_a') + goto 9999 + endif + + allocate(x(lb_:lb_+m_-1), stat=info) + if (info == 0) then + do i=lb_, lb_+m_-1 + allocate(psb_d_base_multivect_type :: x(i)%v, stat=info) + if (info == 0) call x(i)%all(nr,n_,info) + if (info == 0) call x(i)%zero() + if (info /= 0) exit + end do + end if + + if (present(bldmode)) then + bldmode_ = bldmode + else + bldmode_ = psb_matbld_noremote_ + end if + if (present(dupl)) then + dupl_ = dupl + else + dupl_ = psb_dupl_def_ + end if + + do i=lb_, lb_+m_-1 + call x(i)%set_dupl(dupl_) + call x(i)%set_remote_build(bldmode_) + if (x(i)%is_remote_build()) then + nrmt_ = max(100,(desc_a%get_local_cols()-desc_a%get_local_rows())) + allocate(x(i)%rmtv(nrmt_,n_)) + end if + end do + if (psb_errstatus_fatal()) then + info=psb_err_alloc_request_ + call psb_errpush(info,name,i_err=(/nr/),a_err='real(psb_spk_)') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + +end subroutine psb_dalloc_multivect_r2 diff --git a/base/tools/psb_dasb.f90 b/base/tools/psb_dasb.f90 index 19a19ff1..4b2b2b6f 100644 --- a/base/tools/psb_dasb.f90 +++ b/base/tools/psb_dasb.f90 @@ -249,11 +249,13 @@ subroutine psb_dasb_multivect(x, desc_a, info, mold, scratch,n) character(len=20) :: name,ch_err info = psb_success_ - if (psb_errstatus_fatal()) return + name = 'psb_dgeasb_mlv' - name = 'psb_dgeasb' + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + endif - ctxt = desc_a%get_context() + ctxt = desc_a%get_context() debug_unit = psb_get_debug_unit() debug_level = psb_get_debug_level() @@ -317,3 +319,98 @@ subroutine psb_dasb_multivect(x, desc_a, info, mold, scratch,n) end subroutine psb_dasb_multivect +subroutine psb_dasb_multivect_r2(x, desc_a, info, mold, scratch, n) + use psb_base_mod, psb_protect_name => psb_dasb_multivect_r2 + implicit none + + type(psb_desc_type), intent(in) :: desc_a + type(psb_d_multivect_type), intent(inout) :: x(:) + integer(psb_ipk_), intent(out) :: info + class(psb_d_base_multivect_type), intent(in), optional :: mold + integer(psb_ipk_), optional, intent(in) :: n + logical, intent(in), optional :: scratch + + ! local variables + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np,me, i, n_arr + integer(psb_ipk_) :: i1sz,nrow,ncol, err_act, n_, dupl_ + logical :: scratch_ + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name,ch_err + + info = psb_success_ + name = 'psb_dgeasb_mv' + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_ ; goto 9999 + end if + + ctxt = desc_a%get_context() + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + scratch_ = .false. + if (present(scratch)) scratch_ = scratch + + if (present(n)) then + n_ = n + else + if (allocated(x(1)%v)) then + n_ = x(1)%v%get_ncols() + else + n_ = 1 + end if + endif + + call psb_info(ctxt, me, np) + + ! ....verify blacs grid correctness.. + if (np == -1) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + else if (.not.desc_a%is_ok()) then + info = psb_err_invalid_cd_state_ + call psb_errpush(info,name) + goto 9999 + end if + + nrow = desc_a%get_local_rows() + ncol = desc_a%get_local_cols() + n_arr = size(x) + if (debug_level >= psb_debug_ext_) & + & write(debug_unit,*) me,' ',trim(name),': sizes: ',nrow,ncol + + if (scratch_) then + do i=1,n_arr + call x(i)%free(info) + call x(i)%bld(ncol,n_,mold=mold) + end do + + else + do i=1,n_arr + dupl_ = x(i)%get_dupl() + call x(i)%asb(ncol,n_,info) + if (info /= 0) exit + ! ..update halo elements.. + call psb_halo(x(i),desc_a,info) + if (info /= 0) exit + call x(i)%cnv(mold) + end do + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_halo') + goto 9999 + end if + end if + if (debug_level >= psb_debug_ext_) & + & write(debug_unit,*) me,' ',trim(name),': end' + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + +end subroutine psb_dasb_multivect_r2 diff --git a/base/tools/psb_dfree.f90 b/base/tools/psb_dfree.f90 index 8e092dfa..34dc39b4 100644 --- a/base/tools/psb_dfree.f90 +++ b/base/tools/psb_dfree.f90 @@ -200,3 +200,55 @@ subroutine psb_dfree_multivect(x, desc_a, info) return end subroutine psb_dfree_multivect + +subroutine psb_dfree_multivect_r2(x, desc_a, info) + use psb_base_mod, psb_protect_name => psb_dfree_multivect_r2 + implicit none + !....parameters... + type(psb_d_multivect_type), allocatable, intent(inout) :: x(:) + type(psb_desc_type), intent(in) :: desc_a + integer(psb_ipk_), intent(out) :: info + !...locals.... + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np,me,err_act, i + character(len=20) :: name + + + info=psb_success_ + if (psb_errstatus_fatal()) return + call psb_erractionsave(err_act) + name='psb_dfreev' + + if (.not.desc_a%is_ok()) then + info = psb_err_invalid_cd_state_ + call psb_errpush(info,name) + goto 9999 + end if + ctxt = desc_a%get_context() + + call psb_info(ctxt, me, np) + if (np == -1) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + + + do i=lbound(x,1),ubound(x,1) + call x(i)%free(info) + if (info /= 0) exit + end do + if (info == 0) deallocate(x,stat=info) + if (info /= psb_no_err_) then + info=psb_err_alloc_dealloc_ + call psb_errpush(info,name) + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + +end subroutine psb_dfree_multivect_r2 diff --git a/krylov/Makefile b/krylov/Makefile index f71dbb1a..65abf37d 100644 --- a/krylov/Makefile +++ b/krylov/Makefile @@ -12,7 +12,7 @@ MODOBJS= psb_base_krylov_conv_mod.o \ psb_krylov_mod.o F90OBJS=psb_dkrylov.o psb_skrylov.o psb_ckrylov.o psb_zkrylov.o \ psb_dcgstab.o psb_dcg.o psb_dfcg.o psb_dgcr.o psb_dcgs.o \ - psb_dbicg.o psb_dcgstabl.o psb_drgmres.o\ + psb_dbicg.o psb_dcgstabl.o psb_drgmres.o psb_dbgmres.o\ psb_scgstab.o psb_scg.o psb_sfcg.o psb_sgcr.o psb_scgs.o \ psb_sbicg.o psb_scgstabl.o psb_srgmres.o\ psb_ccgstab.o psb_ccg.o psb_cfcg.o psb_cgcr.o psb_ccgs.o \ diff --git a/krylov/psb_dbgmres.f90 b/krylov/psb_dbgmres.f90 new file mode 100644 index 00000000..7e76fec8 --- /dev/null +++ b/krylov/psb_dbgmres.f90 @@ -0,0 +1,383 @@ +! File: psb_dbgmres.f90 +! +! Subroutine: psb_dbgmres +! This subroutine implements the BGMRES method with right preconditioning. +! +! Arguments: +! +! a - type(psb_dspmat_type) Input: sparse matrix containing A. +! prec - class(psb_dprec_type) Input: preconditioner +! b - real,dimension(:,:) Input: vector containing the +! right hand side B +! x - real,dimension(:,:) Input/Output: vector containing the +! initial guess and final solution X. +! eps - real Input: Stopping tolerance; the iteration is +! stopped when the error estimate |err| <= eps +! desc_a - type(psb_desc_type). Input: The communication descriptor. +! info - integer. Output: Return code +! +! itmax - integer(optional) Input: maximum number of iterations to be +! performed. +! +! iter - integer(optional) Output: how many iterations have been +! performed. +! performed. +! err - real (optional) Output: error estimate on exit. If the +! denominator of the estimate is exactly +! 0, it is changed into 1. +! itrace - integer(optional) Input: print an informational message +! with the error estimate every itrace +! iterations +! itrs - integer(optional) Input: iteration number parameter +! istop - integer(optional) Input: stopping criterion, or how +! to estimate the error. +! 1: err = |r|/(|a||x|+|b|); here the iteration is +! stopped when |r| <= eps * (|a||x|+|b|) +! 2: err = |r|/|b|; here the iteration is +! stopped when |r| <= eps * |b| +! where r is the (preconditioned, recursive +! estimate of) residual. +! + +subroutine psb_dbgmres_multivect(a, prec, b, x, eps, desc_a, info, itmax, iter, err, itrace, itrs, istop) + + use psb_base_mod + use psb_prec_mod + use psb_d_krylov_conv_mod + use psb_krylov_mod + + implicit none + + type(psb_dspmat_type), intent(in) :: a + type(psb_desc_type), Intent(in) :: desc_a + class(psb_dprec_type), intent(inout) :: prec + + type(psb_d_multivect_type), Intent(inout) :: b + type(psb_d_multivect_type), Intent(inout) :: x + + real(psb_dpk_), Intent(in) :: eps + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), Optional, Intent(in) :: itmax, itrace, itrs, istop + integer(psb_ipk_), Optional, Intent(out) :: iter + real(psb_dpk_), Optional, Intent(out) :: err + + real(psb_dpk_), allocatable :: aux(:), h(:,:), beta(:,:) + + type(psb_d_multivect_type), allocatable :: v(:) + type(psb_d_multivect_type) :: v_tot, w + + real(psb_dpk_) :: t1, t2 + + real(psb_dpk_) :: rti, rti1 + integer(psb_ipk_) :: litmax, naux, itrace_, n_row, n_col, nrep + integer(psb_lpk_) :: mglob, m, n, n_add + integer(psb_ipk_) :: n_ + + integer(psb_ipk_) :: i, j, istop_, err_act, idx_i, idx_j, idx + integer(psb_ipk_) :: debug_level, debug_unit + + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np, me, itx + real(psb_dpk_) :: rni, xni, bni, ani, bn2, r0n2 + real(psb_dpk_) :: errnum, errden, deps, derr + character(len=20) :: name + character(len=*), parameter :: methdname='BGMRES' + + info = psb_success_ + name = 'psb_dbgmres' + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + ctxt = desc_a%get_context() + call psb_info(ctxt, me, np) + + if (debug_level >= psb_debug_ext_) & + & write(debug_unit,*) me,' ',trim(name),': from psb_info',np + + if (.not.allocated(b%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + mglob = desc_a%get_global_rows() + n_row = desc_a%get_local_rows() + n_col = desc_a%get_local_cols() + + if (present(istop)) then + istop_ = istop + else + istop_ = 2 + endif + + if ((istop_ < 1 ).or.(istop_ > 2 ) ) then + info=psb_err_invalid_istop_ + err=info + call psb_errpush(info,name,i_err=(/istop_/)) + goto 9999 + endif + + if (present(itmax)) then + litmax = itmax + else + litmax = 1000 + endif + + if (present(itrace)) then + itrace_ = itrace + else + itrace_ = 0 + end if + + if (present(itrs)) then + nrep = itrs + if (debug_level >= psb_debug_ext_) & + & write(debug_unit,*) me,' ',trim(name),& + & ' present: itrs: ',itrs,nrep + else + nrep = 10 + if (debug_level >= psb_debug_ext_) & + & write(debug_unit,*) me,' ',trim(name),& + & ' not present: itrs: ',itrs,nrep + endif + if (nrep <=0 ) then + info=psb_err_invalid_irst_ + err=info + call psb_errpush(info,name,i_err=(/nrep/)) + goto 9999 + endif + + m = x%get_nrows() + n = x%get_ncols() + + call psb_chkvect(m,n,x%get_nrows(),lone,lone,desc_a,info) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_chkvect on X') + goto 9999 + end if + call psb_chkvect(m,n,b%get_nrows(),lone,lone,desc_a,info) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_chkvect on B') + goto 9999 + end if + + naux=4*n_col + allocate(aux(naux),h((nrep+1)*n,nrep*n),stat=info) + + n_ = n + if (info == psb_success_) call psb_geall(v,desc_a,info,m=nrep+1,n=n_) + if (info == psb_success_) call psb_geall(v_tot,desc_a,info,n=(nrep+1)*n_) + if (info == psb_success_) call psb_geall(w,desc_a,info,n=n_) + if (info == psb_success_) call psb_geasb(v,desc_a,info,mold=x%v,n=n_) + if (info == psb_success_) call psb_geasb(v_tot,desc_a,info,mold=x%v,n=(nrep+1)*n_) + if (info == psb_success_) call psb_geasb(w,desc_a,info,mold=x%v,n=n_) + if (info /= psb_success_) then + info=psb_err_from_subroutine_non_ + call psb_errpush(info,name) + goto 9999 + end if + + if (debug_level >= psb_debug_ext_) & + & write(debug_unit,*) me,' ',trim(name),& + & ' Size of V,W ',v(1)%get_nrows(),size(v),& + & w%get_nrows() + + if (istop_ == 1) then + ani = psb_spnrmi(a,desc_a,info) + bni = psb_geamax(b,desc_a,info) + else if (istop_ == 2) then + bn2 = psb_genrm2(b,desc_a,info) + endif + if (info /= psb_success_) then + info=psb_err_from_subroutine_non_ + call psb_errpush(info,name) + goto 9999 + end if + + errnum = dzero + errden = done + deps = eps + itx = 0 + n_add = n-1 + + if ((itrace_ > 0).and.(me == psb_root_)) call log_header(methdname) + + ! BGMRES algorithm + + ! TODO inserire timer operazioni tra mlv + + ! STEP 1: Compute R(0) = B - A*X(0) + + ! Store B in V(1) + call psb_geaxpby(done,b,dzero,v(1),desc_a,info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_non_ + call psb_errpush(info,name) + goto 9999 + end if + + ! Store R(0) in V(1) + call psb_spmm(-done,a,x,done,v(1),desc_a,info,work=aux) + if (info /= psb_success_) then + info=psb_err_from_subroutine_non_ + call psb_errpush(info,name) + goto 9999 + end if + + ! STEP 2: Compute QR_fact(R(0)) + beta = psb_geqrfact(v(1),desc_a,info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_non_ + call psb_errpush(info,name) + goto 9999 + end if + + ! STEP 3: Outer loop + outer: do j=1,nrep + + itx = itx + 1 + + ! Compute j index for H operations + idx_j = (j-1)*n+1 + + ! STEP 4: Compute W = AV(j) + call psb_spmm(done,a,v(j),dzero,w,desc_a,info,work=aux) + if (info /= psb_success_) then + info=psb_err_from_subroutine_non_ + call psb_errpush(info,name) + goto 9999 + end if + + if (itx >= litmax) exit outer + + ! STEP 5: Inner loop + inner: do i=1,j + + ! Compute i index for H operations + idx_i = (i-1)*n+1 + + ! STEP 6: Compute H(i,j) = V(i)_T*W + h(idx_i:idx_i+n_add,idx_j:idx_j+n_add) = psb_gedot(v(i),w,desc_a,info,.true.) + if (info /= psb_success_) then + info=psb_err_from_subroutine_non_ + call psb_errpush(info,name) + goto 9999 + end if + + ! STEP 7: Compute W = W - V(i)*H(i,j) + call psb_geaxpby(-done,psb_gedot(v(i),h(idx_i:idx_i+n_add,idx_j:idx_j+n_add),desc_a,info),done,w,desc_a,info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_non_ + call psb_errpush(info,name) + goto 9999 + end if + + end do inner + + ! STEP 8: Compute QR_fact(W) + + ! Store R in H(j+1,j) + h(idx_j+n:idx_j+n+n_add,idx_j:idx_j+n_add) = psb_geqrfact(w,desc_a,info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_non_ + call psb_errpush(info,name) + goto 9999 + end if + + ! Store Q in V(j+1) + call psb_geaxpby(done,w,dzero,v(j+1),desc_a,info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_non_ + call psb_errpush(info,name) + goto 9999 + end if + + end do outer + + ! STEP 9: Compute Y(m) + call frobenius_norm_min() + if (info /= psb_success_) then + info=psb_err_from_subroutine_non_ + call psb_errpush(info,name) + goto 9999 + end if + + ! STEP 10: Compute V = {V(1),...,V(m)} + do i=1,nrep+1 + idx = (i-1)*n+1 + v_tot%v%v(:,idx:idx+n_add) = v(i)%v%v(:,1:n) + enddo + + ! STEP 11: X(m) = X(0) + V*Y(m) + call psb_geaxpby(done,psb_gedot(v_tot,h,desc_a,info),done,x,desc_a,info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_non_ + call psb_errpush(info,name) + goto 9999 + end if + + ! END algorithm + + if (itrace_ > 0) call log_conv(methdname,me,itx,ione,errnum,errden,deps) + + call log_end(methdname,me,itx,itrace_,errnum,errden,deps,err=derr,iter=iter) + if (present(err)) err = derr + + if (info == psb_success_) call psb_gefree(v,desc_a,info) + if (info == psb_success_) call psb_gefree(v_tot,desc_a,info) + if (info == psb_success_) call psb_gefree(w,desc_a,info) + if (info == psb_success_) deallocate(aux,h,stat=info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_non_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + ! Minimize Frobenius norm + subroutine frobenius_norm_min() + + implicit none + + integer(psb_ipk_) :: lwork + real(psb_dpk_), allocatable :: work(:), beta_e1(:,:) + + integer(psb_ipk_) :: m_h, n_h, mn + + ! Initialize params + m_h = (nrep+1)*n + n_h = nrep*n + mn = min(m_h,n_h) + lwork = max(1,mn+max(mn,n)) + allocate(work(lwork)) + + ! Compute E1*beta + allocate(beta_e1(m_h,n)) + beta_e1 = dzero + beta_e1(1:n,1:n) = beta + + ! Compute min Frobenius norm + call dgels('N',m_h,n_h,n,h,m_h,beta_e1,m_h,work,lwork,info) + + deallocate(work,beta_e1) + + return + + end subroutine frobenius_norm_min + +end subroutine psb_dbgmres_multivect diff --git a/krylov/psb_dkrylov.f90 b/krylov/psb_dkrylov.f90 index d5d40eaf..36f48c66 100644 --- a/krylov/psb_dkrylov.f90 +++ b/krylov/psb_dkrylov.f90 @@ -225,4 +225,132 @@ Subroutine psb_dkrylov_vect(method,a,prec,b,x,eps,desc_a,info,& return end subroutine psb_dkrylov_vect +! +! +! Subroutine: psb_dkrylov_multivect +! +! Front-end for the Krylov subspace iterations, realversion +! +! Arguments: +! +! methd - character The specific method; can take the values: +! BGMRES +! +! a - type(psb_dspmat_type) Input: sparse matrix containing A. +! prec - class(psb_dprec_type) Input: preconditioner +! b - real,dimension(:,:) Input: multivector containing the +! right hand side B +! x - real,dimension(:,:) Input/Output: multivector containing the +! initial guess and final solution X. +! eps - real Input: Stopping tolerance; the iteration is +! stopped when the error +! estimate |err| <= eps +! +! desc_a - type(psb_desc_type). Input: The communication descriptor. +! info - integer. Output: Return code +! +! itmax - integer(optional) Input: maximum number of iterations to be +! performed. +! iter - integer(optional) Output: how many iterations have been +! performed. +! err - real (optional) Output: error estimate on exit +! itrace - integer(optional) Input: print an informational message +! with the error estimate every itrace +! iterations +! itrs - integer(optional) Input: number of iterations +! istop - integer(optional) Input: stopping criterion, or how +! to estimate the error. +! 1: err = |r|/(|a||x|+|b|) +! 2: err = |r|/|b| +! where r is the (preconditioned, recursive +! estimate of) residual +! +Subroutine psb_dkrylov_multivect(method,a,prec,b,x,eps,desc_a,info,& + & itmax,iter,err,itrace,itrs,istop) + + use psb_base_mod + use psb_prec_mod,only : psb_dprec_type + use psb_krylov_mod, psb_protect_name => psb_dkrylov_multivect + + character(len=*) :: method + Type(psb_dspmat_type), Intent(in) :: a + Type(psb_desc_type), Intent(in) :: desc_a + class(psb_dprec_type), intent(inout) :: prec + type(psb_d_multivect_type), Intent(inout) :: b + type(psb_d_multivect_type), Intent(inout) :: x + Real(psb_dpk_), Intent(in) :: eps + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), Optional, Intent(in) :: itmax, itrace, itrs, istop + integer(psb_ipk_), Optional, Intent(out) :: iter + Real(psb_dpk_), Optional, Intent(out) :: err + + abstract interface + subroutine psb_dkryl_multivect(a,prec,b,x,eps,desc_a,& + &info,itmax,iter,err,itrace,itrs,istop) + import :: psb_ipk_, psb_dpk_, psb_desc_type, & + & psb_dspmat_type, psb_dprec_type, psb_d_multivect_type + type(psb_dspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + type(psb_d_multivect_type), Intent(inout) :: b + type(psb_d_multivect_type), Intent(inout) :: x + real(psb_dpk_), intent(in) :: eps + class(psb_dprec_type), intent(inout) :: prec + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), optional, intent(in) :: itmax,itrace,itrs,istop + integer(psb_ipk_), optional, intent(out) :: iter + real(psb_dpk_), optional, intent(out) :: err + end subroutine psb_dkryl_multivect + end interface + + procedure(psb_dkryl_multivect) :: psb_dbgmres_multivect + + logical :: do_alloc_wrk + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: me, np, err_act, itrace_ + character(len=20) :: name + + info = psb_success_ + name = 'psb_krylov' + call psb_erractionsave(err_act) + + ctxt=desc_a%get_context() + + call psb_info(ctxt, me, np) + + if (present(itrace)) then + itrace_ = itrace + else + itrace_ = -1 + end if + + do_alloc_wrk = .not.prec%is_allocated_wrk() + if (do_alloc_wrk) call prec%mv_allocate_wrk(info,vmold=x%v,desc=desc_a) + + select case(psb_toupper(method)) + case('BGMRES','GMRES') + call psb_dbgmres_multivect(a,prec,b,x,eps,desc_a,info,& + & itmax,iter,err,itrace=itrace_,itrs=itrs,istop=istop) + case default + if (me == 0) write(psb_err_unit,*) trim(name),& + & ': Warning: Unknown method ',method,& + & ', defaulting to BGMRES' + call psb_dbgmres_multivect(a,prec,b,x,eps,desc_a,info,& + & itmax,iter,err,itrace=itrace_,itrs=itrs,istop=istop) + end select + + if ((info==psb_success_).and.do_alloc_wrk) call prec%free_wrk(info) + + if(info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err=trim(method)) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return +end subroutine psb_dkrylov_multivect diff --git a/krylov/psb_krylov_mod.f90 b/krylov/psb_krylov_mod.f90 index d8d4d904..5a30847e 100644 --- a/krylov/psb_krylov_mod.f90 +++ b/krylov/psb_krylov_mod.f90 @@ -124,6 +124,27 @@ Module psb_krylov_mod end Subroutine psb_zkrylov_vect + Subroutine psb_dkrylov_multivect(method,a,prec,b,x,eps,desc_a,info,& + & itmax,iter,err,itrace,itrs,istop) + + use psb_base_mod, only : psb_ipk_, psb_desc_type, psb_dspmat_type, & + & psb_dpk_, psb_d_multivect_type + use psb_prec_mod, only : psb_dprec_type + + character(len=*) :: method + Type(psb_dspmat_type), Intent(in) :: a + Type(psb_desc_type), Intent(in) :: desc_a + class(psb_dprec_type), intent(inout) :: prec + type(psb_d_multivect_type), Intent(inout) :: b + type(psb_d_multivect_type), Intent(inout) :: x + Real(psb_dpk_), Intent(in) :: eps + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), Optional, Intent(in) :: itmax, itrace, itrs, istop + integer(psb_ipk_), Optional, Intent(out) :: iter + Real(psb_dpk_), Optional, Intent(out) :: err + + end Subroutine psb_dkrylov_multivect + end interface diff --git a/prec/psb_d_base_prec_mod.f90 b/prec/psb_d_base_prec_mod.f90 index e3869193..3db07ca2 100644 --- a/prec/psb_d_base_prec_mod.f90 +++ b/prec/psb_d_base_prec_mod.f90 @@ -42,7 +42,8 @@ module psb_d_base_prec_mod & psb_erractionsave, psb_erractionrestore, psb_error, & & psb_errstatus_fatal, psb_success_,& & psb_d_base_sparse_mat, psb_dspmat_type, psb_d_csr_sparse_mat,& - & psb_d_base_vect_type, psb_d_vect_type, psb_i_base_vect_type + & psb_d_base_vect_type, psb_d_vect_type, psb_i_base_vect_type,& + & psb_d_base_multivect_type, psb_d_multivect_type, psb_i_base_multivect_type use psb_prec_const_mod @@ -62,7 +63,8 @@ module psb_d_base_prec_mod generic, public :: build => precbld generic, public :: descr => precdescr procedure, pass(prec) :: desc_prefix => psb_d_base_desc_prefix - procedure, pass(prec) :: allocate_wrk => psb_d_base_allocate_wrk + procedure, pass(prec) :: allocate_wrk => psb_d_base_allocate_wrk_vect + procedure, pass(prec) :: mv_allocate_wrk => psb_d_base_allocate_wrk_multivect procedure, pass(prec) :: free_wrk => psb_d_base_free_wrk procedure, pass(prec) :: is_allocated_wrk => psb_d_base_is_allocated_wrk procedure(psb_d_base_precbld), pass(prec), deferred :: precbld @@ -263,7 +265,7 @@ contains end subroutine psb_d_base_precsetc - subroutine psb_d_base_allocate_wrk(prec,info,vmold,desc) + subroutine psb_d_base_allocate_wrk_vect(prec,info,vmold,desc) use psb_base_mod implicit none @@ -295,7 +297,41 @@ contains 9999 call psb_error_handler(err_act) return - end subroutine psb_d_base_allocate_wrk + end subroutine psb_d_base_allocate_wrk_vect + + subroutine psb_d_base_allocate_wrk_multivect(prec,info,vmold,desc) + use psb_base_mod + implicit none + + ! Arguments + class(psb_d_base_prec_type), intent(inout) :: prec + integer(psb_ipk_), intent(out) :: info + class(psb_d_base_multivect_type), intent(in), optional :: vmold + type(psb_desc_type), intent(in), optional :: desc + + ! Local variables + integer(psb_ipk_) :: err_act + character(len=20) :: name + + info=psb_success_ + name = 'psb_d_allocate_wrk' + call psb_erractionsave(err_act) + + if (psb_get_errstatus().ne.0) goto 9999 + + ! + ! Base version does nothing. + ! + + info = psb_success_ + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_base_allocate_wrk_multivect subroutine psb_d_base_free_wrk(prec,info) use psb_base_mod diff --git a/prec/psb_d_prec_type.f90 b/prec/psb_d_prec_type.f90 index a5e3a7ad..831d0f3b 100644 --- a/prec/psb_d_prec_type.f90 +++ b/prec/psb_d_prec_type.f90 @@ -58,7 +58,8 @@ module psb_d_prec_type procedure, pass(prec) :: csetc => psb_dcprecsetc procedure, pass(prec) :: csetr => psb_dcprecsetr generic, public :: set => cseti, csetc, csetr - procedure, pass(prec) :: allocate_wrk => psb_d_allocate_wrk + procedure, pass(prec) :: allocate_wrk => psb_d_allocate_wrk_vect + procedure, pass(prec) :: mv_allocate_wrk => psb_d_allocate_wrk_multivect procedure, pass(prec) :: free_wrk => psb_d_free_wrk procedure, pass(prec) :: is_allocated_wrk => psb_d_is_allocated_wrk end type psb_dprec_type @@ -244,7 +245,7 @@ contains end subroutine psb_d_prec_dump - subroutine psb_d_allocate_wrk(prec,info,vmold,desc) + subroutine psb_d_allocate_wrk_vect(prec,info,vmold,desc) use psb_base_mod implicit none @@ -278,7 +279,43 @@ contains 9999 call psb_error_handler(err_act) return - end subroutine psb_d_allocate_wrk + end subroutine psb_d_allocate_wrk_vect + + subroutine psb_d_allocate_wrk_multivect(prec,info,vmold,desc) + use psb_base_mod + implicit none + + ! Arguments + class(psb_dprec_type), intent(inout) :: prec + integer(psb_ipk_), intent(out) :: info + class(psb_d_base_multivect_type), intent(in), optional :: vmold + type(psb_desc_type), intent(in), optional :: desc + + ! Local variables + integer(psb_ipk_) :: err_act + character(len=20) :: name + + info=psb_success_ + name = 'psb_d_allocate_wrk' + call psb_erractionsave(err_act) + + if (psb_get_errstatus().ne.0) goto 9999 + + if (.not.allocated(prec%prec)) then + info = -1 + write(psb_err_unit,*) 'Trying to allocate wrk to a non-built preconditioner' + return + end if + + call prec%prec%mv_allocate_wrk(info,vmold=vmold,desc=desc) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_allocate_wrk_multivect subroutine psb_d_free_wrk(prec,info) use psb_base_mod diff --git a/test/block_krylov/Makefile b/test/block_krylov/Makefile new file mode 100644 index 00000000..3107d696 --- /dev/null +++ b/test/block_krylov/Makefile @@ -0,0 +1,40 @@ +INSTALLDIR=../.. +INCDIR=$(INSTALLDIR)/include/ +MODDIR=$(INSTALLDIR)/modules/ +include $(INCDIR)/Make.inc.psblas +# +# Libraries used +# +LIBDIR=$(INSTALLDIR)/lib/ +PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base +LDLIBS=$(PSBLDLIBS) + +FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG). + +DBFOBJS=getp.o psb_dbf_sample.o + +EXEDIR=./runs + +all: runsd psb_dbf_sample + +runsd: + (if test ! -d runs ; then mkdir runs; fi) + +psb_dbf_sample.o: getp.o + +psb_dbf_sample: $(DBFOBJS) + $(FLINK) $(LOPT) $(DBFOBJS) -o psb_dbf_sample $(PSBLAS_LIB) $(LDLIBS) + /bin/mv psb_dbf_sample $(EXEDIR) + +.f90.o: + $(MPFC) $(FCOPT) $(FINCLUDES) $(FDEFINES) -c $< + +clean: + /bin/rm -f $(DBFOBJS)\ + *$(.mod) $(EXEDIR)/psb_*f_sample + +lib: + (cd ../../; make library) +verycleanlib: + (cd ../../; make veryclean) + diff --git a/test/block_krylov/getp.f90 b/test/block_krylov/getp.f90 new file mode 100644 index 00000000..344295d1 --- /dev/null +++ b/test/block_krylov/getp.f90 @@ -0,0 +1,169 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +Module getp + interface get_parms + module procedure get_dparms + end interface + +contains + ! + ! Get iteration parameters from the command line + ! + subroutine get_dparms(ctxt,mtrx_file,rhs_file,filefmt,kmethd,ptype,part,& + & afmt,nrhs,istopc,itmax,itrace,itrs,eps) + use psb_base_mod + type(psb_ctxt_type) :: ctxt + character(len=2) :: filefmt + character(len=40) :: kmethd, mtrx_file, rhs_file, ptype + character(len=20) :: part + integer(psb_ipk_) :: nrhs,iret,istopc,itmax,itrace,itrs + character(len=40) :: charbuf + real(psb_dpk_) :: eps + character :: afmt*5 + integer(psb_ipk_) :: np, iam + integer(psb_ipk_) :: inparms(40), ip, inp_unit + character(len=1024) :: filename + + call psb_info(ctxt,iam,np) + if (iam == 0) then + if (command_argument_count()>0) then + call get_command_argument(1,filename) + inp_unit = 30 + open(inp_unit,file=filename,action='read',iostat=info) + if (info /= 0) then + write(psb_err_unit,*) 'Could not open file ',filename,' for input' + call psb_abort(ctxt) + stop + else + write(psb_err_unit,*) 'Opened file ',trim(filename),' for input' + end if + else + inp_unit=psb_inp_unit + end if + ! Read Input Parameters + read(inp_unit,*) ip + if (ip >= 5) then + read(inp_unit,*) mtrx_file + read(inp_unit,*) rhs_file + read(inp_unit,*) filefmt + read(inp_unit,*) kmethd + read(inp_unit,*) ptype + read(inp_unit,*) afmt + read(inp_unit,*) part + + call psb_bcast(ctxt,mtrx_file) + call psb_bcast(ctxt,rhs_file) + call psb_bcast(ctxt,filefmt) + call psb_bcast(ctxt,kmethd) + call psb_bcast(ctxt,ptype) + call psb_bcast(ctxt,afmt) + call psb_bcast(ctxt,part) + + if (ip >= 7) then + read(inp_unit,*) nrhs + else + nrhs=4 + endif + if (ip >= 8) then + read(inp_unit,*) istopc + else + istopc=1 + endif + if (ip >= 9) then + read(inp_unit,*) itmax + else + itmax=500 + endif + if (ip >= 10) then + read(inp_unit,*) itrace + else + itrace=-1 + endif + if (ip >= 11) then + read(inp_unit,*) itrs + else + itrs = 1 + endif + if (ip >= 12) then + read(inp_unit,*) eps + else + eps=1.d-6 + endif + inparms(1) = nrhs + inparms(2) = istopc + inparms(3) = itmax + inparms(4) = itrace + inparms(5) = itrs + call psb_bcast(ctxt,inparms(1:5)) + call psb_bcast(ctxt,eps) + + write(psb_out_unit,'(" ")') + write(psb_out_unit,'("Solving matrix : ",a)') mtrx_file + write(psb_out_unit,'("Number of processors : ",i1)') np + write(psb_out_unit,'("Data distribution : ",a)') part + write(psb_out_unit,'("Iterative method : ",a)') kmethd + write(psb_out_unit,'("Preconditioner : ",a)') ptype + write(psb_out_unit,'("Number of RHS : ",i1)') nrhs + write(psb_out_unit,'("Number of iterations : ",i1)') itrs + write(psb_out_unit,'("Storage format : ",a)') afmt(1:3) + write(psb_out_unit,'(" ")') + else + write(psb_err_unit,*) 'Wrong format for input file' + call psb_abort(ctxt) + stop 1 + end if + if (inp_unit /= psb_inp_unit) then + close(inp_unit) + end if + else + ! Receive Parameters + call psb_bcast(ctxt,mtrx_file) + call psb_bcast(ctxt,rhs_file) + call psb_bcast(ctxt,filefmt) + call psb_bcast(ctxt,kmethd) + call psb_bcast(ctxt,ptype) + call psb_bcast(ctxt,afmt) + call psb_bcast(ctxt,part) + + call psb_bcast(ctxt,inparms(1:5)) + nrhs = inparms(1) + istopc = inparms(2) + itmax = inparms(3) + itrace = inparms(4) + itrs = inparms(5) + call psb_bcast(ctxt,eps) + + end if + + end subroutine get_dparms + +end module getp diff --git a/test/block_krylov/psb_dbf_sample.f90 b/test/block_krylov/psb_dbf_sample.f90 new file mode 100644 index 00000000..74216b3d --- /dev/null +++ b/test/block_krylov/psb_dbf_sample.f90 @@ -0,0 +1,265 @@ +program psb_dbf_sample + + use psb_base_mod + use psb_prec_mod + use psb_krylov_mod + use psb_util_mod + use getp + + implicit none + + ! input parameters + character(len=40) :: kmethd, ptype, mtrx_file, rhs_file + + ! sparse matrices + type(psb_dspmat_type) :: a + type(psb_ldspmat_type) :: aux_a + + ! preconditioner data + type(psb_dprec_type) :: prec + + ! dense matrices + real(psb_dpk_), allocatable, target :: aux_b(:,:) + real(psb_dpk_), allocatable, save :: x_mv_glob(:,:), r_mv_glob(:,:) + real(psb_dpk_), pointer :: b_mv_glob(:,:) + type(psb_d_multivect_type) :: b_mv, x_mv, r_mv + integer(psb_ipk_) :: m, nrhs + real(psb_dpk_) :: random_value + + ! communications data structure + type(psb_desc_type) :: desc_a + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: iam, np + integer(psb_lpk_) :: lnp + + ! solver paramters + integer(psb_ipk_) :: iter, itmax, ierr, itrace, ircode, methd, istopc, itrs + integer(psb_epk_) :: amatsize, precsize, descsize + real(psb_dpk_) :: err, eps + + ! input parameters + character(len=5) :: afmt + character(len=20) :: name, part + character(len=2) :: filefmt + integer(psb_ipk_), parameter :: iunit=12 + + ! other variables + integer(psb_ipk_) :: i, j, info + real(psb_dpk_) :: t1, t2, tprec + real(psb_dpk_) :: resmx, resmxp + integer(psb_ipk_), allocatable :: ivg(:) + + call psb_init(ctxt) + call psb_info(ctxt,iam,np) + + if (iam < 0) then + ! This should not happen, but just in case + call psb_exit(ctxt) + stop + endif + + name='psb_dbf_sample' + if(psb_errstatus_fatal()) goto 9999 + info=psb_success_ + call psb_set_errverbosity(itwo) + ! + ! Hello world + ! + if (iam == psb_root_) then + write(psb_out_unit,'(" ")') + write(psb_out_unit,'("Welcome to PSBLAS version: ",a)') psb_version_string_ + write(psb_out_unit,'("This is the ",a," sample program")') trim(name) + write(psb_out_unit,'(" ")') + end if + ! + ! get parameters + ! + call get_parms(ctxt,mtrx_file,rhs_file,filefmt,kmethd,ptype,& + & part,afmt,nrhs,istopc,itmax,itrace,itrs,eps) + + call psb_barrier(ctxt) + t1 = psb_wtime() + + if (iam == psb_root_) then + select case(psb_toupper(filefmt)) + case('MM') + ! For Matrix Market we have an input file for the matrix + ! and an (optional) second file for the RHS. + call mm_mat_read(aux_a,info,iunit=iunit,filename=mtrx_file) + if (info == psb_success_) then + if (rhs_file /= 'NONE') then + call mm_array_read(aux_b,info,iunit=iunit,filename=rhs_file) + end if + end if + + case ('HB') + ! For Harwell-Boeing we have a single file which may or may not + ! contain an RHS. + call hb_read(aux_a,info,iunit=iunit,b=aux_b,filename=mtrx_file) + + case default + info = -1 + write(psb_err_unit,*) 'Wrong choice for fileformat ', filefmt + end select + + if (info /= psb_success_) then + write(psb_err_unit,*) 'Error while reading input matrix ' + call psb_abort(ctxt) + end if + + m = aux_a%get_nrows() + call psb_bcast(ctxt,m) + + ! At this point aux_b may still be unallocated + if (size(aux_b) == m*nrhs) then + ! if any rhs were present, broadcast the first one + write(psb_err_unit,'("Ok, got an rhs ")') + b_mv_glob =>aux_b(:,:) + else + write(psb_out_unit,'("Generating an rhs...")') + write(psb_out_unit,'("Number of RHS: ",i1)') nrhs + write(psb_out_unit,'(" ")') + call psb_realloc(m,nrhs,aux_b,ircode) + if (ircode /= 0) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + endif + + b_mv_glob => aux_b(:,:) + do i=1, m + do j=1, nrhs + !b_mv_glob(i,j) = done + call random_number(random_value) + b_mv_glob(i,j) = random_value + enddo + enddo + endif + + else + call psb_bcast(ctxt,m) + + end if + + ! switch over different partition types + select case(psb_toupper(part)) + case('BLOCK') + if (iam == psb_root_) write(psb_out_unit,'("Partition type: block")') + call psb_matdist(aux_a,a,ctxt,desc_a,info,fmt=afmt,parts=part_block) + + case('GRAPH') + if (iam == psb_root_) then + write(psb_out_unit,'("Partition type: graph vector")') + write(psb_out_unit,'(" ")') + call aux_a%cscnv(info,type='csr') + lnp = np + call build_mtpart(aux_a,lnp) + + endif + call psb_barrier(ctxt) + call distr_mtpart(psb_root_,ctxt) + call getv_mtpart(ivg) + call psb_matdist(aux_a,a,ctxt,desc_a,info,fmt=afmt,vg=ivg) + + case default + if (iam == psb_root_) write(psb_out_unit,'("Partition type: block")') + call psb_matdist(aux_a,a,ctxt,desc_a,info,fmt=afmt,parts=part_block) + end select + + call psb_scatter(b_mv_glob,b_mv,desc_a,info,root=psb_root_) + call psb_geall(x_mv,desc_a,info,nrhs) + call x_mv%zero() + call psb_geasb(x_mv,desc_a,info) + call psb_geall(r_mv,desc_a,info,nrhs) + call r_mv%zero() + call psb_geasb(r_mv,desc_a,info) + + t2 = psb_wtime() - t1 + call psb_amx(ctxt, t2) + + if (iam == psb_root_) then + write(psb_out_unit,'(" ")') + write(psb_out_unit,'("Time to read and partition matrix : ",es12.5)')t2 + write(psb_out_unit,'(" ")') + end if + + ! building the preconditioner + call prec%init(ctxt,ptype,info) + t1 = psb_wtime() + call prec%build(a,desc_a,info) + tprec = psb_wtime()-t1 + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='psb_precbld') + goto 9999 + end if + + call psb_amx(ctxt,tprec) + + if(iam == psb_root_) then + write(psb_out_unit,'("Preconditioner time: ",es12.5)')tprec + write(psb_out_unit,'(" ")') + end if + + call psb_barrier(ctxt) + t1 = psb_wtime() + call psb_krylov(kmethd,a,prec,b_mv,x_mv,eps,desc_a,info,& + & itmax=itmax,iter=iter,err=err,itrace=itrace,& + & itrs=itrs,istop=istopc) + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 + call psb_amx(ctxt,t2) + call psb_geaxpby(done,b_mv,dzero,r_mv,desc_a,info) + call psb_spmm(-done,a,x_mv,done,r_mv,desc_a,info) + + resmx = psb_genrm2(r_mv,desc_a,info) + resmxp = psb_geamax(r_mv,desc_a,info) + + amatsize = a%sizeof() + descsize = desc_a%sizeof() + precsize = prec%sizeof() + + call psb_sum(ctxt,amatsize) + call psb_sum(ctxt,descsize) + call psb_sum(ctxt,precsize) + + if (iam == psb_root_) then + call prec%descr(info) + write(psb_out_unit,'(" ")') + write(psb_out_unit,'("Computed solution on: ",i8," processors")')np + write(psb_out_unit,'("Matrix: ",a)')mtrx_file + write(psb_out_unit,'("Storage format for A: ",a)')a%get_fmt() + write(psb_out_unit,'("Storage format for DESC_A: ",a)')desc_a%get_fmt() + write(psb_out_unit,'("Total memory occupation for A: ",i12)')amatsize + write(psb_out_unit,'("Total memory occupation for PREC: ",i12)')precsize + write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize + write(psb_out_unit,'("Iterations to convergence: ",i12)')iter + write(psb_out_unit,'("Error estimate on exit: ",es12.5)')err + write(psb_out_unit,'("Time to buil prec.: ",es12.5)')tprec + write(psb_out_unit,'("Time to solve system: ",es12.5)')t2 + write(psb_out_unit,'("Time per iteration: ",es12.5)')t2/(iter) + write(psb_out_unit,'("Total time: ",es12.5)')t2+tprec + write(psb_out_unit,'("Residual norm 2: ",es12.5)')resmx + write(psb_out_unit,'("Residual norm inf: ",es12.5)')resmxp + write(psb_out_unit,'(" ")') + end if + + call psb_gather(x_mv_glob,x_mv,desc_a,info,root=psb_root_) + if (info == psb_success_) call psb_gather(r_mv_glob,r_mv,desc_a,info,root=psb_root_) + if (info /= psb_success_) goto 9999 + +998 format(i8,4(2x,g20.14)) +993 format(i6,4(1x,e12.6)) + + call psb_gefree(b_mv, desc_a,info) + call psb_gefree(x_mv, desc_a,info) + call psb_spfree(a, desc_a,info) + call prec%free(info) + call psb_cdfree(desc_a,info) + call psb_exit(ctxt) + + return + +9999 call psb_error(ctxt) + + return + +end program psb_dbf_sample