From e2bd21d9bec0cd5b4cee4c411831feb39c5407c3 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Sun, 15 Dec 2019 18:37:28 +0000 Subject: [PATCH] Update internal documentation. --- base/comm/internals/psi_covrl_restr.f90 | 7 +- base/comm/internals/psi_covrl_restr_a.f90 | 5 + base/comm/internals/psi_covrl_save.f90 | 5 + base/comm/internals/psi_covrl_save_a.f90 | 8 +- base/comm/internals/psi_covrl_upd.f90 | 8 +- base/comm/internals/psi_covrl_upd_a.f90 | 6 + base/comm/internals/psi_cswapdata.F90 | 31 +- base/comm/internals/psi_cswapdata_a.F90 | 20 +- base/comm/internals/psi_cswaptran.F90 | 35 +- base/comm/internals/psi_cswaptran_a.F90 | 18 +- base/comm/internals/psi_dovrl_restr.f90 | 7 +- base/comm/internals/psi_dovrl_restr_a.f90 | 5 + base/comm/internals/psi_dovrl_save.f90 | 5 + base/comm/internals/psi_dovrl_save_a.f90 | 8 +- base/comm/internals/psi_dovrl_upd.f90 | 8 +- base/comm/internals/psi_dovrl_upd_a.f90 | 6 + base/comm/internals/psi_dswapdata.F90 | 31 +- base/comm/internals/psi_dswapdata_a.F90 | 20 +- base/comm/internals/psi_dswaptran.F90 | 35 +- base/comm/internals/psi_dswaptran_a.F90 | 18 +- base/comm/internals/psi_eovrl_restr_a.f90 | 5 + base/comm/internals/psi_eovrl_save_a.f90 | 8 +- base/comm/internals/psi_eovrl_upd_a.f90 | 6 + base/comm/internals/psi_eswapdata_a.F90 | 20 +- base/comm/internals/psi_eswaptran_a.F90 | 18 +- base/comm/internals/psi_iovrl_restr.f90 | 7 +- base/comm/internals/psi_iovrl_save.f90 | 5 + base/comm/internals/psi_iovrl_upd.f90 | 8 +- base/comm/internals/psi_iswapdata.F90 | 31 +- base/comm/internals/psi_iswaptran.F90 | 35 +- base/comm/internals/psi_lovrl_restr.f90 | 7 +- base/comm/internals/psi_lovrl_save.f90 | 5 + base/comm/internals/psi_lovrl_upd.f90 | 8 +- base/comm/internals/psi_lswapdata.F90 | 31 +- base/comm/internals/psi_lswaptran.F90 | 35 +- base/comm/internals/psi_movrl_restr_a.f90 | 5 + base/comm/internals/psi_movrl_save_a.f90 | 8 +- base/comm/internals/psi_movrl_upd_a.f90 | 6 + base/comm/internals/psi_mswapdata_a.F90 | 20 +- base/comm/internals/psi_mswaptran_a.F90 | 18 +- base/comm/internals/psi_sovrl_restr.f90 | 7 +- base/comm/internals/psi_sovrl_restr_a.f90 | 5 + base/comm/internals/psi_sovrl_save.f90 | 5 + base/comm/internals/psi_sovrl_save_a.f90 | 8 +- base/comm/internals/psi_sovrl_upd.f90 | 8 +- base/comm/internals/psi_sovrl_upd_a.f90 | 6 + base/comm/internals/psi_sswapdata.F90 | 31 +- base/comm/internals/psi_sswapdata_a.F90 | 20 +- base/comm/internals/psi_sswaptran.F90 | 35 +- base/comm/internals/psi_sswaptran_a.F90 | 18 +- base/comm/internals/psi_zovrl_restr.f90 | 7 +- base/comm/internals/psi_zovrl_restr_a.f90 | 5 + base/comm/internals/psi_zovrl_save.f90 | 5 + base/comm/internals/psi_zovrl_save_a.f90 | 8 +- base/comm/internals/psi_zovrl_upd.f90 | 8 +- base/comm/internals/psi_zovrl_upd_a.f90 | 6 + base/comm/internals/psi_zswapdata.F90 | 31 +- base/comm/internals/psi_zswapdata_a.F90 | 20 +- base/comm/internals/psi_zswaptran.F90 | 35 +- base/comm/internals/psi_zswaptran_a.F90 | 18 +- base/comm/psb_cgather.f90 | 12 +- base/comm/psb_cgather_a.f90 | 2 +- base/comm/psb_chalo.f90 | 32 +- base/comm/psb_covrl.f90 | 39 +- base/comm/psb_cscatter.F90 | 10 +- base/comm/psb_cscatter_a.F90 | 4 +- base/comm/psb_cspgather.F90 | 11 + base/comm/psb_dgather.f90 | 12 +- base/comm/psb_dgather_a.f90 | 2 +- base/comm/psb_dhalo.f90 | 32 +- base/comm/psb_dovrl.f90 | 39 +- base/comm/psb_dscatter.F90 | 10 +- base/comm/psb_dscatter_a.F90 | 4 +- base/comm/psb_dspgather.F90 | 11 + base/comm/psb_egather_a.f90 | 2 +- base/comm/psb_escatter_a.F90 | 4 +- base/comm/psb_igather.f90 | 12 +- base/comm/psb_ihalo.f90 | 32 +- base/comm/psb_iovrl.f90 | 39 +- base/comm/psb_iscatter.F90 | 10 +- base/comm/psb_ispgather.F90 | 11 + base/comm/psb_lgather.f90 | 12 +- base/comm/psb_lhalo.f90 | 32 +- base/comm/psb_lovrl.f90 | 39 +- base/comm/psb_lscatter.F90 | 10 +- base/comm/psb_lspgather.F90 | 11 + base/comm/psb_mgather_a.f90 | 2 +- base/comm/psb_mscatter_a.F90 | 4 +- base/comm/psb_sgather.f90 | 12 +- base/comm/psb_sgather_a.f90 | 2 +- base/comm/psb_shalo.f90 | 32 +- base/comm/psb_sovrl.f90 | 39 +- base/comm/psb_sscatter.F90 | 10 +- base/comm/psb_sscatter_a.F90 | 4 +- base/comm/psb_sspgather.F90 | 11 + base/comm/psb_zgather.f90 | 12 +- base/comm/psb_zgather_a.f90 | 2 +- base/comm/psb_zhalo.f90 | 32 +- base/comm/psb_zovrl.f90 | 39 +- base/comm/psb_zscatter.F90 | 10 +- base/comm/psb_zscatter_a.F90 | 4 +- base/comm/psb_zspgather.F90 | 11 + base/psblas/psb_camax.f90 | 22 +- base/psblas/psb_casum.f90 | 13 + base/psblas/psb_caxpby.f90 | 19 +- base/psblas/psb_cdot.f90 | 49 ++- base/psblas/psb_cnrm2.f90 | 29 +- base/psblas/psb_cnrmi.f90 | 5 +- base/psblas/psb_cspmm.f90 | 441 +++++++++++----------- base/psblas/psb_cspnrm1.f90 | 1 + base/psblas/psb_cspsm.f90 | 392 +++++++++---------- base/psblas/psb_damax.f90 | 22 +- base/psblas/psb_dasum.f90 | 13 + base/psblas/psb_daxpby.f90 | 19 +- base/psblas/psb_ddot.f90 | 49 ++- base/psblas/psb_dnrm2.f90 | 29 +- base/psblas/psb_dnrmi.f90 | 5 +- base/psblas/psb_dspmm.f90 | 441 +++++++++++----------- base/psblas/psb_dspnrm1.f90 | 1 + base/psblas/psb_dspsm.f90 | 392 +++++++++---------- base/psblas/psb_samax.f90 | 22 +- base/psblas/psb_sasum.f90 | 13 + base/psblas/psb_saxpby.f90 | 19 +- base/psblas/psb_sdot.f90 | 49 ++- base/psblas/psb_snrm2.f90 | 29 +- base/psblas/psb_snrmi.f90 | 5 +- base/psblas/psb_sspmm.f90 | 441 +++++++++++----------- base/psblas/psb_sspnrm1.f90 | 1 + base/psblas/psb_sspsm.f90 | 392 +++++++++---------- base/psblas/psb_zamax.f90 | 22 +- base/psblas/psb_zasum.f90 | 13 + base/psblas/psb_zaxpby.f90 | 19 +- base/psblas/psb_zdot.f90 | 49 ++- base/psblas/psb_znrm2.f90 | 29 +- base/psblas/psb_znrmi.f90 | 5 +- base/psblas/psb_zspmm.f90 | 441 +++++++++++----------- base/psblas/psb_zspnrm1.f90 | 1 + base/psblas/psb_zspsm.f90 | 392 +++++++++---------- base/serial/psb_camax_s.f90 | 2 +- base/serial/psb_casum_s.f90 | 4 +- base/serial/psb_damax_s.f90 | 2 +- base/serial/psb_dasum_s.f90 | 4 +- base/serial/psb_samax_s.f90 | 2 +- base/serial/psb_sasum_s.f90 | 4 +- base/serial/psb_zamax_s.f90 | 2 +- base/serial/psb_zasum_s.f90 | 4 +- test/pargen/psb_d_pde3d.f90 | 50 +-- test/pargen/runs/ppde.inp | 2 +- 148 files changed, 3186 insertions(+), 2292 deletions(-) diff --git a/base/comm/internals/psi_covrl_restr.f90 b/base/comm/internals/psi_covrl_restr.f90 index 98ecdd8b..996e85ad 100644 --- a/base/comm/internals/psi_covrl_restr.f90 +++ b/base/comm/internals/psi_covrl_restr.f90 @@ -28,8 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! +! +! Subroutine: psi_covrl_restr +! These subroutines restore the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap. +! +! ! - subroutine psi_covrl_restr_vect(x,xs,desc_a,info) use psi_mod, psi_protect_name => psi_covrl_restr_vect use psb_c_base_vect_mod diff --git a/base/comm/internals/psi_covrl_restr_a.f90 b/base/comm/internals/psi_covrl_restr_a.f90 index 46eb0563..3c814a81 100644 --- a/base/comm/internals/psi_covrl_restr_a.f90 +++ b/base/comm/internals/psi_covrl_restr_a.f90 @@ -28,6 +28,11 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! +! +! Subroutine: psi_covrl_restr +! These subroutines restore the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap. +! ! subroutine psi_covrl_restrr1(x,xs,desc_a,info) use psi_mod, psi_protect_name => psi_covrl_restrr1 diff --git a/base/comm/internals/psi_covrl_save.f90 b/base/comm/internals/psi_covrl_save.f90 index 41e65943..c48d2ade 100644 --- a/base/comm/internals/psi_covrl_save.f90 +++ b/base/comm/internals/psi_covrl_save.f90 @@ -28,6 +28,11 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! +! +! Subroutine: psi_covrl_save +! These subroutines save the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap. +! ! subroutine psi_covrl_save_vect(x,xs,desc_a,info) use psi_mod, psi_protect_name => psi_covrl_save_vect diff --git a/base/comm/internals/psi_covrl_save_a.f90 b/base/comm/internals/psi_covrl_save_a.f90 index d853db8d..d017d921 100644 --- a/base/comm/internals/psi_covrl_save_a.f90 +++ b/base/comm/internals/psi_covrl_save_a.f90 @@ -28,8 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! -! - +! +! +! Subroutine: psi_covrl_save +! These subroutines save the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap. +! subroutine psi_covrl_saver1(x,xs,desc_a,info) use psi_mod, psi_protect_name => psi_covrl_saver1 diff --git a/base/comm/internals/psi_covrl_upd.f90 b/base/comm/internals/psi_covrl_upd.f90 index f99cbb66..ce33845e 100644 --- a/base/comm/internals/psi_covrl_upd.f90 +++ b/base/comm/internals/psi_covrl_upd.f90 @@ -28,8 +28,14 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! +! +! Subroutine: psi_covrl_update +! These subroutines update the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap, +! or for the application of Additive Schwarz preconditioners. +! +! ! - subroutine psi_covrl_upd_vect(x,desc_a,update,info) use psi_mod, psi_protect_name => psi_covrl_upd_vect use psb_realloc_mod diff --git a/base/comm/internals/psi_covrl_upd_a.f90 b/base/comm/internals/psi_covrl_upd_a.f90 index 633747e5..33297731 100644 --- a/base/comm/internals/psi_covrl_upd_a.f90 +++ b/base/comm/internals/psi_covrl_upd_a.f90 @@ -28,6 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! +! +! Subroutine: psi_covrl_update +! These subroutines update the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap, +! or for the application of Additive Schwarz preconditioners. +! ! subroutine psi_covrl_updr1(x,desc_a,update,info) use psi_mod, psi_protect_name => psi_covrl_updr1 diff --git a/base/comm/internals/psi_cswapdata.F90 b/base/comm/internals/psi_cswapdata.F90 index 5f349de9..fed8afbf 100644 --- a/base/comm/internals/psi_cswapdata.F90 +++ b/base/comm/internals/psi_cswapdata.F90 @@ -32,8 +32,10 @@ ! ! File: psi_cswapdata.F90 ! -! Subroutine: psi_cswapdatam -! Does the data exchange among processes. Essentially this is doing +! +! +! Subroutine: psi_cswapdata_vect +! Implements the data exchange among processes. Essentially this is doing ! a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but ! it is capable of pruning empty exchanges, which are very likely in out ! application environment. All the variants have the same structure @@ -43,12 +45,15 @@ ! C complex(psb_spk_) ! Z complex(psb_dpk_) ! Basically the operation is as follows: on each process, we identify -! sections SND(Y) and RCV(Y); then we do a send on (PACK(SND(Y))); -! then we receive, and we do an update with Y = UNPACK(RCV(Y)) + BETA * Y -! but only on the elements involved in the UNPACK operation. +! sections SND(Y) and RCV(Y); then we do a SEND(PACK(GTH(SND(Y)))); +! then we receive, and we do an update with Y = SCT(RCV(Y)) + BETA * Y +! but only on the elements involved in the SCT operation. ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. +! This version works on encapsulated vectors, and uses their methods to do GTH and SCT, +! so that special versions (i.e. GPU vectors can override them ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -69,10 +74,10 @@ ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:,:) - X The data area +! beta - complex Choose overwrite or sum. +! y - type(psb_@x@_vect_type) The data area ! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! work(:) - complex Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data @@ -83,14 +88,6 @@ ! psb_comm_mov_ use ovr_mst_idx ! ! -! -! -! Subroutine: psi_cswapdata_vect -! Data exchange among processes. -! -! Takes care of Y an exanspulated vector. -! -! ! subroutine psi_cswapdata_vect(flag,beta,y,desc_a,work,info,data) @@ -426,7 +423,7 @@ end subroutine psi_cswap_vidx_vect ! Subroutine: psi_cswapdata_multivect ! Data exchange among processes. ! -! Takes care of Y an encaspulated vector. +! Takes care of Y an encaspulated multivector. ! ! subroutine psi_cswapdata_multivect(flag,beta,y,desc_a,work,info,data) diff --git a/base/comm/internals/psi_cswapdata_a.F90 b/base/comm/internals/psi_cswapdata_a.F90 index e700d7fd..2a113b17 100644 --- a/base/comm/internals/psi_cswapdata_a.F90 +++ b/base/comm/internals/psi_cswapdata_a.F90 @@ -33,9 +33,9 @@ ! File: psi_cswapdata.F90 ! ! Subroutine: psi_cswapdatam -! Does the data exchange among processes. Essentially this is doing +! Implements the data exchange among processes. Essentially this is doing ! a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but -! it is capable of pruning empty exchanges, which are very likely in out +! it is capable of pruning empty exchanges, which are very likely in our ! application environment. All the variants have the same structure ! In all these subroutines X may be: I Integer ! S real(psb_spk_) @@ -49,6 +49,7 @@ ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -69,10 +70,10 @@ ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:,:) - X The data area +! beta - complex Choose overwrite or sum. +! y(:,:) - complex The data area ! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! work(:) - complex Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data @@ -507,7 +508,7 @@ end subroutine psi_cswapidxm ! ! ! Subroutine: psi_cswapdatav -! Does the data exchange among processes. Essentially this is doing +! Implements the data exchange among processes. Essentially this is doing ! a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but ! it is capable of pruning empty exchanges, which are very likely in out ! application environment. All the variants have the same structure @@ -523,6 +524,7 @@ end subroutine psi_cswapidxm ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -543,10 +545,10 @@ end subroutine psi_cswapidxm ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:) - X The data area +! beta - complex Choose overwrite or sum. +! y(:) - complex The data area ! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! work(:) - complex Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data diff --git a/base/comm/internals/psi_cswaptran.F90 b/base/comm/internals/psi_cswaptran.F90 index ccfce96d..152cb045 100644 --- a/base/comm/internals/psi_cswaptran.F90 +++ b/base/comm/internals/psi_cswaptran.F90 @@ -32,8 +32,8 @@ ! ! File: psi_cswaptran.F90 ! -! Subroutine: psi_cswaptranm -! Does the data exchange among processes. This is similar to Xswapdata, but +! Subroutine: psi_cswaptran_vect +! Implements the data exchange among processes. This is similar to Xswapdata, but ! the list is read "in reverse", i.e. indices that are normally SENT are used ! for the RECEIVE part and vice-versa. This is the basic data exchange operation ! for doing the product of a sparse matrix by a vector. @@ -47,12 +47,16 @@ ! C complex(psb_spk_) ! Z complex(psb_dpk_) ! Basically the operation is as follows: on each process, we identify -! sections SND(Y) and RCV(Y); then we do a SEND(PACK(SND(Y))); -! then we receive, and we do an update with Y = UNPACK(RCV(Y)) + BETA * Y -! but only on the elements involved in the UNPACK operation. +! sections SND(Y) and RCV(Y); then we do a SEND(PACK(GTH(SND(Y)))); +! then we receive, and we do an update with Y = SCT(RCV(Y)) + BETA * Y +! but only on the elements involved in the SCT operation. ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. +! This version works on encapsulated vectors, and uses their methods to do GTH and SCT, +! so that special versions (i.e. GPU vectors can override them +! ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -73,10 +77,10 @@ ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:,:) - X The data area -! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! beta - complex Choose overwrite or sum. +! y - type(psb_c_vect_type) The data area +! desc_a - type(psb_desc_type). The communication descriptor. +! work(:) - complex Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data @@ -86,13 +90,6 @@ ! psb_comm_ovrl_ use ovrl_index ! psb_comm_mov_ use ovr_mst_idx ! -! -! -! Subroutine: psi_cswaptran_vect -! Data exchange among processes. -! -! Takes care of Y an exanspulated vector. -! ! subroutine psi_cswaptran_vect(flag,beta,y,desc_a,work,info,data) @@ -171,7 +168,7 @@ end subroutine psi_cswaptran_vect ! Subroutine: psi_ctran_vidx_vect ! Data exchange among processes. ! -! Takes care of Y an exanspulated vector. Relies on the gather/scatter methods +! Takes care of Y an encapsulated vector. Relies on the gather/scatter methods ! of vectors. ! ! The real workhorse: the outer routine will only choose the index list @@ -436,10 +433,10 @@ end subroutine psi_ctran_vidx_vect ! ! ! -! Subroutine: psi_cswaptran_vect +! Subroutine: psi_cswaptran_multivect ! Data exchange among processes. ! -! Takes care of Y an encaspulated vector. +! Takes care of Y an encaspulated multivector. ! ! subroutine psi_cswaptran_multivect(flag,beta,y,desc_a,work,info,data) diff --git a/base/comm/internals/psi_cswaptran_a.F90 b/base/comm/internals/psi_cswaptran_a.F90 index e48e0b81..ed061be6 100644 --- a/base/comm/internals/psi_cswaptran_a.F90 +++ b/base/comm/internals/psi_cswaptran_a.F90 @@ -33,7 +33,7 @@ ! File: psi_cswaptran.F90 ! ! Subroutine: psi_cswaptranm -! Does the data exchange among processes. This is similar to Xswapdata, but +! Implements the data exchange among processes. This is similar to Xswapdata, but ! the list is read "in reverse", i.e. indices that are normally SENT are used ! for the RECEIVE part and vice-versa. This is the basic data exchange operation ! for doing the product of a sparse matrix by a vector. @@ -53,6 +53,7 @@ ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -73,10 +74,10 @@ ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:,:) - X The data area +! beta - complex Choose overwrite or sum. +! y(:,:) - complex The data area ! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! work(:) - complex Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data @@ -516,7 +517,7 @@ end subroutine psi_ctranidxm ! ! ! Subroutine: psi_cswaptranv -! Does the data exchange among processes. This is similar to Xswapdata, but +! Implements the data exchange among processes. This is similar to Xswapdata, but ! the list is read "in reverse", i.e. indices that are normally SENT are used ! for the RECEIVE part and vice-versa. This is the basic data exchange operation ! for doing the product of a sparse matrix by a vector. @@ -536,6 +537,7 @@ end subroutine psi_ctranidxm ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -556,10 +558,10 @@ end subroutine psi_ctranidxm ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:) - X The data area +! beta - complex Choose overwrite or sum. +! y(:) - complex The data area ! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! work(:) - complex Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data diff --git a/base/comm/internals/psi_dovrl_restr.f90 b/base/comm/internals/psi_dovrl_restr.f90 index 326f32df..1d6695cc 100644 --- a/base/comm/internals/psi_dovrl_restr.f90 +++ b/base/comm/internals/psi_dovrl_restr.f90 @@ -28,8 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! +! +! Subroutine: psi_dovrl_restr +! These subroutines restore the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap. +! +! ! - subroutine psi_dovrl_restr_vect(x,xs,desc_a,info) use psi_mod, psi_protect_name => psi_dovrl_restr_vect use psb_d_base_vect_mod diff --git a/base/comm/internals/psi_dovrl_restr_a.f90 b/base/comm/internals/psi_dovrl_restr_a.f90 index 2d83c416..df47403b 100644 --- a/base/comm/internals/psi_dovrl_restr_a.f90 +++ b/base/comm/internals/psi_dovrl_restr_a.f90 @@ -28,6 +28,11 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! +! +! Subroutine: psi_dovrl_restr +! These subroutines restore the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap. +! ! subroutine psi_dovrl_restrr1(x,xs,desc_a,info) use psi_mod, psi_protect_name => psi_dovrl_restrr1 diff --git a/base/comm/internals/psi_dovrl_save.f90 b/base/comm/internals/psi_dovrl_save.f90 index 3f8a923b..0689ee1b 100644 --- a/base/comm/internals/psi_dovrl_save.f90 +++ b/base/comm/internals/psi_dovrl_save.f90 @@ -28,6 +28,11 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! +! +! Subroutine: psi_dovrl_save +! These subroutines save the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap. +! ! subroutine psi_dovrl_save_vect(x,xs,desc_a,info) use psi_mod, psi_protect_name => psi_dovrl_save_vect diff --git a/base/comm/internals/psi_dovrl_save_a.f90 b/base/comm/internals/psi_dovrl_save_a.f90 index 6bf57d87..fcc19e08 100644 --- a/base/comm/internals/psi_dovrl_save_a.f90 +++ b/base/comm/internals/psi_dovrl_save_a.f90 @@ -28,8 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! -! - +! +! +! Subroutine: psi_dovrl_save +! These subroutines save the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap. +! subroutine psi_dovrl_saver1(x,xs,desc_a,info) use psi_mod, psi_protect_name => psi_dovrl_saver1 diff --git a/base/comm/internals/psi_dovrl_upd.f90 b/base/comm/internals/psi_dovrl_upd.f90 index 867281ff..efbb2495 100644 --- a/base/comm/internals/psi_dovrl_upd.f90 +++ b/base/comm/internals/psi_dovrl_upd.f90 @@ -28,8 +28,14 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! +! +! Subroutine: psi_dovrl_update +! These subroutines update the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap, +! or for the application of Additive Schwarz preconditioners. +! +! ! - subroutine psi_dovrl_upd_vect(x,desc_a,update,info) use psi_mod, psi_protect_name => psi_dovrl_upd_vect use psb_realloc_mod diff --git a/base/comm/internals/psi_dovrl_upd_a.f90 b/base/comm/internals/psi_dovrl_upd_a.f90 index ccdfba89..8ad3db6b 100644 --- a/base/comm/internals/psi_dovrl_upd_a.f90 +++ b/base/comm/internals/psi_dovrl_upd_a.f90 @@ -28,6 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! +! +! Subroutine: psi_dovrl_update +! These subroutines update the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap, +! or for the application of Additive Schwarz preconditioners. +! ! subroutine psi_dovrl_updr1(x,desc_a,update,info) use psi_mod, psi_protect_name => psi_dovrl_updr1 diff --git a/base/comm/internals/psi_dswapdata.F90 b/base/comm/internals/psi_dswapdata.F90 index 58dd5d59..a59b9b79 100644 --- a/base/comm/internals/psi_dswapdata.F90 +++ b/base/comm/internals/psi_dswapdata.F90 @@ -32,8 +32,10 @@ ! ! File: psi_dswapdata.F90 ! -! Subroutine: psi_dswapdatam -! Does the data exchange among processes. Essentially this is doing +! +! +! Subroutine: psi_dswapdata_vect +! Implements the data exchange among processes. Essentially this is doing ! a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but ! it is capable of pruning empty exchanges, which are very likely in out ! application environment. All the variants have the same structure @@ -43,12 +45,15 @@ ! C complex(psb_spk_) ! Z complex(psb_dpk_) ! Basically the operation is as follows: on each process, we identify -! sections SND(Y) and RCV(Y); then we do a send on (PACK(SND(Y))); -! then we receive, and we do an update with Y = UNPACK(RCV(Y)) + BETA * Y -! but only on the elements involved in the UNPACK operation. +! sections SND(Y) and RCV(Y); then we do a SEND(PACK(GTH(SND(Y)))); +! then we receive, and we do an update with Y = SCT(RCV(Y)) + BETA * Y +! but only on the elements involved in the SCT operation. ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. +! This version works on encapsulated vectors, and uses their methods to do GTH and SCT, +! so that special versions (i.e. GPU vectors can override them ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -69,10 +74,10 @@ ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:,:) - X The data area +! beta - real Choose overwrite or sum. +! y - type(psb_@x@_vect_type) The data area ! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! work(:) - real Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data @@ -83,14 +88,6 @@ ! psb_comm_mov_ use ovr_mst_idx ! ! -! -! -! Subroutine: psi_dswapdata_vect -! Data exchange among processes. -! -! Takes care of Y an exanspulated vector. -! -! ! subroutine psi_dswapdata_vect(flag,beta,y,desc_a,work,info,data) @@ -426,7 +423,7 @@ end subroutine psi_dswap_vidx_vect ! Subroutine: psi_dswapdata_multivect ! Data exchange among processes. ! -! Takes care of Y an encaspulated vector. +! Takes care of Y an encaspulated multivector. ! ! subroutine psi_dswapdata_multivect(flag,beta,y,desc_a,work,info,data) diff --git a/base/comm/internals/psi_dswapdata_a.F90 b/base/comm/internals/psi_dswapdata_a.F90 index 00108cd6..b9d0aaae 100644 --- a/base/comm/internals/psi_dswapdata_a.F90 +++ b/base/comm/internals/psi_dswapdata_a.F90 @@ -33,9 +33,9 @@ ! File: psi_dswapdata.F90 ! ! Subroutine: psi_dswapdatam -! Does the data exchange among processes. Essentially this is doing +! Implements the data exchange among processes. Essentially this is doing ! a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but -! it is capable of pruning empty exchanges, which are very likely in out +! it is capable of pruning empty exchanges, which are very likely in our ! application environment. All the variants have the same structure ! In all these subroutines X may be: I Integer ! S real(psb_spk_) @@ -49,6 +49,7 @@ ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -69,10 +70,10 @@ ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:,:) - X The data area +! beta - real Choose overwrite or sum. +! y(:,:) - real The data area ! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! work(:) - real Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data @@ -507,7 +508,7 @@ end subroutine psi_dswapidxm ! ! ! Subroutine: psi_dswapdatav -! Does the data exchange among processes. Essentially this is doing +! Implements the data exchange among processes. Essentially this is doing ! a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but ! it is capable of pruning empty exchanges, which are very likely in out ! application environment. All the variants have the same structure @@ -523,6 +524,7 @@ end subroutine psi_dswapidxm ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -543,10 +545,10 @@ end subroutine psi_dswapidxm ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:) - X The data area +! beta - real Choose overwrite or sum. +! y(:) - real The data area ! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! work(:) - real Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data diff --git a/base/comm/internals/psi_dswaptran.F90 b/base/comm/internals/psi_dswaptran.F90 index f4a0c78d..1feee33c 100644 --- a/base/comm/internals/psi_dswaptran.F90 +++ b/base/comm/internals/psi_dswaptran.F90 @@ -32,8 +32,8 @@ ! ! File: psi_dswaptran.F90 ! -! Subroutine: psi_dswaptranm -! Does the data exchange among processes. This is similar to Xswapdata, but +! Subroutine: psi_dswaptran_vect +! Implements the data exchange among processes. This is similar to Xswapdata, but ! the list is read "in reverse", i.e. indices that are normally SENT are used ! for the RECEIVE part and vice-versa. This is the basic data exchange operation ! for doing the product of a sparse matrix by a vector. @@ -47,12 +47,16 @@ ! C complex(psb_spk_) ! Z complex(psb_dpk_) ! Basically the operation is as follows: on each process, we identify -! sections SND(Y) and RCV(Y); then we do a SEND(PACK(SND(Y))); -! then we receive, and we do an update with Y = UNPACK(RCV(Y)) + BETA * Y -! but only on the elements involved in the UNPACK operation. +! sections SND(Y) and RCV(Y); then we do a SEND(PACK(GTH(SND(Y)))); +! then we receive, and we do an update with Y = SCT(RCV(Y)) + BETA * Y +! but only on the elements involved in the SCT operation. ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. +! This version works on encapsulated vectors, and uses their methods to do GTH and SCT, +! so that special versions (i.e. GPU vectors can override them +! ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -73,10 +77,10 @@ ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:,:) - X The data area -! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! beta - real Choose overwrite or sum. +! y - type(psb_d_vect_type) The data area +! desc_a - type(psb_desc_type). The communication descriptor. +! work(:) - real Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data @@ -86,13 +90,6 @@ ! psb_comm_ovrl_ use ovrl_index ! psb_comm_mov_ use ovr_mst_idx ! -! -! -! Subroutine: psi_dswaptran_vect -! Data exchange among processes. -! -! Takes care of Y an exanspulated vector. -! ! subroutine psi_dswaptran_vect(flag,beta,y,desc_a,work,info,data) @@ -171,7 +168,7 @@ end subroutine psi_dswaptran_vect ! Subroutine: psi_dtran_vidx_vect ! Data exchange among processes. ! -! Takes care of Y an exanspulated vector. Relies on the gather/scatter methods +! Takes care of Y an encapsulated vector. Relies on the gather/scatter methods ! of vectors. ! ! The real workhorse: the outer routine will only choose the index list @@ -436,10 +433,10 @@ end subroutine psi_dtran_vidx_vect ! ! ! -! Subroutine: psi_dswaptran_vect +! Subroutine: psi_dswaptran_multivect ! Data exchange among processes. ! -! Takes care of Y an encaspulated vector. +! Takes care of Y an encaspulated multivector. ! ! subroutine psi_dswaptran_multivect(flag,beta,y,desc_a,work,info,data) diff --git a/base/comm/internals/psi_dswaptran_a.F90 b/base/comm/internals/psi_dswaptran_a.F90 index 411a34cc..aad6348e 100644 --- a/base/comm/internals/psi_dswaptran_a.F90 +++ b/base/comm/internals/psi_dswaptran_a.F90 @@ -33,7 +33,7 @@ ! File: psi_dswaptran.F90 ! ! Subroutine: psi_dswaptranm -! Does the data exchange among processes. This is similar to Xswapdata, but +! Implements the data exchange among processes. This is similar to Xswapdata, but ! the list is read "in reverse", i.e. indices that are normally SENT are used ! for the RECEIVE part and vice-versa. This is the basic data exchange operation ! for doing the product of a sparse matrix by a vector. @@ -53,6 +53,7 @@ ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -73,10 +74,10 @@ ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:,:) - X The data area +! beta - real Choose overwrite or sum. +! y(:,:) - real The data area ! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! work(:) - real Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data @@ -516,7 +517,7 @@ end subroutine psi_dtranidxm ! ! ! Subroutine: psi_dswaptranv -! Does the data exchange among processes. This is similar to Xswapdata, but +! Implements the data exchange among processes. This is similar to Xswapdata, but ! the list is read "in reverse", i.e. indices that are normally SENT are used ! for the RECEIVE part and vice-versa. This is the basic data exchange operation ! for doing the product of a sparse matrix by a vector. @@ -536,6 +537,7 @@ end subroutine psi_dtranidxm ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -556,10 +558,10 @@ end subroutine psi_dtranidxm ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:) - X The data area +! beta - real Choose overwrite or sum. +! y(:) - real The data area ! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! work(:) - real Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data diff --git a/base/comm/internals/psi_eovrl_restr_a.f90 b/base/comm/internals/psi_eovrl_restr_a.f90 index fe981855..3dbb2ac4 100644 --- a/base/comm/internals/psi_eovrl_restr_a.f90 +++ b/base/comm/internals/psi_eovrl_restr_a.f90 @@ -28,6 +28,11 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! +! +! Subroutine: psi_eovrl_restr +! These subroutines restore the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap. +! ! subroutine psi_eovrl_restrr1(x,xs,desc_a,info) use psi_mod, psi_protect_name => psi_eovrl_restrr1 diff --git a/base/comm/internals/psi_eovrl_save_a.f90 b/base/comm/internals/psi_eovrl_save_a.f90 index de6878f0..4f0b7d30 100644 --- a/base/comm/internals/psi_eovrl_save_a.f90 +++ b/base/comm/internals/psi_eovrl_save_a.f90 @@ -28,8 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! -! - +! +! +! Subroutine: psi_eovrl_save +! These subroutines save the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap. +! subroutine psi_eovrl_saver1(x,xs,desc_a,info) use psi_mod, psi_protect_name => psi_eovrl_saver1 diff --git a/base/comm/internals/psi_eovrl_upd_a.f90 b/base/comm/internals/psi_eovrl_upd_a.f90 index f8589e41..e8e40738 100644 --- a/base/comm/internals/psi_eovrl_upd_a.f90 +++ b/base/comm/internals/psi_eovrl_upd_a.f90 @@ -28,6 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! +! +! Subroutine: psi_eovrl_update +! These subroutines update the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap, +! or for the application of Additive Schwarz preconditioners. +! ! subroutine psi_eovrl_updr1(x,desc_a,update,info) use psi_mod, psi_protect_name => psi_eovrl_updr1 diff --git a/base/comm/internals/psi_eswapdata_a.F90 b/base/comm/internals/psi_eswapdata_a.F90 index 55571aa0..f7c67ac3 100644 --- a/base/comm/internals/psi_eswapdata_a.F90 +++ b/base/comm/internals/psi_eswapdata_a.F90 @@ -33,9 +33,9 @@ ! File: psi_eswapdata.F90 ! ! Subroutine: psi_eswapdatam -! Does the data exchange among processes. Essentially this is doing +! Implements the data exchange among processes. Essentially this is doing ! a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but -! it is capable of pruning empty exchanges, which are very likely in out +! it is capable of pruning empty exchanges, which are very likely in our ! application environment. All the variants have the same structure ! In all these subroutines X may be: I Integer ! S real(psb_spk_) @@ -49,6 +49,7 @@ ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -69,10 +70,10 @@ ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:,:) - X The data area +! beta - integer Choose overwrite or sum. +! y(:,:) - integer The data area ! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! work(:) - integer Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data @@ -507,7 +508,7 @@ end subroutine psi_eswapidxm ! ! ! Subroutine: psi_eswapdatav -! Does the data exchange among processes. Essentially this is doing +! Implements the data exchange among processes. Essentially this is doing ! a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but ! it is capable of pruning empty exchanges, which are very likely in out ! application environment. All the variants have the same structure @@ -523,6 +524,7 @@ end subroutine psi_eswapidxm ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -543,10 +545,10 @@ end subroutine psi_eswapidxm ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:) - X The data area +! beta - integer Choose overwrite or sum. +! y(:) - integer The data area ! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! work(:) - integer Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data diff --git a/base/comm/internals/psi_eswaptran_a.F90 b/base/comm/internals/psi_eswaptran_a.F90 index 41f7abb9..42954369 100644 --- a/base/comm/internals/psi_eswaptran_a.F90 +++ b/base/comm/internals/psi_eswaptran_a.F90 @@ -33,7 +33,7 @@ ! File: psi_eswaptran.F90 ! ! Subroutine: psi_eswaptranm -! Does the data exchange among processes. This is similar to Xswapdata, but +! Implements the data exchange among processes. This is similar to Xswapdata, but ! the list is read "in reverse", i.e. indices that are normally SENT are used ! for the RECEIVE part and vice-versa. This is the basic data exchange operation ! for doing the product of a sparse matrix by a vector. @@ -53,6 +53,7 @@ ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -73,10 +74,10 @@ ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:,:) - X The data area +! beta - integer Choose overwrite or sum. +! y(:,:) - integer The data area ! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! work(:) - integer Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data @@ -516,7 +517,7 @@ end subroutine psi_etranidxm ! ! ! Subroutine: psi_eswaptranv -! Does the data exchange among processes. This is similar to Xswapdata, but +! Implements the data exchange among processes. This is similar to Xswapdata, but ! the list is read "in reverse", i.e. indices that are normally SENT are used ! for the RECEIVE part and vice-versa. This is the basic data exchange operation ! for doing the product of a sparse matrix by a vector. @@ -536,6 +537,7 @@ end subroutine psi_etranidxm ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -556,10 +558,10 @@ end subroutine psi_etranidxm ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:) - X The data area +! beta - integer Choose overwrite or sum. +! y(:) - integer The data area ! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! work(:) - integer Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data diff --git a/base/comm/internals/psi_iovrl_restr.f90 b/base/comm/internals/psi_iovrl_restr.f90 index 89ff5ee0..b0efa2ee 100644 --- a/base/comm/internals/psi_iovrl_restr.f90 +++ b/base/comm/internals/psi_iovrl_restr.f90 @@ -28,8 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! +! +! Subroutine: psi_iovrl_restr +! These subroutines restore the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap. +! +! ! - subroutine psi_iovrl_restr_vect(x,xs,desc_a,info) use psi_mod, psi_protect_name => psi_iovrl_restr_vect use psb_i_base_vect_mod diff --git a/base/comm/internals/psi_iovrl_save.f90 b/base/comm/internals/psi_iovrl_save.f90 index b48a7dbc..cc7619f9 100644 --- a/base/comm/internals/psi_iovrl_save.f90 +++ b/base/comm/internals/psi_iovrl_save.f90 @@ -28,6 +28,11 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! +! +! Subroutine: psi_iovrl_save +! These subroutines save the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap. +! ! subroutine psi_iovrl_save_vect(x,xs,desc_a,info) use psi_mod, psi_protect_name => psi_iovrl_save_vect diff --git a/base/comm/internals/psi_iovrl_upd.f90 b/base/comm/internals/psi_iovrl_upd.f90 index a84a1cc0..988bf006 100644 --- a/base/comm/internals/psi_iovrl_upd.f90 +++ b/base/comm/internals/psi_iovrl_upd.f90 @@ -28,8 +28,14 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! +! +! Subroutine: psi_iovrl_update +! These subroutines update the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap, +! or for the application of Additive Schwarz preconditioners. +! +! ! - subroutine psi_iovrl_upd_vect(x,desc_a,update,info) use psi_mod, psi_protect_name => psi_iovrl_upd_vect use psb_realloc_mod diff --git a/base/comm/internals/psi_iswapdata.F90 b/base/comm/internals/psi_iswapdata.F90 index 335de5d7..d05ab8ac 100644 --- a/base/comm/internals/psi_iswapdata.F90 +++ b/base/comm/internals/psi_iswapdata.F90 @@ -32,8 +32,10 @@ ! ! File: psi_iswapdata.F90 ! -! Subroutine: psi_iswapdatam -! Does the data exchange among processes. Essentially this is doing +! +! +! Subroutine: psi_iswapdata_vect +! Implements the data exchange among processes. Essentially this is doing ! a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but ! it is capable of pruning empty exchanges, which are very likely in out ! application environment. All the variants have the same structure @@ -43,12 +45,15 @@ ! C complex(psb_spk_) ! Z complex(psb_dpk_) ! Basically the operation is as follows: on each process, we identify -! sections SND(Y) and RCV(Y); then we do a send on (PACK(SND(Y))); -! then we receive, and we do an update with Y = UNPACK(RCV(Y)) + BETA * Y -! but only on the elements involved in the UNPACK operation. +! sections SND(Y) and RCV(Y); then we do a SEND(PACK(GTH(SND(Y)))); +! then we receive, and we do an update with Y = SCT(RCV(Y)) + BETA * Y +! but only on the elements involved in the SCT operation. ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. +! This version works on encapsulated vectors, and uses their methods to do GTH and SCT, +! so that special versions (i.e. GPU vectors can override them ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -69,10 +74,10 @@ ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:,:) - X The data area +! beta - integer Choose overwrite or sum. +! y - type(psb_@x@_vect_type) The data area ! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! work(:) - integer Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data @@ -83,14 +88,6 @@ ! psb_comm_mov_ use ovr_mst_idx ! ! -! -! -! Subroutine: psi_iswapdata_vect -! Data exchange among processes. -! -! Takes care of Y an exanspulated vector. -! -! ! subroutine psi_iswapdata_vect(flag,beta,y,desc_a,work,info,data) @@ -426,7 +423,7 @@ end subroutine psi_iswap_vidx_vect ! Subroutine: psi_iswapdata_multivect ! Data exchange among processes. ! -! Takes care of Y an encaspulated vector. +! Takes care of Y an encaspulated multivector. ! ! subroutine psi_iswapdata_multivect(flag,beta,y,desc_a,work,info,data) diff --git a/base/comm/internals/psi_iswaptran.F90 b/base/comm/internals/psi_iswaptran.F90 index d77cfd66..e5719735 100644 --- a/base/comm/internals/psi_iswaptran.F90 +++ b/base/comm/internals/psi_iswaptran.F90 @@ -32,8 +32,8 @@ ! ! File: psi_iswaptran.F90 ! -! Subroutine: psi_iswaptranm -! Does the data exchange among processes. This is similar to Xswapdata, but +! Subroutine: psi_iswaptran_vect +! Implements the data exchange among processes. This is similar to Xswapdata, but ! the list is read "in reverse", i.e. indices that are normally SENT are used ! for the RECEIVE part and vice-versa. This is the basic data exchange operation ! for doing the product of a sparse matrix by a vector. @@ -47,12 +47,16 @@ ! C complex(psb_spk_) ! Z complex(psb_dpk_) ! Basically the operation is as follows: on each process, we identify -! sections SND(Y) and RCV(Y); then we do a SEND(PACK(SND(Y))); -! then we receive, and we do an update with Y = UNPACK(RCV(Y)) + BETA * Y -! but only on the elements involved in the UNPACK operation. +! sections SND(Y) and RCV(Y); then we do a SEND(PACK(GTH(SND(Y)))); +! then we receive, and we do an update with Y = SCT(RCV(Y)) + BETA * Y +! but only on the elements involved in the SCT operation. ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. +! This version works on encapsulated vectors, and uses their methods to do GTH and SCT, +! so that special versions (i.e. GPU vectors can override them +! ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -73,10 +77,10 @@ ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:,:) - X The data area -! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! beta - integer Choose overwrite or sum. +! y - type(psb_i_vect_type) The data area +! desc_a - type(psb_desc_type). The communication descriptor. +! work(:) - integer Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data @@ -86,13 +90,6 @@ ! psb_comm_ovrl_ use ovrl_index ! psb_comm_mov_ use ovr_mst_idx ! -! -! -! Subroutine: psi_iswaptran_vect -! Data exchange among processes. -! -! Takes care of Y an exanspulated vector. -! ! subroutine psi_iswaptran_vect(flag,beta,y,desc_a,work,info,data) @@ -171,7 +168,7 @@ end subroutine psi_iswaptran_vect ! Subroutine: psi_itran_vidx_vect ! Data exchange among processes. ! -! Takes care of Y an exanspulated vector. Relies on the gather/scatter methods +! Takes care of Y an encapsulated vector. Relies on the gather/scatter methods ! of vectors. ! ! The real workhorse: the outer routine will only choose the index list @@ -436,10 +433,10 @@ end subroutine psi_itran_vidx_vect ! ! ! -! Subroutine: psi_iswaptran_vect +! Subroutine: psi_iswaptran_multivect ! Data exchange among processes. ! -! Takes care of Y an encaspulated vector. +! Takes care of Y an encaspulated multivector. ! ! subroutine psi_iswaptran_multivect(flag,beta,y,desc_a,work,info,data) diff --git a/base/comm/internals/psi_lovrl_restr.f90 b/base/comm/internals/psi_lovrl_restr.f90 index ba96e9c0..dc33bbb1 100644 --- a/base/comm/internals/psi_lovrl_restr.f90 +++ b/base/comm/internals/psi_lovrl_restr.f90 @@ -28,8 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! +! +! Subroutine: psi_lovrl_restr +! These subroutines restore the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap. +! +! ! - subroutine psi_lovrl_restr_vect(x,xs,desc_a,info) use psi_mod, psi_protect_name => psi_lovrl_restr_vect use psb_l_base_vect_mod diff --git a/base/comm/internals/psi_lovrl_save.f90 b/base/comm/internals/psi_lovrl_save.f90 index 5a06c697..496eec90 100644 --- a/base/comm/internals/psi_lovrl_save.f90 +++ b/base/comm/internals/psi_lovrl_save.f90 @@ -28,6 +28,11 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! +! +! Subroutine: psi_lovrl_save +! These subroutines save the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap. +! ! subroutine psi_lovrl_save_vect(x,xs,desc_a,info) use psi_mod, psi_protect_name => psi_lovrl_save_vect diff --git a/base/comm/internals/psi_lovrl_upd.f90 b/base/comm/internals/psi_lovrl_upd.f90 index 4364ac6f..d20d80cc 100644 --- a/base/comm/internals/psi_lovrl_upd.f90 +++ b/base/comm/internals/psi_lovrl_upd.f90 @@ -28,8 +28,14 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! +! +! Subroutine: psi_lovrl_update +! These subroutines update the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap, +! or for the application of Additive Schwarz preconditioners. +! +! ! - subroutine psi_lovrl_upd_vect(x,desc_a,update,info) use psi_mod, psi_protect_name => psi_lovrl_upd_vect use psb_realloc_mod diff --git a/base/comm/internals/psi_lswapdata.F90 b/base/comm/internals/psi_lswapdata.F90 index 67a4f0b8..939f9596 100644 --- a/base/comm/internals/psi_lswapdata.F90 +++ b/base/comm/internals/psi_lswapdata.F90 @@ -32,8 +32,10 @@ ! ! File: psi_lswapdata.F90 ! -! Subroutine: psi_lswapdatam -! Does the data exchange among processes. Essentially this is doing +! +! +! Subroutine: psi_lswapdata_vect +! Implements the data exchange among processes. Essentially this is doing ! a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but ! it is capable of pruning empty exchanges, which are very likely in out ! application environment. All the variants have the same structure @@ -43,12 +45,15 @@ ! C complex(psb_spk_) ! Z complex(psb_dpk_) ! Basically the operation is as follows: on each process, we identify -! sections SND(Y) and RCV(Y); then we do a send on (PACK(SND(Y))); -! then we receive, and we do an update with Y = UNPACK(RCV(Y)) + BETA * Y -! but only on the elements involved in the UNPACK operation. +! sections SND(Y) and RCV(Y); then we do a SEND(PACK(GTH(SND(Y)))); +! then we receive, and we do an update with Y = SCT(RCV(Y)) + BETA * Y +! but only on the elements involved in the SCT operation. ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. +! This version works on encapsulated vectors, and uses their methods to do GTH and SCT, +! so that special versions (i.e. GPU vectors can override them ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -69,10 +74,10 @@ ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:,:) - X The data area +! beta - integer Choose overwrite or sum. +! y - type(psb_@x@_vect_type) The data area ! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! work(:) - integer Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data @@ -83,14 +88,6 @@ ! psb_comm_mov_ use ovr_mst_idx ! ! -! -! -! Subroutine: psi_lswapdata_vect -! Data exchange among processes. -! -! Takes care of Y an exanspulated vector. -! -! ! subroutine psi_lswapdata_vect(flag,beta,y,desc_a,work,info,data) @@ -426,7 +423,7 @@ end subroutine psi_lswap_vidx_vect ! Subroutine: psi_lswapdata_multivect ! Data exchange among processes. ! -! Takes care of Y an encaspulated vector. +! Takes care of Y an encaspulated multivector. ! ! subroutine psi_lswapdata_multivect(flag,beta,y,desc_a,work,info,data) diff --git a/base/comm/internals/psi_lswaptran.F90 b/base/comm/internals/psi_lswaptran.F90 index 28b0311a..ccc1b6e3 100644 --- a/base/comm/internals/psi_lswaptran.F90 +++ b/base/comm/internals/psi_lswaptran.F90 @@ -32,8 +32,8 @@ ! ! File: psi_lswaptran.F90 ! -! Subroutine: psi_lswaptranm -! Does the data exchange among processes. This is similar to Xswapdata, but +! Subroutine: psi_lswaptran_vect +! Implements the data exchange among processes. This is similar to Xswapdata, but ! the list is read "in reverse", i.e. indices that are normally SENT are used ! for the RECEIVE part and vice-versa. This is the basic data exchange operation ! for doing the product of a sparse matrix by a vector. @@ -47,12 +47,16 @@ ! C complex(psb_spk_) ! Z complex(psb_dpk_) ! Basically the operation is as follows: on each process, we identify -! sections SND(Y) and RCV(Y); then we do a SEND(PACK(SND(Y))); -! then we receive, and we do an update with Y = UNPACK(RCV(Y)) + BETA * Y -! but only on the elements involved in the UNPACK operation. +! sections SND(Y) and RCV(Y); then we do a SEND(PACK(GTH(SND(Y)))); +! then we receive, and we do an update with Y = SCT(RCV(Y)) + BETA * Y +! but only on the elements involved in the SCT operation. ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. +! This version works on encapsulated vectors, and uses their methods to do GTH and SCT, +! so that special versions (i.e. GPU vectors can override them +! ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -73,10 +77,10 @@ ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:,:) - X The data area -! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! beta - integer Choose overwrite or sum. +! y - type(psb_l_vect_type) The data area +! desc_a - type(psb_desc_type). The communication descriptor. +! work(:) - integer Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data @@ -86,13 +90,6 @@ ! psb_comm_ovrl_ use ovrl_index ! psb_comm_mov_ use ovr_mst_idx ! -! -! -! Subroutine: psi_lswaptran_vect -! Data exchange among processes. -! -! Takes care of Y an exanspulated vector. -! ! subroutine psi_lswaptran_vect(flag,beta,y,desc_a,work,info,data) @@ -171,7 +168,7 @@ end subroutine psi_lswaptran_vect ! Subroutine: psi_ltran_vidx_vect ! Data exchange among processes. ! -! Takes care of Y an exanspulated vector. Relies on the gather/scatter methods +! Takes care of Y an encapsulated vector. Relies on the gather/scatter methods ! of vectors. ! ! The real workhorse: the outer routine will only choose the index list @@ -436,10 +433,10 @@ end subroutine psi_ltran_vidx_vect ! ! ! -! Subroutine: psi_lswaptran_vect +! Subroutine: psi_lswaptran_multivect ! Data exchange among processes. ! -! Takes care of Y an encaspulated vector. +! Takes care of Y an encaspulated multivector. ! ! subroutine psi_lswaptran_multivect(flag,beta,y,desc_a,work,info,data) diff --git a/base/comm/internals/psi_movrl_restr_a.f90 b/base/comm/internals/psi_movrl_restr_a.f90 index a5759f98..92e06793 100644 --- a/base/comm/internals/psi_movrl_restr_a.f90 +++ b/base/comm/internals/psi_movrl_restr_a.f90 @@ -28,6 +28,11 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! +! +! Subroutine: psi_movrl_restr +! These subroutines restore the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap. +! ! subroutine psi_movrl_restrr1(x,xs,desc_a,info) use psi_mod, psi_protect_name => psi_movrl_restrr1 diff --git a/base/comm/internals/psi_movrl_save_a.f90 b/base/comm/internals/psi_movrl_save_a.f90 index 7935333f..fbc021cf 100644 --- a/base/comm/internals/psi_movrl_save_a.f90 +++ b/base/comm/internals/psi_movrl_save_a.f90 @@ -28,8 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! -! - +! +! +! Subroutine: psi_movrl_save +! These subroutines save the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap. +! subroutine psi_movrl_saver1(x,xs,desc_a,info) use psi_mod, psi_protect_name => psi_movrl_saver1 diff --git a/base/comm/internals/psi_movrl_upd_a.f90 b/base/comm/internals/psi_movrl_upd_a.f90 index 92bfccce..03670659 100644 --- a/base/comm/internals/psi_movrl_upd_a.f90 +++ b/base/comm/internals/psi_movrl_upd_a.f90 @@ -28,6 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! +! +! Subroutine: psi_movrl_update +! These subroutines update the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap, +! or for the application of Additive Schwarz preconditioners. +! ! subroutine psi_movrl_updr1(x,desc_a,update,info) use psi_mod, psi_protect_name => psi_movrl_updr1 diff --git a/base/comm/internals/psi_mswapdata_a.F90 b/base/comm/internals/psi_mswapdata_a.F90 index 2dc1dedf..b71e61ef 100644 --- a/base/comm/internals/psi_mswapdata_a.F90 +++ b/base/comm/internals/psi_mswapdata_a.F90 @@ -33,9 +33,9 @@ ! File: psi_mswapdata.F90 ! ! Subroutine: psi_mswapdatam -! Does the data exchange among processes. Essentially this is doing +! Implements the data exchange among processes. Essentially this is doing ! a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but -! it is capable of pruning empty exchanges, which are very likely in out +! it is capable of pruning empty exchanges, which are very likely in our ! application environment. All the variants have the same structure ! In all these subroutines X may be: I Integer ! S real(psb_spk_) @@ -49,6 +49,7 @@ ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -69,10 +70,10 @@ ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:,:) - X The data area +! beta - integer Choose overwrite or sum. +! y(:,:) - integer The data area ! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! work(:) - integer Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data @@ -507,7 +508,7 @@ end subroutine psi_mswapidxm ! ! ! Subroutine: psi_mswapdatav -! Does the data exchange among processes. Essentially this is doing +! Implements the data exchange among processes. Essentially this is doing ! a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but ! it is capable of pruning empty exchanges, which are very likely in out ! application environment. All the variants have the same structure @@ -523,6 +524,7 @@ end subroutine psi_mswapidxm ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -543,10 +545,10 @@ end subroutine psi_mswapidxm ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:) - X The data area +! beta - integer Choose overwrite or sum. +! y(:) - integer The data area ! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! work(:) - integer Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data diff --git a/base/comm/internals/psi_mswaptran_a.F90 b/base/comm/internals/psi_mswaptran_a.F90 index 7c7693d7..add5a608 100644 --- a/base/comm/internals/psi_mswaptran_a.F90 +++ b/base/comm/internals/psi_mswaptran_a.F90 @@ -33,7 +33,7 @@ ! File: psi_mswaptran.F90 ! ! Subroutine: psi_mswaptranm -! Does the data exchange among processes. This is similar to Xswapdata, but +! Implements the data exchange among processes. This is similar to Xswapdata, but ! the list is read "in reverse", i.e. indices that are normally SENT are used ! for the RECEIVE part and vice-versa. This is the basic data exchange operation ! for doing the product of a sparse matrix by a vector. @@ -53,6 +53,7 @@ ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -73,10 +74,10 @@ ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:,:) - X The data area +! beta - integer Choose overwrite or sum. +! y(:,:) - integer The data area ! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! work(:) - integer Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data @@ -516,7 +517,7 @@ end subroutine psi_mtranidxm ! ! ! Subroutine: psi_mswaptranv -! Does the data exchange among processes. This is similar to Xswapdata, but +! Implements the data exchange among processes. This is similar to Xswapdata, but ! the list is read "in reverse", i.e. indices that are normally SENT are used ! for the RECEIVE part and vice-versa. This is the basic data exchange operation ! for doing the product of a sparse matrix by a vector. @@ -536,6 +537,7 @@ end subroutine psi_mtranidxm ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -556,10 +558,10 @@ end subroutine psi_mtranidxm ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:) - X The data area +! beta - integer Choose overwrite or sum. +! y(:) - integer The data area ! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! work(:) - integer Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data diff --git a/base/comm/internals/psi_sovrl_restr.f90 b/base/comm/internals/psi_sovrl_restr.f90 index b74c4335..3854040c 100644 --- a/base/comm/internals/psi_sovrl_restr.f90 +++ b/base/comm/internals/psi_sovrl_restr.f90 @@ -28,8 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! +! +! Subroutine: psi_sovrl_restr +! These subroutines restore the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap. +! +! ! - subroutine psi_sovrl_restr_vect(x,xs,desc_a,info) use psi_mod, psi_protect_name => psi_sovrl_restr_vect use psb_s_base_vect_mod diff --git a/base/comm/internals/psi_sovrl_restr_a.f90 b/base/comm/internals/psi_sovrl_restr_a.f90 index 5ff1a9f7..e66297d5 100644 --- a/base/comm/internals/psi_sovrl_restr_a.f90 +++ b/base/comm/internals/psi_sovrl_restr_a.f90 @@ -28,6 +28,11 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! +! +! Subroutine: psi_sovrl_restr +! These subroutines restore the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap. +! ! subroutine psi_sovrl_restrr1(x,xs,desc_a,info) use psi_mod, psi_protect_name => psi_sovrl_restrr1 diff --git a/base/comm/internals/psi_sovrl_save.f90 b/base/comm/internals/psi_sovrl_save.f90 index a10ae218..9b06fef7 100644 --- a/base/comm/internals/psi_sovrl_save.f90 +++ b/base/comm/internals/psi_sovrl_save.f90 @@ -28,6 +28,11 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! +! +! Subroutine: psi_sovrl_save +! These subroutines save the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap. +! ! subroutine psi_sovrl_save_vect(x,xs,desc_a,info) use psi_mod, psi_protect_name => psi_sovrl_save_vect diff --git a/base/comm/internals/psi_sovrl_save_a.f90 b/base/comm/internals/psi_sovrl_save_a.f90 index 5766dede..cf400127 100644 --- a/base/comm/internals/psi_sovrl_save_a.f90 +++ b/base/comm/internals/psi_sovrl_save_a.f90 @@ -28,8 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! -! - +! +! +! Subroutine: psi_sovrl_save +! These subroutines save the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap. +! subroutine psi_sovrl_saver1(x,xs,desc_a,info) use psi_mod, psi_protect_name => psi_sovrl_saver1 diff --git a/base/comm/internals/psi_sovrl_upd.f90 b/base/comm/internals/psi_sovrl_upd.f90 index b95a49ba..636261b1 100644 --- a/base/comm/internals/psi_sovrl_upd.f90 +++ b/base/comm/internals/psi_sovrl_upd.f90 @@ -28,8 +28,14 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! +! +! Subroutine: psi_sovrl_update +! These subroutines update the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap, +! or for the application of Additive Schwarz preconditioners. +! +! ! - subroutine psi_sovrl_upd_vect(x,desc_a,update,info) use psi_mod, psi_protect_name => psi_sovrl_upd_vect use psb_realloc_mod diff --git a/base/comm/internals/psi_sovrl_upd_a.f90 b/base/comm/internals/psi_sovrl_upd_a.f90 index d553b5f7..82211657 100644 --- a/base/comm/internals/psi_sovrl_upd_a.f90 +++ b/base/comm/internals/psi_sovrl_upd_a.f90 @@ -28,6 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! +! +! Subroutine: psi_sovrl_update +! These subroutines update the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap, +! or for the application of Additive Schwarz preconditioners. +! ! subroutine psi_sovrl_updr1(x,desc_a,update,info) use psi_mod, psi_protect_name => psi_sovrl_updr1 diff --git a/base/comm/internals/psi_sswapdata.F90 b/base/comm/internals/psi_sswapdata.F90 index 3dc55de7..b5185198 100644 --- a/base/comm/internals/psi_sswapdata.F90 +++ b/base/comm/internals/psi_sswapdata.F90 @@ -32,8 +32,10 @@ ! ! File: psi_sswapdata.F90 ! -! Subroutine: psi_sswapdatam -! Does the data exchange among processes. Essentially this is doing +! +! +! Subroutine: psi_sswapdata_vect +! Implements the data exchange among processes. Essentially this is doing ! a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but ! it is capable of pruning empty exchanges, which are very likely in out ! application environment. All the variants have the same structure @@ -43,12 +45,15 @@ ! C complex(psb_spk_) ! Z complex(psb_dpk_) ! Basically the operation is as follows: on each process, we identify -! sections SND(Y) and RCV(Y); then we do a send on (PACK(SND(Y))); -! then we receive, and we do an update with Y = UNPACK(RCV(Y)) + BETA * Y -! but only on the elements involved in the UNPACK operation. +! sections SND(Y) and RCV(Y); then we do a SEND(PACK(GTH(SND(Y)))); +! then we receive, and we do an update with Y = SCT(RCV(Y)) + BETA * Y +! but only on the elements involved in the SCT operation. ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. +! This version works on encapsulated vectors, and uses their methods to do GTH and SCT, +! so that special versions (i.e. GPU vectors can override them ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -69,10 +74,10 @@ ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:,:) - X The data area +! beta - real Choose overwrite or sum. +! y - type(psb_@x@_vect_type) The data area ! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! work(:) - real Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data @@ -83,14 +88,6 @@ ! psb_comm_mov_ use ovr_mst_idx ! ! -! -! -! Subroutine: psi_sswapdata_vect -! Data exchange among processes. -! -! Takes care of Y an exanspulated vector. -! -! ! subroutine psi_sswapdata_vect(flag,beta,y,desc_a,work,info,data) @@ -426,7 +423,7 @@ end subroutine psi_sswap_vidx_vect ! Subroutine: psi_sswapdata_multivect ! Data exchange among processes. ! -! Takes care of Y an encaspulated vector. +! Takes care of Y an encaspulated multivector. ! ! subroutine psi_sswapdata_multivect(flag,beta,y,desc_a,work,info,data) diff --git a/base/comm/internals/psi_sswapdata_a.F90 b/base/comm/internals/psi_sswapdata_a.F90 index fa8b1afb..de8587c1 100644 --- a/base/comm/internals/psi_sswapdata_a.F90 +++ b/base/comm/internals/psi_sswapdata_a.F90 @@ -33,9 +33,9 @@ ! File: psi_sswapdata.F90 ! ! Subroutine: psi_sswapdatam -! Does the data exchange among processes. Essentially this is doing +! Implements the data exchange among processes. Essentially this is doing ! a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but -! it is capable of pruning empty exchanges, which are very likely in out +! it is capable of pruning empty exchanges, which are very likely in our ! application environment. All the variants have the same structure ! In all these subroutines X may be: I Integer ! S real(psb_spk_) @@ -49,6 +49,7 @@ ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -69,10 +70,10 @@ ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:,:) - X The data area +! beta - real Choose overwrite or sum. +! y(:,:) - real The data area ! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! work(:) - real Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data @@ -507,7 +508,7 @@ end subroutine psi_sswapidxm ! ! ! Subroutine: psi_sswapdatav -! Does the data exchange among processes. Essentially this is doing +! Implements the data exchange among processes. Essentially this is doing ! a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but ! it is capable of pruning empty exchanges, which are very likely in out ! application environment. All the variants have the same structure @@ -523,6 +524,7 @@ end subroutine psi_sswapidxm ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -543,10 +545,10 @@ end subroutine psi_sswapidxm ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:) - X The data area +! beta - real Choose overwrite or sum. +! y(:) - real The data area ! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! work(:) - real Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data diff --git a/base/comm/internals/psi_sswaptran.F90 b/base/comm/internals/psi_sswaptran.F90 index cdde2cf3..cb3ccc75 100644 --- a/base/comm/internals/psi_sswaptran.F90 +++ b/base/comm/internals/psi_sswaptran.F90 @@ -32,8 +32,8 @@ ! ! File: psi_sswaptran.F90 ! -! Subroutine: psi_sswaptranm -! Does the data exchange among processes. This is similar to Xswapdata, but +! Subroutine: psi_sswaptran_vect +! Implements the data exchange among processes. This is similar to Xswapdata, but ! the list is read "in reverse", i.e. indices that are normally SENT are used ! for the RECEIVE part and vice-versa. This is the basic data exchange operation ! for doing the product of a sparse matrix by a vector. @@ -47,12 +47,16 @@ ! C complex(psb_spk_) ! Z complex(psb_dpk_) ! Basically the operation is as follows: on each process, we identify -! sections SND(Y) and RCV(Y); then we do a SEND(PACK(SND(Y))); -! then we receive, and we do an update with Y = UNPACK(RCV(Y)) + BETA * Y -! but only on the elements involved in the UNPACK operation. +! sections SND(Y) and RCV(Y); then we do a SEND(PACK(GTH(SND(Y)))); +! then we receive, and we do an update with Y = SCT(RCV(Y)) + BETA * Y +! but only on the elements involved in the SCT operation. ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. +! This version works on encapsulated vectors, and uses their methods to do GTH and SCT, +! so that special versions (i.e. GPU vectors can override them +! ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -73,10 +77,10 @@ ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:,:) - X The data area -! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! beta - real Choose overwrite or sum. +! y - type(psb_s_vect_type) The data area +! desc_a - type(psb_desc_type). The communication descriptor. +! work(:) - real Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data @@ -86,13 +90,6 @@ ! psb_comm_ovrl_ use ovrl_index ! psb_comm_mov_ use ovr_mst_idx ! -! -! -! Subroutine: psi_sswaptran_vect -! Data exchange among processes. -! -! Takes care of Y an exanspulated vector. -! ! subroutine psi_sswaptran_vect(flag,beta,y,desc_a,work,info,data) @@ -171,7 +168,7 @@ end subroutine psi_sswaptran_vect ! Subroutine: psi_stran_vidx_vect ! Data exchange among processes. ! -! Takes care of Y an exanspulated vector. Relies on the gather/scatter methods +! Takes care of Y an encapsulated vector. Relies on the gather/scatter methods ! of vectors. ! ! The real workhorse: the outer routine will only choose the index list @@ -436,10 +433,10 @@ end subroutine psi_stran_vidx_vect ! ! ! -! Subroutine: psi_sswaptran_vect +! Subroutine: psi_sswaptran_multivect ! Data exchange among processes. ! -! Takes care of Y an encaspulated vector. +! Takes care of Y an encaspulated multivector. ! ! subroutine psi_sswaptran_multivect(flag,beta,y,desc_a,work,info,data) diff --git a/base/comm/internals/psi_sswaptran_a.F90 b/base/comm/internals/psi_sswaptran_a.F90 index 49f0d89f..f0f82965 100644 --- a/base/comm/internals/psi_sswaptran_a.F90 +++ b/base/comm/internals/psi_sswaptran_a.F90 @@ -33,7 +33,7 @@ ! File: psi_sswaptran.F90 ! ! Subroutine: psi_sswaptranm -! Does the data exchange among processes. This is similar to Xswapdata, but +! Implements the data exchange among processes. This is similar to Xswapdata, but ! the list is read "in reverse", i.e. indices that are normally SENT are used ! for the RECEIVE part and vice-versa. This is the basic data exchange operation ! for doing the product of a sparse matrix by a vector. @@ -53,6 +53,7 @@ ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -73,10 +74,10 @@ ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:,:) - X The data area +! beta - real Choose overwrite or sum. +! y(:,:) - real The data area ! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! work(:) - real Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data @@ -516,7 +517,7 @@ end subroutine psi_stranidxm ! ! ! Subroutine: psi_sswaptranv -! Does the data exchange among processes. This is similar to Xswapdata, but +! Implements the data exchange among processes. This is similar to Xswapdata, but ! the list is read "in reverse", i.e. indices that are normally SENT are used ! for the RECEIVE part and vice-versa. This is the basic data exchange operation ! for doing the product of a sparse matrix by a vector. @@ -536,6 +537,7 @@ end subroutine psi_stranidxm ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -556,10 +558,10 @@ end subroutine psi_stranidxm ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:) - X The data area +! beta - real Choose overwrite or sum. +! y(:) - real The data area ! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! work(:) - real Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data diff --git a/base/comm/internals/psi_zovrl_restr.f90 b/base/comm/internals/psi_zovrl_restr.f90 index bd83e618..e52232e7 100644 --- a/base/comm/internals/psi_zovrl_restr.f90 +++ b/base/comm/internals/psi_zovrl_restr.f90 @@ -28,8 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! +! +! Subroutine: psi_zovrl_restr +! These subroutines restore the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap. +! +! ! - subroutine psi_zovrl_restr_vect(x,xs,desc_a,info) use psi_mod, psi_protect_name => psi_zovrl_restr_vect use psb_z_base_vect_mod diff --git a/base/comm/internals/psi_zovrl_restr_a.f90 b/base/comm/internals/psi_zovrl_restr_a.f90 index a0a22a31..379efdcb 100644 --- a/base/comm/internals/psi_zovrl_restr_a.f90 +++ b/base/comm/internals/psi_zovrl_restr_a.f90 @@ -28,6 +28,11 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! +! +! Subroutine: psi_zovrl_restr +! These subroutines restore the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap. +! ! subroutine psi_zovrl_restrr1(x,xs,desc_a,info) use psi_mod, psi_protect_name => psi_zovrl_restrr1 diff --git a/base/comm/internals/psi_zovrl_save.f90 b/base/comm/internals/psi_zovrl_save.f90 index 16238573..acf65181 100644 --- a/base/comm/internals/psi_zovrl_save.f90 +++ b/base/comm/internals/psi_zovrl_save.f90 @@ -28,6 +28,11 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! +! +! Subroutine: psi_zovrl_save +! These subroutines save the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap. +! ! subroutine psi_zovrl_save_vect(x,xs,desc_a,info) use psi_mod, psi_protect_name => psi_zovrl_save_vect diff --git a/base/comm/internals/psi_zovrl_save_a.f90 b/base/comm/internals/psi_zovrl_save_a.f90 index 32d5d212..1e8fdb89 100644 --- a/base/comm/internals/psi_zovrl_save_a.f90 +++ b/base/comm/internals/psi_zovrl_save_a.f90 @@ -28,8 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! -! - +! +! +! Subroutine: psi_zovrl_save +! These subroutines save the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap. +! subroutine psi_zovrl_saver1(x,xs,desc_a,info) use psi_mod, psi_protect_name => psi_zovrl_saver1 diff --git a/base/comm/internals/psi_zovrl_upd.f90 b/base/comm/internals/psi_zovrl_upd.f90 index 83fcf702..cd7ea0de 100644 --- a/base/comm/internals/psi_zovrl_upd.f90 +++ b/base/comm/internals/psi_zovrl_upd.f90 @@ -28,8 +28,14 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! +! +! Subroutine: psi_zovrl_update +! These subroutines update the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap, +! or for the application of Additive Schwarz preconditioners. +! +! ! - subroutine psi_zovrl_upd_vect(x,desc_a,update,info) use psi_mod, psi_protect_name => psi_zovrl_upd_vect use psb_realloc_mod diff --git a/base/comm/internals/psi_zovrl_upd_a.f90 b/base/comm/internals/psi_zovrl_upd_a.f90 index b149addb..9ea2fbae 100644 --- a/base/comm/internals/psi_zovrl_upd_a.f90 +++ b/base/comm/internals/psi_zovrl_upd_a.f90 @@ -28,6 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! +! +! Subroutine: psi_zovrl_update +! These subroutines update the overlap region of a vector; they are used +! for the transpose matrix-vector product when there is a nonempty overlap, +! or for the application of Additive Schwarz preconditioners. +! ! subroutine psi_zovrl_updr1(x,desc_a,update,info) use psi_mod, psi_protect_name => psi_zovrl_updr1 diff --git a/base/comm/internals/psi_zswapdata.F90 b/base/comm/internals/psi_zswapdata.F90 index 014a37e5..7d34012e 100644 --- a/base/comm/internals/psi_zswapdata.F90 +++ b/base/comm/internals/psi_zswapdata.F90 @@ -32,8 +32,10 @@ ! ! File: psi_zswapdata.F90 ! -! Subroutine: psi_zswapdatam -! Does the data exchange among processes. Essentially this is doing +! +! +! Subroutine: psi_zswapdata_vect +! Implements the data exchange among processes. Essentially this is doing ! a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but ! it is capable of pruning empty exchanges, which are very likely in out ! application environment. All the variants have the same structure @@ -43,12 +45,15 @@ ! C complex(psb_spk_) ! Z complex(psb_dpk_) ! Basically the operation is as follows: on each process, we identify -! sections SND(Y) and RCV(Y); then we do a send on (PACK(SND(Y))); -! then we receive, and we do an update with Y = UNPACK(RCV(Y)) + BETA * Y -! but only on the elements involved in the UNPACK operation. +! sections SND(Y) and RCV(Y); then we do a SEND(PACK(GTH(SND(Y)))); +! then we receive, and we do an update with Y = SCT(RCV(Y)) + BETA * Y +! but only on the elements involved in the SCT operation. ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. +! This version works on encapsulated vectors, and uses their methods to do GTH and SCT, +! so that special versions (i.e. GPU vectors can override them ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -69,10 +74,10 @@ ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:,:) - X The data area +! beta - complex Choose overwrite or sum. +! y - type(psb_@x@_vect_type) The data area ! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! work(:) - complex Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data @@ -83,14 +88,6 @@ ! psb_comm_mov_ use ovr_mst_idx ! ! -! -! -! Subroutine: psi_zswapdata_vect -! Data exchange among processes. -! -! Takes care of Y an exanspulated vector. -! -! ! subroutine psi_zswapdata_vect(flag,beta,y,desc_a,work,info,data) @@ -426,7 +423,7 @@ end subroutine psi_zswap_vidx_vect ! Subroutine: psi_zswapdata_multivect ! Data exchange among processes. ! -! Takes care of Y an encaspulated vector. +! Takes care of Y an encaspulated multivector. ! ! subroutine psi_zswapdata_multivect(flag,beta,y,desc_a,work,info,data) diff --git a/base/comm/internals/psi_zswapdata_a.F90 b/base/comm/internals/psi_zswapdata_a.F90 index ebc74a6d..6e821f4f 100644 --- a/base/comm/internals/psi_zswapdata_a.F90 +++ b/base/comm/internals/psi_zswapdata_a.F90 @@ -33,9 +33,9 @@ ! File: psi_zswapdata.F90 ! ! Subroutine: psi_zswapdatam -! Does the data exchange among processes. Essentially this is doing +! Implements the data exchange among processes. Essentially this is doing ! a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but -! it is capable of pruning empty exchanges, which are very likely in out +! it is capable of pruning empty exchanges, which are very likely in our ! application environment. All the variants have the same structure ! In all these subroutines X may be: I Integer ! S real(psb_spk_) @@ -49,6 +49,7 @@ ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -69,10 +70,10 @@ ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:,:) - X The data area +! beta - complex Choose overwrite or sum. +! y(:,:) - complex The data area ! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! work(:) - complex Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data @@ -507,7 +508,7 @@ end subroutine psi_zswapidxm ! ! ! Subroutine: psi_zswapdatav -! Does the data exchange among processes. Essentially this is doing +! Implements the data exchange among processes. Essentially this is doing ! a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but ! it is capable of pruning empty exchanges, which are very likely in out ! application environment. All the variants have the same structure @@ -523,6 +524,7 @@ end subroutine psi_zswapidxm ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -543,10 +545,10 @@ end subroutine psi_zswapidxm ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:) - X The data area +! beta - complex Choose overwrite or sum. +! y(:) - complex The data area ! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! work(:) - complex Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data diff --git a/base/comm/internals/psi_zswaptran.F90 b/base/comm/internals/psi_zswaptran.F90 index 04e3584e..c78a5e1a 100644 --- a/base/comm/internals/psi_zswaptran.F90 +++ b/base/comm/internals/psi_zswaptran.F90 @@ -32,8 +32,8 @@ ! ! File: psi_zswaptran.F90 ! -! Subroutine: psi_zswaptranm -! Does the data exchange among processes. This is similar to Xswapdata, but +! Subroutine: psi_zswaptran_vect +! Implements the data exchange among processes. This is similar to Xswapdata, but ! the list is read "in reverse", i.e. indices that are normally SENT are used ! for the RECEIVE part and vice-versa. This is the basic data exchange operation ! for doing the product of a sparse matrix by a vector. @@ -47,12 +47,16 @@ ! C complex(psb_spk_) ! Z complex(psb_dpk_) ! Basically the operation is as follows: on each process, we identify -! sections SND(Y) and RCV(Y); then we do a SEND(PACK(SND(Y))); -! then we receive, and we do an update with Y = UNPACK(RCV(Y)) + BETA * Y -! but only on the elements involved in the UNPACK operation. +! sections SND(Y) and RCV(Y); then we do a SEND(PACK(GTH(SND(Y)))); +! then we receive, and we do an update with Y = SCT(RCV(Y)) + BETA * Y +! but only on the elements involved in the SCT operation. ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. +! This version works on encapsulated vectors, and uses their methods to do GTH and SCT, +! so that special versions (i.e. GPU vectors can override them +! ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -73,10 +77,10 @@ ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:,:) - X The data area -! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! beta - complex Choose overwrite or sum. +! y - type(psb_z_vect_type) The data area +! desc_a - type(psb_desc_type). The communication descriptor. +! work(:) - complex Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data @@ -86,13 +90,6 @@ ! psb_comm_ovrl_ use ovrl_index ! psb_comm_mov_ use ovr_mst_idx ! -! -! -! Subroutine: psi_zswaptran_vect -! Data exchange among processes. -! -! Takes care of Y an exanspulated vector. -! ! subroutine psi_zswaptran_vect(flag,beta,y,desc_a,work,info,data) @@ -171,7 +168,7 @@ end subroutine psi_zswaptran_vect ! Subroutine: psi_ztran_vidx_vect ! Data exchange among processes. ! -! Takes care of Y an exanspulated vector. Relies on the gather/scatter methods +! Takes care of Y an encapsulated vector. Relies on the gather/scatter methods ! of vectors. ! ! The real workhorse: the outer routine will only choose the index list @@ -436,10 +433,10 @@ end subroutine psi_ztran_vidx_vect ! ! ! -! Subroutine: psi_zswaptran_vect +! Subroutine: psi_zswaptran_multivect ! Data exchange among processes. ! -! Takes care of Y an encaspulated vector. +! Takes care of Y an encaspulated multivector. ! ! subroutine psi_zswaptran_multivect(flag,beta,y,desc_a,work,info,data) diff --git a/base/comm/internals/psi_zswaptran_a.F90 b/base/comm/internals/psi_zswaptran_a.F90 index c2e1111a..aaf305ac 100644 --- a/base/comm/internals/psi_zswaptran_a.F90 +++ b/base/comm/internals/psi_zswaptran_a.F90 @@ -33,7 +33,7 @@ ! File: psi_zswaptran.F90 ! ! Subroutine: psi_zswaptranm -! Does the data exchange among processes. This is similar to Xswapdata, but +! Implements the data exchange among processes. This is similar to Xswapdata, but ! the list is read "in reverse", i.e. indices that are normally SENT are used ! for the RECEIVE part and vice-versa. This is the basic data exchange operation ! for doing the product of a sparse matrix by a vector. @@ -53,6 +53,7 @@ ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -73,10 +74,10 @@ ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:,:) - X The data area +! beta - complex Choose overwrite or sum. +! y(:,:) - complex The data area ! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! work(:) - complex Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data @@ -516,7 +517,7 @@ end subroutine psi_ztranidxm ! ! ! Subroutine: psi_zswaptranv -! Does the data exchange among processes. This is similar to Xswapdata, but +! Implements the data exchange among processes. This is similar to Xswapdata, but ! the list is read "in reverse", i.e. indices that are normally SENT are used ! for the RECEIVE part and vice-versa. This is the basic data exchange operation ! for doing the product of a sparse matrix by a vector. @@ -536,6 +537,7 @@ end subroutine psi_ztranidxm ! Thus: for halo data exchange, the receive section is confined in the ! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. +! The first routine picks the desired exchange index list and passes it to the second. ! ! Arguments: ! flag - integer Choose the algorithm for data exchange: @@ -556,10 +558,10 @@ end subroutine psi_ztranidxm ! ! ! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:) - X The data area +! beta - complex Choose overwrite or sum. +! y(:) - complex The data area ! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! work(:) - complex Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data diff --git a/base/comm/psb_cgather.f90 b/base/comm/psb_cgather.f90 index d4e675c2..f4ed3f4a 100644 --- a/base/comm/psb_cgather.f90 +++ b/base/comm/psb_cgather.f90 @@ -31,14 +31,14 @@ ! ! File: psb_cgather.f90 ! -! Subroutine: psb_cgatherm -! This subroutine gathers pieces of a distributed dense matrix into a local one. +! Subroutine: psb_cgather_vect +! This subroutine gathers pieces of a distributed vector into a local one. ! ! Arguments: -! globx - complex,dimension(:,:). The local matrix into which gather +! globx - complex,dimension(:). The local matrix into which gather ! the distributed pieces. -! locx - complex,dimension(:,:). The local piece of the distributed -! matrix to be gathered. +! locx - type(psb_c_vect_type@ The local piece of the distributed +! vector to be gathered. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Error code. ! iroot - integer. The process that has to own the @@ -159,6 +159,8 @@ subroutine psb_cgather_vect(globx, locx, desc_a, info, iroot) end subroutine psb_cgather_vect +! Subroutine: psb_cgather_multivect +! This subroutine gathers pieces of a distributed multivector into a local one. subroutine psb_cgather_multivect(globx, locx, desc_a, info, iroot) use psb_base_mod, psb_protect_name => psb_cgather_multivect diff --git a/base/comm/psb_cgather_a.f90 b/base/comm/psb_cgather_a.f90 index 09e35678..5f75abd6 100644 --- a/base/comm/psb_cgather_a.f90 +++ b/base/comm/psb_cgather_a.f90 @@ -31,7 +31,7 @@ ! ! File: psb_cgather.f90 ! -! Subroutine: psb_cgatherm +! Subroutine: psb_cgather ! This subroutine gathers pieces of a distributed dense matrix into a local one. ! ! Arguments: diff --git a/base/comm/psb_chalo.f90 b/base/comm/psb_chalo.f90 index e34168c6..ad483a51 100644 --- a/base/comm/psb_chalo.f90 +++ b/base/comm/psb_chalo.f90 @@ -31,17 +31,17 @@ ! ! File: psb_chalo.f90 ! -! Subroutine: psb_chalom +! Subroutine: psb_chalo_vect ! This subroutine performs the exchange of the halo elements in a -! distributed dense matrix between all the processes. +! distributed vector between all the processes. ! ! Arguments: -! x - complex,dimension(:,:). The local part of the dense matrix. +! x - type(psb_c_vect_type) The local part of the vector ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code ! jx - integer(optional). The starting column of the global matrix. ! ik - integer(optional). The number of columns to gather. -! work - complex(optional). Work area. +! work - complex(optional). Work area. ! tran - character(optional). Transpose exchange. ! mode - integer(optional). Communication mode (see Swapdata) ! data - integer Which index list in desc_a should be used @@ -52,7 +52,6 @@ ! psb_comm_mov_ use ovr_mst_idx ! ! - subroutine psb_chalo_vect(x,desc_a,info,work,tran,mode,data) use psb_base_mod, psb_protect_name => psb_chalo_vect use psi_mod @@ -185,7 +184,28 @@ subroutine psb_chalo_vect(x,desc_a,info,work,tran,mode,data) return end subroutine psb_chalo_vect - +! +! Subroutine: psb_chalo_multivect +! This subroutine performs the exchange of the halo elements in a +! distributed multivector between all the processes. +! +! Arguments: +! x - type(psb_c_multivect_type) The local part of the multivector +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! jx - integer(optional). The starting column of the global matrix. +! ik - integer(optional). The number of columns to gather. +! work - complex(optional). Work area. +! tran - character(optional). Transpose exchange. +! mode - integer(optional). Communication mode (see Swapdata) +! data - integer Which index list in desc_a should be used +! to retrieve rows, default psb_comm_halo_ +! psb_comm_halo_ use halo_index +! psb_comm_ext_ use ext_index +! psb_comm_ovrl_ use ovrl_index +! psb_comm_mov_ use ovr_mst_idx +! +! subroutine psb_chalo_multivect(x,desc_a,info,work,tran,mode,data) use psb_base_mod, psb_protect_name => psb_chalo_multivect use psi_mod diff --git a/base/comm/psb_covrl.f90 b/base/comm/psb_covrl.f90 index f6155ffe..7ac851b6 100644 --- a/base/comm/psb_covrl.f90 +++ b/base/comm/psb_covrl.f90 @@ -32,12 +32,12 @@ ! ! File: psb_covrl.f90 ! -! Subroutine: psb_covrlm +! Subroutine: psb_covrl_vect ! This subroutine performs the exchange of the overlap elements in a -! distributed dense matrix between all the processes. +! distributed dense vector between all the processes. ! ! Arguments: -! x(:,:) - complex The local part of the dense matrix. +! x - type(psb_c_vect_type) The local part of the vector ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code. ! jx - integer(optional). The starting column of the global matrix @@ -180,7 +180,38 @@ subroutine psb_covrl_vect(x,desc_a,info,work,update,mode) return end subroutine psb_covrl_vect - +! +! Subroutine: psb_covrl_multivect +! This subroutine performs the exchange of the overlap elements in a +! distributed multivector between all the processes. +! +! Arguments: +! x - type(psb_c_vect_type) The local part of the multivector +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code. +! jx - integer(optional). The starting column of the global matrix +! ik - integer(optional). The number of columns to gather. +! work - complex(optional). A work area. +! update - integer(optional). Type of update: +! psb_none_ do nothing +! psb_sum_ sum of overlaps +! psb_avg_ average of overlaps +! mode - integer(optional). Choose the algorithm for data exchange: +! this is chosen through bit fields. +! - swap_mpi = iand(flag,psb_swap_mpi_) /= 0 +! - swap_sync = iand(flag,psb_swap_sync_) /= 0 +! - swap_send = iand(flag,psb_swap_send_) /= 0 +! - swap_recv = iand(flag,psb_swap_recv_) /= 0 +! - if (swap_mpi): use underlying MPI_ALLTOALLV. +! - if (swap_sync): use PSB_SND and PSB_RCV in +! synchronized pairs +! - if (swap_send .and. swap_recv): use mpi_irecv +! and mpi_send +! - if (swap_send): use psb_snd (but need another +! call with swap_recv to complete) +! - if (swap_recv): use psb_rcv (completing a +! previous call with swap_send) +! subroutine psb_covrl_multivect(x,desc_a,info,work,update,mode) use psb_base_mod, psb_protect_name => psb_covrl_multivect use psi_mod diff --git a/base/comm/psb_cscatter.F90 b/base/comm/psb_cscatter.F90 index 77036d04..7ac22a68 100644 --- a/base/comm/psb_cscatter.F90 +++ b/base/comm/psb_cscatter.F90 @@ -31,13 +31,13 @@ ! ! File: psb_cscatter.f90 ! -! Subroutine: psb_cscatterm -! This subroutine scatters a global matrix locally owned by one process -! into pieces that are local to alle the processes. +! Subroutine: psb_cscatter_vect +! This subroutine scatters a global vector locally owned by one process +! into pieces that are local to all the processes. ! ! Arguments: -! globx - complex,dimension(:,:). The global matrix to scatter. -! locx - complex,dimension(:,:). The local piece of the distributed matrix. +! globx - complex,dimension(:) The global matrix to scatter. +! locx - type(psb_c_vect_type) The local piece of the distributed matrix. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Error code. ! iroot - integer(optional). The process that owns the global matrix. diff --git a/base/comm/psb_cscatter_a.F90 b/base/comm/psb_cscatter_a.F90 index 1cadd168..d16ae980 100644 --- a/base/comm/psb_cscatter_a.F90 +++ b/base/comm/psb_cscatter_a.F90 @@ -33,7 +33,7 @@ ! ! Subroutine: psb_cscatterm ! This subroutine scatters a global matrix locally owned by one process -! into pieces that are local to alle the processes. +! into pieces that are local to all the processes. ! ! Arguments: ! globx - complex,dimension(:,:). The global matrix to scatter. @@ -278,7 +278,7 @@ end subroutine psb_cscatterm ! Subroutine: psb_cscatterv ! This subroutine scatters a global vector locally owned by one process -! into pieces that are local to alle the processes. +! into pieces that are local to all the processes. ! ! Arguments: ! globx - complex,dimension(:). The global vector to scatter. diff --git a/base/comm/psb_cspgather.F90 b/base/comm/psb_cspgather.F90 index b9401061..45dcc667 100644 --- a/base/comm/psb_cspgather.F90 +++ b/base/comm/psb_cspgather.F90 @@ -30,6 +30,17 @@ ! ! ! File: psb_cspgather.f90 +! +! Gathers a sparse matrix onto a single process. +! Two variants: +! 1. Gathers to PSB_c_SPARSE_MAT (i.e. to matrix with IPK_ indices) +! 2. Gathers to PSB_lc_SPARSE_MAT (i.e. to matrix with LPK_ indices) +! +! Note: this function uses MPI_ALLGATHERV. At this time, the size of the +! resulting matrix must be within the range of 4 bytes because of the +! restriction on MPI displacements to be 4 bytes. +! +! subroutine psb_csp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keeploc) #if defined(HAVE_ISO_FORTRAN_ENV) use iso_fortran_env diff --git a/base/comm/psb_dgather.f90 b/base/comm/psb_dgather.f90 index 3659b487..c767c8ec 100644 --- a/base/comm/psb_dgather.f90 +++ b/base/comm/psb_dgather.f90 @@ -31,14 +31,14 @@ ! ! File: psb_dgather.f90 ! -! Subroutine: psb_dgatherm -! This subroutine gathers pieces of a distributed dense matrix into a local one. +! Subroutine: psb_dgather_vect +! This subroutine gathers pieces of a distributed vector into a local one. ! ! Arguments: -! globx - real,dimension(:,:). The local matrix into which gather +! globx - real,dimension(:). The local matrix into which gather ! the distributed pieces. -! locx - real,dimension(:,:). The local piece of the distributed -! matrix to be gathered. +! locx - type(psb_d_vect_type@ The local piece of the distributed +! vector to be gathered. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Error code. ! iroot - integer. The process that has to own the @@ -159,6 +159,8 @@ subroutine psb_dgather_vect(globx, locx, desc_a, info, iroot) end subroutine psb_dgather_vect +! Subroutine: psb_dgather_multivect +! This subroutine gathers pieces of a distributed multivector into a local one. subroutine psb_dgather_multivect(globx, locx, desc_a, info, iroot) use psb_base_mod, psb_protect_name => psb_dgather_multivect diff --git a/base/comm/psb_dgather_a.f90 b/base/comm/psb_dgather_a.f90 index 277eb3d0..5ae9ed50 100644 --- a/base/comm/psb_dgather_a.f90 +++ b/base/comm/psb_dgather_a.f90 @@ -31,7 +31,7 @@ ! ! File: psb_dgather.f90 ! -! Subroutine: psb_dgatherm +! Subroutine: psb_dgather ! This subroutine gathers pieces of a distributed dense matrix into a local one. ! ! Arguments: diff --git a/base/comm/psb_dhalo.f90 b/base/comm/psb_dhalo.f90 index 85caa81f..b5f584dc 100644 --- a/base/comm/psb_dhalo.f90 +++ b/base/comm/psb_dhalo.f90 @@ -31,17 +31,17 @@ ! ! File: psb_dhalo.f90 ! -! Subroutine: psb_dhalom +! Subroutine: psb_dhalo_vect ! This subroutine performs the exchange of the halo elements in a -! distributed dense matrix between all the processes. +! distributed vector between all the processes. ! ! Arguments: -! x - real,dimension(:,:). The local part of the dense matrix. +! x - type(psb_d_vect_type) The local part of the vector ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code ! jx - integer(optional). The starting column of the global matrix. ! ik - integer(optional). The number of columns to gather. -! work - real(optional). Work area. +! work - real(optional). Work area. ! tran - character(optional). Transpose exchange. ! mode - integer(optional). Communication mode (see Swapdata) ! data - integer Which index list in desc_a should be used @@ -52,7 +52,6 @@ ! psb_comm_mov_ use ovr_mst_idx ! ! - subroutine psb_dhalo_vect(x,desc_a,info,work,tran,mode,data) use psb_base_mod, psb_protect_name => psb_dhalo_vect use psi_mod @@ -185,7 +184,28 @@ subroutine psb_dhalo_vect(x,desc_a,info,work,tran,mode,data) return end subroutine psb_dhalo_vect - +! +! Subroutine: psb_dhalo_multivect +! This subroutine performs the exchange of the halo elements in a +! distributed multivector between all the processes. +! +! Arguments: +! x - type(psb_d_multivect_type) The local part of the multivector +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! jx - integer(optional). The starting column of the global matrix. +! ik - integer(optional). The number of columns to gather. +! work - real(optional). Work area. +! tran - character(optional). Transpose exchange. +! mode - integer(optional). Communication mode (see Swapdata) +! data - integer Which index list in desc_a should be used +! to retrieve rows, default psb_comm_halo_ +! psb_comm_halo_ use halo_index +! psb_comm_ext_ use ext_index +! psb_comm_ovrl_ use ovrl_index +! psb_comm_mov_ use ovr_mst_idx +! +! subroutine psb_dhalo_multivect(x,desc_a,info,work,tran,mode,data) use psb_base_mod, psb_protect_name => psb_dhalo_multivect use psi_mod diff --git a/base/comm/psb_dovrl.f90 b/base/comm/psb_dovrl.f90 index 28b2cd24..1177be29 100644 --- a/base/comm/psb_dovrl.f90 +++ b/base/comm/psb_dovrl.f90 @@ -32,12 +32,12 @@ ! ! File: psb_dovrl.f90 ! -! Subroutine: psb_dovrlm +! Subroutine: psb_dovrl_vect ! This subroutine performs the exchange of the overlap elements in a -! distributed dense matrix between all the processes. +! distributed dense vector between all the processes. ! ! Arguments: -! x(:,:) - real The local part of the dense matrix. +! x - type(psb_d_vect_type) The local part of the vector ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code. ! jx - integer(optional). The starting column of the global matrix @@ -180,7 +180,38 @@ subroutine psb_dovrl_vect(x,desc_a,info,work,update,mode) return end subroutine psb_dovrl_vect - +! +! Subroutine: psb_dovrl_multivect +! This subroutine performs the exchange of the overlap elements in a +! distributed multivector between all the processes. +! +! Arguments: +! x - type(psb_d_vect_type) The local part of the multivector +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code. +! jx - integer(optional). The starting column of the global matrix +! ik - integer(optional). The number of columns to gather. +! work - real(optional). A work area. +! update - integer(optional). Type of update: +! psb_none_ do nothing +! psb_sum_ sum of overlaps +! psb_avg_ average of overlaps +! mode - integer(optional). Choose the algorithm for data exchange: +! this is chosen through bit fields. +! - swap_mpi = iand(flag,psb_swap_mpi_) /= 0 +! - swap_sync = iand(flag,psb_swap_sync_) /= 0 +! - swap_send = iand(flag,psb_swap_send_) /= 0 +! - swap_recv = iand(flag,psb_swap_recv_) /= 0 +! - if (swap_mpi): use underlying MPI_ALLTOALLV. +! - if (swap_sync): use PSB_SND and PSB_RCV in +! synchronized pairs +! - if (swap_send .and. swap_recv): use mpi_irecv +! and mpi_send +! - if (swap_send): use psb_snd (but need another +! call with swap_recv to complete) +! - if (swap_recv): use psb_rcv (completing a +! previous call with swap_send) +! subroutine psb_dovrl_multivect(x,desc_a,info,work,update,mode) use psb_base_mod, psb_protect_name => psb_dovrl_multivect use psi_mod diff --git a/base/comm/psb_dscatter.F90 b/base/comm/psb_dscatter.F90 index d559fa12..157af3c6 100644 --- a/base/comm/psb_dscatter.F90 +++ b/base/comm/psb_dscatter.F90 @@ -31,13 +31,13 @@ ! ! File: psb_dscatter.f90 ! -! Subroutine: psb_dscatterm -! This subroutine scatters a global matrix locally owned by one process -! into pieces that are local to alle the processes. +! Subroutine: psb_dscatter_vect +! This subroutine scatters a global vector locally owned by one process +! into pieces that are local to all the processes. ! ! Arguments: -! globx - real,dimension(:,:). The global matrix to scatter. -! locx - real,dimension(:,:). The local piece of the distributed matrix. +! globx - real,dimension(:) The global matrix to scatter. +! locx - type(psb_d_vect_type) The local piece of the distributed matrix. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Error code. ! iroot - integer(optional). The process that owns the global matrix. diff --git a/base/comm/psb_dscatter_a.F90 b/base/comm/psb_dscatter_a.F90 index 8285afc1..e0b39f57 100644 --- a/base/comm/psb_dscatter_a.F90 +++ b/base/comm/psb_dscatter_a.F90 @@ -33,7 +33,7 @@ ! ! Subroutine: psb_dscatterm ! This subroutine scatters a global matrix locally owned by one process -! into pieces that are local to alle the processes. +! into pieces that are local to all the processes. ! ! Arguments: ! globx - real,dimension(:,:). The global matrix to scatter. @@ -278,7 +278,7 @@ end subroutine psb_dscatterm ! Subroutine: psb_dscatterv ! This subroutine scatters a global vector locally owned by one process -! into pieces that are local to alle the processes. +! into pieces that are local to all the processes. ! ! Arguments: ! globx - real,dimension(:). The global vector to scatter. diff --git a/base/comm/psb_dspgather.F90 b/base/comm/psb_dspgather.F90 index d5593504..62084d48 100644 --- a/base/comm/psb_dspgather.F90 +++ b/base/comm/psb_dspgather.F90 @@ -30,6 +30,17 @@ ! ! ! File: psb_dspgather.f90 +! +! Gathers a sparse matrix onto a single process. +! Two variants: +! 1. Gathers to PSB_d_SPARSE_MAT (i.e. to matrix with IPK_ indices) +! 2. Gathers to PSB_ld_SPARSE_MAT (i.e. to matrix with LPK_ indices) +! +! Note: this function uses MPI_ALLGATHERV. At this time, the size of the +! resulting matrix must be within the range of 4 bytes because of the +! restriction on MPI displacements to be 4 bytes. +! +! subroutine psb_dsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keeploc) #if defined(HAVE_ISO_FORTRAN_ENV) use iso_fortran_env diff --git a/base/comm/psb_egather_a.f90 b/base/comm/psb_egather_a.f90 index 54b77bc9..b910a4f7 100644 --- a/base/comm/psb_egather_a.f90 +++ b/base/comm/psb_egather_a.f90 @@ -31,7 +31,7 @@ ! ! File: psb_egather.f90 ! -! Subroutine: psb_egatherm +! Subroutine: psb_egather ! This subroutine gathers pieces of a distributed dense matrix into a local one. ! ! Arguments: diff --git a/base/comm/psb_escatter_a.F90 b/base/comm/psb_escatter_a.F90 index e8b4b062..dbb2026f 100644 --- a/base/comm/psb_escatter_a.F90 +++ b/base/comm/psb_escatter_a.F90 @@ -33,7 +33,7 @@ ! ! Subroutine: psb_escatterm ! This subroutine scatters a global matrix locally owned by one process -! into pieces that are local to alle the processes. +! into pieces that are local to all the processes. ! ! Arguments: ! globx - integer,dimension(:,:). The global matrix to scatter. @@ -278,7 +278,7 @@ end subroutine psb_escatterm ! Subroutine: psb_escatterv ! This subroutine scatters a global vector locally owned by one process -! into pieces that are local to alle the processes. +! into pieces that are local to all the processes. ! ! Arguments: ! globx - integer,dimension(:). The global vector to scatter. diff --git a/base/comm/psb_igather.f90 b/base/comm/psb_igather.f90 index a6e59497..48a4f2fe 100644 --- a/base/comm/psb_igather.f90 +++ b/base/comm/psb_igather.f90 @@ -31,14 +31,14 @@ ! ! File: psb_igather.f90 ! -! Subroutine: psb_igatherm -! This subroutine gathers pieces of a distributed dense matrix into a local one. +! Subroutine: psb_igather_vect +! This subroutine gathers pieces of a distributed vector into a local one. ! ! Arguments: -! globx - integer,dimension(:,:). The local matrix into which gather +! globx - integer,dimension(:). The local matrix into which gather ! the distributed pieces. -! locx - integer,dimension(:,:). The local piece of the distributed -! matrix to be gathered. +! locx - type(psb_i_vect_type@ The local piece of the distributed +! vector to be gathered. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Error code. ! iroot - integer. The process that has to own the @@ -159,6 +159,8 @@ subroutine psb_igather_vect(globx, locx, desc_a, info, iroot) end subroutine psb_igather_vect +! Subroutine: psb_igather_multivect +! This subroutine gathers pieces of a distributed multivector into a local one. subroutine psb_igather_multivect(globx, locx, desc_a, info, iroot) use psb_base_mod, psb_protect_name => psb_igather_multivect diff --git a/base/comm/psb_ihalo.f90 b/base/comm/psb_ihalo.f90 index ed4d2c10..4bccfc10 100644 --- a/base/comm/psb_ihalo.f90 +++ b/base/comm/psb_ihalo.f90 @@ -31,17 +31,17 @@ ! ! File: psb_ihalo.f90 ! -! Subroutine: psb_ihalom +! Subroutine: psb_ihalo_vect ! This subroutine performs the exchange of the halo elements in a -! distributed dense matrix between all the processes. +! distributed vector between all the processes. ! ! Arguments: -! x - integer,dimension(:,:). The local part of the dense matrix. +! x - type(psb_i_vect_type) The local part of the vector ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code ! jx - integer(optional). The starting column of the global matrix. ! ik - integer(optional). The number of columns to gather. -! work - integer(optional). Work area. +! work - integer(optional). Work area. ! tran - character(optional). Transpose exchange. ! mode - integer(optional). Communication mode (see Swapdata) ! data - integer Which index list in desc_a should be used @@ -52,7 +52,6 @@ ! psb_comm_mov_ use ovr_mst_idx ! ! - subroutine psb_ihalo_vect(x,desc_a,info,work,tran,mode,data) use psb_base_mod, psb_protect_name => psb_ihalo_vect use psi_mod @@ -185,7 +184,28 @@ subroutine psb_ihalo_vect(x,desc_a,info,work,tran,mode,data) return end subroutine psb_ihalo_vect - +! +! Subroutine: psb_ihalo_multivect +! This subroutine performs the exchange of the halo elements in a +! distributed multivector between all the processes. +! +! Arguments: +! x - type(psb_i_multivect_type) The local part of the multivector +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! jx - integer(optional). The starting column of the global matrix. +! ik - integer(optional). The number of columns to gather. +! work - integer(optional). Work area. +! tran - character(optional). Transpose exchange. +! mode - integer(optional). Communication mode (see Swapdata) +! data - integer Which index list in desc_a should be used +! to retrieve rows, default psb_comm_halo_ +! psb_comm_halo_ use halo_index +! psb_comm_ext_ use ext_index +! psb_comm_ovrl_ use ovrl_index +! psb_comm_mov_ use ovr_mst_idx +! +! subroutine psb_ihalo_multivect(x,desc_a,info,work,tran,mode,data) use psb_base_mod, psb_protect_name => psb_ihalo_multivect use psi_mod diff --git a/base/comm/psb_iovrl.f90 b/base/comm/psb_iovrl.f90 index 1e3ca4f3..06f720b0 100644 --- a/base/comm/psb_iovrl.f90 +++ b/base/comm/psb_iovrl.f90 @@ -32,12 +32,12 @@ ! ! File: psb_iovrl.f90 ! -! Subroutine: psb_iovrlm +! Subroutine: psb_iovrl_vect ! This subroutine performs the exchange of the overlap elements in a -! distributed dense matrix between all the processes. +! distributed dense vector between all the processes. ! ! Arguments: -! x(:,:) - integer The local part of the dense matrix. +! x - type(psb_i_vect_type) The local part of the vector ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code. ! jx - integer(optional). The starting column of the global matrix @@ -180,7 +180,38 @@ subroutine psb_iovrl_vect(x,desc_a,info,work,update,mode) return end subroutine psb_iovrl_vect - +! +! Subroutine: psb_iovrl_multivect +! This subroutine performs the exchange of the overlap elements in a +! distributed multivector between all the processes. +! +! Arguments: +! x - type(psb_i_vect_type) The local part of the multivector +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code. +! jx - integer(optional). The starting column of the global matrix +! ik - integer(optional). The number of columns to gather. +! work - integer(optional). A work area. +! update - integer(optional). Type of update: +! psb_none_ do nothing +! psb_sum_ sum of overlaps +! psb_avg_ average of overlaps +! mode - integer(optional). Choose the algorithm for data exchange: +! this is chosen through bit fields. +! - swap_mpi = iand(flag,psb_swap_mpi_) /= 0 +! - swap_sync = iand(flag,psb_swap_sync_) /= 0 +! - swap_send = iand(flag,psb_swap_send_) /= 0 +! - swap_recv = iand(flag,psb_swap_recv_) /= 0 +! - if (swap_mpi): use underlying MPI_ALLTOALLV. +! - if (swap_sync): use PSB_SND and PSB_RCV in +! synchronized pairs +! - if (swap_send .and. swap_recv): use mpi_irecv +! and mpi_send +! - if (swap_send): use psb_snd (but need another +! call with swap_recv to complete) +! - if (swap_recv): use psb_rcv (completing a +! previous call with swap_send) +! subroutine psb_iovrl_multivect(x,desc_a,info,work,update,mode) use psb_base_mod, psb_protect_name => psb_iovrl_multivect use psi_mod diff --git a/base/comm/psb_iscatter.F90 b/base/comm/psb_iscatter.F90 index 1a545e9b..f159d5b4 100644 --- a/base/comm/psb_iscatter.F90 +++ b/base/comm/psb_iscatter.F90 @@ -31,13 +31,13 @@ ! ! File: psb_iscatter.f90 ! -! Subroutine: psb_iscatterm -! This subroutine scatters a global matrix locally owned by one process -! into pieces that are local to alle the processes. +! Subroutine: psb_iscatter_vect +! This subroutine scatters a global vector locally owned by one process +! into pieces that are local to all the processes. ! ! Arguments: -! globx - integer,dimension(:,:). The global matrix to scatter. -! locx - integer,dimension(:,:). The local piece of the distributed matrix. +! globx - integer,dimension(:) The global matrix to scatter. +! locx - type(psb_i_vect_type) The local piece of the distributed matrix. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Error code. ! iroot - integer(optional). The process that owns the global matrix. diff --git a/base/comm/psb_ispgather.F90 b/base/comm/psb_ispgather.F90 index a19d24d9..d13c2e7c 100644 --- a/base/comm/psb_ispgather.F90 +++ b/base/comm/psb_ispgather.F90 @@ -30,6 +30,17 @@ ! ! ! File: psb_ispgather.f90 +! +! Gathers a sparse matrix onto a single process. +! Two variants: +! 1. Gathers to PSB_i_SPARSE_MAT (i.e. to matrix with IPK_ indices) +! 2. Gathers to PSB_@LX@_SPARSE_MAT (i.e. to matrix with LPK_ indices) +! +! Note: this function uses MPI_ALLGATHERV. At this time, the size of the +! resulting matrix must be within the range of 4 bytes because of the +! restriction on MPI displacements to be 4 bytes. +! +! subroutine psb_isp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keeploc) #if defined(HAVE_ISO_FORTRAN_ENV) use iso_fortran_env diff --git a/base/comm/psb_lgather.f90 b/base/comm/psb_lgather.f90 index 45ea0d94..1f6f448f 100644 --- a/base/comm/psb_lgather.f90 +++ b/base/comm/psb_lgather.f90 @@ -31,14 +31,14 @@ ! ! File: psb_lgather.f90 ! -! Subroutine: psb_lgatherm -! This subroutine gathers pieces of a distributed dense matrix into a local one. +! Subroutine: psb_lgather_vect +! This subroutine gathers pieces of a distributed vector into a local one. ! ! Arguments: -! globx - integer,dimension(:,:). The local matrix into which gather +! globx - integer,dimension(:). The local matrix into which gather ! the distributed pieces. -! locx - integer,dimension(:,:). The local piece of the distributed -! matrix to be gathered. +! locx - type(psb_l_vect_type@ The local piece of the distributed +! vector to be gathered. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Error code. ! iroot - integer. The process that has to own the @@ -159,6 +159,8 @@ subroutine psb_lgather_vect(globx, locx, desc_a, info, iroot) end subroutine psb_lgather_vect +! Subroutine: psb_lgather_multivect +! This subroutine gathers pieces of a distributed multivector into a local one. subroutine psb_lgather_multivect(globx, locx, desc_a, info, iroot) use psb_base_mod, psb_protect_name => psb_lgather_multivect diff --git a/base/comm/psb_lhalo.f90 b/base/comm/psb_lhalo.f90 index 49b4d13d..b35613a1 100644 --- a/base/comm/psb_lhalo.f90 +++ b/base/comm/psb_lhalo.f90 @@ -31,17 +31,17 @@ ! ! File: psb_lhalo.f90 ! -! Subroutine: psb_lhalom +! Subroutine: psb_lhalo_vect ! This subroutine performs the exchange of the halo elements in a -! distributed dense matrix between all the processes. +! distributed vector between all the processes. ! ! Arguments: -! x - integer,dimension(:,:). The local part of the dense matrix. +! x - type(psb_l_vect_type) The local part of the vector ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code ! jx - integer(optional). The starting column of the global matrix. ! ik - integer(optional). The number of columns to gather. -! work - integer(optional). Work area. +! work - integer(optional). Work area. ! tran - character(optional). Transpose exchange. ! mode - integer(optional). Communication mode (see Swapdata) ! data - integer Which index list in desc_a should be used @@ -52,7 +52,6 @@ ! psb_comm_mov_ use ovr_mst_idx ! ! - subroutine psb_lhalo_vect(x,desc_a,info,work,tran,mode,data) use psb_base_mod, psb_protect_name => psb_lhalo_vect use psi_mod @@ -185,7 +184,28 @@ subroutine psb_lhalo_vect(x,desc_a,info,work,tran,mode,data) return end subroutine psb_lhalo_vect - +! +! Subroutine: psb_lhalo_multivect +! This subroutine performs the exchange of the halo elements in a +! distributed multivector between all the processes. +! +! Arguments: +! x - type(psb_l_multivect_type) The local part of the multivector +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! jx - integer(optional). The starting column of the global matrix. +! ik - integer(optional). The number of columns to gather. +! work - integer(optional). Work area. +! tran - character(optional). Transpose exchange. +! mode - integer(optional). Communication mode (see Swapdata) +! data - integer Which index list in desc_a should be used +! to retrieve rows, default psb_comm_halo_ +! psb_comm_halo_ use halo_index +! psb_comm_ext_ use ext_index +! psb_comm_ovrl_ use ovrl_index +! psb_comm_mov_ use ovr_mst_idx +! +! subroutine psb_lhalo_multivect(x,desc_a,info,work,tran,mode,data) use psb_base_mod, psb_protect_name => psb_lhalo_multivect use psi_mod diff --git a/base/comm/psb_lovrl.f90 b/base/comm/psb_lovrl.f90 index 75b816b8..bb3ece65 100644 --- a/base/comm/psb_lovrl.f90 +++ b/base/comm/psb_lovrl.f90 @@ -32,12 +32,12 @@ ! ! File: psb_lovrl.f90 ! -! Subroutine: psb_lovrlm +! Subroutine: psb_lovrl_vect ! This subroutine performs the exchange of the overlap elements in a -! distributed dense matrix between all the processes. +! distributed dense vector between all the processes. ! ! Arguments: -! x(:,:) - integer The local part of the dense matrix. +! x - type(psb_l_vect_type) The local part of the vector ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code. ! jx - integer(optional). The starting column of the global matrix @@ -180,7 +180,38 @@ subroutine psb_lovrl_vect(x,desc_a,info,work,update,mode) return end subroutine psb_lovrl_vect - +! +! Subroutine: psb_lovrl_multivect +! This subroutine performs the exchange of the overlap elements in a +! distributed multivector between all the processes. +! +! Arguments: +! x - type(psb_l_vect_type) The local part of the multivector +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code. +! jx - integer(optional). The starting column of the global matrix +! ik - integer(optional). The number of columns to gather. +! work - integer(optional). A work area. +! update - integer(optional). Type of update: +! psb_none_ do nothing +! psb_sum_ sum of overlaps +! psb_avg_ average of overlaps +! mode - integer(optional). Choose the algorithm for data exchange: +! this is chosen through bit fields. +! - swap_mpi = iand(flag,psb_swap_mpi_) /= 0 +! - swap_sync = iand(flag,psb_swap_sync_) /= 0 +! - swap_send = iand(flag,psb_swap_send_) /= 0 +! - swap_recv = iand(flag,psb_swap_recv_) /= 0 +! - if (swap_mpi): use underlying MPI_ALLTOALLV. +! - if (swap_sync): use PSB_SND and PSB_RCV in +! synchronized pairs +! - if (swap_send .and. swap_recv): use mpi_irecv +! and mpi_send +! - if (swap_send): use psb_snd (but need another +! call with swap_recv to complete) +! - if (swap_recv): use psb_rcv (completing a +! previous call with swap_send) +! subroutine psb_lovrl_multivect(x,desc_a,info,work,update,mode) use psb_base_mod, psb_protect_name => psb_lovrl_multivect use psi_mod diff --git a/base/comm/psb_lscatter.F90 b/base/comm/psb_lscatter.F90 index ed4051fa..ceb60e4f 100644 --- a/base/comm/psb_lscatter.F90 +++ b/base/comm/psb_lscatter.F90 @@ -31,13 +31,13 @@ ! ! File: psb_lscatter.f90 ! -! Subroutine: psb_lscatterm -! This subroutine scatters a global matrix locally owned by one process -! into pieces that are local to alle the processes. +! Subroutine: psb_lscatter_vect +! This subroutine scatters a global vector locally owned by one process +! into pieces that are local to all the processes. ! ! Arguments: -! globx - integer,dimension(:,:). The global matrix to scatter. -! locx - integer,dimension(:,:). The local piece of the distributed matrix. +! globx - integer,dimension(:) The global matrix to scatter. +! locx - type(psb_l_vect_type) The local piece of the distributed matrix. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Error code. ! iroot - integer(optional). The process that owns the global matrix. diff --git a/base/comm/psb_lspgather.F90 b/base/comm/psb_lspgather.F90 index 81c9c67c..2f588116 100644 --- a/base/comm/psb_lspgather.F90 +++ b/base/comm/psb_lspgather.F90 @@ -30,6 +30,17 @@ ! ! ! File: psb_lspgather.f90 +! +! Gathers a sparse matrix onto a single process. +! Two variants: +! 1. Gathers to PSB_l_SPARSE_MAT (i.e. to matrix with IPK_ indices) +! 2. Gathers to PSB_@LX@_SPARSE_MAT (i.e. to matrix with LPK_ indices) +! +! Note: this function uses MPI_ALLGATHERV. At this time, the size of the +! resulting matrix must be within the range of 4 bytes because of the +! restriction on MPI displacements to be 4 bytes. +! +! subroutine psb_lsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keeploc) #if defined(HAVE_ISO_FORTRAN_ENV) use iso_fortran_env diff --git a/base/comm/psb_mgather_a.f90 b/base/comm/psb_mgather_a.f90 index 1ee8e7f4..251e06c9 100644 --- a/base/comm/psb_mgather_a.f90 +++ b/base/comm/psb_mgather_a.f90 @@ -31,7 +31,7 @@ ! ! File: psb_mgather.f90 ! -! Subroutine: psb_mgatherm +! Subroutine: psb_mgather ! This subroutine gathers pieces of a distributed dense matrix into a local one. ! ! Arguments: diff --git a/base/comm/psb_mscatter_a.F90 b/base/comm/psb_mscatter_a.F90 index 2c9baf6f..f85a3849 100644 --- a/base/comm/psb_mscatter_a.F90 +++ b/base/comm/psb_mscatter_a.F90 @@ -33,7 +33,7 @@ ! ! Subroutine: psb_mscatterm ! This subroutine scatters a global matrix locally owned by one process -! into pieces that are local to alle the processes. +! into pieces that are local to all the processes. ! ! Arguments: ! globx - integer,dimension(:,:). The global matrix to scatter. @@ -278,7 +278,7 @@ end subroutine psb_mscatterm ! Subroutine: psb_mscatterv ! This subroutine scatters a global vector locally owned by one process -! into pieces that are local to alle the processes. +! into pieces that are local to all the processes. ! ! Arguments: ! globx - integer,dimension(:). The global vector to scatter. diff --git a/base/comm/psb_sgather.f90 b/base/comm/psb_sgather.f90 index 10c6b7c2..538d1c43 100644 --- a/base/comm/psb_sgather.f90 +++ b/base/comm/psb_sgather.f90 @@ -31,14 +31,14 @@ ! ! File: psb_sgather.f90 ! -! Subroutine: psb_sgatherm -! This subroutine gathers pieces of a distributed dense matrix into a local one. +! Subroutine: psb_sgather_vect +! This subroutine gathers pieces of a distributed vector into a local one. ! ! Arguments: -! globx - real,dimension(:,:). The local matrix into which gather +! globx - real,dimension(:). The local matrix into which gather ! the distributed pieces. -! locx - real,dimension(:,:). The local piece of the distributed -! matrix to be gathered. +! locx - type(psb_s_vect_type@ The local piece of the distributed +! vector to be gathered. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Error code. ! iroot - integer. The process that has to own the @@ -159,6 +159,8 @@ subroutine psb_sgather_vect(globx, locx, desc_a, info, iroot) end subroutine psb_sgather_vect +! Subroutine: psb_sgather_multivect +! This subroutine gathers pieces of a distributed multivector into a local one. subroutine psb_sgather_multivect(globx, locx, desc_a, info, iroot) use psb_base_mod, psb_protect_name => psb_sgather_multivect diff --git a/base/comm/psb_sgather_a.f90 b/base/comm/psb_sgather_a.f90 index 68076959..47a8c79a 100644 --- a/base/comm/psb_sgather_a.f90 +++ b/base/comm/psb_sgather_a.f90 @@ -31,7 +31,7 @@ ! ! File: psb_sgather.f90 ! -! Subroutine: psb_sgatherm +! Subroutine: psb_sgather ! This subroutine gathers pieces of a distributed dense matrix into a local one. ! ! Arguments: diff --git a/base/comm/psb_shalo.f90 b/base/comm/psb_shalo.f90 index 2948624c..78ab39e3 100644 --- a/base/comm/psb_shalo.f90 +++ b/base/comm/psb_shalo.f90 @@ -31,17 +31,17 @@ ! ! File: psb_shalo.f90 ! -! Subroutine: psb_shalom +! Subroutine: psb_shalo_vect ! This subroutine performs the exchange of the halo elements in a -! distributed dense matrix between all the processes. +! distributed vector between all the processes. ! ! Arguments: -! x - real,dimension(:,:). The local part of the dense matrix. +! x - type(psb_s_vect_type) The local part of the vector ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code ! jx - integer(optional). The starting column of the global matrix. ! ik - integer(optional). The number of columns to gather. -! work - real(optional). Work area. +! work - real(optional). Work area. ! tran - character(optional). Transpose exchange. ! mode - integer(optional). Communication mode (see Swapdata) ! data - integer Which index list in desc_a should be used @@ -52,7 +52,6 @@ ! psb_comm_mov_ use ovr_mst_idx ! ! - subroutine psb_shalo_vect(x,desc_a,info,work,tran,mode,data) use psb_base_mod, psb_protect_name => psb_shalo_vect use psi_mod @@ -185,7 +184,28 @@ subroutine psb_shalo_vect(x,desc_a,info,work,tran,mode,data) return end subroutine psb_shalo_vect - +! +! Subroutine: psb_shalo_multivect +! This subroutine performs the exchange of the halo elements in a +! distributed multivector between all the processes. +! +! Arguments: +! x - type(psb_s_multivect_type) The local part of the multivector +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! jx - integer(optional). The starting column of the global matrix. +! ik - integer(optional). The number of columns to gather. +! work - real(optional). Work area. +! tran - character(optional). Transpose exchange. +! mode - integer(optional). Communication mode (see Swapdata) +! data - integer Which index list in desc_a should be used +! to retrieve rows, default psb_comm_halo_ +! psb_comm_halo_ use halo_index +! psb_comm_ext_ use ext_index +! psb_comm_ovrl_ use ovrl_index +! psb_comm_mov_ use ovr_mst_idx +! +! subroutine psb_shalo_multivect(x,desc_a,info,work,tran,mode,data) use psb_base_mod, psb_protect_name => psb_shalo_multivect use psi_mod diff --git a/base/comm/psb_sovrl.f90 b/base/comm/psb_sovrl.f90 index 25af2d48..3930645a 100644 --- a/base/comm/psb_sovrl.f90 +++ b/base/comm/psb_sovrl.f90 @@ -32,12 +32,12 @@ ! ! File: psb_sovrl.f90 ! -! Subroutine: psb_sovrlm +! Subroutine: psb_sovrl_vect ! This subroutine performs the exchange of the overlap elements in a -! distributed dense matrix between all the processes. +! distributed dense vector between all the processes. ! ! Arguments: -! x(:,:) - real The local part of the dense matrix. +! x - type(psb_s_vect_type) The local part of the vector ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code. ! jx - integer(optional). The starting column of the global matrix @@ -180,7 +180,38 @@ subroutine psb_sovrl_vect(x,desc_a,info,work,update,mode) return end subroutine psb_sovrl_vect - +! +! Subroutine: psb_sovrl_multivect +! This subroutine performs the exchange of the overlap elements in a +! distributed multivector between all the processes. +! +! Arguments: +! x - type(psb_s_vect_type) The local part of the multivector +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code. +! jx - integer(optional). The starting column of the global matrix +! ik - integer(optional). The number of columns to gather. +! work - real(optional). A work area. +! update - integer(optional). Type of update: +! psb_none_ do nothing +! psb_sum_ sum of overlaps +! psb_avg_ average of overlaps +! mode - integer(optional). Choose the algorithm for data exchange: +! this is chosen through bit fields. +! - swap_mpi = iand(flag,psb_swap_mpi_) /= 0 +! - swap_sync = iand(flag,psb_swap_sync_) /= 0 +! - swap_send = iand(flag,psb_swap_send_) /= 0 +! - swap_recv = iand(flag,psb_swap_recv_) /= 0 +! - if (swap_mpi): use underlying MPI_ALLTOALLV. +! - if (swap_sync): use PSB_SND and PSB_RCV in +! synchronized pairs +! - if (swap_send .and. swap_recv): use mpi_irecv +! and mpi_send +! - if (swap_send): use psb_snd (but need another +! call with swap_recv to complete) +! - if (swap_recv): use psb_rcv (completing a +! previous call with swap_send) +! subroutine psb_sovrl_multivect(x,desc_a,info,work,update,mode) use psb_base_mod, psb_protect_name => psb_sovrl_multivect use psi_mod diff --git a/base/comm/psb_sscatter.F90 b/base/comm/psb_sscatter.F90 index 60bdd342..960c5bac 100644 --- a/base/comm/psb_sscatter.F90 +++ b/base/comm/psb_sscatter.F90 @@ -31,13 +31,13 @@ ! ! File: psb_sscatter.f90 ! -! Subroutine: psb_sscatterm -! This subroutine scatters a global matrix locally owned by one process -! into pieces that are local to alle the processes. +! Subroutine: psb_sscatter_vect +! This subroutine scatters a global vector locally owned by one process +! into pieces that are local to all the processes. ! ! Arguments: -! globx - real,dimension(:,:). The global matrix to scatter. -! locx - real,dimension(:,:). The local piece of the distributed matrix. +! globx - real,dimension(:) The global matrix to scatter. +! locx - type(psb_s_vect_type) The local piece of the distributed matrix. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Error code. ! iroot - integer(optional). The process that owns the global matrix. diff --git a/base/comm/psb_sscatter_a.F90 b/base/comm/psb_sscatter_a.F90 index bb3002a9..d756b712 100644 --- a/base/comm/psb_sscatter_a.F90 +++ b/base/comm/psb_sscatter_a.F90 @@ -33,7 +33,7 @@ ! ! Subroutine: psb_sscatterm ! This subroutine scatters a global matrix locally owned by one process -! into pieces that are local to alle the processes. +! into pieces that are local to all the processes. ! ! Arguments: ! globx - real,dimension(:,:). The global matrix to scatter. @@ -278,7 +278,7 @@ end subroutine psb_sscatterm ! Subroutine: psb_sscatterv ! This subroutine scatters a global vector locally owned by one process -! into pieces that are local to alle the processes. +! into pieces that are local to all the processes. ! ! Arguments: ! globx - real,dimension(:). The global vector to scatter. diff --git a/base/comm/psb_sspgather.F90 b/base/comm/psb_sspgather.F90 index 27444ecd..7123a988 100644 --- a/base/comm/psb_sspgather.F90 +++ b/base/comm/psb_sspgather.F90 @@ -30,6 +30,17 @@ ! ! ! File: psb_sspgather.f90 +! +! Gathers a sparse matrix onto a single process. +! Two variants: +! 1. Gathers to PSB_s_SPARSE_MAT (i.e. to matrix with IPK_ indices) +! 2. Gathers to PSB_ls_SPARSE_MAT (i.e. to matrix with LPK_ indices) +! +! Note: this function uses MPI_ALLGATHERV. At this time, the size of the +! resulting matrix must be within the range of 4 bytes because of the +! restriction on MPI displacements to be 4 bytes. +! +! subroutine psb_ssp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keeploc) #if defined(HAVE_ISO_FORTRAN_ENV) use iso_fortran_env diff --git a/base/comm/psb_zgather.f90 b/base/comm/psb_zgather.f90 index d22e6644..b8b9a6c8 100644 --- a/base/comm/psb_zgather.f90 +++ b/base/comm/psb_zgather.f90 @@ -31,14 +31,14 @@ ! ! File: psb_zgather.f90 ! -! Subroutine: psb_zgatherm -! This subroutine gathers pieces of a distributed dense matrix into a local one. +! Subroutine: psb_zgather_vect +! This subroutine gathers pieces of a distributed vector into a local one. ! ! Arguments: -! globx - complex,dimension(:,:). The local matrix into which gather +! globx - complex,dimension(:). The local matrix into which gather ! the distributed pieces. -! locx - complex,dimension(:,:). The local piece of the distributed -! matrix to be gathered. +! locx - type(psb_z_vect_type@ The local piece of the distributed +! vector to be gathered. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Error code. ! iroot - integer. The process that has to own the @@ -159,6 +159,8 @@ subroutine psb_zgather_vect(globx, locx, desc_a, info, iroot) end subroutine psb_zgather_vect +! Subroutine: psb_zgather_multivect +! This subroutine gathers pieces of a distributed multivector into a local one. subroutine psb_zgather_multivect(globx, locx, desc_a, info, iroot) use psb_base_mod, psb_protect_name => psb_zgather_multivect diff --git a/base/comm/psb_zgather_a.f90 b/base/comm/psb_zgather_a.f90 index d2c9bd91..1c3838cb 100644 --- a/base/comm/psb_zgather_a.f90 +++ b/base/comm/psb_zgather_a.f90 @@ -31,7 +31,7 @@ ! ! File: psb_zgather.f90 ! -! Subroutine: psb_zgatherm +! Subroutine: psb_zgather ! This subroutine gathers pieces of a distributed dense matrix into a local one. ! ! Arguments: diff --git a/base/comm/psb_zhalo.f90 b/base/comm/psb_zhalo.f90 index 57fe43ee..595cbc03 100644 --- a/base/comm/psb_zhalo.f90 +++ b/base/comm/psb_zhalo.f90 @@ -31,17 +31,17 @@ ! ! File: psb_zhalo.f90 ! -! Subroutine: psb_zhalom +! Subroutine: psb_zhalo_vect ! This subroutine performs the exchange of the halo elements in a -! distributed dense matrix between all the processes. +! distributed vector between all the processes. ! ! Arguments: -! x - complex,dimension(:,:). The local part of the dense matrix. +! x - type(psb_z_vect_type) The local part of the vector ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code ! jx - integer(optional). The starting column of the global matrix. ! ik - integer(optional). The number of columns to gather. -! work - complex(optional). Work area. +! work - complex(optional). Work area. ! tran - character(optional). Transpose exchange. ! mode - integer(optional). Communication mode (see Swapdata) ! data - integer Which index list in desc_a should be used @@ -52,7 +52,6 @@ ! psb_comm_mov_ use ovr_mst_idx ! ! - subroutine psb_zhalo_vect(x,desc_a,info,work,tran,mode,data) use psb_base_mod, psb_protect_name => psb_zhalo_vect use psi_mod @@ -185,7 +184,28 @@ subroutine psb_zhalo_vect(x,desc_a,info,work,tran,mode,data) return end subroutine psb_zhalo_vect - +! +! Subroutine: psb_zhalo_multivect +! This subroutine performs the exchange of the halo elements in a +! distributed multivector between all the processes. +! +! Arguments: +! x - type(psb_z_multivect_type) The local part of the multivector +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! jx - integer(optional). The starting column of the global matrix. +! ik - integer(optional). The number of columns to gather. +! work - complex(optional). Work area. +! tran - character(optional). Transpose exchange. +! mode - integer(optional). Communication mode (see Swapdata) +! data - integer Which index list in desc_a should be used +! to retrieve rows, default psb_comm_halo_ +! psb_comm_halo_ use halo_index +! psb_comm_ext_ use ext_index +! psb_comm_ovrl_ use ovrl_index +! psb_comm_mov_ use ovr_mst_idx +! +! subroutine psb_zhalo_multivect(x,desc_a,info,work,tran,mode,data) use psb_base_mod, psb_protect_name => psb_zhalo_multivect use psi_mod diff --git a/base/comm/psb_zovrl.f90 b/base/comm/psb_zovrl.f90 index 66b42303..c7463e19 100644 --- a/base/comm/psb_zovrl.f90 +++ b/base/comm/psb_zovrl.f90 @@ -32,12 +32,12 @@ ! ! File: psb_zovrl.f90 ! -! Subroutine: psb_zovrlm +! Subroutine: psb_zovrl_vect ! This subroutine performs the exchange of the overlap elements in a -! distributed dense matrix between all the processes. +! distributed dense vector between all the processes. ! ! Arguments: -! x(:,:) - complex The local part of the dense matrix. +! x - type(psb_z_vect_type) The local part of the vector ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code. ! jx - integer(optional). The starting column of the global matrix @@ -180,7 +180,38 @@ subroutine psb_zovrl_vect(x,desc_a,info,work,update,mode) return end subroutine psb_zovrl_vect - +! +! Subroutine: psb_zovrl_multivect +! This subroutine performs the exchange of the overlap elements in a +! distributed multivector between all the processes. +! +! Arguments: +! x - type(psb_z_vect_type) The local part of the multivector +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code. +! jx - integer(optional). The starting column of the global matrix +! ik - integer(optional). The number of columns to gather. +! work - complex(optional). A work area. +! update - integer(optional). Type of update: +! psb_none_ do nothing +! psb_sum_ sum of overlaps +! psb_avg_ average of overlaps +! mode - integer(optional). Choose the algorithm for data exchange: +! this is chosen through bit fields. +! - swap_mpi = iand(flag,psb_swap_mpi_) /= 0 +! - swap_sync = iand(flag,psb_swap_sync_) /= 0 +! - swap_send = iand(flag,psb_swap_send_) /= 0 +! - swap_recv = iand(flag,psb_swap_recv_) /= 0 +! - if (swap_mpi): use underlying MPI_ALLTOALLV. +! - if (swap_sync): use PSB_SND and PSB_RCV in +! synchronized pairs +! - if (swap_send .and. swap_recv): use mpi_irecv +! and mpi_send +! - if (swap_send): use psb_snd (but need another +! call with swap_recv to complete) +! - if (swap_recv): use psb_rcv (completing a +! previous call with swap_send) +! subroutine psb_zovrl_multivect(x,desc_a,info,work,update,mode) use psb_base_mod, psb_protect_name => psb_zovrl_multivect use psi_mod diff --git a/base/comm/psb_zscatter.F90 b/base/comm/psb_zscatter.F90 index 008f8084..10c2c78d 100644 --- a/base/comm/psb_zscatter.F90 +++ b/base/comm/psb_zscatter.F90 @@ -31,13 +31,13 @@ ! ! File: psb_zscatter.f90 ! -! Subroutine: psb_zscatterm -! This subroutine scatters a global matrix locally owned by one process -! into pieces that are local to alle the processes. +! Subroutine: psb_zscatter_vect +! This subroutine scatters a global vector locally owned by one process +! into pieces that are local to all the processes. ! ! Arguments: -! globx - complex,dimension(:,:). The global matrix to scatter. -! locx - complex,dimension(:,:). The local piece of the distributed matrix. +! globx - complex,dimension(:) The global matrix to scatter. +! locx - type(psb_z_vect_type) The local piece of the distributed matrix. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Error code. ! iroot - integer(optional). The process that owns the global matrix. diff --git a/base/comm/psb_zscatter_a.F90 b/base/comm/psb_zscatter_a.F90 index 69e70d87..b206d8d3 100644 --- a/base/comm/psb_zscatter_a.F90 +++ b/base/comm/psb_zscatter_a.F90 @@ -33,7 +33,7 @@ ! ! Subroutine: psb_zscatterm ! This subroutine scatters a global matrix locally owned by one process -! into pieces that are local to alle the processes. +! into pieces that are local to all the processes. ! ! Arguments: ! globx - complex,dimension(:,:). The global matrix to scatter. @@ -278,7 +278,7 @@ end subroutine psb_zscatterm ! Subroutine: psb_zscatterv ! This subroutine scatters a global vector locally owned by one process -! into pieces that are local to alle the processes. +! into pieces that are local to all the processes. ! ! Arguments: ! globx - complex,dimension(:). The global vector to scatter. diff --git a/base/comm/psb_zspgather.F90 b/base/comm/psb_zspgather.F90 index fbcae1f3..f4a27681 100644 --- a/base/comm/psb_zspgather.F90 +++ b/base/comm/psb_zspgather.F90 @@ -30,6 +30,17 @@ ! ! ! File: psb_zspgather.f90 +! +! Gathers a sparse matrix onto a single process. +! Two variants: +! 1. Gathers to PSB_z_SPARSE_MAT (i.e. to matrix with IPK_ indices) +! 2. Gathers to PSB_lz_SPARSE_MAT (i.e. to matrix with LPK_ indices) +! +! Note: this function uses MPI_ALLGATHERV. At this time, the size of the +! resulting matrix must be within the range of 4 bytes because of the +! restriction on MPI displacements to be 4 bytes. +! +! subroutine psb_zsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keeploc) #if defined(HAVE_ISO_FORTRAN_ENV) use iso_fortran_env diff --git a/base/psblas/psb_camax.f90 b/base/psblas/psb_camax.f90 index 526c5e40..0de31a0c 100644 --- a/base/psblas/psb_camax.f90 +++ b/base/psblas/psb_camax.f90 @@ -32,7 +32,7 @@ ! File: psb_camax.f90 ! ! Function: psb_camax -! Searches the absolute max of X. +! Computes the maximum absolute value of X ! ! normi := max(abs(sub(X)(i)) ! @@ -164,7 +164,7 @@ end function psb_camax !!$ ! ! Function: psb_camaxv -! Searches the absolute max of X. +! Computes the maximum absolute value of X. ! ! normi := max(abs(X(i)) ! @@ -252,6 +252,17 @@ function psb_camaxv (x,desc_a, info,global) result(res) return end function psb_camaxv +! +! Function: psb_camax_vect +! Computes the maximum absolute value of X. +! +! normi := max(abs(X(i)) +! +! Arguments: +! x - type(psb_c_vect_type) The input vector. +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! function psb_camax_vect(x, desc_a, info,global) result(res) use psb_penv_mod @@ -374,17 +385,16 @@ end function psb_camax_vect !!$ ! ! Subroutine: psb_camaxvs -! Searches the absolute max of X. +! Computes the maximum absolute value of X, subroutine version ! ! normi := max(abs(sub(X)(i)) ! -! where sub( X ) denotes X(1:N,JX:). +! where sub( X ) denotes X(1:N). ! ! Arguments: ! res - real The result. -! x(:,:) - complex The input vector. +! x(:) - complex The input vector. ! desc_a - type(psb_desc_type). The communication descriptor. -! info - integer. Return code ! jx - integer(optional). The column offset. ! subroutine psb_camaxvs(res,x,desc_a, info,global) diff --git a/base/psblas/psb_casum.f90 b/base/psblas/psb_casum.f90 index 2b0beda5..a0bf5262 100644 --- a/base/psblas/psb_casum.f90 +++ b/base/psblas/psb_casum.f90 @@ -135,6 +135,19 @@ function psb_casum (x,desc_a, info, jx,global) result(res) return end function psb_casum +! Function: psb_casum_vect +! Computes norm1 of X +! +! norm1 := sum(sub( X )(i)) +! +! where sub( X ) denotes X(1:N,JX:). +! +! Arguments: +! x - type(psb_c_vect_type) The input vector. +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! jx - integer(optional). The column offset. +! function psb_casum_vect(x, desc_a, info,global) result(res) use psb_base_mod, psb_protect_name => psb_casum_vect diff --git a/base/psblas/psb_caxpby.f90 b/base/psblas/psb_caxpby.f90 index ab00bd60..daa47829 100644 --- a/base/psblas/psb_caxpby.f90 +++ b/base/psblas/psb_caxpby.f90 @@ -31,6 +31,23 @@ ! ! File: psb_caxpby.f90 +! +! Subroutine: psb_caxpby_vect +! Adds one distributed vector to another, +! +! Y := beta * Y + alpha * X +! +! Arguments: +! alpha - complex,input The scalar used to multiply each component of X +! x - type(psb_c_vect_type) The input vector containing the entries of X +! beta - complex,input The scalar used to multiply each component of Y +! y - type(psb_c_vect_type) The input/output vector Y +! desc_a - type(psb_desc_type) The communication descriptor. +! info - integer Return code +! +! Note: from a functional point of view, X is input, but here +! it's declared INOUT because of the sync() methods. +! subroutine psb_caxpby_vect(alpha, x, beta, y,& & desc_a, info) use psb_base_mod, psb_protect_name => psb_caxpby_vect @@ -269,7 +286,7 @@ end subroutine psb_caxpby !!$ ! ! Subroutine: psb_caxpbyv -! Adds one distributed matrix to another, +! Adds one distributed vector to another, ! ! Y := beta * Y + alpha * X ! diff --git a/base/psblas/psb_cdot.f90 b/base/psblas/psb_cdot.f90 index 39cda431..5432eb32 100644 --- a/base/psblas/psb_cdot.f90 +++ b/base/psblas/psb_cdot.f90 @@ -31,22 +31,22 @@ ! ! File: psb_cdot.f90 ! -! Function: psb_cdot -! psb_cdot forms the dot product of two distributed vectors, +! Function: psb_cdot_vect +! psb_cdot computes the dot product of two distributed vectors, ! -! dot := sub( X )**C * sub( Y ) +! dot := ( X )**C * ( Y ) ! -! where sub( X ) denotes X(:,JX) -! -! sub( Y ) denotes Y(:,JY). ! ! Arguments: -! x(:,:) - complex The input vector containing the entries of sub( X ). -! y(:,:) - complex The input vector containing the entries of sub( Y ). +! x - type(psb_c_vect_type) The input vector containing the entries of sub( X ). +! y - type(psb_c_vect_type) The input vector containing the entries of sub( Y ). ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code -! jx - integer(optional). The column offset for sub( X ). -! jy - integer(optional). The column offset for sub( Y ). +! global - logical(optional) Whether to perform the global sum, default: .true. +! +! Note: from a functional point of view, X and Y are input, but here +! they are declared INOUT because of the sync() methods. +! ! function psb_cdot_vect(x, y, desc_a,info,global) result(res) use psb_desc_mod @@ -156,7 +156,25 @@ function psb_cdot_vect(x, y, desc_a,info,global) result(res) return end function psb_cdot_vect - +! +! Function: psb_cdot +! psb_cdot computes the dot product of two distributed vectors, +! +! dot := sub( X )**C * sub( Y ) +! +! where sub( X ) denotes X(:,JX) +! +! sub( Y ) denotes Y(:,JY). +! +! Arguments: +! x(:,:) - complex The input vector containing the entries of sub( X ). +! y(:,:) - complex The input vector containing the entries of sub( Y ). +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! jx - integer(optional). The column offset for sub( X ). +! jy - integer(optional). The column offset for sub( Y ). +! global - logical(optional) Whether to perform the global sum, default: .true. +! function psb_cdot(x, y,desc_a, info, jx, jy,global) result(res) use psb_base_mod, psb_protect_name => psb_cdot implicit none @@ -298,7 +316,7 @@ end function psb_cdot !!$ ! ! Function: psb_cdotv -! psb_cdotv forms the dot product of two distributed vectors, +! psb_cdotv computes the dot product of two distributed vectors, ! ! dot := X**C * Y ! @@ -307,6 +325,7 @@ end function psb_cdot ! y(:) - complex The input vector containing the entries of Y. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code +! global - logical(optional) Whether to perform the global sum, default: .true. ! function psb_cdotv(x, y,desc_a, info,global) result(res) use psb_base_mod, psb_protect_name => psb_cdotv @@ -432,7 +451,7 @@ end function psb_cdotv !!$ ! ! Subroutine: psb_cdotvs -! psb_cdotvs forms the dot product of two distributed vectors, +! psb_cdotvs computes the dot product of two distributed vectors, ! ! res := X**C * Y ! @@ -442,6 +461,7 @@ end function psb_cdotv ! y(:) - complex The input vector containing the entries of Y. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code +! global - logical(optional) Whether to perform the global sum, default: .true. ! subroutine psb_cdotvs(res, x, y,desc_a, info,global) use psb_base_mod, psb_protect_name => psb_cdotvs @@ -565,7 +585,7 @@ end subroutine psb_cdotvs !!$ ! ! Subroutine: psb_cmdots -! psb_cmdots forms the dot product of multiple distributed vectors, +! psb_cmdots computes the dot product of multiple distributed vectors, ! ! res(i) := ( X(:,i) )**C * ( Y(:,i) ) ! @@ -575,6 +595,7 @@ end subroutine psb_cdotvs ! y(:) - complex The input vector containing the entries of sub( Y ). ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code +! global - logical(optional) Whether to perform the global sum, default: .true. ! subroutine psb_cmdots(res, x, y, desc_a, info,global) use psb_base_mod, psb_protect_name => psb_cmdots diff --git a/base/psblas/psb_cnrm2.f90 b/base/psblas/psb_cnrm2.f90 index 43ab876c..1db5773a 100644 --- a/base/psblas/psb_cnrm2.f90 +++ b/base/psblas/psb_cnrm2.f90 @@ -32,9 +32,9 @@ ! File: psb_cnrm2.f90 ! ! Function: psb_cnrm2 -! Forms the norm2 of a distributed vector, +! Computes the norm2 of a distributed vector, ! -! norm2 := sqrt ( sub( X )**T * sub( X ) ) +! norm2 := sqrt ( sub( X )**C * sub( X ) ) ! ! where sub( X ) denotes X(:,JX). ! @@ -43,6 +43,7 @@ ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code ! jx - integer(optional). The column offset for sub( X ). +! global - logical(optional) Whether to perform the global reduction, default: .true. ! function psb_cnrm2(x, desc_a, info, jx,global) result(res) use psb_desc_mod @@ -170,14 +171,15 @@ end function psb_cnrm2 !!$ ! ! Function: psb_cnrm2 -! Forms the norm2 of a distributed vector, +! Computes the norm2 of a distributed vector, ! -! norm2 := sqrt ( X**T * X) +! norm2 := sqrt ( X**C * X) ! ! Arguments: ! x(:) - complex The input vector containing the entries of X. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code +! global - logical(optional) Whether to perform the global reduction, default: .true. ! function psb_cnrm2v(x, desc_a, info,global) result(res) use psb_desc_mod @@ -263,7 +265,17 @@ function psb_cnrm2v(x, desc_a, info,global) result(res) end function psb_cnrm2v - +! Function: psb_cnrm2_vect +! Computes the norm2 of a distributed vector, +! +! norm2 := sqrt ( X**C * X) +! +! Arguments: +! x - type(psb_c_vect_type) The input vector containing the entries of X. +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! global - logical(optional) Whether to perform the global reduction, default: .true. +! function psb_cnrm2_vect(x, desc_a, info,global) result(res) use psb_desc_mod use psb_check_mod @@ -344,7 +356,7 @@ function psb_cnrm2_vect(x, desc_a, info,global) result(res) res = res - sqrt(cone - dd*(abs(x%v%v(idx))/res)**2) end do end if - else + else res = szero end if @@ -392,15 +404,16 @@ end function psb_cnrm2_vect !!$ ! ! Subroutine: psb_cnrm2vs -! Forms the norm2 of a distributed vector, +! Computes the norm2 of a distributed vector, subroutine version ! -! norm2 := sqrt ( X**T * X) +! norm2 := sqrt ( X**C * X) ! ! Arguments: ! res - real The result. ! x(:) - complex The input vector containing the entries of X. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code +! global - logical(optional) Whether to perform the global reduction, default: .true. ! subroutine psb_cnrm2vs(res, x, desc_a, info,global) use psb_desc_mod diff --git a/base/psblas/psb_cnrmi.f90 b/base/psblas/psb_cnrmi.f90 index 0ac0a04b..0abece53 100644 --- a/base/psblas/psb_cnrmi.f90 +++ b/base/psblas/psb_cnrmi.f90 @@ -32,14 +32,15 @@ ! File: psb_cnrmi.f90 ! ! Function: psb_cnrmi -! Forms the infinity norm of a sparse matrix, +! Computes the infinity norm of a sparse matrix, ! -! nrmi := max_i(abs(sum(A(i,:)))) +! nrmi := max_i(sum(abs(A(i,:)))) ! ! Arguments: ! a - type(psb_cspmat_type). The sparse matrix containing A. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code +! global - logical(optional) Whether to perform the global reduction, default: .true. ! function psb_cnrmi(a,desc_a,info,global) result(res) use psb_base_mod, psb_protect_name => psb_cnrmi diff --git a/base/psblas/psb_cspmm.f90 b/base/psblas/psb_cspmm.f90 index 3d1d3b7d..5a059647 100644 --- a/base/psblas/psb_cspmm.f90 +++ b/base/psblas/psb_cspmm.f90 @@ -31,6 +31,235 @@ ! ! File: psb_cspmm.f90 ! +! +! Subroutine: psb_cspm_vect +! Performs one of the distributed matrix-vector operations +! +! Y := alpha * Pr * A * Pc * X + beta * Y, or +! +! Y := alpha * Pr * A' * Pr * X + beta * Y, +! +! alpha and beta are scalars, X and Y are distributed +! vectors and A is a M-by-N distributed matrix. +! +! Arguments: +! alpha - complex The scalar alpha. +! a - type(psb_cspmat_type). The sparse matrix containing A. +! x - type(psb_c_vect_type) The input vector containing the entries of ( X ). +! beta - complex The scalar beta. +! y - type(psb_c_vect_type) The input vector containing the entries of ( Y ). +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! trans - character(optional). Whether A or A'. Default: 'N' +! work(:) - complex,(optional). Working area. +! doswap - logical(optional). Whether to performe halo updates. +! +subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,& + & trans, work, doswap) + use psb_base_mod, psb_protect_name => psb_cspmv_vect + use psi_mod + implicit none + + complex(psb_spk_), intent(in) :: alpha, beta + type(psb_c_vect_type), intent(inout) :: x + type(psb_c_vect_type), intent(inout) :: y + type(psb_cspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + integer(psb_ipk_), intent(out) :: info + complex(psb_spk_), optional, target, intent(inout) :: work(:) + character, intent(in), optional :: trans + logical, intent(in), optional :: doswap + + ! locals + integer(psb_ipk_) :: ictxt, np, me,& + & err_act, iix, jjx, iia, jja, nrow, ncol, lldx, lldy, & + & liwork, iiy, jjy, ib, ip, idx + integer(psb_lpk_) :: ix, ijx, iy, ijy, m, n, ia, ja + integer(psb_ipk_), parameter :: nb=4 + complex(psb_spk_), pointer :: iwork(:), xp(:), yp(:) + complex(psb_spk_), allocatable :: xvsave(:) + character :: trans_ + character(len=20) :: name, ch_err + logical :: aliw, doswap_ + integer(psb_ipk_) :: debug_level, debug_unit + + name='psb_cspmv' + info=psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_ ; goto 9999 + end if + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + ictxt=desc_a%get_context() + call psb_info(ictxt, me, np) + if (np == -1) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(y%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (present(doswap)) then + doswap_ = doswap + else + doswap_ = .true. + endif + + if (present(trans)) then + trans_ = psb_toupper(trans) + else + trans_ = 'N' + endif + if ( (trans_ == 'N').or.(trans_ == 'T')& + & .or.(trans_ == 'C')) then + else + info = psb_err_iarg_invalid_value_ + call psb_errpush(info,name) + goto 9999 + end if + + m = desc_a%get_global_rows() + n = desc_a%get_global_cols() + nrow = desc_a%get_local_rows() + ncol = desc_a%get_local_cols() + lldx = x%get_nrows() + lldy = y%get_nrows() + if ((info == 0).and.(lldx null() + ! check for presence/size of a work area + liwork= 2*ncol + + if (present(work)) then + if (size(work) >= liwork) then + aliw =.false. + else + aliw=.true. + endif + else + aliw=.true. + end if + + if (aliw) then + allocate(iwork(liwork),stat=info) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='Allocate' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + else + iwork => work + endif + + if (debug_level >= psb_debug_comp_) & + & write(debug_unit,*) me,' ',trim(name),' Allocated work ', info + + if (trans_ == 'N') then + ! Matrix is not transposed + + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + + call psb_csmm(alpha,a,x,beta,y,info) + + if(info /= psb_success_) then + info = psb_err_from_subroutine_non_ + call psb_errpush(info,name) + goto 9999 + end if + + else + ! Matrix is transposed + + ! + ! Non-empty overlap, need a buffer to hold + ! the entries updated with average operator. + ! Why the average? because in this way they will contribute + ! with a proper scale factor (1/np) to the overall product. + ! + call psi_ovrl_save(x%v,xvsave,desc_a,info) + if (info == psb_success_) call psi_ovrl_upd(x%v,desc_a,psb_avg_,info) + + if (beta /= czero) call y%set(czero,nrow+1,ncol) + ! local Matrix-vector product + if (info == psb_success_) call psb_csmm(alpha,a,x,beta,y,info,trans=trans_) + + if (debug_level >= psb_debug_comp_) & + & write(debug_unit,*) me,' ',trim(name),' csmm ', info + + if (info == psb_success_) call psi_ovrl_restore(x%v,xvsave,desc_a,info) + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + ch_err='psb_csmm' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if (doswap_) then + call psi_swaptran(ior(psb_swap_send_,psb_swap_recv_),& + & cone,y%v,desc_a,iwork,info) + if (info == psb_success_) call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & cone,y%v,desc_a,iwork,info,data=psb_comm_ovr_) + + if (debug_level >= psb_debug_comp_) & + & write(debug_unit,*) me,' ',trim(name),' swaptran ', info + if(info /= psb_success_) then + info = psb_err_from_subroutine_ + ch_err='PSI_SwapTran' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + end if + + end if + + if (aliw) deallocate(iwork,stat=info) + if (debug_level >= psb_debug_comp_) & + & write(debug_unit,*) me,' ',trim(name),' deallocat ',aliw, info + if(info /= psb_success_) then + info = psb_err_from_subroutine_ + ch_err='Deallocate iwork' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + nullify(iwork) + + call psb_erractionrestore(err_act) + if (debug_level >= psb_debug_comp_) then + call psb_barrier(ictxt) + write(debug_unit,*) me,' ',trim(name),' Returning ' + endif + return + +9999 call psb_error_handler(ictxt,err_act) + + return +end subroutine psb_cspmv_vect +! ! Subroutine: psb_cspmm ! Performs one of the distributed matrix-vector operations ! @@ -356,10 +585,6 @@ subroutine psb_cspmm(alpha,a,x,beta,y,desc_a,info,& return end subroutine psb_cspmm - - - - !!$ !!$ Parallel Sparse BLAS version 3.5 !!$ (C) Copyright 2006-2018 @@ -670,211 +895,3 @@ subroutine psb_cspmv(alpha,a,x,beta,y,desc_a,info,& return end subroutine psb_cspmv - - - -subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,& - & trans, work, doswap) - use psb_base_mod, psb_protect_name => psb_cspmv_vect - use psi_mod - implicit none - - complex(psb_spk_), intent(in) :: alpha, beta - type(psb_c_vect_type), intent(inout) :: x - type(psb_c_vect_type), intent(inout) :: y - type(psb_cspmat_type), intent(in) :: a - type(psb_desc_type), intent(in) :: desc_a - integer(psb_ipk_), intent(out) :: info - complex(psb_spk_), optional, target, intent(inout) :: work(:) - character, intent(in), optional :: trans - logical, intent(in), optional :: doswap - - ! locals - integer(psb_ipk_) :: ictxt, np, me,& - & err_act, iix, jjx, iia, jja, nrow, ncol, lldx, lldy, & - & liwork, iiy, jjy, ib, ip, idx - integer(psb_lpk_) :: ix, ijx, iy, ijy, m, n, ia, ja - integer(psb_ipk_), parameter :: nb=4 - complex(psb_spk_), pointer :: iwork(:), xp(:), yp(:) - complex(psb_spk_), allocatable :: xvsave(:) - character :: trans_ - character(len=20) :: name, ch_err - logical :: aliw, doswap_ - integer(psb_ipk_) :: debug_level, debug_unit - - name='psb_cspmv' - info=psb_success_ - call psb_erractionsave(err_act) - if (psb_errstatus_fatal()) then - info = psb_err_internal_error_ ; goto 9999 - end if - debug_unit = psb_get_debug_unit() - debug_level = psb_get_debug_level() - - ictxt=desc_a%get_context() - call psb_info(ictxt, me, np) - if (np == -1) then - info = psb_err_context_error_ - call psb_errpush(info,name) - goto 9999 - endif - - if (.not.allocated(x%v)) then - info = psb_err_invalid_vect_state_ - call psb_errpush(info,name) - goto 9999 - endif - if (.not.allocated(y%v)) then - info = psb_err_invalid_vect_state_ - call psb_errpush(info,name) - goto 9999 - endif - - if (present(doswap)) then - doswap_ = doswap - else - doswap_ = .true. - endif - - if (present(trans)) then - trans_ = psb_toupper(trans) - else - trans_ = 'N' - endif - if ( (trans_ == 'N').or.(trans_ == 'T')& - & .or.(trans_ == 'C')) then - else - info = psb_err_iarg_invalid_value_ - call psb_errpush(info,name) - goto 9999 - end if - - m = desc_a%get_global_rows() - n = desc_a%get_global_cols() - nrow = desc_a%get_local_rows() - ncol = desc_a%get_local_cols() - lldx = x%get_nrows() - lldy = y%get_nrows() - if ((info == 0).and.(lldx null() - ! check for presence/size of a work area - liwork= 2*ncol - - if (present(work)) then - if (size(work) >= liwork) then - aliw =.false. - else - aliw=.true. - endif - else - aliw=.true. - end if - - if (aliw) then - allocate(iwork(liwork),stat=info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='Allocate' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - else - iwork => work - endif - - if (debug_level >= psb_debug_comp_) & - & write(debug_unit,*) me,' ',trim(name),' Allocated work ', info - - if (trans_ == 'N') then - ! Matrix is not transposed - - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& - & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - end if - - call psb_csmm(alpha,a,x,beta,y,info) - - if(info /= psb_success_) then - info = psb_err_from_subroutine_non_ - call psb_errpush(info,name) - goto 9999 - end if - - else - ! Matrix is transposed - - ! - ! Non-empty overlap, need a buffer to hold - ! the entries updated with average operator. - ! Why the average? because in this way they will contribute - ! with a proper scale factor (1/np) to the overall product. - ! - call psi_ovrl_save(x%v,xvsave,desc_a,info) - if (info == psb_success_) call psi_ovrl_upd(x%v,desc_a,psb_avg_,info) - - if (beta /= czero) call y%set(czero,nrow+1,ncol) - ! local Matrix-vector product - if (info == psb_success_) call psb_csmm(alpha,a,x,beta,y,info,trans=trans_) - - if (debug_level >= psb_debug_comp_) & - & write(debug_unit,*) me,' ',trim(name),' csmm ', info - - if (info == psb_success_) call psi_ovrl_restore(x%v,xvsave,desc_a,info) - if (info /= psb_success_) then - info = psb_err_from_subroutine_ - ch_err='psb_csmm' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - if (doswap_) then - call psi_swaptran(ior(psb_swap_send_,psb_swap_recv_),& - & cone,y%v,desc_a,iwork,info) - if (info == psb_success_) call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& - & cone,y%v,desc_a,iwork,info,data=psb_comm_ovr_) - - if (debug_level >= psb_debug_comp_) & - & write(debug_unit,*) me,' ',trim(name),' swaptran ', info - if(info /= psb_success_) then - info = psb_err_from_subroutine_ - ch_err='PSI_SwapTran' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - end if - - end if - - if (aliw) deallocate(iwork,stat=info) - if (debug_level >= psb_debug_comp_) & - & write(debug_unit,*) me,' ',trim(name),' deallocat ',aliw, info - if(info /= psb_success_) then - info = psb_err_from_subroutine_ - ch_err='Deallocate iwork' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - nullify(iwork) - - call psb_erractionrestore(err_act) - if (debug_level >= psb_debug_comp_) then - call psb_barrier(ictxt) - write(debug_unit,*) me,' ',trim(name),' Returning ' - endif - return - -9999 call psb_error_handler(ictxt,err_act) - - return -end subroutine psb_cspmv_vect diff --git a/base/psblas/psb_cspnrm1.f90 b/base/psblas/psb_cspnrm1.f90 index a5076a0a..c49fdcc1 100644 --- a/base/psblas/psb_cspnrm1.f90 +++ b/base/psblas/psb_cspnrm1.f90 @@ -40,6 +40,7 @@ ! a - type(psb_cspmat_type). The sparse matrix containing A. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code +! global - logical(optional) Whether to perform the global reduction, default: .true. ! function psb_cspnrm1(a,desc_a,info,global) result(res) use psb_base_mod, psb_protect_name => psb_cspnrm1 diff --git a/base/psblas/psb_cspsm.f90 b/base/psblas/psb_cspsm.f90 index a8e0c570..3fc8138c 100644 --- a/base/psblas/psb_cspsm.f90 +++ b/base/psblas/psb_cspsm.f90 @@ -31,6 +31,203 @@ ! ! File: psb_cspsm.f90 ! +! +! Subroutine: psb_csps_vect +! Performs one of the distributed matrix-vector operations +! +! Y := alpha * Pr * A-1 * Pc * X + beta * Y, or +! +! Y := alpha * D * Pr * A-1 * Pc * X + beta * Y, or +! +! Y := alpha * Pr * A-1 * Pc * D * X + beta * Y, or +! +! Y := alpha * Pr * A-T * Pc * X + beta * Y, or +! +! Y := alpha * D * Pr * A-T * Pc * X + beta * Y, or +! +! Y := alpha * Pr * A-T * Pc * D * X + beta * Y, or +! +! X is a distributed +! vector and T is a M-by-M distributed triangular matrix. +! +! +! Arguments: +! alpha - complex. The scalar alpha. +! a - type(psb_cspmat_type). The sparse matrix containing A. +! x - type(psb_c_vect_type) The input vector containing the entries of ( X ). +! beta - complex The scalar beta. +! y - type(psb_c_vect_type) The input vector containing the entries of ( Y ). +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! trans - character(optional). Whether A or A'. If not present 'N' is assumed. +! scale - character(optional). Specify some type of operation with +! the diagonal matrix D. +! choice - integer(optional). The kind of update to perform on overlap elements. +! d(:) - complex, optional Matrix for diagonal scaling. +! work(:) - complex, optional Working area. +! +subroutine psb_cspsv_vect(alpha,a,x,beta,y,desc_a,info,& + & trans, scale, choice, diag, work) + use psb_base_mod, psb_protect_name => psb_cspsv_vect + use psi_mod + implicit none + + complex(psb_spk_), intent(in) :: alpha, beta + type(psb_c_vect_type), intent(inout) :: x + type(psb_c_vect_type), intent(inout) :: y + type(psb_cspmat_type), intent(inout) :: a + type(psb_desc_type), intent(in) :: desc_a + integer(psb_ipk_), intent(out) :: info + type(psb_c_vect_type), intent(inout), optional :: diag + complex(psb_spk_), optional, target, intent(inout) :: work(:) + character, intent(in), optional :: trans, scale + integer(psb_ipk_), intent(in), optional :: choice + + ! locals + integer(psb_ipk_) :: ictxt, np, me, & + & err_act, iix, jjx, ia, ja, iia, jja, lldx,lldy, choice_,& + & ix, iy, ik, jx, jy, i, lld,& + & m, nrow, ncol, liwork, llwork, iiy, jjy, idx, ndm + + character :: lscale + integer(psb_ipk_), parameter :: nb=4 + complex(psb_spk_),pointer :: iwork(:), xp(:), yp(:) + character :: itrans + character(len=20) :: name, ch_err + logical :: aliw + + name='psb_cspsv_vect' + info=psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_ ; goto 9999 + end if + + ictxt=desc_a%get_context() + + call psb_info(ictxt, me, np) + if (np == -1) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(y%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + if (present(choice)) then + choice_ = choice + else + choice_ = psb_avg_ + endif + + if (present(scale)) then + lscale = psb_toupper(scale) + else + lscale = 'U' + endif + + if (present(trans)) then + itrans = psb_toupper(trans) + if((itrans == 'N').or.(itrans == 'T').or.(itrans == 'C')) then + ! Ok + else + info = psb_err_iarg_invalid_value_ + call psb_errpush(info,name) + goto 9999 + end if + else + itrans = 'N' + endif + + m = desc_a%get_global_rows() + nrow = desc_a%get_local_rows() + ncol = desc_a%get_local_cols() + lldx = x%get_nrows() + lldy = y%get_nrows() + if ((info == 0).and.(lldx null() + ! check for presence/size of a work area + liwork= 2*ncol + + if (present(work)) then + if (size(work) >= liwork) then + aliw =.false. + else + aliw=.true. + endif + else + aliw=.true. + end if + + if (aliw) then + allocate(iwork(liwork),stat=info) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + else + iwork => work + endif + + iwork(1)=0.d0 + + ! Perform local triangular system solve + if (present(diag)) then + call a%spsm(alpha,x,beta,y,info,scale=scale,d=diag,trans=trans) + else + call a%spsm(alpha,x,beta,y,info,scale=scale,trans=trans) + end if + if(info /= psb_success_) then + info = psb_err_from_subroutine_ + ch_err='dcssm' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + ! update overlap elements + if (choice_ > 0) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & cone,y%v,desc_a,iwork,info,data=psb_comm_ovr_) + + + if (info == psb_success_) call psi_ovrl_upd(y%v,desc_a,choice_,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Inner updates') + goto 9999 + end if + end if + + if (aliw) deallocate(iwork) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ictxt,err_act) + + return +end subroutine psb_cspsv_vect +! ! Subroutine: psb_cspsm ! Performs one of the distributed matrix-vector operations ! @@ -283,38 +480,6 @@ subroutine psb_cspsm(alpha,a,x,beta,y,desc_a,info,& return end subroutine psb_cspsm - -!!$ -!!$ Parallel Sparse BLAS version 3.5 -!!$ (C) Copyright 2006-2018 -!!$ Salvatore Filippone University of Rome Tor Vergata -!!$ Alfredo Buttari -!!$ -!!$ Redistribution and use in source and binary forms, with or without -!!$ modification, are permitted provided that the following conditions -!!$ are met: -!!$ 1. Redistributions of source code must retain the above copyright -!!$ notice, this list of conditions and the following disclaimer. -!!$ 2. Redistributions in binary form must reproduce the above copyright -!!$ notice, this list of conditions, and the following disclaimer in the -!!$ documentation and/or other materials provided with the distribution. -!!$ 3. The name of the PSBLAS group or the names of its contributors may -!!$ not be used to endorse or promote products derived from this -!!$ software without specific written permission. -!!$ -!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -!!$ POSSIBILITY OF SUCH DAMAGE. -!!$ -!!$ ! ! Subroutine: psb_cspsv ! Performs one of the distributed matrix-vector operations @@ -545,166 +710,3 @@ subroutine psb_cspsv(alpha,a,x,beta,y,desc_a,info,& return end subroutine psb_cspsv - -subroutine psb_cspsv_vect(alpha,a,x,beta,y,desc_a,info,& - & trans, scale, choice, diag, work) - use psb_base_mod, psb_protect_name => psb_cspsv_vect - use psi_mod - implicit none - - complex(psb_spk_), intent(in) :: alpha, beta - type(psb_c_vect_type), intent(inout) :: x - type(psb_c_vect_type), intent(inout) :: y - type(psb_cspmat_type), intent(inout) :: a - type(psb_desc_type), intent(in) :: desc_a - integer(psb_ipk_), intent(out) :: info - type(psb_c_vect_type), intent(inout), optional :: diag - complex(psb_spk_), optional, target, intent(inout) :: work(:) - character, intent(in), optional :: trans, scale - integer(psb_ipk_), intent(in), optional :: choice - - ! locals - integer(psb_ipk_) :: ictxt, np, me, & - & err_act, iix, jjx, ia, ja, iia, jja, lldx,lldy, choice_,& - & ix, iy, ik, jx, jy, i, lld,& - & m, nrow, ncol, liwork, llwork, iiy, jjy, idx, ndm - - character :: lscale - integer(psb_ipk_), parameter :: nb=4 - complex(psb_spk_),pointer :: iwork(:), xp(:), yp(:) - character :: itrans - character(len=20) :: name, ch_err - logical :: aliw - - name='psb_sspsv' - info=psb_success_ - call psb_erractionsave(err_act) - if (psb_errstatus_fatal()) then - info = psb_err_internal_error_ ; goto 9999 - end if - - ictxt=desc_a%get_context() - - call psb_info(ictxt, me, np) - if (np == -1) then - info = psb_err_context_error_ - call psb_errpush(info,name) - goto 9999 - endif - - if (.not.allocated(x%v)) then - info = psb_err_invalid_vect_state_ - call psb_errpush(info,name) - goto 9999 - endif - if (.not.allocated(y%v)) then - info = psb_err_invalid_vect_state_ - call psb_errpush(info,name) - goto 9999 - endif - - - if (present(choice)) then - choice_ = choice - else - choice_ = psb_avg_ - endif - - if (present(scale)) then - lscale = psb_toupper(scale) - else - lscale = 'U' - endif - - if (present(trans)) then - itrans = psb_toupper(trans) - if((itrans == 'N').or.(itrans == 'T').or.(itrans == 'C')) then - ! Ok - else - info = psb_err_iarg_invalid_value_ - call psb_errpush(info,name) - goto 9999 - end if - else - itrans = 'N' - endif - - m = desc_a%get_global_rows() - nrow = desc_a%get_local_rows() - ncol = desc_a%get_local_cols() - lldx = x%get_nrows() - lldy = y%get_nrows() - if ((info == 0).and.(lldx null() - ! check for presence/size of a work area - liwork= 2*ncol - - if (present(work)) then - if (size(work) >= liwork) then - aliw =.false. - else - aliw=.true. - endif - else - aliw=.true. - end if - - if (aliw) then - allocate(iwork(liwork),stat=info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_realloc' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - else - iwork => work - endif - - iwork(1)=0.d0 - - ! Perform local triangular system solve - if (present(diag)) then - call a%spsm(alpha,x,beta,y,info,scale=scale,d=diag,trans=trans) - else - call a%spsm(alpha,x,beta,y,info,scale=scale,trans=trans) - end if - if(info /= psb_success_) then - info = psb_err_from_subroutine_ - ch_err='dcssm' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - ! update overlap elements - if (choice_ > 0) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& - & cone,y%v,desc_a,iwork,info,data=psb_comm_ovr_) - - - if (info == psb_success_) call psi_ovrl_upd(y%v,desc_a,choice_,info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Inner updates') - goto 9999 - end if - end if - - if (aliw) deallocate(iwork) - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(ictxt,err_act) - - return -end subroutine psb_cspsv_vect - diff --git a/base/psblas/psb_damax.f90 b/base/psblas/psb_damax.f90 index 33dd2dc9..0f3ea0b6 100644 --- a/base/psblas/psb_damax.f90 +++ b/base/psblas/psb_damax.f90 @@ -32,7 +32,7 @@ ! File: psb_damax.f90 ! ! Function: psb_damax -! Searches the absolute max of X. +! Computes the maximum absolute value of X ! ! normi := max(abs(sub(X)(i)) ! @@ -164,7 +164,7 @@ end function psb_damax !!$ ! ! Function: psb_damaxv -! Searches the absolute max of X. +! Computes the maximum absolute value of X. ! ! normi := max(abs(X(i)) ! @@ -252,6 +252,17 @@ function psb_damaxv (x,desc_a, info,global) result(res) return end function psb_damaxv +! +! Function: psb_damax_vect +! Computes the maximum absolute value of X. +! +! normi := max(abs(X(i)) +! +! Arguments: +! x - type(psb_d_vect_type) The input vector. +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! function psb_damax_vect(x, desc_a, info,global) result(res) use psb_penv_mod @@ -374,17 +385,16 @@ end function psb_damax_vect !!$ ! ! Subroutine: psb_damaxvs -! Searches the absolute max of X. +! Computes the maximum absolute value of X, subroutine version ! ! normi := max(abs(sub(X)(i)) ! -! where sub( X ) denotes X(1:N,JX:). +! where sub( X ) denotes X(1:N). ! ! Arguments: ! res - real The result. -! x(:,:) - real The input vector. +! x(:) - real The input vector. ! desc_a - type(psb_desc_type). The communication descriptor. -! info - integer. Return code ! jx - integer(optional). The column offset. ! subroutine psb_damaxvs(res,x,desc_a, info,global) diff --git a/base/psblas/psb_dasum.f90 b/base/psblas/psb_dasum.f90 index cf2d8fe3..5de367f7 100644 --- a/base/psblas/psb_dasum.f90 +++ b/base/psblas/psb_dasum.f90 @@ -135,6 +135,19 @@ function psb_dasum (x,desc_a, info, jx,global) result(res) return end function psb_dasum +! Function: psb_dasum_vect +! Computes norm1 of X +! +! norm1 := sum(sub( X )(i)) +! +! where sub( X ) denotes X(1:N,JX:). +! +! Arguments: +! x - type(psb_d_vect_type) The input vector. +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! jx - integer(optional). The column offset. +! function psb_dasum_vect(x, desc_a, info,global) result(res) use psb_base_mod, psb_protect_name => psb_dasum_vect diff --git a/base/psblas/psb_daxpby.f90 b/base/psblas/psb_daxpby.f90 index b1c83d3a..73f62881 100644 --- a/base/psblas/psb_daxpby.f90 +++ b/base/psblas/psb_daxpby.f90 @@ -31,6 +31,23 @@ ! ! File: psb_daxpby.f90 +! +! Subroutine: psb_daxpby_vect +! Adds one distributed vector to another, +! +! Y := beta * Y + alpha * X +! +! Arguments: +! alpha - real,input The scalar used to multiply each component of X +! x - type(psb_d_vect_type) The input vector containing the entries of X +! beta - real,input The scalar used to multiply each component of Y +! y - type(psb_d_vect_type) The input/output vector Y +! desc_a - type(psb_desc_type) The communication descriptor. +! info - integer Return code +! +! Note: from a functional point of view, X is input, but here +! it's declared INOUT because of the sync() methods. +! subroutine psb_daxpby_vect(alpha, x, beta, y,& & desc_a, info) use psb_base_mod, psb_protect_name => psb_daxpby_vect @@ -269,7 +286,7 @@ end subroutine psb_daxpby !!$ ! ! Subroutine: psb_daxpbyv -! Adds one distributed matrix to another, +! Adds one distributed vector to another, ! ! Y := beta * Y + alpha * X ! diff --git a/base/psblas/psb_ddot.f90 b/base/psblas/psb_ddot.f90 index c2231d8a..ba1d9619 100644 --- a/base/psblas/psb_ddot.f90 +++ b/base/psblas/psb_ddot.f90 @@ -31,22 +31,22 @@ ! ! File: psb_ddot.f90 ! -! Function: psb_ddot -! psb_ddot forms the dot product of two distributed vectors, +! Function: psb_ddot_vect +! psb_ddot computes the dot product of two distributed vectors, ! -! dot := sub( X )**C * sub( Y ) +! dot := ( X )**C * ( Y ) ! -! where sub( X ) denotes X(:,JX) -! -! sub( Y ) denotes Y(:,JY). ! ! Arguments: -! x(:,:) - complex The input vector containing the entries of sub( X ). -! y(:,:) - complex The input vector containing the entries of sub( Y ). +! x - type(psb_d_vect_type) The input vector containing the entries of sub( X ). +! y - type(psb_d_vect_type) The input vector containing the entries of sub( Y ). ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code -! jx - integer(optional). The column offset for sub( X ). -! jy - integer(optional). The column offset for sub( Y ). +! global - logical(optional) Whether to perform the global sum, default: .true. +! +! Note: from a functional point of view, X and Y are input, but here +! they are declared INOUT because of the sync() methods. +! ! function psb_ddot_vect(x, y, desc_a,info,global) result(res) use psb_desc_mod @@ -156,7 +156,25 @@ function psb_ddot_vect(x, y, desc_a,info,global) result(res) return end function psb_ddot_vect - +! +! Function: psb_ddot +! psb_ddot computes the dot product of two distributed vectors, +! +! dot := sub( X )**C * sub( Y ) +! +! where sub( X ) denotes X(:,JX) +! +! sub( Y ) denotes Y(:,JY). +! +! Arguments: +! x(:,:) - real The input vector containing the entries of sub( X ). +! y(:,:) - real The input vector containing the entries of sub( Y ). +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! jx - integer(optional). The column offset for sub( X ). +! jy - integer(optional). The column offset for sub( Y ). +! global - logical(optional) Whether to perform the global sum, default: .true. +! function psb_ddot(x, y,desc_a, info, jx, jy,global) result(res) use psb_base_mod, psb_protect_name => psb_ddot implicit none @@ -298,7 +316,7 @@ end function psb_ddot !!$ ! ! Function: psb_ddotv -! psb_ddotv forms the dot product of two distributed vectors, +! psb_ddotv computes the dot product of two distributed vectors, ! ! dot := X**C * Y ! @@ -307,6 +325,7 @@ end function psb_ddot ! y(:) - real The input vector containing the entries of Y. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code +! global - logical(optional) Whether to perform the global sum, default: .true. ! function psb_ddotv(x, y,desc_a, info,global) result(res) use psb_base_mod, psb_protect_name => psb_ddotv @@ -432,7 +451,7 @@ end function psb_ddotv !!$ ! ! Subroutine: psb_ddotvs -! psb_ddotvs forms the dot product of two distributed vectors, +! psb_ddotvs computes the dot product of two distributed vectors, ! ! res := X**C * Y ! @@ -442,6 +461,7 @@ end function psb_ddotv ! y(:) - real The input vector containing the entries of Y. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code +! global - logical(optional) Whether to perform the global sum, default: .true. ! subroutine psb_ddotvs(res, x, y,desc_a, info,global) use psb_base_mod, psb_protect_name => psb_ddotvs @@ -565,7 +585,7 @@ end subroutine psb_ddotvs !!$ ! ! Subroutine: psb_dmdots -! psb_dmdots forms the dot product of multiple distributed vectors, +! psb_dmdots computes the dot product of multiple distributed vectors, ! ! res(i) := ( X(:,i) )**C * ( Y(:,i) ) ! @@ -575,6 +595,7 @@ end subroutine psb_ddotvs ! y(:) - real The input vector containing the entries of sub( Y ). ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code +! global - logical(optional) Whether to perform the global sum, default: .true. ! subroutine psb_dmdots(res, x, y, desc_a, info,global) use psb_base_mod, psb_protect_name => psb_dmdots diff --git a/base/psblas/psb_dnrm2.f90 b/base/psblas/psb_dnrm2.f90 index b72a72ee..0845aac4 100644 --- a/base/psblas/psb_dnrm2.f90 +++ b/base/psblas/psb_dnrm2.f90 @@ -32,9 +32,9 @@ ! File: psb_dnrm2.f90 ! ! Function: psb_dnrm2 -! Forms the norm2 of a distributed vector, +! Computes the norm2 of a distributed vector, ! -! norm2 := sqrt ( sub( X )**T * sub( X ) ) +! norm2 := sqrt ( sub( X )**C * sub( X ) ) ! ! where sub( X ) denotes X(:,JX). ! @@ -43,6 +43,7 @@ ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code ! jx - integer(optional). The column offset for sub( X ). +! global - logical(optional) Whether to perform the global reduction, default: .true. ! function psb_dnrm2(x, desc_a, info, jx,global) result(res) use psb_desc_mod @@ -170,14 +171,15 @@ end function psb_dnrm2 !!$ ! ! Function: psb_dnrm2 -! Forms the norm2 of a distributed vector, +! Computes the norm2 of a distributed vector, ! -! norm2 := sqrt ( X**T * X) +! norm2 := sqrt ( X**C * X) ! ! Arguments: ! x(:) - real The input vector containing the entries of X. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code +! global - logical(optional) Whether to perform the global reduction, default: .true. ! function psb_dnrm2v(x, desc_a, info,global) result(res) use psb_desc_mod @@ -263,7 +265,17 @@ function psb_dnrm2v(x, desc_a, info,global) result(res) end function psb_dnrm2v - +! Function: psb_dnrm2_vect +! Computes the norm2 of a distributed vector, +! +! norm2 := sqrt ( X**C * X) +! +! Arguments: +! x - type(psb_d_vect_type) The input vector containing the entries of X. +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! global - logical(optional) Whether to perform the global reduction, default: .true. +! function psb_dnrm2_vect(x, desc_a, info,global) result(res) use psb_desc_mod use psb_check_mod @@ -344,7 +356,7 @@ function psb_dnrm2_vect(x, desc_a, info,global) result(res) res = res - sqrt(done - dd*(abs(x%v%v(idx))/res)**2) end do end if - else + else res = dzero end if @@ -392,15 +404,16 @@ end function psb_dnrm2_vect !!$ ! ! Subroutine: psb_dnrm2vs -! Forms the norm2 of a distributed vector, +! Computes the norm2 of a distributed vector, subroutine version ! -! norm2 := sqrt ( X**T * X) +! norm2 := sqrt ( X**C * X) ! ! Arguments: ! res - real The result. ! x(:) - real The input vector containing the entries of X. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code +! global - logical(optional) Whether to perform the global reduction, default: .true. ! subroutine psb_dnrm2vs(res, x, desc_a, info,global) use psb_desc_mod diff --git a/base/psblas/psb_dnrmi.f90 b/base/psblas/psb_dnrmi.f90 index 6d585981..a6a97751 100644 --- a/base/psblas/psb_dnrmi.f90 +++ b/base/psblas/psb_dnrmi.f90 @@ -32,14 +32,15 @@ ! File: psb_dnrmi.f90 ! ! Function: psb_dnrmi -! Forms the infinity norm of a sparse matrix, +! Computes the infinity norm of a sparse matrix, ! -! nrmi := max_i(abs(sum(A(i,:)))) +! nrmi := max_i(sum(abs(A(i,:)))) ! ! Arguments: ! a - type(psb_dspmat_type). The sparse matrix containing A. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code +! global - logical(optional) Whether to perform the global reduction, default: .true. ! function psb_dnrmi(a,desc_a,info,global) result(res) use psb_base_mod, psb_protect_name => psb_dnrmi diff --git a/base/psblas/psb_dspmm.f90 b/base/psblas/psb_dspmm.f90 index bf262541..1fdf6171 100644 --- a/base/psblas/psb_dspmm.f90 +++ b/base/psblas/psb_dspmm.f90 @@ -31,6 +31,235 @@ ! ! File: psb_dspmm.f90 ! +! +! Subroutine: psb_dspm_vect +! Performs one of the distributed matrix-vector operations +! +! Y := alpha * Pr * A * Pc * X + beta * Y, or +! +! Y := alpha * Pr * A' * Pr * X + beta * Y, +! +! alpha and beta are scalars, X and Y are distributed +! vectors and A is a M-by-N distributed matrix. +! +! Arguments: +! alpha - real The scalar alpha. +! a - type(psb_dspmat_type). The sparse matrix containing A. +! x - type(psb_d_vect_type) The input vector containing the entries of ( X ). +! beta - real The scalar beta. +! y - type(psb_d_vect_type) The input vector containing the entries of ( Y ). +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! trans - character(optional). Whether A or A'. Default: 'N' +! work(:) - real,(optional). Working area. +! doswap - logical(optional). Whether to performe halo updates. +! +subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,& + & trans, work, doswap) + use psb_base_mod, psb_protect_name => psb_dspmv_vect + use psi_mod + implicit none + + real(psb_dpk_), intent(in) :: alpha, beta + type(psb_d_vect_type), intent(inout) :: x + type(psb_d_vect_type), intent(inout) :: y + type(psb_dspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + integer(psb_ipk_), intent(out) :: info + real(psb_dpk_), optional, target, intent(inout) :: work(:) + character, intent(in), optional :: trans + logical, intent(in), optional :: doswap + + ! locals + integer(psb_ipk_) :: ictxt, np, me,& + & err_act, iix, jjx, iia, jja, nrow, ncol, lldx, lldy, & + & liwork, iiy, jjy, ib, ip, idx + integer(psb_lpk_) :: ix, ijx, iy, ijy, m, n, ia, ja + integer(psb_ipk_), parameter :: nb=4 + real(psb_dpk_), pointer :: iwork(:), xp(:), yp(:) + real(psb_dpk_), allocatable :: xvsave(:) + character :: trans_ + character(len=20) :: name, ch_err + logical :: aliw, doswap_ + integer(psb_ipk_) :: debug_level, debug_unit + + name='psb_dspmv' + info=psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_ ; goto 9999 + end if + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + ictxt=desc_a%get_context() + call psb_info(ictxt, me, np) + if (np == -1) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(y%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (present(doswap)) then + doswap_ = doswap + else + doswap_ = .true. + endif + + if (present(trans)) then + trans_ = psb_toupper(trans) + else + trans_ = 'N' + endif + if ( (trans_ == 'N').or.(trans_ == 'T')& + & .or.(trans_ == 'C')) then + else + info = psb_err_iarg_invalid_value_ + call psb_errpush(info,name) + goto 9999 + end if + + m = desc_a%get_global_rows() + n = desc_a%get_global_cols() + nrow = desc_a%get_local_rows() + ncol = desc_a%get_local_cols() + lldx = x%get_nrows() + lldy = y%get_nrows() + if ((info == 0).and.(lldx null() + ! check for presence/size of a work area + liwork= 2*ncol + + if (present(work)) then + if (size(work) >= liwork) then + aliw =.false. + else + aliw=.true. + endif + else + aliw=.true. + end if + + if (aliw) then + allocate(iwork(liwork),stat=info) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='Allocate' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + else + iwork => work + endif + + if (debug_level >= psb_debug_comp_) & + & write(debug_unit,*) me,' ',trim(name),' Allocated work ', info + + if (trans_ == 'N') then + ! Matrix is not transposed + + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + + call psb_csmm(alpha,a,x,beta,y,info) + + if(info /= psb_success_) then + info = psb_err_from_subroutine_non_ + call psb_errpush(info,name) + goto 9999 + end if + + else + ! Matrix is transposed + + ! + ! Non-empty overlap, need a buffer to hold + ! the entries updated with average operator. + ! Why the average? because in this way they will contribute + ! with a proper scale factor (1/np) to the overall product. + ! + call psi_ovrl_save(x%v,xvsave,desc_a,info) + if (info == psb_success_) call psi_ovrl_upd(x%v,desc_a,psb_avg_,info) + + if (beta /= dzero) call y%set(dzero,nrow+1,ncol) + ! local Matrix-vector product + if (info == psb_success_) call psb_csmm(alpha,a,x,beta,y,info,trans=trans_) + + if (debug_level >= psb_debug_comp_) & + & write(debug_unit,*) me,' ',trim(name),' csmm ', info + + if (info == psb_success_) call psi_ovrl_restore(x%v,xvsave,desc_a,info) + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + ch_err='psb_csmm' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if (doswap_) then + call psi_swaptran(ior(psb_swap_send_,psb_swap_recv_),& + & done,y%v,desc_a,iwork,info) + if (info == psb_success_) call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & done,y%v,desc_a,iwork,info,data=psb_comm_ovr_) + + if (debug_level >= psb_debug_comp_) & + & write(debug_unit,*) me,' ',trim(name),' swaptran ', info + if(info /= psb_success_) then + info = psb_err_from_subroutine_ + ch_err='PSI_SwapTran' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + end if + + end if + + if (aliw) deallocate(iwork,stat=info) + if (debug_level >= psb_debug_comp_) & + & write(debug_unit,*) me,' ',trim(name),' deallocat ',aliw, info + if(info /= psb_success_) then + info = psb_err_from_subroutine_ + ch_err='Deallocate iwork' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + nullify(iwork) + + call psb_erractionrestore(err_act) + if (debug_level >= psb_debug_comp_) then + call psb_barrier(ictxt) + write(debug_unit,*) me,' ',trim(name),' Returning ' + endif + return + +9999 call psb_error_handler(ictxt,err_act) + + return +end subroutine psb_dspmv_vect +! ! Subroutine: psb_dspmm ! Performs one of the distributed matrix-vector operations ! @@ -356,10 +585,6 @@ subroutine psb_dspmm(alpha,a,x,beta,y,desc_a,info,& return end subroutine psb_dspmm - - - - !!$ !!$ Parallel Sparse BLAS version 3.5 !!$ (C) Copyright 2006-2018 @@ -670,211 +895,3 @@ subroutine psb_dspmv(alpha,a,x,beta,y,desc_a,info,& return end subroutine psb_dspmv - - - -subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,& - & trans, work, doswap) - use psb_base_mod, psb_protect_name => psb_dspmv_vect - use psi_mod - implicit none - - real(psb_dpk_), intent(in) :: alpha, beta - type(psb_d_vect_type), intent(inout) :: x - type(psb_d_vect_type), intent(inout) :: y - type(psb_dspmat_type), intent(in) :: a - type(psb_desc_type), intent(in) :: desc_a - integer(psb_ipk_), intent(out) :: info - real(psb_dpk_), optional, target, intent(inout) :: work(:) - character, intent(in), optional :: trans - logical, intent(in), optional :: doswap - - ! locals - integer(psb_ipk_) :: ictxt, np, me,& - & err_act, iix, jjx, iia, jja, nrow, ncol, lldx, lldy, & - & liwork, iiy, jjy, ib, ip, idx - integer(psb_lpk_) :: ix, ijx, iy, ijy, m, n, ia, ja - integer(psb_ipk_), parameter :: nb=4 - real(psb_dpk_), pointer :: iwork(:), xp(:), yp(:) - real(psb_dpk_), allocatable :: xvsave(:) - character :: trans_ - character(len=20) :: name, ch_err - logical :: aliw, doswap_ - integer(psb_ipk_) :: debug_level, debug_unit - - name='psb_dspmv' - info=psb_success_ - call psb_erractionsave(err_act) - if (psb_errstatus_fatal()) then - info = psb_err_internal_error_ ; goto 9999 - end if - debug_unit = psb_get_debug_unit() - debug_level = psb_get_debug_level() - - ictxt=desc_a%get_context() - call psb_info(ictxt, me, np) - if (np == -1) then - info = psb_err_context_error_ - call psb_errpush(info,name) - goto 9999 - endif - - if (.not.allocated(x%v)) then - info = psb_err_invalid_vect_state_ - call psb_errpush(info,name) - goto 9999 - endif - if (.not.allocated(y%v)) then - info = psb_err_invalid_vect_state_ - call psb_errpush(info,name) - goto 9999 - endif - - if (present(doswap)) then - doswap_ = doswap - else - doswap_ = .true. - endif - - if (present(trans)) then - trans_ = psb_toupper(trans) - else - trans_ = 'N' - endif - if ( (trans_ == 'N').or.(trans_ == 'T')& - & .or.(trans_ == 'C')) then - else - info = psb_err_iarg_invalid_value_ - call psb_errpush(info,name) - goto 9999 - end if - - m = desc_a%get_global_rows() - n = desc_a%get_global_cols() - nrow = desc_a%get_local_rows() - ncol = desc_a%get_local_cols() - lldx = x%get_nrows() - lldy = y%get_nrows() - if ((info == 0).and.(lldx null() - ! check for presence/size of a work area - liwork= 2*ncol - - if (present(work)) then - if (size(work) >= liwork) then - aliw =.false. - else - aliw=.true. - endif - else - aliw=.true. - end if - - if (aliw) then - allocate(iwork(liwork),stat=info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='Allocate' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - else - iwork => work - endif - - if (debug_level >= psb_debug_comp_) & - & write(debug_unit,*) me,' ',trim(name),' Allocated work ', info - - if (trans_ == 'N') then - ! Matrix is not transposed - - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& - & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - end if - - call psb_csmm(alpha,a,x,beta,y,info) - - if(info /= psb_success_) then - info = psb_err_from_subroutine_non_ - call psb_errpush(info,name) - goto 9999 - end if - - else - ! Matrix is transposed - - ! - ! Non-empty overlap, need a buffer to hold - ! the entries updated with average operator. - ! Why the average? because in this way they will contribute - ! with a proper scale factor (1/np) to the overall product. - ! - call psi_ovrl_save(x%v,xvsave,desc_a,info) - if (info == psb_success_) call psi_ovrl_upd(x%v,desc_a,psb_avg_,info) - - if (beta /= dzero) call y%set(dzero,nrow+1,ncol) - ! local Matrix-vector product - if (info == psb_success_) call psb_csmm(alpha,a,x,beta,y,info,trans=trans_) - - if (debug_level >= psb_debug_comp_) & - & write(debug_unit,*) me,' ',trim(name),' csmm ', info - - if (info == psb_success_) call psi_ovrl_restore(x%v,xvsave,desc_a,info) - if (info /= psb_success_) then - info = psb_err_from_subroutine_ - ch_err='psb_csmm' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - if (doswap_) then - call psi_swaptran(ior(psb_swap_send_,psb_swap_recv_),& - & done,y%v,desc_a,iwork,info) - if (info == psb_success_) call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& - & done,y%v,desc_a,iwork,info,data=psb_comm_ovr_) - - if (debug_level >= psb_debug_comp_) & - & write(debug_unit,*) me,' ',trim(name),' swaptran ', info - if(info /= psb_success_) then - info = psb_err_from_subroutine_ - ch_err='PSI_SwapTran' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - end if - - end if - - if (aliw) deallocate(iwork,stat=info) - if (debug_level >= psb_debug_comp_) & - & write(debug_unit,*) me,' ',trim(name),' deallocat ',aliw, info - if(info /= psb_success_) then - info = psb_err_from_subroutine_ - ch_err='Deallocate iwork' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - nullify(iwork) - - call psb_erractionrestore(err_act) - if (debug_level >= psb_debug_comp_) then - call psb_barrier(ictxt) - write(debug_unit,*) me,' ',trim(name),' Returning ' - endif - return - -9999 call psb_error_handler(ictxt,err_act) - - return -end subroutine psb_dspmv_vect diff --git a/base/psblas/psb_dspnrm1.f90 b/base/psblas/psb_dspnrm1.f90 index 01a7de96..9d367615 100644 --- a/base/psblas/psb_dspnrm1.f90 +++ b/base/psblas/psb_dspnrm1.f90 @@ -40,6 +40,7 @@ ! a - type(psb_dspmat_type). The sparse matrix containing A. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code +! global - logical(optional) Whether to perform the global reduction, default: .true. ! function psb_dspnrm1(a,desc_a,info,global) result(res) use psb_base_mod, psb_protect_name => psb_dspnrm1 diff --git a/base/psblas/psb_dspsm.f90 b/base/psblas/psb_dspsm.f90 index 4efad6b7..f1a019ad 100644 --- a/base/psblas/psb_dspsm.f90 +++ b/base/psblas/psb_dspsm.f90 @@ -31,6 +31,203 @@ ! ! File: psb_dspsm.f90 ! +! +! Subroutine: psb_dsps_vect +! Performs one of the distributed matrix-vector operations +! +! Y := alpha * Pr * A-1 * Pc * X + beta * Y, or +! +! Y := alpha * D * Pr * A-1 * Pc * X + beta * Y, or +! +! Y := alpha * Pr * A-1 * Pc * D * X + beta * Y, or +! +! Y := alpha * Pr * A-T * Pc * X + beta * Y, or +! +! Y := alpha * D * Pr * A-T * Pc * X + beta * Y, or +! +! Y := alpha * Pr * A-T * Pc * D * X + beta * Y, or +! +! X is a distributed +! vector and T is a M-by-M distributed triangular matrix. +! +! +! Arguments: +! alpha - real. The scalar alpha. +! a - type(psb_dspmat_type). The sparse matrix containing A. +! x - type(psb_d_vect_type) The input vector containing the entries of ( X ). +! beta - real The scalar beta. +! y - type(psb_d_vect_type) The input vector containing the entries of ( Y ). +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! trans - character(optional). Whether A or A'. If not present 'N' is assumed. +! scale - character(optional). Specify some type of operation with +! the diagonal matrix D. +! choice - integer(optional). The kind of update to perform on overlap elements. +! d(:) - real, optional Matrix for diagonal scaling. +! work(:) - real, optional Working area. +! +subroutine psb_dspsv_vect(alpha,a,x,beta,y,desc_a,info,& + & trans, scale, choice, diag, work) + use psb_base_mod, psb_protect_name => psb_dspsv_vect + use psi_mod + implicit none + + real(psb_dpk_), intent(in) :: alpha, beta + type(psb_d_vect_type), intent(inout) :: x + type(psb_d_vect_type), intent(inout) :: y + type(psb_dspmat_type), intent(inout) :: a + type(psb_desc_type), intent(in) :: desc_a + integer(psb_ipk_), intent(out) :: info + type(psb_d_vect_type), intent(inout), optional :: diag + real(psb_dpk_), optional, target, intent(inout) :: work(:) + character, intent(in), optional :: trans, scale + integer(psb_ipk_), intent(in), optional :: choice + + ! locals + integer(psb_ipk_) :: ictxt, np, me, & + & err_act, iix, jjx, ia, ja, iia, jja, lldx,lldy, choice_,& + & ix, iy, ik, jx, jy, i, lld,& + & m, nrow, ncol, liwork, llwork, iiy, jjy, idx, ndm + + character :: lscale + integer(psb_ipk_), parameter :: nb=4 + real(psb_dpk_),pointer :: iwork(:), xp(:), yp(:) + character :: itrans + character(len=20) :: name, ch_err + logical :: aliw + + name='psb_dspsv_vect' + info=psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_ ; goto 9999 + end if + + ictxt=desc_a%get_context() + + call psb_info(ictxt, me, np) + if (np == -1) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(y%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + if (present(choice)) then + choice_ = choice + else + choice_ = psb_avg_ + endif + + if (present(scale)) then + lscale = psb_toupper(scale) + else + lscale = 'U' + endif + + if (present(trans)) then + itrans = psb_toupper(trans) + if((itrans == 'N').or.(itrans == 'T').or.(itrans == 'C')) then + ! Ok + else + info = psb_err_iarg_invalid_value_ + call psb_errpush(info,name) + goto 9999 + end if + else + itrans = 'N' + endif + + m = desc_a%get_global_rows() + nrow = desc_a%get_local_rows() + ncol = desc_a%get_local_cols() + lldx = x%get_nrows() + lldy = y%get_nrows() + if ((info == 0).and.(lldx null() + ! check for presence/size of a work area + liwork= 2*ncol + + if (present(work)) then + if (size(work) >= liwork) then + aliw =.false. + else + aliw=.true. + endif + else + aliw=.true. + end if + + if (aliw) then + allocate(iwork(liwork),stat=info) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + else + iwork => work + endif + + iwork(1)=0.d0 + + ! Perform local triangular system solve + if (present(diag)) then + call a%spsm(alpha,x,beta,y,info,scale=scale,d=diag,trans=trans) + else + call a%spsm(alpha,x,beta,y,info,scale=scale,trans=trans) + end if + if(info /= psb_success_) then + info = psb_err_from_subroutine_ + ch_err='dcssm' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + ! update overlap elements + if (choice_ > 0) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & done,y%v,desc_a,iwork,info,data=psb_comm_ovr_) + + + if (info == psb_success_) call psi_ovrl_upd(y%v,desc_a,choice_,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Inner updates') + goto 9999 + end if + end if + + if (aliw) deallocate(iwork) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ictxt,err_act) + + return +end subroutine psb_dspsv_vect +! ! Subroutine: psb_dspsm ! Performs one of the distributed matrix-vector operations ! @@ -283,38 +480,6 @@ subroutine psb_dspsm(alpha,a,x,beta,y,desc_a,info,& return end subroutine psb_dspsm - -!!$ -!!$ Parallel Sparse BLAS version 3.5 -!!$ (C) Copyright 2006-2018 -!!$ Salvatore Filippone University of Rome Tor Vergata -!!$ Alfredo Buttari -!!$ -!!$ Redistribution and use in source and binary forms, with or without -!!$ modification, are permitted provided that the following conditions -!!$ are met: -!!$ 1. Redistributions of source code must retain the above copyright -!!$ notice, this list of conditions and the following disclaimer. -!!$ 2. Redistributions in binary form must reproduce the above copyright -!!$ notice, this list of conditions, and the following disclaimer in the -!!$ documentation and/or other materials provided with the distribution. -!!$ 3. The name of the PSBLAS group or the names of its contributors may -!!$ not be used to endorse or promote products derived from this -!!$ software without specific written permission. -!!$ -!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -!!$ POSSIBILITY OF SUCH DAMAGE. -!!$ -!!$ ! ! Subroutine: psb_dspsv ! Performs one of the distributed matrix-vector operations @@ -545,166 +710,3 @@ subroutine psb_dspsv(alpha,a,x,beta,y,desc_a,info,& return end subroutine psb_dspsv - -subroutine psb_dspsv_vect(alpha,a,x,beta,y,desc_a,info,& - & trans, scale, choice, diag, work) - use psb_base_mod, psb_protect_name => psb_dspsv_vect - use psi_mod - implicit none - - real(psb_dpk_), intent(in) :: alpha, beta - type(psb_d_vect_type), intent(inout) :: x - type(psb_d_vect_type), intent(inout) :: y - type(psb_dspmat_type), intent(inout) :: a - type(psb_desc_type), intent(in) :: desc_a - integer(psb_ipk_), intent(out) :: info - type(psb_d_vect_type), intent(inout), optional :: diag - real(psb_dpk_), optional, target, intent(inout) :: work(:) - character, intent(in), optional :: trans, scale - integer(psb_ipk_), intent(in), optional :: choice - - ! locals - integer(psb_ipk_) :: ictxt, np, me, & - & err_act, iix, jjx, ia, ja, iia, jja, lldx,lldy, choice_,& - & ix, iy, ik, jx, jy, i, lld,& - & m, nrow, ncol, liwork, llwork, iiy, jjy, idx, ndm - - character :: lscale - integer(psb_ipk_), parameter :: nb=4 - real(psb_dpk_),pointer :: iwork(:), xp(:), yp(:) - character :: itrans - character(len=20) :: name, ch_err - logical :: aliw - - name='psb_sspsv' - info=psb_success_ - call psb_erractionsave(err_act) - if (psb_errstatus_fatal()) then - info = psb_err_internal_error_ ; goto 9999 - end if - - ictxt=desc_a%get_context() - - call psb_info(ictxt, me, np) - if (np == -1) then - info = psb_err_context_error_ - call psb_errpush(info,name) - goto 9999 - endif - - if (.not.allocated(x%v)) then - info = psb_err_invalid_vect_state_ - call psb_errpush(info,name) - goto 9999 - endif - if (.not.allocated(y%v)) then - info = psb_err_invalid_vect_state_ - call psb_errpush(info,name) - goto 9999 - endif - - - if (present(choice)) then - choice_ = choice - else - choice_ = psb_avg_ - endif - - if (present(scale)) then - lscale = psb_toupper(scale) - else - lscale = 'U' - endif - - if (present(trans)) then - itrans = psb_toupper(trans) - if((itrans == 'N').or.(itrans == 'T').or.(itrans == 'C')) then - ! Ok - else - info = psb_err_iarg_invalid_value_ - call psb_errpush(info,name) - goto 9999 - end if - else - itrans = 'N' - endif - - m = desc_a%get_global_rows() - nrow = desc_a%get_local_rows() - ncol = desc_a%get_local_cols() - lldx = x%get_nrows() - lldy = y%get_nrows() - if ((info == 0).and.(lldx null() - ! check for presence/size of a work area - liwork= 2*ncol - - if (present(work)) then - if (size(work) >= liwork) then - aliw =.false. - else - aliw=.true. - endif - else - aliw=.true. - end if - - if (aliw) then - allocate(iwork(liwork),stat=info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_realloc' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - else - iwork => work - endif - - iwork(1)=0.d0 - - ! Perform local triangular system solve - if (present(diag)) then - call a%spsm(alpha,x,beta,y,info,scale=scale,d=diag,trans=trans) - else - call a%spsm(alpha,x,beta,y,info,scale=scale,trans=trans) - end if - if(info /= psb_success_) then - info = psb_err_from_subroutine_ - ch_err='dcssm' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - ! update overlap elements - if (choice_ > 0) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& - & done,y%v,desc_a,iwork,info,data=psb_comm_ovr_) - - - if (info == psb_success_) call psi_ovrl_upd(y%v,desc_a,choice_,info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Inner updates') - goto 9999 - end if - end if - - if (aliw) deallocate(iwork) - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(ictxt,err_act) - - return -end subroutine psb_dspsv_vect - diff --git a/base/psblas/psb_samax.f90 b/base/psblas/psb_samax.f90 index ee70314f..174c7a28 100644 --- a/base/psblas/psb_samax.f90 +++ b/base/psblas/psb_samax.f90 @@ -32,7 +32,7 @@ ! File: psb_samax.f90 ! ! Function: psb_samax -! Searches the absolute max of X. +! Computes the maximum absolute value of X ! ! normi := max(abs(sub(X)(i)) ! @@ -164,7 +164,7 @@ end function psb_samax !!$ ! ! Function: psb_samaxv -! Searches the absolute max of X. +! Computes the maximum absolute value of X. ! ! normi := max(abs(X(i)) ! @@ -252,6 +252,17 @@ function psb_samaxv (x,desc_a, info,global) result(res) return end function psb_samaxv +! +! Function: psb_samax_vect +! Computes the maximum absolute value of X. +! +! normi := max(abs(X(i)) +! +! Arguments: +! x - type(psb_s_vect_type) The input vector. +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! function psb_samax_vect(x, desc_a, info,global) result(res) use psb_penv_mod @@ -374,17 +385,16 @@ end function psb_samax_vect !!$ ! ! Subroutine: psb_samaxvs -! Searches the absolute max of X. +! Computes the maximum absolute value of X, subroutine version ! ! normi := max(abs(sub(X)(i)) ! -! where sub( X ) denotes X(1:N,JX:). +! where sub( X ) denotes X(1:N). ! ! Arguments: ! res - real The result. -! x(:,:) - real The input vector. +! x(:) - real The input vector. ! desc_a - type(psb_desc_type). The communication descriptor. -! info - integer. Return code ! jx - integer(optional). The column offset. ! subroutine psb_samaxvs(res,x,desc_a, info,global) diff --git a/base/psblas/psb_sasum.f90 b/base/psblas/psb_sasum.f90 index 2abf254d..a61f1851 100644 --- a/base/psblas/psb_sasum.f90 +++ b/base/psblas/psb_sasum.f90 @@ -135,6 +135,19 @@ function psb_sasum (x,desc_a, info, jx,global) result(res) return end function psb_sasum +! Function: psb_sasum_vect +! Computes norm1 of X +! +! norm1 := sum(sub( X )(i)) +! +! where sub( X ) denotes X(1:N,JX:). +! +! Arguments: +! x - type(psb_s_vect_type) The input vector. +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! jx - integer(optional). The column offset. +! function psb_sasum_vect(x, desc_a, info,global) result(res) use psb_base_mod, psb_protect_name => psb_sasum_vect diff --git a/base/psblas/psb_saxpby.f90 b/base/psblas/psb_saxpby.f90 index 26cecd30..d3f573dc 100644 --- a/base/psblas/psb_saxpby.f90 +++ b/base/psblas/psb_saxpby.f90 @@ -31,6 +31,23 @@ ! ! File: psb_saxpby.f90 +! +! Subroutine: psb_saxpby_vect +! Adds one distributed vector to another, +! +! Y := beta * Y + alpha * X +! +! Arguments: +! alpha - real,input The scalar used to multiply each component of X +! x - type(psb_s_vect_type) The input vector containing the entries of X +! beta - real,input The scalar used to multiply each component of Y +! y - type(psb_s_vect_type) The input/output vector Y +! desc_a - type(psb_desc_type) The communication descriptor. +! info - integer Return code +! +! Note: from a functional point of view, X is input, but here +! it's declared INOUT because of the sync() methods. +! subroutine psb_saxpby_vect(alpha, x, beta, y,& & desc_a, info) use psb_base_mod, psb_protect_name => psb_saxpby_vect @@ -269,7 +286,7 @@ end subroutine psb_saxpby !!$ ! ! Subroutine: psb_saxpbyv -! Adds one distributed matrix to another, +! Adds one distributed vector to another, ! ! Y := beta * Y + alpha * X ! diff --git a/base/psblas/psb_sdot.f90 b/base/psblas/psb_sdot.f90 index 2c8bea51..cce9e15c 100644 --- a/base/psblas/psb_sdot.f90 +++ b/base/psblas/psb_sdot.f90 @@ -31,22 +31,22 @@ ! ! File: psb_sdot.f90 ! -! Function: psb_sdot -! psb_sdot forms the dot product of two distributed vectors, +! Function: psb_sdot_vect +! psb_sdot computes the dot product of two distributed vectors, ! -! dot := sub( X )**C * sub( Y ) +! dot := ( X )**C * ( Y ) ! -! where sub( X ) denotes X(:,JX) -! -! sub( Y ) denotes Y(:,JY). ! ! Arguments: -! x(:,:) - complex The input vector containing the entries of sub( X ). -! y(:,:) - complex The input vector containing the entries of sub( Y ). +! x - type(psb_s_vect_type) The input vector containing the entries of sub( X ). +! y - type(psb_s_vect_type) The input vector containing the entries of sub( Y ). ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code -! jx - integer(optional). The column offset for sub( X ). -! jy - integer(optional). The column offset for sub( Y ). +! global - logical(optional) Whether to perform the global sum, default: .true. +! +! Note: from a functional point of view, X and Y are input, but here +! they are declared INOUT because of the sync() methods. +! ! function psb_sdot_vect(x, y, desc_a,info,global) result(res) use psb_desc_mod @@ -156,7 +156,25 @@ function psb_sdot_vect(x, y, desc_a,info,global) result(res) return end function psb_sdot_vect - +! +! Function: psb_sdot +! psb_sdot computes the dot product of two distributed vectors, +! +! dot := sub( X )**C * sub( Y ) +! +! where sub( X ) denotes X(:,JX) +! +! sub( Y ) denotes Y(:,JY). +! +! Arguments: +! x(:,:) - real The input vector containing the entries of sub( X ). +! y(:,:) - real The input vector containing the entries of sub( Y ). +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! jx - integer(optional). The column offset for sub( X ). +! jy - integer(optional). The column offset for sub( Y ). +! global - logical(optional) Whether to perform the global sum, default: .true. +! function psb_sdot(x, y,desc_a, info, jx, jy,global) result(res) use psb_base_mod, psb_protect_name => psb_sdot implicit none @@ -298,7 +316,7 @@ end function psb_sdot !!$ ! ! Function: psb_sdotv -! psb_sdotv forms the dot product of two distributed vectors, +! psb_sdotv computes the dot product of two distributed vectors, ! ! dot := X**C * Y ! @@ -307,6 +325,7 @@ end function psb_sdot ! y(:) - real The input vector containing the entries of Y. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code +! global - logical(optional) Whether to perform the global sum, default: .true. ! function psb_sdotv(x, y,desc_a, info,global) result(res) use psb_base_mod, psb_protect_name => psb_sdotv @@ -432,7 +451,7 @@ end function psb_sdotv !!$ ! ! Subroutine: psb_sdotvs -! psb_sdotvs forms the dot product of two distributed vectors, +! psb_sdotvs computes the dot product of two distributed vectors, ! ! res := X**C * Y ! @@ -442,6 +461,7 @@ end function psb_sdotv ! y(:) - real The input vector containing the entries of Y. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code +! global - logical(optional) Whether to perform the global sum, default: .true. ! subroutine psb_sdotvs(res, x, y,desc_a, info,global) use psb_base_mod, psb_protect_name => psb_sdotvs @@ -565,7 +585,7 @@ end subroutine psb_sdotvs !!$ ! ! Subroutine: psb_smdots -! psb_smdots forms the dot product of multiple distributed vectors, +! psb_smdots computes the dot product of multiple distributed vectors, ! ! res(i) := ( X(:,i) )**C * ( Y(:,i) ) ! @@ -575,6 +595,7 @@ end subroutine psb_sdotvs ! y(:) - real The input vector containing the entries of sub( Y ). ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code +! global - logical(optional) Whether to perform the global sum, default: .true. ! subroutine psb_smdots(res, x, y, desc_a, info,global) use psb_base_mod, psb_protect_name => psb_smdots diff --git a/base/psblas/psb_snrm2.f90 b/base/psblas/psb_snrm2.f90 index d182a2cd..35260ef7 100644 --- a/base/psblas/psb_snrm2.f90 +++ b/base/psblas/psb_snrm2.f90 @@ -32,9 +32,9 @@ ! File: psb_snrm2.f90 ! ! Function: psb_snrm2 -! Forms the norm2 of a distributed vector, +! Computes the norm2 of a distributed vector, ! -! norm2 := sqrt ( sub( X )**T * sub( X ) ) +! norm2 := sqrt ( sub( X )**C * sub( X ) ) ! ! where sub( X ) denotes X(:,JX). ! @@ -43,6 +43,7 @@ ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code ! jx - integer(optional). The column offset for sub( X ). +! global - logical(optional) Whether to perform the global reduction, default: .true. ! function psb_snrm2(x, desc_a, info, jx,global) result(res) use psb_desc_mod @@ -170,14 +171,15 @@ end function psb_snrm2 !!$ ! ! Function: psb_snrm2 -! Forms the norm2 of a distributed vector, +! Computes the norm2 of a distributed vector, ! -! norm2 := sqrt ( X**T * X) +! norm2 := sqrt ( X**C * X) ! ! Arguments: ! x(:) - real The input vector containing the entries of X. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code +! global - logical(optional) Whether to perform the global reduction, default: .true. ! function psb_snrm2v(x, desc_a, info,global) result(res) use psb_desc_mod @@ -263,7 +265,17 @@ function psb_snrm2v(x, desc_a, info,global) result(res) end function psb_snrm2v - +! Function: psb_snrm2_vect +! Computes the norm2 of a distributed vector, +! +! norm2 := sqrt ( X**C * X) +! +! Arguments: +! x - type(psb_s_vect_type) The input vector containing the entries of X. +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! global - logical(optional) Whether to perform the global reduction, default: .true. +! function psb_snrm2_vect(x, desc_a, info,global) result(res) use psb_desc_mod use psb_check_mod @@ -344,7 +356,7 @@ function psb_snrm2_vect(x, desc_a, info,global) result(res) res = res - sqrt(sone - dd*(abs(x%v%v(idx))/res)**2) end do end if - else + else res = szero end if @@ -392,15 +404,16 @@ end function psb_snrm2_vect !!$ ! ! Subroutine: psb_snrm2vs -! Forms the norm2 of a distributed vector, +! Computes the norm2 of a distributed vector, subroutine version ! -! norm2 := sqrt ( X**T * X) +! norm2 := sqrt ( X**C * X) ! ! Arguments: ! res - real The result. ! x(:) - real The input vector containing the entries of X. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code +! global - logical(optional) Whether to perform the global reduction, default: .true. ! subroutine psb_snrm2vs(res, x, desc_a, info,global) use psb_desc_mod diff --git a/base/psblas/psb_snrmi.f90 b/base/psblas/psb_snrmi.f90 index 07b88de2..9fc41073 100644 --- a/base/psblas/psb_snrmi.f90 +++ b/base/psblas/psb_snrmi.f90 @@ -32,14 +32,15 @@ ! File: psb_snrmi.f90 ! ! Function: psb_snrmi -! Forms the infinity norm of a sparse matrix, +! Computes the infinity norm of a sparse matrix, ! -! nrmi := max_i(abs(sum(A(i,:)))) +! nrmi := max_i(sum(abs(A(i,:)))) ! ! Arguments: ! a - type(psb_sspmat_type). The sparse matrix containing A. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code +! global - logical(optional) Whether to perform the global reduction, default: .true. ! function psb_snrmi(a,desc_a,info,global) result(res) use psb_base_mod, psb_protect_name => psb_snrmi diff --git a/base/psblas/psb_sspmm.f90 b/base/psblas/psb_sspmm.f90 index a84962dc..56b881b8 100644 --- a/base/psblas/psb_sspmm.f90 +++ b/base/psblas/psb_sspmm.f90 @@ -31,6 +31,235 @@ ! ! File: psb_sspmm.f90 ! +! +! Subroutine: psb_sspm_vect +! Performs one of the distributed matrix-vector operations +! +! Y := alpha * Pr * A * Pc * X + beta * Y, or +! +! Y := alpha * Pr * A' * Pr * X + beta * Y, +! +! alpha and beta are scalars, X and Y are distributed +! vectors and A is a M-by-N distributed matrix. +! +! Arguments: +! alpha - real The scalar alpha. +! a - type(psb_sspmat_type). The sparse matrix containing A. +! x - type(psb_s_vect_type) The input vector containing the entries of ( X ). +! beta - real The scalar beta. +! y - type(psb_s_vect_type) The input vector containing the entries of ( Y ). +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! trans - character(optional). Whether A or A'. Default: 'N' +! work(:) - real,(optional). Working area. +! doswap - logical(optional). Whether to performe halo updates. +! +subroutine psb_sspmv_vect(alpha,a,x,beta,y,desc_a,info,& + & trans, work, doswap) + use psb_base_mod, psb_protect_name => psb_sspmv_vect + use psi_mod + implicit none + + real(psb_spk_), intent(in) :: alpha, beta + type(psb_s_vect_type), intent(inout) :: x + type(psb_s_vect_type), intent(inout) :: y + type(psb_sspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + integer(psb_ipk_), intent(out) :: info + real(psb_spk_), optional, target, intent(inout) :: work(:) + character, intent(in), optional :: trans + logical, intent(in), optional :: doswap + + ! locals + integer(psb_ipk_) :: ictxt, np, me,& + & err_act, iix, jjx, iia, jja, nrow, ncol, lldx, lldy, & + & liwork, iiy, jjy, ib, ip, idx + integer(psb_lpk_) :: ix, ijx, iy, ijy, m, n, ia, ja + integer(psb_ipk_), parameter :: nb=4 + real(psb_spk_), pointer :: iwork(:), xp(:), yp(:) + real(psb_spk_), allocatable :: xvsave(:) + character :: trans_ + character(len=20) :: name, ch_err + logical :: aliw, doswap_ + integer(psb_ipk_) :: debug_level, debug_unit + + name='psb_sspmv' + info=psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_ ; goto 9999 + end if + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + ictxt=desc_a%get_context() + call psb_info(ictxt, me, np) + if (np == -1) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(y%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (present(doswap)) then + doswap_ = doswap + else + doswap_ = .true. + endif + + if (present(trans)) then + trans_ = psb_toupper(trans) + else + trans_ = 'N' + endif + if ( (trans_ == 'N').or.(trans_ == 'T')& + & .or.(trans_ == 'C')) then + else + info = psb_err_iarg_invalid_value_ + call psb_errpush(info,name) + goto 9999 + end if + + m = desc_a%get_global_rows() + n = desc_a%get_global_cols() + nrow = desc_a%get_local_rows() + ncol = desc_a%get_local_cols() + lldx = x%get_nrows() + lldy = y%get_nrows() + if ((info == 0).and.(lldx null() + ! check for presence/size of a work area + liwork= 2*ncol + + if (present(work)) then + if (size(work) >= liwork) then + aliw =.false. + else + aliw=.true. + endif + else + aliw=.true. + end if + + if (aliw) then + allocate(iwork(liwork),stat=info) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='Allocate' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + else + iwork => work + endif + + if (debug_level >= psb_debug_comp_) & + & write(debug_unit,*) me,' ',trim(name),' Allocated work ', info + + if (trans_ == 'N') then + ! Matrix is not transposed + + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + + call psb_csmm(alpha,a,x,beta,y,info) + + if(info /= psb_success_) then + info = psb_err_from_subroutine_non_ + call psb_errpush(info,name) + goto 9999 + end if + + else + ! Matrix is transposed + + ! + ! Non-empty overlap, need a buffer to hold + ! the entries updated with average operator. + ! Why the average? because in this way they will contribute + ! with a proper scale factor (1/np) to the overall product. + ! + call psi_ovrl_save(x%v,xvsave,desc_a,info) + if (info == psb_success_) call psi_ovrl_upd(x%v,desc_a,psb_avg_,info) + + if (beta /= szero) call y%set(szero,nrow+1,ncol) + ! local Matrix-vector product + if (info == psb_success_) call psb_csmm(alpha,a,x,beta,y,info,trans=trans_) + + if (debug_level >= psb_debug_comp_) & + & write(debug_unit,*) me,' ',trim(name),' csmm ', info + + if (info == psb_success_) call psi_ovrl_restore(x%v,xvsave,desc_a,info) + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + ch_err='psb_csmm' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if (doswap_) then + call psi_swaptran(ior(psb_swap_send_,psb_swap_recv_),& + & sone,y%v,desc_a,iwork,info) + if (info == psb_success_) call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & sone,y%v,desc_a,iwork,info,data=psb_comm_ovr_) + + if (debug_level >= psb_debug_comp_) & + & write(debug_unit,*) me,' ',trim(name),' swaptran ', info + if(info /= psb_success_) then + info = psb_err_from_subroutine_ + ch_err='PSI_SwapTran' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + end if + + end if + + if (aliw) deallocate(iwork,stat=info) + if (debug_level >= psb_debug_comp_) & + & write(debug_unit,*) me,' ',trim(name),' deallocat ',aliw, info + if(info /= psb_success_) then + info = psb_err_from_subroutine_ + ch_err='Deallocate iwork' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + nullify(iwork) + + call psb_erractionrestore(err_act) + if (debug_level >= psb_debug_comp_) then + call psb_barrier(ictxt) + write(debug_unit,*) me,' ',trim(name),' Returning ' + endif + return + +9999 call psb_error_handler(ictxt,err_act) + + return +end subroutine psb_sspmv_vect +! ! Subroutine: psb_sspmm ! Performs one of the distributed matrix-vector operations ! @@ -356,10 +585,6 @@ subroutine psb_sspmm(alpha,a,x,beta,y,desc_a,info,& return end subroutine psb_sspmm - - - - !!$ !!$ Parallel Sparse BLAS version 3.5 !!$ (C) Copyright 2006-2018 @@ -670,211 +895,3 @@ subroutine psb_sspmv(alpha,a,x,beta,y,desc_a,info,& return end subroutine psb_sspmv - - - -subroutine psb_sspmv_vect(alpha,a,x,beta,y,desc_a,info,& - & trans, work, doswap) - use psb_base_mod, psb_protect_name => psb_sspmv_vect - use psi_mod - implicit none - - real(psb_spk_), intent(in) :: alpha, beta - type(psb_s_vect_type), intent(inout) :: x - type(psb_s_vect_type), intent(inout) :: y - type(psb_sspmat_type), intent(in) :: a - type(psb_desc_type), intent(in) :: desc_a - integer(psb_ipk_), intent(out) :: info - real(psb_spk_), optional, target, intent(inout) :: work(:) - character, intent(in), optional :: trans - logical, intent(in), optional :: doswap - - ! locals - integer(psb_ipk_) :: ictxt, np, me,& - & err_act, iix, jjx, iia, jja, nrow, ncol, lldx, lldy, & - & liwork, iiy, jjy, ib, ip, idx - integer(psb_lpk_) :: ix, ijx, iy, ijy, m, n, ia, ja - integer(psb_ipk_), parameter :: nb=4 - real(psb_spk_), pointer :: iwork(:), xp(:), yp(:) - real(psb_spk_), allocatable :: xvsave(:) - character :: trans_ - character(len=20) :: name, ch_err - logical :: aliw, doswap_ - integer(psb_ipk_) :: debug_level, debug_unit - - name='psb_sspmv' - info=psb_success_ - call psb_erractionsave(err_act) - if (psb_errstatus_fatal()) then - info = psb_err_internal_error_ ; goto 9999 - end if - debug_unit = psb_get_debug_unit() - debug_level = psb_get_debug_level() - - ictxt=desc_a%get_context() - call psb_info(ictxt, me, np) - if (np == -1) then - info = psb_err_context_error_ - call psb_errpush(info,name) - goto 9999 - endif - - if (.not.allocated(x%v)) then - info = psb_err_invalid_vect_state_ - call psb_errpush(info,name) - goto 9999 - endif - if (.not.allocated(y%v)) then - info = psb_err_invalid_vect_state_ - call psb_errpush(info,name) - goto 9999 - endif - - if (present(doswap)) then - doswap_ = doswap - else - doswap_ = .true. - endif - - if (present(trans)) then - trans_ = psb_toupper(trans) - else - trans_ = 'N' - endif - if ( (trans_ == 'N').or.(trans_ == 'T')& - & .or.(trans_ == 'C')) then - else - info = psb_err_iarg_invalid_value_ - call psb_errpush(info,name) - goto 9999 - end if - - m = desc_a%get_global_rows() - n = desc_a%get_global_cols() - nrow = desc_a%get_local_rows() - ncol = desc_a%get_local_cols() - lldx = x%get_nrows() - lldy = y%get_nrows() - if ((info == 0).and.(lldx null() - ! check for presence/size of a work area - liwork= 2*ncol - - if (present(work)) then - if (size(work) >= liwork) then - aliw =.false. - else - aliw=.true. - endif - else - aliw=.true. - end if - - if (aliw) then - allocate(iwork(liwork),stat=info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='Allocate' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - else - iwork => work - endif - - if (debug_level >= psb_debug_comp_) & - & write(debug_unit,*) me,' ',trim(name),' Allocated work ', info - - if (trans_ == 'N') then - ! Matrix is not transposed - - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& - & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - end if - - call psb_csmm(alpha,a,x,beta,y,info) - - if(info /= psb_success_) then - info = psb_err_from_subroutine_non_ - call psb_errpush(info,name) - goto 9999 - end if - - else - ! Matrix is transposed - - ! - ! Non-empty overlap, need a buffer to hold - ! the entries updated with average operator. - ! Why the average? because in this way they will contribute - ! with a proper scale factor (1/np) to the overall product. - ! - call psi_ovrl_save(x%v,xvsave,desc_a,info) - if (info == psb_success_) call psi_ovrl_upd(x%v,desc_a,psb_avg_,info) - - if (beta /= szero) call y%set(szero,nrow+1,ncol) - ! local Matrix-vector product - if (info == psb_success_) call psb_csmm(alpha,a,x,beta,y,info,trans=trans_) - - if (debug_level >= psb_debug_comp_) & - & write(debug_unit,*) me,' ',trim(name),' csmm ', info - - if (info == psb_success_) call psi_ovrl_restore(x%v,xvsave,desc_a,info) - if (info /= psb_success_) then - info = psb_err_from_subroutine_ - ch_err='psb_csmm' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - if (doswap_) then - call psi_swaptran(ior(psb_swap_send_,psb_swap_recv_),& - & sone,y%v,desc_a,iwork,info) - if (info == psb_success_) call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& - & sone,y%v,desc_a,iwork,info,data=psb_comm_ovr_) - - if (debug_level >= psb_debug_comp_) & - & write(debug_unit,*) me,' ',trim(name),' swaptran ', info - if(info /= psb_success_) then - info = psb_err_from_subroutine_ - ch_err='PSI_SwapTran' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - end if - - end if - - if (aliw) deallocate(iwork,stat=info) - if (debug_level >= psb_debug_comp_) & - & write(debug_unit,*) me,' ',trim(name),' deallocat ',aliw, info - if(info /= psb_success_) then - info = psb_err_from_subroutine_ - ch_err='Deallocate iwork' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - nullify(iwork) - - call psb_erractionrestore(err_act) - if (debug_level >= psb_debug_comp_) then - call psb_barrier(ictxt) - write(debug_unit,*) me,' ',trim(name),' Returning ' - endif - return - -9999 call psb_error_handler(ictxt,err_act) - - return -end subroutine psb_sspmv_vect diff --git a/base/psblas/psb_sspnrm1.f90 b/base/psblas/psb_sspnrm1.f90 index 391d60ee..9d2afeb8 100644 --- a/base/psblas/psb_sspnrm1.f90 +++ b/base/psblas/psb_sspnrm1.f90 @@ -40,6 +40,7 @@ ! a - type(psb_sspmat_type). The sparse matrix containing A. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code +! global - logical(optional) Whether to perform the global reduction, default: .true. ! function psb_sspnrm1(a,desc_a,info,global) result(res) use psb_base_mod, psb_protect_name => psb_sspnrm1 diff --git a/base/psblas/psb_sspsm.f90 b/base/psblas/psb_sspsm.f90 index 8bcfdcad..1cbbd6d2 100644 --- a/base/psblas/psb_sspsm.f90 +++ b/base/psblas/psb_sspsm.f90 @@ -31,6 +31,203 @@ ! ! File: psb_sspsm.f90 ! +! +! Subroutine: psb_ssps_vect +! Performs one of the distributed matrix-vector operations +! +! Y := alpha * Pr * A-1 * Pc * X + beta * Y, or +! +! Y := alpha * D * Pr * A-1 * Pc * X + beta * Y, or +! +! Y := alpha * Pr * A-1 * Pc * D * X + beta * Y, or +! +! Y := alpha * Pr * A-T * Pc * X + beta * Y, or +! +! Y := alpha * D * Pr * A-T * Pc * X + beta * Y, or +! +! Y := alpha * Pr * A-T * Pc * D * X + beta * Y, or +! +! X is a distributed +! vector and T is a M-by-M distributed triangular matrix. +! +! +! Arguments: +! alpha - real. The scalar alpha. +! a - type(psb_sspmat_type). The sparse matrix containing A. +! x - type(psb_s_vect_type) The input vector containing the entries of ( X ). +! beta - real The scalar beta. +! y - type(psb_s_vect_type) The input vector containing the entries of ( Y ). +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! trans - character(optional). Whether A or A'. If not present 'N' is assumed. +! scale - character(optional). Specify some type of operation with +! the diagonal matrix D. +! choice - integer(optional). The kind of update to perform on overlap elements. +! d(:) - real, optional Matrix for diagonal scaling. +! work(:) - real, optional Working area. +! +subroutine psb_sspsv_vect(alpha,a,x,beta,y,desc_a,info,& + & trans, scale, choice, diag, work) + use psb_base_mod, psb_protect_name => psb_sspsv_vect + use psi_mod + implicit none + + real(psb_spk_), intent(in) :: alpha, beta + type(psb_s_vect_type), intent(inout) :: x + type(psb_s_vect_type), intent(inout) :: y + type(psb_sspmat_type), intent(inout) :: a + type(psb_desc_type), intent(in) :: desc_a + integer(psb_ipk_), intent(out) :: info + type(psb_s_vect_type), intent(inout), optional :: diag + real(psb_spk_), optional, target, intent(inout) :: work(:) + character, intent(in), optional :: trans, scale + integer(psb_ipk_), intent(in), optional :: choice + + ! locals + integer(psb_ipk_) :: ictxt, np, me, & + & err_act, iix, jjx, ia, ja, iia, jja, lldx,lldy, choice_,& + & ix, iy, ik, jx, jy, i, lld,& + & m, nrow, ncol, liwork, llwork, iiy, jjy, idx, ndm + + character :: lscale + integer(psb_ipk_), parameter :: nb=4 + real(psb_spk_),pointer :: iwork(:), xp(:), yp(:) + character :: itrans + character(len=20) :: name, ch_err + logical :: aliw + + name='psb_sspsv_vect' + info=psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_ ; goto 9999 + end if + + ictxt=desc_a%get_context() + + call psb_info(ictxt, me, np) + if (np == -1) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(y%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + if (present(choice)) then + choice_ = choice + else + choice_ = psb_avg_ + endif + + if (present(scale)) then + lscale = psb_toupper(scale) + else + lscale = 'U' + endif + + if (present(trans)) then + itrans = psb_toupper(trans) + if((itrans == 'N').or.(itrans == 'T').or.(itrans == 'C')) then + ! Ok + else + info = psb_err_iarg_invalid_value_ + call psb_errpush(info,name) + goto 9999 + end if + else + itrans = 'N' + endif + + m = desc_a%get_global_rows() + nrow = desc_a%get_local_rows() + ncol = desc_a%get_local_cols() + lldx = x%get_nrows() + lldy = y%get_nrows() + if ((info == 0).and.(lldx null() + ! check for presence/size of a work area + liwork= 2*ncol + + if (present(work)) then + if (size(work) >= liwork) then + aliw =.false. + else + aliw=.true. + endif + else + aliw=.true. + end if + + if (aliw) then + allocate(iwork(liwork),stat=info) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + else + iwork => work + endif + + iwork(1)=0.d0 + + ! Perform local triangular system solve + if (present(diag)) then + call a%spsm(alpha,x,beta,y,info,scale=scale,d=diag,trans=trans) + else + call a%spsm(alpha,x,beta,y,info,scale=scale,trans=trans) + end if + if(info /= psb_success_) then + info = psb_err_from_subroutine_ + ch_err='dcssm' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + ! update overlap elements + if (choice_ > 0) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & sone,y%v,desc_a,iwork,info,data=psb_comm_ovr_) + + + if (info == psb_success_) call psi_ovrl_upd(y%v,desc_a,choice_,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Inner updates') + goto 9999 + end if + end if + + if (aliw) deallocate(iwork) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ictxt,err_act) + + return +end subroutine psb_sspsv_vect +! ! Subroutine: psb_sspsm ! Performs one of the distributed matrix-vector operations ! @@ -283,38 +480,6 @@ subroutine psb_sspsm(alpha,a,x,beta,y,desc_a,info,& return end subroutine psb_sspsm - -!!$ -!!$ Parallel Sparse BLAS version 3.5 -!!$ (C) Copyright 2006-2018 -!!$ Salvatore Filippone University of Rome Tor Vergata -!!$ Alfredo Buttari -!!$ -!!$ Redistribution and use in source and binary forms, with or without -!!$ modification, are permitted provided that the following conditions -!!$ are met: -!!$ 1. Redistributions of source code must retain the above copyright -!!$ notice, this list of conditions and the following disclaimer. -!!$ 2. Redistributions in binary form must reproduce the above copyright -!!$ notice, this list of conditions, and the following disclaimer in the -!!$ documentation and/or other materials provided with the distribution. -!!$ 3. The name of the PSBLAS group or the names of its contributors may -!!$ not be used to endorse or promote products derived from this -!!$ software without specific written permission. -!!$ -!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -!!$ POSSIBILITY OF SUCH DAMAGE. -!!$ -!!$ ! ! Subroutine: psb_sspsv ! Performs one of the distributed matrix-vector operations @@ -545,166 +710,3 @@ subroutine psb_sspsv(alpha,a,x,beta,y,desc_a,info,& return end subroutine psb_sspsv - -subroutine psb_sspsv_vect(alpha,a,x,beta,y,desc_a,info,& - & trans, scale, choice, diag, work) - use psb_base_mod, psb_protect_name => psb_sspsv_vect - use psi_mod - implicit none - - real(psb_spk_), intent(in) :: alpha, beta - type(psb_s_vect_type), intent(inout) :: x - type(psb_s_vect_type), intent(inout) :: y - type(psb_sspmat_type), intent(inout) :: a - type(psb_desc_type), intent(in) :: desc_a - integer(psb_ipk_), intent(out) :: info - type(psb_s_vect_type), intent(inout), optional :: diag - real(psb_spk_), optional, target, intent(inout) :: work(:) - character, intent(in), optional :: trans, scale - integer(psb_ipk_), intent(in), optional :: choice - - ! locals - integer(psb_ipk_) :: ictxt, np, me, & - & err_act, iix, jjx, ia, ja, iia, jja, lldx,lldy, choice_,& - & ix, iy, ik, jx, jy, i, lld,& - & m, nrow, ncol, liwork, llwork, iiy, jjy, idx, ndm - - character :: lscale - integer(psb_ipk_), parameter :: nb=4 - real(psb_spk_),pointer :: iwork(:), xp(:), yp(:) - character :: itrans - character(len=20) :: name, ch_err - logical :: aliw - - name='psb_sspsv' - info=psb_success_ - call psb_erractionsave(err_act) - if (psb_errstatus_fatal()) then - info = psb_err_internal_error_ ; goto 9999 - end if - - ictxt=desc_a%get_context() - - call psb_info(ictxt, me, np) - if (np == -1) then - info = psb_err_context_error_ - call psb_errpush(info,name) - goto 9999 - endif - - if (.not.allocated(x%v)) then - info = psb_err_invalid_vect_state_ - call psb_errpush(info,name) - goto 9999 - endif - if (.not.allocated(y%v)) then - info = psb_err_invalid_vect_state_ - call psb_errpush(info,name) - goto 9999 - endif - - - if (present(choice)) then - choice_ = choice - else - choice_ = psb_avg_ - endif - - if (present(scale)) then - lscale = psb_toupper(scale) - else - lscale = 'U' - endif - - if (present(trans)) then - itrans = psb_toupper(trans) - if((itrans == 'N').or.(itrans == 'T').or.(itrans == 'C')) then - ! Ok - else - info = psb_err_iarg_invalid_value_ - call psb_errpush(info,name) - goto 9999 - end if - else - itrans = 'N' - endif - - m = desc_a%get_global_rows() - nrow = desc_a%get_local_rows() - ncol = desc_a%get_local_cols() - lldx = x%get_nrows() - lldy = y%get_nrows() - if ((info == 0).and.(lldx null() - ! check for presence/size of a work area - liwork= 2*ncol - - if (present(work)) then - if (size(work) >= liwork) then - aliw =.false. - else - aliw=.true. - endif - else - aliw=.true. - end if - - if (aliw) then - allocate(iwork(liwork),stat=info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_realloc' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - else - iwork => work - endif - - iwork(1)=0.d0 - - ! Perform local triangular system solve - if (present(diag)) then - call a%spsm(alpha,x,beta,y,info,scale=scale,d=diag,trans=trans) - else - call a%spsm(alpha,x,beta,y,info,scale=scale,trans=trans) - end if - if(info /= psb_success_) then - info = psb_err_from_subroutine_ - ch_err='dcssm' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - ! update overlap elements - if (choice_ > 0) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& - & sone,y%v,desc_a,iwork,info,data=psb_comm_ovr_) - - - if (info == psb_success_) call psi_ovrl_upd(y%v,desc_a,choice_,info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Inner updates') - goto 9999 - end if - end if - - if (aliw) deallocate(iwork) - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(ictxt,err_act) - - return -end subroutine psb_sspsv_vect - diff --git a/base/psblas/psb_zamax.f90 b/base/psblas/psb_zamax.f90 index 21a82b39..5e768023 100644 --- a/base/psblas/psb_zamax.f90 +++ b/base/psblas/psb_zamax.f90 @@ -32,7 +32,7 @@ ! File: psb_zamax.f90 ! ! Function: psb_zamax -! Searches the absolute max of X. +! Computes the maximum absolute value of X ! ! normi := max(abs(sub(X)(i)) ! @@ -164,7 +164,7 @@ end function psb_zamax !!$ ! ! Function: psb_zamaxv -! Searches the absolute max of X. +! Computes the maximum absolute value of X. ! ! normi := max(abs(X(i)) ! @@ -252,6 +252,17 @@ function psb_zamaxv (x,desc_a, info,global) result(res) return end function psb_zamaxv +! +! Function: psb_zamax_vect +! Computes the maximum absolute value of X. +! +! normi := max(abs(X(i)) +! +! Arguments: +! x - type(psb_z_vect_type) The input vector. +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! function psb_zamax_vect(x, desc_a, info,global) result(res) use psb_penv_mod @@ -374,17 +385,16 @@ end function psb_zamax_vect !!$ ! ! Subroutine: psb_zamaxvs -! Searches the absolute max of X. +! Computes the maximum absolute value of X, subroutine version ! ! normi := max(abs(sub(X)(i)) ! -! where sub( X ) denotes X(1:N,JX:). +! where sub( X ) denotes X(1:N). ! ! Arguments: ! res - real The result. -! x(:,:) - complex The input vector. +! x(:) - complex The input vector. ! desc_a - type(psb_desc_type). The communication descriptor. -! info - integer. Return code ! jx - integer(optional). The column offset. ! subroutine psb_zamaxvs(res,x,desc_a, info,global) diff --git a/base/psblas/psb_zasum.f90 b/base/psblas/psb_zasum.f90 index 6bfd01ca..9b6fddd7 100644 --- a/base/psblas/psb_zasum.f90 +++ b/base/psblas/psb_zasum.f90 @@ -135,6 +135,19 @@ function psb_zasum (x,desc_a, info, jx,global) result(res) return end function psb_zasum +! Function: psb_zasum_vect +! Computes norm1 of X +! +! norm1 := sum(sub( X )(i)) +! +! where sub( X ) denotes X(1:N,JX:). +! +! Arguments: +! x - type(psb_z_vect_type) The input vector. +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! jx - integer(optional). The column offset. +! function psb_zasum_vect(x, desc_a, info,global) result(res) use psb_base_mod, psb_protect_name => psb_zasum_vect diff --git a/base/psblas/psb_zaxpby.f90 b/base/psblas/psb_zaxpby.f90 index 273aa882..b48bb5ff 100644 --- a/base/psblas/psb_zaxpby.f90 +++ b/base/psblas/psb_zaxpby.f90 @@ -31,6 +31,23 @@ ! ! File: psb_zaxpby.f90 +! +! Subroutine: psb_zaxpby_vect +! Adds one distributed vector to another, +! +! Y := beta * Y + alpha * X +! +! Arguments: +! alpha - complex,input The scalar used to multiply each component of X +! x - type(psb_z_vect_type) The input vector containing the entries of X +! beta - complex,input The scalar used to multiply each component of Y +! y - type(psb_z_vect_type) The input/output vector Y +! desc_a - type(psb_desc_type) The communication descriptor. +! info - integer Return code +! +! Note: from a functional point of view, X is input, but here +! it's declared INOUT because of the sync() methods. +! subroutine psb_zaxpby_vect(alpha, x, beta, y,& & desc_a, info) use psb_base_mod, psb_protect_name => psb_zaxpby_vect @@ -269,7 +286,7 @@ end subroutine psb_zaxpby !!$ ! ! Subroutine: psb_zaxpbyv -! Adds one distributed matrix to another, +! Adds one distributed vector to another, ! ! Y := beta * Y + alpha * X ! diff --git a/base/psblas/psb_zdot.f90 b/base/psblas/psb_zdot.f90 index 8f673c02..ad21b1d8 100644 --- a/base/psblas/psb_zdot.f90 +++ b/base/psblas/psb_zdot.f90 @@ -31,22 +31,22 @@ ! ! File: psb_zdot.f90 ! -! Function: psb_zdot -! psb_zdot forms the dot product of two distributed vectors, +! Function: psb_zdot_vect +! psb_zdot computes the dot product of two distributed vectors, ! -! dot := sub( X )**C * sub( Y ) +! dot := ( X )**C * ( Y ) ! -! where sub( X ) denotes X(:,JX) -! -! sub( Y ) denotes Y(:,JY). ! ! Arguments: -! x(:,:) - complex The input vector containing the entries of sub( X ). -! y(:,:) - complex The input vector containing the entries of sub( Y ). +! x - type(psb_z_vect_type) The input vector containing the entries of sub( X ). +! y - type(psb_z_vect_type) The input vector containing the entries of sub( Y ). ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code -! jx - integer(optional). The column offset for sub( X ). -! jy - integer(optional). The column offset for sub( Y ). +! global - logical(optional) Whether to perform the global sum, default: .true. +! +! Note: from a functional point of view, X and Y are input, but here +! they are declared INOUT because of the sync() methods. +! ! function psb_zdot_vect(x, y, desc_a,info,global) result(res) use psb_desc_mod @@ -156,7 +156,25 @@ function psb_zdot_vect(x, y, desc_a,info,global) result(res) return end function psb_zdot_vect - +! +! Function: psb_zdot +! psb_zdot computes the dot product of two distributed vectors, +! +! dot := sub( X )**C * sub( Y ) +! +! where sub( X ) denotes X(:,JX) +! +! sub( Y ) denotes Y(:,JY). +! +! Arguments: +! x(:,:) - complex The input vector containing the entries of sub( X ). +! y(:,:) - complex The input vector containing the entries of sub( Y ). +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! jx - integer(optional). The column offset for sub( X ). +! jy - integer(optional). The column offset for sub( Y ). +! global - logical(optional) Whether to perform the global sum, default: .true. +! function psb_zdot(x, y,desc_a, info, jx, jy,global) result(res) use psb_base_mod, psb_protect_name => psb_zdot implicit none @@ -298,7 +316,7 @@ end function psb_zdot !!$ ! ! Function: psb_zdotv -! psb_zdotv forms the dot product of two distributed vectors, +! psb_zdotv computes the dot product of two distributed vectors, ! ! dot := X**C * Y ! @@ -307,6 +325,7 @@ end function psb_zdot ! y(:) - complex The input vector containing the entries of Y. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code +! global - logical(optional) Whether to perform the global sum, default: .true. ! function psb_zdotv(x, y,desc_a, info,global) result(res) use psb_base_mod, psb_protect_name => psb_zdotv @@ -432,7 +451,7 @@ end function psb_zdotv !!$ ! ! Subroutine: psb_zdotvs -! psb_zdotvs forms the dot product of two distributed vectors, +! psb_zdotvs computes the dot product of two distributed vectors, ! ! res := X**C * Y ! @@ -442,6 +461,7 @@ end function psb_zdotv ! y(:) - complex The input vector containing the entries of Y. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code +! global - logical(optional) Whether to perform the global sum, default: .true. ! subroutine psb_zdotvs(res, x, y,desc_a, info,global) use psb_base_mod, psb_protect_name => psb_zdotvs @@ -565,7 +585,7 @@ end subroutine psb_zdotvs !!$ ! ! Subroutine: psb_zmdots -! psb_zmdots forms the dot product of multiple distributed vectors, +! psb_zmdots computes the dot product of multiple distributed vectors, ! ! res(i) := ( X(:,i) )**C * ( Y(:,i) ) ! @@ -575,6 +595,7 @@ end subroutine psb_zdotvs ! y(:) - complex The input vector containing the entries of sub( Y ). ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code +! global - logical(optional) Whether to perform the global sum, default: .true. ! subroutine psb_zmdots(res, x, y, desc_a, info,global) use psb_base_mod, psb_protect_name => psb_zmdots diff --git a/base/psblas/psb_znrm2.f90 b/base/psblas/psb_znrm2.f90 index 1e1cac26..9f327773 100644 --- a/base/psblas/psb_znrm2.f90 +++ b/base/psblas/psb_znrm2.f90 @@ -32,9 +32,9 @@ ! File: psb_znrm2.f90 ! ! Function: psb_znrm2 -! Forms the norm2 of a distributed vector, +! Computes the norm2 of a distributed vector, ! -! norm2 := sqrt ( sub( X )**T * sub( X ) ) +! norm2 := sqrt ( sub( X )**C * sub( X ) ) ! ! where sub( X ) denotes X(:,JX). ! @@ -43,6 +43,7 @@ ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code ! jx - integer(optional). The column offset for sub( X ). +! global - logical(optional) Whether to perform the global reduction, default: .true. ! function psb_znrm2(x, desc_a, info, jx,global) result(res) use psb_desc_mod @@ -170,14 +171,15 @@ end function psb_znrm2 !!$ ! ! Function: psb_znrm2 -! Forms the norm2 of a distributed vector, +! Computes the norm2 of a distributed vector, ! -! norm2 := sqrt ( X**T * X) +! norm2 := sqrt ( X**C * X) ! ! Arguments: ! x(:) - complex The input vector containing the entries of X. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code +! global - logical(optional) Whether to perform the global reduction, default: .true. ! function psb_znrm2v(x, desc_a, info,global) result(res) use psb_desc_mod @@ -263,7 +265,17 @@ function psb_znrm2v(x, desc_a, info,global) result(res) end function psb_znrm2v - +! Function: psb_znrm2_vect +! Computes the norm2 of a distributed vector, +! +! norm2 := sqrt ( X**C * X) +! +! Arguments: +! x - type(psb_z_vect_type) The input vector containing the entries of X. +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! global - logical(optional) Whether to perform the global reduction, default: .true. +! function psb_znrm2_vect(x, desc_a, info,global) result(res) use psb_desc_mod use psb_check_mod @@ -344,7 +356,7 @@ function psb_znrm2_vect(x, desc_a, info,global) result(res) res = res - sqrt(zone - dd*(abs(x%v%v(idx))/res)**2) end do end if - else + else res = dzero end if @@ -392,15 +404,16 @@ end function psb_znrm2_vect !!$ ! ! Subroutine: psb_znrm2vs -! Forms the norm2 of a distributed vector, +! Computes the norm2 of a distributed vector, subroutine version ! -! norm2 := sqrt ( X**T * X) +! norm2 := sqrt ( X**C * X) ! ! Arguments: ! res - real The result. ! x(:) - complex The input vector containing the entries of X. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code +! global - logical(optional) Whether to perform the global reduction, default: .true. ! subroutine psb_znrm2vs(res, x, desc_a, info,global) use psb_desc_mod diff --git a/base/psblas/psb_znrmi.f90 b/base/psblas/psb_znrmi.f90 index 9e0440ff..19b071e4 100644 --- a/base/psblas/psb_znrmi.f90 +++ b/base/psblas/psb_znrmi.f90 @@ -32,14 +32,15 @@ ! File: psb_znrmi.f90 ! ! Function: psb_znrmi -! Forms the infinity norm of a sparse matrix, +! Computes the infinity norm of a sparse matrix, ! -! nrmi := max_i(abs(sum(A(i,:)))) +! nrmi := max_i(sum(abs(A(i,:)))) ! ! Arguments: ! a - type(psb_zspmat_type). The sparse matrix containing A. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code +! global - logical(optional) Whether to perform the global reduction, default: .true. ! function psb_znrmi(a,desc_a,info,global) result(res) use psb_base_mod, psb_protect_name => psb_znrmi diff --git a/base/psblas/psb_zspmm.f90 b/base/psblas/psb_zspmm.f90 index 83c42bc0..add3205b 100644 --- a/base/psblas/psb_zspmm.f90 +++ b/base/psblas/psb_zspmm.f90 @@ -31,6 +31,235 @@ ! ! File: psb_zspmm.f90 ! +! +! Subroutine: psb_zspm_vect +! Performs one of the distributed matrix-vector operations +! +! Y := alpha * Pr * A * Pc * X + beta * Y, or +! +! Y := alpha * Pr * A' * Pr * X + beta * Y, +! +! alpha and beta are scalars, X and Y are distributed +! vectors and A is a M-by-N distributed matrix. +! +! Arguments: +! alpha - complex The scalar alpha. +! a - type(psb_zspmat_type). The sparse matrix containing A. +! x - type(psb_z_vect_type) The input vector containing the entries of ( X ). +! beta - complex The scalar beta. +! y - type(psb_z_vect_type) The input vector containing the entries of ( Y ). +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! trans - character(optional). Whether A or A'. Default: 'N' +! work(:) - complex,(optional). Working area. +! doswap - logical(optional). Whether to performe halo updates. +! +subroutine psb_zspmv_vect(alpha,a,x,beta,y,desc_a,info,& + & trans, work, doswap) + use psb_base_mod, psb_protect_name => psb_zspmv_vect + use psi_mod + implicit none + + complex(psb_dpk_), intent(in) :: alpha, beta + type(psb_z_vect_type), intent(inout) :: x + type(psb_z_vect_type), intent(inout) :: y + type(psb_zspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + integer(psb_ipk_), intent(out) :: info + complex(psb_dpk_), optional, target, intent(inout) :: work(:) + character, intent(in), optional :: trans + logical, intent(in), optional :: doswap + + ! locals + integer(psb_ipk_) :: ictxt, np, me,& + & err_act, iix, jjx, iia, jja, nrow, ncol, lldx, lldy, & + & liwork, iiy, jjy, ib, ip, idx + integer(psb_lpk_) :: ix, ijx, iy, ijy, m, n, ia, ja + integer(psb_ipk_), parameter :: nb=4 + complex(psb_dpk_), pointer :: iwork(:), xp(:), yp(:) + complex(psb_dpk_), allocatable :: xvsave(:) + character :: trans_ + character(len=20) :: name, ch_err + logical :: aliw, doswap_ + integer(psb_ipk_) :: debug_level, debug_unit + + name='psb_zspmv' + info=psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_ ; goto 9999 + end if + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + ictxt=desc_a%get_context() + call psb_info(ictxt, me, np) + if (np == -1) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(y%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (present(doswap)) then + doswap_ = doswap + else + doswap_ = .true. + endif + + if (present(trans)) then + trans_ = psb_toupper(trans) + else + trans_ = 'N' + endif + if ( (trans_ == 'N').or.(trans_ == 'T')& + & .or.(trans_ == 'C')) then + else + info = psb_err_iarg_invalid_value_ + call psb_errpush(info,name) + goto 9999 + end if + + m = desc_a%get_global_rows() + n = desc_a%get_global_cols() + nrow = desc_a%get_local_rows() + ncol = desc_a%get_local_cols() + lldx = x%get_nrows() + lldy = y%get_nrows() + if ((info == 0).and.(lldx null() + ! check for presence/size of a work area + liwork= 2*ncol + + if (present(work)) then + if (size(work) >= liwork) then + aliw =.false. + else + aliw=.true. + endif + else + aliw=.true. + end if + + if (aliw) then + allocate(iwork(liwork),stat=info) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='Allocate' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + else + iwork => work + endif + + if (debug_level >= psb_debug_comp_) & + & write(debug_unit,*) me,' ',trim(name),' Allocated work ', info + + if (trans_ == 'N') then + ! Matrix is not transposed + + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + + call psb_csmm(alpha,a,x,beta,y,info) + + if(info /= psb_success_) then + info = psb_err_from_subroutine_non_ + call psb_errpush(info,name) + goto 9999 + end if + + else + ! Matrix is transposed + + ! + ! Non-empty overlap, need a buffer to hold + ! the entries updated with average operator. + ! Why the average? because in this way they will contribute + ! with a proper scale factor (1/np) to the overall product. + ! + call psi_ovrl_save(x%v,xvsave,desc_a,info) + if (info == psb_success_) call psi_ovrl_upd(x%v,desc_a,psb_avg_,info) + + if (beta /= zzero) call y%set(zzero,nrow+1,ncol) + ! local Matrix-vector product + if (info == psb_success_) call psb_csmm(alpha,a,x,beta,y,info,trans=trans_) + + if (debug_level >= psb_debug_comp_) & + & write(debug_unit,*) me,' ',trim(name),' csmm ', info + + if (info == psb_success_) call psi_ovrl_restore(x%v,xvsave,desc_a,info) + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + ch_err='psb_csmm' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if (doswap_) then + call psi_swaptran(ior(psb_swap_send_,psb_swap_recv_),& + & zone,y%v,desc_a,iwork,info) + if (info == psb_success_) call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & zone,y%v,desc_a,iwork,info,data=psb_comm_ovr_) + + if (debug_level >= psb_debug_comp_) & + & write(debug_unit,*) me,' ',trim(name),' swaptran ', info + if(info /= psb_success_) then + info = psb_err_from_subroutine_ + ch_err='PSI_SwapTran' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + end if + + end if + + if (aliw) deallocate(iwork,stat=info) + if (debug_level >= psb_debug_comp_) & + & write(debug_unit,*) me,' ',trim(name),' deallocat ',aliw, info + if(info /= psb_success_) then + info = psb_err_from_subroutine_ + ch_err='Deallocate iwork' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + nullify(iwork) + + call psb_erractionrestore(err_act) + if (debug_level >= psb_debug_comp_) then + call psb_barrier(ictxt) + write(debug_unit,*) me,' ',trim(name),' Returning ' + endif + return + +9999 call psb_error_handler(ictxt,err_act) + + return +end subroutine psb_zspmv_vect +! ! Subroutine: psb_zspmm ! Performs one of the distributed matrix-vector operations ! @@ -356,10 +585,6 @@ subroutine psb_zspmm(alpha,a,x,beta,y,desc_a,info,& return end subroutine psb_zspmm - - - - !!$ !!$ Parallel Sparse BLAS version 3.5 !!$ (C) Copyright 2006-2018 @@ -670,211 +895,3 @@ subroutine psb_zspmv(alpha,a,x,beta,y,desc_a,info,& return end subroutine psb_zspmv - - - -subroutine psb_zspmv_vect(alpha,a,x,beta,y,desc_a,info,& - & trans, work, doswap) - use psb_base_mod, psb_protect_name => psb_zspmv_vect - use psi_mod - implicit none - - complex(psb_dpk_), intent(in) :: alpha, beta - type(psb_z_vect_type), intent(inout) :: x - type(psb_z_vect_type), intent(inout) :: y - type(psb_zspmat_type), intent(in) :: a - type(psb_desc_type), intent(in) :: desc_a - integer(psb_ipk_), intent(out) :: info - complex(psb_dpk_), optional, target, intent(inout) :: work(:) - character, intent(in), optional :: trans - logical, intent(in), optional :: doswap - - ! locals - integer(psb_ipk_) :: ictxt, np, me,& - & err_act, iix, jjx, iia, jja, nrow, ncol, lldx, lldy, & - & liwork, iiy, jjy, ib, ip, idx - integer(psb_lpk_) :: ix, ijx, iy, ijy, m, n, ia, ja - integer(psb_ipk_), parameter :: nb=4 - complex(psb_dpk_), pointer :: iwork(:), xp(:), yp(:) - complex(psb_dpk_), allocatable :: xvsave(:) - character :: trans_ - character(len=20) :: name, ch_err - logical :: aliw, doswap_ - integer(psb_ipk_) :: debug_level, debug_unit - - name='psb_zspmv' - info=psb_success_ - call psb_erractionsave(err_act) - if (psb_errstatus_fatal()) then - info = psb_err_internal_error_ ; goto 9999 - end if - debug_unit = psb_get_debug_unit() - debug_level = psb_get_debug_level() - - ictxt=desc_a%get_context() - call psb_info(ictxt, me, np) - if (np == -1) then - info = psb_err_context_error_ - call psb_errpush(info,name) - goto 9999 - endif - - if (.not.allocated(x%v)) then - info = psb_err_invalid_vect_state_ - call psb_errpush(info,name) - goto 9999 - endif - if (.not.allocated(y%v)) then - info = psb_err_invalid_vect_state_ - call psb_errpush(info,name) - goto 9999 - endif - - if (present(doswap)) then - doswap_ = doswap - else - doswap_ = .true. - endif - - if (present(trans)) then - trans_ = psb_toupper(trans) - else - trans_ = 'N' - endif - if ( (trans_ == 'N').or.(trans_ == 'T')& - & .or.(trans_ == 'C')) then - else - info = psb_err_iarg_invalid_value_ - call psb_errpush(info,name) - goto 9999 - end if - - m = desc_a%get_global_rows() - n = desc_a%get_global_cols() - nrow = desc_a%get_local_rows() - ncol = desc_a%get_local_cols() - lldx = x%get_nrows() - lldy = y%get_nrows() - if ((info == 0).and.(lldx null() - ! check for presence/size of a work area - liwork= 2*ncol - - if (present(work)) then - if (size(work) >= liwork) then - aliw =.false. - else - aliw=.true. - endif - else - aliw=.true. - end if - - if (aliw) then - allocate(iwork(liwork),stat=info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='Allocate' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - else - iwork => work - endif - - if (debug_level >= psb_debug_comp_) & - & write(debug_unit,*) me,' ',trim(name),' Allocated work ', info - - if (trans_ == 'N') then - ! Matrix is not transposed - - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& - & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - end if - - call psb_csmm(alpha,a,x,beta,y,info) - - if(info /= psb_success_) then - info = psb_err_from_subroutine_non_ - call psb_errpush(info,name) - goto 9999 - end if - - else - ! Matrix is transposed - - ! - ! Non-empty overlap, need a buffer to hold - ! the entries updated with average operator. - ! Why the average? because in this way they will contribute - ! with a proper scale factor (1/np) to the overall product. - ! - call psi_ovrl_save(x%v,xvsave,desc_a,info) - if (info == psb_success_) call psi_ovrl_upd(x%v,desc_a,psb_avg_,info) - - if (beta /= zzero) call y%set(zzero,nrow+1,ncol) - ! local Matrix-vector product - if (info == psb_success_) call psb_csmm(alpha,a,x,beta,y,info,trans=trans_) - - if (debug_level >= psb_debug_comp_) & - & write(debug_unit,*) me,' ',trim(name),' csmm ', info - - if (info == psb_success_) call psi_ovrl_restore(x%v,xvsave,desc_a,info) - if (info /= psb_success_) then - info = psb_err_from_subroutine_ - ch_err='psb_csmm' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - if (doswap_) then - call psi_swaptran(ior(psb_swap_send_,psb_swap_recv_),& - & zone,y%v,desc_a,iwork,info) - if (info == psb_success_) call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& - & zone,y%v,desc_a,iwork,info,data=psb_comm_ovr_) - - if (debug_level >= psb_debug_comp_) & - & write(debug_unit,*) me,' ',trim(name),' swaptran ', info - if(info /= psb_success_) then - info = psb_err_from_subroutine_ - ch_err='PSI_SwapTran' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - end if - - end if - - if (aliw) deallocate(iwork,stat=info) - if (debug_level >= psb_debug_comp_) & - & write(debug_unit,*) me,' ',trim(name),' deallocat ',aliw, info - if(info /= psb_success_) then - info = psb_err_from_subroutine_ - ch_err='Deallocate iwork' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - nullify(iwork) - - call psb_erractionrestore(err_act) - if (debug_level >= psb_debug_comp_) then - call psb_barrier(ictxt) - write(debug_unit,*) me,' ',trim(name),' Returning ' - endif - return - -9999 call psb_error_handler(ictxt,err_act) - - return -end subroutine psb_zspmv_vect diff --git a/base/psblas/psb_zspnrm1.f90 b/base/psblas/psb_zspnrm1.f90 index 3c292a74..7bc5fa15 100644 --- a/base/psblas/psb_zspnrm1.f90 +++ b/base/psblas/psb_zspnrm1.f90 @@ -40,6 +40,7 @@ ! a - type(psb_zspmat_type). The sparse matrix containing A. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code +! global - logical(optional) Whether to perform the global reduction, default: .true. ! function psb_zspnrm1(a,desc_a,info,global) result(res) use psb_base_mod, psb_protect_name => psb_zspnrm1 diff --git a/base/psblas/psb_zspsm.f90 b/base/psblas/psb_zspsm.f90 index 13e00d3f..63cbe783 100644 --- a/base/psblas/psb_zspsm.f90 +++ b/base/psblas/psb_zspsm.f90 @@ -31,6 +31,203 @@ ! ! File: psb_zspsm.f90 ! +! +! Subroutine: psb_zsps_vect +! Performs one of the distributed matrix-vector operations +! +! Y := alpha * Pr * A-1 * Pc * X + beta * Y, or +! +! Y := alpha * D * Pr * A-1 * Pc * X + beta * Y, or +! +! Y := alpha * Pr * A-1 * Pc * D * X + beta * Y, or +! +! Y := alpha * Pr * A-T * Pc * X + beta * Y, or +! +! Y := alpha * D * Pr * A-T * Pc * X + beta * Y, or +! +! Y := alpha * Pr * A-T * Pc * D * X + beta * Y, or +! +! X is a distributed +! vector and T is a M-by-M distributed triangular matrix. +! +! +! Arguments: +! alpha - complex. The scalar alpha. +! a - type(psb_zspmat_type). The sparse matrix containing A. +! x - type(psb_z_vect_type) The input vector containing the entries of ( X ). +! beta - complex The scalar beta. +! y - type(psb_z_vect_type) The input vector containing the entries of ( Y ). +! desc_a - type(psb_desc_type). The communication descriptor. +! info - integer. Return code +! trans - character(optional). Whether A or A'. If not present 'N' is assumed. +! scale - character(optional). Specify some type of operation with +! the diagonal matrix D. +! choice - integer(optional). The kind of update to perform on overlap elements. +! d(:) - complex, optional Matrix for diagonal scaling. +! work(:) - complex, optional Working area. +! +subroutine psb_zspsv_vect(alpha,a,x,beta,y,desc_a,info,& + & trans, scale, choice, diag, work) + use psb_base_mod, psb_protect_name => psb_zspsv_vect + use psi_mod + implicit none + + complex(psb_dpk_), intent(in) :: alpha, beta + type(psb_z_vect_type), intent(inout) :: x + type(psb_z_vect_type), intent(inout) :: y + type(psb_zspmat_type), intent(inout) :: a + type(psb_desc_type), intent(in) :: desc_a + integer(psb_ipk_), intent(out) :: info + type(psb_z_vect_type), intent(inout), optional :: diag + complex(psb_dpk_), optional, target, intent(inout) :: work(:) + character, intent(in), optional :: trans, scale + integer(psb_ipk_), intent(in), optional :: choice + + ! locals + integer(psb_ipk_) :: ictxt, np, me, & + & err_act, iix, jjx, ia, ja, iia, jja, lldx,lldy, choice_,& + & ix, iy, ik, jx, jy, i, lld,& + & m, nrow, ncol, liwork, llwork, iiy, jjy, idx, ndm + + character :: lscale + integer(psb_ipk_), parameter :: nb=4 + complex(psb_dpk_),pointer :: iwork(:), xp(:), yp(:) + character :: itrans + character(len=20) :: name, ch_err + logical :: aliw + + name='psb_zspsv_vect' + info=psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_ ; goto 9999 + end if + + ictxt=desc_a%get_context() + + call psb_info(ictxt, me, np) + if (np == -1) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(y%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + if (present(choice)) then + choice_ = choice + else + choice_ = psb_avg_ + endif + + if (present(scale)) then + lscale = psb_toupper(scale) + else + lscale = 'U' + endif + + if (present(trans)) then + itrans = psb_toupper(trans) + if((itrans == 'N').or.(itrans == 'T').or.(itrans == 'C')) then + ! Ok + else + info = psb_err_iarg_invalid_value_ + call psb_errpush(info,name) + goto 9999 + end if + else + itrans = 'N' + endif + + m = desc_a%get_global_rows() + nrow = desc_a%get_local_rows() + ncol = desc_a%get_local_cols() + lldx = x%get_nrows() + lldy = y%get_nrows() + if ((info == 0).and.(lldx null() + ! check for presence/size of a work area + liwork= 2*ncol + + if (present(work)) then + if (size(work) >= liwork) then + aliw =.false. + else + aliw=.true. + endif + else + aliw=.true. + end if + + if (aliw) then + allocate(iwork(liwork),stat=info) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + else + iwork => work + endif + + iwork(1)=0.d0 + + ! Perform local triangular system solve + if (present(diag)) then + call a%spsm(alpha,x,beta,y,info,scale=scale,d=diag,trans=trans) + else + call a%spsm(alpha,x,beta,y,info,scale=scale,trans=trans) + end if + if(info /= psb_success_) then + info = psb_err_from_subroutine_ + ch_err='dcssm' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + ! update overlap elements + if (choice_ > 0) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & zone,y%v,desc_a,iwork,info,data=psb_comm_ovr_) + + + if (info == psb_success_) call psi_ovrl_upd(y%v,desc_a,choice_,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Inner updates') + goto 9999 + end if + end if + + if (aliw) deallocate(iwork) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ictxt,err_act) + + return +end subroutine psb_zspsv_vect +! ! Subroutine: psb_zspsm ! Performs one of the distributed matrix-vector operations ! @@ -283,38 +480,6 @@ subroutine psb_zspsm(alpha,a,x,beta,y,desc_a,info,& return end subroutine psb_zspsm - -!!$ -!!$ Parallel Sparse BLAS version 3.5 -!!$ (C) Copyright 2006-2018 -!!$ Salvatore Filippone University of Rome Tor Vergata -!!$ Alfredo Buttari -!!$ -!!$ Redistribution and use in source and binary forms, with or without -!!$ modification, are permitted provided that the following conditions -!!$ are met: -!!$ 1. Redistributions of source code must retain the above copyright -!!$ notice, this list of conditions and the following disclaimer. -!!$ 2. Redistributions in binary form must reproduce the above copyright -!!$ notice, this list of conditions, and the following disclaimer in the -!!$ documentation and/or other materials provided with the distribution. -!!$ 3. The name of the PSBLAS group or the names of its contributors may -!!$ not be used to endorse or promote products derived from this -!!$ software without specific written permission. -!!$ -!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -!!$ POSSIBILITY OF SUCH DAMAGE. -!!$ -!!$ ! ! Subroutine: psb_zspsv ! Performs one of the distributed matrix-vector operations @@ -545,166 +710,3 @@ subroutine psb_zspsv(alpha,a,x,beta,y,desc_a,info,& return end subroutine psb_zspsv - -subroutine psb_zspsv_vect(alpha,a,x,beta,y,desc_a,info,& - & trans, scale, choice, diag, work) - use psb_base_mod, psb_protect_name => psb_zspsv_vect - use psi_mod - implicit none - - complex(psb_dpk_), intent(in) :: alpha, beta - type(psb_z_vect_type), intent(inout) :: x - type(psb_z_vect_type), intent(inout) :: y - type(psb_zspmat_type), intent(inout) :: a - type(psb_desc_type), intent(in) :: desc_a - integer(psb_ipk_), intent(out) :: info - type(psb_z_vect_type), intent(inout), optional :: diag - complex(psb_dpk_), optional, target, intent(inout) :: work(:) - character, intent(in), optional :: trans, scale - integer(psb_ipk_), intent(in), optional :: choice - - ! locals - integer(psb_ipk_) :: ictxt, np, me, & - & err_act, iix, jjx, ia, ja, iia, jja, lldx,lldy, choice_,& - & ix, iy, ik, jx, jy, i, lld,& - & m, nrow, ncol, liwork, llwork, iiy, jjy, idx, ndm - - character :: lscale - integer(psb_ipk_), parameter :: nb=4 - complex(psb_dpk_),pointer :: iwork(:), xp(:), yp(:) - character :: itrans - character(len=20) :: name, ch_err - logical :: aliw - - name='psb_sspsv' - info=psb_success_ - call psb_erractionsave(err_act) - if (psb_errstatus_fatal()) then - info = psb_err_internal_error_ ; goto 9999 - end if - - ictxt=desc_a%get_context() - - call psb_info(ictxt, me, np) - if (np == -1) then - info = psb_err_context_error_ - call psb_errpush(info,name) - goto 9999 - endif - - if (.not.allocated(x%v)) then - info = psb_err_invalid_vect_state_ - call psb_errpush(info,name) - goto 9999 - endif - if (.not.allocated(y%v)) then - info = psb_err_invalid_vect_state_ - call psb_errpush(info,name) - goto 9999 - endif - - - if (present(choice)) then - choice_ = choice - else - choice_ = psb_avg_ - endif - - if (present(scale)) then - lscale = psb_toupper(scale) - else - lscale = 'U' - endif - - if (present(trans)) then - itrans = psb_toupper(trans) - if((itrans == 'N').or.(itrans == 'T').or.(itrans == 'C')) then - ! Ok - else - info = psb_err_iarg_invalid_value_ - call psb_errpush(info,name) - goto 9999 - end if - else - itrans = 'N' - endif - - m = desc_a%get_global_rows() - nrow = desc_a%get_local_rows() - ncol = desc_a%get_local_cols() - lldx = x%get_nrows() - lldy = y%get_nrows() - if ((info == 0).and.(lldx null() - ! check for presence/size of a work area - liwork= 2*ncol - - if (present(work)) then - if (size(work) >= liwork) then - aliw =.false. - else - aliw=.true. - endif - else - aliw=.true. - end if - - if (aliw) then - allocate(iwork(liwork),stat=info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_realloc' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - else - iwork => work - endif - - iwork(1)=0.d0 - - ! Perform local triangular system solve - if (present(diag)) then - call a%spsm(alpha,x,beta,y,info,scale=scale,d=diag,trans=trans) - else - call a%spsm(alpha,x,beta,y,info,scale=scale,trans=trans) - end if - if(info /= psb_success_) then - info = psb_err_from_subroutine_ - ch_err='dcssm' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - ! update overlap elements - if (choice_ > 0) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& - & zone,y%v,desc_a,iwork,info,data=psb_comm_ovr_) - - - if (info == psb_success_) call psi_ovrl_upd(y%v,desc_a,choice_,info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Inner updates') - goto 9999 - end if - end if - - if (aliw) deallocate(iwork) - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(ictxt,err_act) - - return -end subroutine psb_zspsv_vect - diff --git a/base/serial/psb_camax_s.f90 b/base/serial/psb_camax_s.f90 index 7e608c8e..d9073d0f 100644 --- a/base/serial/psb_camax_s.f90 +++ b/base/serial/psb_camax_s.f90 @@ -31,7 +31,7 @@ ! ! ! Function: psb_camax_s -! Searches the absolute max of X. +! Computes the max absolute value of X. ! ! normi := max(abs(X(i)) ! diff --git a/base/serial/psb_casum_s.f90 b/base/serial/psb_casum_s.f90 index cf799cdc..1c357cc8 100644 --- a/base/serial/psb_casum_s.f90 +++ b/base/serial/psb_casum_s.f90 @@ -31,9 +31,9 @@ ! ! ! Function: psb_casum_s -! Searches the absolute max of X. +! Computes the sum of absolute values of X. ! -! normi := max(abs(X(i)) +! asum := sum(abs(X(:)) ! ! Arguments: ! n - integer size of X diff --git a/base/serial/psb_damax_s.f90 b/base/serial/psb_damax_s.f90 index f376f0ba..66aef288 100644 --- a/base/serial/psb_damax_s.f90 +++ b/base/serial/psb_damax_s.f90 @@ -31,7 +31,7 @@ ! ! ! Function: psb_damax_s -! Searches the absolute max of X. +! Computes the max absolute value of X. ! ! normi := max(abs(X(i)) ! diff --git a/base/serial/psb_dasum_s.f90 b/base/serial/psb_dasum_s.f90 index 6fb6e4f6..1ab42614 100644 --- a/base/serial/psb_dasum_s.f90 +++ b/base/serial/psb_dasum_s.f90 @@ -31,9 +31,9 @@ ! ! ! Function: psb_dasum_s -! Searches the absolute max of X. +! Computes the sum of absolute values of X. ! -! normi := max(abs(X(i)) +! asum := sum(abs(X(:)) ! ! Arguments: ! n - integer size of X diff --git a/base/serial/psb_samax_s.f90 b/base/serial/psb_samax_s.f90 index da7fc93e..f2b1fee2 100644 --- a/base/serial/psb_samax_s.f90 +++ b/base/serial/psb_samax_s.f90 @@ -31,7 +31,7 @@ ! ! ! Function: psb_samax_s -! Searches the absolute max of X. +! Computes the max absolute value of X. ! ! normi := max(abs(X(i)) ! diff --git a/base/serial/psb_sasum_s.f90 b/base/serial/psb_sasum_s.f90 index 7a3b44e2..c692cbd4 100644 --- a/base/serial/psb_sasum_s.f90 +++ b/base/serial/psb_sasum_s.f90 @@ -31,9 +31,9 @@ ! ! ! Function: psb_sasum_s -! Searches the absolute max of X. +! Computes the sum of absolute values of X. ! -! normi := max(abs(X(i)) +! asum := sum(abs(X(:)) ! ! Arguments: ! n - integer size of X diff --git a/base/serial/psb_zamax_s.f90 b/base/serial/psb_zamax_s.f90 index 8cac2e2e..4a408566 100644 --- a/base/serial/psb_zamax_s.f90 +++ b/base/serial/psb_zamax_s.f90 @@ -31,7 +31,7 @@ ! ! ! Function: psb_zamax_s -! Searches the absolute max of X. +! Computes the max absolute value of X. ! ! normi := max(abs(X(i)) ! diff --git a/base/serial/psb_zasum_s.f90 b/base/serial/psb_zasum_s.f90 index f00e4347..35a043ed 100644 --- a/base/serial/psb_zasum_s.f90 +++ b/base/serial/psb_zasum_s.f90 @@ -31,9 +31,9 @@ ! ! ! Function: psb_zasum_s -! Searches the absolute max of X. +! Computes the sum of absolute values of X. ! -! normi := max(abs(X(i)) +! asum := sum(abs(X(:)) ! ! Arguments: ! n - integer size of X diff --git a/test/pargen/psb_d_pde3d.f90 b/test/pargen/psb_d_pde3d.f90 index 60cb9e9e..429e9a0e 100644 --- a/test/pargen/psb_d_pde3d.f90 +++ b/test/pargen/psb_d_pde3d.f90 @@ -214,8 +214,7 @@ contains ! Note: integer control variables going directly into an MPI call ! must be 4 bytes, i.e. psb_mpk_ integer(psb_mpk_) :: npdims(3), npp, minfo - integer(psb_mpk_) :: npx,npy,npz, iamx,iamy,iamz - integer(psb_ipk_) :: mynx,myny,mynz + integer(psb_ipk_) :: npx,npy,npz, iamx,iamy,iamz,mynx,myny,mynz integer(psb_ipk_), allocatable :: bndx(:),bndy(:),bndz(:) ! Process grid integer(psb_ipk_) :: np, iam @@ -236,9 +235,8 @@ contains call psb_erractionsave(err_act) call psb_info(ictxt, iam, np) - call psb_cd_set_large_threshold(1000) - call psb_cd_set_maxspace(-1) - + + if (present(f)) then f_ => f else @@ -373,49 +371,7 @@ contains ! the set of global indices it owns. ! call psb_cdall(ictxt,desc_a,info,vl=myidx) - - block - ! - ! Test adjcncy methods - ! - integer(psb_mpk_), allocatable :: neighbours(:) - integer(psb_mpk_) :: cnt - logical, parameter :: debug_adj=.false. - if (debug_adj.and.(np > 1)) then - cnt = 0 - allocate(neighbours(np)) - if (iamx < npx-1) then - cnt = cnt + 1 - call ijk2idx(neighbours(cnt),iamx+1,iamy,iamz,npx,npy,npz,base=0) - end if - if (iamy < npy-1) then - cnt = cnt + 1 - call ijk2idx(neighbours(cnt),iamx,iamy+1,iamz,npx,npy,npz,base=0) - end if - if (iamz < npz-1) then - cnt = cnt + 1 - call ijk2idx(neighbours(cnt),iamx,iamy,iamz+1,npx,npy,npz,base=0) - end if - if (iamx >0) then - cnt = cnt + 1 - call ijk2idx(neighbours(cnt),iamx-1,iamy,iamz,npx,npy,npz,base=0) - end if - if (iamy >0) then - cnt = cnt + 1 - call ijk2idx(neighbours(cnt),iamx,iamy-1,iamz,npx,npy,npz,base=0) - end if - if (iamz >0) then - cnt = cnt + 1 - call ijk2idx(neighbours(cnt),iamx,iamy,iamz-1,npx,npy,npz,base=0) - end if - call psb_realloc(cnt, neighbours,info) - call desc_a%set_p_adjcncy(neighbours) - write(0,*) iam,' Check on neighbours: ',desc_a%get_p_adjcncy() - end if - end block - - case default write(psb_err_unit,*) iam, 'Initialization error: should not get here' info = -1 diff --git a/test/pargen/runs/ppde.inp b/test/pargen/runs/ppde.inp index 82f0bce6..a0f28e99 100644 --- a/test/pargen/runs/ppde.inp +++ b/test/pargen/runs/ppde.inp @@ -2,7 +2,7 @@ BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES FCG CGR BJAC Preconditioner NONE DIAG BJAC CSR Storage format for matrix A: CSR COO -040 Domain size (acutal system is this**3 (pde3d) or **2 (pde2d) ) +080 Domain size (acutal system is this**3 (pde3d) or **2 (pde2d) ) 3 Partition: 1 BLOCK 3 3D 2 Stopping criterion 1 2 0100 MAXIT