Update internal documentation.

5 years ago · e2bd21d9be
parent 92dae92582
commit e2bd21d9be
148 changed files with 3186 additions and 2292 deletions
--- a/base/comm/internals/psi_covrl_restr.f90
+++ b/base/comm/internals/psi_covrl_restr.f90
@ -28,8 +28,13 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
+!
+! Subroutine: psi_covrl_restr
+!   These subroutines restore the overlap region of a vector; they are  used
+!   for the transpose  matrix-vector product when there is a nonempty overlap.
+!                                              
+!    
 !    
-
 subroutine  psi_covrl_restr_vect(x,xs,desc_a,info)
  use psi_mod, psi_protect_name =>   psi_covrl_restr_vect
  use psb_c_base_vect_mod
--- a/base/comm/internals/psi_covrl_restr_a.f90
+++ b/base/comm/internals/psi_covrl_restr_a.f90
@ -28,6 +28,11 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
+!
+! Subroutine: psi_covrl_restr
+!   These subroutines restore the overlap region of a vector; they are  used
+!   for the transpose  matrix-vector product when there is a nonempty overlap.
+!                                              
 !    
 subroutine  psi_covrl_restrr1(x,xs,desc_a,info)
  use psi_mod, psi_protect_name =>   psi_covrl_restrr1
--- a/base/comm/internals/psi_covrl_save.f90
+++ b/base/comm/internals/psi_covrl_save.f90
@ -28,6 +28,11 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
+!
+! Subroutine: psi_covrl_save
+!   These subroutines save the overlap region of a vector; they are used
+!   for the transpose  matrix-vector product when there is a nonempty overlap.
+!                                              
 !    
 subroutine  psi_covrl_save_vect(x,xs,desc_a,info)
  use psi_mod, psi_protect_name =>   psi_covrl_save_vect
--- a/base/comm/internals/psi_covrl_save_a.f90
+++ b/base/comm/internals/psi_covrl_save_a.f90
@ -28,8 +28,12 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
-!    
-
+!
+!
+! Subroutine: psi_covrl_save
+!   These subroutines save the overlap region of a vector; they are used
+!   for the transpose  matrix-vector product when there is a nonempty overlap.
+!                                              
 subroutine  psi_covrl_saver1(x,xs,desc_a,info)
  use psi_mod, psi_protect_name =>   psi_covrl_saver1

--- a/base/comm/internals/psi_covrl_upd.f90
+++ b/base/comm/internals/psi_covrl_upd.f90
@ -28,8 +28,14 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
+!
+! Subroutine: psi_covrl_upd
+!   These subroutines update the overlap region of a vector; they are  used
+!   for the transpose  matrix-vector product when there is a nonempty overlap,
+!   or for the application of Additive Schwarz preconditioners.                                           
+!    
+!    
 !    
-
 subroutine  psi_covrl_upd_vect(x,desc_a,update,info)
  use psi_mod, psi_protect_name =>   psi_covrl_upd_vect
  use psb_realloc_mod
--- a/base/comm/internals/psi_covrl_upd_a.f90
+++ b/base/comm/internals/psi_covrl_upd_a.f90
@ -28,6 +28,12 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
+!
+! Subroutine: psi_covrl_upd
+!   These subroutines update the overlap region of a vector; they are  used
+!   for the transpose  matrix-vector product when there is a nonempty overlap,
+!   or for the application of Additive Schwarz preconditioners.                                           
+!    
 !    
 subroutine  psi_covrl_updr1(x,desc_a,update,info)
  use psi_mod, psi_protect_name =>   psi_covrl_updr1
--- a/base/comm/internals/psi_cswapdata.F90
+++ b/base/comm/internals/psi_cswapdata.F90
@ -32,8 +32,10 @@
 !
 ! File: psi_cswapdata.F90
 !
-! Subroutine: psi_cswapdatam
-!   Does the data exchange among processes. Essentially this is doing 
+!
+!
+! Subroutine: psi_cswapdata_vect
+!   Implements the data exchange among processes. Essentially this is doing 
 !   a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but 
 !   it is capable of pruning empty exchanges, which are very likely in out 
 !   application environment. All the variants have the same structure 
@ -43,12 +45,15 @@
 !                                         C    complex(psb_spk_)
 !                                         Z    complex(psb_dpk_)
 !   Basically the operation is as follows: on each process, we identify 
-!   sections SND(Y) and RCV(Y); then we do a send on (PACK(SND(Y)));
-!   then we receive, and we do an update with Y = UNPACK(RCV(Y)) + BETA * Y 
-!   but only on the elements involved in the UNPACK operation. 
+!   sections SND(Y) and RCV(Y); then we do a SEND(PACK(GTH(SND(Y))));
+!   then we receive, and we do an update with Y = SCT(RCV(Y)) + BETA * Y 
+!   but only on the elements involved in the SCT operation. 
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
+!   This version works on encapsulated vectors, and uses their methods to do GTH and SCT,
+!   so that special versions (e.g. GPU vectors) can override them.
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -69,10 +74,10 @@
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:,:)   - X                       The data area                        
+!    beta     - complex                  Choose overwrite or sum. 
+!    y        - type(psb_c_vect_type) The data area                        
 !    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    work(:)  - complex                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
@ -83,14 +88,6 @@
 !                                       psb_comm_mov_     use ovr_mst_idx
 !
 !
-!
-!
-! Subroutine: psi_cswapdata_vect
-!   Data exchange among processes.
-!
-!   Takes care of Y an exanspulated vector.
-!   
-!   
 ! 
 subroutine psi_cswapdata_vect(flag,beta,y,desc_a,work,info,data)

@ -426,7 +423,7 @@ end subroutine psi_cswap_vidx_vect
 ! Subroutine: psi_cswapdata_multivect
 !   Data exchange among processes.
 !
-!   Takes care of Y an encaspulated vector.
+!   Takes care of Y, an encapsulated multivector.
 !   
 !   
 subroutine psi_cswapdata_multivect(flag,beta,y,desc_a,work,info,data)
--- a/base/comm/internals/psi_cswapdata_a.F90
+++ b/base/comm/internals/psi_cswapdata_a.F90
@ -33,9 +33,9 @@
 ! File: psi_cswapdata.F90
 !
 ! Subroutine: psi_cswapdatam
-!   Does the data exchange among processes. Essentially this is doing 
+!   Implements the data exchange among processes. Essentially this is doing 
 !   a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but 
-!   it is capable of pruning empty exchanges, which are very likely in out 
+!   it is capable of pruning empty exchanges, which are very likely in our
 !   application environment. All the variants have the same structure 
 !   In all these subroutines X may be:    I    Integer
 !                                         S    real(psb_spk_)
@ -49,6 +49,7 @@
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -69,10 +70,10 @@
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:,:)   - X                       The data area                        
+!    beta     - complex                  Choose overwrite or sum. 
+!    y(:,:)   - complex                  The data area                        
 !    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    work(:)  - complex                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
@ -507,7 +508,7 @@ end subroutine psi_cswapidxm
 !
 !
 ! Subroutine: psi_cswapdatav
-!   Does the data exchange among processes. Essentially this is doing 
+!   Implements the data exchange among processes. Essentially this is doing 
 !   a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but 
 !   it is capable of pruning empty exchanges, which are very likely in out 
 !   application environment. All the variants have the same structure 
@ -523,6 +524,7 @@ end subroutine psi_cswapidxm
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -543,10 +545,10 @@ end subroutine psi_cswapidxm
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:)     - X                       The data area                        
+!    beta     - complex                  Choose overwrite or sum. 
+!    y(:)     - complex                  The data area                        
 !    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    work(:)  - complex                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
--- a/base/comm/internals/psi_cswaptran.F90
+++ b/base/comm/internals/psi_cswaptran.F90
@ -32,8 +32,8 @@
 !
 ! File: psi_cswaptran.F90
 !
-! Subroutine: psi_cswaptranm
-!   Does the data exchange among processes. This is similar to Xswapdata, but
+! Subroutine: psi_cswaptran_vect
+!   Implements the data exchange among processes. This is similar to Xswapdata, but
 !   the list is read "in reverse", i.e. indices that are normally SENT are used 
 !   for the RECEIVE part and vice-versa. This is the basic data exchange operation
 !   for doing the product of a sparse matrix by a vector. 
@ -47,12 +47,16 @@
 !                                         C    complex(psb_spk_)
 !                                         Z    complex(psb_dpk_)
 !   Basically the operation is as follows: on each process, we identify 
-!   sections SND(Y) and RCV(Y); then we do a SEND(PACK(SND(Y)));
-!   then we receive, and we do an update with Y = UNPACK(RCV(Y)) + BETA * Y 
-!   but only on the elements involved in the UNPACK operation. 
+!   sections SND(Y) and RCV(Y); then we do a SEND(PACK(GTH(SND(Y))));
+!   then we receive, and we do an update with Y = SCT(RCV(Y)) + BETA * Y 
+!   but only on the elements involved in the SCT operation. 
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
+!   This version works on encapsulated vectors, and uses their methods to do GTH and SCT,
+!   so that special versions (e.g. GPU vectors) can override them.
+! 
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -73,10 +77,10 @@
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:,:)   - X                       The data area                        
-!    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    beta     - complex                  Choose overwrite or sum. 
+!    y        - type(psb_c_vect_type) The data area                        
+!    desc_a   - type(psb_desc_type).    The communication descriptor.        
+!    work(:)  - complex                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
@ -86,13 +90,6 @@
 !                                       psb_comm_ovrl_    use ovrl_index
 !                                       psb_comm_mov_     use ovr_mst_idx
 !
-!
-!
-! Subroutine: psi_cswaptran_vect
-!   Data exchange among processes.
-!
-!   Takes care of Y an exanspulated vector.
-!   
 !   
 subroutine psi_cswaptran_vect(flag,beta,y,desc_a,work,info,data)

@ -171,7 +168,7 @@ end subroutine psi_cswaptran_vect
 ! Subroutine: psi_ctran_vidx_vect
 !   Data exchange among processes.
 !
-!   Takes care of Y an exanspulated vector. Relies on the gather/scatter methods
+!   Takes care of Y an encapsulated vector. Relies on the gather/scatter methods
 !   of vectors. 
 !   
 !   The real workhorse: the outer routine will only choose the index list
@ -436,10 +433,10 @@ end subroutine psi_ctran_vidx_vect
 !
 !
 !
-! Subroutine: psi_cswaptran_vect
+! Subroutine: psi_cswaptran_multivect
 !   Data exchange among processes.
 !
-!   Takes care of Y an encaspulated vector.
+!   Takes care of Y, an encapsulated multivector.
 !   
 !   
 subroutine psi_cswaptran_multivect(flag,beta,y,desc_a,work,info,data)
--- a/base/comm/internals/psi_cswaptran_a.F90
+++ b/base/comm/internals/psi_cswaptran_a.F90
@ -33,7 +33,7 @@
 ! File: psi_cswaptran.F90
 !
 ! Subroutine: psi_cswaptranm
-!   Does the data exchange among processes. This is similar to Xswapdata, but
+!   Implements the data exchange among processes. This is similar to Xswapdata, but
 !   the list is read "in reverse", i.e. indices that are normally SENT are used 
 !   for the RECEIVE part and vice-versa. This is the basic data exchange operation
 !   for doing the product of a sparse matrix by a vector. 
@ -53,6 +53,7 @@
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -73,10 +74,10 @@
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:,:)   - X                       The data area                        
+!    beta     - complex                  Choose overwrite or sum. 
+!    y(:,:)   - complex                  The data area                        
 !    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    work(:)  - complex                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
@ -516,7 +517,7 @@ end subroutine psi_ctranidxm
 !
 !
 ! Subroutine: psi_cswaptranv
-!   Does the data exchange among processes. This is similar to Xswapdata, but
+!   Implements the data exchange among processes. This is similar to Xswapdata, but
 !   the list is read "in reverse", i.e. indices that are normally SENT are used 
 !   for the RECEIVE part and vice-versa. This is the basic data exchange operation
 !   for doing the product of a sparse matrix by a vector. 
@ -536,6 +537,7 @@ end subroutine psi_ctranidxm
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -556,10 +558,10 @@ end subroutine psi_ctranidxm
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:)     - X                       The data area                        
+!    beta     - complex                  Choose overwrite or sum. 
+!    y(:)     - complex                  The data area                        
 !    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    work(:)  - complex                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
--- a/base/comm/internals/psi_dovrl_restr.f90
+++ b/base/comm/internals/psi_dovrl_restr.f90
@ -28,8 +28,13 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
+!
+! Subroutine: psi_dovrl_restr
+!   These subroutines restore the overlap region of a vector; they are  used
+!   for the transpose  matrix-vector product when there is a nonempty overlap.
+!                                              
+!    
 !    
-
 subroutine  psi_dovrl_restr_vect(x,xs,desc_a,info)
  use psi_mod, psi_protect_name =>   psi_dovrl_restr_vect
  use psb_d_base_vect_mod
--- a/base/comm/internals/psi_dovrl_restr_a.f90
+++ b/base/comm/internals/psi_dovrl_restr_a.f90
@ -28,6 +28,11 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
+!
+! Subroutine: psi_dovrl_restr
+!   These subroutines restore the overlap region of a vector; they are  used
+!   for the transpose  matrix-vector product when there is a nonempty overlap.
+!                                              
 !    
 subroutine  psi_dovrl_restrr1(x,xs,desc_a,info)
  use psi_mod, psi_protect_name =>   psi_dovrl_restrr1
--- a/base/comm/internals/psi_dovrl_save.f90
+++ b/base/comm/internals/psi_dovrl_save.f90
@ -28,6 +28,11 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
+!
+! Subroutine: psi_dovrl_save
+!   These subroutines save the overlap region of a vector; they are used
+!   for the transpose  matrix-vector product when there is a nonempty overlap.
+!                                              
 !    
 subroutine  psi_dovrl_save_vect(x,xs,desc_a,info)
  use psi_mod, psi_protect_name =>   psi_dovrl_save_vect
--- a/base/comm/internals/psi_dovrl_save_a.f90
+++ b/base/comm/internals/psi_dovrl_save_a.f90
@ -28,8 +28,12 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
-!    
-
+!
+!
+! Subroutine: psi_dovrl_save
+!   These subroutines save the overlap region of a vector; they are used
+!   for the transpose  matrix-vector product when there is a nonempty overlap.
+!                                              
 subroutine  psi_dovrl_saver1(x,xs,desc_a,info)
  use psi_mod, psi_protect_name =>   psi_dovrl_saver1

--- a/base/comm/internals/psi_dovrl_upd.f90
+++ b/base/comm/internals/psi_dovrl_upd.f90
@ -28,8 +28,14 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
+!
+! Subroutine: psi_dovrl_upd
+!   These subroutines update the overlap region of a vector; they are  used
+!   for the transpose  matrix-vector product when there is a nonempty overlap,
+!   or for the application of Additive Schwarz preconditioners.                                           
+!    
+!    
 !    
-
 subroutine  psi_dovrl_upd_vect(x,desc_a,update,info)
  use psi_mod, psi_protect_name =>   psi_dovrl_upd_vect
  use psb_realloc_mod
--- a/base/comm/internals/psi_dovrl_upd_a.f90
+++ b/base/comm/internals/psi_dovrl_upd_a.f90
@ -28,6 +28,12 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
+!
+! Subroutine: psi_dovrl_upd
+!   These subroutines update the overlap region of a vector; they are  used
+!   for the transpose  matrix-vector product when there is a nonempty overlap,
+!   or for the application of Additive Schwarz preconditioners.                                           
+!    
 !    
 subroutine  psi_dovrl_updr1(x,desc_a,update,info)
  use psi_mod, psi_protect_name =>   psi_dovrl_updr1
--- a/base/comm/internals/psi_dswapdata.F90
+++ b/base/comm/internals/psi_dswapdata.F90
@ -32,8 +32,10 @@
 !
 ! File: psi_dswapdata.F90
 !
-! Subroutine: psi_dswapdatam
-!   Does the data exchange among processes. Essentially this is doing 
+!
+!
+! Subroutine: psi_dswapdata_vect
+!   Implements the data exchange among processes. Essentially this is doing 
 !   a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but 
 !   it is capable of pruning empty exchanges, which are very likely in out 
 !   application environment. All the variants have the same structure 
@ -43,12 +45,15 @@
 !                                         C    complex(psb_spk_)
 !                                         Z    complex(psb_dpk_)
 !   Basically the operation is as follows: on each process, we identify 
-!   sections SND(Y) and RCV(Y); then we do a send on (PACK(SND(Y)));
-!   then we receive, and we do an update with Y = UNPACK(RCV(Y)) + BETA * Y 
-!   but only on the elements involved in the UNPACK operation. 
+!   sections SND(Y) and RCV(Y); then we do a SEND(PACK(GTH(SND(Y))));
+!   then we receive, and we do an update with Y = SCT(RCV(Y)) + BETA * Y 
+!   but only on the elements involved in the SCT operation. 
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
+!   This version works on encapsulated vectors, and uses their methods to do GTH and SCT,
+!   so that special versions (e.g. GPU vectors) can override them.
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -69,10 +74,10 @@
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:,:)   - X                       The data area                        
+!    beta     - real                  Choose overwrite or sum. 
+!    y        - type(psb_d_vect_type) The data area                        
 !    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    work(:)  - real                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
@ -83,14 +88,6 @@
 !                                       psb_comm_mov_     use ovr_mst_idx
 !
 !
-!
-!
-! Subroutine: psi_dswapdata_vect
-!   Data exchange among processes.
-!
-!   Takes care of Y an exanspulated vector.
-!   
-!   
 ! 
 subroutine psi_dswapdata_vect(flag,beta,y,desc_a,work,info,data)

@ -426,7 +423,7 @@ end subroutine psi_dswap_vidx_vect
 ! Subroutine: psi_dswapdata_multivect
 !   Data exchange among processes.
 !
-!   Takes care of Y an encaspulated vector.
+!   Takes care of Y, an encapsulated multivector.
 !   
 !   
 subroutine psi_dswapdata_multivect(flag,beta,y,desc_a,work,info,data)
--- a/base/comm/internals/psi_dswapdata_a.F90
+++ b/base/comm/internals/psi_dswapdata_a.F90
@ -33,9 +33,9 @@
 ! File: psi_dswapdata.F90
 !
 ! Subroutine: psi_dswapdatam
-!   Does the data exchange among processes. Essentially this is doing 
+!   Implements the data exchange among processes. Essentially this is doing 
 !   a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but 
-!   it is capable of pruning empty exchanges, which are very likely in out 
+!   it is capable of pruning empty exchanges, which are very likely in our
 !   application environment. All the variants have the same structure 
 !   In all these subroutines X may be:    I    Integer
 !                                         S    real(psb_spk_)
@ -49,6 +49,7 @@
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -69,10 +70,10 @@
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:,:)   - X                       The data area                        
+!    beta     - real                  Choose overwrite or sum. 
+!    y(:,:)   - real                  The data area                        
 !    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    work(:)  - real                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
@ -507,7 +508,7 @@ end subroutine psi_dswapidxm
 !
 !
 ! Subroutine: psi_dswapdatav
-!   Does the data exchange among processes. Essentially this is doing 
+!   Implements the data exchange among processes. Essentially this is doing 
 !   a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but 
 !   it is capable of pruning empty exchanges, which are very likely in out 
 !   application environment. All the variants have the same structure 
@ -523,6 +524,7 @@ end subroutine psi_dswapidxm
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -543,10 +545,10 @@ end subroutine psi_dswapidxm
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:)     - X                       The data area                        
+!    beta     - real                  Choose overwrite or sum. 
+!    y(:)     - real                  The data area                        
 !    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    work(:)  - real                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
--- a/base/comm/internals/psi_dswaptran.F90
+++ b/base/comm/internals/psi_dswaptran.F90
@ -32,8 +32,8 @@
 !
 ! File: psi_dswaptran.F90
 !
-! Subroutine: psi_dswaptranm
-!   Does the data exchange among processes. This is similar to Xswapdata, but
+! Subroutine: psi_dswaptran_vect
+!   Implements the data exchange among processes. This is similar to Xswapdata, but
 !   the list is read "in reverse", i.e. indices that are normally SENT are used 
 !   for the RECEIVE part and vice-versa. This is the basic data exchange operation
 !   for doing the product of a sparse matrix by a vector. 
@ -47,12 +47,16 @@
 !                                         C    complex(psb_spk_)
 !                                         Z    complex(psb_dpk_)
 !   Basically the operation is as follows: on each process, we identify 
-!   sections SND(Y) and RCV(Y); then we do a SEND(PACK(SND(Y)));
-!   then we receive, and we do an update with Y = UNPACK(RCV(Y)) + BETA * Y 
-!   but only on the elements involved in the UNPACK operation. 
+!   sections SND(Y) and RCV(Y); then we do a SEND(PACK(GTH(SND(Y))));
+!   then we receive, and we do an update with Y = SCT(RCV(Y)) + BETA * Y 
+!   but only on the elements involved in the SCT operation. 
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
+!   This version works on encapsulated vectors, and uses their methods to do GTH and SCT,
+!   so that special versions (e.g. GPU vectors) can override them.
+! 
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -73,10 +77,10 @@
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:,:)   - X                       The data area                        
-!    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    beta     - real                  Choose overwrite or sum. 
+!    y        - type(psb_d_vect_type) The data area                        
+!    desc_a   - type(psb_desc_type).    The communication descriptor.        
+!    work(:)  - real                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
@ -86,13 +90,6 @@
 !                                       psb_comm_ovrl_    use ovrl_index
 !                                       psb_comm_mov_     use ovr_mst_idx
 !
-!
-!
-! Subroutine: psi_dswaptran_vect
-!   Data exchange among processes.
-!
-!   Takes care of Y an exanspulated vector.
-!   
 !   
 subroutine psi_dswaptran_vect(flag,beta,y,desc_a,work,info,data)

@ -171,7 +168,7 @@ end subroutine psi_dswaptran_vect
 ! Subroutine: psi_dtran_vidx_vect
 !   Data exchange among processes.
 !
-!   Takes care of Y an exanspulated vector. Relies on the gather/scatter methods
+!   Takes care of Y an encapsulated vector. Relies on the gather/scatter methods
 !   of vectors. 
 !   
 !   The real workhorse: the outer routine will only choose the index list
@ -436,10 +433,10 @@ end subroutine psi_dtran_vidx_vect
 !
 !
 !
-! Subroutine: psi_dswaptran_vect
+! Subroutine: psi_dswaptran_multivect
 !   Data exchange among processes.
 !
-!   Takes care of Y an encaspulated vector.
+!   Takes care of Y an encapsulated multivector.
 !   
 !   
 subroutine psi_dswaptran_multivect(flag,beta,y,desc_a,work,info,data)
--- a/base/comm/internals/psi_dswaptran_a.F90
+++ b/base/comm/internals/psi_dswaptran_a.F90
@ -33,7 +33,7 @@
 ! File: psi_dswaptran.F90
 !
 ! Subroutine: psi_dswaptranm
-!   Does the data exchange among processes. This is similar to Xswapdata, but
+!   Implements the data exchange among processes. This is similar to Xswapdata, but
 !   the list is read "in reverse", i.e. indices that are normally SENT are used 
 !   for the RECEIVE part and vice-versa. This is the basic data exchange operation
 !   for doing the product of a sparse matrix by a vector. 
@ -53,6 +53,7 @@
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -73,10 +74,10 @@
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:,:)   - X                       The data area                        
+!    beta     - real                  Choose overwrite or sum. 
+!    y(:,:)   - real                  The data area                        
 !    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    work(:)  - real                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
@ -516,7 +517,7 @@ end subroutine psi_dtranidxm
 !
 !
 ! Subroutine: psi_dswaptranv
-!   Does the data exchange among processes. This is similar to Xswapdata, but
+!   Implements the data exchange among processes. This is similar to Xswapdata, but
 !   the list is read "in reverse", i.e. indices that are normally SENT are used 
 !   for the RECEIVE part and vice-versa. This is the basic data exchange operation
 !   for doing the product of a sparse matrix by a vector. 
@ -536,6 +537,7 @@ end subroutine psi_dtranidxm
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -556,10 +558,10 @@ end subroutine psi_dtranidxm
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:)     - X                       The data area                        
+!    beta     - real                  Choose overwrite or sum. 
+!    y(:)     - real                  The data area                        
 !    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    work(:)  - real                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
--- a/base/comm/internals/psi_eovrl_restr_a.f90
+++ b/base/comm/internals/psi_eovrl_restr_a.f90
@ -28,6 +28,11 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
+!
+! Subroutine: psi_eovrl_restr
+!   These subroutines restore the overlap region of a vector; they are  used
+!   for the transpose  matrix-vector product when there is a nonempty overlap.
+!                                              
 !    
 subroutine  psi_eovrl_restrr1(x,xs,desc_a,info)
  use psi_mod, psi_protect_name =>   psi_eovrl_restrr1
--- a/base/comm/internals/psi_eovrl_save_a.f90
+++ b/base/comm/internals/psi_eovrl_save_a.f90
@ -28,8 +28,12 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
-!    
-
+!
+!
+! Subroutine: psi_eovrl_save
+!   These subroutines save the overlap region of a vector; they are used
+!   for the transpose  matrix-vector product when there is a nonempty overlap.
+!                                              
 subroutine  psi_eovrl_saver1(x,xs,desc_a,info)
  use psi_mod, psi_protect_name =>   psi_eovrl_saver1

--- a/base/comm/internals/psi_eovrl_upd_a.f90
+++ b/base/comm/internals/psi_eovrl_upd_a.f90
@ -28,6 +28,12 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
+!
+! Subroutine: psi_eovrl_update
+!   These subroutines update the overlap region of a vector; they are  used
+!   for the transpose  matrix-vector product when there is a nonempty overlap,
+!   or for the application of Additive Schwarz preconditioners.                                           
+!    
 !    
 subroutine  psi_eovrl_updr1(x,desc_a,update,info)
  use psi_mod, psi_protect_name =>   psi_eovrl_updr1
--- a/base/comm/internals/psi_eswapdata_a.F90
+++ b/base/comm/internals/psi_eswapdata_a.F90
@ -33,9 +33,9 @@
 ! File: psi_eswapdata.F90
 !
 ! Subroutine: psi_eswapdatam
-!   Does the data exchange among processes. Essentially this is doing 
+!   Implements the data exchange among processes. Essentially this is doing 
 !   a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but 
-!   it is capable of pruning empty exchanges, which are very likely in out 
+!   it is capable of pruning empty exchanges, which are very likely in our
 !   application environment. All the variants have the same structure 
 !   In all these subroutines X may be:    I    Integer
 !                                         S    real(psb_spk_)
@ -49,6 +49,7 @@
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -69,10 +70,10 @@
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:,:)   - X                       The data area                        
+!    beta     - integer                  Choose overwrite or sum. 
+!    y(:,:)   - integer                  The data area                        
 !    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    work(:)  - integer                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
@ -507,7 +508,7 @@ end subroutine psi_eswapidxm
 !
 !
 ! Subroutine: psi_eswapdatav
-!   Does the data exchange among processes. Essentially this is doing 
+!   Implements the data exchange among processes. Essentially this is doing 
 !   a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but 
 !   it is capable of pruning empty exchanges, which are very likely in out 
 !   application environment. All the variants have the same structure 
@ -523,6 +524,7 @@ end subroutine psi_eswapidxm
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -543,10 +545,10 @@ end subroutine psi_eswapidxm
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:)     - X                       The data area                        
+!    beta     - integer                  Choose overwrite or sum. 
+!    y(:)     - integer                  The data area                        
 !    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    work(:)  - integer                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
--- a/base/comm/internals/psi_eswaptran_a.F90
+++ b/base/comm/internals/psi_eswaptran_a.F90
@ -33,7 +33,7 @@
 ! File: psi_eswaptran.F90
 !
 ! Subroutine: psi_eswaptranm
-!   Does the data exchange among processes. This is similar to Xswapdata, but
+!   Implements the data exchange among processes. This is similar to Xswapdata, but
 !   the list is read "in reverse", i.e. indices that are normally SENT are used 
 !   for the RECEIVE part and vice-versa. This is the basic data exchange operation
 !   for doing the product of a sparse matrix by a vector. 
@ -53,6 +53,7 @@
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -73,10 +74,10 @@
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:,:)   - X                       The data area                        
+!    beta     - integer                  Choose overwrite or sum. 
+!    y(:,:)   - integer                  The data area                        
 !    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    work(:)  - integer                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
@ -516,7 +517,7 @@ end subroutine psi_etranidxm
 !
 !
 ! Subroutine: psi_eswaptranv
-!   Does the data exchange among processes. This is similar to Xswapdata, but
+!   Implements the data exchange among processes. This is similar to Xswapdata, but
 !   the list is read "in reverse", i.e. indices that are normally SENT are used 
 !   for the RECEIVE part and vice-versa. This is the basic data exchange operation
 !   for doing the product of a sparse matrix by a vector. 
@ -536,6 +537,7 @@ end subroutine psi_etranidxm
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -556,10 +558,10 @@ end subroutine psi_etranidxm
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:)     - X                       The data area                        
+!    beta     - integer                  Choose overwrite or sum. 
+!    y(:)     - integer                  The data area                        
 !    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    work(:)  - integer                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
--- a/base/comm/internals/psi_iovrl_restr.f90
+++ b/base/comm/internals/psi_iovrl_restr.f90
@ -28,8 +28,13 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
+!
+! Subroutine: psi_iovrl_restr
+!   These subroutines restore the overlap region of a vector; they are  used
+!   for the transpose  matrix-vector product when there is a nonempty overlap.
+!                                              
+!    
 !    
-
 subroutine  psi_iovrl_restr_vect(x,xs,desc_a,info)
  use psi_mod, psi_protect_name =>   psi_iovrl_restr_vect
  use psb_i_base_vect_mod
--- a/base/comm/internals/psi_iovrl_save.f90
+++ b/base/comm/internals/psi_iovrl_save.f90
@ -28,6 +28,11 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
+!
+! Subroutine: psi_iovrl_save
+!   These subroutines save the overlap region of a vector; they are used
+!   for the transpose  matrix-vector product when there is a nonempty overlap.
+!                                              
 !    
 subroutine  psi_iovrl_save_vect(x,xs,desc_a,info)
  use psi_mod, psi_protect_name =>   psi_iovrl_save_vect
--- a/base/comm/internals/psi_iovrl_upd.f90
+++ b/base/comm/internals/psi_iovrl_upd.f90
@ -28,8 +28,14 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
+!
+! Subroutine: psi_iovrl_update
+!   These subroutines update the overlap region of a vector; they are  used
+!   for the transpose  matrix-vector product when there is a nonempty overlap,
+!   or for the application of Additive Schwarz preconditioners.                                           
+!    
+!    
 !    
-
 subroutine  psi_iovrl_upd_vect(x,desc_a,update,info)
  use psi_mod, psi_protect_name =>   psi_iovrl_upd_vect
  use psb_realloc_mod
--- a/base/comm/internals/psi_iswapdata.F90
+++ b/base/comm/internals/psi_iswapdata.F90
@ -32,8 +32,10 @@
 !
 ! File: psi_iswapdata.F90
 !
-! Subroutine: psi_iswapdatam
-!   Does the data exchange among processes. Essentially this is doing 
+!
+!
+! Subroutine: psi_iswapdata_vect
+!   Implements the data exchange among processes. Essentially this is doing 
 !   a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but 
 !   it is capable of pruning empty exchanges, which are very likely in out 
 !   application environment. All the variants have the same structure 
@ -43,12 +45,15 @@
 !                                         C    complex(psb_spk_)
 !                                         Z    complex(psb_dpk_)
 !   Basically the operation is as follows: on each process, we identify 
-!   sections SND(Y) and RCV(Y); then we do a send on (PACK(SND(Y)));
-!   then we receive, and we do an update with Y = UNPACK(RCV(Y)) + BETA * Y 
-!   but only on the elements involved in the UNPACK operation. 
+!   sections SND(Y) and RCV(Y); then we do a SEND(PACK(GTH(SND(Y))));
+!   then we receive, and we do an update with Y = SCT(RCV(Y)) + BETA * Y 
+!   but only on the elements involved in the SCT operation. 
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
+!   This version works on encapsulated vectors, and uses their methods to do  GTH and SCT,
+!   so that special versions (e.g. GPU vectors) can override them.
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -69,10 +74,10 @@
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:,:)   - X                       The data area                        
+!    beta     - integer                  Choose overwrite or sum. 
+!    y        - type(psb_i_vect_type)   The data area                        
 !    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    work(:)  - integer                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
@ -83,14 +88,6 @@
 !                                       psb_comm_mov_     use ovr_mst_idx
 !
 !
-!
-!
-! Subroutine: psi_iswapdata_vect
-!   Data exchange among processes.
-!
-!   Takes care of Y an exanspulated vector.
-!   
-!   
 ! 
 subroutine psi_iswapdata_vect(flag,beta,y,desc_a,work,info,data)

@ -426,7 +423,7 @@ end subroutine psi_iswap_vidx_vect
 ! Subroutine: psi_iswapdata_multivect
 !   Data exchange among processes.
 !
-!   Takes care of Y an encaspulated vector.
+!   Takes care of Y an encapsulated multivector.
 !   
 !   
 subroutine psi_iswapdata_multivect(flag,beta,y,desc_a,work,info,data)
--- a/base/comm/internals/psi_iswaptran.F90
+++ b/base/comm/internals/psi_iswaptran.F90
@ -32,8 +32,8 @@
 !
 ! File: psi_iswaptran.F90
 !
-! Subroutine: psi_iswaptranm
-!   Does the data exchange among processes. This is similar to Xswapdata, but
+! Subroutine: psi_iswaptran_vect
+!   Implements the data exchange among processes. This is similar to Xswapdata, but
 !   the list is read "in reverse", i.e. indices that are normally SENT are used 
 !   for the RECEIVE part and vice-versa. This is the basic data exchange operation
 !   for doing the product of a sparse matrix by a vector. 
@ -47,12 +47,16 @@
 !                                         C    complex(psb_spk_)
 !                                         Z    complex(psb_dpk_)
 !   Basically the operation is as follows: on each process, we identify 
-!   sections SND(Y) and RCV(Y); then we do a SEND(PACK(SND(Y)));
-!   then we receive, and we do an update with Y = UNPACK(RCV(Y)) + BETA * Y 
-!   but only on the elements involved in the UNPACK operation. 
+!   sections SND(Y) and RCV(Y); then we do a SEND(PACK(GTH(SND(Y))));
+!   then we receive, and we do an update with Y = SCT(RCV(Y)) + BETA * Y 
+!   but only on the elements involved in the SCT operation. 
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
+!   This version works on encapsulated vectors, and uses their methods to do  GTH and SCT,
+!   so that special versions (e.g. GPU vectors) can override them.
+! 
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -73,10 +77,10 @@
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:,:)   - X                       The data area                        
-!    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    beta     - integer                  Choose overwrite or sum. 
+!    y        - type(psb_i_vect_type) The data area                        
+!    desc_a   - type(psb_desc_type).    The communication descriptor.        
+!    work(:)  - integer                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
@ -86,13 +90,6 @@
 !                                       psb_comm_ovrl_    use ovrl_index
 !                                       psb_comm_mov_     use ovr_mst_idx
 !
-!
-!
-! Subroutine: psi_iswaptran_vect
-!   Data exchange among processes.
-!
-!   Takes care of Y an exanspulated vector.
-!   
 !   
 subroutine psi_iswaptran_vect(flag,beta,y,desc_a,work,info,data)

@ -171,7 +168,7 @@ end subroutine psi_iswaptran_vect
 ! Subroutine: psi_itran_vidx_vect
 !   Data exchange among processes.
 !
-!   Takes care of Y an exanspulated vector. Relies on the gather/scatter methods
+!   Takes care of Y an encapsulated vector. Relies on the gather/scatter methods
 !   of vectors. 
 !   
 !   The real workhorse: the outer routine will only choose the index list
@ -436,10 +433,10 @@ end subroutine psi_itran_vidx_vect
 !
 !
 !
-! Subroutine: psi_iswaptran_vect
+! Subroutine: psi_iswaptran_multivect
 !   Data exchange among processes.
 !
-!   Takes care of Y an encaspulated vector.
+!   Takes care of Y an encapsulated multivector.
 !   
 !   
 subroutine psi_iswaptran_multivect(flag,beta,y,desc_a,work,info,data)
--- a/base/comm/internals/psi_lovrl_restr.f90
+++ b/base/comm/internals/psi_lovrl_restr.f90
@ -28,8 +28,13 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
+!
+! Subroutine: psi_lovrl_restr
+!   These subroutines restore the overlap region of a vector; they are  used
+!   for the transpose  matrix-vector product when there is a nonempty overlap.
+!                                              
+!    
 !    
-
 subroutine  psi_lovrl_restr_vect(x,xs,desc_a,info)
  use psi_mod, psi_protect_name =>   psi_lovrl_restr_vect
  use psb_l_base_vect_mod
--- a/base/comm/internals/psi_lovrl_save.f90
+++ b/base/comm/internals/psi_lovrl_save.f90
@ -28,6 +28,11 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
+!
+! Subroutine: psi_lovrl_save
+!   These subroutines save the overlap region of a vector; they are used
+!   for the transpose  matrix-vector product when there is a nonempty overlap.
+!                                              
 !    
 subroutine  psi_lovrl_save_vect(x,xs,desc_a,info)
  use psi_mod, psi_protect_name =>   psi_lovrl_save_vect
--- a/base/comm/internals/psi_lovrl_upd.f90
+++ b/base/comm/internals/psi_lovrl_upd.f90
@ -28,8 +28,14 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
+!
+! Subroutine: psi_lovrl_update
+!   These subroutines update the overlap region of a vector; they are  used
+!   for the transpose  matrix-vector product when there is a nonempty overlap,
+!   or for the application of Additive Schwarz preconditioners.                                           
+!    
+!    
 !    
-
 subroutine  psi_lovrl_upd_vect(x,desc_a,update,info)
  use psi_mod, psi_protect_name =>   psi_lovrl_upd_vect
  use psb_realloc_mod
--- a/base/comm/internals/psi_lswapdata.F90
+++ b/base/comm/internals/psi_lswapdata.F90
@ -32,8 +32,10 @@
 !
 ! File: psi_lswapdata.F90
 !
-! Subroutine: psi_lswapdatam
-!   Does the data exchange among processes. Essentially this is doing 
+!
+!
+! Subroutine: psi_lswapdata_vect
+!   Implements the data exchange among processes. Essentially this is doing 
 !   a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but 
 !   it is capable of pruning empty exchanges, which are very likely in out 
 !   application environment. All the variants have the same structure 
@ -43,12 +45,15 @@
 !                                         C    complex(psb_spk_)
 !                                         Z    complex(psb_dpk_)
 !   Basically the operation is as follows: on each process, we identify 
-!   sections SND(Y) and RCV(Y); then we do a send on (PACK(SND(Y)));
-!   then we receive, and we do an update with Y = UNPACK(RCV(Y)) + BETA * Y 
-!   but only on the elements involved in the UNPACK operation. 
+!   sections SND(Y) and RCV(Y); then we do a SEND(PACK(GTH(SND(Y))));
+!   then we receive, and we do an update with Y = SCT(RCV(Y)) + BETA * Y 
+!   but only on the elements involved in the SCT operation. 
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
+!   This version works on encapsulated vectors, and uses their methods to do  GTH and SCT,
+!   so that special versions (e.g. GPU vectors) can override them.
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -69,10 +74,10 @@
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:,:)   - X                       The data area                        
+!    beta     - integer                  Choose overwrite or sum. 
+!    y        - type(psb_l_vect_type)   The data area                        
 !    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    work(:)  - integer                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
@ -83,14 +88,6 @@
 !                                       psb_comm_mov_     use ovr_mst_idx
 !
 !
-!
-!
-! Subroutine: psi_lswapdata_vect
-!   Data exchange among processes.
-!
-!   Takes care of Y an exanspulated vector.
-!   
-!   
 ! 
 subroutine psi_lswapdata_vect(flag,beta,y,desc_a,work,info,data)

@ -426,7 +423,7 @@ end subroutine psi_lswap_vidx_vect
 ! Subroutine: psi_lswapdata_multivect
 !   Data exchange among processes.
 !
-!   Takes care of Y an encaspulated vector.
+!   Takes care of Y an encapsulated multivector.
 !   
 !   
 subroutine psi_lswapdata_multivect(flag,beta,y,desc_a,work,info,data)
--- a/base/comm/internals/psi_lswaptran.F90
+++ b/base/comm/internals/psi_lswaptran.F90
@ -32,8 +32,8 @@
 !
 ! File: psi_lswaptran.F90
 !
-! Subroutine: psi_lswaptranm
-!   Does the data exchange among processes. This is similar to Xswapdata, but
+! Subroutine: psi_lswaptran_vect
+!   Implements the data exchange among processes. This is similar to Xswapdata, but
 !   the list is read "in reverse", i.e. indices that are normally SENT are used 
 !   for the RECEIVE part and vice-versa. This is the basic data exchange operation
 !   for doing the product of a sparse matrix by a vector. 
@ -47,12 +47,16 @@
 !                                         C    complex(psb_spk_)
 !                                         Z    complex(psb_dpk_)
 !   Basically the operation is as follows: on each process, we identify 
-!   sections SND(Y) and RCV(Y); then we do a SEND(PACK(SND(Y)));
-!   then we receive, and we do an update with Y = UNPACK(RCV(Y)) + BETA * Y 
-!   but only on the elements involved in the UNPACK operation. 
+!   sections SND(Y) and RCV(Y); then we do a SEND(PACK(GTH(SND(Y))));
+!   then we receive, and we do an update with Y = SCT(RCV(Y)) + BETA * Y 
+!   but only on the elements involved in the SCT operation. 
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
+!   This version works on encapsulated vectors, and uses their methods to do  GTH and SCT,
+!   so that special versions (e.g. GPU vectors) can override them.
+! 
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -73,10 +77,10 @@
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:,:)   - X                       The data area                        
-!    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    beta     - integer                  Choose overwrite or sum. 
+!    y        - type(psb_l_vect_type) The data area                        
+!    desc_a   - type(psb_desc_type).    The communication descriptor.        
+!    work(:)  - integer                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
@ -86,13 +90,6 @@
 !                                       psb_comm_ovrl_    use ovrl_index
 !                                       psb_comm_mov_     use ovr_mst_idx
 !
-!
-!
-! Subroutine: psi_lswaptran_vect
-!   Data exchange among processes.
-!
-!   Takes care of Y an exanspulated vector.
-!   
 !   
 subroutine psi_lswaptran_vect(flag,beta,y,desc_a,work,info,data)

@ -171,7 +168,7 @@ end subroutine psi_lswaptran_vect
 ! Subroutine: psi_ltran_vidx_vect
 !   Data exchange among processes.
 !
-!   Takes care of Y an exanspulated vector. Relies on the gather/scatter methods
+!   Takes care of Y an encapsulated vector. Relies on the gather/scatter methods
 !   of vectors. 
 !   
 !   The real workhorse: the outer routine will only choose the index list
@ -436,10 +433,10 @@ end subroutine psi_ltran_vidx_vect
 !
 !
 !
-! Subroutine: psi_lswaptran_vect
+! Subroutine: psi_lswaptran_multivect
 !   Data exchange among processes.
 !
-!   Takes care of Y an encaspulated vector.
+!   Takes care of Y an encapsulated multivector.
 !   
 !   
 subroutine psi_lswaptran_multivect(flag,beta,y,desc_a,work,info,data)
--- a/base/comm/internals/psi_movrl_restr_a.f90
+++ b/base/comm/internals/psi_movrl_restr_a.f90
@ -28,6 +28,11 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
+!
+! Subroutine: psi_movrl_restr
+!   These subroutines restore the overlap region of a vector; they are  used
+!   for the transpose  matrix-vector product when there is a nonempty overlap.
+!                                              
 !    
 subroutine  psi_movrl_restrr1(x,xs,desc_a,info)
  use psi_mod, psi_protect_name =>   psi_movrl_restrr1
--- a/base/comm/internals/psi_movrl_save_a.f90
+++ b/base/comm/internals/psi_movrl_save_a.f90
@ -28,8 +28,12 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
-!    
-
+!
+!
+! Subroutine: psi_movrl_save
+!   These subroutines save the overlap region of a vector; they are used
+!   for the transpose  matrix-vector product when there is a nonempty overlap.
+!                                              
 subroutine  psi_movrl_saver1(x,xs,desc_a,info)
  use psi_mod, psi_protect_name =>   psi_movrl_saver1

--- a/base/comm/internals/psi_movrl_upd_a.f90
+++ b/base/comm/internals/psi_movrl_upd_a.f90
@ -28,6 +28,12 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
+!
+! Subroutine: psi_movrl_update
+!   These subroutines update the overlap region of a vector; they are  used
+!   for the transpose  matrix-vector product when there is a nonempty overlap,
+!   or for the application of Additive Schwarz preconditioners.                                           
+!    
 !    
 subroutine  psi_movrl_updr1(x,desc_a,update,info)
  use psi_mod, psi_protect_name =>   psi_movrl_updr1
--- a/base/comm/internals/psi_mswapdata_a.F90
+++ b/base/comm/internals/psi_mswapdata_a.F90
@ -33,9 +33,9 @@
 ! File: psi_mswapdata.F90
 !
 ! Subroutine: psi_mswapdatam
-!   Does the data exchange among processes. Essentially this is doing 
+!   Implements the data exchange among processes. Essentially this is doing 
 !   a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but 
-!   it is capable of pruning empty exchanges, which are very likely in out 
+!   it is capable of pruning empty exchanges, which are very likely in our
 !   application environment. All the variants have the same structure 
 !   In all these subroutines X may be:    I    Integer
 !                                         S    real(psb_spk_)
@ -49,6 +49,7 @@
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -69,10 +70,10 @@
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:,:)   - X                       The data area                        
+!    beta     - integer                  Choose overwrite or sum. 
+!    y(:,:)   - integer                  The data area                        
 !    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    work(:)  - integer                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
@ -507,7 +508,7 @@ end subroutine psi_mswapidxm
 !
 !
 ! Subroutine: psi_mswapdatav
-!   Does the data exchange among processes. Essentially this is doing 
+!   Implements the data exchange among processes. Essentially this is doing 
 !   a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but 
 !   it is capable of pruning empty exchanges, which are very likely in out 
 !   application environment. All the variants have the same structure 
@ -523,6 +524,7 @@ end subroutine psi_mswapidxm
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -543,10 +545,10 @@ end subroutine psi_mswapidxm
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:)     - X                       The data area                        
+!    beta     - integer                  Choose overwrite or sum. 
+!    y(:)     - integer                  The data area                        
 !    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    work(:)  - integer                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
--- a/base/comm/internals/psi_mswaptran_a.F90
+++ b/base/comm/internals/psi_mswaptran_a.F90
@ -33,7 +33,7 @@
 ! File: psi_mswaptran.F90
 !
 ! Subroutine: psi_mswaptranm
-!   Does the data exchange among processes. This is similar to Xswapdata, but
+!   Implements the data exchange among processes. This is similar to Xswapdata, but
 !   the list is read "in reverse", i.e. indices that are normally SENT are used 
 !   for the RECEIVE part and vice-versa. This is the basic data exchange operation
 !   for doing the product of a sparse matrix by a vector. 
@ -53,6 +53,7 @@
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -73,10 +74,10 @@
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:,:)   - X                       The data area                        
+!    beta     - integer                  Choose overwrite or sum. 
+!    y(:,:)   - integer                  The data area                        
 !    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    work(:)  - integer                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
@ -516,7 +517,7 @@ end subroutine psi_mtranidxm
 !
 !
 ! Subroutine: psi_mswaptranv
-!   Does the data exchange among processes. This is similar to Xswapdata, but
+!   Implements the data exchange among processes. This is similar to Xswapdata, but
 !   the list is read "in reverse", i.e. indices that are normally SENT are used 
 !   for the RECEIVE part and vice-versa. This is the basic data exchange operation
 !   for doing the product of a sparse matrix by a vector. 
@ -536,6 +537,7 @@ end subroutine psi_mtranidxm
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -556,10 +558,10 @@ end subroutine psi_mtranidxm
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:)     - X                       The data area                        
+!    beta     - integer                  Choose overwrite or sum. 
+!    y(:)     - integer                  The data area                        
 !    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    work(:)  - integer                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
--- a/base/comm/internals/psi_sovrl_restr.f90
+++ b/base/comm/internals/psi_sovrl_restr.f90
@ -28,8 +28,13 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
+!
+! Subroutine: psi_sovrl_restr
+!   These subroutines restore the overlap region of a vector; they are  used
+!   for the transpose  matrix-vector product when there is a nonempty overlap.
+!                                              
+!    
 !    
-
 subroutine  psi_sovrl_restr_vect(x,xs,desc_a,info)
  use psi_mod, psi_protect_name =>   psi_sovrl_restr_vect
  use psb_s_base_vect_mod
--- a/base/comm/internals/psi_sovrl_restr_a.f90
+++ b/base/comm/internals/psi_sovrl_restr_a.f90
@ -28,6 +28,11 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
+!
+! Subroutine: psi_sovrl_restr
+!   These subroutines restore the overlap region of a vector; they are  used
+!   for the transpose  matrix-vector product when there is a nonempty overlap.
+!                                              
 !    
 subroutine  psi_sovrl_restrr1(x,xs,desc_a,info)
  use psi_mod, psi_protect_name =>   psi_sovrl_restrr1
--- a/base/comm/internals/psi_sovrl_save.f90
+++ b/base/comm/internals/psi_sovrl_save.f90
@ -28,6 +28,11 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
+!
+! Subroutine: psi_sovrl_save
+!   These subroutines save the overlap region of a vector; they are used
+!   for the transpose  matrix-vector product when there is a nonempty overlap.
+!                                              
 !    
 subroutine  psi_sovrl_save_vect(x,xs,desc_a,info)
  use psi_mod, psi_protect_name =>   psi_sovrl_save_vect
--- a/base/comm/internals/psi_sovrl_save_a.f90
+++ b/base/comm/internals/psi_sovrl_save_a.f90
@ -28,8 +28,12 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
-!    
-
+!
+!
+! Subroutine: psi_sovrl_save
+!   These subroutines save the overlap region of a vector; they are used
+!   for the transpose  matrix-vector product when there is a nonempty overlap.
+!                                              
 subroutine  psi_sovrl_saver1(x,xs,desc_a,info)
  use psi_mod, psi_protect_name =>   psi_sovrl_saver1

--- a/base/comm/internals/psi_sovrl_upd.f90
+++ b/base/comm/internals/psi_sovrl_upd.f90
@ -28,8 +28,14 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
+!
+! Subroutine: psi_sovrl_update
+!   These subroutines update the overlap region of a vector; they are  used
+!   for the transpose  matrix-vector product when there is a nonempty overlap,
+!   or for the application of Additive Schwarz preconditioners.                                           
+!    
+!    
 !    
-
 subroutine  psi_sovrl_upd_vect(x,desc_a,update,info)
  use psi_mod, psi_protect_name =>   psi_sovrl_upd_vect
  use psb_realloc_mod
--- a/base/comm/internals/psi_sovrl_upd_a.f90
+++ b/base/comm/internals/psi_sovrl_upd_a.f90
@ -28,6 +28,12 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
+!
+! Subroutine: psi_sovrl_update
+!   These subroutines update the overlap region of a vector; they are  used
+!   for the transpose  matrix-vector product when there is a nonempty overlap,
+!   or for the application of Additive Schwarz preconditioners.                                           
+!    
 !    
 subroutine  psi_sovrl_updr1(x,desc_a,update,info)
  use psi_mod, psi_protect_name =>   psi_sovrl_updr1
--- a/base/comm/internals/psi_sswapdata.F90
+++ b/base/comm/internals/psi_sswapdata.F90
@ -32,8 +32,10 @@
 !
 ! File: psi_sswapdata.F90
 !
-! Subroutine: psi_sswapdatam
-!   Does the data exchange among processes. Essentially this is doing 
+!
+!
+! Subroutine: psi_sswapdata_vect
+!   Implements the data exchange among processes. Essentially this is doing 
 !   a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but 
 !   it is capable of pruning empty exchanges, which are very likely in out 
 !   application environment. All the variants have the same structure 
@ -43,12 +45,15 @@
 !                                         C    complex(psb_spk_)
 !                                         Z    complex(psb_dpk_)
 !   Basically the operation is as follows: on each process, we identify 
-!   sections SND(Y) and RCV(Y); then we do a send on (PACK(SND(Y)));
-!   then we receive, and we do an update with Y = UNPACK(RCV(Y)) + BETA * Y 
-!   but only on the elements involved in the UNPACK operation. 
+!   sections SND(Y) and RCV(Y); then we do a SEND(PACK(GTH(SND(Y))));
+!   then we receive, and we do an update with Y = SCT(RCV(Y)) + BETA * Y 
+!   but only on the elements involved in the SCT operation. 
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
+!   This version works on encapsulated vectors, and uses their methods to do  GTH and SCT,
+!   so that special versions (e.g. GPU vectors) can override them.
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -69,10 +74,10 @@
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:,:)   - X                       The data area                        
+!    beta     - real                  Choose overwrite or sum. 
+!    y        - type(psb_s_vect_type) The data area                        
 !    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    work(:)  - real                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
@ -83,14 +88,6 @@
 !                                       psb_comm_mov_     use ovr_mst_idx
 !
 !
-!
-!
-! Subroutine: psi_sswapdata_vect
-!   Data exchange among processes.
-!
-!   Takes care of Y an exanspulated vector.
-!   
-!   
 ! 
 subroutine psi_sswapdata_vect(flag,beta,y,desc_a,work,info,data)

@ -426,7 +423,7 @@ end subroutine psi_sswap_vidx_vect
 ! Subroutine: psi_sswapdata_multivect
 !   Data exchange among processes.
 !
-!   Takes care of Y an encaspulated vector.
+!   Takes care of Y, an encapsulated multivector.
 !   
 !   
 subroutine psi_sswapdata_multivect(flag,beta,y,desc_a,work,info,data)
--- a/base/comm/internals/psi_sswapdata_a.F90
+++ b/base/comm/internals/psi_sswapdata_a.F90
@ -33,9 +33,9 @@
 ! File: psi_sswapdata.F90
 !
 ! Subroutine: psi_sswapdatam
-!   Does the data exchange among processes. Essentially this is doing 
+!   Implements the data exchange among processes. Essentially this is doing 
 !   a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but 
-!   it is capable of pruning empty exchanges, which are very likely in out 
+!   it is capable of pruning empty exchanges, which are very likely in our
 !   application environment. All the variants have the same structure 
 !   In all these subroutines X may be:    I    Integer
 !                                         S    real(psb_spk_)
@ -49,6 +49,7 @@
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -69,10 +70,10 @@
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:,:)   - X                       The data area                        
+!    beta     - real                  Choose overwrite or sum. 
+!    y(:,:)   - real                  The data area                        
 !    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    work(:)  - real                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
@ -507,7 +508,7 @@ end subroutine psi_sswapidxm
 !
 !
 ! Subroutine: psi_sswapdatav
-!   Does the data exchange among processes. Essentially this is doing 
+!   Implements the data exchange among processes. Essentially this is doing 
 !   a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but 
 !   it is capable of pruning empty exchanges, which are very likely in out 
 !   application environment. All the variants have the same structure 
@ -523,6 +524,7 @@ end subroutine psi_sswapidxm
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -543,10 +545,10 @@ end subroutine psi_sswapidxm
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:)     - X                       The data area                        
+!    beta     - real                  Choose overwrite or sum. 
+!    y(:)     - real                  The data area                        
 !    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    work(:)  - real                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
--- a/base/comm/internals/psi_sswaptran.F90
+++ b/base/comm/internals/psi_sswaptran.F90
@ -32,8 +32,8 @@
 !
 ! File: psi_sswaptran.F90
 !
-! Subroutine: psi_sswaptranm
-!   Does the data exchange among processes. This is similar to Xswapdata, but
+! Subroutine: psi_sswaptran_vect
+!   Implements the data exchange among processes. This is similar to Xswapdata, but
 !   the list is read "in reverse", i.e. indices that are normally SENT are used 
 !   for the RECEIVE part and vice-versa. This is the basic data exchange operation
 !   for doing the product of a sparse matrix by a vector. 
@ -47,12 +47,16 @@
 !                                         C    complex(psb_spk_)
 !                                         Z    complex(psb_dpk_)
 !   Basically the operation is as follows: on each process, we identify 
-!   sections SND(Y) and RCV(Y); then we do a SEND(PACK(SND(Y)));
-!   then we receive, and we do an update with Y = UNPACK(RCV(Y)) + BETA * Y 
-!   but only on the elements involved in the UNPACK operation. 
+!   sections SND(Y) and RCV(Y); then we do a SEND(PACK(GTH(SND(Y))));
+!   then we receive, and we do an update with Y = SCT(RCV(Y)) + BETA * Y 
+!   but only on the elements involved in the SCT operation. 
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
+!   This version works on encapsulated vectors, and uses their methods to do  GTH and SCT,
+!   so that special versions (e.g. GPU vectors) can override them.
+! 
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -73,10 +77,10 @@
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:,:)   - X                       The data area                        
-!    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    beta     - real                  Choose overwrite or sum. 
+!    y        - type(psb_s_vect_type) The data area                        
+!    desc_a   - type(psb_desc_type).    The communication descriptor.        
+!    work(:)  - real                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
@ -86,13 +90,6 @@
 !                                       psb_comm_ovrl_    use ovrl_index
 !                                       psb_comm_mov_     use ovr_mst_idx
 !
-!
-!
-! Subroutine: psi_sswaptran_vect
-!   Data exchange among processes.
-!
-!   Takes care of Y an exanspulated vector.
-!   
 !   
 subroutine psi_sswaptran_vect(flag,beta,y,desc_a,work,info,data)

@ -171,7 +168,7 @@ end subroutine psi_sswaptran_vect
 ! Subroutine: psi_stran_vidx_vect
 !   Data exchange among processes.
 !
-!   Takes care of Y an exanspulated vector. Relies on the gather/scatter methods
+!   Takes care of Y an encapsulated vector. Relies on the gather/scatter methods
 !   of vectors. 
 !   
 !   The real workhorse: the outer routine will only choose the index list
@ -436,10 +433,10 @@ end subroutine psi_stran_vidx_vect
 !
 !
 !
-! Subroutine: psi_sswaptran_vect
+! Subroutine: psi_sswaptran_multivect
 !   Data exchange among processes.
 !
-!   Takes care of Y an encaspulated vector.
+!   Takes care of Y, an encapsulated multivector.
 !   
 !   
 subroutine psi_sswaptran_multivect(flag,beta,y,desc_a,work,info,data)
--- a/base/comm/internals/psi_sswaptran_a.F90
+++ b/base/comm/internals/psi_sswaptran_a.F90
@ -33,7 +33,7 @@
 ! File: psi_sswaptran.F90
 !
 ! Subroutine: psi_sswaptranm
-!   Does the data exchange among processes. This is similar to Xswapdata, but
+!   Implements the data exchange among processes. This is similar to Xswapdata, but
 !   the list is read "in reverse", i.e. indices that are normally SENT are used 
 !   for the RECEIVE part and vice-versa. This is the basic data exchange operation
 !   for doing the product of a sparse matrix by a vector. 
@ -53,6 +53,7 @@
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -73,10 +74,10 @@
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:,:)   - X                       The data area                        
+!    beta     - real                  Choose overwrite or sum. 
+!    y(:,:)   - real                  The data area                        
 !    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    work(:)  - real                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
@ -516,7 +517,7 @@ end subroutine psi_stranidxm
 !
 !
 ! Subroutine: psi_sswaptranv
-!   Does the data exchange among processes. This is similar to Xswapdata, but
+!   Implements the data exchange among processes. This is similar to Xswapdata, but
 !   the list is read "in reverse", i.e. indices that are normally SENT are used 
 !   for the RECEIVE part and vice-versa. This is the basic data exchange operation
 !   for doing the product of a sparse matrix by a vector. 
@ -536,6 +537,7 @@ end subroutine psi_stranidxm
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -556,10 +558,10 @@ end subroutine psi_stranidxm
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:)     - X                       The data area                        
+!    beta     - real                  Choose overwrite or sum. 
+!    y(:)     - real                  The data area                        
 !    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    work(:)  - real                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
--- a/base/comm/internals/psi_zovrl_restr.f90
+++ b/base/comm/internals/psi_zovrl_restr.f90
@ -28,8 +28,13 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
+!
+! Subroutine: psi_zovrl_restr
+!   These subroutines restore the overlap region of a vector; they are  used
+!   for the transpose  matrix-vector product when there is a nonempty overlap.
+!                                              
+!    
 !    
-
 subroutine  psi_zovrl_restr_vect(x,xs,desc_a,info)
  use psi_mod, psi_protect_name =>   psi_zovrl_restr_vect
  use psb_z_base_vect_mod
--- a/base/comm/internals/psi_zovrl_restr_a.f90
+++ b/base/comm/internals/psi_zovrl_restr_a.f90
@ -28,6 +28,11 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
+!
+! Subroutine: psi_zovrl_restr
+!   These subroutines restore the overlap region of a vector; they are  used
+!   for the transpose  matrix-vector product when there is a nonempty overlap.
+!                                              
 !    
 subroutine  psi_zovrl_restrr1(x,xs,desc_a,info)
  use psi_mod, psi_protect_name =>   psi_zovrl_restrr1
--- a/base/comm/internals/psi_zovrl_save.f90
+++ b/base/comm/internals/psi_zovrl_save.f90
@ -28,6 +28,11 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
+!
+! Subroutine: psi_zovrl_save
+!   These subroutines save the overlap region of a vector; they are used
+!   for the transpose  matrix-vector product when there is a nonempty overlap.
+!                                              
 !    
 subroutine  psi_zovrl_save_vect(x,xs,desc_a,info)
  use psi_mod, psi_protect_name =>   psi_zovrl_save_vect
--- a/base/comm/internals/psi_zovrl_save_a.f90
+++ b/base/comm/internals/psi_zovrl_save_a.f90
@ -28,8 +28,12 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
-!    
-
+!
+!
+! Subroutine: psi_zovrl_save
+!   These subroutines save the overlap region of a vector; they are used
+!   for the transpose  matrix-vector product when there is a nonempty overlap.
+!                                              
 subroutine  psi_zovrl_saver1(x,xs,desc_a,info)
  use psi_mod, psi_protect_name =>   psi_zovrl_saver1

--- a/base/comm/internals/psi_zovrl_upd.f90
+++ b/base/comm/internals/psi_zovrl_upd.f90
@ -28,8 +28,14 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
+!
+! Subroutine: psi_zovrl_update
+!   These subroutines update the overlap region of a vector; they are  used
+!   for the transpose  matrix-vector product when there is a nonempty overlap,
+!   or for the application of Additive Schwarz preconditioners.                                           
+!    
+!    
 !    
-
 subroutine  psi_zovrl_upd_vect(x,desc_a,update,info)
  use psi_mod, psi_protect_name =>   psi_zovrl_upd_vect
  use psb_realloc_mod
--- a/base/comm/internals/psi_zovrl_upd_a.f90
+++ b/base/comm/internals/psi_zovrl_upd_a.f90
@ -28,6 +28,12 @@
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
+!
+! Subroutine: psi_zovrl_update
+!   These subroutines update the overlap region of a vector; they are  used
+!   for the transpose  matrix-vector product when there is a nonempty overlap,
+!   or for the application of Additive Schwarz preconditioners.                                           
+!    
 !    
 subroutine  psi_zovrl_updr1(x,desc_a,update,info)
  use psi_mod, psi_protect_name =>   psi_zovrl_updr1
--- a/base/comm/internals/psi_zswapdata.F90
+++ b/base/comm/internals/psi_zswapdata.F90
@ -32,8 +32,10 @@
 !
 ! File: psi_zswapdata.F90
 !
-! Subroutine: psi_zswapdatam
-!   Does the data exchange among processes. Essentially this is doing 
+!
+!
+! Subroutine: psi_zswapdata_vect
+!   Implements the data exchange among processes. Essentially this is doing 
 !   a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but 
 !   it is capable of pruning empty exchanges, which are very likely in out 
 !   application environment. All the variants have the same structure 
@ -43,12 +45,15 @@
 !                                         C    complex(psb_spk_)
 !                                         Z    complex(psb_dpk_)
 !   Basically the operation is as follows: on each process, we identify 
-!   sections SND(Y) and RCV(Y); then we do a send on (PACK(SND(Y)));
-!   then we receive, and we do an update with Y = UNPACK(RCV(Y)) + BETA * Y 
-!   but only on the elements involved in the UNPACK operation. 
+!   sections SND(Y) and RCV(Y); then we do a SEND(PACK(GTH(SND(Y))));
+!   then we receive, and we do an update with Y = SCT(RCV(Y)) + BETA * Y 
+!   but only on the elements involved in the SCT operation. 
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
+!   This version works on encapsulated vectors, and uses their methods to do  GTH and SCT,
+!   so that special versions (e.g. GPU vectors) can override them.
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -69,10 +74,10 @@
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:,:)   - X                       The data area                        
+!    beta     - complex                  Choose overwrite or sum. 
+!    y        - type(psb_z_vect_type)   The data area                        
 !    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    work(:)  - complex                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
@ -83,14 +88,6 @@
 !                                       psb_comm_mov_     use ovr_mst_idx
 !
 !
-!
-!
-! Subroutine: psi_zswapdata_vect
-!   Data exchange among processes.
-!
-!   Takes care of Y an exanspulated vector.
-!   
-!   
 ! 
 subroutine psi_zswapdata_vect(flag,beta,y,desc_a,work,info,data)

@ -426,7 +423,7 @@ end subroutine psi_zswap_vidx_vect
 ! Subroutine: psi_zswapdata_multivect
 !   Data exchange among processes.
 !
-!   Takes care of Y an encaspulated vector.
+!   Takes care of Y an encapsulated multivector.
 !   
 !   
 subroutine psi_zswapdata_multivect(flag,beta,y,desc_a,work,info,data)
--- a/base/comm/internals/psi_zswapdata_a.F90
+++ b/base/comm/internals/psi_zswapdata_a.F90
@ -33,9 +33,9 @@
 ! File: psi_zswapdata.F90
 !
 ! Subroutine: psi_zswapdatam
-!   Does the data exchange among processes. Essentially this is doing 
+!   Implements the data exchange among processes. Essentially this is doing 
 !   a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but 
-!   it is capable of pruning empty exchanges, which are very likely in out 
+!   it is capable of pruning empty exchanges, which are very likely in our
 !   application environment. All the variants have the same structure 
 !   In all these subroutines X may be:    I    Integer
 !                                         S    real(psb_spk_)
@ -49,6 +49,7 @@
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -69,10 +70,10 @@
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:,:)   - X                       The data area                        
+!    beta     - complex                  Choose overwrite or sum. 
+!    y(:,:)   - complex                  The data area                        
 !    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    work(:)  - complex                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
@ -507,7 +508,7 @@ end subroutine psi_zswapidxm
 !
 !
 ! Subroutine: psi_zswapdatav
-!   Does the data exchange among processes. Essentially this is doing 
+!   Implements the data exchange among processes. Essentially this is doing 
 !   a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but 
 !   it is capable of pruning empty exchanges, which are very likely in out 
 !   application environment. All the variants have the same structure 
@ -523,6 +524,7 @@ end subroutine psi_zswapidxm
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -543,10 +545,10 @@ end subroutine psi_zswapidxm
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:)     - X                       The data area                        
+!    beta     - complex                  Choose overwrite or sum. 
+!    y(:)     - complex                  The data area                        
 !    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    work(:)  - complex                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
--- a/base/comm/internals/psi_zswaptran.F90
+++ b/base/comm/internals/psi_zswaptran.F90
@ -32,8 +32,8 @@
 !
 ! File: psi_zswaptran.F90
 !
-! Subroutine: psi_zswaptranm
-!   Does the data exchange among processes. This is similar to Xswapdata, but
+! Subroutine: psi_zswaptran_vect
+!   Implements the data exchange among processes. This is similar to Xswapdata, but
 !   the list is read "in reverse", i.e. indices that are normally SENT are used 
 !   for the RECEIVE part and vice-versa. This is the basic data exchange operation
 !   for doing the product of a sparse matrix by a vector. 
@ -47,12 +47,16 @@
 !                                         C    complex(psb_spk_)
 !                                         Z    complex(psb_dpk_)
 !   Basically the operation is as follows: on each process, we identify 
-!   sections SND(Y) and RCV(Y); then we do a SEND(PACK(SND(Y)));
-!   then we receive, and we do an update with Y = UNPACK(RCV(Y)) + BETA * Y 
-!   but only on the elements involved in the UNPACK operation. 
+!   sections SND(Y) and RCV(Y); then we do a SEND(PACK(GTH(SND(Y))));
+!   then we receive, and we do an update with Y = SCT(RCV(Y)) + BETA * Y 
+!   but only on the elements involved in the SCT operation. 
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
+!   This version works on encapsulated vectors, and uses their methods to do  GTH and SCT,
+!   so that special versions (e.g. GPU vectors) can override them.
+! 
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -73,10 +77,10 @@
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:,:)   - X                       The data area                        
-!    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    beta     - complex                  Choose overwrite or sum. 
+!    y        - type(psb_z_vect_type) The data area                        
+!    desc_a   - type(psb_desc_type).    The communication descriptor.        
+!    work(:)  - complex                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
@ -86,13 +90,6 @@
 !                                       psb_comm_ovrl_    use ovrl_index
 !                                       psb_comm_mov_     use ovr_mst_idx
 !
-!
-!
-! Subroutine: psi_zswaptran_vect
-!   Data exchange among processes.
-!
-!   Takes care of Y an exanspulated vector.
-!   
 !   
 subroutine psi_zswaptran_vect(flag,beta,y,desc_a,work,info,data)

@ -171,7 +168,7 @@ end subroutine psi_zswaptran_vect
 ! Subroutine: psi_ztran_vidx_vect
 !   Data exchange among processes.
 !
-!   Takes care of Y an exanspulated vector. Relies on the gather/scatter methods
+!   Takes care of Y an encapsulated vector. Relies on the gather/scatter methods
 !   of vectors. 
 !   
 !   The real workhorse: the outer routine will only choose the index list
@ -436,10 +433,10 @@ end subroutine psi_ztran_vidx_vect
 !
 !
 !
-! Subroutine: psi_zswaptran_vect
+! Subroutine: psi_zswaptran_multivect
 !   Data exchange among processes.
 !
-!   Takes care of Y an encaspulated vector.
+!   Takes care of Y an encapsulated multivector.
 !   
 !   
 subroutine psi_zswaptran_multivect(flag,beta,y,desc_a,work,info,data)
--- a/base/comm/internals/psi_zswaptran_a.F90
+++ b/base/comm/internals/psi_zswaptran_a.F90
@ -33,7 +33,7 @@
 ! File: psi_zswaptran.F90
 !
 ! Subroutine: psi_zswaptranm
-!   Does the data exchange among processes. This is similar to Xswapdata, but
+!   Implements the data exchange among processes. This is similar to Xswapdata, but
 !   the list is read "in reverse", i.e. indices that are normally SENT are used 
 !   for the RECEIVE part and vice-versa. This is the basic data exchange operation
 !   for doing the product of a sparse matrix by a vector. 
@ -53,6 +53,7 @@
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -73,10 +74,10 @@
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:,:)   - X                       The data area                        
+!    beta     - complex                  Choose overwrite or sum. 
+!    y(:,:)   - complex                  The data area                        
 !    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    work(:)  - complex                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
@ -516,7 +517,7 @@ end subroutine psi_ztranidxm
 !
 !
 ! Subroutine: psi_zswaptranv
-!   Does the data exchange among processes. This is similar to Xswapdata, but
+!   Implements the data exchange among processes. This is similar to Xswapdata, but
 !   the list is read "in reverse", i.e. indices that are normally SENT are used 
 !   for the RECEIVE part and vice-versa. This is the basic data exchange operation
 !   for doing the product of a sparse matrix by a vector. 
@ -536,6 +537,7 @@ end subroutine psi_ztranidxm
 !   Thus: for halo data exchange, the receive section is confined in the 
 !   halo indices, and BETA=0, whereas for overlap exchange the receive section 
 !   is scattered in the owned indices, and BETA=1.
+!   The first routine picks the desired exchange index list and passes it to the second.
 ! 
 ! Arguments: 
 !    flag     - integer                 Choose the algorithm for data exchange: 
@ -556,10 +558,10 @@ end subroutine psi_ztranidxm
 !
 !
 !    n        - integer                 Number of columns in Y               
-!    beta     - X                       Choose overwrite or sum. 
-!    y(:)     - X                       The data area                        
+!    beta     - complex                  Choose overwrite or sum. 
+!    y(:)     - complex                  The data area                        
 !    desc_a   - type(psb_desc_type).  The communication descriptor.        
-!    work(:)  - X                       Buffer space. If not sufficient, will do 
+!    work(:)  - complex                  Buffer space. If not sufficient, will do 
 !                                       our own internal allocation.
 !    info     - integer.                return code.
 !    data     - integer                 which list is to be used to exchange data
--- a/base/comm/psb_cgather.f90
+++ b/base/comm/psb_cgather.f90
@ -31,14 +31,14 @@
 !    
 ! File:  psb_cgather.f90
 !
-! Subroutine: psb_cgatherm
-!   This subroutine gathers pieces of a distributed dense matrix into a local one.
+! Subroutine: psb_cgather_vect
+!   This subroutine gathers pieces of a distributed vector into a local one.
 !
 ! Arguments:
-!   globx     -  complex,dimension(:,:).          The local matrix into which gather 
+!   globx     -  complex,dimension(:).           The local vector into which to gather 
 !                                                  the distributed pieces.
-!   locx      -  complex,dimension(:,:).          The local piece of the distributed 
-!                                                  matrix to be gathered.
+!   locx      -  type(psb_c_vect_type)         The local piece of the distributed 
+!                                                  vector to be gathered.
 !   desc_a    -  type(psb_desc_type).        The communication descriptor.
 !   info      -  integer.                      Error code.
 !   iroot     -  integer.                      The process that has to own the 
@ -159,6 +159,8 @@ subroutine  psb_cgather_vect(globx, locx, desc_a, info, iroot)

 end subroutine psb_cgather_vect

+! Subroutine: psb_cgather_multivect
+!   This subroutine gathers pieces of a distributed multivector into a local one.

 subroutine  psb_cgather_multivect(globx, locx, desc_a, info, iroot)
  use psb_base_mod, psb_protect_name => psb_cgather_multivect
--- a/base/comm/psb_cgather_a.f90
+++ b/base/comm/psb_cgather_a.f90
@ -31,7 +31,7 @@
 !    
 ! File:  psb_cgather.f90
 !
-! Subroutine: psb_cgatherm
+! Subroutine: psb_cgather
 !   This subroutine gathers pieces of a distributed dense matrix into a local one.
 !
 ! Arguments:
--- a/base/comm/psb_chalo.f90
+++ b/base/comm/psb_chalo.f90
@ -31,17 +31,17 @@
 !    
 ! File:  psb_chalo.f90
 !
-! Subroutine: psb_chalom
+! Subroutine: psb_chalo_vect
 !   This subroutine performs the exchange of the halo elements in a 
-!    distributed dense matrix between all the processes.
+!    distributed vector between all the processes.
 !
 ! Arguments:
-!   x         -  complex,dimension(:,:).          The local part of the dense matrix.
+!   x         -  type(psb_c_vect_type)    The local part of the vector
 !   desc_a    -  type(psb_desc_type).        The communication descriptor.
 !   info      -  integer.                      Return code
 !   jx        -  integer(optional).            The starting column of the global matrix. 
 !   ik        -  integer(optional).            The number of columns to gather. 
-!   work      -  complex(optional).            Work  area.
+!   work      -  complex(optional).             Work  area.
 !   tran      -  character(optional).          Transpose exchange.
 !   mode      -  integer(optional).            Communication mode (see Swapdata)
 !   data     - integer                 Which index list in desc_a should be used
@ -52,7 +52,6 @@
 !                                       psb_comm_mov_     use ovr_mst_idx
 !
 !
-
 subroutine  psb_chalo_vect(x,desc_a,info,work,tran,mode,data)
  use psb_base_mod, psb_protect_name => psb_chalo_vect
  use psi_mod
@ -185,7 +184,28 @@ subroutine  psb_chalo_vect(x,desc_a,info,work,tran,mode,data)
    return
 end subroutine psb_chalo_vect

-
+!
+! Subroutine: psb_chalo_multivect
+!   This subroutine performs the exchange of the halo elements in a 
+!    distributed multivector between all the processes.
+!
+! Arguments:
+!   x         -  type(psb_c_multivect_type)    The local part of the multivector
+!   desc_a    -  type(psb_desc_type).        The communication descriptor.
+!   info      -  integer.                      Return code
+!   jx        -  integer(optional).            The starting column of the global matrix. 
+!   ik        -  integer(optional).            The number of columns to gather. 
+!   work      -  complex(optional).             Work  area.
+!   tran      -  character(optional).          Transpose exchange.
+!   mode      -  integer(optional).            Communication mode (see Swapdata)
+!   data     - integer                 Which index list in desc_a should be used
+!                                      to retrieve rows, default psb_comm_halo_
+!                                       psb_comm_halo_    use halo_index
+!                                       psb_comm_ext_     use ext_index 
+!                                       psb_comm_ovrl_    use ovrl_index
+!                                       psb_comm_mov_     use ovr_mst_idx
+!
+!
 subroutine  psb_chalo_multivect(x,desc_a,info,work,tran,mode,data)
  use psb_base_mod, psb_protect_name => psb_chalo_multivect
  use psi_mod
--- a/base/comm/psb_covrl.f90
+++ b/base/comm/psb_covrl.f90
@ -32,12 +32,12 @@
 !
 ! File:  psb_covrl.f90
 !
-! Subroutine: psb_covrlm
+! Subroutine: psb_covrl_vect
 !   This subroutine performs the exchange of the overlap elements in a 
-!    distributed dense matrix between all the processes.
+!    distributed dense vector between all the processes.
 !
 ! Arguments:
-!   x(:,:)      -  complex                   The local part of the dense matrix.
+!   x           -  type(psb_c_vect_type)  The local part of the vector
 !   desc_a      -  type(psb_desc_type).    The communication descriptor.
 !   info        -  integer.                  Return code.
 !   jx          -  integer(optional).        The starting column of the global matrix
@ -180,7 +180,38 @@ subroutine  psb_covrl_vect(x,desc_a,info,work,update,mode)
    return
 end subroutine psb_covrl_vect

-
+!
+! Subroutine: psb_covrl_multivect
+!   This subroutine performs the exchange of the overlap elements in a 
+!    distributed multivector between all the processes.
+!
+! Arguments:
+!   x           -  type(psb_c_multivect_type)  The local part of the multivector
+!   desc_a      -  type(psb_desc_type).    The communication descriptor.
+!   info        -  integer.                  Return code.
+!   jx          -  integer(optional).        The starting column of the global matrix
+!   ik          -  integer(optional).        The number of columns to gather. 
+!   work        -  complex(optional).           A work area.
+!   update      -  integer(optional).        Type of update:
+!                                            psb_none_   do nothing
+!                                            psb_sum_    sum of overlaps
+!                                            psb_avg_    average of overlaps
+!   mode        -  integer(optional).        Choose the algorithm for data exchange: 
+!                                       this is chosen through bit fields. 
+!                                       - swap_mpi  = iand(flag,psb_swap_mpi_)  /= 0
+!                                       - swap_sync = iand(flag,psb_swap_sync_) /= 0
+!                                       - swap_send = iand(flag,psb_swap_send_) /= 0
+!                                       - swap_recv = iand(flag,psb_swap_recv_) /= 0
+!                                       - if (swap_mpi):  use underlying MPI_ALLTOALLV.
+!                                       - if (swap_sync): use PSB_SND and PSB_RCV in 
+!                                                       synchronized pairs
+!                                       - if (swap_send .and. swap_recv): use mpi_irecv 
+!                                                       and mpi_send
+!                                       - if (swap_send): use psb_snd (but need another 
+!                                                       call with swap_recv to complete)
+!                                       - if (swap_recv): use psb_rcv (completing a 
+!                                                       previous call with swap_send)
+!
 subroutine  psb_covrl_multivect(x,desc_a,info,work,update,mode)
  use psb_base_mod, psb_protect_name => psb_covrl_multivect
  use psi_mod
--- a/base/comm/psb_cscatter.F90
+++ b/base/comm/psb_cscatter.F90
@ -31,13 +31,13 @@
 !    
 ! File:  psb_cscatter.f90
 !
-! Subroutine: psb_cscatterm
-!   This subroutine scatters a global matrix locally owned by one process
-!   into pieces that are local to alle the processes.
+! Subroutine: psb_cscatter_vect
+!   This subroutine scatters a global vector locally owned by one process
+!   into pieces that are local to all the processes.
 !
 ! Arguments:
-!   globx     -  complex,dimension(:,:).       The global matrix to scatter.
-!   locx      -  complex,dimension(:,:).       The local piece of the distributed matrix.
+!   globx     -  complex,dimension(:)          The global vector to scatter.
+!   locx      -  type(psb_c_vect_type)      The local piece of the distributed vector.
 !   desc_a    -  type(psb_desc_type).        The communication descriptor.
 !   info      -  integer.                      Error code.
 !   iroot     -  integer(optional).            The process that owns the global matrix. 
--- a/base/comm/psb_cscatter_a.F90
+++ b/base/comm/psb_cscatter_a.F90
@ -33,7 +33,7 @@
 !
 ! Subroutine: psb_cscatterm
 !   This subroutine scatters a global matrix locally owned by one process
-!   into pieces that are local to alle the processes.
+!   into pieces that are local to all the processes.
 !
 ! Arguments:
 !   globx     -  complex,dimension(:,:).       The global matrix to scatter.
@ -278,7 +278,7 @@ end subroutine psb_cscatterm

 ! Subroutine: psb_cscatterv
 !   This subroutine scatters a global vector locally owned by one process
-!   into pieces that are local to alle the processes.
+!   into pieces that are local to all the processes.
 !
 ! Arguments:
 !   globx     -  complex,dimension(:).         The global vector to scatter.
--- a/base/comm/psb_cspgather.F90
+++ b/base/comm/psb_cspgather.F90
@ -30,6 +30,17 @@
 !   
 !    
 ! File:  psb_cspgather.f90
+!
+! Gathers a sparse matrix onto a single process.
+! Two variants:
+! 1. Gathers to PSB_c_SPARSE_MAT   (i.e. to matrix with IPK_ indices)
+! 2. Gathers to PSB_lc_SPARSE_MAT  (i.e. to matrix with LPK_ indices)
+!
+! Note: this function uses MPI_ALLGATHERV. At this time, the size of the
+! resulting matrix must fit in a 4-byte integer, because MPI restricts
+! displacements to 4-byte integers. 
+! 
+!
 subroutine  psb_csp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keeploc)
 #if defined(HAVE_ISO_FORTRAN_ENV)
  use iso_fortran_env
--- a/base/comm/psb_dgather.f90
+++ b/base/comm/psb_dgather.f90
@ -31,14 +31,14 @@
 !    
 ! File:  psb_dgather.f90
 !
-! Subroutine: psb_dgatherm
-!   This subroutine gathers pieces of a distributed dense matrix into a local one.
+! Subroutine: psb_dgather_vect
+!   This subroutine gathers pieces of a distributed vector into a local one.
 !
 ! Arguments:
-!   globx     -  real,dimension(:,:).          The local matrix into which gather 
+!   globx     -  real,dimension(:).           The local vector into which to gather 
 !                                                  the distributed pieces.
-!   locx      -  real,dimension(:,:).          The local piece of the distributed 
-!                                                  matrix to be gathered.
+!   locx      -  type(psb_d_vect_type)         The local piece of the distributed 
+!                                                  vector to be gathered.
 !   desc_a    -  type(psb_desc_type).        The communication descriptor.
 !   info      -  integer.                      Error code.
 !   iroot     -  integer.                      The process that has to own the 
@ -159,6 +159,8 @@ subroutine  psb_dgather_vect(globx, locx, desc_a, info, iroot)

 end subroutine psb_dgather_vect

+! Subroutine: psb_dgather_multivect
+!   This subroutine gathers pieces of a distributed multivector into a local one.

 subroutine  psb_dgather_multivect(globx, locx, desc_a, info, iroot)
  use psb_base_mod, psb_protect_name => psb_dgather_multivect
--- a/base/comm/psb_dgather_a.f90
+++ b/base/comm/psb_dgather_a.f90
@ -31,7 +31,7 @@
 !    
 ! File:  psb_dgather.f90
 !
-! Subroutine: psb_dgatherm
+! Subroutine: psb_dgather
 !   This subroutine gathers pieces of a distributed dense matrix into a local one.
 !
 ! Arguments:
--- a/base/comm/psb_dhalo.f90
+++ b/base/comm/psb_dhalo.f90
@ -31,17 +31,17 @@
 !    
 ! File:  psb_dhalo.f90
 !
-! Subroutine: psb_dhalom
+! Subroutine: psb_dhalo_vect
 !   This subroutine performs the exchange of the halo elements in a 
-!    distributed dense matrix between all the processes.
+!    distributed vector between all the processes.
 !
 ! Arguments:
-!   x         -  real,dimension(:,:).          The local part of the dense matrix.
+!   x         -  type(psb_d_vect_type)    The local part of the vector
 !   desc_a    -  type(psb_desc_type).        The communication descriptor.
 !   info      -  integer.                      Return code
 !   jx        -  integer(optional).            The starting column of the global matrix. 
 !   ik        -  integer(optional).            The number of columns to gather. 
-!   work      -  real(optional).            Work  area.
+!   work      -  real(optional).             Work  area.
 !   tran      -  character(optional).          Transpose exchange.
 !   mode      -  integer(optional).            Communication mode (see Swapdata)
 !   data     - integer                 Which index list in desc_a should be used
@ -52,7 +52,6 @@
 !                                       psb_comm_mov_     use ovr_mst_idx
 !
 !
-
 subroutine  psb_dhalo_vect(x,desc_a,info,work,tran,mode,data)
  use psb_base_mod, psb_protect_name => psb_dhalo_vect
  use psi_mod
@ -185,7 +184,28 @@ subroutine  psb_dhalo_vect(x,desc_a,info,work,tran,mode,data)
    return
 end subroutine psb_dhalo_vect

-
+!
+! Subroutine: psb_dhalo_multivect
+!   This subroutine performs the exchange of the halo elements in a 
+!    distributed multivector between all the processes.
+!
+! Arguments:
+!   x         -  type(psb_d_multivect_type)    The local part of the multivector
+!   desc_a    -  type(psb_desc_type).        The communication descriptor.
+!   info      -  integer.                      Return code
+!   jx        -  integer(optional).            The starting column of the global matrix. 
+!   ik        -  integer(optional).            The number of columns to gather. 
+!   work      -  real(optional).             Work  area.
+!   tran      -  character(optional).          Transpose exchange.
+!   mode      -  integer(optional).            Communication mode (see Swapdata)
+!   data     - integer                 Which index list in desc_a should be used
+!                                      to retrieve rows, default psb_comm_halo_
+!                                       psb_comm_halo_    use halo_index
+!                                       psb_comm_ext_     use ext_index 
+!                                       psb_comm_ovrl_    use ovrl_index
+!                                       psb_comm_mov_     use ovr_mst_idx
+!
+!
 subroutine  psb_dhalo_multivect(x,desc_a,info,work,tran,mode,data)
  use psb_base_mod, psb_protect_name => psb_dhalo_multivect
  use psi_mod
--- a/base/comm/psb_dovrl.f90
+++ b/base/comm/psb_dovrl.f90
@ -32,12 +32,12 @@
 !
 ! File:  psb_dovrl.f90
 !
-! Subroutine: psb_dovrlm
+! Subroutine: psb_dovrl_vect
 !   This subroutine performs the exchange of the overlap elements in a 
-!    distributed dense matrix between all the processes.
+!    distributed dense vector between all the processes.
 !
 ! Arguments:
-!   x(:,:)      -  real                   The local part of the dense matrix.
+!   x           -  type(psb_d_vect_type)  The local part of the vector
 !   desc_a      -  type(psb_desc_type).    The communication descriptor.
 !   info        -  integer.                  Return code.
 !   jx          -  integer(optional).        The starting column of the global matrix
@ -180,7 +180,38 @@ subroutine  psb_dovrl_vect(x,desc_a,info,work,update,mode)
    return
 end subroutine psb_dovrl_vect

-
+!
+! Subroutine: psb_dovrl_multivect
+!   This subroutine performs the exchange of the overlap elements in a 
+!    distributed multivector between all the processes.
+!
+! Arguments:
+!   x           -  type(psb_d_multivect_type)  The local part of the multivector
+!   desc_a      -  type(psb_desc_type).    The communication descriptor.
+!   info        -  integer.                  Return code.
+!   jx          -  integer(optional).        The starting column of the global matrix
+!   ik          -  integer(optional).        The number of columns to gather. 
+!   work        -  real(optional).           A work area.
+!   update      -  integer(optional).        Type of update:
+!                                            psb_none_   do nothing
+!                                            psb_sum_    sum of overlaps
+!                                            psb_avg_    average of overlaps
+!   mode        -  integer(optional).        Choose the algorithm for data exchange: 
+!                                       this is chosen through bit fields. 
+!                                       - swap_mpi  = iand(flag,psb_swap_mpi_)  /= 0
+!                                       - swap_sync = iand(flag,psb_swap_sync_) /= 0
+!                                       - swap_send = iand(flag,psb_swap_send_) /= 0
+!                                       - swap_recv = iand(flag,psb_swap_recv_) /= 0
+!                                       - if (swap_mpi):  use underlying MPI_ALLTOALLV.
+!                                       - if (swap_sync): use PSB_SND and PSB_RCV in 
+!                                                       synchronized pairs
+!                                       - if (swap_send .and. swap_recv): use mpi_irecv 
+!                                                       and mpi_send
+!                                       - if (swap_send): use psb_snd (but need another 
+!                                                       call with swap_recv to complete)
+!                                       - if (swap_recv): use psb_rcv (completing a 
+!                                                       previous call with swap_send)
+!
 subroutine  psb_dovrl_multivect(x,desc_a,info,work,update,mode)
  use psb_base_mod, psb_protect_name => psb_dovrl_multivect
  use psi_mod
--- a/base/comm/psb_dscatter.F90
+++ b/base/comm/psb_dscatter.F90
@ -31,13 +31,13 @@
 !    
 ! File:  psb_dscatter.f90
 !
-! Subroutine: psb_dscatterm
-!   This subroutine scatters a global matrix locally owned by one process
-!   into pieces that are local to alle the processes.
+! Subroutine: psb_dscatter_vect
+!   This subroutine scatters a global vector locally owned by one process
+!   into pieces that are local to all the processes.
 !
 ! Arguments:
-!   globx     -  real,dimension(:,:).       The global matrix to scatter.
-!   locx      -  real,dimension(:,:).       The local piece of the distributed matrix.
+!   globx     -  real,dimension(:)          The global vector to scatter.
+!   locx      -  type(psb_d_vect_type)      The local piece of the distributed vector.
 !   desc_a    -  type(psb_desc_type).        The communication descriptor.
 !   info      -  integer.                      Error code.
 !   iroot     -  integer(optional).            The process that owns the global matrix. 
--- a/base/comm/psb_dscatter_a.F90
+++ b/base/comm/psb_dscatter_a.F90
@ -33,7 +33,7 @@
 !
 ! Subroutine: psb_dscatterm
 !   This subroutine scatters a global matrix locally owned by one process
-!   into pieces that are local to alle the processes.
+!   into pieces that are local to all the processes.
 !
 ! Arguments:
 !   globx     -  real,dimension(:,:).       The global matrix to scatter.
@ -278,7 +278,7 @@ end subroutine psb_dscatterm

 ! Subroutine: psb_dscatterv
 !   This subroutine scatters a global vector locally owned by one process
-!   into pieces that are local to alle the processes.
+!   into pieces that are local to all the processes.
 !
 ! Arguments:
 !   globx     -  real,dimension(:).         The global vector to scatter.
--- a/base/comm/psb_dspgather.F90
+++ b/base/comm/psb_dspgather.F90
@ -30,6 +30,17 @@
 !   
 !    
 ! File:  psb_dspgather.f90
+!
+! Gathers a sparse matrix onto a single process.
+! Two variants:
+! 1. Gathers to PSB_d_SPARSE_MAT   (i.e. to matrix with IPK_ indices)
+! 2. Gathers to PSB_ld_SPARSE_MAT  (i.e. to matrix with LPK_ indices)
+!
+! Note: this function uses MPI_ALLGATHERV. At this time, the size of the
+! resulting matrix must fit in a 4-byte integer, because MPI displacements
+! are restricted to 4-byte integers.
+! 
+!
 subroutine  psb_dsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keeploc)
 #if defined(HAVE_ISO_FORTRAN_ENV)
  use iso_fortran_env
--- a/base/comm/psb_egather_a.f90
+++ b/base/comm/psb_egather_a.f90
@ -31,7 +31,7 @@
 !    
 ! File:  psb_egather.f90
 !
-! Subroutine: psb_egatherm
+! Subroutine: psb_egather
 !   This subroutine gathers pieces of a distributed dense matrix into a local one.
 !
 ! Arguments:
--- a/base/comm/psb_escatter_a.F90
+++ b/base/comm/psb_escatter_a.F90
@ -33,7 +33,7 @@
 !
 ! Subroutine: psb_escatterm
 !   This subroutine scatters a global matrix locally owned by one process
-!   into pieces that are local to alle the processes.
+!   into pieces that are local to all the processes.
 !
 ! Arguments:
 !   globx     -  integer,dimension(:,:).       The global matrix to scatter.
@ -278,7 +278,7 @@ end subroutine psb_escatterm

 ! Subroutine: psb_escatterv
 !   This subroutine scatters a global vector locally owned by one process
-!   into pieces that are local to alle the processes.
+!   into pieces that are local to all the processes.
 !
 ! Arguments:
 !   globx     -  integer,dimension(:).         The global vector to scatter.
--- a/base/comm/psb_igather.f90
+++ b/base/comm/psb_igather.f90
@ -31,14 +31,14 @@
 !    
 ! File:  psb_igather.f90
 !
-! Subroutine: psb_igatherm
-!   This subroutine gathers pieces of a distributed dense matrix into a local one.
+! Subroutine: psb_igather_vect
+!   This subroutine gathers pieces of a distributed vector into a local one.
 !
 ! Arguments:
-!   globx     -  integer,dimension(:,:).          The local matrix into which gather 
+!   globx     -  integer,dimension(:).           The local vector into which to gather
 !                                                  the distributed pieces.
-!   locx      -  integer,dimension(:,:).          The local piece of the distributed 
-!                                                  matrix to be gathered.
+!   locx      -  type(psb_i_vect_type).        The local piece of the distributed
+!                                                  vector to be gathered.
 !   desc_a    -  type(psb_desc_type).        The communication descriptor.
 !   info      -  integer.                      Error code.
 !   iroot     -  integer.                      The process that has to own the 
@ -159,6 +159,8 @@ subroutine  psb_igather_vect(globx, locx, desc_a, info, iroot)

 end subroutine psb_igather_vect

+! Subroutine: psb_igather_multivect
+!   This subroutine gathers pieces of a distributed multivector into a local one.

 subroutine  psb_igather_multivect(globx, locx, desc_a, info, iroot)
  use psb_base_mod, psb_protect_name => psb_igather_multivect
--- a/base/comm/psb_ihalo.f90
+++ b/base/comm/psb_ihalo.f90
@ -31,17 +31,17 @@
 !    
 ! File:  psb_ihalo.f90
 !
-! Subroutine: psb_ihalom
+! Subroutine: psb_ihalo_vect
 !   This subroutine performs the exchange of the halo elements in a 
-!    distributed dense matrix between all the processes.
+!    distributed vector between all the processes.
 !
 ! Arguments:
-!   x         -  integer,dimension(:,:).          The local part of the dense matrix.
+!   x         -  type(psb_i_vect_type)    The local part of the vector
 !   desc_a    -  type(psb_desc_type).        The communication descriptor.
 !   info      -  integer.                      Return code
 !   jx        -  integer(optional).            The starting column of the global matrix. 
 !   ik        -  integer(optional).            The number of columns to gather. 
-!   work      -  integer(optional).            Work  area.
+!   work      -  integer(optional).             Work  area.
 !   tran      -  character(optional).          Transpose exchange.
 !   mode      -  integer(optional).            Communication mode (see Swapdata)
 !   data     - integer                 Which index list in desc_a should be used
@ -52,7 +52,6 @@
 !                                       psb_comm_mov_     use ovr_mst_idx
 !
 !
-
 subroutine  psb_ihalo_vect(x,desc_a,info,work,tran,mode,data)
  use psb_base_mod, psb_protect_name => psb_ihalo_vect
  use psi_mod
@ -185,7 +184,28 @@ subroutine  psb_ihalo_vect(x,desc_a,info,work,tran,mode,data)
    return
 end subroutine psb_ihalo_vect

-
+!
+! Subroutine: psb_ihalo_multivect
+!   This subroutine performs the exchange of the halo elements in a 
+!    distributed multivector between all the processes.
+!
+! Arguments:
+!   x         -  type(psb_i_multivect_type)    The local part of the multivector
+!   desc_a    -  type(psb_desc_type).        The communication descriptor.
+!   info      -  integer.                      Return code
+!   jx        -  integer(optional).            The starting column of the global matrix. 
+!   ik        -  integer(optional).            The number of columns to gather. 
+!   work      -  integer(optional).             Work  area.
+!   tran      -  character(optional).          Transpose exchange.
+!   mode      -  integer(optional).            Communication mode (see Swapdata)
+!   data     - integer                 Which index list in desc_a should be used
+!                                      to retrieve rows, default psb_comm_halo_
+!                                       psb_comm_halo_    use halo_index
+!                                       psb_comm_ext_     use ext_index 
+!                                       psb_comm_ovrl_    use ovrl_index
+!                                       psb_comm_mov_     use ovr_mst_idx
+!
+!
 subroutine  psb_ihalo_multivect(x,desc_a,info,work,tran,mode,data)
  use psb_base_mod, psb_protect_name => psb_ihalo_multivect
  use psi_mod
--- a/base/comm/psb_iovrl.f90
+++ b/base/comm/psb_iovrl.f90
@ -32,12 +32,12 @@
 !
 ! File:  psb_iovrl.f90
 !
-! Subroutine: psb_iovrlm
+! Subroutine: psb_iovrl_vect
 !   This subroutine performs the exchange of the overlap elements in a 
-!    distributed dense matrix between all the processes.
+!    distributed dense vector between all the processes.
 !
 ! Arguments:
-!   x(:,:)      -  integer                   The local part of the dense matrix.
+!   x           -  type(psb_i_vect_type)  The local part of the vector
 !   desc_a      -  type(psb_desc_type).    The communication descriptor.
 !   info        -  integer.                  Return code.
 !   jx          -  integer(optional).        The starting column of the global matrix
@ -180,7 +180,38 @@ subroutine  psb_iovrl_vect(x,desc_a,info,work,update,mode)
    return
 end subroutine psb_iovrl_vect

-
+!
+! Subroutine: psb_iovrl_multivect
+!   This subroutine performs the exchange of the overlap elements in a 
+!    distributed multivector between all the processes.
+!
+! Arguments:
+!   x           -  type(psb_i_multivect_type)  The local part of the multivector
+!   desc_a      -  type(psb_desc_type).    The communication descriptor.
+!   info        -  integer.                  Return code.
+!   jx          -  integer(optional).        The starting column of the global matrix
+!   ik          -  integer(optional).        The number of columns to gather. 
+!   work        -  integer(optional).           A work area.
+!   update      -  integer(optional).        Type of update:
+!                                            psb_none_   do nothing
+!                                            psb_sum_    sum of overlaps
+!                                            psb_avg_    average of overlaps
+!   mode        -  integer(optional).        Choose the algorithm for data exchange: 
+!                                       this is chosen through bit fields. 
+!                                       - swap_mpi  = iand(flag,psb_swap_mpi_)  /= 0
+!                                       - swap_sync = iand(flag,psb_swap_sync_) /= 0
+!                                       - swap_send = iand(flag,psb_swap_send_) /= 0
+!                                       - swap_recv = iand(flag,psb_swap_recv_) /= 0
+!                                       - if (swap_mpi):  use underlying MPI_ALLTOALLV.
+!                                       - if (swap_sync): use PSB_SND and PSB_RCV in 
+!                                                       synchronized pairs
+!                                       - if (swap_send .and. swap_recv): use mpi_irecv 
+!                                                       and mpi_send
+!                                       - if (swap_send): use psb_snd (but need another 
+!                                                       call with swap_recv to complete)
+!                                       - if (swap_recv): use psb_rcv (completing a 
+!                                                       previous call with swap_send)
+!
 subroutine  psb_iovrl_multivect(x,desc_a,info,work,update,mode)
  use psb_base_mod, psb_protect_name => psb_iovrl_multivect
  use psi_mod
--- a/base/comm/psb_iscatter.F90
+++ b/base/comm/psb_iscatter.F90
@ -31,13 +31,13 @@
 !    
 ! File:  psb_iscatter.f90
 !
-! Subroutine: psb_iscatterm
-!   This subroutine scatters a global matrix locally owned by one process
-!   into pieces that are local to alle the processes.
+! Subroutine: psb_iscatter_vect
+!   This subroutine scatters a global vector locally owned by one process
+!   into pieces that are local to all the processes.
 !
 ! Arguments:
-!   globx     -  integer,dimension(:,:).       The global matrix to scatter.
-!   locx      -  integer,dimension(:,:).       The local piece of the distributed matrix.
+!   globx     -  integer,dimension(:)          The global vector to scatter.
+!   locx      -  type(psb_i_vect_type)      The local piece of the distributed vector.
 !   desc_a    -  type(psb_desc_type).        The communication descriptor.
 !   info      -  integer.                      Error code.
 !   iroot     -  integer(optional).            The process that owns the global matrix. 
--- a/base/comm/psb_ispgather.F90
+++ b/base/comm/psb_ispgather.F90
@ -30,6 +30,17 @@
 !   
 !    
 ! File:  psb_ispgather.f90
+!
+! Gathers a sparse matrix onto a single process.
+! Two variants:
+! 1. Gathers to PSB_i_SPARSE_MAT   (i.e. to matrix with IPK_ indices)
+! 2. Gathers to the long-index sparse matrix type  (i.e. to matrix with LPK_ indices)
+!
+! Note: this function uses MPI_ALLGATHERV. At this time, the size of the
+! resulting matrix must fit in a 4-byte integer, because MPI displacements
+! are restricted to 4-byte integers.
+! 
+!
 subroutine  psb_isp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keeploc)
 #if defined(HAVE_ISO_FORTRAN_ENV)
  use iso_fortran_env
--- a/base/comm/psb_lgather.f90
+++ b/base/comm/psb_lgather.f90
@ -31,14 +31,14 @@
 !    
 ! File:  psb_lgather.f90
 !
-! Subroutine: psb_lgatherm
-!   This subroutine gathers pieces of a distributed dense matrix into a local one.
+! Subroutine: psb_lgather_vect
+!   This subroutine gathers pieces of a distributed vector into a local one.
 !
 ! Arguments:
-!   globx     -  integer,dimension(:,:).          The local matrix into which gather 
+!   globx     -  integer,dimension(:).           The local vector into which to gather
 !                                                  the distributed pieces.
-!   locx      -  integer,dimension(:,:).          The local piece of the distributed 
-!                                                  matrix to be gathered.
+!   locx      -  type(psb_l_vect_type).        The local piece of the distributed
+!                                                  vector to be gathered.
 !   desc_a    -  type(psb_desc_type).        The communication descriptor.
 !   info      -  integer.                      Error code.
 !   iroot     -  integer.                      The process that has to own the 
@ -159,6 +159,8 @@ subroutine  psb_lgather_vect(globx, locx, desc_a, info, iroot)

 end subroutine psb_lgather_vect

+! Subroutine: psb_lgather_multivect
+!   This subroutine gathers pieces of a distributed multivector into a local one.

 subroutine  psb_lgather_multivect(globx, locx, desc_a, info, iroot)
  use psb_base_mod, psb_protect_name => psb_lgather_multivect
--- a/base/comm/psb_lhalo.f90
+++ b/base/comm/psb_lhalo.f90
@ -31,17 +31,17 @@
 !    
 ! File:  psb_lhalo.f90
 !
-! Subroutine: psb_lhalom
+! Subroutine: psb_lhalo_vect
 !   This subroutine performs the exchange of the halo elements in a 
-!    distributed dense matrix between all the processes.
+!    distributed vector between all the processes.
 !
 ! Arguments:
-!   x         -  integer,dimension(:,:).          The local part of the dense matrix.
+!   x         -  type(psb_l_vect_type)    The local part of the vector
 !   desc_a    -  type(psb_desc_type).        The communication descriptor.
 !   info      -  integer.                      Return code
 !   jx        -  integer(optional).            The starting column of the global matrix. 
 !   ik        -  integer(optional).            The number of columns to gather. 
-!   work      -  integer(optional).            Work  area.
+!   work      -  integer(optional).             Work  area.
 !   tran      -  character(optional).          Transpose exchange.
 !   mode      -  integer(optional).            Communication mode (see Swapdata)
 !   data     - integer                 Which index list in desc_a should be used
@ -52,7 +52,6 @@
 !                                       psb_comm_mov_     use ovr_mst_idx
 !
 !
-
 subroutine  psb_lhalo_vect(x,desc_a,info,work,tran,mode,data)
  use psb_base_mod, psb_protect_name => psb_lhalo_vect
  use psi_mod
@ -185,7 +184,28 @@ subroutine  psb_lhalo_vect(x,desc_a,info,work,tran,mode,data)
    return
 end subroutine psb_lhalo_vect

-
+!
+! Subroutine: psb_lhalo_multivect
+!   This subroutine performs the exchange of the halo elements in a 
+!    distributed multivector between all the processes.
+!
+! Arguments:
+!   x         -  type(psb_l_multivect_type)    The local part of the multivector
+!   desc_a    -  type(psb_desc_type).        The communication descriptor.
+!   info      -  integer.                      Return code
+!   jx        -  integer(optional).            The starting column of the global matrix. 
+!   ik        -  integer(optional).            The number of columns to gather. 
+!   work      -  integer(optional).             Work  area.
+!   tran      -  character(optional).          Transpose exchange.
+!   mode      -  integer(optional).            Communication mode (see Swapdata)
+!   data     - integer                 Which index list in desc_a should be used
+!                                      to retrieve rows, default psb_comm_halo_
+!                                       psb_comm_halo_    use halo_index
+!                                       psb_comm_ext_     use ext_index 
+!                                       psb_comm_ovrl_    use ovrl_index
+!                                       psb_comm_mov_     use ovr_mst_idx
+!
+!
 subroutine  psb_lhalo_multivect(x,desc_a,info,work,tran,mode,data)
  use psb_base_mod, psb_protect_name => psb_lhalo_multivect
  use psi_mod
--- a/base/comm/psb_lovrl.f90
+++ b/base/comm/psb_lovrl.f90
@ -32,12 +32,12 @@
 !
 ! File:  psb_lovrl.f90
 !
-! Subroutine: psb_lovrlm
+! Subroutine: psb_lovrl_vect
 !   This subroutine performs the exchange of the overlap elements in a 
-!    distributed dense matrix between all the processes.
+!    distributed dense vector between all the processes.
 !
 ! Arguments:
-!   x(:,:)      -  integer                   The local part of the dense matrix.
+!   x           -  type(psb_l_vect_type)  The local part of the vector
 !   desc_a      -  type(psb_desc_type).    The communication descriptor.
 !   info        -  integer.                  Return code.
 !   jx          -  integer(optional).        The starting column of the global matrix
@ -180,7 +180,38 @@ subroutine  psb_lovrl_vect(x,desc_a,info,work,update,mode)
    return
 end subroutine psb_lovrl_vect

-
+!
+! Subroutine: psb_lovrl_multivect
+!   This subroutine performs the exchange of the overlap elements in a 
+!    distributed multivector between all the processes.
+!
+! Arguments:
+!   x           -  type(psb_l_multivect_type)  The local part of the multivector
+!   desc_a      -  type(psb_desc_type).    The communication descriptor.
+!   info        -  integer.                  Return code.
+!   jx          -  integer(optional).        The starting column of the global matrix
+!   ik          -  integer(optional).        The number of columns to gather. 
+!   work        -  integer(optional).           A work area.
+!   update      -  integer(optional).        Type of update:
+!                                            psb_none_   do nothing
+!                                            psb_sum_    sum of overlaps
+!                                            psb_avg_    average of overlaps
+!   mode        -  integer(optional).        Choose the algorithm for data exchange: 
+!                                       this is chosen through bit fields. 
+!                                       - swap_mpi  = iand(flag,psb_swap_mpi_)  /= 0
+!                                       - swap_sync = iand(flag,psb_swap_sync_) /= 0
+!                                       - swap_send = iand(flag,psb_swap_send_) /= 0
+!                                       - swap_recv = iand(flag,psb_swap_recv_) /= 0
+!                                       - if (swap_mpi):  use underlying MPI_ALLTOALLV.
+!                                       - if (swap_sync): use PSB_SND and PSB_RCV in 
+!                                                       synchronized pairs
+!                                       - if (swap_send .and. swap_recv): use mpi_irecv 
+!                                                       and mpi_send
+!                                       - if (swap_send): use psb_snd (but need another 
+!                                                       call with swap_recv to complete)
+!                                       - if (swap_recv): use psb_rcv (completing a 
+!                                                       previous call with swap_send)
+!
 subroutine  psb_lovrl_multivect(x,desc_a,info,work,update,mode)
  use psb_base_mod, psb_protect_name => psb_lovrl_multivect
  use psi_mod
--- a/base/comm/psb_lscatter.F90
+++ b/base/comm/psb_lscatter.F90
@ -31,13 +31,13 @@
 !    
 ! File:  psb_lscatter.f90
 !
-! Subroutine: psb_lscatterm
-!   This subroutine scatters a global matrix locally owned by one process
-!   into pieces that are local to alle the processes.
+! Subroutine: psb_lscatter_vect
+!   This subroutine scatters a global vector locally owned by one process
+!   into pieces that are local to all the processes.
 !
 ! Arguments:
-!   globx     -  integer,dimension(:,:).       The global matrix to scatter.
-!   locx      -  integer,dimension(:,:).       The local piece of the distributed matrix.
+!   globx     -  integer,dimension(:)          The global vector to scatter.
+!   locx      -  type(psb_l_vect_type)      The local piece of the distributed vector.
 !   desc_a    -  type(psb_desc_type).        The communication descriptor.
 !   info      -  integer.                      Error code.
 !   iroot     -  integer(optional).            The process that owns the global matrix. 
--- a/base/comm/psb_lspgather.F90
+++ b/base/comm/psb_lspgather.F90
@ -30,6 +30,17 @@
 !   
 !    
 ! File:  psb_lspgather.f90
+!
+! Gathers a sparse matrix onto a single process.
+! Two variants:
+! 1. Gathers to PSB_l_SPARSE_MAT   (i.e. to matrix with IPK_ indices)
+! 2. Gathers to the long-index sparse matrix type  (i.e. to matrix with LPK_ indices)
+!
+! Note: this function uses MPI_ALLGATHERV. At this time, the size of the
+! resulting matrix must fit in a 4-byte integer, because MPI displacements
+! are restricted to 4-byte integers.
+! 
+!
 subroutine  psb_lsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keeploc)
 #if defined(HAVE_ISO_FORTRAN_ENV)
  use iso_fortran_env
--- a/base/comm/psb_mgather_a.f90
+++ b/base/comm/psb_mgather_a.f90
@ -31,7 +31,7 @@
 !    
 ! File:  psb_mgather.f90
 !
-! Subroutine: psb_mgatherm
+! Subroutine: psb_mgather
 !   This subroutine gathers pieces of a distributed dense matrix into a local one.
 !
 ! Arguments:
--- a/base/comm/psb_mscatter_a.F90
+++ b/base/comm/psb_mscatter_a.F90
@ -33,7 +33,7 @@
 !
 ! Subroutine: psb_mscatterm
 !   This subroutine scatters a global matrix locally owned by one process
-!   into pieces that are local to alle the processes.
+!   into pieces that are local to all the processes.
 !
 ! Arguments:
 !   globx     -  integer,dimension(:,:).       The global matrix to scatter.
@ -278,7 +278,7 @@ end subroutine psb_mscatterm

 ! Subroutine: psb_mscatterv
 !   This subroutine scatters a global vector locally owned by one process
-!   into pieces that are local to alle the processes.
+!   into pieces that are local to all the processes.
 !
 ! Arguments:
 !   globx     -  integer,dimension(:).         The global vector to scatter.
--- a/base/comm/psb_sgather.f90
+++ b/base/comm/psb_sgather.f90
@ -31,14 +31,14 @@
 !    
 ! File:  psb_sgather.f90
 !
-! Subroutine: psb_sgatherm
-!   This subroutine gathers pieces of a distributed dense matrix into a local one.
+! Subroutine: psb_sgather_vect
+!   This subroutine gathers pieces of a distributed vector into a local one.
 !
 ! Arguments:
-!   globx     -  real,dimension(:,:).          The local matrix into which gather 
+!   globx     -  real,dimension(:).           The local vector into which to gather
 !                                                  the distributed pieces.
-!   locx      -  real,dimension(:,:).          The local piece of the distributed 
-!                                                  matrix to be gathered.
+!   locx      -  type(psb_s_vect_type).        The local piece of the distributed
+!                                                  vector to be gathered.
 !   desc_a    -  type(psb_desc_type).        The communication descriptor.
 !   info      -  integer.                      Error code.
 !   iroot     -  integer.                      The process that has to own the 
@ -159,6 +159,8 @@ subroutine  psb_sgather_vect(globx, locx, desc_a, info, iroot)

 end subroutine psb_sgather_vect

+! Subroutine: psb_sgather_multivect
+!   This subroutine gathers pieces of a distributed multivector into a local one.

 subroutine  psb_sgather_multivect(globx, locx, desc_a, info, iroot)
  use psb_base_mod, psb_protect_name => psb_sgather_multivect
--- a/base/comm/psb_sgather_a.f90
+++ b/base/comm/psb_sgather_a.f90
@ -31,7 +31,7 @@
 !    
 ! File:  psb_sgather.f90
 !
-! Subroutine: psb_sgatherm
+! Subroutine: psb_sgather
 !   This subroutine gathers pieces of a distributed dense matrix into a local one.
 !
 ! Arguments:
--- a/base/comm/psb_shalo.f90
+++ b/base/comm/psb_shalo.f90
@ -31,17 +31,17 @@
 !    
 ! File:  psb_shalo.f90
 !
-! Subroutine: psb_shalom
+! Subroutine: psb_shalo_vect
 !   This subroutine performs the exchange of the halo elements in a 
-!    distributed dense matrix between all the processes.
+!    distributed vector between all the processes.
 !
 ! Arguments:
-!   x         -  real,dimension(:,:).          The local part of the dense matrix.
+!   x         -  type(psb_s_vect_type)    The local part of the vector
 !   desc_a    -  type(psb_desc_type).        The communication descriptor.
 !   info      -  integer.                      Return code
 !   jx        -  integer(optional).            The starting column of the global matrix. 
 !   ik        -  integer(optional).            The number of columns to gather. 
-!   work      -  real(optional).            Work  area.
+!   work      -  real(optional).             Work  area.
 !   tran      -  character(optional).          Transpose exchange.
 !   mode      -  integer(optional).            Communication mode (see Swapdata)
 !   data     - integer                 Which index list in desc_a should be used
@ -52,7 +52,6 @@
 !                                       psb_comm_mov_     use ovr_mst_idx
 !
 !
-
 subroutine  psb_shalo_vect(x,desc_a,info,work,tran,mode,data)
  use psb_base_mod, psb_protect_name => psb_shalo_vect
  use psi_mod
@ -185,7 +184,28 @@ subroutine  psb_shalo_vect(x,desc_a,info,work,tran,mode,data)
    return
 end subroutine psb_shalo_vect

-
+!
+! Subroutine: psb_shalo_multivect
+!   This subroutine performs the exchange of the halo elements in a 
+!    distributed multivector between all the processes.
+!
+! Arguments:
+!   x         -  type(psb_s_multivect_type)    The local part of the multivector
+!   desc_a    -  type(psb_desc_type).        The communication descriptor.
+!   info      -  integer.                      Return code
+!   jx        -  integer(optional).            The starting column of the global matrix. 
+!   ik        -  integer(optional).            The number of columns to gather. 
+!   work      -  real(optional).             Work  area.
+!   tran      -  character(optional).          Transpose exchange.
+!   mode      -  integer(optional).            Communication mode (see Swapdata)
+!   data     - integer                 Which index list in desc_a should be used
+!                                      to retrieve rows, default psb_comm_halo_
+!                                       psb_comm_halo_    use halo_index
+!                                       psb_comm_ext_     use ext_index 
+!                                       psb_comm_ovrl_    use ovrl_index
+!                                       psb_comm_mov_     use ovr_mst_idx
+!
+!
 subroutine  psb_shalo_multivect(x,desc_a,info,work,tran,mode,data)
  use psb_base_mod, psb_protect_name => psb_shalo_multivect
  use psi_mod
--- a/base/comm/psb_sovrl.f90
+++ b/base/comm/psb_sovrl.f90
@ -32,12 +32,12 @@
 !
 ! File:  psb_sovrl.f90
 !
-! Subroutine: psb_sovrlm
+! Subroutine: psb_sovrl_vect
 !   This subroutine performs the exchange of the overlap elements in a 
-!    distributed dense matrix between all the processes.
+!    distributed dense vector between all the processes.
 !
 ! Arguments:
-!   x(:,:)      -  real                   The local part of the dense matrix.
+!   x           -  type(psb_s_vect_type)  The local part of the vector
 !   desc_a      -  type(psb_desc_type).    The communication descriptor.
 !   info        -  integer.                  Return code.
 !   jx          -  integer(optional).        The starting column of the global matrix
@ -180,7 +180,38 @@ subroutine  psb_sovrl_vect(x,desc_a,info,work,update,mode)
    return
 end subroutine psb_sovrl_vect

-
+!
+! Subroutine: psb_sovrl_multivect
+!   This subroutine performs the exchange of the overlap elements in a 
+!    distributed multivector between all the processes.
+!
+! Arguments:
+!   x           -  type(psb_s_multivect_type)  The local part of the multivector
+!   desc_a      -  type(psb_desc_type).    The communication descriptor.
+!   info        -  integer.                  Return code.
+!   jx          -  integer(optional).        The starting column of the global matrix
+!   ik          -  integer(optional).        The number of columns to gather. 
+!   work        -  real(optional).           A work area.
+!   update      -  integer(optional).        Type of update:
+!                                            psb_none_   do nothing
+!                                            psb_sum_    sum of overlaps
+!                                            psb_avg_    average of overlaps
+!   mode        -  integer(optional).        Choose the algorithm for data exchange: 
+!                                       this is chosen through bit fields. 
+!                                       - swap_mpi  = iand(flag,psb_swap_mpi_)  /= 0
+!                                       - swap_sync = iand(flag,psb_swap_sync_) /= 0
+!                                       - swap_send = iand(flag,psb_swap_send_) /= 0
+!                                       - swap_recv = iand(flag,psb_swap_recv_) /= 0
+!                                       - if (swap_mpi):  use underlying MPI_ALLTOALLV.
+!                                       - if (swap_sync): use PSB_SND and PSB_RCV in 
+!                                                       synchronized pairs
+!                                       - if (swap_send .and. swap_recv): use mpi_irecv 
+!                                                       and mpi_send
+!                                       - if (swap_send): use psb_snd (but need another 
+!                                                       call with swap_recv to complete)
+!                                       - if (swap_recv): use psb_rcv (completing a 
+!                                                       previous call with swap_send)
+!
 subroutine  psb_sovrl_multivect(x,desc_a,info,work,update,mode)
  use psb_base_mod, psb_protect_name => psb_sovrl_multivect
  use psi_mod
--- a/base/comm/psb_sscatter.F90
+++ b/base/comm/psb_sscatter.F90
@ -31,13 +31,13 @@
 !    
 ! File:  psb_sscatter.f90
 !
-! Subroutine: psb_sscatterm
-!   This subroutine scatters a global matrix locally owned by one process
-!   into pieces that are local to alle the processes.
+! Subroutine: psb_sscatter_vect
+!   This subroutine scatters a global vector locally owned by one process
+!   into pieces that are local to all the processes.
 !
 ! Arguments:
-!   globx     -  real,dimension(:,:).       The global matrix to scatter.
-!   locx      -  real,dimension(:,:).       The local piece of the distributed matrix.
+!   globx     -  real,dimension(:)          The global vector to scatter.
+!   locx      -  type(psb_s_vect_type)      The local piece of the distributed vector.
 !   desc_a    -  type(psb_desc_type).        The communication descriptor.
 !   info      -  integer.                      Error code.
 !   iroot     -  integer(optional).            The process that owns the global matrix. 
--- a/base/comm/psb_sscatter_a.F90
+++ b/base/comm/psb_sscatter_a.F90
@ -33,7 +33,7 @@
 !
 ! Subroutine: psb_sscatterm
 !   This subroutine scatters a global matrix locally owned by one process
-!   into pieces that are local to alle the processes.
+!   into pieces that are local to all the processes.
 !
 ! Arguments:
 !   globx     -  real,dimension(:,:).       The global matrix to scatter.
@ -278,7 +278,7 @@ end subroutine psb_sscatterm

 ! Subroutine: psb_sscatterv
 !   This subroutine scatters a global vector locally owned by one process
-!   into pieces that are local to alle the processes.
+!   into pieces that are local to all the processes.
 !
 ! Arguments:
 !   globx     -  real,dimension(:).         The global vector to scatter.
--- a/base/comm/psb_sspgather.F90
+++ b/base/comm/psb_sspgather.F90
@ -30,6 +30,17 @@
 !   
 !    
 ! File:  psb_sspgather.f90
+!
+! Gathers a sparse matrix onto a single process.
+! Two variants:
+! 1. Gathers to PSB_s_SPARSE_MAT   (i.e. to matrix with IPK_ indices)
+! 2. Gathers to PSB_ls_SPARSE_MAT  (i.e. to matrix with LPK_ indices)
+!
+! Note: this function uses MPI_ALLGATHERV. At this time, the size of the
+! resulting matrix must fit in a 4-byte integer, because MPI displacements
+! are restricted to 4-byte integers.
+! 
+!
 subroutine  psb_ssp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keeploc)
 #if defined(HAVE_ISO_FORTRAN_ENV)
  use iso_fortran_env
--- a/base/comm/psb_zgather.f90
+++ b/base/comm/psb_zgather.f90
@ -31,14 +31,14 @@
 !    
 ! File:  psb_zgather.f90
 !
-! Subroutine: psb_zgatherm
-!   This subroutine gathers pieces of a distributed dense matrix into a local one.
+! Subroutine: psb_zgather_vect
+!   This subroutine gathers pieces of a distributed vector into a local one.
 !
 ! Arguments:
-!   globx     -  complex,dimension(:,:).          The local matrix into which gather 
+!   globx     -  complex,dimension(:).           The local vector into which to gather 
 !                                                  the distributed pieces.
-!   locx      -  complex,dimension(:,:).          The local piece of the distributed 
-!                                                  matrix to be gathered.
+!   locx      -  type(psb_z_vect_type)         The local piece of the distributed 
+!                                                  vector to be gathered.
 !   desc_a    -  type(psb_desc_type).        The communication descriptor.
 !   info      -  integer.                      Error code.
 !   iroot     -  integer.                      The process that has to own the 
@ -159,6 +159,8 @@ subroutine  psb_zgather_vect(globx, locx, desc_a, info, iroot)

 end subroutine psb_zgather_vect

+! Subroutine: psb_zgather_multivect
+!   This subroutine gathers pieces of a distributed multivector into a local one.

 subroutine  psb_zgather_multivect(globx, locx, desc_a, info, iroot)
  use psb_base_mod, psb_protect_name => psb_zgather_multivect
--- a/base/comm/psb_zgather_a.f90
+++ b/base/comm/psb_zgather_a.f90
@ -31,7 +31,7 @@
 !    
 ! File:  psb_zgather.f90
 !
-! Subroutine: psb_zgatherm
+! Subroutine: psb_zgather
 !   This subroutine gathers pieces of a distributed dense matrix into a local one.
 !
 ! Arguments:
--- a/base/comm/psb_zhalo.f90
+++ b/base/comm/psb_zhalo.f90
@ -31,17 +31,17 @@
 !    
 ! File:  psb_zhalo.f90
 !
-! Subroutine: psb_zhalom
+! Subroutine: psb_zhalo_vect
 !   This subroutine performs the exchange of the halo elements in a 
-!    distributed dense matrix between all the processes.
+!    distributed vector between all the processes.
 !
 ! Arguments:
-!   x         -  complex,dimension(:,:).          The local part of the dense matrix.
+!   x         -  type(psb_z_vect_type)    The local part of the vector
 !   desc_a    -  type(psb_desc_type).        The communication descriptor.
 !   info      -  integer.                      Return code
 !   jx        -  integer(optional).            The starting column of the global matrix. 
 !   ik        -  integer(optional).            The number of columns to gather. 
-!   work      -  complex(optional).            Work  area.
+!   work      -  complex(optional).             Work  area.
 !   tran      -  character(optional).          Transpose exchange.
 !   mode      -  integer(optional).            Communication mode (see Swapdata)
 !   data     - integer                 Which index list in desc_a should be used
@ -52,7 +52,6 @@
 !                                       psb_comm_mov_     use ovr_mst_idx
 !
 !
-
 subroutine  psb_zhalo_vect(x,desc_a,info,work,tran,mode,data)
  use psb_base_mod, psb_protect_name => psb_zhalo_vect
  use psi_mod
@ -185,7 +184,28 @@ subroutine  psb_zhalo_vect(x,desc_a,info,work,tran,mode,data)
    return
 end subroutine psb_zhalo_vect

-
+!
+! Subroutine: psb_zhalo_multivect
+!   This subroutine performs the exchange of the halo elements in a 
+!    distributed multivector between all the processes.
+!
+! Arguments:
+!   x         -  type(psb_z_multivect_type)    The local part of the multivector
+!   desc_a    -  type(psb_desc_type).        The communication descriptor.
+!   info      -  integer.                      Return code
+!   jx        -  integer(optional).            The starting column of the global matrix. 
+!   ik        -  integer(optional).            The number of columns to gather. 
+!   work      -  complex(optional).             Work  area.
+!   tran      -  character(optional).          Transpose exchange.
+!   mode      -  integer(optional).            Communication mode (see Swapdata)
+!   data     - integer                 Which index list in desc_a should be used
+!                                      to retrieve rows, default psb_comm_halo_
+!                                       psb_comm_halo_    use halo_index
+!                                       psb_comm_ext_     use ext_index 
+!                                       psb_comm_ovrl_    use ovrl_index
+!                                       psb_comm_mov_     use ovr_mst_idx
+!
+!
 subroutine  psb_zhalo_multivect(x,desc_a,info,work,tran,mode,data)
  use psb_base_mod, psb_protect_name => psb_zhalo_multivect
  use psi_mod
--- a/base/comm/psb_zovrl.f90
+++ b/base/comm/psb_zovrl.f90
@ -32,12 +32,12 @@
 !
 ! File:  psb_zovrl.f90
 !
-! Subroutine: psb_zovrlm
+! Subroutine: psb_zovrl_vect
 !   This subroutine performs the exchange of the overlap elements in a 
-!    distributed dense matrix between all the processes.
+!    distributed dense vector between all the processes.
 !
 ! Arguments:
-!   x(:,:)      -  complex                   The local part of the dense matrix.
+!   x           -  type(psb_z_vect_type)  The local part of the vector
 !   desc_a      -  type(psb_desc_type).    The communication descriptor.
 !   info        -  integer.                  Return code.
 !   jx          -  integer(optional).        The starting column of the global matrix
@ -180,7 +180,38 @@ subroutine  psb_zovrl_vect(x,desc_a,info,work,update,mode)
    return
 end subroutine psb_zovrl_vect

-
+!
+! Subroutine: psb_zovrl_multivect
+!   This subroutine performs the exchange of the overlap elements in a 
+!    distributed multivector between all the processes.
+!
+! Arguments:
+!   x           -  type(psb_z_multivect_type)  The local part of the multivector
+!   desc_a      -  type(psb_desc_type).    The communication descriptor.
+!   info        -  integer.                  Return code.
+!   jx          -  integer(optional).        The starting column of the global matrix
+!   ik          -  integer(optional).        The number of columns to gather. 
+!   work        -  complex(optional).           A work area.
+!   update      -  integer(optional).        Type of update:
+!                                            psb_none_   do nothing
+!                                            psb_sum_    sum of overlaps
+!                                            psb_avg_    average of overlaps
+!   mode        -  integer(optional).        Choose the algorithm for data exchange: 
+!                                       this is chosen through bit fields. 
+!                                       - swap_mpi  = iand(flag,psb_swap_mpi_)  /= 0
+!                                       - swap_sync = iand(flag,psb_swap_sync_) /= 0
+!                                       - swap_send = iand(flag,psb_swap_send_) /= 0
+!                                       - swap_recv = iand(flag,psb_swap_recv_) /= 0
+!                                       - if (swap_mpi):  use underlying MPI_ALLTOALLV.
+!                                       - if (swap_sync): use PSB_SND and PSB_RCV in 
+!                                                       synchronized pairs
+!                                       - if (swap_send .and. swap_recv): use mpi_irecv 
+!                                                       and mpi_send
+!                                       - if (swap_send): use psb_snd (but need another 
+!                                                       call with swap_recv to complete)
+!                                       - if (swap_recv): use psb_rcv (completing a 
+!                                                       previous call with swap_send)
+!
 subroutine  psb_zovrl_multivect(x,desc_a,info,work,update,mode)
  use psb_base_mod, psb_protect_name => psb_zovrl_multivect
  use psi_mod
--- a/base/comm/psb_zscatter.F90
+++ b/base/comm/psb_zscatter.F90
@ -31,13 +31,13 @@
 !    
 ! File:  psb_zscatter.f90
 !
-! Subroutine: psb_zscatterm
-!   This subroutine scatters a global matrix locally owned by one process
-!   into pieces that are local to alle the processes.
+! Subroutine: psb_zscatter_vect
+!   This subroutine scatters a global vector locally owned by one process
+!   into pieces that are local to all the processes.
 !
 ! Arguments:
-!   globx     -  complex,dimension(:,:).       The global matrix to scatter.
-!   locx      -  complex,dimension(:,:).       The local piece of the distributed matrix.
+!   globx     -  complex,dimension(:)          The global vector to scatter.
+!   locx      -  type(psb_z_vect_type)      The local piece of the distributed vector.
 !   desc_a    -  type(psb_desc_type).        The communication descriptor.
 !   info      -  integer.                      Error code.
 !   iroot     -  integer(optional).            The process that owns the global matrix. 
--- a/Show More
+++ b/Show More