From 891944834ffdab61ab83d4901b3387173ed32453 Mon Sep 17 00:00:00 2001
From: Salvatore Filippone <salvatore.filippone@cranfield.ac.uk>
Date: Thu, 31 Jan 2008 12:23:47 +0000
Subject: [PATCH] mld2p4:  mld_das_bld.f90  mld_dbaseprec_aply.f90 
 mld_dbaseprec_bld.f90  mld_dfact_bld.f90  mld_dilu_bld.f90 
 mld_dmlprec_aply.f90  mld_dsub_aply.f90  mld_dsub_solve.f90 
 mld_prec_type.f90  mld_zas_bld.f90  mld_zbaseprec_aply.f90 
 mld_zbaseprec_bld.f90  mld_zfact_bld.f90  mld_zilu_bld.f90 
 mld_zmlprec_aply.f90  mld_zsub_aply.f90  mld_zsub_solve.f90

Merged in further header updates from Pasqua/Daniela.
---
 mlprec/mld_das_bld.f90        |   8 +-
 mlprec/mld_dbaseprec_aply.f90 |   2 +-
 mlprec/mld_dbaseprec_bld.f90  |   2 +-
 mlprec/mld_dfact_bld.f90      |  21 ++-
 mlprec/mld_dilu_bld.f90       |   7 +-
 mlprec/mld_dmlprec_aply.f90   | 287 ++++++++++++++++++++++++--------
 mlprec/mld_dsub_aply.f90      |   7 +-
 mlprec/mld_dsub_solve.f90     |  35 ++--
 mlprec/mld_prec_type.f90      |   4 +-
 mlprec/mld_zas_bld.f90        |   8 +-
 mlprec/mld_zbaseprec_aply.f90 |   2 +-
 mlprec/mld_zbaseprec_bld.f90  |   2 +-
 mlprec/mld_zfact_bld.f90      |  21 ++-
 mlprec/mld_zilu_bld.f90       |   7 +-
 mlprec/mld_zmlprec_aply.f90   | 300 ++++++++++++++++++++++++++--------
 mlprec/mld_zsub_aply.f90      |   7 +-
 mlprec/mld_zsub_solve.f90     |  35 ++--
 17 files changed, 545 insertions(+), 210 deletions(-)

diff --git a/mlprec/mld_das_bld.f90 b/mlprec/mld_das_bld.f90
index 63fc8db2..5bc7d082 100644
--- a/mlprec/mld_das_bld.f90
+++ b/mlprec/mld_das_bld.f90
@@ -39,10 +39,10 @@
 ! Subroutine: mld_das_bld
 ! Version:    real
 !
-!  This routine builds the Additive Schwarz (AS) preconditioner.
-!  If the preconditioner is the block-Jacobi one, the routine makes only a copy of
-!  the descriptor of the original matrix and then proceeds to call mld_fact_bld
-!  for LU or incomplete LU factorization of the diagonal blocks of the
+!  This routine builds Additive Schwarz (AS) preconditioners. If the AS
+!  preconditioner is actually the block-Jacobi one, the routine makes only a
+!  copy of the descriptor of the original matrix and then calls mld_fact_bld
+!  to perform an LU or ILU factorization of the diagonal blocks of the
 !  distributed matrix.
 !    
 !
diff --git a/mlprec/mld_dbaseprec_aply.f90 b/mlprec/mld_dbaseprec_aply.f90
index a03ef504..5240ede0 100644
--- a/mlprec/mld_dbaseprec_aply.f90
+++ b/mlprec/mld_dbaseprec_aply.f90
@@ -50,7 +50,7 @@
 !
 !  The routine is used by mld_dmlprec_aply, to apply the multilevel preconditioners,
 !  or directly by mld_dprec_aply, to apply the basic one-level preconditioners (diagonal,
-!  block-Jacobi or additive Schwarz), or to have no preconditioning.
+!  block-Jacobi or additive Schwarz). It also manages the case of no preconditioning.
 !
 !
 ! Arguments:
diff --git a/mlprec/mld_dbaseprec_bld.f90 b/mlprec/mld_dbaseprec_bld.f90
index 6ad6a6c5..c604a481 100644
--- a/mlprec/mld_dbaseprec_bld.f90
+++ b/mlprec/mld_dbaseprec_bld.f90
@@ -45,7 +45,7 @@
 !
 !  Details on the base preconditioner to be built are stored in the iprcparm
 !  field of the preconditioner data structure (for a description of this
-!  structure see mld_prec_type.f90).
+!  data structure see mld_prec_type.f90).
 !    
 !
 ! Arguments:
diff --git a/mlprec/mld_dfact_bld.f90 b/mlprec/mld_dfact_bld.f90
index 6063a794..d030b77e 100644
--- a/mlprec/mld_dfact_bld.f90
+++ b/mlprec/mld_dfact_bld.f90
@@ -39,24 +39,27 @@
 ! Subroutine: mld_dfact_bld
 ! Version:    real
 !
-!  This routine computes an LU or incomplete LU factorization of the diagonal blocks
-!  of a distributed matrix, according to the value of p%iprcparm(iprcparm(sub_solve_), 
-!  set by the user through mld_dprecinit or mld_dprecset.
-!  It may also split the matrix into its block-diagonal and
-!  off block-diagonal parts, for the future application of multiple
-!  block-Jacobi sweeps.
+!  This routine computes an LU or incomplete LU (ILU) factorization of the diagonal
+!  blocks of a distributed matrix, according to the value of
+!  p%iprcparm(iprcparm(sub_solve_), set by the user through
+!  mld_dprecinit or mld_dprecset.
+!  It may also compute an LU factorization of a distributed matrix, or split
+!  a distributed matrix into its block-diagonal and off block-diagonal parts, 
+!  for the future application of multiple block-Jacobi sweeps.
 !
 !  This routine is used by mld_as_bld, to build a 'base' block-Jacobi or
 !  Additive Schwarz (AS) preconditioner at any level of a multilevel preconditioner,
 !  or a block-Jacobi or LU or ILU solver at the coarsest level of a multilevel
-!  preconditioner. For the Additive Schwarz, it is called from mld_as_bld,
-!  which prepares the overlap descriptor and retrieves the remote rows into blck.
+!  preconditioner. For the AS preconditioners, the diagonal blocks to be factorized
+!  are stored into the sparse matrix data structures a and blck, and blck contains
+!  the remote rows needed to build the extended local matrix as required by the
+!  AS preconditioner.
 !
 !  More precisely, the routine performs one of the following tasks: 
 !
 !  1. LU or ILU factorization of the diagonal blocks of the distributed matrix
 !     for the construction of a block-Jacobi or AS preconditioners
-!     (allowed at any level);
+!     (allowed at any level of a multilevel preconditioner);
 !
 !  2. setup of block-Jacobi sweeps to compute an approximate solution of a
 !     linear system
diff --git a/mlprec/mld_dilu_bld.f90 b/mlprec/mld_dilu_bld.f90
index f1c0aebc..2c24f041 100644
--- a/mlprec/mld_dilu_bld.f90
+++ b/mlprec/mld_dilu_bld.f90
@@ -39,10 +39,11 @@
 ! Subroutine: mld_dilu_bld
 ! Version:    real
 !
-!  This routine computes an incomplete LU (ILU) factorization of the diagonal blocks
-!  of a distributed matrix. This factorization is used to build 
-!  the 'base preconditioner' (block-Jacobi preconditioner/solver, Additive Schwarz
+!  This routine computes an incomplete LU (ILU) factorization of the diagonal
+!  blocks of a distributed matrix. This factorization is used to build the
+!  'base preconditioner' (block-Jacobi preconditioner/solver, Additive Schwarz
 !  preconditioner) corresponding to a certain level of a multilevel preconditioner.
+!
 !  The following factorizations are available:
 !  - ILU(k), i.e. ILU factorization with fill-in level k,
 !  - MILU(k), i.e. modified ILU factorization with fill-in level k,
diff --git a/mlprec/mld_dmlprec_aply.f90 b/mlprec/mld_dmlprec_aply.f90
index 466b7d5c..61d8c46f 100644
--- a/mlprec/mld_dmlprec_aply.f90
+++ b/mlprec/mld_dmlprec_aply.f90
@@ -92,9 +92,9 @@
 !                                             outside the diagonal block, for block-Jacobi
 !                                             sweeps.
 !         baseprecv(ilev)%av(mld_ac_)      -  The local part of the matrix A(ilev).
-!         baseprecv(ilev)%av(mld_sm_pr_)   -  The smoother prolongator.   
+!         baseprecv(ilev)%av(mld_sm_pr_)   -  The smoothed prolongator.   
 !                                             It maps vectors (ilev) ---> (ilev-1).
-!         baseprecv(ilev)%av(mld_sm_pr_t_) -  The smoother prolongator transpose.   
+!         baseprecv(ilev)%av(mld_sm_pr_t_) -  The smoothed prolongator transpose.   
 !                                             It maps vectors (ilev-1) ---> (ilev).
 !      baseprecv(ilev)%d         -  real(kind(1.d0)), dimension(:), allocatable.
 !                                   The diagonal entries of the U factor in the ILU
@@ -209,13 +209,14 @@ subroutine mld_dmlprec_aply(alpha,baseprecv,x,beta,y,desc_data,trans,work,info)
     call psb_errpush(4001,name,a_err='mld_no_ml_ in mlprc_aply?')
     goto 9999      
 
-
   case(mld_add_ml_)
+    !
+    ! Additive multilevel
+    !
 
     call add_ml_aply(alpha,baseprecv,x,beta,y,desc_data,trans_,work,info)
 
   case(mld_mult_ml_)
-
     ! 
     !  Multiplicative multilevel (multiplicative among the levels, additive inside
     !  each level)
@@ -227,7 +228,7 @@ subroutine mld_dmlprec_aply(alpha,baseprecv,x,beta,y,desc_data,trans,work,info)
     select case(baseprecv(2)%iprcparm(mld_smooth_pos_))
 
     case(mld_post_smooth_)
-      
+
       select case (trans_) 
       case('N')
         call mlt_post_ml_aply(alpha,baseprecv,x,beta,y,desc_data,trans_,work,info)
@@ -239,7 +240,6 @@ subroutine mld_dmlprec_aply(alpha,baseprecv,x,beta,y,desc_data,trans,work,info)
         goto 9999      
       end select
 
-      
     case(mld_pre_smooth_)
 
       select case (trans_) 
@@ -288,17 +288,21 @@ contains
   !
   ! Subroutine: add_ml_aply
   ! Version:    real
-  ! Note: internal subroutine of mld_dmlprec_aply.
-  ! This routine computes
+  ! Note:       internal subroutine of mld_dmlprec_aply.
+  !
+  !  This routine computes
   !  
   !                        Y = beta*Y + alpha*op(M^(-1))*X,
   !  where 
-  !  - M is a multilevel domain decomposition (Schwarz) preconditioner associated
-  !    to a certain matrix A and stored in the array baseprecv,
+  !  - M is an additive multilevel domain decomposition (Schwarz) preconditioner
+  !    associated to a certain matrix A and stored in the array baseprecv,
   !  - op(M^(-1)) is M^(-1) or its transpose, according to the value of trans,
   !  - X and Y are vectors,
   !  - alpha and beta are scalars.
   !
+  !  The preconditioner M is additive both through the levels and inside each
+  !  level.
+  !
   !  For each level we have as many submatrices as processes (except for the coarsest
   !  level where we might have a replicated index space) and each process takes care
   !  of one submatrix. 
@@ -312,19 +316,51 @@ contains
   !
   !  The levels are numbered in increasing order starting from the finest one, i.e.
   !  level 1 is the finest level and A(1) is the matrix A. 
-  !  This routine applies the multilevel preconditioner in an additive
-  !  way (additive through the levels and additive on the same level).
-  !  For details on the additive multilevel Schwarz preconditioner see
-  !  the Algorithm 3.1.1 in the book:
-  !    -  B.F. Smith, P.E. Bjorstad & W.D. Gropp,
-  !       Domain decomposition: parallel multilevel methods for elliptic partial
-  !       differential equations, Cambridge University Press, 1996.
   !
-  !  For a detailed description of the arguments, see mld_dmlprec_aply.
+  !  For details on the additive multilevel Schwarz preconditioner see the
+  !  Algorithm 3.1.1 in the book:
+  !    B.F. Smith, P.E. Bjorstad & W.D. Gropp,
+  !    Domain decomposition: parallel multilevel methods for elliptic partial
+  !    differential equations, Cambridge University Press, 1996.
+  !
+  !  For a description of the arguments see mld_dmlprec_aply.
+  !
+  !  A sketch of the algorithm implemented in this routine is provided below
+  !  (AV(ilev; sm_pr_) denotes the smoothed prolongator from level ilev to
+  !  level ilev-1, while AV(ilev; sm_pr_t_) denotes its transpose, i.e. the
+  !  corresponding restriction operator from level ilev-1 to level ilev).
+  !
+  !   1. ! Apply the base preconditioner at level 1.
+  !      ! The sum over the subdomains is carried out in the
+  !      ! application of K(1).
+  !        X(1) = Xest
+  !        Y(1) = (K(1)^(-1))*X(1)
+  !
+  !    2.  DO ilev=2,nlev
+  !
+  !         ! Transfer X(ilev-1) to the next coarser level.
+  !           X(ilev) = AV(ilev; sm_pr_t_)*X(ilev-1)
+  !
+  !         ! Apply the base preconditioner at the current level.
+  !         ! The sum over the subdomains is carried out in the
+  !         ! application of K(ilev).
+  !           Y(ilev) = (K(ilev)^(-1))*X(ilev)
+  !
+  !        ENDDO
+  !
+  !    3.  DO ilev=nlev-1,1,-1
+  !
+  !         ! Transfer Y(ilev+1) to the next finer level.
+  !           Y(ilev) = AV(ilev+1; sm_pr_)*Y(ilev+1)
+  !
+  !        ENDDO
+  !     
+  !    4.  Yext = beta*Yext + alpha*Y(1)
   !
   subroutine add_ml_aply(alpha,baseprecv,x,beta,y,desc_data,trans,work,info)
-    !
+
     implicit none 
+
     ! Arguments
     type(psb_desc_type),intent(in)      :: desc_data
     type(mld_dbaseprc_type), intent(in) :: baseprecv(:)
@@ -366,6 +402,7 @@ contains
       call psb_errpush(4010,name,a_err='Allocate')
       goto 9999      
     end if
+
     !
     ! STEP 1
     !
@@ -382,7 +419,6 @@ contains
     mlprec_wrk(1)%x2l(:) = x(:) 
     mlprec_wrk(1)%y2l(:) = dzero 
 
-
     call mld_baseprec_aply(alpha,baseprecv(1),x,beta,y,&
          & baseprecv(1)%base_desc,trans,work,info)
     if (info /=0) then 
@@ -392,8 +428,7 @@ contains
     !
     ! STEP 2
     !
-    !
-    !      For each level except the finest one ...
+    ! For each level except the finest one ...
     !
     do ilev = 2, nlev
       n_row = psb_cd_get_local_rows(baseprecv(ilev-1)%base_desc)
@@ -464,8 +499,7 @@ contains
     !
     ! STEP 3
     !
-    !
-    !      For each level except the finest one ...
+    ! For each level except the finest one ...
     !
     do ilev =nlev,2,-1
 
@@ -530,17 +564,21 @@ contains
   !
   ! Subroutine: mlt_pre_ml_aply
   ! Version:    real
-  ! Note: internal subroutine of mld_dmlprec_aply.
-  ! This routine computes
+  ! Note:       internal subroutine of mld_dmlprec_aply.
+  !
+  !  This routine computes
   !  
   !                        Y = beta*Y + alpha*op(M^(-1))*X,
   !  where 
-  !  - M is a multilevel domain decomposition (Schwarz) preconditioner associated
-  !    to a certain matrix A and stored in the array baseprecv,
+  !  - M is a hybrid multilevel domain decomposition (Schwarz) preconditioner
+  !    associated to a certain matrix A and stored in the array baseprecv,
   !  - op(M^(-1)) is M^(-1) or its transpose, according to the value of trans,
   !  - X and Y are vectors,
   !  - alpha and beta are scalars.
   !
+  !  The preconditioner M is hybrid in the sense that it is multiplicative through the
+  !  levels and additive inside a level; pre-smoothing only is applied at each level.
+  !
   !  For each level we have as many submatrices as processes (except for the coarsest
   !  level where we might have a replicated index space) and each process takes care
   !  of one submatrix. 
@@ -555,19 +593,58 @@ contains
   !  The levels are numbered in increasing order starting from the finest one, i.e.
   !  level 1 is the finest level and A(1) is the matrix A. 
   !
-  !  This routine applies the multilevel preconditioner in a hybrid way
-  !  (multiplicative through the levels and additive on the same level)
-  !  and pre-smoothing.
-  !  For details on pre-smoothed hybrid multiplicative multilevel Schwarz preconditioner,
-  !  see the Algorithm 3.2.1 in the book:
-  !    -  B.F. Smith, P.E. Bjorstad & W.D. Gropp,
-  !       Domain decomposition: parallel multilevel methods for elliptic partial
-  !       differential equations, Cambridge University Press, 1996.
+  !  For details on the pre-smoothed hybrid multiplicative multilevel Schwarz
+  !  preconditioner, see the Algorithm 3.2.1 in the book:
+  !    B.F. Smith, P.E. Bjorstad & W.D. Gropp,
+  !    Domain decomposition: parallel multilevel methods for elliptic partial
+  !    differential equations, Cambridge University Press, 1996.
+  !
+  !  For a description of the arguments see mld_dmlprec_aply.
+  ! 
+  !  A sketch of the algorithm implemented in this routine is provided below
+  !  (AV(ilev; sm_pr_) denotes the smoothed prolongator from level ilev to
+  !  level ilev-1, while AV(ilev; sm_pr_t_) denotes its transpose, i.e. the
+  !  corresponding restriction operator from level ilev-1 to level ilev).
+  !
+  !    1.   X(1) = Xext
+  !
+  !    2. ! Apply the base preconditioner at the finest level.
+  !         Y(1) = (K(1)^(-1))*X(1)
+  !
+  !    3. ! Compute the residual at the finest level.
+  !         TX(1) = X(1) - A(1)*Y(1)
+  !
+  !    4.   DO ilev=2, nlev
+  !
+  !          ! Transfer the residual to the current (coarser) level.
+  !            X(ilev) = AV(ilev; sm_pr_t_)*TX(ilev-1)
+  !
+  !          ! Apply the base preconditioner at the current level.
+  !          ! The sum over the subdomains is carried out in the
+  !          ! application of K(ilev).     
+  !            Y(ilev) = (K(ilev)^(-1))*X(ilev)
+  !
+  !          ! Compute the residual at the current level (except at
+  !          ! the coarsest level).
+  !            IF (ilev < nlev)
+  !               TX(ilev) = (X(ilev)-A(ilev)*Y(ilev))
+  !
+  !         ENDDO
+  !
+  !    5.   DO ilev=nlev-1,1,-1
+  !
+  !          ! Transfer Y(ilev+1) to the next finer level
+  !            Y(ilev) = Y(ilev) + AV(ilev+1; sm_pr_)*Y(ilev+1)
+  !
+  !         ENDDO
+  !     
+  !    6.  Yext = beta*Yext + alpha*Y(1)
+  ! 
   !
-  !  For a detailed description of the arguments, see mld_dmlprec_aply.
   subroutine mlt_pre_ml_aply(alpha,baseprecv,x,beta,y,desc_data,trans,work,info)
-    !
+
     implicit none 
+
     ! Arguments
     type(psb_desc_type),intent(in)      :: desc_data
     type(mld_dbaseprc_type), intent(in) :: baseprecv(:)
@@ -610,7 +687,6 @@ contains
       goto 9999      
     end if
 
-
     !
     ! STEP 1
     !
@@ -806,17 +882,21 @@ contains
   !
   ! Subroutine: mlt_post_ml_aply
   ! Version:    real
-  ! Note: internal subroutine of mld_dmlprec_aply.
-  ! This routine computes
+  ! Note:       internal subroutine of mld_dmlprec_aply.
+  !
+  !  This routine computes
   !  
   !                        Y = beta*Y + alpha*op(M^(-1))*X,
   !  where 
-  !  - M is a multilevel domain decomposition (Schwarz) preconditioner associated
-  !    to a certain matrix A and stored in the array baseprecv,
+  !  - M is a hybrid multilevel domain decomposition (Schwarz) preconditioner
+  !    associated to a certain matrix A and stored in the array baseprecv,
   !  - op(M^(-1)) is M^(-1) or its transpose, according to the value of trans,
   !  - X and Y are vectors,
   !  - alpha and beta are scalars.
   !
+  !  The preconditioner M is hybrid in the sense that it is multiplicative through the
+  !  levels and additive inside a level; post-smoothing only is applied at each level.
+  !
   !  For each level we have as many submatrices as processes (except for the coarsest
   !  level where we might have a replicated index space) and each process takes care
   !  of one submatrix. 
@@ -831,18 +911,49 @@ contains
   !  The levels are numbered in increasing order starting from the finest one, i.e.
   !  level 1 is the finest level and A(1) is the matrix A. 
   !
-  !  This routine applies the multilevel preconditioner in a hybrid way
-  !  (multiplicative through the levels and additive on the same level)
-  !  and post-smoothing.
   !  For details on hybrid multiplicative multilevel Schwarz preconditioners, see
-  !    -  B.F. Smith, P.E. Bjorstad & W.D. Gropp,
-  !       Domain decomposition: parallel multilevel methods for elliptic partial
-  !       differential equations, Cambridge University Press, 1996.
+  !    B.F. Smith, P.E. Bjorstad & W.D. Gropp,
+  !    Domain decomposition: parallel multilevel methods for elliptic partial
+  !    differential equations, Cambridge University Press, 1996.
+  !
+  !  For a description of the arguments see mld_dmlprec_aply.
+  !
+  !  A sketch of the algorithm implemented in this routine is provided below.
+  !  (AV(ilev; sm_pr_) denotes the smoothed prolongator from level ilev to
+  !  level ilev-1, while AV(ilev; sm_pr_t_) denotes its transpose, i.e. the
+  !  corresponding restriction operator from level ilev-1 to level ilev).
+  !
+  !    1.  X(1) = Xext
+  !
+  !    2.  DO ilev=2, nlev
+  !
+  !         ! Transfer X(ilev-1) to the next coarser level.
+  !           X(ilev) = AV(ilev; sm_pr_t_)*X(ilev-1) 
+  !
+  !        ENDDO
+  !   
+  !    3.! Apply the preconditioner at the coarsest level.
+  !        Y(nlev) = (K(nlev)^(-1))*X(nlev)
+  !
+  !    4.  DO ilev=nlev-1,1,-1
+  !
+  !         ! Transfer Y(ilev+1) to the next finer level.
+  !           Y(ilev) = AV(ilev+1; sm_pr_)*Y(ilev+1)
+  !
+  !         ! Compute the residual at the current level and apply to it the
+  !         ! base preconditioner. The sum over the subdomains is carried out
+  !         ! in the application of K(ilev).
+  !           Y(ilev) = Y(ilev) + (K(ilev)^(-1))*(X(ilev)-A(ilev)*Y(ilev))
+  !
+  !        ENDDO
+  !
+  !    5.  Yext = beta*Yext + alpha*Y(1)
+  ! 
   !
-  !  For a detailed description of the arguments, see mld_dmlprec_aply.
   subroutine mlt_post_ml_aply(alpha,baseprecv,x,beta,y,desc_data,trans,work,info)
-    !
+
     implicit none 
+
     ! Arguments
     type(psb_desc_type),intent(in)      :: desc_data
     type(mld_dbaseprc_type), intent(in) :: baseprecv(:)
@@ -884,6 +995,7 @@ contains
       call psb_errpush(4010,name,a_err='Allocate')
       goto 9999      
     end if
+
     !
     ! STEP 1
     !
@@ -1102,17 +1214,23 @@ contains
   !
   ! Subroutine: mlt_twoside_ml_aply
   ! Version:    real
-  ! Note: internal subroutine of mld_dmlprec_aply.
-  ! This routine computes
+  ! Note:       internal subroutine of mld_dmlprec_aply.
+  !
+  !  This routine computes
   !  
   !                        Y = beta*Y + alpha*op(M^(-1))*X,
   !  where 
-  !  - M is a multilevel domain decomposition (Schwarz) preconditioner associated
-  !    to a certain matrix A and stored in the array baseprecv,
+  !  - M is a symmetrized hybrid multilevel domain decomposition (Schwarz)
+  !    preconditioner associated to a certain matrix A and stored in the array
+  !    baseprecv,
   !  - op(M^(-1)) is M^(-1) or its transpose, according to the value of trans,
   !  - X and Y are vectors,               
   !  - alpha and beta are scalars.
   !
+  !  The preconditioner M is hybrid in the sense that it is multiplicative through
+  !  the levels and additive inside a level; it is symmetrized since pre-smoothing
+  !  and post-smoothing are applied at each level.
+  !
   !  For each level we have as many submatrices as processes (except for the coarsest
   !  level where we might have a replicated index space) and each process takes care
   !  of one submatrix.   
@@ -1127,20 +1245,60 @@ contains
   !  The levels are numbered in increasing order starting from the finest one, i.e.
   !  level 1 is the finest level and A(1) is the matrix A. 
   !
-  !  This routine applies the multilevel preconditioner in a symmetrized hybrid way
-  !  (multiplicative through the levels and additive on the same level, with pre and 
-  !  post-smoothing).
   !  For details on the symmetrized hybrid multiplicative multilevel Schwarz
-  !  preconditioners, see the Algorithm 3.2.2 of the book:
-  !    -  B.F. Smith, P.E. Bjorstad & W.D. Gropp,
-  !       Domain decomposition: parallel multilevel methods for elliptic partial
-  !       differential equations, Cambridge University Press, 1996.
+  !  preconditioner, see the Algorithm 3.2.2 of the book:
+  !    B.F. Smith, P.E. Bjorstad & W.D. Gropp,
+  !    Domain decomposition: parallel multilevel methods for elliptic partial
+  !    differential equations, Cambridge University Press, 1996.
+  !
+  !  For a description of the arguments see mld_dmlprec_aply.
+  !
+  !  A sketch of the algorithm implemented in this routine is provided below.
+  !  (AV(ilev; sm_pr_) denotes the smoothed prolongator from level ilev to
+  !  level ilev-1, while AV(ilev; sm_pr_t_) denotes its transpose, i.e. the
+  !  corresponding restriction operator from level ilev-1 to level ilev).
+  !
+  !    1.   X(1)  = Xext
+  !
+  !    2. ! Apply the base peconditioner at the finest level
+  !         Y(1)  = (K(1)^(-1))*X(1)
+  !
+  !    3. ! Compute the residual at the finest level
+  !         TX(1) = X(1) - A(1)*Y(1)
+  !
+  !    4.   DO ilev=2, nlev
+  !
+  !          ! Transfer the residual to the current (coarser) level
+  !            X(ilev) = AV(ilev; sm_pr_t)*TX(ilev-1)
+  !    
+  !          ! Apply the base preconditioner at the current level.
+  !          ! The sum over the subdomains is carried out in the
+  !          ! application of K(ilev)
+  !            Y(ilev) = (K(ilev)^(-1))*X(ilev)
+  !
+  !          ! Compute the residual at the current level
+  !            TX(ilev) = (X(ilev)-A(ilev)*Y(ilev))
   !
-  !  For a detailed description of the arguments, see mld_dmlprec_aply.
+  !         ENDDO
+  !
+  !    5.   DO ilev=NLEV-1,1,-1
+  !
+  !          ! Transfer Y(ilev+1) to the next finer level
+  !            Y(ilev) = Y(ilev) + AV(ilev+1; sm_pr_)*Y(ilev+1)
+  !
+  !          ! Compute the residual at the current level and apply to it the
+  !          ! base preconditioner. The sum over the subdomains is carried out
+  !          ! in the application of K(ilev)     
+  !            Y(ilev) = Y(ilev) + (K(ilev)**(-1))*(X(ilev)-A(ilev)*Y(ilev))
+  !
+  !         ENDDO
+  !
+  !    6.  Yext = beta*Yext + alpha*Y(1)
   !
   subroutine mlt_twoside_ml_aply(alpha,baseprecv,x,beta,y,desc_data,trans,work,info)
-    !
+
     implicit none 
+
     ! Arguments
     type(psb_desc_type),intent(in)      :: desc_data
     type(mld_dbaseprc_type), intent(in) :: baseprecv(:)
@@ -1182,6 +1340,7 @@ contains
       call psb_errpush(4010,name,a_err='Allocate')
       goto 9999      
     end if
+
     ! STEP 1
     !
     ! Copy the input vector X
diff --git a/mlprec/mld_dsub_aply.f90 b/mlprec/mld_dsub_aply.f90
index b96ecb04..6fc1db71 100644
--- a/mlprec/mld_dsub_aply.f90
+++ b/mlprec/mld_dsub_aply.f90
@@ -45,11 +45,12 @@
 !
 !  where
 !  - K is a suitable matrix, as specified below,
-!  - op(K^(-1)) is K^(-1) or its transpose, according to the value of trans,
+!  - op(K^(-1)) is K^(-1) or its transpose, according to the value of the
+!    argument trans,
 !  - X and Y are vectors,
 !  - alpha and beta are scalars.
 !
-!  Depending on K, alpha, beta (and on the communication descriptor desc_data
+!  Depending on K, alpha and beta (and on the communication descriptor desc_data
 !  - see the arguments below), the above computation may correspond to one of
 !  the following tasks:
 !
@@ -90,7 +91,7 @@
 !  or a block-Jacobi or LU or ILU solver at the coarsest level of a multilevel
 !  preconditioner. 
 !
-!  Tasks 1, 3 and 4 are selected when prec%iprcparm(smooth_sweeps_) = 1, 
+!  Tasks 1, 3 and 4 may be selected when prec%iprcparm(smooth_sweeps_) = 1, 
 !  while task 2 is selected when prec%iprcparm(smooth_sweeps_) > 1. Furthermore
 !  Tasks 1, 2 and 3 may be performed when the matrix A is
 !  distributed among the processes (prec%iprcparm(mld_coarse_mat_) = mld_distr_mat_),
diff --git a/mlprec/mld_dsub_solve.f90 b/mlprec/mld_dsub_solve.f90
index 2090c481..ae835a6d 100644
--- a/mlprec/mld_dsub_solve.f90
+++ b/mlprec/mld_dsub_solve.f90
@@ -45,33 +45,36 @@
 !
 !  where
 !  - K is a factored matrix, as specified below,
-!  - op(K^(-1)) is K^(-1) or its transpose, according to the value of trans,
+!  - op(K^(-1)) is K^(-1) or its transpose, according to the value of the
+!    argument trans,
 !  - X and Y are vectors,
 !  - alpha and beta are scalars.
 !
-!  Depending on K, alpha, beta (and on the communication descriptor desc_data
+!  Depending on K, alpha and beta (and on the communication descriptor desc_data
 !  - see the arguments below), the above computation may correspond to one of
 !  the following tasks:
 !
-!  1. Solution of a linear system with sparse factors LU generated by means
-!     of an incomplete factorization approximating 
+!  1. approximate solution of a linear system
 !
 !                                    A*Y = X,
-!     In this case the factors of A are either distributed (in which case
-!     they are also block-diagonal) or replicated. 
+!                              
+!     by using the L and U factors computed with an ILU (incomplete LU) factorization
+!     of A. In this case K = L*U ~ A, alpha = 1 and beta = 0. The factors L and U
+!     (and the matrix A) are either distributed and block-diagonal or replicated. 
 !
-!  2. Solution of a linear system with sparse factors LU generated by means
-!     of a complete factorization 
+!  2. Solution of a linear system
 !
 !                                    A*Y = X,
 !
-!     computed with the aid of an auxiliary sparse package such as                 
-!     a. UMFPACK
-!     b. SuperLU
-!     c. SuperLU_Dist
-!     In cases a. and b. the matrix A and its factors are either distributed
-!     and block diagonal or replicated; in case c. the matrix A and its
-!     factors are distributed.
+!     by using the L and U factors computed with a LU factorization of A. In this
+!     case K = L*U = A, alpha = 1 and beta = 0. The LU factorization is performed
+!     by one of the following auxiliary pakages:
+!     a. UMFPACK,
+!     b. SuperLU,
+!     c. SuperLU_Dist.
+!     In the cases a. and b., the factors L and U (and the matrix A) are either
+!     distributed and block diagonal) or replicated; in the case c., L, U (and A)
+!     are distributed.
 !
 !  This routine is used by mld_dsub_aply, to apply a 'base' block-Jacobi or
 !  Additive Schwarz (AS) preconditioner at any level of a multilevel preconditioner,
@@ -85,7 +88,7 @@
 !                 The scalar alpha.
 !   prec       -  type(mld_dbaseprec_type), input.
 !                 The 'base preconditioner' data structure containing the local 
-!                 part of the preconditioner or solver.
+!                 part of the L and U factors of the matrix A.
 !   x          -  real(kind(0.d0)), dimension(:), input.
 !                 The local part of the vector X.
 !   beta       -  real(kind(0.d0)), input.
diff --git a/mlprec/mld_prec_type.f90 b/mlprec/mld_prec_type.f90
index afae0671..10cc0c9e 100644
--- a/mlprec/mld_prec_type.f90
+++ b/mlprec/mld_prec_type.f90
@@ -104,9 +104,9 @@ module mld_prec_type
   !      av(mld_ap_nd_)    -  The entries of the local part of A(ilev) outside
   !                           the diagonal block, for block-Jacobi sweeps.
   !      av(mld_ac_)       -  The local part of the matrix A(ilev).
-  !      av(mld_sm_pr_)    -  The smoother prolongator.   
+  !      av(mld_sm_pr_)    -  The smoothed prolongator.   
   !                           It maps vectors (ilev) ---> (ilev-1).
-  !      av(mld_sm_pr_t_)  -  The smoother prolongator transpose.   
+  !      av(mld_sm_pr_t_)  -  The smoothed prolongator transpose.   
   !                           It maps vectors (ilev-1) ---> (ilev).
   !      Shouldn't we keep just one of the last two items and handle the transpose
   !      in the Sparse BLAS? Maybe.
diff --git a/mlprec/mld_zas_bld.f90 b/mlprec/mld_zas_bld.f90
index dc9a6251..09b19eda 100644
--- a/mlprec/mld_zas_bld.f90
+++ b/mlprec/mld_zas_bld.f90
@@ -39,10 +39,10 @@
 ! Subroutine: mld_zas_bld
 ! Version:    complex
 !
-!  This routine builds the Additive Schwarz (AS) preconditioner.
-!  If the preconditioner is the block-Jacobi one, the routine makes only a copy of
-!  the descriptor of the original matrix and then proceeds to call mld_fact_bld
-!  for LU or incomplete LU factorization of the diagonal blocks of the
+!  This routine builds Additive Schwarz (AS) preconditioners. If the AS
+!  preconditioner is actually the block-Jacobi one, the routine makes only a
+!  copy of the descriptor of the original matrix and then calls mld_fact_bld
+!  to perform an LU or ILU factorization of the diagonal blocks of the
 !  distributed matrix.
 !    
 !
diff --git a/mlprec/mld_zbaseprec_aply.f90 b/mlprec/mld_zbaseprec_aply.f90
index b292725a..eb3a6508 100644
--- a/mlprec/mld_zbaseprec_aply.f90
+++ b/mlprec/mld_zbaseprec_aply.f90
@@ -50,7 +50,7 @@
 !
 !  The routine is used by mld_dmlprec_aply, to apply the multilevel preconditioners,
 !  or directly by mld_dprec_aply, to apply the basic one-level preconditioners (diagonal,
-!  block-Jacobi or additive Schwarz), or to have no preconditioning.
+!  block-Jacobi or additive Schwarz). It also manages the case of no preconditioning.
 !
 !
 ! Arguments:
diff --git a/mlprec/mld_zbaseprec_bld.f90 b/mlprec/mld_zbaseprec_bld.f90
index e69dbbb5..bc21a5bb 100644
--- a/mlprec/mld_zbaseprec_bld.f90
+++ b/mlprec/mld_zbaseprec_bld.f90
@@ -45,7 +45,7 @@
 !
 !  Details on the base preconditioner to be built are stored in the iprcparm
 !  field of the preconditioner data structure (for a description of this
-!  structure see mld_prec_type.f90).
+!  data structure see mld_prec_type.f90).
 !    
 !
 ! Arguments:
diff --git a/mlprec/mld_zfact_bld.f90 b/mlprec/mld_zfact_bld.f90
index 6c913c86..111276db 100644
--- a/mlprec/mld_zfact_bld.f90
+++ b/mlprec/mld_zfact_bld.f90
@@ -39,24 +39,27 @@
 ! Subroutine: mld_zfact_bld
 ! Version:    complex
 !
-!  This routine computes an LU or incomplete LU factorization of the diagonal blocks
-!  of a distributed matrix, according to the value of p%iprcparm(iprcparm(sub_solve_), 
-!  set by the user through mld_dprecinit or mld_dprecset.
-!  It may also split the matrix into its block-diagonal and
-!  off block-diagonal parts, for the future application of multiple
-!  block-Jacobi sweeps.
+!  This routine computes an LU or incomplete LU (ILU) factorization of the diagonal
+!  blocks of a distributed matrix, according to the value of
+!  p%iprcparm(iprcparm(sub_solve_), set by the user through
+!  mld_dprecinit or mld_dprecset.
+!  It may also compute an LU factorization of a distributed matrix, or split
+!  a distributed matrix into its block-diagonal and off block-diagonal parts, 
+!  for the future application of multiple block-Jacobi sweeps.
 !
 !  This routine is used by mld_as_bld, to build a 'base' block-Jacobi or
 !  Additive Schwarz (AS) preconditioner at any level of a multilevel preconditioner,
 !  or a block-Jacobi or LU or ILU solver at the coarsest level of a multilevel
-!  preconditioner. For the Additive Schwarz, it is called from mld_as_bld,
-!  which prepares the overlap descriptor and retrieves the remote rows into blck.
+!  preconditioner. For the AS preconditioners, the diagonal blocks to be factorized
+!  are stored into the sparse matrix data structures a and blck, and blck contains
+!  the remote rows needed to build the extended local matrix as required by the
+!  AS preconditioner.
 !
 !  More precisely, the routine performs one of the following tasks: 
 !
 !  1. LU or ILU factorization of the diagonal blocks of the distributed matrix
 !     for the construction of a block-Jacobi or AS preconditioners
-!     (allowed at any level);
+!     (allowed at any level of a multilevel preconditioner);
 !
 !  2. setup of block-Jacobi sweeps to compute an approximate solution of a
 !     linear system
diff --git a/mlprec/mld_zilu_bld.f90 b/mlprec/mld_zilu_bld.f90
index 707c7f0d..23ce75e9 100644
--- a/mlprec/mld_zilu_bld.f90
+++ b/mlprec/mld_zilu_bld.f90
@@ -39,10 +39,11 @@
 ! Subroutine: mld_zilu_bld
 ! Version:    complex
 !
-!  This routine computes an incomplete LU (ILU) factorization of the diagonal blocks
-!  of a distributed matrix. This factorization is used to build 
-!  the 'base preconditioner' (block-Jacobi preconditioner/solver, Additive Schwarz
+!  This routine computes an incomplete LU (ILU) factorization of the diagonal
+!  blocks of a distributed matrix. This factorization is used to build the
+!  'base preconditioner' (block-Jacobi preconditioner/solver, Additive Schwarz
 !  preconditioner) corresponding to a certain level of a multilevel preconditioner.
+!
 !  The following factorizations are available:
 !  - ILU(k), i.e. ILU factorization with fill-in level k,
 !  - MILU(k), i.e. modified ILU factorization with fill-in level k,
diff --git a/mlprec/mld_zmlprec_aply.f90 b/mlprec/mld_zmlprec_aply.f90
index 06efb0c5..19311d8f 100644
--- a/mlprec/mld_zmlprec_aply.f90
+++ b/mlprec/mld_zmlprec_aply.f90
@@ -83,19 +83,19 @@
 !      baseprecv(ilev)%av  -  type(psb_zspmat_type), dimension(:), allocatable(:).
 !                             The sparse matrices needed to apply the preconditioner 
 !                             at level ilev. 
-!        baseprecv(ilev)%av(mld_l_pr_)    -  The L factor of the ILU factorization of the 
-!                                            local diagonal block of A(ilev).
-!        baseprecv(ilev)%av(mld_u_pr_)    -  The U factor of the ILU factorization of the
-!                                            local diagonal block of A(ilev), except its
-!                                            diagonal entries (stored in baseprecv(ilev)%d).
-!        baseprecv(ilev)%av(mld_ap_nd_)   -  The entries of the local part of A(ilev)
-!                                            outside the diagonal block, for block-Jacobi
-!                                            sweeps.
-!        baseprecv(ilev)%av(mld_ac_)      -  The local part of the matrix A(ilev).
-!        baseprecv(ilev)%av(mld_sm_pr_)   -  The smoother prolongator.   
-!                                            It maps vectors (ilev) ---> (ilev-1).
-!        baseprecv(ilev)%av(mld_sm_pr_t_) -  The smoother prolongator transpose.   
-!                                            It maps vectors (ilev-1) ---> (ilev).
+!         baseprecv(ilev)%av(mld_l_pr_)    -  The L factor of the ILU factorization of the 
+!                                             local diagonal block of A(ilev).
+!         baseprecv(ilev)%av(mld_u_pr_)    -  The U factor of the ILU factorization of the
+!                                             local diagonal block of A(ilev), except its
+!                                             diagonal entries (stored in baseprecv(ilev)%d).
+!         baseprecv(ilev)%av(mld_ap_nd_)   -  The entries of the local part of A(ilev)
+!                                             outside the diagonal block, for block-Jacobi
+!                                             sweeps.
+!         baseprecv(ilev)%av(mld_ac_)      -  The local part of the matrix A(ilev).
+!         baseprecv(ilev)%av(mld_sm_pr_)   -  The smoothed prolongator.   
+!                                             It maps vectors (ilev) ---> (ilev-1).
+!         baseprecv(ilev)%av(mld_sm_pr_t_) -  The smoothed prolongator transpose.   
+!                                             It maps vectors (ilev-1) ---> (ilev).
 !      baseprecv(ilev)%d         -  complex(kind(1.d0)), dimension(:), allocatable.
 !                                   The diagonal entries of the U factor in the ILU
 !                                   factorization of A(ilev).
@@ -209,13 +209,14 @@ subroutine mld_zmlprec_aply(alpha,baseprecv,x,beta,y,desc_data,trans,work,info)
     call psb_errpush(4001,name,a_err='mld_no_ml_ in mlprc_aply?')
     goto 9999      
 
-
   case(mld_add_ml_)
+    !
+    ! Additive multilevel
+    !
 
     call add_ml_aply(alpha,baseprecv,x,beta,y,desc_data,trans_,work,info)
 
   case(mld_mult_ml_)
-
     ! 
     !  Multiplicative multilevel (multiplicative among the levels, additive inside
     !  each level)
@@ -239,7 +240,6 @@ subroutine mld_zmlprec_aply(alpha,baseprecv,x,beta,y,desc_data,trans,work,info)
         goto 9999      
       end select
 
-      
     case(mld_pre_smooth_)
 
       select case (trans_) 
@@ -289,17 +289,21 @@ contains
   ! Subroutine: add_ml_aply
   ! Version:    complex
   ! Note: internal subroutine of mld_dmlprec_aply.
+  ! 
   ! This routine computes
   !  
   !                        Y = beta*Y + alpha*op(M^(-1))*X,
   !  where 
-  !  - M is a multilevel domain decomposition (Schwarz) preconditioner associated
-  !    to a certain matrix A and stored in the array baseprecv,
+  !  - M is an additive multilevel domain decomposition (Schwarz) preconditioner
+  !    associated to a certain matrix A and stored in the array baseprecv,
   !  - op(M^(-1)) is M^(-1) or its (conjugate) transpose, according to
   !    the value of trans,
   !  - X and Y are vectors,
   !  - alpha and beta are scalars.
   !
+  !  The preconditioner M is additive both through the levels and inside each
+  !  level.
+  !
   !  For each level we have as many submatrices as processes (except for the coarsest
   !  level where we might have a replicated index space) and each process takes care
   !  of one submatrix. 
@@ -313,18 +317,51 @@ contains
   !
   !  The levels are numbered in increasing order starting from the finest one, i.e.
   !  level 1 is the finest level and A(1) is the matrix A. 
-  !  This routine applies the multilevel preconditioner in an additive
-  !  way (additive through the levels and additive on the same level).
-  !  For details on the additive multilevel Schwarz preconditioner see
-  !  the Algorithm 3.1.1 in the book:
-  !    -  B.F. Smith, P.E. Bjorstad & W.D. Gropp,
-  !       Domain decomposition: parallel multilevel methods for elliptic partial
-  !       differential equations, Cambridge University Press, 1996.
   !
-  !  For a detailed description of the arguments, see mld_dmlprec_aply.
+  !  For details on the additive multilevel Schwarz preconditioner see the
+  !  Algorithm 3.1.1 in the book:
+  !    B.F. Smith, P.E. Bjorstad & W.D. Gropp,
+  !    Domain decomposition: parallel multilevel methods for elliptic partial
+  !    differential equations, Cambridge University Press, 1996.
+  !
+  !  For a description of the arguments see mld_dmlprec_aply.
+  !
+  !  A sketch of the algorithm implemented in this routine is provided below
+  !  (AV(ilev; sm_pr_) denotes the smoothed prolongator from level ilev to
+  !  level ilev-1, while AV(ilev; sm_pr_t_) denotes its transpose, i.e. the
+  !  corresponding restriction operator from level ilev-1 to level ilev).
+  !
+  !   1. ! Apply the base preconditioner at level 1.
+  !      ! The sum over the subdomains is carried out in the
+  !      ! application of K(1).
+  !        X(1) = Xest
+  !        Y(1) = (K(1)^(-1))*X(1)
+  !
+  !    2.  DO ilev=2,nlev
+  !
+  !         ! Transfer X(ilev-1) to the next coarser level.
+  !           X(ilev) = AV(ilev; sm_pr_t_)*X(ilev-1)
+  !
+  !         ! Apply the base preconditioner at the current level.
+  !         ! The sum over the subdomains is carried out in the
+  !         ! application of K(ilev).
+  !           Y(ilev) = (K(ilev)^(-1))*X(ilev)
+  !
+  !        ENDDO
+  !
+  !    3.  DO ilev=nlev-1,1,-1
+  !
+  !         ! Transfer Y(ilev+1) to the next finer level.
+  !           Y(ilev) = AV(ilev+1; sm_pr_)*Y(ilev+1)
+  !
+  !        ENDDO
+  !     
+  !    4.  Yext = beta*Yext + alpha*Y(1)
   !
   subroutine add_ml_aply(alpha,baseprecv,x,beta,y,desc_data,trans,work,info)
+
     implicit none 
+
     ! Arguments
     type(psb_desc_type),intent(in)      :: desc_data
     type(mld_zbaseprc_type), intent(in) :: baseprecv(:)
@@ -366,6 +403,7 @@ contains
       call psb_errpush(4010,name,a_err='Allocate')
       goto 9999      
     end if
+
     !
     ! STEP 1
     !
@@ -392,8 +430,7 @@ contains
     !
     ! STEP 2
     !
-    !
-    !      For each level except the finest one ...
+    ! For each level except the finest one ...
     !
     do ilev = 2, nlev
       n_row = psb_cd_get_local_rows(baseprecv(ilev-1)%base_desc)
@@ -464,8 +501,7 @@ contains
     !
     ! STEP 3
     !
-    !
-    !      For each level except the finest one ...
+    ! For each level except the finest one ...
     !
     do ilev =nlev,2,-1
 
@@ -531,17 +567,21 @@ contains
   ! Subroutine: mlt_pre_ml_aply
   ! Version:    complex
   ! Note: internal subroutine of mld_dmlprec_aply.
-  ! This routine computes
+  ! 
+  !  This routine computes
   !  
   !                        Y = beta*Y + alpha*op(M^(-1))*X,
   !  where 
-  !  - M is a multilevel domain decomposition (Schwarz) preconditioner associated
-  !    to a certain matrix A and stored in the array baseprecv,
+  !  - M is a hybrid multilevel domain decomposition (Schwarz) preconditioner
+  !    associated to a certain matrix A and stored in the array baseprecv,
   !  - op(M^(-1)) is M^(-1) or its (conjugate) transpose, according to
   !    the value of trans,
   !  - X and Y are vectors,
   !  - alpha and beta are scalars.
   !
+  !  The preconditioner M is hybrid in the sense that it is multiplicative through the
+  !  levels and additive inside a level; pre-smoothing only is applied at each level.
+  !
   !  For each level we have as many submatrices as processes (except for the coarsest
   !  level where we might have a replicated index space) and each process takes care
   !  of one submatrix. 
@@ -556,19 +596,58 @@ contains
   !  The levels are numbered in increasing order starting from the finest one, i.e.
   !  level 1 is the finest level and A(1) is the matrix A. 
   !
-  !  This routine applies the multilevel preconditioner in a hybrid way
-  !  (multiplicative through the levels and additive on the same level)
-  !  and pre-smoothing.
-  !  For details on pre-smoothed hybrid multiplicative multilevel Schwarz preconditioner,
-  !  see the Algorithm 3.2.1 in the book:
-  !    -  B.F. Smith, P.E. Bjorstad & W.D. Gropp,
-  !       Domain decomposition: parallel multilevel methods for elliptic partial
-  !       differential equations, Cambridge University Press, 1996.
+  !  For details on the pre-smoothed hybrid multiplicative multilevel Schwarz
+  !  preconditioner, see the Algorithm 3.2.1 in the book:
+  !    B.F. Smith, P.E. Bjorstad & W.D. Gropp,
+  !    Domain decomposition: parallel multilevel methods for elliptic partial
+  !    differential equations, Cambridge University Press, 1996.
+  !
+  !  For a description of the arguments see mld_dmlprec_aply.
+  ! 
+  !  A sketch of the algorithm implemented in this routine is provided below
+  !  (AV(ilev; sm_pr_) denotes the smoothed prolongator from level ilev to
+  !  level ilev-1, while AV(ilev; sm_pr_t_) denotes its transpose, i.e. the
+  !  corresponding restriction operator from level ilev-1 to level ilev).
+  !
+  !    1.   X(1) = Xext
+  !
+  !    2. ! Apply the base preconditioner at the finest level.
+  !         Y(1) = (K(1)^(-1))*X(1)
+  !
+  !    3. ! Compute the residual at the finest level.
+  !         TX(1) = X(1) - A(1)*Y(1)
+  !
+  !    4.   DO ilev=2, nlev
+  !
+  !          ! Transfer the residual to the current (coarser) level.
+  !            X(ilev) = AV(ilev; sm_pr_t_)*TX(ilev-1)
+  !
+  !          ! Apply the base preconditioner at the current level.
+  !          ! The sum over the subdomains is carried out in the
+  !          ! application of K(ilev).     
+  !            Y(ilev) = (K(ilev)^(-1))*X(ilev)
+  !
+  !          ! Compute the residual at the current level (except at
+  !          ! the coarsest level).
+  !            IF (ilev < nlev)
+  !               TX(ilev) = (X(ilev)-A(ilev)*Y(ilev))
+  !
+  !         ENDDO
+  !
+  !    5.   DO ilev=nlev-1,1,-1
+  !
+  !          ! Transfer Y(ilev+1) to the next finer level
+  !            Y(ilev) = Y(ilev) + AV(ilev+1; sm_pr_)*Y(ilev+1)
+  !
+  !         ENDDO
+  !     
+  !    6.  Yext = beta*Yext + alpha*Y(1)
+  ! 
   !
-  !  For a detailed description of the arguments, see mld_dmlprec_aply.
   subroutine mlt_pre_ml_aply(alpha,baseprecv,x,beta,y,desc_data,trans,work,info)
-    !
+
     implicit none 
+
     ! Arguments
     type(psb_desc_type),intent(in)      :: desc_data
     type(mld_zbaseprc_type), intent(in) :: baseprecv(:)
@@ -611,7 +690,6 @@ contains
       goto 9999      
     end if
 
-
     !
     ! STEP 1
     !
@@ -807,18 +885,22 @@ contains
   !
   ! Subroutine: mlt_post_ml_aply
   ! Version:    complex
-  ! Note: internal subroutine of mld_dmlprec_aply.
-  ! This routine computes
+  ! Note:       internal subroutine of mld_dmlprec_aply.
+  ! 
+  !  This routine computes
   !  
   !                        Y = beta*Y + alpha*op(M^(-1))*X,
   !  where 
-  !  - M is a multilevel domain decomposition (Schwarz) preconditioner associated
-  !    to a certain matrix A and stored in the array baseprecv,
+  !  - M is a hybrid multilevel domain decomposition (Schwarz) preconditioner
+  !    associated to a certain matrix A and stored in the array baseprecv,
   !  - op(M^(-1)) is M^(-1) or its (conjugate) transpose, according to
   !    the value of trans,
   !  - X and Y are vectors,
   !  - alpha and beta are scalars.
   !
+  !  The preconditioner M is hybrid in the sense that it is multiplicative through the
+  !  levels and additive inside a level; post-smoothing only is applied at each level.
+  !
   !  For each level we have as many submatrices as processes (except for the coarsest
   !  level where we might have a replicated index space) and each process takes care
   !  of one submatrix. 
@@ -833,18 +915,49 @@ contains
   !  The levels are numbered in increasing order starting from the finest one, i.e.
   !  level 1 is the finest level and A(1) is the matrix A. 
   !
-  !  This routine applies the multilevel preconditioner in a hybrid way
-  !  (multiplicative through the levels and additive on the same level)
-  !  and post-smoothing.
   !  For details on hybrid multiplicative multilevel Schwarz preconditioners, see
-  !    -  B.F. Smith, P.E. Bjorstad & W.D. Gropp,
-  !       Domain decomposition: parallel multilevel methods for elliptic partial
-  !       differential equations, Cambridge University Press, 1996.
+  !    B.F. Smith, P.E. Bjorstad & W.D. Gropp,
+  !    Domain decomposition: parallel multilevel methods for elliptic partial
+  !    differential equations, Cambridge University Press, 1996.
+  !
+  !  For a description of the arguments see mld_dmlprec_aply.
+  !
+  !  A sketch of the algorithm implemented in this routine is provided below.
+  !  (AV(ilev; sm_pr_) denotes the smoothed prolongator from level ilev to
+  !  level ilev-1, while AV(ilev; sm_pr_t_) denotes its transpose, i.e. the
+  !  corresponding restriction operator from level ilev-1 to level ilev).
+  !
+  !    1.  X(1) = Xext
+  !
+  !    2.  DO ilev=2, nlev
+  !
+  !         ! Transfer X(ilev-1) to the next coarser level.
+  !           X(ilev) = AV(ilev; sm_pr_t_)*X(ilev-1) 
+  !
+  !        ENDDO
+  !   
+  !    3.! Apply the preconditioner at the coarsest level.
+  !        Y(nlev) = (K(nlev)^(-1))*X(nlev)
+  !
+  !    4.  DO ilev=nlev-1,1,-1
+  !
+  !         ! Transfer Y(ilev+1) to the next finer level.
+  !           Y(ilev) = AV(ilev+1; sm_pr_)*Y(ilev+1)
+  !
+  !         ! Compute the residual at the current level and apply to it the
+  !         ! base preconditioner. The sum over the subdomains is carried out
+  !         ! in the application of K(ilev).
+  !           Y(ilev) = Y(ilev) + (K(ilev)^(-1))*(X(ilev)-A(ilev)*Y(ilev))
+  !
+  !        ENDDO
+  !
+  !    5.  Yext = beta*Yext + alpha*Y(1)
+  ! 
   !
-  !  For a detailed description of the arguments, see mld_dmlprec_aply.
   subroutine mlt_post_ml_aply(alpha,baseprecv,x,beta,y,desc_data,trans,work,info)
-    !
+
     implicit none 
+
     ! Arguments
     type(psb_desc_type),intent(in)      :: desc_data
     type(mld_zbaseprc_type), intent(in) :: baseprecv(:)
@@ -886,6 +999,7 @@ contains
       call psb_errpush(4010,name,a_err='Allocate')
       goto 9999      
     end if
+
     !
     ! STEP 1
     !
@@ -1103,18 +1217,24 @@ contains
   !
   ! Subroutine: mlt_twoside_ml_aply
   ! Version:    complex
-  ! Note: internal subroutine of mld_dmlprec_aply.
-  ! This routine computes
+  ! Note:       internal subroutine of mld_dmlprec_aply.
+  ! 
+  !  This routine computes
   !  
   !                        Y = beta*Y + alpha*op(M^(-1))*X,
   !  where 
-  !  - M is a multilevel domain decomposition (Schwarz) preconditioner associated
-  !    to a certain matrix A and stored in the array baseprecv,
+  !  - M is a symmetrized hybrid multilevel domain decomposition (Schwarz)
+  !    preconditioner associated to a certain matrix A and stored in the array
+  !    baseprecv,
   !  - op(M^(-1)) is M^(-1) or its (conjugate) transpose, according to
   !    the value of trans,
   !  - X and Y are vectors,               
   !  - alpha and beta are scalars.
   !
+  !  The preconditioner M is hybrid in the sense that it is multiplicative through
+  !  the levels and additive inside a level; it is symmetrized since pre-smoothing
+  !  and post-smoothing are applied at each level.
+  !
   !  For each level we have as many submatrices as processes (except for the coarsest
   !  level where we might have a replicated index space) and each process takes care
   !  of one submatrix.   
@@ -1129,20 +1249,60 @@ contains
   !  The levels are numbered in increasing order starting from the finest one, i.e.
   !  level 1 is the finest level and A(1) is the matrix A. 
   !
-  !  This routine applies the multilevel preconditioner in a symmetrized hybrid way
-  !  (multiplicative through the levels and additive on the same level, with pre and 
-  !  post-smoothing).
   !  For details on the symmetrized hybrid multiplicative multilevel Schwarz
-  !  preconditioners, see the Algorithm 3.2.2 of the book:
-  !    -  B.F. Smith, P.E. Bjorstad & W.D. Gropp,
-  !       Domain decomposition: parallel multilevel methods for elliptic partial
-  !       differential equations, Cambridge University Press, 1996.
+  !  preconditioner, see the Algorithm 3.2.2 of the book:
+  !    B.F. Smith, P.E. Bjorstad & W.D. Gropp,
+  !    Domain decomposition: parallel multilevel methods for elliptic partial
+  !    differential equations, Cambridge University Press, 1996.
+  !
+  !  For a description of the arguments see mld_dmlprec_aply.
+  !
+  !  A sketch of the algorithm implemented in this routine is provided below.
+  !  (AV(ilev; sm_pr_) denotes the smoothed prolongator from level ilev to
+  !  level ilev-1, while AV(ilev; sm_pr_t_) denotes its transpose, i.e. the
+  !  corresponding restriction operator from level ilev-1 to level ilev).
+  !
+  !    1.   X(1)  = Xext
+  !
+  !    2. ! Apply the base peconditioner at the finest level
+  !         Y(1)  = (K(1)^(-1))*X(1)
+  !
+  !    3. ! Compute the residual at the finest level
+  !         TX(1) = X(1) - A(1)*Y(1)
+  !
+  !    4.   DO ilev=2, nlev
+  !
+  !          ! Transfer the residual to the current (coarser) level
+  !            X(ilev) = AV(ilev; sm_pr_t)*TX(ilev-1)
+  !    
+  !          ! Apply the base preconditioner at the current level.
+  !          ! The sum over the subdomains is carried out in the
+  !          ! application of K(ilev)
+  !            Y(ilev) = (K(ilev)^(-1))*X(ilev)
+  !
+  !          ! Compute the residual at the current level
+  !            TX(ilev) = (X(ilev)-A(ilev)*Y(ilev))
+  !
+  !         ENDDO
   !
-  !  For a detailed description of the arguments, see mld_dmlprec_aply.
+  !    5.   DO ilev=NLEV-1,1,-1
+  !
+  !          ! Transfer Y(ilev+1) to the next finer level
+  !            Y(ilev) = Y(ilev) + AV(ilev+1; sm_pr_)*Y(ilev+1)
+  !
+  !          ! Compute the residual at the current level and apply to it the
+  !          ! base preconditioner. The sum over the subdomains is carried out
+  !          ! in the application of K(ilev)     
+  !            Y(ilev) = Y(ilev) + (K(ilev)**(-1))*(X(ilev)-A(ilev)*Y(ilev))
+  !
+  !         ENDDO
+  !
+  !    6.  Yext = beta*Yext + alpha*Y(1)
   !
   subroutine mlt_twoside_ml_aply(alpha,baseprecv,x,beta,y,desc_data,trans,work,info)
-    !
+
     implicit none 
+
     ! Arguments
     type(psb_desc_type),intent(in)      :: desc_data
     type(mld_zbaseprc_type), intent(in) :: baseprecv(:)
@@ -1184,6 +1344,7 @@ contains
       call psb_errpush(4010,name,a_err='Allocate')
       goto 9999      
     end if
+
     ! STEP 1
     !
     ! Copy the input vector X
@@ -1261,7 +1422,6 @@ contains
       mlprec_wrk(ilev)%tx(:)  = zzero
       mlprec_wrk(ilev)%ty(:)  = zzero
 
-
       if (ismth  /= mld_no_smooth_) then 
         !
         ! Apply the smoothed prolongator transpose
diff --git a/mlprec/mld_zsub_aply.f90 b/mlprec/mld_zsub_aply.f90
index 70945af8..60a41a00 100644
--- a/mlprec/mld_zsub_aply.f90
+++ b/mlprec/mld_zsub_aply.f90
@@ -45,11 +45,12 @@
 !
 !  where
 !  - K is a suitable matrix, as specified below,
-!  - op(K^(-1)) is K^(-1) or its transpose, according to the value of trans,
+!  - op(K^(-1)) is K^(-1) or its transpose, according to the value of the
+!    argument trans,
 !  - X and Y are vectors,
 !  - alpha and beta are scalars.
 !
-!  Depending on K, alpha, beta (and on the communication descriptor desc_data
+!  Depending on K, alpha and beta (and on the communication descriptor desc_data
 !  - see the arguments below), the above computation may correspond to one of
 !  the following tasks:
 !
@@ -90,7 +91,7 @@
 !  or a block-Jacobi or LU or ILU solver at the coarsest level of a multilevel
 !  preconditioner. 
 !
-!  Tasks 1, 3 and 4 are selected when prec%iprcparm(smooth_sweeps_) = 1, 
+!  Tasks 1, 3 and 4 may be selected when prec%iprcparm(smooth_sweeps_) = 1, 
 !  while task 2 is selected when prec%iprcparm(smooth_sweeps_) > 1. Furthermore
 !  Tasks 1, 2 and 3 may be performed when the matrix A is
 !  distributed among the processes (prec%iprcparm(mld_coarse_mat_) = mld_distr_mat_),
diff --git a/mlprec/mld_zsub_solve.f90 b/mlprec/mld_zsub_solve.f90
index 1a1f9d9e..f754561b 100644
--- a/mlprec/mld_zsub_solve.f90
+++ b/mlprec/mld_zsub_solve.f90
@@ -45,33 +45,36 @@
 !
 !  where
 !  - K is a factored matrix, as specified below,
-!  - op(K^(-1)) is K^(-1) or its transpose, according to the value of trans,
+!  - op(K^(-1)) is K^(-1) or its transpose, according to the value of the
+!    argument trans,
 !  - X and Y are vectors,
 !  - alpha and beta are scalars.
 !
-!  Depending on K, alpha, beta (and on the communication descriptor desc_data
+!  Depending on K, alpha and beta (and on the communication descriptor desc_data
 !  - see the arguments below), the above computation may correspond to one of
 !  the following tasks:
 !
-!  1. Solution of a linear system with sparse factors LU generated by means
-!     of an incomplete factorization approximating 
+!  1. approximate solution of a linear system
 !
 !                                    A*Y = X,
-!     In this case the factors of A are either distributed (in which case
-!     they are also block-diagonal) or replicated. 
+!                              
+!     by using the L and U factors computed with an ILU (incomplete LU) factorization
+!     of A. In this case K = L*U ~ A, alpha = 1 and beta = 0. The factors L and U
+!     (and the matrix A) are either distributed and block-diagonal or replicated. 
 !
-!  2. Solution of a linear system with sparse factors LU generated by means
-!     of a complete factorization 
+!  2. Solution of a linear system
 !
 !                                    A*Y = X,
 !
-!     computed with the aid of an auxiliary sparse package such as                 
-!     a. UMFPACK
-!     b. SuperLU
-!     c. SuperLU_Dist
-!     In cases a. and b. the matrix A and its factors are either distributed
-!     and block diagonal or replicated; in case c. the matrix A and its
-!     factors are distributed.
+!     by using the L and U factors computed with a LU factorization of A. In this
+!     case K = L*U = A, alpha = 1 and beta = 0. The LU factorization is performed
+!     by one of the following auxiliary pakages:
+!     a. UMFPACK,
+!     b. SuperLU,
+!     c. SuperLU_Dist.
+!     In the cases a. and b., the factors L and U (and the matrix A) are either
+!     distributed and block diagonal) or replicated; in the case c., L, U (and A)
+!     are distributed.
 !
 !  This routine is used by mld_dsub_aply, to apply a 'base' block-Jacobi or
 !  Additive Schwarz (AS) preconditioner at any level of a multilevel preconditioner,
@@ -85,7 +88,7 @@
 !                 The scalar alpha.
 !   prec       -  type(mld_zbaseprec_type), input.
 !                 The 'base preconditioner' data structure containing the local 
-!                 part of the preconditioner or solver.
+!                 part of the L and U factors of the matrix A.
 !   x          -  complex(kind(0.d0)), dimension(:), input.
 !                 The local part of the vector X.
 !   beta       -  complex(kind(0.d0)), input.