diff --git a/mlprec/impl/mld_c_dec_map_bld.f90 b/mlprec/impl/mld_c_dec_map_bld.f90
index 0084692d..368bbd60 100644
--- a/mlprec/impl/mld_c_dec_map_bld.f90
+++ b/mlprec/impl/mld_c_dec_map_bld.f90
@@ -36,7 +36,38 @@
 !!$  POSSIBILITY OF SUCH DAMAGE.
 !!$ 
 !!$
-
+!
+! File: mld_c_dec_map__bld.f90
+!
+! Subroutine: mld_c_dec_map_bld
+! Version:    complex
+!
+!  This routine builds the tentative prolongator based on the
+!  decoupled aggregation algorithm presented in
+!
+!    M. Brezina and P. Vanek, A black-box iterative solver based on a 
+!    two-level Schwarz method, Computing,  63 (1999), 233-263.
+!    P. D'Ambra, D. di Serafino and S. Filippone, On the development of
+!    PSBLAS-based parallel two-level Schwarz preconditioners, Appl. Num. Math.
+!    57 (2007), 1181-1196.
+!
+! Note: upon exit 
+!
+! Arguments:
+!    a       -  type(psb_cspmat_type).
+!               The sparse matrix structure containing the local part of the
+!               matrix to be preconditioned.
+!    desc_a  -  type(psb_desc_type), input.
+!               The communication descriptor of a.
+!    p       -  type(mld_cprec_type), input/output.
+!               The preconditioner data structure; upon exit it contains 
+!               the multilevel hierarchy of prolongators, restrictors
+!               and coarse matrices.
+!    info    -  integer, output.
+!               Error code.              
+!
+!
+!
 subroutine mld_c_dec_map_bld(iorder,theta,a,desc_a,nlaggr,ilaggr,info)
 
   use psb_base_mod
diff --git a/mlprec/impl/mld_c_hierarchy_bld.f90 b/mlprec/impl/mld_c_hierarchy_bld.f90
index 836a6905..2870d33b 100644
--- a/mlprec/impl/mld_c_hierarchy_bld.f90
+++ b/mlprec/impl/mld_c_hierarchy_bld.f90
@@ -58,8 +58,9 @@
 !    desc_a  -  type(psb_desc_type), input.
 !               The communication descriptor of a.
 !    p       -  type(mld_cprec_type), input/output.
-!               The preconditioner data structure containing the local part
-!               of the preconditioner to be built.
+!               The preconditioner data structure; upon exit it contains 
+!               the multilevel hierarchy of prolongators, restrictors
+!               and coarse matrices.
 !    info    -  integer, output.
 !               Error code.              
 !  
diff --git a/mlprec/impl/mld_c_lev_aggrmap_bld.f90 b/mlprec/impl/mld_c_lev_aggrmap_bld.f90
index 0e2034bf..73a67d0f 100644
--- a/mlprec/impl/mld_c_lev_aggrmap_bld.f90
+++ b/mlprec/impl/mld_c_lev_aggrmap_bld.f90
@@ -36,32 +36,40 @@
 !!$  POSSIBILITY OF SUCH DAMAGE.
 !!$ 
 !!$
-! File: mld_ccoarse_bld.f90
+! File: mld_c_lev_aggrmap_bld.f90
 !
-! Subroutine: mld_ccoarse_bld
-! Version:    real
+! Subroutine: mld_c_lev_aggrmap_bld
+! Version:    complex
 !
-!  This routine builds the matrix associated to the current level of the
-!  multilevel preconditioner from the matrix associated to the previous level,
-!  by using a smoothed aggregation technique (therefore, it also builds the
-!  prolongation and restriction operators mapping the current level to the
-!  previous one and vice versa). Then the routine builds the base preconditioner
-!  at the current level.
-!  The current level is regarded as the coarse one, while the previous as
-!  the fine one. This is in agreement with the fact that the routine is called,
-!  by mld_mlprec_bld, only on levels >=2.
+!  This routine is just an interface to aggrmap_bld where the real work is performed. 
+!  It takes care of some consistency checking though.
+!
+!  See mld_caggrmap_bld for constraints on input/oput arguments. 
 !
 ! 
 ! Arguments:
+!    p       -  type(mld_c_onelev_type), input/output.
+!               The 'one-level' data structure containing the control
+!               parameters and (eventually) coarse matrix and prolongator/restrictors. 
+!               
 !    a       -  type(psb_cspmat_type).
 !               The sparse matrix structure containing the local part of the
 !               fine-level matrix.
 !    desc_a  -  type(psb_desc_type), input.
 !               The communication descriptor of a.
-!    p       -  type(mld_c_onelev_type), input/output.
-!               The 'one-level' data structure containing the local part
-!               of the base preconditioner to be built as well as
-!               information concerning the prolongator and its transpose.
+!    ilaggr     -  integer, dimension(:), allocatable, output
+!                  The mapping between the row indices of the coarse-level
+!                  matrix and the row indices of the fine-level matrix.
+!                  ilaggr(i)=j means that node i in the adjacency graph
+!                  of the fine-level matrix is mapped onto node j in the
+!                  adjacency graph of the coarse-level matrix. Note that on exit the indices
+!                  will be shifted so as to make sure the ranges on the various processes do not
+!                  overlap.
+!    nlaggr     -  integer, dimension(:), allocatable, output
+!                  nlaggr(i) contains the aggregates held by process i.
+!    op_prol    -  type(psb_cspmat_type), output
+!               The tentative prolongator, based on ilaggr.
+!               
 !    info    -  integer, output.
 !               Error code.         
 !  
diff --git a/mlprec/impl/mld_c_lev_aggrmat_asb.f90 b/mlprec/impl/mld_c_lev_aggrmat_asb.f90
index 8dbb2370..71ef1cf8 100644
--- a/mlprec/impl/mld_c_lev_aggrmat_asb.f90
+++ b/mlprec/impl/mld_c_lev_aggrmat_asb.f90
@@ -36,32 +36,51 @@
 !!$  POSSIBILITY OF SUCH DAMAGE.
 !!$ 
 !!$
-! File: mld_ccoarse_bld.f90
+! File: mld_c_lev_aggrmat_asb.f90
 !
-! Subroutine: mld_ccoarse_bld
-! Version:    real
+! Subroutine: mld_c_lev_aggrmat_asb
+! Version:    complex
 !
 !  This routine builds the matrix associated to the current level of the
 !  multilevel preconditioner from the matrix associated to the previous level,
-!  by using a smoothed aggregation technique (therefore, it also builds the
+!  by using the user-specified aggregation technique (therefore, it also builds the
 !  prolongation and restriction operators mapping the current level to the
-!  previous one and vice versa). Then the routine builds the base preconditioner
-!  at the current level.
+!  previous one and vice versa). 
 !  The current level is regarded as the coarse one, while the previous as
 !  the fine one. This is in agreement with the fact that the routine is called,
 !  by mld_mlprec_bld, only on levels >=2.
+!  The main structure is:
+!  1. Perform sanity checks;
+!  2. Call mld_Xaggrmat_asb to compute prolongator/restrictor/AC
+!  3. According to the choice of DIST/REPL for AC, build a descriptor DESC_AC,
+!     and adjust the column numbering of AC/OP_PROL/OP_RESTR
+!  4. Pack restrictor and prolongator into p%map
+!  5. Fix base_a and base_desc pointers.
 !
 ! 
 ! Arguments:
+!    p       -  type(mld_c_onelev_type), input/output.
+!               The 'one-level' data structure containing the control
+!               parameters and (eventually) coarse matrix and prolongator/restrictors. 
+!               
 !    a       -  type(psb_cspmat_type).
 !               The sparse matrix structure containing the local part of the
 !               fine-level matrix.
 !    desc_a  -  type(psb_desc_type), input.
 !               The communication descriptor of a.
-!    p       -  type(mld_c_onelev_type), input/output.
-!               The 'one-level' data structure containing the local part
-!               of the base preconditioner to be built as well as
-!               information concerning the prolongator and its transpose.
+!    ilaggr     -  integer, dimension(:), input
+!                  The mapping between the row indices of the coarse-level
+!                  matrix and the row indices of the fine-level matrix.
+!                  ilaggr(i)=j means that node i in the adjacency graph
+!                  of the fine-level matrix is mapped onto node j in the
+!                  adjacency graph of the coarse-level matrix. Note that the indices
+!                  are assumed to be shifted so as to make sure the ranges on
+!                  the various processes do not   overlap.
+!    nlaggr     -  integer, dimension(:) input
+!                  nlaggr(i) contains the aggregates held by process i.
+!    op_prol    -  type(psb_cspmat_type), input/output
+!               The tentative prolongator on input, released on output. 
+!               
 !    info    -  integer, output.
 !               Error code.         
 !  
diff --git a/mlprec/impl/mld_c_smoothers_bld.f90 b/mlprec/impl/mld_c_smoothers_bld.f90
index 4a8815b2..91d4f6ef 100644
--- a/mlprec/impl/mld_c_smoothers_bld.f90
+++ b/mlprec/impl/mld_c_smoothers_bld.f90
@@ -41,14 +41,18 @@
 ! Subroutine: mld_c_smoothers_bld
 ! Version:    complex
 !
-!  This routine builds the preconditioner according to the requirements made by
-!  the user trough the subroutines mld_precinit and mld_precset.
+!  This routine performs the final phase of the multilevel preconditioner build process:
+!  builds the "smoother" objects at each level, based on the matrix hierarchy prepared
+!  by mld_c_hierarchy_bld.
 !  
 !  A multilevel preconditioner is regarded as an array of 'one-level' data structures,
 !  each containing the part of the preconditioner associated to a certain level,
 !  (for more details see the description of mld_Tonelev_type in mld_prec_type.f90).
 !  The levels are numbered in increasing order starting from the finest one, i.e.
 !  level 1 is the finest level. No transfer operators are associated to level 1.
+!  Each level provides a "build" method; for the base type, the "one-level"
+!  build procedure simply invokes the build method of the first smoother object,
+!  and also on the second object if allocated. 
 ! 
 !
 ! Arguments:
diff --git a/mlprec/impl/mld_caggrmap_bld.f90 b/mlprec/impl/mld_caggrmap_bld.f90
index 365c6a87..f12f8d91 100644
--- a/mlprec/impl/mld_caggrmap_bld.f90
+++ b/mlprec/impl/mld_caggrmap_bld.f90
@@ -73,9 +73,14 @@
 !                  matrix and the row indices of the fine-level matrix.
 !                  ilaggr(i)=j means that node i in the adjacency graph
 !                  of the fine-level matrix is mapped onto node j in the
-!                  adjacency graph of the coarse-level matrix.
+!                  adjacency graph of the coarse-level matrix. Note that on exit the indices
+!                  will be shifted so as to make sure the ranges on the various processes do not
+!                  overlap.
 !    nlaggr     -  integer, dimension(:), allocatable.
 !                  nlaggr(i) contains the aggregates held by process i.
+!    op_prol    -  type(psb_cspmat_type).
+!               The tentative prolongator, based on ilaggr.
+!               
 !    info       -  integer, output.
 !                  Error code.
 !
diff --git a/mlprec/impl/mld_caggrmat_asb.f90 b/mlprec/impl/mld_caggrmat_asb.f90
index ba0bda60..0fa9f29f 100644
--- a/mlprec/impl/mld_caggrmat_asb.f90
+++ b/mlprec/impl/mld_caggrmat_asb.f90
@@ -53,12 +53,16 @@
 !  The prolongator P_C is built here from this mapping, according to the
 !  value of p%iprcparm(mld_aggr_kind_), specified by the user through
 !  mld_cprecinit and mld_zprecset.
+!  On output from this routine the entries of AC, op_prol, op_restr
+!  are still in "global numbering" mode; this is fixed in the calling routine
+!  mld_c_lev_aggrmat_asb.
 !
-!  Currently three different prolongators are implemented, corresponding to
-!  three aggregation algorithms:
-!  1. non-smoothed aggregation,
+!  Currently four  different prolongators are implemented, corresponding to
+!  four  aggregation algorithms:
+!  1. un-smoothed aggregation,
 !  2. smoothed aggregation,
 !  3. "bizarre" aggregation.
+!  4. minimum energy 
 !  1. The non-smoothed aggregation uses as prolongator the piecewise constant
 !     interpolation operator corresponding to the fine-to-coarse level mapping built
 !     by mld_aggrmap_bld. This is called tentative prolongator.
@@ -67,6 +71,7 @@
 !  3. The "bizarre" aggregation uses a prolongator proposed by the authors of MLD2P4.
 !     This prolongator still requires a deep analysis and testing and its use is
 !     not recommended.
+!  4. Minimum energy aggregation: ADD REFERENCE.
 !
 !  For more details see
 !    M. Brezina and P. Vanek, A black-box iterative solver based on a two-level
@@ -85,16 +90,29 @@
 !                  The communication descriptor of the fine-level matrix.
 !    p          -  type(mld_c_onelev_type), input/output.
 !                  The 'one-level' data structure that will contain the local
-!                  part of the matrix to be built as well as the information 
+!                  part of the matrix to be built as well as the information
 !                  concerning the prolongator and its transpose.
-!    ilaggr     -  integer, dimension(:), allocatable.
+!    parms      -   type(mld_sml_parms), input
+!                  Parameters controlling the choice of algorithm
+!    ac         -  type(psb_cspmat_type), output
+!                  The coarse matrix on output 
+!                  
+!    ilaggr     -  integer, dimension(:), input
 !                  The mapping between the row indices of the coarse-level
 !                  matrix and the row indices of the fine-level matrix.
 !                  ilaggr(i)=j means that node i in the adjacency graph
 !                  of the fine-level matrix is mapped onto node j in the
-!                  adjacency graph of the coarse-level matrix.
-!    nlaggr     -  integer, dimension(:), allocatable.
+!                  adjacency graph of the coarse-level matrix. Note that the indices
+!                  are assumed to be shifted so as to make sure the ranges on
+!                  the various processes do not   overlap.
+!    nlaggr     -  integer, dimension(:) input
 !                  nlaggr(i) contains the aggregates held by process i.
+!    op_prol    -  type(psb_cspmat_type), input/output
+!                  The tentative prolongator on input, the computed prolongator on output
+!               
+!    op_restr    -  type(psb_cspmat_type), output
+!                  The restrictor operator; normally, it is the transpose of the prolongator. 
+!               
 !    info       -  integer, output.
 !                  Error code.
 !
diff --git a/mlprec/impl/mld_caggrmat_biz_asb.f90 b/mlprec/impl/mld_caggrmat_biz_asb.f90
index d1ce58f9..b8be4255 100644
--- a/mlprec/impl/mld_caggrmat_biz_asb.f90
+++ b/mlprec/impl/mld_caggrmat_biz_asb.f90
@@ -56,6 +56,9 @@
 !  The coarse-level matrix A_C is distributed among the parallel processes or
 !  replicated on each of them, according to the value of p%parms%coarse_mat,
 !  specified by the user through mld_cprecinit and mld_zprecset.
+!  On output from this routine the entries of AC, op_prol, op_restr
+!  are still in "global numbering" mode; this is fixed in the calling routine
+!  mld_c_lev_aggrmat_asb.
 !
 ! Arguments:
 !    a          -  type(psb_cspmat_type), input.     
@@ -98,7 +101,7 @@ subroutine mld_caggrmat_biz_asb(a,desc_a,ilaggr,nlaggr,parms,ac,op_prol,op_restr
        & naggr, nzl,naggrm1,naggrp1, i, j, k, jd, icolF, nrw, err_act
   integer(psb_ipk_) ::ictxt, np, me
   character(len=20) :: name
-  type(psb_cspmat_type) :: am3, am4
+  type(psb_cspmat_type) :: am3, am4,tmp_prol
   type(psb_c_coo_sparse_mat) :: tmpcoo
   type(psb_c_csr_sparse_mat) :: acsr1, acsr2, acsr3, acsrf, ptilde
   complex(psb_spk_), allocatable :: adiag(:)
@@ -321,29 +324,29 @@ subroutine mld_caggrmat_biz_asb(a,desc_a,ilaggr,nlaggr,parms,ac,op_prol,op_restr
   call acsr1%set_dupl(psb_dupl_add_)
 
   call op_prol%mv_from(acsr1)
-
-  call psb_rwextd(ncol,op_prol,info)
+  call op_prol%clone(tmp_prol,info)
+  call psb_rwextd(ncol,tmp_prol,info)
   if(info /= psb_success_) then
     call psb_errpush(psb_err_internal_error_,name,a_err='Halo of op_prol')
     goto 9999
   end if
 
-  call psb_symbmm(a,op_prol,am3,info)
+  call psb_symbmm(a,tmp_prol,am3,info)
   if(info /= psb_success_) then
     call psb_errpush(psb_err_from_subroutine_,name,a_err='symbmm 2')
     goto 9999
   end if
 
-  call psb_numbmm(a,op_prol,am3)
+  call psb_numbmm(a,tmp_prol,am3)
   if (debug_level >= psb_debug_outer_) &
        & write(debug_unit,*) me,' ',trim(name),&
        & 'Done NUMBMM 2',parms%aggr_kind, mld_smooth_prol_
 
-  call op_prol%transp(op_restr)
+  call tmp_prol%transp(op_restr)
   if (debug_level >= psb_debug_outer_) &
        & write(debug_unit,*) me,' ',trim(name),&
        & 'starting sphalo/ rwxtd'
-
+  call tmp_prol%free()
   call psb_rwextd(ncol,am3,info)
   if(info /= psb_success_) then
     call psb_errpush(psb_err_internal_error_,name,a_err='Extend am3')
diff --git a/mlprec/impl/mld_caggrmat_minnrg_asb.f90 b/mlprec/impl/mld_caggrmat_minnrg_asb.f90
index 58e45cd6..09765e26 100644
--- a/mlprec/impl/mld_caggrmat_minnrg_asb.f90
+++ b/mlprec/impl/mld_caggrmat_minnrg_asb.f90
@@ -60,22 +60,19 @@
 !  radius of D^(-1)A, to be used in the computation of omega, is provided, 
 !  according to the value of p%parms%aggr_omega_alg, specified by the user
 !  through mld_cprecinit and mld_cprecset.
+!  4. Minimum energy aggregation: ADD REFERENCE.
+!  On output from this routine the entries of AC, op_prol, op_restr
+!  are still in "global numbering" mode; this is fixed in the calling routine
+!  mld_c_lev_aggrmat_asb.
 !
-!  This routine can also build A_C according to a "bizarre" aggregation algorithm,
-!  using a "naive" prolongator proposed by the authors of MLD2P4. However, this
-!  prolongator still requires a deep analysis and testing and its use is not
-!  recommended.
+!  For more details see
+!    M. Brezina and P. Vanek, A black-box iterative solver based on a two-level
+!    Schwarz method, Computing,  63 (1999), 233-263.
+!    P. D'Ambra, D. di Serafino and S. Filippone, On the development of PSBLAS-based
+!    parallel two-level Schwarz preconditioners, Appl. Num. Math., 57 (2007),
+!    1181-1196.
 !
-!  The coarse-level matrix A_C is distributed among the parallel processes or
-!  replicated on each of them, according to the value of p%parms%coarse_mat,
-!  specified by the user through mld_cprecinit and mld_zprecset.
 !
-!  For more details see
-!    M. Brezina and P. Vanek, A black-box iterative solver based on a 
-!    two-level Schwarz method, Computing,  63 (1999), 233-263.
-!    P. D'Ambra, D. di Serafino and S. Filippone, On the development of
-!    PSBLAS-based parallel two-level Schwarz preconditioners, Appl. Num. Math.
-!    57 (2007), 1181-1196.
 !
 ! Arguments:
 !    a          -  type(psb_cspmat_type), input.     
@@ -85,19 +82,33 @@
 !                  The communication descriptor of the fine-level matrix.
 !    p          -  type(mld_c_onelev_type), input/output.
 !                  The 'one-level' data structure that will contain the local
-!                  part of the matrix to be built as well as the information 
+!                  part of the matrix to be built as well as the information
 !                  concerning the prolongator and its transpose.
-!    ilaggr     -  integer, dimension(:), allocatable.
+!    parms      -   type(mld_sml_parms), input
+!                  Parameters controlling the choice of algorithm
+!    ac         -  type(psb_cspmat_type), output
+!                  The coarse matrix on output 
+!                  
+!    ilaggr     -  integer, dimension(:), input
 !                  The mapping between the row indices of the coarse-level
 !                  matrix and the row indices of the fine-level matrix.
 !                  ilaggr(i)=j means that node i in the adjacency graph
 !                  of the fine-level matrix is mapped onto node j in the
-!                  adjacency graph of the coarse-level matrix.
-!    nlaggr     -  integer, dimension(:), allocatable.
+!                  adjacency graph of the coarse-level matrix. Note that the indices
+!                  are assumed to be shifted so as to make sure the ranges on
+!                  the various processes do not   overlap.
+!    nlaggr     -  integer, dimension(:) input
 !                  nlaggr(i) contains the aggregates held by process i.
+!    op_prol    -  type(psb_cspmat_type), input/output
+!                  The tentative prolongator on input, the computed prolongator on output
+!               
+!    op_restr    -  type(psb_cspmat_type), output
+!                  The restrictor operator; normally, it is the transpose of the prolongator. 
+!               
 !    info       -  integer, output.
 !                  Error code.
 !
+!
 subroutine mld_caggrmat_minnrg_asb(a,desc_a,ilaggr,nlaggr,parms,ac,op_prol,op_restr,info)
   use psb_base_mod
   use mld_c_inner_mod, mld_protect_name => mld_caggrmat_minnrg_asb
@@ -121,7 +132,7 @@ subroutine mld_caggrmat_minnrg_asb(a,desc_a,ilaggr,nlaggr,parms,ac,op_prol,op_re
   character(len=20)            :: name
   type(psb_cspmat_type)      :: af, ptilde, rtilde, atran, atp, atdatp
   type(psb_cspmat_type)      :: am3,am4, ap, adap,atmp,rada, ra, atmp2, dap, dadap, da
-  type(psb_cspmat_type)      :: dat, datp, datdatp, atmp3
+  type(psb_cspmat_type)      :: dat, datp, datdatp, atmp3, tmp_prol
   type(psb_c_coo_sparse_mat) :: tmpcoo
   type(psb_c_csr_sparse_mat) :: acsr1, acsr2, acsr3, acsr, acsrf
   type(psb_c_csc_sparse_mat) :: csc_dap, csc_dadap, csc_datp, csc_datdatp, acsc
@@ -484,9 +495,10 @@ subroutine mld_caggrmat_minnrg_asb(a,desc_a,ilaggr,nlaggr,parms,ac,op_prol,op_re
   ! Now we have to gather the halo of op_prol, and add it to itself
   ! to multiply it by A,
   !
-  call psb_sphalo(op_prol,desc_a,am4,info,&
+  call op_prol%clone(tmp_prol,info) 
+  if (info == psb_success_) call psb_sphalo(tmp_prol,desc_a,am4,info,&
        & colcnv=.false.,rowscale=.true.)
-  if (info == psb_success_) call psb_rwextd(ncol,op_prol,info,b=am4)      
+  if (info == psb_success_) call psb_rwextd(ncol,tmp_prol,info,b=am4)      
   if (info == psb_success_) call am4%free()
 
   if(info /= psb_success_) then
@@ -519,13 +531,13 @@ subroutine mld_caggrmat_minnrg_asb(a,desc_a,ilaggr,nlaggr,parms,ac,op_prol,op_re
        & write(debug_unit,*) me,' ',trim(name),&
        & 'starting sphalo/ rwxtd'
 
-  call psb_symbmm(a,op_prol,am3,info)
+  call psb_symbmm(a,tmp_prol,am3,info)
   if(info /= psb_success_) then
     call psb_errpush(psb_err_from_subroutine_,name,&
          & a_err='symbmm 2')
     goto 9999
   end if
-  call psb_numbmm(a,op_prol,am3)
+  call psb_numbmm(a,tmp_prol,am3)
   if (debug_level >= psb_debug_outer_) &
        & write(debug_unit,*) me,' ',trim(name),&
        & 'Done NUMBMM 2'
diff --git a/mlprec/impl/mld_caggrmat_nosmth_asb.f90 b/mlprec/impl/mld_caggrmat_nosmth_asb.f90
index b47c1dd7..67046a69 100644
--- a/mlprec/impl/mld_caggrmat_nosmth_asb.f90
+++ b/mlprec/impl/mld_caggrmat_nosmth_asb.f90
@@ -52,6 +52,9 @@
 !  The coarse-level matrix A_C is distributed among the parallel processes or
 !  replicated on each of them, according to the value of p%parms%coarse_mat
 !  specified by the user through mld_cprecinit and mld_zprecset.
+!  On output from this routine the entries of AC, op_prol, op_restr
+!  are still in "global numbering" mode; this is fixed in the calling routine
+!  mld_c_lev_aggrmat_asb.
 !
 !  For details see
 !    P. D'Ambra, D. di Serafino and  S. Filippone, On the development of
@@ -59,7 +62,6 @@
 !    57 (2007), 1181-1196.
 !
 !
-!
 ! Arguments:
 !    a          -  type(psb_cspmat_type), input.     
 !                  The sparse matrix structure containing the local part of
@@ -68,19 +70,33 @@
 !                  The communication descriptor of the fine-level matrix.
 !    p          -  type(mld_c_onelev_type), input/output.
 !                  The 'one-level' data structure that will contain the local
-!                  part of the matrix to be built as well as the information 
+!                  part of the matrix to be built as well as the information
 !                  concerning the prolongator and its transpose.
-!    ilaggr     -  integer, dimension(:), allocatable.
+!    parms      -   type(mld_sml_parms), input
+!                  Parameters controlling the choice of algorithm
+!    ac         -  type(psb_cspmat_type), output
+!                  The coarse matrix on output 
+!                  
+!    ilaggr     -  integer, dimension(:), input
 !                  The mapping between the row indices of the coarse-level
 !                  matrix and the row indices of the fine-level matrix.
 !                  ilaggr(i)=j means that node i in the adjacency graph
 !                  of the fine-level matrix is mapped onto node j in the
-!                  adjacency graph of the coarse-level matrix.
-!    nlaggr     -  integer, dimension(:), allocatable.
+!                  adjacency graph of the coarse-level matrix. Note that the indices
+!                  are assumed to be shifted so as to make sure the ranges on
+!                  the various processes do not   overlap.
+!    nlaggr     -  integer, dimension(:) input
 !                  nlaggr(i) contains the aggregates held by process i.
+!    op_prol    -  type(psb_cspmat_type), input/output
+!                  The tentative prolongator on input, the computed prolongator on output
+!               
+!    op_restr    -  type(psb_cspmat_type), output
+!                  The restrictor operator; normally, it is the transpose of the prolongator. 
+!               
 !    info       -  integer, output.
 !                  Error code.
 !
+!
 subroutine mld_caggrmat_nosmth_asb(a,desc_a,ilaggr,nlaggr,parms,ac,op_prol,op_restr,info)
   use psb_base_mod
   use mld_c_inner_mod, mld_protect_name => mld_caggrmat_nosmth_asb
diff --git a/mlprec/impl/mld_caggrmat_smth_asb.f90 b/mlprec/impl/mld_caggrmat_smth_asb.f90
index ed72e9ee..5b2b0d49 100644
--- a/mlprec/impl/mld_caggrmat_smth_asb.f90
+++ b/mlprec/impl/mld_caggrmat_smth_asb.f90
@@ -64,6 +64,9 @@
 !  The coarse-level matrix A_C is distributed among the parallel processes or
 !  replicated on each of them, according to the value of p%parms%coarse_mat,
 !  specified by the user through mld_cprecinit and mld_zprecset.
+!  On output from this routine the entries of AC, op_prol, op_restr
+!  are still in "global numbering" mode; this is fixed in the calling routine
+!  mld_c_lev_aggrmat_asb.
 !
 !  For more details see
 !    M. Brezina and P. Vanek, A black-box iterative solver based on a 
@@ -72,6 +75,7 @@
 !    PSBLAS-based parallel two-level Schwarz preconditioners, Appl. Num. Math.
 !    57 (2007), 1181-1196.
 !
+!
 ! Arguments:
 !    a          -  type(psb_cspmat_type), input.     
 !                  The sparse matrix structure containing the local part of
@@ -80,16 +84,29 @@
 !                  The communication descriptor of the fine-level matrix.
 !    p          -  type(mld_c_onelev_type), input/output.
 !                  The 'one-level' data structure that will contain the local
-!                  part of the matrix to be built as well as the information 
+!                  part of the matrix to be built as well as the information
 !                  concerning the prolongator and its transpose.
-!    ilaggr     -  integer, dimension(:), allocatable.
+!    parms      -   type(mld_sml_parms), input
+!                  Parameters controlling the choice of algorithm
+!    ac         -  type(psb_cspmat_type), output
+!                  The coarse matrix on output 
+!                  
+!    ilaggr     -  integer, dimension(:), input
 !                  The mapping between the row indices of the coarse-level
 !                  matrix and the row indices of the fine-level matrix.
 !                  ilaggr(i)=j means that node i in the adjacency graph
 !                  of the fine-level matrix is mapped onto node j in the
-!                  adjacency graph of the coarse-level matrix.
-!    nlaggr     -  integer, dimension(:), allocatable.
+!                  adjacency graph of the coarse-level matrix. Note that the indices
+!                  are assumed to be shifted so as to make sure the ranges on
+!                  the various processes do not   overlap.
+!    nlaggr     -  integer, dimension(:) input
 !                  nlaggr(i) contains the aggregates held by process i.
+!    op_prol    -  type(psb_cspmat_type), input/output
+!                  The tentative prolongator on input, the computed prolongator on output
+!               
+!    op_restr    -  type(psb_cspmat_type), output
+!                  The restrictor operator; normally, it is the transpose of the prolongator. 
+!               
 !    info       -  integer, output.
 !                  Error code.
 !
diff --git a/mlprec/impl/mld_cmlprec_bld.f90 b/mlprec/impl/mld_cmlprec_bld.f90
index ba53ea68..9920b9e4 100644
--- a/mlprec/impl/mld_cmlprec_bld.f90
+++ b/mlprec/impl/mld_cmlprec_bld.f90
@@ -49,6 +49,9 @@
 !  (for more details see the description of mld_Tonelev_type in mld_prec_type.f90).
 !  The levels are numbered in increasing order starting from the finest one, i.e.
 !  level 1 is the finest level. No transfer operators are associated to level 1.
+!
+!  This routine simply calls mld_c_hierarchy_bld and mld_c_smoothers_bld; they
+!  can also be called explicitly from the user. 
 ! 
 !
 ! Arguments:
diff --git a/mlprec/impl/mld_d_dec_map_bld.f90 b/mlprec/impl/mld_d_dec_map_bld.f90
index ecbf5a8b..c0d47b3c 100644
--- a/mlprec/impl/mld_d_dec_map_bld.f90
+++ b/mlprec/impl/mld_d_dec_map_bld.f90
@@ -36,7 +36,38 @@
 !!$  POSSIBILITY OF SUCH DAMAGE.
 !!$ 
 !!$
-
+!
+! File: mld_d_dec_map__bld.f90
+!
+! Subroutine: mld_d_dec_map_bld
+! Version:    real
+!
+!  This routine builds the tentative prolongator based on the
+!  decoupled aggregation algorithm presented in
+!
+!    M. Brezina and P. Vanek, A black-box iterative solver based on a 
+!    two-level Schwarz method, Computing,  63 (1999), 233-263.
+!    P. D'Ambra, D. di Serafino and S. Filippone, On the development of
+!    PSBLAS-based parallel two-level Schwarz preconditioners, Appl. Num. Math.
+!    57 (2007), 1181-1196.
+!
+! Note: upon exit 
+!
+! Arguments:
+!    a       -  type(psb_dspmat_type).
+!               The sparse matrix structure containing the local part of the
+!               matrix to be preconditioned.
+!    desc_a  -  type(psb_desc_type), input.
+!               The communication descriptor of a.
+!    p       -  type(mld_dprec_type), input/output.
+!               The preconditioner data structure; upon exit it contains 
+!               the multilevel hierarchy of prolongators, restrictors
+!               and coarse matrices.
+!    info    -  integer, output.
+!               Error code.              
+!
+!
+!
 subroutine mld_d_dec_map_bld(iorder,theta,a,desc_a,nlaggr,ilaggr,info)
 
   use psb_base_mod
diff --git a/mlprec/impl/mld_d_hierarchy_bld.f90 b/mlprec/impl/mld_d_hierarchy_bld.f90
index c6061345..5afd73ba 100644
--- a/mlprec/impl/mld_d_hierarchy_bld.f90
+++ b/mlprec/impl/mld_d_hierarchy_bld.f90
@@ -58,8 +58,9 @@
 !    desc_a  -  type(psb_desc_type), input.
 !               The communication descriptor of a.
 !    p       -  type(mld_dprec_type), input/output.
-!               The preconditioner data structure containing the local part
-!               of the preconditioner to be built.
+!               The preconditioner data structure; upon exit it contains 
+!               the multilevel hierarchy of prolongators, restrictors
+!               and coarse matrices.
 !    info    -  integer, output.
 !               Error code.              
 !  
diff --git a/mlprec/impl/mld_d_lev_aggrmap_bld.f90 b/mlprec/impl/mld_d_lev_aggrmap_bld.f90
index eec9a32c..9454a738 100644
--- a/mlprec/impl/mld_d_lev_aggrmap_bld.f90
+++ b/mlprec/impl/mld_d_lev_aggrmap_bld.f90
@@ -36,32 +36,40 @@
 !!$  POSSIBILITY OF SUCH DAMAGE.
 !!$ 
 !!$
-! File: mld_dcoarse_bld.f90
+! File: mld_d_lev_aggrmap_bld.f90
 !
-! Subroutine: mld_dcoarse_bld
+! Subroutine: mld_d_lev_aggrmap_bld
 ! Version:    real
 !
-!  This routine builds the matrix associated to the current level of the
-!  multilevel preconditioner from the matrix associated to the previous level,
-!  by using a smoothed aggregation technique (therefore, it also builds the
-!  prolongation and restriction operators mapping the current level to the
-!  previous one and vice versa). Then the routine builds the base preconditioner
-!  at the current level.
-!  The current level is regarded as the coarse one, while the previous as
-!  the fine one. This is in agreement with the fact that the routine is called,
-!  by mld_mlprec_bld, only on levels >=2.
+!  This routine is just an interface to aggrmap_bld where the real work is performed. 
+!  It takes care of some consistency checking though.
+!
+!  See mld_daggrmap_bld for constraints on input/oput arguments. 
 !
 ! 
 ! Arguments:
+!    p       -  type(mld_d_onelev_type), input/output.
+!               The 'one-level' data structure containing the control
+!               parameters and (eventually) coarse matrix and prolongator/restrictors. 
+!               
 !    a       -  type(psb_dspmat_type).
 !               The sparse matrix structure containing the local part of the
 !               fine-level matrix.
 !    desc_a  -  type(psb_desc_type), input.
 !               The communication descriptor of a.
-!    p       -  type(mld_d_onelev_type), input/output.
-!               The 'one-level' data structure containing the local part
-!               of the base preconditioner to be built as well as
-!               information concerning the prolongator and its transpose.
+!    ilaggr     -  integer, dimension(:), allocatable, output
+!                  The mapping between the row indices of the coarse-level
+!                  matrix and the row indices of the fine-level matrix.
+!                  ilaggr(i)=j means that node i in the adjacency graph
+!                  of the fine-level matrix is mapped onto node j in the
+!                  adjacency graph of the coarse-level matrix. Note that on exit the indices
+!                  will be shifted so as to make sure the ranges on the various processes do not
+!                  overlap.
+!    nlaggr     -  integer, dimension(:), allocatable, output
+!                  nlaggr(i) contains the aggregates held by process i.
+!    op_prol    -  type(psb_dspmat_type), output
+!               The tentative prolongator, based on ilaggr.
+!               
 !    info    -  integer, output.
 !               Error code.         
 !  
diff --git a/mlprec/impl/mld_d_lev_aggrmat_asb.f90 b/mlprec/impl/mld_d_lev_aggrmat_asb.f90
index ca47cc2e..ba619228 100644
--- a/mlprec/impl/mld_d_lev_aggrmat_asb.f90
+++ b/mlprec/impl/mld_d_lev_aggrmat_asb.f90
@@ -36,32 +36,51 @@
 !!$  POSSIBILITY OF SUCH DAMAGE.
 !!$ 
 !!$
-! File: mld_dcoarse_bld.f90
+! File: mld_d_lev_aggrmat_asb.f90
 !
-! Subroutine: mld_dcoarse_bld
+! Subroutine: mld_d_lev_aggrmat_asb
 ! Version:    real
 !
 !  This routine builds the matrix associated to the current level of the
 !  multilevel preconditioner from the matrix associated to the previous level,
-!  by using a smoothed aggregation technique (therefore, it also builds the
+!  by using the user-specified aggregation technique (therefore, it also builds the
 !  prolongation and restriction operators mapping the current level to the
-!  previous one and vice versa). Then the routine builds the base preconditioner
-!  at the current level.
+!  previous one and vice versa). 
 !  The current level is regarded as the coarse one, while the previous as
 !  the fine one. This is in agreement with the fact that the routine is called,
 !  by mld_mlprec_bld, only on levels >=2.
+!  The main structure is:
+!  1. Perform sanity checks;
+!  2. Call mld_Xaggrmat_asb to compute prolongator/restrictor/AC
+!  3. According to the choice of DIST/REPL for AC, build a descriptor DESC_AC,
+!     and adjust the column numbering of AC/OP_PROL/OP_RESTR
+!  4. Pack restrictor and prolongator into p%map
+!  5. Fix base_a and base_desc pointers.
 !
 ! 
 ! Arguments:
+!    p       -  type(mld_d_onelev_type), input/output.
+!               The 'one-level' data structure containing the control
+!               parameters and (eventually) coarse matrix and prolongator/restrictors. 
+!               
 !    a       -  type(psb_dspmat_type).
 !               The sparse matrix structure containing the local part of the
 !               fine-level matrix.
 !    desc_a  -  type(psb_desc_type), input.
 !               The communication descriptor of a.
-!    p       -  type(mld_d_onelev_type), input/output.
-!               The 'one-level' data structure containing the local part
-!               of the base preconditioner to be built as well as
-!               information concerning the prolongator and its transpose.
+!    ilaggr     -  integer, dimension(:), input
+!                  The mapping between the row indices of the coarse-level
+!                  matrix and the row indices of the fine-level matrix.
+!                  ilaggr(i)=j means that node i in the adjacency graph
+!                  of the fine-level matrix is mapped onto node j in the
+!                  adjacency graph of the coarse-level matrix. Note that the indices
+!                  are assumed to be shifted so as to make sure the ranges on
+!                  the various processes do not   overlap.
+!    nlaggr     -  integer, dimension(:) input
+!                  nlaggr(i) contains the aggregates held by process i.
+!    op_prol    -  type(psb_dspmat_type), input/output
+!               The tentative prolongator on input, released on output. 
+!               
 !    info    -  integer, output.
 !               Error code.         
 !  
diff --git a/mlprec/impl/mld_d_smoothers_bld.f90 b/mlprec/impl/mld_d_smoothers_bld.f90
index f0e3ad2d..e01def14 100644
--- a/mlprec/impl/mld_d_smoothers_bld.f90
+++ b/mlprec/impl/mld_d_smoothers_bld.f90
@@ -41,14 +41,18 @@
 ! Subroutine: mld_d_smoothers_bld
 ! Version:    real
 !
-!  This routine builds the preconditioner according to the requirements made by
-!  the user trough the subroutines mld_precinit and mld_precset.
+!  This routine performs the final phase of the multilevel preconditioner build process:
+!  builds the "smoother" objects at each level, based on the matrix hierarchy prepared
+!  by mld_d_hierarchy_bld.
 !  
 !  A multilevel preconditioner is regarded as an array of 'one-level' data structures,
 !  each containing the part of the preconditioner associated to a certain level,
 !  (for more details see the description of mld_Tonelev_type in mld_prec_type.f90).
 !  The levels are numbered in increasing order starting from the finest one, i.e.
 !  level 1 is the finest level. No transfer operators are associated to level 1.
+!  Each level provides a "build" method; for the base type, the "one-level"
+!  build procedure simply invokes the build method of the first smoother object,
+!  and also on the second object if allocated. 
 ! 
 !
 ! Arguments:
diff --git a/mlprec/impl/mld_daggrmap_bld.f90 b/mlprec/impl/mld_daggrmap_bld.f90
index f8e6d7cc..02472e4b 100644
--- a/mlprec/impl/mld_daggrmap_bld.f90
+++ b/mlprec/impl/mld_daggrmap_bld.f90
@@ -73,9 +73,14 @@
 !                  matrix and the row indices of the fine-level matrix.
 !                  ilaggr(i)=j means that node i in the adjacency graph
 !                  of the fine-level matrix is mapped onto node j in the
-!                  adjacency graph of the coarse-level matrix.
+!                  adjacency graph of the coarse-level matrix. Note that on exit the indices
+!                  will be shifted so as to make sure the ranges on the various processes do not
+!                  overlap.
 !    nlaggr     -  integer, dimension(:), allocatable.
 !                  nlaggr(i) contains the aggregates held by process i.
+!    op_prol    -  type(psb_dspmat_type).
+!               The tentative prolongator, based on ilaggr.
+!               
 !    info       -  integer, output.
 !                  Error code.
 !
diff --git a/mlprec/impl/mld_daggrmat_asb.f90 b/mlprec/impl/mld_daggrmat_asb.f90
index 5ee9b0e4..3eb8eeff 100644
--- a/mlprec/impl/mld_daggrmat_asb.f90
+++ b/mlprec/impl/mld_daggrmat_asb.f90
@@ -53,12 +53,16 @@
 !  The prolongator P_C is built here from this mapping, according to the
 !  value of p%iprcparm(mld_aggr_kind_), specified by the user through
 !  mld_dprecinit and mld_zprecset.
+!  On output from this routine the entries of AC, op_prol, op_restr
+!  are still in "global numbering" mode; this is fixed in the calling routine
+!  mld_d_lev_aggrmat_asb.
 !
-!  Currently three different prolongators are implemented, corresponding to
-!  three aggregation algorithms:
-!  1. non-smoothed aggregation,
+!  Currently four  different prolongators are implemented, corresponding to
+!  four  aggregation algorithms:
+!  1. un-smoothed aggregation,
 !  2. smoothed aggregation,
 !  3. "bizarre" aggregation.
+!  4. minimum energy 
 !  1. The non-smoothed aggregation uses as prolongator the piecewise constant
 !     interpolation operator corresponding to the fine-to-coarse level mapping built
 !     by mld_aggrmap_bld. This is called tentative prolongator.
@@ -67,6 +71,7 @@
 !  3. The "bizarre" aggregation uses a prolongator proposed by the authors of MLD2P4.
 !     This prolongator still requires a deep analysis and testing and its use is
 !     not recommended.
+!  4. Minimum energy aggregation: ADD REFERENCE.
 !
 !  For more details see
 !    M. Brezina and P. Vanek, A black-box iterative solver based on a two-level
@@ -78,23 +83,36 @@
 !
 !
 ! Arguments:
-!    a          -  type(psb_cspmat_type), input.     
+!    a          -  type(psb_dspmat_type), input.     
 !                  The sparse matrix structure containing the local part of
 !                  the fine-level matrix.
 !    desc_a     -  type(psb_desc_type), input.
 !                  The communication descriptor of the fine-level matrix.
 !    p          -  type(mld_d_onelev_type), input/output.
 !                  The 'one-level' data structure that will contain the local
-!                  part of the matrix to be built as well as the information 
+!                  part of the matrix to be built as well as the information
 !                  concerning the prolongator and its transpose.
-!    ilaggr     -  integer, dimension(:), allocatable.
+!    parms      -   type(mld_dml_parms), input
+!                  Parameters controlling the choice of algorithm
+!    ac         -  type(psb_dspmat_type), output
+!                  The coarse matrix on output 
+!                  
+!    ilaggr     -  integer, dimension(:), input
 !                  The mapping between the row indices of the coarse-level
 !                  matrix and the row indices of the fine-level matrix.
 !                  ilaggr(i)=j means that node i in the adjacency graph
 !                  of the fine-level matrix is mapped onto node j in the
-!                  adjacency graph of the coarse-level matrix.
-!    nlaggr     -  integer, dimension(:), allocatable.
+!                  adjacency graph of the coarse-level matrix. Note that the indices
+!                  are assumed to be shifted so as to make sure the ranges on
+!                  the various processes do not   overlap.
+!    nlaggr     -  integer, dimension(:) input
 !                  nlaggr(i) contains the aggregates held by process i.
+!    op_prol    -  type(psb_dspmat_type), input/output
+!                  The tentative prolongator on input, the computed prolongator on output
+!               
+!    op_restr    -  type(psb_dspmat_type), output
+!                  The restrictor operator; normally, it is the transpose of the prolongator. 
+!               
 !    info       -  integer, output.
 !                  Error code.
 !
diff --git a/mlprec/impl/mld_daggrmat_biz_asb.f90 b/mlprec/impl/mld_daggrmat_biz_asb.f90
index f65f16eb..7eafe44a 100644
--- a/mlprec/impl/mld_daggrmat_biz_asb.f90
+++ b/mlprec/impl/mld_daggrmat_biz_asb.f90
@@ -56,6 +56,9 @@
 !  The coarse-level matrix A_C is distributed among the parallel processes or
 !  replicated on each of them, according to the value of p%parms%coarse_mat,
 !  specified by the user through mld_dprecinit and mld_zprecset.
+!  On output from this routine the entries of AC, op_prol, op_restr
+!  are still in "global numbering" mode; this is fixed in the calling routine
+!  mld_d_lev_aggrmat_asb.
 !
 ! Arguments:
 !    a          -  type(psb_dspmat_type), input.     
@@ -98,7 +101,7 @@ subroutine mld_daggrmat_biz_asb(a,desc_a,ilaggr,nlaggr,parms,ac,op_prol,op_restr
        & naggr, nzl,naggrm1,naggrp1, i, j, k, jd, icolF, nrw, err_act
   integer(psb_ipk_) ::ictxt, np, me
   character(len=20) :: name
-  type(psb_dspmat_type) :: am3, am4
+  type(psb_dspmat_type) :: am3, am4,tmp_prol
   type(psb_d_coo_sparse_mat) :: tmpcoo
   type(psb_d_csr_sparse_mat) :: acsr1, acsr2, acsr3, acsrf, ptilde
   real(psb_dpk_), allocatable :: adiag(:)
@@ -321,29 +324,29 @@ subroutine mld_daggrmat_biz_asb(a,desc_a,ilaggr,nlaggr,parms,ac,op_prol,op_restr
   call acsr1%set_dupl(psb_dupl_add_)
 
   call op_prol%mv_from(acsr1)
-
-  call psb_rwextd(ncol,op_prol,info)
+  call op_prol%clone(tmp_prol,info)
+  call psb_rwextd(ncol,tmp_prol,info)
   if(info /= psb_success_) then
     call psb_errpush(psb_err_internal_error_,name,a_err='Halo of op_prol')
     goto 9999
   end if
 
-  call psb_symbmm(a,op_prol,am3,info)
+  call psb_symbmm(a,tmp_prol,am3,info)
   if(info /= psb_success_) then
     call psb_errpush(psb_err_from_subroutine_,name,a_err='symbmm 2')
     goto 9999
   end if
 
-  call psb_numbmm(a,op_prol,am3)
+  call psb_numbmm(a,tmp_prol,am3)
   if (debug_level >= psb_debug_outer_) &
        & write(debug_unit,*) me,' ',trim(name),&
        & 'Done NUMBMM 2',parms%aggr_kind, mld_smooth_prol_
 
-  call op_prol%transp(op_restr)
+  call tmp_prol%transp(op_restr)
   if (debug_level >= psb_debug_outer_) &
        & write(debug_unit,*) me,' ',trim(name),&
        & 'starting sphalo/ rwxtd'
-
+  call tmp_prol%free()
   call psb_rwextd(ncol,am3,info)
   if(info /= psb_success_) then
     call psb_errpush(psb_err_internal_error_,name,a_err='Extend am3')
diff --git a/mlprec/impl/mld_daggrmat_minnrg_asb.f90 b/mlprec/impl/mld_daggrmat_minnrg_asb.f90
index f54912d3..efb1cf95 100644
--- a/mlprec/impl/mld_daggrmat_minnrg_asb.f90
+++ b/mlprec/impl/mld_daggrmat_minnrg_asb.f90
@@ -60,22 +60,19 @@
 !  radius of D^(-1)A, to be used in the computation of omega, is provided, 
 !  according to the value of p%parms%aggr_omega_alg, specified by the user
 !  through mld_dprecinit and mld_dprecset.
+!  4. Minimum energy aggregation: ADD REFERENCE.
+!  On output from this routine the entries of AC, op_prol, op_restr
+!  are still in "global numbering" mode; this is fixed in the calling routine
+!  mld_d_lev_aggrmat_asb.
 !
-!  This routine can also build A_C according to a "bizarre" aggregation algorithm,
-!  using a "naive" prolongator proposed by the authors of MLD2P4. However, this
-!  prolongator still requires a deep analysis and testing and its use is not
-!  recommended.
+!  For more details see
+!    M. Brezina and P. Vanek, A black-box iterative solver based on a two-level
+!    Schwarz method, Computing,  63 (1999), 233-263.
+!    P. D'Ambra, D. di Serafino and S. Filippone, On the development of PSBLAS-based
+!    parallel two-level Schwarz preconditioners, Appl. Num. Math., 57 (2007),
+!    1181-1196.
 !
-!  The coarse-level matrix A_C is distributed among the parallel processes or
-!  replicated on each of them, according to the value of p%parms%coarse_mat,
-!  specified by the user through mld_dprecinit and mld_zprecset.
 !
-!  For more details see
-!    M. Brezina and P. Vanek, A black-box iterative solver based on a 
-!    two-level Schwarz method, Computing,  63 (1999), 233-263.
-!    P. D'Ambra, D. di Serafino and S. Filippone, On the development of
-!    PSBLAS-based parallel two-level Schwarz preconditioners, Appl. Num. Math.
-!    57 (2007), 1181-1196.
 !
 ! Arguments:
 !    a          -  type(psb_dspmat_type), input.     
@@ -85,19 +82,33 @@
 !                  The communication descriptor of the fine-level matrix.
 !    p          -  type(mld_d_onelev_type), input/output.
 !                  The 'one-level' data structure that will contain the local
-!                  part of the matrix to be built as well as the information 
+!                  part of the matrix to be built as well as the information
 !                  concerning the prolongator and its transpose.
-!    ilaggr     -  integer, dimension(:), allocatable.
+!    parms      -   type(mld_dml_parms), input
+!                  Parameters controlling the choice of algorithm
+!    ac         -  type(psb_dspmat_type), output
+!                  The coarse matrix on output 
+!                  
+!    ilaggr     -  integer, dimension(:), input
 !                  The mapping between the row indices of the coarse-level
 !                  matrix and the row indices of the fine-level matrix.
 !                  ilaggr(i)=j means that node i in the adjacency graph
 !                  of the fine-level matrix is mapped onto node j in the
-!                  adjacency graph of the coarse-level matrix.
-!    nlaggr     -  integer, dimension(:), allocatable.
+!                  adjacency graph of the coarse-level matrix. Note that the indices
+!                  are assumed to be shifted so as to make sure the ranges on
+!                  the various processes do not   overlap.
+!    nlaggr     -  integer, dimension(:) input
 !                  nlaggr(i) contains the aggregates held by process i.
+!    op_prol    -  type(psb_dspmat_type), input/output
+!                  The tentative prolongator on input, the computed prolongator on output
+!               
+!    op_restr    -  type(psb_dspmat_type), output
+!                  The restrictor operator; normally, it is the transpose of the prolongator. 
+!               
 !    info       -  integer, output.
 !                  Error code.
 !
+!
 subroutine mld_daggrmat_minnrg_asb(a,desc_a,ilaggr,nlaggr,parms,ac,op_prol,op_restr,info)
   use psb_base_mod
   use mld_d_inner_mod, mld_protect_name => mld_daggrmat_minnrg_asb
@@ -121,7 +132,7 @@ subroutine mld_daggrmat_minnrg_asb(a,desc_a,ilaggr,nlaggr,parms,ac,op_prol,op_re
   character(len=20)            :: name
   type(psb_dspmat_type)      :: af, ptilde, rtilde, atran, atp, atdatp
   type(psb_dspmat_type)      :: am3,am4, ap, adap,atmp,rada, ra, atmp2, dap, dadap, da
-  type(psb_dspmat_type)      :: dat, datp, datdatp, atmp3
+  type(psb_dspmat_type)      :: dat, datp, datdatp, atmp3, tmp_prol
   type(psb_d_coo_sparse_mat) :: tmpcoo
   type(psb_d_csr_sparse_mat) :: acsr1, acsr2, acsr3, acsr, acsrf
   type(psb_d_csc_sparse_mat) :: csc_dap, csc_dadap, csc_datp, csc_datdatp, acsc
@@ -484,9 +495,10 @@ subroutine mld_daggrmat_minnrg_asb(a,desc_a,ilaggr,nlaggr,parms,ac,op_prol,op_re
   ! Now we have to gather the halo of op_prol, and add it to itself
   ! to multiply it by A,
   !
-  call psb_sphalo(op_prol,desc_a,am4,info,&
+  call op_prol%clone(tmp_prol,info) 
+  if (info == psb_success_) call psb_sphalo(tmp_prol,desc_a,am4,info,&
        & colcnv=.false.,rowscale=.true.)
-  if (info == psb_success_) call psb_rwextd(ncol,op_prol,info,b=am4)      
+  if (info == psb_success_) call psb_rwextd(ncol,tmp_prol,info,b=am4)      
   if (info == psb_success_) call am4%free()
 
   if(info /= psb_success_) then
@@ -519,13 +531,13 @@ subroutine mld_daggrmat_minnrg_asb(a,desc_a,ilaggr,nlaggr,parms,ac,op_prol,op_re
        & write(debug_unit,*) me,' ',trim(name),&
        & 'starting sphalo/ rwxtd'
 
-  call psb_symbmm(a,op_prol,am3,info)
+  call psb_symbmm(a,tmp_prol,am3,info)
   if(info /= psb_success_) then
     call psb_errpush(psb_err_from_subroutine_,name,&
          & a_err='symbmm 2')
     goto 9999
   end if
-  call psb_numbmm(a,op_prol,am3)
+  call psb_numbmm(a,tmp_prol,am3)
   if (debug_level >= psb_debug_outer_) &
        & write(debug_unit,*) me,' ',trim(name),&
        & 'Done NUMBMM 2'
diff --git a/mlprec/impl/mld_daggrmat_nosmth_asb.f90 b/mlprec/impl/mld_daggrmat_nosmth_asb.f90
index d4788037..5db27b45 100644
--- a/mlprec/impl/mld_daggrmat_nosmth_asb.f90
+++ b/mlprec/impl/mld_daggrmat_nosmth_asb.f90
@@ -52,6 +52,9 @@
 !  The coarse-level matrix A_C is distributed among the parallel processes or
 !  replicated on each of them, according to the value of p%parms%coarse_mat
 !  specified by the user through mld_dprecinit and mld_zprecset.
+!  On output from this routine the entries of AC, op_prol, op_restr
+!  are still in "global numbering" mode; this is fixed in the calling routine
+!  mld_d_lev_aggrmat_asb.
 !
 !  For details see
 !    P. D'Ambra, D. di Serafino and  S. Filippone, On the development of
@@ -59,7 +62,6 @@
 !    57 (2007), 1181-1196.
 !
 !
-!
 ! Arguments:
 !    a          -  type(psb_dspmat_type), input.     
 !                  The sparse matrix structure containing the local part of
@@ -68,19 +70,33 @@
 !                  The communication descriptor of the fine-level matrix.
 !    p          -  type(mld_d_onelev_type), input/output.
 !                  The 'one-level' data structure that will contain the local
-!                  part of the matrix to be built as well as the information 
+!                  part of the matrix to be built as well as the information
 !                  concerning the prolongator and its transpose.
-!    ilaggr     -  integer, dimension(:), allocatable.
+!    parms      -   type(mld_dml_parms), input
+!                  Parameters controlling the choice of algorithm
+!    ac         -  type(psb_dspmat_type), output
+!                  The coarse matrix on output 
+!                  
+!    ilaggr     -  integer, dimension(:), input
 !                  The mapping between the row indices of the coarse-level
 !                  matrix and the row indices of the fine-level matrix.
 !                  ilaggr(i)=j means that node i in the adjacency graph
 !                  of the fine-level matrix is mapped onto node j in the
-!                  adjacency graph of the coarse-level matrix.
-!    nlaggr     -  integer, dimension(:), allocatable.
+!                  adjacency graph of the coarse-level matrix. Note that the indices
+!                  are assumed to be shifted so as to make sure the ranges on
+!                  the various processes do not   overlap.
+!    nlaggr     -  integer, dimension(:) input
 !                  nlaggr(i) contains the aggregates held by process i.
+!    op_prol    -  type(psb_dspmat_type), input/output
+!                  The tentative prolongator on input, the computed prolongator on output
+!               
+!    op_restr    -  type(psb_dspmat_type), output
+!                  The restrictor operator; normally, it is the transpose of the prolongator. 
+!               
 !    info       -  integer, output.
 !                  Error code.
 !
+!
 subroutine mld_daggrmat_nosmth_asb(a,desc_a,ilaggr,nlaggr,parms,ac,op_prol,op_restr,info)
   use psb_base_mod
   use mld_d_inner_mod, mld_protect_name => mld_daggrmat_nosmth_asb
diff --git a/mlprec/impl/mld_daggrmat_smth_asb.f90 b/mlprec/impl/mld_daggrmat_smth_asb.f90
index 390f5b71..02535b67 100644
--- a/mlprec/impl/mld_daggrmat_smth_asb.f90
+++ b/mlprec/impl/mld_daggrmat_smth_asb.f90
@@ -64,6 +64,9 @@
 !  The coarse-level matrix A_C is distributed among the parallel processes or
 !  replicated on each of them, according to the value of p%parms%coarse_mat,
 !  specified by the user through mld_dprecinit and mld_zprecset.
+!  On output from this routine the entries of AC, op_prol, op_restr
+!  are still in "global numbering" mode; this is fixed in the calling routine
+!  mld_d_lev_aggrmat_asb.
 !
 !  For more details see
 !    M. Brezina and P. Vanek, A black-box iterative solver based on a 
@@ -72,6 +75,7 @@
 !    PSBLAS-based parallel two-level Schwarz preconditioners, Appl. Num. Math.
 !    57 (2007), 1181-1196.
 !
+!
 ! Arguments:
 !    a          -  type(psb_dspmat_type), input.     
 !                  The sparse matrix structure containing the local part of
@@ -80,16 +84,29 @@
 !                  The communication descriptor of the fine-level matrix.
 !    p          -  type(mld_d_onelev_type), input/output.
 !                  The 'one-level' data structure that will contain the local
-!                  part of the matrix to be built as well as the information 
+!                  part of the matrix to be built as well as the information
 !                  concerning the prolongator and its transpose.
-!    ilaggr     -  integer, dimension(:), allocatable.
+!    parms      -   type(mld_dml_parms), input
+!                  Parameters controlling the choice of algorithm
+!    ac         -  type(psb_dspmat_type), output
+!                  The coarse matrix on output 
+!                  
+!    ilaggr     -  integer, dimension(:), input
 !                  The mapping between the row indices of the coarse-level
 !                  matrix and the row indices of the fine-level matrix.
 !                  ilaggr(i)=j means that node i in the adjacency graph
 !                  of the fine-level matrix is mapped onto node j in the
-!                  adjacency graph of the coarse-level matrix.
-!    nlaggr     -  integer, dimension(:), allocatable.
+!                  adjacency graph of the coarse-level matrix. Note that the indices
+!                  are assumed to be shifted so as to make sure the ranges on
+!                  the various processes do not   overlap.
+!    nlaggr     -  integer, dimension(:) input
 !                  nlaggr(i) contains the aggregates held by process i.
+!    op_prol    -  type(psb_dspmat_type), input/output
+!                  The tentative prolongator on input, the computed prolongator on output
+!               
+!    op_restr    -  type(psb_dspmat_type), output
+!                  The restrictor operator; normally, it is the transpose of the prolongator. 
+!               
 !    info       -  integer, output.
 !                  Error code.
 !
diff --git a/mlprec/impl/mld_dmlprec_bld.f90 b/mlprec/impl/mld_dmlprec_bld.f90
index 9fbe1c70..380df97d 100644
--- a/mlprec/impl/mld_dmlprec_bld.f90
+++ b/mlprec/impl/mld_dmlprec_bld.f90
@@ -49,6 +49,9 @@
 !  (for more details see the description of mld_Tonelev_type in mld_prec_type.f90).
 !  The levels are numbered in increasing order starting from the finest one, i.e.
 !  level 1 is the finest level. No transfer operators are associated to level 1.
+!
+!  This routine simply calls mld_d_hierarchy_bld and mld_d_smoothers_bld; they
+!  can also be called explicitly from the user. 
 ! 
 !
 ! Arguments:
diff --git a/mlprec/impl/mld_s_dec_map_bld.f90 b/mlprec/impl/mld_s_dec_map_bld.f90
index afd107fa..9c11da2a 100644
--- a/mlprec/impl/mld_s_dec_map_bld.f90
+++ b/mlprec/impl/mld_s_dec_map_bld.f90
@@ -36,7 +36,38 @@
 !!$  POSSIBILITY OF SUCH DAMAGE.
 !!$ 
 !!$
-
+!
+! File: mld_s_dec_map__bld.f90
+!
+! Subroutine: mld_s_dec_map_bld
+! Version:    real
+!
+!  This routine builds the tentative prolongator based on the
+!  decoupled aggregation algorithm presented in
+!
+!    M. Brezina and P. Vanek, A black-box iterative solver based on a 
+!    two-level Schwarz method, Computing,  63 (1999), 233-263.
+!    P. D'Ambra, D. di Serafino and S. Filippone, On the development of
+!    PSBLAS-based parallel two-level Schwarz preconditioners, Appl. Num. Math.
+!    57 (2007), 1181-1196.
+!
+! Note: upon exit 
+!
+! Arguments:
+!    a       -  type(psb_sspmat_type).
+!               The sparse matrix structure containing the local part of the
+!               matrix to be preconditioned.
+!    desc_a  -  type(psb_desc_type), input.
+!               The communication descriptor of a.
+!    p       -  type(mld_sprec_type), input/output.
+!               The preconditioner data structure; upon exit it contains 
+!               the multilevel hierarchy of prolongators, restrictors
+!               and coarse matrices.
+!    info    -  integer, output.
+!               Error code.              
+!
+!
+!
 subroutine mld_s_dec_map_bld(iorder,theta,a,desc_a,nlaggr,ilaggr,info)
 
   use psb_base_mod
diff --git a/mlprec/impl/mld_s_hierarchy_bld.f90 b/mlprec/impl/mld_s_hierarchy_bld.f90
index b6c1f269..1a30dfee 100644
--- a/mlprec/impl/mld_s_hierarchy_bld.f90
+++ b/mlprec/impl/mld_s_hierarchy_bld.f90
@@ -58,8 +58,9 @@
 !    desc_a  -  type(psb_desc_type), input.
 !               The communication descriptor of a.
 !    p       -  type(mld_sprec_type), input/output.
-!               The preconditioner data structure containing the local part
-!               of the preconditioner to be built.
+!               The preconditioner data structure; upon exit it contains 
+!               the multilevel hierarchy of prolongators, restrictors
+!               and coarse matrices.
 !    info    -  integer, output.
 !               Error code.              
 !  
diff --git a/mlprec/impl/mld_s_lev_aggrmap_bld.f90 b/mlprec/impl/mld_s_lev_aggrmap_bld.f90
index fa4b78aa..21b067bd 100644
--- a/mlprec/impl/mld_s_lev_aggrmap_bld.f90
+++ b/mlprec/impl/mld_s_lev_aggrmap_bld.f90
@@ -36,32 +36,40 @@
 !!$  POSSIBILITY OF SUCH DAMAGE.
 !!$ 
 !!$
-! File: mld_scoarse_bld.f90
+! File: mld_s_lev_aggrmap_bld.f90
 !
-! Subroutine: mld_scoarse_bld
+! Subroutine: mld_s_lev_aggrmap_bld
 ! Version:    real
 !
-!  This routine builds the matrix associated to the current level of the
-!  multilevel preconditioner from the matrix associated to the previous level,
-!  by using a smoothed aggregation technique (therefore, it also builds the
-!  prolongation and restriction operators mapping the current level to the
-!  previous one and vice versa). Then the routine builds the base preconditioner
-!  at the current level.
-!  The current level is regarded as the coarse one, while the previous as
-!  the fine one. This is in agreement with the fact that the routine is called,
-!  by mld_mlprec_bld, only on levels >=2.
+!  This routine is just an interface to aggrmap_bld where the real work is performed. 
+!  It takes care of some consistency checking though.
+!
+!  See mld_saggrmap_bld for constraints on input/oput arguments. 
 !
 ! 
 ! Arguments:
+!    p       -  type(mld_s_onelev_type), input/output.
+!               The 'one-level' data structure containing the control
+!               parameters and (eventually) coarse matrix and prolongator/restrictors. 
+!               
 !    a       -  type(psb_sspmat_type).
 !               The sparse matrix structure containing the local part of the
 !               fine-level matrix.
 !    desc_a  -  type(psb_desc_type), input.
 !               The communication descriptor of a.
-!    p       -  type(mld_s_onelev_type), input/output.
-!               The 'one-level' data structure containing the local part
-!               of the base preconditioner to be built as well as
-!               information concerning the prolongator and its transpose.
+!    ilaggr     -  integer, dimension(:), allocatable, output
+!                  The mapping between the row indices of the coarse-level
+!                  matrix and the row indices of the fine-level matrix.
+!                  ilaggr(i)=j means that node i in the adjacency graph
+!                  of the fine-level matrix is mapped onto node j in the
+!                  adjacency graph of the coarse-level matrix. Note that on exit the indices
+!                  will be shifted so as to make sure the ranges on the various processes do not
+!                  overlap.
+!    nlaggr     -  integer, dimension(:), allocatable, output
+!                  nlaggr(i) contains the aggregates held by process i.
+!    op_prol    -  type(psb_sspmat_type), output
+!               The tentative prolongator, based on ilaggr.
+!               
 !    info    -  integer, output.
 !               Error code.         
 !  
diff --git a/mlprec/impl/mld_s_lev_aggrmat_asb.f90 b/mlprec/impl/mld_s_lev_aggrmat_asb.f90
index 0d746a94..63f7a2ae 100644
--- a/mlprec/impl/mld_s_lev_aggrmat_asb.f90
+++ b/mlprec/impl/mld_s_lev_aggrmat_asb.f90
@@ -36,32 +36,51 @@
 !!$  POSSIBILITY OF SUCH DAMAGE.
 !!$ 
 !!$
-! File: mld_scoarse_bld.f90
+! File: mld_s_lev_aggrmat_asb.f90
 !
-! Subroutine: mld_scoarse_bld
+! Subroutine: mld_s_lev_aggrmat_asb
 ! Version:    real
 !
 !  This routine builds the matrix associated to the current level of the
 !  multilevel preconditioner from the matrix associated to the previous level,
-!  by using a smoothed aggregation technique (therefore, it also builds the
+!  by using the user-specified aggregation technique (therefore, it also builds the
 !  prolongation and restriction operators mapping the current level to the
-!  previous one and vice versa). Then the routine builds the base preconditioner
-!  at the current level.
+!  previous one and vice versa). 
 !  The current level is regarded as the coarse one, while the previous as
 !  the fine one. This is in agreement with the fact that the routine is called,
 !  by mld_mlprec_bld, only on levels >=2.
+!  The main structure is:
+!  1. Perform sanity checks;
+!  2. Call mld_Xaggrmat_asb to compute prolongator/restrictor/AC
+!  3. According to the choice of DIST/REPL for AC, build a descriptor DESC_AC,
+!     and adjust the column numbering of AC/OP_PROL/OP_RESTR
+!  4. Pack restrictor and prolongator into p%map
+!  5. Fix base_a and base_desc pointers.
 !
 ! 
 ! Arguments:
+!    p       -  type(mld_s_onelev_type), input/output.
+!               The 'one-level' data structure containing the control
+!               parameters and (eventually) coarse matrix and prolongator/restrictors. 
+!               
 !    a       -  type(psb_sspmat_type).
 !               The sparse matrix structure containing the local part of the
 !               fine-level matrix.
 !    desc_a  -  type(psb_desc_type), input.
 !               The communication descriptor of a.
-!    p       -  type(mld_s_onelev_type), input/output.
-!               The 'one-level' data structure containing the local part
-!               of the base preconditioner to be built as well as
-!               information concerning the prolongator and its transpose.
+!    ilaggr     -  integer, dimension(:), input
+!                  The mapping between the row indices of the coarse-level
+!                  matrix and the row indices of the fine-level matrix.
+!                  ilaggr(i)=j means that node i in the adjacency graph
+!                  of the fine-level matrix is mapped onto node j in the
+!                  adjacency graph of the coarse-level matrix. Note that the indices
+!                  are assumed to be shifted so as to make sure the ranges on
+!                  the various processes do not   overlap.
+!    nlaggr     -  integer, dimension(:) input
+!                  nlaggr(i) contains the aggregates held by process i.
+!    op_prol    -  type(psb_sspmat_type), input/output
+!               The tentative prolongator on input, released on output. 
+!               
 !    info    -  integer, output.
 !               Error code.         
 !  
diff --git a/mlprec/impl/mld_s_smoothers_bld.f90 b/mlprec/impl/mld_s_smoothers_bld.f90
index 4561a5f7..1d1bc68e 100644
--- a/mlprec/impl/mld_s_smoothers_bld.f90
+++ b/mlprec/impl/mld_s_smoothers_bld.f90
@@ -41,14 +41,18 @@
 ! Subroutine: mld_s_smoothers_bld
 ! Version:    real
 !
-!  This routine builds the preconditioner according to the requirements made by
-!  the user trough the subroutines mld_precinit and mld_precset.
+!  This routine performs the final phase of the multilevel preconditioner build process:
+!  builds the "smoother" objects at each level, based on the matrix hierarchy prepared
+!  by mld_s_hierarchy_bld.
 !  
 !  A multilevel preconditioner is regarded as an array of 'one-level' data structures,
 !  each containing the part of the preconditioner associated to a certain level,
 !  (for more details see the description of mld_Tonelev_type in mld_prec_type.f90).
 !  The levels are numbered in increasing order starting from the finest one, i.e.
 !  level 1 is the finest level. No transfer operators are associated to level 1.
+!  Each level provides a "build" method; for the base type, the "one-level"
+!  build procedure simply invokes the build method of the first smoother object,
+!  and also on the second object if allocated. 
 ! 
 !
 ! Arguments:
diff --git a/mlprec/impl/mld_saggrmap_bld.f90 b/mlprec/impl/mld_saggrmap_bld.f90
index 5dfd1af5..ee3f7c48 100644
--- a/mlprec/impl/mld_saggrmap_bld.f90
+++ b/mlprec/impl/mld_saggrmap_bld.f90
@@ -73,9 +73,14 @@
 !                  matrix and the row indices of the fine-level matrix.
 !                  ilaggr(i)=j means that node i in the adjacency graph
 !                  of the fine-level matrix is mapped onto node j in the
-!                  adjacency graph of the coarse-level matrix.
+!                  adjacency graph of the coarse-level matrix. Note that on exit the indices
+!                  will be shifted so as to make sure the ranges on the various processes do not
+!                  overlap.
 !    nlaggr     -  integer, dimension(:), allocatable.
 !                  nlaggr(i) contains the aggregates held by process i.
+!    op_prol    -  type(psb_sspmat_type).
+!               The tentative prolongator, based on ilaggr.
+!               
 !    info       -  integer, output.
 !                  Error code.
 !
diff --git a/mlprec/impl/mld_saggrmat_asb.f90 b/mlprec/impl/mld_saggrmat_asb.f90
index e540c117..2d6ac23f 100644
--- a/mlprec/impl/mld_saggrmat_asb.f90
+++ b/mlprec/impl/mld_saggrmat_asb.f90
@@ -53,12 +53,16 @@
 !  The prolongator P_C is built here from this mapping, according to the
 !  value of p%iprcparm(mld_aggr_kind_), specified by the user through
 !  mld_sprecinit and mld_zprecset.
+!  On output from this routine the entries of AC, op_prol, op_restr
+!  are still in "global numbering" mode; this is fixed in the calling routine
+!  mld_s_lev_aggrmat_asb.
 !
-!  Currently three different prolongators are implemented, corresponding to
-!  three aggregation algorithms:
-!  1. non-smoothed aggregation,
+!  Currently four  different prolongators are implemented, corresponding to
+!  four  aggregation algorithms:
+!  1. un-smoothed aggregation,
 !  2. smoothed aggregation,
 !  3. "bizarre" aggregation.
+!  4. minimum energy 
 !  1. The non-smoothed aggregation uses as prolongator the piecewise constant
 !     interpolation operator corresponding to the fine-to-coarse level mapping built
 !     by mld_aggrmap_bld. This is called tentative prolongator.
@@ -67,6 +71,7 @@
 !  3. The "bizarre" aggregation uses a prolongator proposed by the authors of MLD2P4.
 !     This prolongator still requires a deep analysis and testing and its use is
 !     not recommended.
+!  4. Minimum energy aggregation: ADD REFERENCE.
 !
 !  For more details see
 !    M. Brezina and P. Vanek, A black-box iterative solver based on a two-level
@@ -78,23 +83,36 @@
 !
 !
 ! Arguments:
-!    a          -  type(psb_cspmat_type), input.     
+!    a          -  type(psb_sspmat_type), input.     
 !                  The sparse matrix structure containing the local part of
 !                  the fine-level matrix.
 !    desc_a     -  type(psb_desc_type), input.
 !                  The communication descriptor of the fine-level matrix.
 !    p          -  type(mld_s_onelev_type), input/output.
 !                  The 'one-level' data structure that will contain the local
-!                  part of the matrix to be built as well as the information 
+!                  part of the matrix to be built as well as the information
 !                  concerning the prolongator and its transpose.
-!    ilaggr     -  integer, dimension(:), allocatable.
+!    parms      -   type(mld_sml_parms), input
+!                  Parameters controlling the choice of algorithm
+!    ac         -  type(psb_sspmat_type), output
+!                  The coarse matrix on output 
+!                  
+!    ilaggr     -  integer, dimension(:), input
 !                  The mapping between the row indices of the coarse-level
 !                  matrix and the row indices of the fine-level matrix.
 !                  ilaggr(i)=j means that node i in the adjacency graph
 !                  of the fine-level matrix is mapped onto node j in the
-!                  adjacency graph of the coarse-level matrix.
-!    nlaggr     -  integer, dimension(:), allocatable.
+!                  adjacency graph of the coarse-level matrix. Note that the indices
+!                  are assumed to be shifted so as to make sure the ranges on
+!                  the various processes do not   overlap.
+!    nlaggr     -  integer, dimension(:) input
 !                  nlaggr(i) contains the aggregates held by process i.
+!    op_prol    -  type(psb_sspmat_type), input/output
+!                  The tentative prolongator on input, the computed prolongator on output
+!               
+!    op_restr    -  type(psb_sspmat_type), output
+!                  The restrictor operator; normally, it is the transpose of the prolongator. 
+!               
 !    info       -  integer, output.
 !                  Error code.
 !
diff --git a/mlprec/impl/mld_saggrmat_biz_asb.f90 b/mlprec/impl/mld_saggrmat_biz_asb.f90
index 946bb3eb..ef20aafb 100644
--- a/mlprec/impl/mld_saggrmat_biz_asb.f90
+++ b/mlprec/impl/mld_saggrmat_biz_asb.f90
@@ -56,6 +56,9 @@
 !  The coarse-level matrix A_C is distributed among the parallel processes or
 !  replicated on each of them, according to the value of p%parms%coarse_mat,
 !  specified by the user through mld_sprecinit and mld_zprecset.
+!  On output from this routine the entries of AC, op_prol, op_restr
+!  are still in "global numbering" mode; this is fixed in the calling routine
+!  mld_s_lev_aggrmat_asb.
 !
 ! Arguments:
 !    a          -  type(psb_sspmat_type), input.     
@@ -98,7 +101,7 @@ subroutine mld_saggrmat_biz_asb(a,desc_a,ilaggr,nlaggr,parms,ac,op_prol,op_restr
        & naggr, nzl,naggrm1,naggrp1, i, j, k, jd, icolF, nrw, err_act
   integer(psb_ipk_) ::ictxt, np, me
   character(len=20) :: name
-  type(psb_sspmat_type) :: am3, am4
+  type(psb_sspmat_type) :: am3, am4,tmp_prol
   type(psb_s_coo_sparse_mat) :: tmpcoo
   type(psb_s_csr_sparse_mat) :: acsr1, acsr2, acsr3, acsrf, ptilde
   real(psb_spk_), allocatable :: adiag(:)
@@ -321,29 +324,29 @@ subroutine mld_saggrmat_biz_asb(a,desc_a,ilaggr,nlaggr,parms,ac,op_prol,op_restr
   call acsr1%set_dupl(psb_dupl_add_)
 
   call op_prol%mv_from(acsr1)
-
-  call psb_rwextd(ncol,op_prol,info)
+  call op_prol%clone(tmp_prol,info)
+  call psb_rwextd(ncol,tmp_prol,info)
   if(info /= psb_success_) then
     call psb_errpush(psb_err_internal_error_,name,a_err='Halo of op_prol')
     goto 9999
   end if
 
-  call psb_symbmm(a,op_prol,am3,info)
+  call psb_symbmm(a,tmp_prol,am3,info)
   if(info /= psb_success_) then
     call psb_errpush(psb_err_from_subroutine_,name,a_err='symbmm 2')
     goto 9999
   end if
 
-  call psb_numbmm(a,op_prol,am3)
+  call psb_numbmm(a,tmp_prol,am3)
   if (debug_level >= psb_debug_outer_) &
        & write(debug_unit,*) me,' ',trim(name),&
        & 'Done NUMBMM 2',parms%aggr_kind, mld_smooth_prol_
 
-  call op_prol%transp(op_restr)
+  call tmp_prol%transp(op_restr)
   if (debug_level >= psb_debug_outer_) &
        & write(debug_unit,*) me,' ',trim(name),&
        & 'starting sphalo/ rwxtd'
-
+  call tmp_prol%free()
   call psb_rwextd(ncol,am3,info)
   if(info /= psb_success_) then
     call psb_errpush(psb_err_internal_error_,name,a_err='Extend am3')
diff --git a/mlprec/impl/mld_saggrmat_minnrg_asb.f90 b/mlprec/impl/mld_saggrmat_minnrg_asb.f90
index defca9c0..c7895d47 100644
--- a/mlprec/impl/mld_saggrmat_minnrg_asb.f90
+++ b/mlprec/impl/mld_saggrmat_minnrg_asb.f90
@@ -60,22 +60,19 @@
 !  radius of D^(-1)A, to be used in the computation of omega, is provided, 
 !  according to the value of p%parms%aggr_omega_alg, specified by the user
 !  through mld_sprecinit and mld_sprecset.
+!  4. Minimum energy aggregation: ADD REFERENCE.
+!  On output from this routine the entries of AC, op_prol, op_restr
+!  are still in "global numbering" mode; this is fixed in the calling routine
+!  mld_s_lev_aggrmat_asb.
 !
-!  This routine can also build A_C according to a "bizarre" aggregation algorithm,
-!  using a "naive" prolongator proposed by the authors of MLD2P4. However, this
-!  prolongator still requires a deep analysis and testing and its use is not
-!  recommended.
+!  For more details see
+!    M. Brezina and P. Vanek, A black-box iterative solver based on a two-level
+!    Schwarz method, Computing,  63 (1999), 233-263.
+!    P. D'Ambra, D. di Serafino and S. Filippone, On the development of PSBLAS-based
+!    parallel two-level Schwarz preconditioners, Appl. Num. Math., 57 (2007),
+!    1181-1196.
 !
-!  The coarse-level matrix A_C is distributed among the parallel processes or
-!  replicated on each of them, according to the value of p%parms%coarse_mat,
-!  specified by the user through mld_sprecinit and mld_zprecset.
 !
-!  For more details see
-!    M. Brezina and P. Vanek, A black-box iterative solver based on a 
-!    two-level Schwarz method, Computing,  63 (1999), 233-263.
-!    P. D'Ambra, D. di Serafino and S. Filippone, On the development of
-!    PSBLAS-based parallel two-level Schwarz preconditioners, Appl. Num. Math.
-!    57 (2007), 1181-1196.
 !
 ! Arguments:
 !    a          -  type(psb_sspmat_type), input.     
@@ -85,19 +82,33 @@
 !                  The communication descriptor of the fine-level matrix.
 !    p          -  type(mld_s_onelev_type), input/output.
 !                  The 'one-level' data structure that will contain the local
-!                  part of the matrix to be built as well as the information 
+!                  part of the matrix to be built as well as the information
 !                  concerning the prolongator and its transpose.
-!    ilaggr     -  integer, dimension(:), allocatable.
+!    parms      -   type(mld_sml_parms), input
+!                  Parameters controlling the choice of algorithm
+!    ac         -  type(psb_sspmat_type), output
+!                  The coarse matrix on output 
+!                  
+!    ilaggr     -  integer, dimension(:), input
 !                  The mapping between the row indices of the coarse-level
 !                  matrix and the row indices of the fine-level matrix.
 !                  ilaggr(i)=j means that node i in the adjacency graph
 !                  of the fine-level matrix is mapped onto node j in the
-!                  adjacency graph of the coarse-level matrix.
-!    nlaggr     -  integer, dimension(:), allocatable.
+!                  adjacency graph of the coarse-level matrix. Note that the indices
+!                  are assumed to be shifted so as to make sure the ranges on
+!                  the various processes do not   overlap.
+!    nlaggr     -  integer, dimension(:) input
 !                  nlaggr(i) contains the aggregates held by process i.
+!    op_prol    -  type(psb_sspmat_type), input/output
+!                  The tentative prolongator on input, the computed prolongator on output
+!               
+!    op_restr    -  type(psb_sspmat_type), output
+!                  The restrictor operator; normally, it is the transpose of the prolongator. 
+!               
 !    info       -  integer, output.
 !                  Error code.
 !
+!
 subroutine mld_saggrmat_minnrg_asb(a,desc_a,ilaggr,nlaggr,parms,ac,op_prol,op_restr,info)
   use psb_base_mod
   use mld_s_inner_mod, mld_protect_name => mld_saggrmat_minnrg_asb
@@ -121,7 +132,7 @@ subroutine mld_saggrmat_minnrg_asb(a,desc_a,ilaggr,nlaggr,parms,ac,op_prol,op_re
   character(len=20)            :: name
   type(psb_sspmat_type)      :: af, ptilde, rtilde, atran, atp, atdatp
   type(psb_sspmat_type)      :: am3,am4, ap, adap,atmp,rada, ra, atmp2, dap, dadap, da
-  type(psb_sspmat_type)      :: dat, datp, datdatp, atmp3
+  type(psb_sspmat_type)      :: dat, datp, datdatp, atmp3, tmp_prol
   type(psb_s_coo_sparse_mat) :: tmpcoo
   type(psb_s_csr_sparse_mat) :: acsr1, acsr2, acsr3, acsr, acsrf
   type(psb_s_csc_sparse_mat) :: csc_dap, csc_dadap, csc_datp, csc_datdatp, acsc
@@ -484,9 +495,10 @@ subroutine mld_saggrmat_minnrg_asb(a,desc_a,ilaggr,nlaggr,parms,ac,op_prol,op_re
   ! Now we have to gather the halo of op_prol, and add it to itself
   ! to multiply it by A,
   !
-  call psb_sphalo(op_prol,desc_a,am4,info,&
+  call op_prol%clone(tmp_prol,info) 
+  if (info == psb_success_) call psb_sphalo(tmp_prol,desc_a,am4,info,&
        & colcnv=.false.,rowscale=.true.)
-  if (info == psb_success_) call psb_rwextd(ncol,op_prol,info,b=am4)      
+  if (info == psb_success_) call psb_rwextd(ncol,tmp_prol,info,b=am4)      
   if (info == psb_success_) call am4%free()
 
   if(info /= psb_success_) then
@@ -519,13 +531,13 @@ subroutine mld_saggrmat_minnrg_asb(a,desc_a,ilaggr,nlaggr,parms,ac,op_prol,op_re
        & write(debug_unit,*) me,' ',trim(name),&
        & 'starting sphalo/ rwxtd'
 
-  call psb_symbmm(a,op_prol,am3,info)
+  call psb_symbmm(a,tmp_prol,am3,info)
   if(info /= psb_success_) then
     call psb_errpush(psb_err_from_subroutine_,name,&
          & a_err='symbmm 2')
     goto 9999
   end if
-  call psb_numbmm(a,op_prol,am3)
+  call psb_numbmm(a,tmp_prol,am3)
   if (debug_level >= psb_debug_outer_) &
        & write(debug_unit,*) me,' ',trim(name),&
        & 'Done NUMBMM 2'
diff --git a/mlprec/impl/mld_saggrmat_nosmth_asb.f90 b/mlprec/impl/mld_saggrmat_nosmth_asb.f90
index bbb8246f..2d25b906 100644
--- a/mlprec/impl/mld_saggrmat_nosmth_asb.f90
+++ b/mlprec/impl/mld_saggrmat_nosmth_asb.f90
@@ -52,6 +52,9 @@
 !  The coarse-level matrix A_C is distributed among the parallel processes or
 !  replicated on each of them, according to the value of p%parms%coarse_mat
 !  specified by the user through mld_sprecinit and mld_zprecset.
+!  On output from this routine the entries of AC, op_prol, op_restr
+!  are still in "global numbering" mode; this is fixed in the calling routine
+!  mld_s_lev_aggrmat_asb.
 !
 !  For details see
 !    P. D'Ambra, D. di Serafino and  S. Filippone, On the development of
@@ -59,7 +62,6 @@
 !    57 (2007), 1181-1196.
 !
 !
-!
 ! Arguments:
 !    a          -  type(psb_sspmat_type), input.     
 !                  The sparse matrix structure containing the local part of
@@ -68,19 +70,33 @@
 !                  The communication descriptor of the fine-level matrix.
 !    p          -  type(mld_s_onelev_type), input/output.
 !                  The 'one-level' data structure that will contain the local
-!                  part of the matrix to be built as well as the information 
+!                  part of the matrix to be built as well as the information
 !                  concerning the prolongator and its transpose.
-!    ilaggr     -  integer, dimension(:), allocatable.
+!    parms      -   type(mld_sml_parms), input
+!                  Parameters controlling the choice of algorithm
+!    ac         -  type(psb_sspmat_type), output
+!                  The coarse matrix on output 
+!                  
+!    ilaggr     -  integer, dimension(:), input
 !                  The mapping between the row indices of the coarse-level
 !                  matrix and the row indices of the fine-level matrix.
 !                  ilaggr(i)=j means that node i in the adjacency graph
 !                  of the fine-level matrix is mapped onto node j in the
-!                  adjacency graph of the coarse-level matrix.
-!    nlaggr     -  integer, dimension(:), allocatable.
+!                  adjacency graph of the coarse-level matrix. Note that the indices
+!                  are assumed to be shifted so as to make sure the ranges on
+!                  the various processes do not   overlap.
+!    nlaggr     -  integer, dimension(:) input
 !                  nlaggr(i) contains the aggregates held by process i.
+!    op_prol    -  type(psb_sspmat_type), input/output
+!                  The tentative prolongator on input, the computed prolongator on output
+!               
+!    op_restr    -  type(psb_sspmat_type), output
+!                  The restrictor operator; normally, it is the transpose of the prolongator. 
+!               
 !    info       -  integer, output.
 !                  Error code.
 !
+!
 subroutine mld_saggrmat_nosmth_asb(a,desc_a,ilaggr,nlaggr,parms,ac,op_prol,op_restr,info)
   use psb_base_mod
   use mld_s_inner_mod, mld_protect_name => mld_saggrmat_nosmth_asb
diff --git a/mlprec/impl/mld_saggrmat_smth_asb.f90 b/mlprec/impl/mld_saggrmat_smth_asb.f90
index 4fc52ab2..cd625d29 100644
--- a/mlprec/impl/mld_saggrmat_smth_asb.f90
+++ b/mlprec/impl/mld_saggrmat_smth_asb.f90
@@ -64,6 +64,9 @@
 !  The coarse-level matrix A_C is distributed among the parallel processes or
 !  replicated on each of them, according to the value of p%parms%coarse_mat,
 !  specified by the user through mld_sprecinit and mld_zprecset.
+!  On output from this routine the entries of AC, op_prol, op_restr
+!  are still in "global numbering" mode; this is fixed in the calling routine
+!  mld_s_lev_aggrmat_asb.
 !
 !  For more details see
 !    M. Brezina and P. Vanek, A black-box iterative solver based on a 
@@ -72,6 +75,7 @@
 !    PSBLAS-based parallel two-level Schwarz preconditioners, Appl. Num. Math.
 !    57 (2007), 1181-1196.
 !
+!
 ! Arguments:
 !    a          -  type(psb_sspmat_type), input.     
 !                  The sparse matrix structure containing the local part of
@@ -80,16 +84,29 @@
 !                  The communication descriptor of the fine-level matrix.
 !    p          -  type(mld_s_onelev_type), input/output.
 !                  The 'one-level' data structure that will contain the local
-!                  part of the matrix to be built as well as the information 
+!                  part of the matrix to be built as well as the information
 !                  concerning the prolongator and its transpose.
-!    ilaggr     -  integer, dimension(:), allocatable.
+!    parms      -   type(mld_sml_parms), input
+!                  Parameters controlling the choice of algorithm
+!    ac         -  type(psb_sspmat_type), output
+!                  The coarse matrix on output 
+!                  
+!    ilaggr     -  integer, dimension(:), input
 !                  The mapping between the row indices of the coarse-level
 !                  matrix and the row indices of the fine-level matrix.
 !                  ilaggr(i)=j means that node i in the adjacency graph
 !                  of the fine-level matrix is mapped onto node j in the
-!                  adjacency graph of the coarse-level matrix.
-!    nlaggr     -  integer, dimension(:), allocatable.
+!                  adjacency graph of the coarse-level matrix. Note that the indices
+!                  are assumed to be shifted so as to make sure the ranges on
+!                  the various processes do not   overlap.
+!    nlaggr     -  integer, dimension(:) input
 !                  nlaggr(i) contains the aggregates held by process i.
+!    op_prol    -  type(psb_sspmat_type), input/output
+!                  The tentative prolongator on input, the computed prolongator on output
+!               
+!    op_restr    -  type(psb_sspmat_type), output
+!                  The restrictor operator; normally, it is the transpose of the prolongator. 
+!               
 !    info       -  integer, output.
 !                  Error code.
 !
diff --git a/mlprec/impl/mld_smlprec_bld.f90 b/mlprec/impl/mld_smlprec_bld.f90
index 761df534..9560f4be 100644
--- a/mlprec/impl/mld_smlprec_bld.f90
+++ b/mlprec/impl/mld_smlprec_bld.f90
@@ -49,6 +49,9 @@
 !  (for more details see the description of mld_Tonelev_type in mld_prec_type.f90).
 !  The levels are numbered in increasing order starting from the finest one, i.e.
 !  level 1 is the finest level. No transfer operators are associated to level 1.
+!
+!  This routine simply calls mld_s_hierarchy_bld and mld_s_smoothers_bld; they
+!  can also be called explicitly from the user. 
 ! 
 !
 ! Arguments:
diff --git a/mlprec/impl/mld_z_dec_map_bld.f90 b/mlprec/impl/mld_z_dec_map_bld.f90
index dffb3402..ff739919 100644
--- a/mlprec/impl/mld_z_dec_map_bld.f90
+++ b/mlprec/impl/mld_z_dec_map_bld.f90
@@ -36,7 +36,38 @@
 !!$  POSSIBILITY OF SUCH DAMAGE.
 !!$ 
 !!$
-
+!
+! File: mld_z_dec_map__bld.f90
+!
+! Subroutine: mld_z_dec_map_bld
+! Version:    complex
+!
+!  This routine builds the tentative prolongator based on the
+!  decoupled aggregation algorithm presented in
+!
+!    M. Brezina and P. Vanek, A black-box iterative solver based on a 
+!    two-level Schwarz method, Computing,  63 (1999), 233-263.
+!    P. D'Ambra, D. di Serafino and S. Filippone, On the development of
+!    PSBLAS-based parallel two-level Schwarz preconditioners, Appl. Num. Math.
+!    57 (2007), 1181-1196.
+!
+! Note: upon exit 
+!
+! Arguments:
+!    a       -  type(psb_zspmat_type).
+!               The sparse matrix structure containing the local part of the
+!               matrix to be preconditioned.
+!    desc_a  -  type(psb_desc_type), input.
+!               The communication descriptor of a.
+!    p       -  type(mld_zprec_type), input/output.
+!               The preconditioner data structure; upon exit it contains 
+!               the multilevel hierarchy of prolongators, restrictors
+!               and coarse matrices.
+!    info    -  integer, output.
+!               Error code.              
+!
+!
+!
 subroutine mld_z_dec_map_bld(iorder,theta,a,desc_a,nlaggr,ilaggr,info)
 
   use psb_base_mod
diff --git a/mlprec/impl/mld_z_hierarchy_bld.f90 b/mlprec/impl/mld_z_hierarchy_bld.f90
index 6b6a306e..d952bd91 100644
--- a/mlprec/impl/mld_z_hierarchy_bld.f90
+++ b/mlprec/impl/mld_z_hierarchy_bld.f90
@@ -58,8 +58,9 @@
 !    desc_a  -  type(psb_desc_type), input.
 !               The communication descriptor of a.
 !    p       -  type(mld_zprec_type), input/output.
-!               The preconditioner data structure containing the local part
-!               of the preconditioner to be built.
+!               The preconditioner data structure; upon exit it contains 
+!               the multilevel hierarchy of prolongators, restrictors
+!               and coarse matrices.
 !    info    -  integer, output.
 !               Error code.              
 !  
diff --git a/mlprec/impl/mld_z_lev_aggrmap_bld.f90 b/mlprec/impl/mld_z_lev_aggrmap_bld.f90
index 52892fde..48355e2f 100644
--- a/mlprec/impl/mld_z_lev_aggrmap_bld.f90
+++ b/mlprec/impl/mld_z_lev_aggrmap_bld.f90
@@ -36,32 +36,40 @@
 !!$  POSSIBILITY OF SUCH DAMAGE.
 !!$ 
 !!$
-! File: mld_zcoarse_bld.f90
+! File: mld_z_lev_aggrmap_bld.f90
 !
-! Subroutine: mld_zcoarse_bld
-! Version:    real
+! Subroutine: mld_z_lev_aggrmap_bld
+! Version:    complex
 !
-!  This routine builds the matrix associated to the current level of the
-!  multilevel preconditioner from the matrix associated to the previous level,
-!  by using a smoothed aggregation technique (therefore, it also builds the
-!  prolongation and restriction operators mapping the current level to the
-!  previous one and vice versa). Then the routine builds the base preconditioner
-!  at the current level.
-!  The current level is regarded as the coarse one, while the previous as
-!  the fine one. This is in agreement with the fact that the routine is called,
-!  by mld_mlprec_bld, only on levels >=2.
+!  This routine is just an interface to aggrmap_bld where the real work is performed. 
+!  It takes care of some consistency checking though.
+!
+!  See mld_zaggrmap_bld for constraints on input/oput arguments. 
 !
 ! 
 ! Arguments:
+!    p       -  type(mld_z_onelev_type), input/output.
+!               The 'one-level' data structure containing the control
+!               parameters and (eventually) coarse matrix and prolongator/restrictors. 
+!               
 !    a       -  type(psb_zspmat_type).
 !               The sparse matrix structure containing the local part of the
 !               fine-level matrix.
 !    desc_a  -  type(psb_desc_type), input.
 !               The communication descriptor of a.
-!    p       -  type(mld_z_onelev_type), input/output.
-!               The 'one-level' data structure containing the local part
-!               of the base preconditioner to be built as well as
-!               information concerning the prolongator and its transpose.
+!    ilaggr     -  integer, dimension(:), allocatable, output
+!                  The mapping between the row indices of the coarse-level
+!                  matrix and the row indices of the fine-level matrix.
+!                  ilaggr(i)=j means that node i in the adjacency graph
+!                  of the fine-level matrix is mapped onto node j in the
+!                  adjacency graph of the coarse-level matrix. Note that on exit the indices
+!                  will be shifted so as to make sure the ranges on the various processes do not
+!                  overlap.
+!    nlaggr     -  integer, dimension(:), allocatable, output
+!                  nlaggr(i) contains the aggregates held by process i.
+!    op_prol    -  type(psb_zspmat_type), output
+!               The tentative prolongator, based on ilaggr.
+!               
 !    info    -  integer, output.
 !               Error code.         
 !  
diff --git a/mlprec/impl/mld_z_lev_aggrmat_asb.f90 b/mlprec/impl/mld_z_lev_aggrmat_asb.f90
index 6101fbed..46ec10d4 100644
--- a/mlprec/impl/mld_z_lev_aggrmat_asb.f90
+++ b/mlprec/impl/mld_z_lev_aggrmat_asb.f90
@@ -36,32 +36,51 @@
 !!$  POSSIBILITY OF SUCH DAMAGE.
 !!$ 
 !!$
-! File: mld_zcoarse_bld.f90
+! File: mld_z_lev_aggrmat_asb.f90
 !
-! Subroutine: mld_zcoarse_bld
-! Version:    real
+! Subroutine: mld_z_lev_aggrmat_asb
+! Version:    complex
 !
 !  This routine builds the matrix associated to the current level of the
 !  multilevel preconditioner from the matrix associated to the previous level,
-!  by using a smoothed aggregation technique (therefore, it also builds the
+!  by using the user-specified aggregation technique (therefore, it also builds the
 !  prolongation and restriction operators mapping the current level to the
-!  previous one and vice versa). Then the routine builds the base preconditioner
-!  at the current level.
+!  previous one and vice versa). 
 !  The current level is regarded as the coarse one, while the previous as
 !  the fine one. This is in agreement with the fact that the routine is called,
 !  by mld_mlprec_bld, only on levels >=2.
+!  The main structure is:
+!  1. Perform sanity checks;
+!  2. Call mld_Xaggrmat_asb to compute prolongator/restrictor/AC
+!  3. According to the choice of DIST/REPL for AC, build a descriptor DESC_AC,
+!     and adjust the column numbering of AC/OP_PROL/OP_RESTR
+!  4. Pack restrictor and prolongator into p%map
+!  5. Fix base_a and base_desc pointers.
 !
 ! 
 ! Arguments:
+!    p       -  type(mld_z_onelev_type), input/output.
+!               The 'one-level' data structure containing the control
+!               parameters and (eventually) coarse matrix and prolongator/restrictors. 
+!               
 !    a       -  type(psb_zspmat_type).
 !               The sparse matrix structure containing the local part of the
 !               fine-level matrix.
 !    desc_a  -  type(psb_desc_type), input.
 !               The communication descriptor of a.
-!    p       -  type(mld_z_onelev_type), input/output.
-!               The 'one-level' data structure containing the local part
-!               of the base preconditioner to be built as well as
-!               information concerning the prolongator and its transpose.
+!    ilaggr     -  integer, dimension(:), input
+!                  The mapping between the row indices of the coarse-level
+!                  matrix and the row indices of the fine-level matrix.
+!                  ilaggr(i)=j means that node i in the adjacency graph
+!                  of the fine-level matrix is mapped onto node j in the
+!                  adjacency graph of the coarse-level matrix. Note that the indices
+!                  are assumed to be shifted so as to make sure the ranges on
+!                  the various processes do not   overlap.
+!    nlaggr     -  integer, dimension(:) input
+!                  nlaggr(i) contains the aggregates held by process i.
+!    op_prol    -  type(psb_zspmat_type), input/output
+!               The tentative prolongator on input, released on output. 
+!               
 !    info    -  integer, output.
 !               Error code.         
 !  
diff --git a/mlprec/impl/mld_z_smoothers_bld.f90 b/mlprec/impl/mld_z_smoothers_bld.f90
index 20f1f1b9..24bc5b2b 100644
--- a/mlprec/impl/mld_z_smoothers_bld.f90
+++ b/mlprec/impl/mld_z_smoothers_bld.f90
@@ -41,14 +41,18 @@
 ! Subroutine: mld_z_smoothers_bld
 ! Version:    complex
 !
-!  This routine builds the preconditioner according to the requirements made by
-!  the user trough the subroutines mld_precinit and mld_precset.
+!  This routine performs the final phase of the multilevel preconditioner build process:
+!  builds the "smoother" objects at each level, based on the matrix hierarchy prepared
+!  by mld_z_hierarchy_bld.
 !  
 !  A multilevel preconditioner is regarded as an array of 'one-level' data structures,
 !  each containing the part of the preconditioner associated to a certain level,
 !  (for more details see the description of mld_Tonelev_type in mld_prec_type.f90).
 !  The levels are numbered in increasing order starting from the finest one, i.e.
 !  level 1 is the finest level. No transfer operators are associated to level 1.
+!  Each level provides a "build" method; for the base type, the "one-level"
+!  build procedure simply invokes the build method of the first smoother object,
+!  and also on the second object if allocated. 
 ! 
 !
 ! Arguments:
diff --git a/mlprec/impl/mld_zaggrmap_bld.f90 b/mlprec/impl/mld_zaggrmap_bld.f90
index 9af55340..a7f63506 100644
--- a/mlprec/impl/mld_zaggrmap_bld.f90
+++ b/mlprec/impl/mld_zaggrmap_bld.f90
@@ -73,9 +73,14 @@
 !                  matrix and the row indices of the fine-level matrix.
 !                  ilaggr(i)=j means that node i in the adjacency graph
 !                  of the fine-level matrix is mapped onto node j in the
-!                  adjacency graph of the coarse-level matrix.
+!                  adjacency graph of the coarse-level matrix. Note that on exit the indices
+!                  will be shifted so as to make sure the ranges on the various processes do not
+!                  overlap.
 !    nlaggr     -  integer, dimension(:), allocatable.
 !                  nlaggr(i) contains the aggregates held by process i.
+!    op_prol    -  type(psb_zspmat_type).
+!               The tentative prolongator, based on ilaggr.
+!               
 !    info       -  integer, output.
 !                  Error code.
 !
diff --git a/mlprec/impl/mld_zaggrmat_asb.f90 b/mlprec/impl/mld_zaggrmat_asb.f90
index 1cd620a5..568d9be3 100644
--- a/mlprec/impl/mld_zaggrmat_asb.f90
+++ b/mlprec/impl/mld_zaggrmat_asb.f90
@@ -53,12 +53,16 @@
 !  The prolongator P_C is built here from this mapping, according to the
 !  value of p%iprcparm(mld_aggr_kind_), specified by the user through
 !  mld_zprecinit and mld_zprecset.
+!  On output from this routine the entries of AC, op_prol, op_restr
+!  are still in "global numbering" mode; this is fixed in the calling routine
+!  mld_z_lev_aggrmat_asb.
 !
-!  Currently three different prolongators are implemented, corresponding to
-!  three aggregation algorithms:
-!  1. non-smoothed aggregation,
+!  Currently four  different prolongators are implemented, corresponding to
+!  four  aggregation algorithms:
+!  1. un-smoothed aggregation,
 !  2. smoothed aggregation,
 !  3. "bizarre" aggregation.
+!  4. minimum energy 
 !  1. The non-smoothed aggregation uses as prolongator the piecewise constant
 !     interpolation operator corresponding to the fine-to-coarse level mapping built
 !     by mld_aggrmap_bld. This is called tentative prolongator.
@@ -67,6 +71,7 @@
 !  3. The "bizarre" aggregation uses a prolongator proposed by the authors of MLD2P4.
 !     This prolongator still requires a deep analysis and testing and its use is
 !     not recommended.
+!  4. Minimum energy aggregation: ADD REFERENCE.
 !
 !  For more details see
 !    M. Brezina and P. Vanek, A black-box iterative solver based on a two-level
@@ -78,23 +83,36 @@
 !
 !
 ! Arguments:
-!    a          -  type(psb_cspmat_type), input.     
+!    a          -  type(psb_zspmat_type), input.     
 !                  The sparse matrix structure containing the local part of
 !                  the fine-level matrix.
 !    desc_a     -  type(psb_desc_type), input.
 !                  The communication descriptor of the fine-level matrix.
 !    p          -  type(mld_z_onelev_type), input/output.
 !                  The 'one-level' data structure that will contain the local
-!                  part of the matrix to be built as well as the information 
+!                  part of the matrix to be built as well as the information
 !                  concerning the prolongator and its transpose.
-!    ilaggr     -  integer, dimension(:), allocatable.
+!    parms      -   type(mld_dml_parms), input
+!                  Parameters controlling the choice of algorithm
+!    ac         -  type(psb_zspmat_type), output
+!                  The coarse matrix on output 
+!                  
+!    ilaggr     -  integer, dimension(:), input
 !                  The mapping between the row indices of the coarse-level
 !                  matrix and the row indices of the fine-level matrix.
 !                  ilaggr(i)=j means that node i in the adjacency graph
 !                  of the fine-level matrix is mapped onto node j in the
-!                  adjacency graph of the coarse-level matrix.
-!    nlaggr     -  integer, dimension(:), allocatable.
+!                  adjacency graph of the coarse-level matrix. Note that the indices
+!                  are assumed to be shifted so as to make sure the ranges on
+!                  the various processes do not   overlap.
+!    nlaggr     -  integer, dimension(:) input
 !                  nlaggr(i) contains the aggregates held by process i.
+!    op_prol    -  type(psb_zspmat_type), input/output
+!                  The tentative prolongator on input, the computed prolongator on output
+!               
+!    op_restr    -  type(psb_zspmat_type), output
+!                  The restrictor operator; normally, it is the transpose of the prolongator. 
+!               
 !    info       -  integer, output.
 !                  Error code.
 !
diff --git a/mlprec/impl/mld_zaggrmat_biz_asb.f90 b/mlprec/impl/mld_zaggrmat_biz_asb.f90
index a79f7c70..0d36e7b4 100644
--- a/mlprec/impl/mld_zaggrmat_biz_asb.f90
+++ b/mlprec/impl/mld_zaggrmat_biz_asb.f90
@@ -56,6 +56,9 @@
 !  The coarse-level matrix A_C is distributed among the parallel processes or
 !  replicated on each of them, according to the value of p%parms%coarse_mat,
 !  specified by the user through mld_zprecinit and mld_zprecset.
+!  On output from this routine the entries of AC, op_prol, op_restr
+!  are still in "global numbering" mode; this is fixed in the calling routine
+!  mld_z_lev_aggrmat_asb.
 !
 ! Arguments:
 !    a          -  type(psb_zspmat_type), input.     
@@ -98,7 +101,7 @@ subroutine mld_zaggrmat_biz_asb(a,desc_a,ilaggr,nlaggr,parms,ac,op_prol,op_restr
        & naggr, nzl,naggrm1,naggrp1, i, j, k, jd, icolF, nrw, err_act
   integer(psb_ipk_) ::ictxt, np, me
   character(len=20) :: name
-  type(psb_zspmat_type) :: am3, am4
+  type(psb_zspmat_type) :: am3, am4,tmp_prol
   type(psb_z_coo_sparse_mat) :: tmpcoo
   type(psb_z_csr_sparse_mat) :: acsr1, acsr2, acsr3, acsrf, ptilde
   complex(psb_dpk_), allocatable :: adiag(:)
@@ -321,29 +324,29 @@ subroutine mld_zaggrmat_biz_asb(a,desc_a,ilaggr,nlaggr,parms,ac,op_prol,op_restr
   call acsr1%set_dupl(psb_dupl_add_)
 
   call op_prol%mv_from(acsr1)
-
-  call psb_rwextd(ncol,op_prol,info)
+  call op_prol%clone(tmp_prol,info)
+  call psb_rwextd(ncol,tmp_prol,info)
   if(info /= psb_success_) then
     call psb_errpush(psb_err_internal_error_,name,a_err='Halo of op_prol')
     goto 9999
   end if
 
-  call psb_symbmm(a,op_prol,am3,info)
+  call psb_symbmm(a,tmp_prol,am3,info)
   if(info /= psb_success_) then
     call psb_errpush(psb_err_from_subroutine_,name,a_err='symbmm 2')
     goto 9999
   end if
 
-  call psb_numbmm(a,op_prol,am3)
+  call psb_numbmm(a,tmp_prol,am3)
   if (debug_level >= psb_debug_outer_) &
        & write(debug_unit,*) me,' ',trim(name),&
        & 'Done NUMBMM 2',parms%aggr_kind, mld_smooth_prol_
 
-  call op_prol%transp(op_restr)
+  call tmp_prol%transp(op_restr)
   if (debug_level >= psb_debug_outer_) &
        & write(debug_unit,*) me,' ',trim(name),&
        & 'starting sphalo/ rwxtd'
-
+  call tmp_prol%free()
   call psb_rwextd(ncol,am3,info)
   if(info /= psb_success_) then
     call psb_errpush(psb_err_internal_error_,name,a_err='Extend am3')
diff --git a/mlprec/impl/mld_zaggrmat_minnrg_asb.f90 b/mlprec/impl/mld_zaggrmat_minnrg_asb.f90
index 1e52efd5..8066f318 100644
--- a/mlprec/impl/mld_zaggrmat_minnrg_asb.f90
+++ b/mlprec/impl/mld_zaggrmat_minnrg_asb.f90
@@ -60,22 +60,19 @@
 !  radius of D^(-1)A, to be used in the computation of omega, is provided, 
 !  according to the value of p%parms%aggr_omega_alg, specified by the user
 !  through mld_zprecinit and mld_zprecset.
+!  4. Minimum energy aggregation: ADD REFERENCE.
+!  On output from this routine the entries of AC, op_prol, op_restr
+!  are still in "global numbering" mode; this is fixed in the calling routine
+!  mld_z_lev_aggrmat_asb.
 !
-!  This routine can also build A_C according to a "bizarre" aggregation algorithm,
-!  using a "naive" prolongator proposed by the authors of MLD2P4. However, this
-!  prolongator still requires a deep analysis and testing and its use is not
-!  recommended.
+!  For more details see
+!    M. Brezina and P. Vanek, A black-box iterative solver based on a two-level
+!    Schwarz method, Computing,  63 (1999), 233-263.
+!    P. D'Ambra, D. di Serafino and S. Filippone, On the development of PSBLAS-based
+!    parallel two-level Schwarz preconditioners, Appl. Num. Math., 57 (2007),
+!    1181-1196.
 !
-!  The coarse-level matrix A_C is distributed among the parallel processes or
-!  replicated on each of them, according to the value of p%parms%coarse_mat,
-!  specified by the user through mld_zprecinit and mld_zprecset.
 !
-!  For more details see
-!    M. Brezina and P. Vanek, A black-box iterative solver based on a 
-!    two-level Schwarz method, Computing,  63 (1999), 233-263.
-!    P. D'Ambra, D. di Serafino and S. Filippone, On the development of
-!    PSBLAS-based parallel two-level Schwarz preconditioners, Appl. Num. Math.
-!    57 (2007), 1181-1196.
 !
 ! Arguments:
 !    a          -  type(psb_zspmat_type), input.     
@@ -85,19 +82,33 @@
 !                  The communication descriptor of the fine-level matrix.
 !    p          -  type(mld_z_onelev_type), input/output.
 !                  The 'one-level' data structure that will contain the local
-!                  part of the matrix to be built as well as the information 
+!                  part of the matrix to be built as well as the information
 !                  concerning the prolongator and its transpose.
-!    ilaggr     -  integer, dimension(:), allocatable.
+!    parms      -   type(mld_dml_parms), input
+!                  Parameters controlling the choice of algorithm
+!    ac         -  type(psb_zspmat_type), output
+!                  The coarse matrix on output 
+!                  
+!    ilaggr     -  integer, dimension(:), input
 !                  The mapping between the row indices of the coarse-level
 !                  matrix and the row indices of the fine-level matrix.
 !                  ilaggr(i)=j means that node i in the adjacency graph
 !                  of the fine-level matrix is mapped onto node j in the
-!                  adjacency graph of the coarse-level matrix.
-!    nlaggr     -  integer, dimension(:), allocatable.
+!                  adjacency graph of the coarse-level matrix. Note that the indices
+!                  are assumed to be shifted so as to make sure the ranges on
+!                  the various processes do not   overlap.
+!    nlaggr     -  integer, dimension(:) input
 !                  nlaggr(i) contains the aggregates held by process i.
+!    op_prol    -  type(psb_zspmat_type), input/output
+!                  The tentative prolongator on input, the computed prolongator on output
+!               
+!    op_restr    -  type(psb_zspmat_type), output
+!                  The restrictor operator; normally, it is the transpose of the prolongator. 
+!               
 !    info       -  integer, output.
 !                  Error code.
 !
+!
 subroutine mld_zaggrmat_minnrg_asb(a,desc_a,ilaggr,nlaggr,parms,ac,op_prol,op_restr,info)
   use psb_base_mod
   use mld_z_inner_mod, mld_protect_name => mld_zaggrmat_minnrg_asb
@@ -121,7 +132,7 @@ subroutine mld_zaggrmat_minnrg_asb(a,desc_a,ilaggr,nlaggr,parms,ac,op_prol,op_re
   character(len=20)            :: name
   type(psb_zspmat_type)      :: af, ptilde, rtilde, atran, atp, atdatp
   type(psb_zspmat_type)      :: am3,am4, ap, adap,atmp,rada, ra, atmp2, dap, dadap, da
-  type(psb_zspmat_type)      :: dat, datp, datdatp, atmp3
+  type(psb_zspmat_type)      :: dat, datp, datdatp, atmp3, tmp_prol
   type(psb_z_coo_sparse_mat) :: tmpcoo
   type(psb_z_csr_sparse_mat) :: acsr1, acsr2, acsr3, acsr, acsrf
   type(psb_z_csc_sparse_mat) :: csc_dap, csc_dadap, csc_datp, csc_datdatp, acsc
@@ -484,9 +495,10 @@ subroutine mld_zaggrmat_minnrg_asb(a,desc_a,ilaggr,nlaggr,parms,ac,op_prol,op_re
   ! Now we have to gather the halo of op_prol, and add it to itself
   ! to multiply it by A,
   !
-  call psb_sphalo(op_prol,desc_a,am4,info,&
+  call op_prol%clone(tmp_prol,info) 
+  if (info == psb_success_) call psb_sphalo(tmp_prol,desc_a,am4,info,&
        & colcnv=.false.,rowscale=.true.)
-  if (info == psb_success_) call psb_rwextd(ncol,op_prol,info,b=am4)      
+  if (info == psb_success_) call psb_rwextd(ncol,tmp_prol,info,b=am4)      
   if (info == psb_success_) call am4%free()
 
   if(info /= psb_success_) then
@@ -519,13 +531,13 @@ subroutine mld_zaggrmat_minnrg_asb(a,desc_a,ilaggr,nlaggr,parms,ac,op_prol,op_re
        & write(debug_unit,*) me,' ',trim(name),&
        & 'starting sphalo/ rwxtd'
 
-  call psb_symbmm(a,op_prol,am3,info)
+  call psb_symbmm(a,tmp_prol,am3,info)
   if(info /= psb_success_) then
     call psb_errpush(psb_err_from_subroutine_,name,&
          & a_err='symbmm 2')
     goto 9999
   end if
-  call psb_numbmm(a,op_prol,am3)
+  call psb_numbmm(a,tmp_prol,am3)
   if (debug_level >= psb_debug_outer_) &
        & write(debug_unit,*) me,' ',trim(name),&
        & 'Done NUMBMM 2'
diff --git a/mlprec/impl/mld_zaggrmat_nosmth_asb.f90 b/mlprec/impl/mld_zaggrmat_nosmth_asb.f90
index 4d3960bb..a11c65b0 100644
--- a/mlprec/impl/mld_zaggrmat_nosmth_asb.f90
+++ b/mlprec/impl/mld_zaggrmat_nosmth_asb.f90
@@ -52,6 +52,9 @@
 !  The coarse-level matrix A_C is distributed among the parallel processes or
 !  replicated on each of them, according to the value of p%parms%coarse_mat
 !  specified by the user through mld_zprecinit and mld_zprecset.
+!  On output from this routine the entries of AC, op_prol, op_restr
+!  are still in "global numbering" mode; this is fixed in the calling routine
+!  mld_z_lev_aggrmat_asb.
 !
 !  For details see
 !    P. D'Ambra, D. di Serafino and  S. Filippone, On the development of
@@ -59,7 +62,6 @@
 !    57 (2007), 1181-1196.
 !
 !
-!
 ! Arguments:
 !    a          -  type(psb_zspmat_type), input.     
 !                  The sparse matrix structure containing the local part of
@@ -68,19 +70,33 @@
 !                  The communication descriptor of the fine-level matrix.
 !    p          -  type(mld_z_onelev_type), input/output.
 !                  The 'one-level' data structure that will contain the local
-!                  part of the matrix to be built as well as the information 
+!                  part of the matrix to be built as well as the information
 !                  concerning the prolongator and its transpose.
-!    ilaggr     -  integer, dimension(:), allocatable.
+!    parms      -   type(mld_dml_parms), input
+!                  Parameters controlling the choice of algorithm
+!    ac         -  type(psb_zspmat_type), output
+!                  The coarse matrix on output 
+!                  
+!    ilaggr     -  integer, dimension(:), input
 !                  The mapping between the row indices of the coarse-level
 !                  matrix and the row indices of the fine-level matrix.
 !                  ilaggr(i)=j means that node i in the adjacency graph
 !                  of the fine-level matrix is mapped onto node j in the
-!                  adjacency graph of the coarse-level matrix.
-!    nlaggr     -  integer, dimension(:), allocatable.
+!                  adjacency graph of the coarse-level matrix. Note that the indices
+!                  are assumed to be shifted so as to make sure the ranges on
+!                  the various processes do not   overlap.
+!    nlaggr     -  integer, dimension(:) input
 !                  nlaggr(i) contains the aggregates held by process i.
+!    op_prol    -  type(psb_zspmat_type), input/output
+!                  The tentative prolongator on input, the computed prolongator on output
+!               
+!    op_restr    -  type(psb_zspmat_type), output
+!                  The restrictor operator; normally, it is the transpose of the prolongator. 
+!               
 !    info       -  integer, output.
 !                  Error code.
 !
+!
 subroutine mld_zaggrmat_nosmth_asb(a,desc_a,ilaggr,nlaggr,parms,ac,op_prol,op_restr,info)
   use psb_base_mod
   use mld_z_inner_mod, mld_protect_name => mld_zaggrmat_nosmth_asb
diff --git a/mlprec/impl/mld_zaggrmat_smth_asb.f90 b/mlprec/impl/mld_zaggrmat_smth_asb.f90
index 0f632736..dce30b03 100644
--- a/mlprec/impl/mld_zaggrmat_smth_asb.f90
+++ b/mlprec/impl/mld_zaggrmat_smth_asb.f90
@@ -64,6 +64,9 @@
 !  The coarse-level matrix A_C is distributed among the parallel processes or
 !  replicated on each of them, according to the value of p%parms%coarse_mat,
 !  specified by the user through mld_zprecinit and mld_zprecset.
+!  On output from this routine the entries of AC, op_prol, op_restr
+!  are still in "global numbering" mode; this is fixed in the calling routine
+!  mld_z_lev_aggrmat_asb.
 !
 !  For more details see
 !    M. Brezina and P. Vanek, A black-box iterative solver based on a 
@@ -72,6 +75,7 @@
 !    PSBLAS-based parallel two-level Schwarz preconditioners, Appl. Num. Math.
 !    57 (2007), 1181-1196.
 !
+!
 ! Arguments:
 !    a          -  type(psb_zspmat_type), input.     
 !                  The sparse matrix structure containing the local part of
@@ -80,16 +84,29 @@
 !                  The communication descriptor of the fine-level matrix.
 !    p          -  type(mld_z_onelev_type), input/output.
 !                  The 'one-level' data structure that will contain the local
-!                  part of the matrix to be built as well as the information 
+!                  part of the matrix to be built as well as the information
 !                  concerning the prolongator and its transpose.
-!    ilaggr     -  integer, dimension(:), allocatable.
+!    parms      -   type(mld_dml_parms), input
+!                  Parameters controlling the choice of algorithm
+!    ac         -  type(psb_zspmat_type), output
+!                  The coarse matrix on output 
+!                  
+!    ilaggr     -  integer, dimension(:), input
 !                  The mapping between the row indices of the coarse-level
 !                  matrix and the row indices of the fine-level matrix.
 !                  ilaggr(i)=j means that node i in the adjacency graph
 !                  of the fine-level matrix is mapped onto node j in the
-!                  adjacency graph of the coarse-level matrix.
-!    nlaggr     -  integer, dimension(:), allocatable.
+!                  adjacency graph of the coarse-level matrix. Note that the indices
+!                  are assumed to be shifted so as to make sure the ranges on
+!                  the various processes do not   overlap.
+!    nlaggr     -  integer, dimension(:) input
 !                  nlaggr(i) contains the aggregates held by process i.
+!    op_prol    -  type(psb_zspmat_type), input/output
+!                  The tentative prolongator on input, the computed prolongator on output
+!               
+!    op_restr    -  type(psb_zspmat_type), output
+!                  The restrictor operator; normally, it is the transpose of the prolongator. 
+!               
 !    info       -  integer, output.
 !                  Error code.
 !
diff --git a/mlprec/impl/mld_zmlprec_bld.f90 b/mlprec/impl/mld_zmlprec_bld.f90
index 50253ac9..62a6a691 100644
--- a/mlprec/impl/mld_zmlprec_bld.f90
+++ b/mlprec/impl/mld_zmlprec_bld.f90
@@ -49,6 +49,9 @@
 !  (for more details see the description of mld_Tonelev_type in mld_prec_type.f90).
 !  The levels are numbered in increasing order starting from the finest one, i.e.
 !  level 1 is the finest level. No transfer operators are associated to level 1.
+!
+!  This routine simply calls mld_z_hierarchy_bld and mld_z_smoothers_bld; they
+!  can also be called explicitly from the user. 
 ! 
 !
 ! Arguments: