&1) | $(FILTER)
+endef
+# latex-filter: print a banner and the start time, then run $(LATEX) on $(HTMLFILE) inside tmp/ with stdout+stderr piped through $(FILTER); '&&' skips the run if 'cd tmp' fails.
+define latex-filter
+ @echo
+ @echo "----- latex -------------------------------------------------------"
+ @echo -n "Starting: "; date
+ @echo
+ cd tmp && ($(LATEX) $(HTMLFILE) 2>&1) | $(FILTER)
+endef
+# ltx2html-filter: print a banner and the start time, then run $(LTX2HTML) $(HTMLFLAGS) on $(HTMLFILE) inside tmp/ (output dir ../$(HTMLDIR)), piped through $(FILTER); '&&' skips the run if 'cd tmp' fails.
+define ltx2html-filter
+ @echo
+ @echo "----- ltx2html ----------------------------------------------------"
+ @echo -n "Starting: "; date
+ @echo
+ cd tmp && ($(LTX2HTML) $(HTMLFLAGS) -dir ../$(HTMLDIR) $(HTMLFILE) 2>&1) | $(FILTER)
+endef
+
+#----------------------------------------------------------------------------
+# bibtex: print a banner and the start time, then run $(BIBTEX) on $(BASEFILE) inside tmp/; '&&' prevents running it in the wrong directory if 'cd tmp' fails.
+define bibtex
+ @echo
+ @echo "----- bibtex ---------------------------------------------------------"
+ @echo -n "Starting: "; date
+ @echo
+ cd tmp && $(BIBTEX) $(BASEFILE)
+endef
+
+#----------------------------------------------------------------------------
+# glosstex: inside tmp/, run $(GLOSSTEX) on $(BASEFILE) $(GLOFILES), then $(MAKEIDX) on $(GXS) writing $(GLX) with style glosstex.ist; each command runs only if 'cd tmp' succeeds.
+define glosstex
+ @echo
+ @echo "----- glosstex -------------------------------------------------------"
+ @echo -n "Starting: "; date
+ @echo
+ cd tmp && $(GLOSSTEX) $(BASEFILE) $(GLOFILES)
+ cd tmp && $(MAKEIDX) $(GXS) -o $(GLX) -s glosstex.ist
+endef
+
+#----------------------------------------------------------------------------
+# makeindex: inside tmp/, move $(IDX) aside to $(IDX)-, regenerate $(IDX) through $(CLEANIDX), then run $(MAKEIDX) on it; '&&' stops the chain so a failed cd/mv cannot truncate $(IDX).
+define makeindex
+ @echo
+ @echo "----- makeindex ------------------------------------------------------"
+ @echo -n "Starting: "; date
+ @echo
+ cd tmp && mv $(IDX) $(IDX)- && $(CLEANIDX) < $(IDX)- > $(IDX)
+ cd tmp && $(MAKEIDX) $(IDX)
+endef
+
+#----------------------------------------------------------------------------
+# finish: symlink tmp/$@ into the current directory, then print a summary: the $(START) value, the current date, an 'ls -l -o' listing of tmp/$@, and a final "Target: " label (no newline where echo honours -n).
+define finish
+ @ln -sf tmp/$@ .
+ @echo
+ @echo "----- finish ---------------------------------------------------------"
+ @echo -n "Start:  "$(START); echo
+ @echo -n "Finish: "; date
+ @echo -n "Output: "; ls -l -o tmp/$@
+ @echo -n "Target: "
+endef
+# ltx2html-finish: same commands as 'finish' (symlink tmp/$@ here, then print start/finish times, an 'ls -l -o' listing of tmp/$@ and a "Target: " label); kept as a separate canned recipe.
+define ltx2html-finish
+ @ln -sf tmp/$@ .
+ @echo
+ @echo "----- finish ---------------------------------------------------------"
+ @echo -n "Start:  "$(START); echo
+ @echo -n "Finish: "; date
+ @echo -n "Output: "; ls -l -o tmp/$@
+ @echo -n "Target: "
+endef
+
+
diff --git a/docs/src/abstract.tex b/docs/src/abstract.tex
new file mode 100644
index 00000000..45856568
--- /dev/null
+++ b/docs/src/abstract.tex
@@ -0,0 +1,27 @@
+\section*{Abstract}
+\addcontentsline{toc}{section}{Abstract}
+\textsc{MLD2P4 (Multi-Level Domain Decomposition Parallel Preconditioners Package based on
+PSBLAS)} is a package of parallel algebraic multi-level preconditioners.
+It implements various versions of one-level additive and of multi-level additive
+and hybrid Schwarz algorithms. In the multi-level case, a purely algebraic approach
+is applied to generate coarse-level corrections, so that no geometric background is needed
+concerning the matrix to be preconditioned. The matrix is required to be square, real
+or complex, with a symmetric sparsity pattern.
+
+MLD2P4 has been designed to provide scalable and easy-to-use preconditioners in the
+context of the PSBLAS (Parallel Sparse Basic Linear Algebra Subprograms)
+computational framework and can be used in conjunction with the Krylov solvers
+available in this framework. MLD2P4 enables the user to easily specify different aspects
+of a generic algebraic multilevel Schwarz preconditioner, thus making it possible to search
+for the ``best'' preconditioner for the problem at hand.
+
+The package has been designed employing object-oriented techniques,
+using Fortran 95, with interfaces to additional third party libraries
+such as UMFPACK, SuperLU and SuperLU\_Dist, that
+can be exploited in building multi-level preconditioners. The parallel
+implementation is based on a Single Program Multiple Data (SPMD)
+paradigm for distributed-memory architectures; the inter-process data
+communication is based on MPI and is managed mainly through PSBLAS.
+
+This guide provides a brief description of the functionalities and
+the user interface of MLD2P4.
diff --git a/docs/src/background.tex b/docs/src/background.tex
new file mode 100644
index 00000000..52e7674b
--- /dev/null
+++ b/docs/src/background.tex
@@ -0,0 +1,348 @@
+\section{Multi-level Domain Decomposition Background\label{sec:background}}
+\markboth{\textsc{MLD2P4 User's and Reference Guide}}
+ {\textsc{\ref{sec:background} Multi-level Domain Decomposition Background}}
+
+\emph{Domain Decomposition} (DD) preconditioners, coupled with Krylov iterative
+solvers, are widely used in the parallel solution of large and sparse linear systems.
+These preconditioners are based on the divide and conquer technique: the matrix
+to be preconditioned is divided into submatrices, a ``local'' linear system
+involving each submatrix is (approximately) solved, and the local solutions are used
+to build a preconditioner for the whole original matrix. This process
+often corresponds to dividing a physical domain associated to the original matrix
+into subdomains, e.g. in a PDE discretization, to (approximately) solving the
+subproblems corresponding to the subdomains and to building an approximate
+solution of the original problem from the local solutions
+\cite{Cai_Widlund_92,dd1_94,dd2_96}.
+
+\emph{Additive Schwarz} preconditioners are DD preconditioners using overlapping
+submatrices, i.e.\ with some common rows, to couple the local information
+related to the submatrices (see, e.g., \cite{dd2_96}).
+The main motivation for choosing Additive Schwarz preconditioners is their
+intrinsic parallelism. A drawback of these
+preconditioners is that the number of iterations of the preconditioned solvers
+generally grows with the number of submatrices. This may be a serious limitation
+on parallel computers, since the number of submatrices usually matches the number
+of available processors. Optimal convergence rates, i.e.\ iteration numbers
+independent of the number of submatrices, can be obtained by correcting the
+preconditioner through a suitable approximation of the original linear system
+in a coarse space, which globally couples the information related to the single
+submatrices.
+
+\emph{Two-level Schwarz} preconditioners are obtained
+by combining basic (one-level) Sch\-warz preconditioners with a coarse-level
+correction. In this context, the one-level preconditioner is often
+called `smoother'. Different two-level preconditioners are obtained by varying the
+choice of the smoother and of the coarse-level correction, and the
+way they are combined \cite{dd2_96}. The same reasoning can be applied starting
+from the coarse-level system, i.e.\ a coarse-space correction can be built
+from this system, thus obtaining \emph{multi-level} preconditioners.
+
+It is worth noting that optimal preconditioners do not necessarily correspond
+to minimum execution times. Indeed, to obtain effective multi-level preconditioners
+a tradeoff between optimality of convergence and the cost of building and applying
+the coarse-space corrections must be achieved. The choice of the number of levels,
+i.e.\ of the coarse-space corrections, also affects the effectiveness of the
+preconditioners. One more goal is to get convergence rates that are as insensitive
+as possible to variations in the matrix coefficients.
+
+Two main approaches can be used to build coarse-space corrections. The geometric approach
+applies coarsening strategies based on the knowledge of some physical grid associated
+to the matrix and requires the user to define grid transfer operators from the fine
+to the coarse levels and vice versa. This may prove difficult for complex geometries;
+furthermore, suitable one-level preconditioners may be required to get efficient
+interplay between fine and coarse levels, e.g.\ when matrices with highly varying coefficients
+are considered. The algebraic approach builds coarse-space corrections using only matrix
+information. It performs a fully automatic coarsening and enforces the interplay between
+the fine and coarse levels by suitably choosing the coarse space and the coarse-to-fine
+interpolation \cite{StubenGMD69_99}.
+
+MLD2P4 uses a pure algebraic approach for building the sequence of coarse matrices
+starting from the original matrix. The algebraic approach is based on the \emph{smoothed
+aggregation} algorithm \cite{BREZINA_VANEK,VANEK_MANDEL_BREZINA}. A decoupled version
+of this algorithm is implemented, where the smoothed aggregation is applied locally
+to each submatrix \cite{TUMINARO_TONG}. In the next two subsections we provide
+a brief description of the multi-level Schwarz preconditioners and of the smoothed
+aggregation technique as implemented in MLD2P4. For further details the user
+is referred to \cite{para_04,aaecc_07,apnum_07,dd2_96}.
+
+
+\subsection{Multi-level Schwarz Preconditioners\label{sec:multilevel}}
+
+The multi-level preconditioners implemented in MLD2P4 are obtained by combining
+Additive Schwarz (AS) preconditioners with coarse-space corrections; therefore
+we first provide a sketch of the AS preconditioners.
+
+Given the linear system \Ref{system1},
+where $A=(a_{ij}) \in \Re^{n \times n}$ is a
+nonsingular sparse matrix with a symmetric nonzero pattern,
+let $G=(W,E)$ be the adjacency graph of $A$, where $W=\{1, 2, \ldots, n\}$
+and $E=\{(i,j) : a_{ij} \neq 0\}$ are the vertex set and the edge set of $G$,
+respectively. Two vertices are called adjacent if there is an edge connecting
+them. For any integer $\delta > 0$, a $\delta$-overlap
+partition of $W$ can be defined recursively as follows.
+Given a 0-overlap (or non-overlapping) partition of $W$,
+i.e.\ a set of $m$ disjoint nonempty sets $W_i^0 \subset W$ such that
+$\cup_{i=1}^m W_i^0 = W$, a $\delta$-overlap
+partition of $W$ is obtained by considering the sets
+$W_i^\delta \supset W_i^{\delta-1}$ obtained by including the vertices that
+are adjacent to any vertex in $W_i^{\delta-1}$.
+
+Let $n_i^\delta$ be the size of $W_i^\delta$ and $R_i^{\delta} \in
+\Re^{n_i^\delta \times n}$ the restriction operator that maps
+a vector $v \in \Re^n$ onto the vector $v_i^{\delta} \in \Re^{n_i^\delta}$
+containing the components of $v$ corresponding to the vertices in
+$W_i^\delta$. The transpose of $R_i^{\delta}$ is a
+prolongation operator from $\Re^{n_i^\delta}$ to $\Re^n$.
+The matrix $A_i^\delta=R_i^\delta A (R_i^\delta)^T \in
+\Re^{n_i^\delta \times n_i^\delta}$ can be considered
+as a restriction of $A$ corresponding to the set $W_i^{\delta}$.
+
+The \emph{classical one-level AS} preconditioner is defined by
+\[
+M_{AS}^{-1}= \sum_{i=1}^m (R_i^{\delta})^T
+(A_i^\delta)^{-1} R_i^{\delta},
+\]
+where $A_i^\delta$ is assumed to be nonsingular. Its application
+to a vector $v \in \Re^n$ within a Krylov solver requires the following
+three steps:
+\begin{enumerate}
+ \item restriction of $v$ as $v_i = R_i^{\delta} v$, $i=1,\ldots,m$;
+ \item solution of the linear systems $A_i^\delta w_i = v_i$,
+ $i=1,\ldots,m$;
+ \item prolongation and sum of the $w_i$'s, i.e.\ $w = \sum_{i=1}^m (R_i^{\delta})^T w_i$.
+\end{enumerate}
+Note that the linear systems at step 2 are usually solved approximately,
+e.g.\ using incomplete LU factorizations such as ILU($p$), MILU($p$) and
+ILU($p,t$) \cite[Chapter 10]{Saad_book}.
+
+A variant of the classical AS preconditioner that outperforms it
+in terms of convergence rate and of computation and communication
+time on parallel distributed-memory computers is the so-called \emph{Restricted AS
+(RAS)} preconditioner~\cite{CAI_SARKIS,EFSTATHIOU}. It
+is obtained by zeroing the components of $w_i$ corresponding to the
+overlapping vertices when applying the prolongation. Therefore,
+RAS differs from classical AS by the prolongation operators,
+which are substituted by $(\tilde{R}_i^0)^T \in \Re^{n \times n_i^\delta}$,
+where $\tilde{R}_i^0$ is obtained by zeroing the rows of $R_i^\delta$
+corresponding to the vertices in $W_i^\delta \backslash W_i^0$:
+\[
+M_{RAS}^{-1}= \sum_{i=1}^m (\tilde{R}_i^0)^T
+(A_i^\delta)^{-1} R_i^{\delta}.
+\]
+Analogously, the AS variant called \emph{AS with Harmonic extension (ASH)}
+is defined by
+\[ M_{ASH}^{-1}= \sum_{i=1}^m (R_i^{\delta})^T
+(A_i^\delta)^{-1} \tilde{R}_i^0.
+\]
+We note that for $\delta=0$ the three variants of the AS preconditioner are
+all equal to the block-Jacobi preconditioner.
+
+As already observed, the convergence rate of the one-level Schwarz
+preconditioned iterative solvers deteriorates as the number $m$ of partitions
+of $W$ increases \cite{dd1_94,dd2_96}. To reduce the dependency
+of the number of iterations on the degree of parallelism we may
+introduce a global coupling among the overlapping partitions by defining
+a coarse-space approximation $A_C$ of the matrix $A$.
+In a pure algebraic setting, $A_C$ is usually built with
+a Galerkin approach. Given a set $W_C$ of \emph{coarse vertices},
+with size $n_C$, and a suitable restriction operator
+$R_C \in \Re^{n_C \times n}$, $A_C$ is defined as
+\[
+A_C=R_C A R_C^T
+\]
+and the coarse-level correction matrix to be combined with a generic
+one-level AS preconditioner $M_{1L}$ is obtained as
+\[
+M_{C}^{-1}= R_C^T A_C^{-1} R_C,
+\]
+where $A_C$ is assumed to be nonsingular. The application of $M_{C}^{-1}$
+to a vector $v$ corresponds to a restriction, a solution and
+a prolongation step; the solution step, involving the matrix $A_C$,
+may be carried out also approximately.
+
+The combination of $M_{C}$ and $M_{1L}$ may be
+performed in either an additive or a multiplicative framework.
+In the former case, the \emph{two-level additive} Schwarz preconditioner
+is obtained:
+\[
+M_{2LA}^{-1} = M_{C}^{-1} + M_{1L}^{-1}.
+\]
+Applying $M_{2LA}^{-1}$ to a vector $v$ within a Krylov solver
+corresponds to applying $M_{C}^{-1}$
+and $M_{1L}^{-1}$ to $v$ independently and then summing up
+the results.
+
+In the multiplicative case, the combination can be
+performed by first applying the smoother $M_{1L}^{-1}$ and then
+the coarse-level correction operator $M_{C}^{-1}$:
+\[
+\begin{array}{l}
+w = M_{1L}^{-1} v, \\
+z = w + M_{C}^{-1} (v-Aw);
+\end{array}
+\]
+this corresponds to the following \emph{two-level hybrid pre-smoothed}
+Schwarz preconditioner:
+\[
+M_{2LH-PRE}^{-1} = M_{C}^{-1} + \left( I - M_{C}^{-1}A \right) M_{1L}^{-1}.
+\]
+On the other hand, by applying the smoother after the coarse-level correction,
+i.e.\ by computing
+\[
+\begin{array}{l}
+w = M_{C}^{-1} v , \\
+z = w + M_{1L}^{-1} (v-Aw) ,
+\end{array}
+\]
+the \emph{two-level hybrid post-smoothed}
+Schwarz preconditioner is obtained:
+\[
+M_{2LH-POST}^{-1} = M_{1L}^{-1} + \left( I - M_{1L}^{-1}A \right) M_{C}^{-1}.
+\]
+One more variant of two-level hybrid preconditioner is obtained by applying
+the smoother before and after the coarse-level correction. In this case, the
+preconditioner is symmetric if $A$, $M_{1L}$ and $M_{C}$ are symmetric.
+
+As previously noted, on parallel computers the number of submatrices usually matches
+the number of available processors. When the size of the system to be preconditioned
+is very large, the use of many processors, i.e.\ of many small submatrices, often
+leads to a large coarse-level system, whose solution may be computationally expensive.
+On the other hand, the use of few processors often leads to local submatrices that
+are too expensive to be processed on single processors, because of memory and/or
+computing requirements. Therefore, it seems natural to use a recursive approach,
+in which the coarse-level correction is re-applied starting from the current
+coarse-level system. The corresponding preconditioners, called \emph{multi-level}
+preconditioners, can significantly reduce the computational cost of preconditioning
+with respect to the two-level case (see \cite[Chapter 3]{dd2_96}).
+Additive and hybrid multilevel preconditioners
+are obtained as direct extensions of the two-level counterparts.
+For a detailed description of them, the reader is
+referred to \cite[Chapter 3]{dd2_96}.
+The algorithm for the application of a multi-level hybrid
+post-smoothed preconditioner $M$ to a vector $v$, i.e.\ for the
+computation of $w=M^{-1}v$, is reported, for
+example, in Figure~\ref{fig:mlhpost_alg}. Here the number of levels
+is denoted by $nlev$ and the levels are numbered in increasing order starting
+from the finest one, i.e.\ the finest level is level 1; the coarse matrix
+and the corresponding basic preconditioner at each level $l$ are denoted by $A_l$ and
+$M_l$, respectively, with $A_1=A$.
+%
+\begin{figure}[t]
+\begin{center}
+\framebox{
+\begin{minipage}{.85\textwidth} {\small
+\begin{tabbing}
+\quad \=\quad \=\quad \=\quad \\[-1mm]
+%
+%! assign the finest matrix\\
+%$A_1 \leftarrow A$;\\[1mm]
+%! define the number of levels $nlev$ \\[1mm]
+%! define $nlev-1$ prolongators\\
+%$R_l^T, l=2, \ldots, nlev$;\\[1mm]
+%! define $nlev-1$ coarser matrices\\
+%$A_l \leftarrow R_lA_{l-1}R_l^T, \; l=2, \ldots, nlev$;\\[1mm]
+%! define the $nlev-1$ basic Schwarz preconditioners\\
+%$M_l$, basic preconditioner for $A_l \; l=1, \ldots, nlev-1$;\\[1mm]
+%$! assign a vector $v$\\
+%
+$v_1 = v$; \\[2mm]
+\textbf{for $l=2, nlev$ do}\\[1mm]
+\> ! transfer $v_{l-1}$ to the next coarser level\\
+\> $v_l = R_lv_{l-1}$ \\[1mm]
+\textbf{endfor} \\[2mm]
+! apply the coarsest-level correction\\[1mm]
+$y_{nlev} = A_{nlev}^{-1} v_{nlev}$\\[2mm]
+\textbf{for $l=nlev -1 , 1, -1$ do}\\[1mm]
+\> ! transfer $y_{l+1}$ to the next finer level\\
+\> $y_l = R_{l+1}^T y_{l+1}$;\\[1mm]
+\> ! compute the residual at the current level\\
+\> $r_l = v_l-A_l y_l$;\\[1mm]
+\> ! apply the basic Schwarz preconditioner to the residual\\
+\> $r_l = M_l^{-1} r_l$\\[1mm]
+\> ! update $y_l$\\
+\> $y_l = y_l+r_l$\\
+\textbf{endfor} \\[1mm]
+$w = y_1$;
+\end{tabbing}
+}
+\end{minipage}
+}
+\caption{Application of the multi-level hybrid post-smoothed preconditioner.\label{fig:mlhpost_alg}}
+\end{center}
+\end{figure}
+%
+
+
+\subsection{Smoothed Aggregation\label{sec:aggregation}}
+
+In order to define the restriction operator $R_C$, which is used to compute
+the coarse-level matrix $A_C$, MLD2P4 uses the \emph{smoothed aggregation}
+algorithm described in \cite{BREZINA_VANEK,VANEK_MANDEL_BREZINA}.
+The basic idea of this algorithm is to build a coarse set of vertices
+$W_C$ by suitably grouping the vertices of $W$ into disjoint subsets
+(aggregates), and to define the coarse-to-fine space transfer operator $R_C^T$ by
+applying a suitable smoother to a simple piecewise constant
+prolongation operator, to improve the quality of the coarse-space correction.
+
+Three main steps can be identified in the smoothed aggregation procedure:
+\begin{enumerate}
+ \item coarsening of the vertex set $W$, to obtain $W_C$;
+ \item construction of the prolongator $R_C^T$;
+ \item application of $R_C$ and $R_C^T$ to build $A_C$.
+\end{enumerate}
+%\textbf{NOTA: Controllare cosa fa trilinos dopo il primo passo.}
+
+To perform the coarsening step, we have implemented the aggregation algorithm sketched
+in \cite{apnum_07}. According to \cite{VANEK_MANDEL_BREZINA}, a modification of
+this algorithm has actually been considered,
+in which each aggregate $N_r$ is made of vertices of $W$ that are \emph{strongly coupled}
+to a certain root vertex $r \in W$, i.e.\
+\[ N_r = \left\{s \in W: |a_{rs}| > \theta \sqrt{|a_{rr}a_{ss}|} \right\}
+ \cup \left\{ r \right\} ,
+\]
+for a given $\theta \in [0,1]$.
+Since this algorithm has a sequential nature, a \emph{decoupled} version of
+it has been chosen, where each processor $i$ independently applies the algorithm to
+the set of vertices $W_i^0$ assigned to it in the initial data distribution. This
+version is embarrassingly parallel, since it does not require any data communication.
+On the other hand, it may produce non-uniform aggregates near boundary vertices,
+i.e.\ near vertices adjacent to vertices in other processors, and is strongly
+dependent on the number of processors and on the initial partitioning of the matrix $A$.
+Nevertheless, this algorithm has been chosen for the implementation in MLD2P4,
+since it has been shown to produce good results in practice
+\cite{aaecc_07,apnum_07,TUMINARO_TONG}.
+
+The prolongator $P_C=R_C^T$ is built starting from a \emph{tentative prolongator}
+$P \in \Re^{n \times n_C}$, defined as
+\begin{equation}
+P=(p_{ij}), \quad p_{ij}=
+\left\{ \begin{array}{ll}
+1 & \quad \mbox{if} \; i \in V^j_C \\
+0 & \quad \mbox{otherwise}
+\end{array} \right. .
+\label{eq:tent_prol}
+\end{equation}
+$P_C$ is obtained by
+applying to $P$ a smoother $S \in \Re^{n \times n}$:
+\begin{equation}
+P_C = S P,
+\label{eq:smoothed_prol}
+\end{equation}
+in order to remove oscillatory components from the range of the prolongator
+and hence to improve the convergence properties of the multi-level
+Schwarz method \cite{BREZINA_VANEK,StubenGMD69_99}.
+A simple choice for $S$ is the damped Jacobi smoother:
+\begin{equation}
+S = I - \omega D^{-1} A ,
+\label{eq:jac_smoother}
+\end{equation}
+where the value of $\omega$ can be chosen
+using some estimate of the spectral radius of $D^{-1}A$ \cite{BREZINA_VANEK}.
+%
+%\textbf{NOTA: filtering di $A$ nello smoothing, da implementare?}
+%
+
+%%% Local Variables:
+%%% mode: latex
+%%% TeX-master: "userguide"
+%%% End:
diff --git a/docs/src/bibliography.tex b/docs/src/bibliography.tex
new file mode 100644
index 00000000..b68ae5e9
--- /dev/null
+++ b/docs/src/bibliography.tex
@@ -0,0 +1,207 @@
+%\section{Bibliography\label{sec:bib}}
+\begin{thebibliography}{99}
+\addcontentsline{toc}{section}{\refname}
+\markboth{\textsc{MLD2P4 User's and Reference Guide}}
+ {\textsc{References}}
+
+%\let\refname\relax
+
+%
+%\bibitem{PARA04FOREST}
+%G.~Bella, S.~Filippone, A.~De Maio, A., Testa, M.:
+%A Simulation Model for Forest Fires.
+%In: Dongarra, J., Madsen, K., Wasniewski, J. (eds.):
+%Proceedings of PARA~04 Workshop on State of the Art
+%in Scientific Computing. Lecture Notes in Computer Science, 3732. Berlin:
+%Springer, 2005
+%
+\bibitem{BREZINA_VANEK}
+M.~Brezina, P.~Van{\v e}k,
+{\em A Black-Box Iterative Solver Based on a Two-Level Schwarz Method},
+Computing, 63, 1999, 233--263.
+%
+\bibitem{para_04}
+A.~Buttari, P.~D'Ambra, D.~di Serafino, S.~Filippone,
+{\em Extending PSBLAS to Build Parallel Schwarz Preconditioners},
+in J.~Dongarra, K.~Madsen, J.~Wasniewski, editors,
+Proceedings of PARA~04 Workshop on State of the Art
+in Scientific Computing, Lecture Notes in Computer Science,
+Springer, 2005, 593--602.
+%
+\bibitem{aaecc_07} A.~Buttari, P.~D'Ambra, D.~di~Serafino, S.~Filippone,
+{\em 2LEV-D2P4: a package of high-performance preconditioners
+for scientific and engineering applications},
+Applicable Algebra in Engineering, Communications and Computing,
+18, 3, 2007, 223--239.
+%Published online: 13 February 2007, {\tt http://dx.doi.org/10.1007/s00200-007-0035-z}
+%
+\bibitem{apnum_07} P.~D'Ambra, S.~Filippone, D.~di~Serafino,
+{\em On the Development of PSBLAS-based Parallel Two-level Schwarz Preconditioners},
+Applied Numerical Mathematics, Elsevier Science,
+57, 11--12, 2007, 1181--1196.
+%published online 3 February 2007, {\tt
+% http://dx.doi.org/10.1016/j.apnum.2007.01.006}
+
+%% \bibitem{DOUGLAS}
+%% R.E.~Bank and C.C.~Douglas,
+%% {\em SMMP: Sparse Matrix Multiplication Package},
+%% Advances in Computational Mathematics, 1993, 1, 127-137.
+%% (See also {\tt http://www.mgnet.org/~douglas/ccd-codes.html})
+%
+%
+%% \bibitem{CAI_SAAD}
+%% X.~C.~Cai and Y.~Saad,
+%% {\em Overlapping Domain Decomposition Algorithms for General Sparse Matrices},
+%% Numerical Linear Algebra with Applications, 3(3), pp.~221--237, 1996.
+%
+\bibitem{CAI_SARKIS}
+X.~C.~Cai, M.~Sarkis,
+{\em A Restricted Additive Schwarz Preconditioner for General Sparse Linear Systems},
+SIAM Journal on Scientific Computing, 21, 2, 1999, 792--797.
+%
+\bibitem{Cai_Widlund_92}
+X.~C.~Cai, O.~B.~Widlund,
+{\em Domain Decomposition Algorithms for Indefinite Elliptic Problems},
+SIAM Journal on Scientific and Statistical Computing, 13, 1, 1992, 243--258.
+%
+\bibitem{dd1_94}
+T.~Chan and T.~Mathew,
+{\em Domain Decomposition Algorithms},
+in A.~Iserles, editor, Acta Numerica 1994, 61--143.
+Cambridge University Press.
+%
+\bibitem{UMFPACK}
+T.A.~Davis,
+{\em Algorithm 832: UMFPACK - an Unsymmetric-pattern Multifrontal
+Method with a Column Pre-ordering Strategy},
+ACM Transactions on Mathematical Software, 30, 2004, 196--199.
+(See also {\tt http://www.cise.ufl.edu/~davis/})
+%
+\bibitem{SUPERLU}
+J.W.~Demmel, S.C.~Eisenstat, J.R.~Gilbert, X.S.~Li and J.W.H.~Liu,
+{\em A Supernodal Approach to Sparse Partial Pivoting},
+SIAM Journal on Matrix Analysis and Applications, 20, 3, 1999, 720--755.
+%
+\bibitem{blas3}
+J.~J.~Dongarra, J.~Du Croz, I.~S.~Duff, S.~Hammarling,
+\emph{A set of Level 3 Basic Linear Algebra Subprograms},
+ACM Transactions on Mathematical Software, 16, 1990, 1--17.
+%
+\bibitem{blas2}
+J.~J.~Dongarra, J.~Du Croz, S.~Hammarling, R.~J.~Hanson,
+\emph{An extended set of FORTRAN Basic Linear Algebra Subprograms},
+ACM Transactions on Mathematical Software, 14, 1988, 1--17.
+%
+\bibitem{BLACS}
+J.~J.~Dongarra and R.~C.~Whaley,
+{\em A User's Guide to the BLACS v.~1.1},
+Lapack Working Note 94, Tech.\ Rep.\ UT-CS-95-281, University of
+Tennessee, March 1995 (updated May 1997).
+%
+%\bibitem{sblas_97}
+%I.~Duff, M.~Marrone, G.~Radicati and C.~Vittoli,
+%{\em Level 3 Basic Linear Algebra Subprograms for Sparse Matrices:
+%a User Level Interface},
+%ACM Transactions on Mathematical Software, 23(3), pp.~379--401, 1997.
+%
+%\bibitem{sblas_02}
+%I.~Duff, M.~Heroux and R.~Pozo,
+%{\em An Overview of the Sparse Basic Linear
+%Algebra Subprograms: the New Standard from the BLAS Technical Forum},
+%ACM Transactions on Mathematical Software, 28(2), pp.~239--267, 2002.
+%
+\bibitem{EFSTATHIOU}
+E.~Efstathiou, M.~J.~Gander,
+{\em Why Restricted Additive Schwarz Converges Faster than Additive Schwarz},
+BIT Numerical Mathematics, 43, 2003, 945--959.
+%
+\bibitem{PSBLASGUIDE}
+S.~Filippone, A.~Buttari,
+{\em PSBLAS-2.3 User's Guide. A Reference Guide for the Parallel Sparse BLAS Library}, 2008,
+available from \texttt{http://www.ce.uniroma2.it/psblas/}.
+%
+\bibitem{psblas_00}
+S.~Filippone, M.~Colajanni,
+{\em PSBLAS: A Library for Parallel Linear Algebra
+Computation on Sparse Matrices},
+ACM Transactions on Mathematical Software, 26, 4, 2000, 527--550.
+%
+\bibitem{MPI2}
+W.~Gropp, S.~Huss-Lederman, A.~Lumsdaine, E.~Lusk, B.~Nitzberg, W.~Saphir, M.~Snir,
+{\em MPI: The Complete Reference. Volume 2 - The MPI-2 Extensions},
+MIT Press, 1998.
+%
+\bibitem{blas1}
+C.~L.~Lawson, R.~J.~Hanson, D.~Kincaid, F.~T.~Krogh,
+\emph{Basic Linear Algebra Subprograms for FORTRAN usage},
+ACM Transactions on Mathematical Software, 5, 1979, 308--323.
+%
+\bibitem{SUPERLUDIST}
+X.~S.~Li, J.~W.~Demmel, {\em SuperLU\_DIST: A Scalable Distributed-memory
+Sparse Direct Solver for Unsymmetric Linear Systems},
+ACM Transactions on Mathematical Software, 29, 2, 2003, 110--140.
+%
+%\bibitem{KIVA3PSBLAS}
+%S.~Filippone, P.~D'Ambra, M.~Colajanni,
+%{\em Using a Parallel Library of Sparse Linear Algebra in a Fluid Dynamics
+%Applications Code on Linux Clusters},
+%in G.~Joubert, A.~Murli, F.~Peters, M.~Vanneschi, editors,
+%Parallel Computing - Advances \& Current Issues,
+%pp.~441--448, Imperial College Press, 2002.
+%
+%\bibitem{METIS}
+%Karypis, G. and Kumar, V.,
+%{\em {METIS}: Unstructured Graph Partitioning and Sparse Matrix
+% Ordering System}.
+%Minneapolis, MN 55455: University of Minnesota, Department of
+% Computer Science, 1995.
+%Internet Address: {\verb|http://www.cs.umn.edu/~karypis|}.
+%\bibitem{BLAS1}
+%Lawson, C., Hanson, R., Kincaid, D. and Krogh, F.,
+% Basic {L}inear {A}lgebra {S}ubprograms for {F}ortran usage,
+%{ACM Trans. Math. Softw.} vol.~{5}, 38--329, 1979.
+%
+%\bibitem{machiels}
+%{Machiels, L. and Deville, M.}
+%{\em Fortran 90: An entry to object-oriented programming for the solution
+% of partial differential equations.}
+%{ACM Trans. Math. Softw.} vol.~{23}, 32--49.
+%\bibitem{metcalf}
+%{Metcalf, M., Reid, J. and Cohen, M.}
+%{\em Fortran 95/2003 explained.}
+%{Oxford University Press}, 2004.
+%
+\bibitem{Saad_book}
+Y.~Saad,
+\emph{Iterative methods for sparse linear systems}, 2nd edition,
+SIAM, 2003.
+
+\bibitem{dd2_96}
+B.~Smith, P.~Bj{\o}rstad, W.~Gropp,
+{\em Domain Decomposition: Parallel Multilevel Methods for Elliptic
+Partial Differential Equations},
+Cambridge University Press, 1996.
+%
+\bibitem{MPI1}
+M.~Snir, S.~Otto, S.~Huss-Lederman, D.~Walker, J.~Dongarra,
+{\em MPI: The Complete Reference. Volume 1 - The MPI Core}, second edition,
+MIT Press, 1998.
+%%
+\bibitem{StubenGMD69_99}
+K.~St\"{u}ben,
+{\em Algebraic Multigrid (AMG): an Introduction with Applications},
+in A.~Sch\"{u}ller, U.~Trottenberg, C.~Oosterlee, editors, Multigrid,
+Academic Press, 2000.
+%
+\bibitem{TUMINARO_TONG}
+R.~S.~Tuminaro, C.~Tong,
+{\em Parallel Smoothed Aggregation Multigrid: Aggregation Strategies on Massively Parallel Machines},
+in J. Donnelley, editor, Proceedings of SuperComputing 2000, Dallas, 2000.
+%
+\bibitem{VANEK_MANDEL_BREZINA}
+P.~Van{\v e}k, J.~Mandel and M.~Brezina,
+{\em Algebraic Multigrid by Smoothed Aggregation for Second and Fourth Order Elliptic Problems},
+Computing, 56, 1996, 179--196.
+%
+
+\end{thebibliography}
diff --git a/docs/src/building.tex b/docs/src/building.tex
new file mode 100644
index 00000000..7c1d699e
--- /dev/null
+++ b/docs/src/building.tex
@@ -0,0 +1,242 @@
+\section{Configuring and Building MLD2P4\label{sec:building}}
+\markboth{\textsc{MLD2P4 User's and Reference Guide}}
+ {\textsc{\ref{sec:building} Configuring and Building MLD2P4}}
+To build MLD2P4 it is necessary to set up a Makefile with appropriate
+values for your system; this is done by means of the \verb|configure|
+script. The distribution also includes the autoconf and automake
+sources employed to generate the script, but usually this is not needed
+to build the software.
+
+MLD2P4 is implemented almost entirely in Fortran~95, with some
+interfaces to external libraries in C; the Fortran compiler
+must support the Fortran~95 standard plus the extension TR15581, which
+enhances the usability of \verb|ALLOCATABLE| variables. Most modern
+Fortran compilers support this language level. In particular, this is
+supported by the GNU Fortran compiler as of version 4.2.0; however, we
+recommend using the latest available release (4.3.1 at the time of
+this writing).
+The software defines data types and interfaces for
+real and complex data, in both single and double precision.
+
+\subsection{Prerequisites}
+
+The following base libraries are needed:
+\begin{description}
+\item[BLAS] \cite{blas3,blas2,blas1} Many vendors provide optimized versions
+ of the Basic Linear Algebra Subprograms; if no vendor version is
+ available for a given platform, the ATLAS software
+ (\verb!http://math-atlas.sourceforge.net/!)
+ may be employed. The reference BLAS from Netlib
+ (\verb|http://www.netlib.org/blas|) are meant to define the standard
+ behaviour of the BLAS interface, so they are not optimized for any
+ particular platform, and should only be used as a last
+ resort. Note that BLAS computations form a relatively small part of
+ the MLD2P4/PSBLAS computations; they are however critical when using
+ preconditioners based on the UMFPACK or SuperLU third party
+ libraries.
+\item[MPI] \cite{MPI2,MPI1} A version of MPI is available on most
+ high-performance computing systems; only version 1.1 is required.
+\item[BLACS] \cite{BLACS} The Basic Linear Algebra Communication Subprograms
+ are available in source form from \verb|http://www.netlib.org/blacs|;
+ some vendors include them in their parallel computing
+ support libraries.
+ \item[PSBLAS] \cite{PSBLASGUIDE,psblas_00} Parallel Sparse BLAS is
+ available from \\ \verb|http://www.ce.uniroma2.it/psblas|; version 2.3
+ (or later) is required. Indeed, all the prerequisites
+ listed so far are also prerequisites of PSBLAS.
+ To build the MLD2P4 library it is necessary to get access to
+ the source PSBLAS directory employed to build the version under use; after
+ the MLD2P4 build process completes, only the compiled form of the
+ PSBLAS library is necessary to build user applications.
+\end{description}
+
+Please note that the four previous libraries must have Fortran
+interfaces compatible with MLD2P4;
+usually this means that they should all be built with the same
+compiler as MLD2P4.
+
+\subsection{Optional third party libraries}
+
+We provide interfaces to the following third-party software libraries;
+note that these are optional, but if you enable them some defaults
+for multilevel preconditioners may change to reflect their presence.
+
+\begin{description}
+\item[UMFPACK] \cite{UMFPACK}
+ A sparse direct factorization package available from \\
+ \verb|http://www.cise.ufl.edu/research/sparse/umfpack/|;
+ provides serial factorization and triangular system solution for double
+ precision real and complex data. We have tested
+ versions 4.4 and 5.1.
+\item[SuperLU] \cite{SUPERLU}
+ A sparse direct factorization package available from \\
+ \verb|http://crd.lbl.gov/~xiaoye/SuperLU/|; provides serial
+ factorization and triangular system solution for single and double precision,
+ real and complex data. We have tested versions 3.0 and 3.1.
+\item[SuperLU\_Dist] \cite{SUPERLUDIST}
+ A sparse direct factorization package available
+ from the same site as SuperLU; provides parallel factorization and
+ triangular system solution for double precision real and complex data.
+ We have tested version 2.1.
+\end{description}
+
+\subsection{Configuration options}
+
+To build MLD2P4 the first step is to use the \verb|configure| script
+in the main directory to generate the necessary makefile(s).
+
+As a minimal example consider the following:
+\begin{verbatim}
+./configure --with-psblas=/home/user/PSBLAS/psblas-2.3
+\end{verbatim}
+which assumes that the various MPI compilers and support libraries are
+available in the standard directories on the system, and specifies
+only the PSBLAS build directory (note that the latter directory must
+be specified with an {\em absolute} path).
+The full set of options may be looked at by issuing the command
+\verb|./configure --help|, which produces:
+\begin{verbatim}
+`configure' configures MLD2P4 1.0 to adapt to many kinds of systems.
+
+Usage: ./configure [OPTION]... [VAR=VALUE]...
+
+To assign environment variables (e.g., CC, CFLAGS...), specify them as
+VAR=VALUE. See below for descriptions of some of the useful variables.
+
+Defaults for the options are specified in brackets.
+
+Configuration:
+ -h, --help display this help and exit
+ --help=short display options specific to this package
+ --help=recursive display the short help of all the included packages
+ -V, --version display version information and exit
+ -q, --quiet, --silent do not print `checking...' messages
+ --cache-file=FILE cache test results in FILE [disabled]
+ -C, --config-cache alias for `--cache-file=config.cache'
+ -n, --no-create do not create output files
+ --srcdir=DIR find the sources in DIR [configure dir or `..']
+
+Installation directories:
+ --prefix=PREFIX install architecture-independent files in PREFIX
+ [/usr/local]
+ --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX
+ [PREFIX]
+
+By default, `make install' will install all the files in
+`/usr/local/bin', `/usr/local/lib' etc. You can specify
+an installation prefix other than `/usr/local' using `--prefix',
+for instance `--prefix=$HOME'.
+
+For better control, use the options below.
+
+Fine tuning of the installation directories:
+ --bindir=DIR user executables [EPREFIX/bin]
+ --sbindir=DIR system admin executables [EPREFIX/sbin]
+ --libexecdir=DIR program executables [EPREFIX/libexec]
+ --sysconfdir=DIR read-only single-machine data [PREFIX/etc]
+ --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com]
+ --localstatedir=DIR modifiable single-machine data [PREFIX/var]
+ --libdir=DIR object code libraries [EPREFIX/lib]
+ --includedir=DIR C header files [PREFIX/include]
+ --oldincludedir=DIR C header files for non-gcc [/usr/include]
+ --datarootdir=DIR read-only arch.-independent data root [PREFIX/share]
+ --datadir=DIR read-only architecture-independent data [DATAROOTDIR]
+ --infodir=DIR info documentation [DATAROOTDIR/info]
+ --localedir=DIR locale-dependent data [DATAROOTDIR/locale]
+ --mandir=DIR man documentation [DATAROOTDIR/man]
+ --docdir=DIR documentation root [DATAROOTDIR/doc/mld2p4]
+ --htmldir=DIR html documentation [DOCDIR]
+ --dvidir=DIR dvi documentation [DOCDIR]
+ --pdfdir=DIR pdf documentation [DOCDIR]
+ --psdir=DIR ps documentation [DOCDIR]
+
+Optional Packages:
+ --with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
+ --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no)
+ --with-psblas The source directory for PSBLAS, for example,
+ --with-psblas=/opt/packages/psblas-2.3
+ --with-libs List additional link flags here. For example,
+ --with-libs=-lspecial_system_lib or
+ --with-libs=-L/path/to/libs
+ --with-clibs additional CLIBS flags to be added: will prepend
+ to CLIBS
+ --with-flibs additional FLIBS flags to be added: will prepend
+ to FLIBS
+ --with-library-path additional LIBRARYPATH flags to be added: will
+ prepend to LIBRARYPATH
+ --with-include-path additional INCLUDEPATH flags to be added: will
+ prepend to INCLUDEPATH
+ --with-module-path additional MODULE_PATH flags to be added: will
+ prepend to MODULE_PATH
+ --with-umfpack=LIBNAME Specify the library name for UMFPACK library.
+ Default: "-lumfpack -lamd"
+ --with-umfpackdir=DIR Specify the directory for UMFPACK library and
+ includes.
+ --with-superlu=LIBNAME Specify the library name for SUPERLU library.
+ Default: "-lslu"
+ --with-superludir=DIR Specify the directory for SUPERLU library and
+ includes.
+ --with-superludist=LIBNAME
+ Specify the libname for SUPERLUDIST library.
+ Requires you also specify SuperLU. Default: "-lslud"
+ --with-superludistdir=DIR
+ Specify the directory for SUPERLUDIST library and
+ includes.
+
+Some influential environment variables:
+ FC Fortran compiler command
+ FCFLAGS Fortran compiler flags
+ LDFLAGS linker flags, e.g. -L if you have libraries in a
+ nonstandard directory
+ LIBS libraries to pass to the linker, e.g. -l
+ CC C compiler command
+ CFLAGS C compiler flags
+ CPPFLAGS C/C++/Objective C preprocessor flags, e.g. -I if
+ you have headers in a nonstandard directory
+ CPP C preprocessor
+ MPICC MPI C compiler command
+
+Use these variables to override the choices made by `configure' or to help
+it to find libraries and programs with nonstandard names/locations.
+
+Report bugs to <bugreport@mld2p4.it>.
+\end{verbatim}
+Thus, a sample build with libraries in installation
+directories specific to the GNU 4.3 compiler suite might be as
+follows, specifying only the UMFPACK external package:
+\begin{verbatim}
+ ./configure --with-psblas=/home/user/psblas-2.3/ \
+ --with-libs="-L/usr/local/BLAS/gnu43 -L/usr/local/BLACS/gnu43" \
+ --with-blacs=-lmpiblacs --with-umfpackdir=/usr/local/UMFPACK/gnu43
+\end{verbatim}
+Once the configure script has completed execution, it will have
+generated the file \verb|Make.inc| which will then be used by all
+Makefiles in the directory tree.
+
+To build the library the user will now enter
+\begin{verbatim}
+make
+\end{verbatim}
+followed (optionally) by
+\begin{verbatim}
+make install
+\end{verbatim}
+
+\subsection{Example and test programs\label{sec:ex_and_test}}
+The package contains the \verb|examples| and \verb|tests| directories;
+both of them are further divided into \verb|fileread| and
+\verb|pdegen| subdirectories. Their purpose is as follows:
+\begin{description}
+\item[\tt examples] contains a set of simple example programs with a
+ predefined choice of preconditioners, selectable via integer
+ values. These are intended to help the user become acquainted with the
+ multilevel preconditioners.
+\item[\tt tests] contains a set of more sophisticated examples that
+ will allow the user, via the input files in the \verb|runs|
+ subdirectories, to experiment with the full range of preconditioners
+ implemented in the library.
+\end{description}
+The \verb|fileread| directories contain sample programs that read
+sparse matrices from files, according to the Matrix Market or the
+Harwell-Boeing storage format; the \verb|pdegen| programs instead generate
+matrices in full parallel mode from the discretization of a sample PDE.
diff --git a/docs/src/distribution.tex b/docs/src/distribution.tex
new file mode 100644
index 00000000..6541de15
--- /dev/null
+++ b/docs/src/distribution.tex
@@ -0,0 +1,19 @@
+\section{Code Distribution\label{sec:distribution}}
+\markboth{\textsc{MLD2P4 User's and Reference Guide}}
+ {\textsc{\ref{sec:distribution} Code Distribution}}
+
+\noindent
+MLD2P4 is available from the web site
+\begin{quotation}
+\texttt{http://www.mld2p4.it}
+\end{quotation}
+where contact points for further information can also be found.
+To report bugs or ask general usage questions, please send an email to
+\texttt{bugreport@mld2p4.it}.
+
+
+The software is available under a modified BSD license, as specified
+in Appendix~\ref{sec:license}; please note that some of the optional
+third party libraries may be licensed under a different and more
+stringent license, most notably the GPL, and this should be taken into
+account when treating derived works.
diff --git a/docs/src/errors.tex b/docs/src/errors.tex
new file mode 100644
index 00000000..375a5f69
--- /dev/null
+++ b/docs/src/errors.tex
@@ -0,0 +1,20 @@
+\section{Error Handling\label{sec:errors}}
+\markboth{\textsc{MLD2P4 User's and Reference Guide}}
+ {\textsc{\ref{sec:errors} Error handling}}
+
+The error handling in MLD2P4 is based on the PSBLAS (version 2) error
+handling. Error conditions are signaled via an integer argument
+\verb|info|; whenever an error condition is detected, an error trace
+stack is built by the library up to the top-level, user-callable
+routine. This routine will then decide, according to the user
+preferences, whether the error should be handled by terminating the
+program or by returning the error condition to the user code, which
+will then take action, and whether
+an error message should be printed. These options may be set by using
+the PSBLAS error handling routines; for further details see the PSBLAS
+User's Guide \cite{PSBLASGUIDE}.
+
+%%% Local Variables:
+%%% mode: latex
+%%% TeX-master: "userguide"
+%%% End:
diff --git a/docs/src/gettingstarted.tex b/docs/src/gettingstarted.tex
new file mode 100644
index 00000000..df94329c
--- /dev/null
+++ b/docs/src/gettingstarted.tex
@@ -0,0 +1,308 @@
+\section{Getting Started\label{sec:started}}
+\markboth{\textsc{MLD2P4 User's and Reference Guide}}
+ {\textsc{\ref{sec:started} Getting Started}}
+
+We describe the basics for building and applying MLD2P4 one-level and multi-level
+Schwarz preconditioners with the Krylov solvers included in PSBLAS \cite{PSBLASGUIDE}.
+The following steps are required:
+\begin{enumerate}
+\item \emph{Declare the preconditioner data structure}. It is a derived data type,
+ \verb|mld_|\-\emph{x}\verb|prec_| \verb|type|, where \emph{x} may be \verb|s|, \verb|d|, \verb|c|
+ or \verb|z|, according to the basic data type of the sparse matrix
+ (\verb|s| = real single precision; \verb|d| = real double precision;
+ \verb|c| = complex single precision; \verb|z| = complex double precision).
+ This data structure is accessed by the user only through the MLD2P4 routines,
+ following an object-oriented approach.
+\item \emph{Allocate and initialize the preconditioner data structure, according to
+ a preconditioner type chosen by the user}. This is performed by the routine
+ \verb|mld_precinit|, which also sets defaults for each preconditioner
+ type selected by the user. The defaults associated to each preconditioner
+ type are given in Table~\ref{tab:precinit}, where the strings used by
+ \verb|mld_precinit| to identify the preconditioner types are also given.
+ Note that these strings are valid also if uppercase letters are substituted by
+ corresponding lowercase ones.
+\item \emph{Modify the selected preconditioner type, by properly setting
+ preconditioner parameters.} This is performed by the routine \verb|mld_precset|.
+ This routine must be called only if the user wants to modify the default values
+ of the parameters associated to the selected preconditioner type, to obtain a variant
+ of the preconditioner. Examples of use of \verb|mld_precset| are given in
+ Section~\ref{sec:examples}; a complete list of all the
+ preconditioner parameters and their allowed and default values is provided in
+ Section~\ref{sec:userinterface}, Tables~\ref{tab:p_type}-\ref{tab:p_coarse}.
+\item \emph{Build the preconditioner for a given matrix.} This is performed by
+ the routine \verb|mld_precbld|.
+\item \emph{Apply the preconditioner at each iteration of a Krylov solver.}
+ This is performed by the routine \verb|mld_precaply|. When using the PSBLAS Krylov solvers,
+ this step is completely transparent to the user, since \verb|mld_precaply| is called
+ by the PSBLAS routine implementing the Krylov solver (\verb|psb_krylov|).
+\item \emph{Free the preconditioner data structure}. This is performed by
+ the routine \verb|mld_| \verb|precfree|. This step is complementary to step 1 and should
+ be performed when the preconditioner is no longer used.
+\end{enumerate}
+A detailed description of the above routines is given in Section~\ref{sec:userinterface}.
+Examples showing the basic use of MLD2P4 are reported in Section~\ref{sec:examples}.
+
+Note that the Fortran 95 module \verb|mld_prec_mod|, containing the definition of the
+preconditioner data type and the interfaces to the routines of MLD2P4,
+must be used in any program calling such routines.
+The modules \verb|psb_base_mod|, for the sparse matrix and communication descriptor
+data types, and \verb|psb_krylov_mod|, for interfacing with the
+Krylov solvers, must also be used (see Section~\ref{sec:examples}).
+
+\ \\
+\textbf{Remark 1.} The coarsest-level solver used by the default two-level
+preconditioner has been chosen by taking into account that, on parallel
+machines, it often leads to the smallest execution time when applied to
+linear systems coming from finite-difference discretizations of basic
+elliptic PDE problems, considered as standard tests for multi-level Schwarz
+preconditioners \cite{aaecc_07,apnum_07}. However, this solver does
+not necessarily correspond to the smallest number of iterations of the
+preconditioned Krylov method, which is usually obtained by applying
+a direct solver to the coarsest-level system, e.g.\ based on the LU
+factorization (see Section~\ref{sec:userinterface}
+for the coarsest-level solvers available in MLD2P4).
+
+\ \\
+\textbf{Remark 2.} The include path for MLD2P4 must override
+those for PSBLAS, i.e.\ the former must come first in the sequence
+passed to the compiler, as the MLD2P4 version of the Krylov solver
+interfaces must override that of PSBLAS. This will change in the future
+when the support for the \verb|class| statement becomes widespread in Fortran
+compilers.
+
+
+\begin{table}[th]
+\begin{center}
+%{\small
+\begin{tabular}{|l|l|p{7.8cm}|}
+\hline
+\textsc{type} & \textsc{string} & \textsc{default preconditioner} \\ \hline
+No preconditioner &\verb|'NOPREC'|& Considered only to use the PSBLAS
+ Krylov solvers with no preconditioner. \\ \hline
+Diagonal & \verb|'DIAG'| & --- \\ \hline
+Block Jacobi & \verb|'BJAC'| & Block Jacobi with ILU(0) on the local blocks.\\ \hline
+Additive Schwarz & \verb|'AS'| & Restricted Additive Schwarz (RAS),
+ with overlap 1 and ILU(0) on the local blocks. \\ \hline
+Multilevel &\verb|'ML'| & Multi-level hybrid preconditioner (additive on the
+ same level and multiplicative through the levels),
+ with post-smoothing only.
+ Number of levels: 2.
+ Post-smoother: RAS with overlap 1 and ILU(0)
+ on the local blocks.
+ Aggregation: decoupled smoothed aggregation with
+ threshold $\theta = 0$.
+ Coarsest matrix: distributed among the processors.
+ Coarsest-level solver:
+ 4 sweeps of the block-Jacobi solver,
+ with LU (or ILU) factorization of the blocks
+ (UMFPACK for the double precision versions and
+ SuperLU for the single precision ones, if the packages
+ have been installed; ILU(0), otherwise). \\
+\hline
+\end{tabular}
+%}
+\end{center}
+
+\caption{Preconditioner types, corresponding strings and default choices.
+\label{tab:precinit}}
+\end{table}
+
+\subsection{Examples\label{sec:examples}}
+
+The code reported in Figure~\ref{fig:ex_default} shows how to set and apply the default
+multi-level preconditioner available in the real double precision version
+of MLD2P4 (see Table~\ref{tab:precinit}). This preconditioner is chosen
+by simply specifying \verb|'ML'| as second argument of \verb|mld_precinit|
+(a call to \verb|mld_precset| is not needed) and is applied with the BiCGSTAB
+solver provided by PSBLAS. As previously observed, the modules \verb|psb_base_mod|,
+\verb|mld_prec_mod| and \verb|psb_krylov_mod| must be used by the example program.
+
+The part of the code concerning the
+reading and assembling of the sparse matrix and the right-hand side vector, performed
+through the PSBLAS routines for sparse matrix and vector management, is not reported
+here for brevity; the statements concerning the deallocation of the PSBLAS
+data structure are neglected too.
+The complete code can be found in the example program file \verb|mld_dexample_ml.f90|,
+in the directory \verb|examples/fileread| of the MLD2P4 tree (see
+Section~\ref{sec:ex_and_test}).
+For details on the use of the PSBLAS routines, see the PSBLAS User's
+Guide \cite{PSBLASGUIDE}.
+
+The setup and application of the default multi-level
+preconditioners for the real single precision and the complex, single and double
+precision, versions are obtained with straightforward modifications of the previous
+example (see Section~\ref{sec:userinterface} for details). If these versions are installed,
+the corresponding Fortran 95 codes are available in \verb|examples/fileread/|.
+
+\begin{figure}[tbp]
+\begin{center}
+\begin{minipage}{.90\textwidth}
+{\small
+\begin{verbatim}
+ use psb_base_mod
+ use mld_prec_mod
+ use psb_krylov_mod
+... ...
+!
+! sparse matrix
+ type(psb_dspmat_type) :: A
+! sparse matrix descriptor
+ type(psb_desc_type) :: desc_A
+! preconditioner
+ type(mld_dprec_type) :: P
+! right-hand side and solution vectors
+ real(kind(1.d0)), allocatable :: b(:), x(:)
+... ...
+!
+! initialize the parallel environment
+ call psb_init(ictxt)
+ call psb_info(ictxt,iam,np)
+... ...
+!
+! read and assemble the matrix A and the right-hand side b
+! using PSBLAS routines for sparse matrix / vector management
+... ...
+!
+! initialize the default multi-level preconditioner, i.e. hybrid
+! Schwarz, using RAS (with overlap 1 and ILU(0) on the blocks)
+! as post-smoother and 4 block-Jacobi sweeps (with UMFPACK LU
+! on the blocks) as distributed coarse-level solver
+ call mld_precinit(P,'ML',info)
+!
+! build the preconditioner
+ call mld_precbld(A,desc_A,P,info)
+!
+! set the solver parameters and the initial guess
+ ... ...
+!
+! solve Ax=b with preconditioned BiCGSTAB
+ call psb_krylov('BICGSTAB',A,P,b,x,tol,desc_A,info)
+ ... ...
+!
+! deallocate the preconditioner
+ call mld_precfree(P,info)
+!
+! deallocate other data structures
+ ... ...
+!
+! exit the parallel environment
+ call psb_exit(ictxt)
+ stop
+\end{verbatim}
+}
+\end{minipage}
+\caption{Setup and application of the default multi-level Schwarz preconditioner.
+\label{fig:ex_default}}
+\end{center}
+\end{figure}
+
+Different versions of multi-level preconditioners can be obtained by changing
+the default values of the preconditioner parameters. The code reported in
+Figure~\ref{fig:ex_3lh} shows how to set a three-level hybrid Schwarz
+preconditioner, which uses block Jacobi with ILU(0) on the
+local blocks as post-smoother, has a coarsest matrix replicated on the processors,
+and solves the coarsest-level system with the LU factorization from UMFPACK~\cite{UMFPACK}.
+The number of levels is specified by using \verb|mld_precinit|; the other
+preconditioner parameters are set by calling \verb|mld_precset|. Note that
+the type of multilevel framework (i.e.\ multiplicative among the levels
+with post-smoothing only) is not specified since it is the default
+set by \verb|mld_precinit|.
+
+Figure~\ref{fig:ex_3la} shows how to
+set a three-level additive Schwarz preconditioner,
+which uses RAS, with overlap 1 and ILU(0) on the blocks,
+as pre- and post-smoother, and applies five block-Jacobi sweeps, with
+the UMFPACK LU factorization on the blocks, as distributed coarsest-level
+solver. Again, \verb|mld_precset| is used only to set
+non-default values of the parameters (see Tables~\ref{tab:p_type}-\ref{tab:p_coarse}).
+In both cases, the construction and the application of the preconditioner
+are carried out as for the default multi-level preconditioner.
+The code fragments shown in Figures~\ref{fig:ex_3lh}-\ref{fig:ex_3la} are
+included in the example program file \verb|mld_dexample_ml.f90| too.
+
+Finally, Figure~\ref{fig:ex_1l} shows the setup of a one-level
+additive Schwarz preconditioner, i.e.\ RAS with overlap 2. The corresponding
+example program is available in \verb|mld_dexample_| \verb|1lev.f90|.
+
+For all the previous preconditioners, example programs where the sparse matrix and
+the right-hand side are generated by discretizing a PDE with Dirichlet
+boundary conditions are also available in the directory \verb|examples/pdegen|.
+
+\ \\
+\textbf{Remark 3.} Any PSBLAS-based program using the basic preconditioners
+implemented in PSBLAS 2.0, i.e.\ the diagonal and block-Jacobi ones,
+can use the diagonal and block-Jacobi preconditioners
+implemented in MLD2P4 without any change in the code.
+The PSBLAS-based program must be only recompiled
+and linked to the MLD2P4 library.
+\\
+
+
+\begin{figure}[tbh]
+\begin{center}
+\begin{minipage}{.90\textwidth}
+{\small
+\begin{verbatim}
+... ...
+! set a three-level hybrid Schwarz preconditioner, which uses
+! block Jacobi (with ILU(0) on the blocks) as post-smoother,
+! a coarsest matrix replicated on the processors, and the
+! LU factorization from UMFPACK as coarse-level solver
+ call mld_precinit(P,'ML',info,nlev=3)
+ call mld_precset(P,mld_smoother_type_,'BJAC',info)
+ call mld_precset(P,mld_coarse_mat_,'REPL',info)
+ call mld_precset(P,mld_coarse_solve_,'UMF',info)
+... ...
+\end{verbatim}
+}
+\end{minipage}
+
+\caption{Setup of a hybrid three-level Schwarz preconditioner.\label{fig:ex_3lh}}
+\end{center}
+\end{figure}
+
+\begin{figure}[tbh]
+\begin{center}
+\begin{minipage}{.90\textwidth}
+{\small
+\begin{verbatim}
+... ...
+! set a three-level additive Schwarz preconditioner, which uses
+! RAS (with overlap 1 and ILU(0) on the blocks) as pre- and
+! post-smoother, and 5 block-Jacobi sweeps (with UMFPACK LU
+! on the blocks) as distributed coarsest-level solver
+ call mld_precinit(P,'ML',info,nlev=3)
+ call mld_precset(P,mld_ml_type_,'ADD',info)
+ call mld_precset(P,mld_smoother_pos_,'TWOSIDE',info)
+ call mld_precset(P,mld_coarse_sweeps_,5,info)
+... ...
+\end{verbatim}
+}
+\end{minipage}
+
+\caption{Setup of an additive three-level Schwarz preconditioner.\label{fig:ex_3la}}
+\end{center}
+\end{figure}
+
+\begin{figure}[tbh]
+\begin{center}
+\begin{minipage}{.90\textwidth}
+{\small
+\begin{verbatim}
+... ...
+! set RAS with overlap 2 and ILU(0) on the local blocks
+ call mld_precinit(P,'AS',info)
+ call mld_precset(P,mld_sub_ovr_,2,info)
+... ...
+\end{verbatim}
+}
+\end{minipage}
+\caption{Setup of a one-level Schwarz preconditioner.\label{fig:ex_1l}}
+\end{center}
+\end{figure}
+
+
+%%% Local Variables:
+%%% mode: latex
+%%% TeX-master: "userguide"
+%%% End:
diff --git a/docs/src/intro.tex b/docs/src/intro.tex
new file mode 100644
index 00000000..864ab263
--- /dev/null
+++ b/docs/src/intro.tex
@@ -0,0 +1,34 @@
+\section{Introduction}\label{sec:intro}
+\markboth{\underline{MLD2P4 User's and Reference Guide}}
+ {\underline{\ref{sec:intro} Introduction}}
+
+The MLD2P4 library provides ....
+
+
+\subsection{Programming model}
+
+The MLD2P4 library is based on the Single Program Multiple Data
+(SPMD) programming model: each process participating in the
+computation performs the same actions on a chunk of data. Parallelism
+is thus data-driven.
+
+Because of this structure, many subroutines coordinate their action
+across the various processes, thus providing an implicit
+synchronization point, and therefore \emph{must} be
+called simultaneously by all processes participating in the
+computation.
+However there are many cases where no synchronization, and indeed no
+communication among processes, is implied.
+
+Throughout this user's guide each subroutine will be clearly indicated
+as:
+\begin{description}
+\item[Synchronous:] must be called simultaneously by all the
+ processes in the relevant communication context;
+\item[Asynchronous:] may be called in a totally independent manner.
+\end{description}
+
+%%% Local Variables:
+%%% mode: latex
+%%% TeX-master: "userguide"
+%%% End:
diff --git a/docs/src/license.tex b/docs/src/license.tex
new file mode 100644
index 00000000..005ffa08
--- /dev/null
+++ b/docs/src/license.tex
@@ -0,0 +1,44 @@
+\section{License\label{sec:license}}
+\markboth{\textsc{MLD2P4 User's and Reference Guide}}
+ {\textsc{\ref{sec:license} License}}
+
+MLD2P4 is freely distributable under the following copyright
+terms: {\small
+\begin{verbatim}
+ MLD2P4 version 1.0
+MultiLevel Domain Decomposition Parallel Preconditioners Package
+ based on PSBLAS (Parallel Sparse BLAS version 2.3)
+
+(C) Copyright 2008
+
+ Salvatore Filippone University of Rome Tor Vergata
+ Alfredo Buttari University of Rome Tor Vergata
+ Pasqua D'Ambra ICAR-CNR, Naples
+ Daniela di Serafino Second University of Naples
+
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions, and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ 3. The name of the MLD2P4 group or the names of its contributors may
+ not be used to endorse or promote products derived from this
+ software without specific written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MLD2P4 GROUP OR ITS CONTRIBUTORS
+BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+\end{verbatim}
+}
\ No newline at end of file
diff --git a/docs/src/overview.tex b/docs/src/overview.tex
new file mode 100644
index 00000000..d4537981
--- /dev/null
+++ b/docs/src/overview.tex
@@ -0,0 +1,90 @@
+\section{General Overview\label{sec:overview}}
+\markboth{\textsc{MLD2P4 User's and Reference Guide}}
+ {\textsc{\ref{sec:overview} General Overview}}
+
+The \textsc{Multi-Level Domain Decomposition Parallel Preconditioners Package based on
+PSBLAS (MLD2P4)} provides \emph{multi-level Schwarz preconditioners}~\cite{dd2_96},
+to be used in the iterative solutions of sparse linear systems:
+\begin{equation}
+Ax=b,
+\label{system1}
+\end{equation}
+where $A$ is a square, real or complex, sparse matrix with a symmetric sparsity pattern.
+%
+%\textbf{NOTA: Caso non simmetrico, aggregazione con $(A+A^T)$ fatta!
+%Dovremmo implementare uno smoothed prolongator
+%adeguato e fare qualcosa di consistente anche con 1-lev Schwarz.}
+%
+These preconditioners have the following general features:
+\begin{itemize}
+\item both \emph{additive and hybrid multilevel} variants are implemented,
+i.e.\ variants that are additive among the levels and inside each level, and variants
+that are multiplicative among the levels and additive inside each level;
+the basic Additive Schwarz (AS) preconditioners are obtained by considering only one level;
+\item a \emph{purely algebraic} approach is used to
+generate a sequence of coarse-level corrections to a basic AS preconditioner, without
+explicitly using any information on the geometry of the original problem (e.g.\ the
+discretization of a PDE). The \emph{smoothed aggregation} technique is applied
+as algebraic coarsening strategy~\cite{BREZINA_VANEK,VANEK_MANDEL_BREZINA}.
+\end{itemize}
+
+The package is written in \emph{Fortran~95}, following an
+\emph{object-oriented approach} through the exploitation of features
+such as abstract data type creation, functional
+overloading and dynamic memory management.
+% , while providing a smooth
+% path towards the integration in legacy application codes.
+The parallel implementation is based
+on a Single Program Multiple Data (SPMD) paradigm for distributed-memory architectures.
+Single and double precision implementations of MLD2P4 are available for both the
+real and the complex case, that can be used through a single interface.
+
+
+MLD2P4 has been designed to implement scalable and easy-to-use multilevel preconditioners
+in the context of the \emph{PSBLAS (Parallel Sparse BLAS)
+computational framework}~\cite{psblas_00}.
+PSBLAS is a library originally developed to address the parallel implementation of
+iterative solvers for sparse linear systems, by providing basic linear algebra
+operators and data management facilities for distributed sparse matrices; it
+also includes parallel Krylov solvers, built on top of the basic PSBLAS kernels.
+The preconditioners available in MLD2P4 can be used with these Krylov solvers.
+The choice of PSBLAS has been mainly motivated by the need of having
+a portable and efficient software infrastructure implementing ``de facto'' standard
+parallel sparse linear algebra kernels, to pursue goals such as performance,
+portability, modularity and extensibility in the development of the preconditioner
+package. On the other hand, the implementation of MLD2P4 has led to some
+revisions and extensions of the PSBLAS kernels, leading to the
+recent PSBLAS 2.0 version~\cite{PSBLASGUIDE}. The inter-process communication required
+by MLD2P4 is encapsulated into the PSBLAS routines, except for a few cases where
+MPI~\cite{MPI1} is explicitly called. Therefore, MLD2P4 can be run on any parallel
+machine where PSBLAS and MPI implementations are available.
+
+MLD2P4 has a layered and modular software architecture where three main layers can be identified.
+The lower layer consists of the PSBLAS kernels, the middle one implements
+the construction and application phases of the preconditioners, and the upper one
+provides a uniform and easy-to-use interface to all the preconditioners.
+This architecture allows for different levels of use of the package:
+a few black-box routines at the upper layer allow non-expert users to easily
+build any preconditioner available in MLD2P4 and to apply it within a PSBLAS Krylov solver.
+On the other hand, the routines of the middle and lower layer can be used and extended
+by expert users to build new versions of multi-level Schwarz preconditioners.
+We provide here a description of the upper-layer routines, but not of the
+middle-layer ones.
+
+This guide is organized as follows. General information on the distribution of the source code
+is reported in Section~\ref{sec:distribution}, while details on the configuration
+and installation of the package are given in Section~\ref{sec:building}. A description of
+multi-level Schwarz preconditioners based on smoothed aggregation is provided
+in Section~\ref{sec:background}, to help the users in choosing among the different preconditioners
+implemented in MLD2P4. The basics for building and applying the preconditioners
+with the Krylov solvers implemented in PSBLAS are reported in Section~\ref{sec:started}, where the
+Fortran 95 codes of a few sample programs are also shown. A reference guide for
+the upper-layer routines of MLD2P4, that are the user interface, is provided
+in Section~\ref{sec:userinterface}. The error handling mechanism used by the package is briefly described
+in Section~\ref{sec:errors}. The copyright terms concerning the distribution and modification
+of MLD2P4 are reported in Appendix~\ref{sec:license}.
+
+%%% Local Variables:
+%%% mode: latex
+%%% TeX-master: "userguide"
+%%% End:
diff --git a/docs/src/precs.tex b/docs/src/precs.tex
new file mode 100644
index 00000000..510606cf
--- /dev/null
+++ b/docs/src/precs.tex
@@ -0,0 +1,280 @@
+\section{Preconditioner routines}
+\label{sec:precs}
+\markboth{\underline{MLD2P4 User's and Reference Guide}}
+ {\underline{\ref{sec:precs} Preconditioners}}
+
+% \section{Preconditioners}
+\label{sec:psprecs}
+The MLD2P4 library contains the implementation of many preconditioning
+techniques. The preconditioners may be applied as normal ``base''
+preconditioners; alternatively multiple ``base'' preconditioners may
+be combined in a multilevel framework.
+
+The base (one-level) preconditioners include:
+\begin{itemize}
+\item Diagonal Scaling
+\item Block Jacobi
+\item Additive Schwarz, Restricted Additive Schwarz and
+ Additive Schwarz with Harmonic extensions;
+\end{itemize}
+The Jacobi and Additive Schwarz preconditioners can make use of the
+following solvers:
+\begin{itemize}
+\item Level-$p$ Incomplete LU factorization ($ILU(p)$);
+\item Threshold Incomplete LU factorization ($ILU(\tau,p)$);
+\item Complete LU factorization by means of the following optional
+ external packages:
+\begin{itemize}
+\item UMFPACK;
+\item SuperLU;
+\item SuperLU\_Dist.
+\end{itemize}
+\end{itemize}
+
+The supporting data type and subroutine interfaces are defined in the
+module \verb|mld_prec_mod|; the module also overrides the variables
+and type definitions of \verb|psb_prec_mod| so as to function as a
+drop-in replacement for the PSBLAS methods. Thus if the user does not
+wish to employ the additional MLD2P4 capabilities, it is possible to
+migrate an existing PSBLAS program without any source code
+modifications; only a recompilation is needed.
+
+%% We also provide a companion package of multi-level Additive
+%% Schwarz preconditioners called MD2P4; this is actually a family of
+%% preconditioners since there is the possibility to choose between
+%% many variants, and is currently in an experimental stateIts
+%% documentation is planned to appear after stabilization of the
+%% package, which will characterize release 2.1 of our library.
+
+
+
+
+\subroutine{mld\_precinit}{Initialize a preconditioner}
+
+\syntax{call mld\_precinit}{prec, ptype, info}
+\syntax*{call mld\_precinit}{prec, ptype, info, nlev}
+
+\begin{description}
+\item[Type:] Asynchronous.
+\item[\bf On Entry]
+\item[ptype] the type of preconditioner.
+Scope: {\bf global} \\
+Type: {\bf required}\\
+Intent: {\bf in}.\\
+Specified as: a character string, see usage notes.
+\item[nlev] Number of levels in a multilevel preconditioner.
+Scope: {\bf global} \\
+Type: {\bf optional}\\
+Specified as: an integer value, see usage notes.
+%% \item[rs]
+%% Scope: {\bf global} \\
+%% Type: {\bf optional}\\
+%% Specified as: a long precision real number.
+\item[\bf On Exit]
+
+\item[prec]
+Scope: {\bf local} \\
+Type: {\bf required}\\
+Intent: {\bf inout}.\\
+Specified as: a preconditioner data structure \precdata.
+\item[info]
+Scope: {\bf global} \\
+Type: {\bf required}\\
+Intent: {\bf out}.\\
+Error code: if no error, 0 is returned.
+\end{description}
+\subsection*{Usage Notes}
+%% The PSBLAS 2.0 contains a number of preconditioners, ranging from a
+%% simple diagonal scaling to 2-level domain decomposition. These
+%% preconditioners may use the SuperLU or the UMFPACK software, if
+%% installed; see~\cite{SUPERLU,UMFPACK}.
+Legal inputs to this subroutine are interpreted depending on the
+$ptype$ string as follows\footnote{The string is case-insensitive}:
+\begin{description}
+\item[NONE] No preconditioning, i.e.\ the preconditioner is just a copy
+ operator.
+\item[DIAG] Diagonal scaling; each entry of the input vector is
+ multiplied by the reciprocal of the sum of the absolute values of
+ the coefficients in the corresponding row of matrix $A$;
+\item[BJAC] Precondition by a factorization of the
+ block-diagonal of matrix $A$, where block boundaries are determined
+ by the data allocation boundaries for each process; requires no
+ communication.
+\item[AS] Additive Schwarz; default is to apply the Restricted
+ Additive Schwarz variant, with an $ILU(0)$ factorization.
+\item[ML] Multilevel preconditioner.
+\end{description}
+
+
+
+\subroutine{mld\_precset}{Set preconditioner features}
+
+\syntax{call mld\_precset}{prec, what, val, info, ilev}
+
+
+\begin{description}
+\item[Type:] Asynchronous.
+\item[\bf On Entry]
+\item[prec] the preconditioner.\\
+Scope: {\bf local} \\
+Type: {\bf required}\\
+Intent: {\bf inout}.\\
+Specified as: an already initialized preconditioner data structure \precdata\\
+\item[what] The feature to be set. \\
+Scope: {\bf local} \\
+Type: {\bf required}\\
+Intent: {\bf in}.\\
+Specified as: an integer constant. Symbolic names are available in
+the library module, see usage notes for legal values.
+\item[val] The value to set the chosen feature to. \\
+Scope: {\bf local} \\
+Type: {\bf required}\\
+Intent: {\bf in}.\\
+Specified as: an integer, double precision or character variable.
+Symbolic names for some choices are available in the library module,
+see usage notes for legal values.
+\item[ilev] The level of a multilevel preconditioner to which the
+ feature choice should apply.\\
+Scope: {\bf global} \\
+Type: {\bf optional}\\
+Specified as: an integer value, see usage notes.
+\end{description}
+
+\begin{description}
+\item[\bf On Return]
+\item[prec] the preconditioner.\\
+Scope: {\bf local} \\
+Type: {\bf required}\\
+Intent: {\bf inout}.\\
+Specified as: a preconditioner data structure \precdata\\
+\item[info] Error code.\\
+Scope: {\bf local} \\
+Type: {\bf required} \\
+Intent: {\bf out}.\\
+An integer value; 0 means no error has been detected.
+\end{description}
+
+\subsection*{Usage Notes}
+Legal inputs to this subroutine are interpreted depending on the value
+of \verb|what| input as follows
+\begin{description}
+\item[mld\_coarse\_mat\_]
+\end{description}
+
+
+\subroutine{mld\_precbld}{Builds a preconditioner}
+
+\syntax{call mld\_precbld}{a, desc\_a, prec, info}
+
+\begin{description}
+\item[Type:] Synchronous.
+\item[\bf On Entry]
+\item[a] the system sparse matrix.
+Scope: {\bf local} \\
+Type: {\bf required}\\
+Intent: {\bf in}, target.\\
+Specified as: a sparse matrix data structure \spdata.
+\item[prec] the preconditioner.\\
+Scope: {\bf local} \\
+Type: {\bf required}\\
+Intent: {\bf inout}.\\
+Specified as: an already initialized preconditioner data structure \precdata\\
+\item[desc\_a] the problem communication descriptor.
+Scope: {\bf local} \\
+Type: {\bf required}\\
+Intent: {\bf in}, target.\\
+Specified as: a communication descriptor data structure \descdata.
+%% \item[upd]
+%% Scope: {\bf global} \\
+%% Type: {\bf optional}\\
+%% Intent: {\bf in}.\\
+%% Specified as: a character.
+\end{description}
+
+\begin{description}
+\item[\bf On Return]
+\item[prec] the preconditioner.\\
+Scope: {\bf local} \\
+Type: {\bf required}\\
+Intent: {\bf inout}.\\
+Specified as: a preconditioner data structure \precdata\\
+\item[info] Error code.\\
+Scope: {\bf local} \\
+Type: {\bf required} \\
+Intent: {\bf out}.\\
+An integer value; 0 means no error has been detected.
+\end{description}
+
+
+
+\subroutine{mld\_precaply}{Preconditioner application routine}
+
+\syntax{call mld\_precaply}{prec,x,y,desc\_a,info,trans,work}
+\syntax*{call mld\_precaply}{prec,x,desc\_a,info,trans}
+
+\begin{description}
+\item[Type:] Synchronous.
+\item[\bf On Entry]
+\item[prec] the preconditioner.
+Scope: {\bf local} \\
+Type: {\bf required}\\
+Intent: {\bf in}.\\
+Specified as: a preconditioner data structure \precdata.
+\item[x] the source vector.
+Scope: {\bf local} \\
+Type: {\bf required}\\
+Intent: {\bf inout}.\\
+Specified as: a double precision array.
+\item[desc\_a] the problem communication descriptor.
+Scope: {\bf local} \\
+Type: {\bf required}\\
+Intent: {\bf in}.\\
+Specified as: a communication data structure \descdata.
+\item[trans]
+Scope: {\bf } \\
+Type: {\bf optional}\\
+Intent: {\bf in}.\\
+Specified as: a character.
+\item[work] an optional work space
+Scope: {\bf local} \\
+Type: {\bf optional}\\
+Intent: {\bf inout}.\\
+Specified as: a double precision array.
+\end{description}
+
+\begin{description}
+\item[\bf On Return]
+\item[y] the destination vector.
+Scope: {\bf local} \\
+Type: {\bf required}\\
+Intent: {\bf inout}.\\
+Specified as: a double precision array.
+\item[info] Error code.\\
+Scope: {\bf local} \\
+Type: {\bf required} \\
+Intent: {\bf out}.\\
+An integer value; 0 means no error has been detected.
+\end{description}
+
+
+
+\subroutine{mld\_prec\_descr}{Prints a description of current preconditioner}
+
+\syntax{call mld\_prec\_descr}{prec}
+
+\begin{description}
+\item[Type:] Asynchronous.
+\item[\bf On Entry]
+\item[prec] the preconditioner.
+Scope: {\bf local} \\
+Type: {\bf required}\\
+Intent: {\bf in}.\\
+Specified as: a preconditioner data structure \precdata.
+\end{description}
+
+
+
+%%% Local Variables:
+%%% mode: latex
+%%% TeX-master: "userguide"
+%%% End:
diff --git a/docs/src/title.tex b/docs/src/title.tex
new file mode 100644
index 00000000..831759b8
--- /dev/null
+++ b/docs/src/title.tex
@@ -0,0 +1,72 @@
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Contents: The title page
+% $Id: title.tex 1999 2007-10-29 15:25:27Z sfilippo $
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\ifcase\pdfoutput % We're not running pdftex
+{\Large\bfseries MLD2P4\\[.8ex] User's and Reference Guide}\\
+\emph{\large A guide for the Multi-Level Domain Decomposition \\[.6ex]
+Parallel Preconditioners Package
+based on PSBLAS}
+{\bfseries Pasqua D'Ambra}\\
+ ICAR-CNR, Naples, Italy\\[3ex]
+{\bfseries Daniela di Serafino}\\
+ Second University of Naples, Italy\\[3ex]
+{\bfseries Salvatore Filippone} \\
+ University of Rome ``Tor Vergata'', Italy
+%\\[10ex]
+%\today
+Software version: 1.0\\
+%\today
+July 24, 2008
+\or
+\pdfbookmark{MLD2P4 User's and Reference Guide}{title}
+\newlength{\centeroffset}
+%\setlength{\centeroffset}{-0.5\oddsidemargin}
+%\addtolength{\centeroffset}{0.5\evensidemargin}
+%\addtolength{\textwidth}{-\centeroffset}
+\thispagestyle{empty}
+\vspace*{\stretch{1}}
+\noindent\hspace*{\centeroffset}\makebox[0pt][l]{\begin{minipage}{\textwidth}
+\flushright
+{\Huge\bfseries MLD2P4\\[.8ex] User's and Reference Guide
+}
+\noindent\rule[-1ex]{\textwidth}{5pt}\\[2.5ex]
+\hfill\emph{\Large A guide for the Multi-Level Domain Decomposition \\[.6ex]
+Parallel Preconditioners Package
+based on PSBLAS}
+\end{minipage}}
+
+\vspace{\stretch{1}}
+\noindent\hspace*{\centeroffset}\makebox[0pt][l]{\begin{minipage}{\textwidth}
+\flushright
+{\large\bfseries Pasqua D'Ambra}\\
+\large ICAR-CNR, Naples, Italy\\[3ex]
+{\large\bfseries Daniela di Serafino}\\
+\large Second University of Naples, Italy\\[3ex]
+{\large\bfseries Salvatore Filippone} \\
+\large University of Rome ``Tor Vergata'', Italy
+%\\[10ex]
+%\today
+\end{minipage}}
+
+\vspace{\stretch{1}}
+\noindent\hspace*{\centeroffset}\makebox[0pt][l]{\begin{minipage}{\textwidth}
+\flushright
+\large Software version: 1.0\\
+%\today
+\large July 24, 2008
+\end{minipage}}
+%\addtolength{\textwidth}{\centeroffset}
+\vspace{\stretch{2}}
+\fi
+
+\endinput
+
+%
+
+% Local Variables:
+% TeX-master: "userguide"
+% mode: latex
+% mode: flyspell
+% End:
diff --git a/docs/src/userguide.tex b/docs/src/userguide.tex
new file mode 100644
index 00000000..b040f2ae
--- /dev/null
+++ b/docs/src/userguide.tex
@@ -0,0 +1,173 @@
+\documentclass[a4paper,twoside,11pt]{article}
+\usepackage{pstricks}
+\usepackage{fancybox}
+\usepackage{amsfonts}
+\usepackage{ifpdf}
+% \usepackage{minitoc}
+% \setcounter{minitocdepth}{2}
+\usepackage[bookmarks=true,
+ bookmarksnumbered=true,
+ bookmarksopen=false,
+ plainpages=false,
+ pdfpagelabels,
+ colorlinks,
+ citecolor=red,
+ linkcolor=blue]{hyperref}
+\usepackage{ifthen}
+\usepackage{graphicx}
+\newtheorem{theorem}{Theorem}
+\newtheorem{corollary}{Corollary}
+\usepackage{rotating}
+%\newboolean{mtc}
+%\setboolean{mtc}{true}
+
+\pdfoutput=1
+\relax
+\pdfcompresslevel=0 %-- 0 = none, 9 = best
+\pdfinfo{ %-- Info dictionary of PDF output /Author (PD, DdS, SF)
+ /Title (MultiLevel Domain Decomposition Parallel Preconditioners Package
+ based on PSBLAS, V. 1.0)
+ /Subject (MultiLevel Domain Decomposition Parallel Preconditioners Package)
+ /Keywords (Parallel Numerical Software, Algebraic Multilevel Preconditioners, Sparse Iterative Solvers, PSBLAS, MPI)
+ /Creator (pdfLaTeX)
+ /Producer ($Id: userguide.tex 2008-04-08 Pasqua D'Ambra, Daniela di Serafino,
+ Salvatore Filippone$)
+}
+\pdfcatalog{ %-- Catalog dictionary of PDF output.
+% /URI (http://ce.uniroma2.it/psblas)
+}
+
+\setlength\textwidth{1.15\textwidth}
+\setlength\oddsidemargin{0.3in}
+\setlength\evensidemargin{0.2in}
+% \newlength{\centeroffset}
+% \setlength{\centeroffset}{0.5\oddsidemargin}
+% \addtolength{\centeroffset}{0.5\evensidemargin}
+% \addtolength{\textwidth}{-\centeroffset}
+\pagestyle{myheadings}
+
+\newcounter{subroutine}[subsection]
+\newcounter{example}[subroutine]
+\makeatletter
+\def\subroutine{\@ifstar{\@subroutine}{\clearpage\@subroutine}}%
+\def\@subroutine#1#2{%
+\stepcounter{subroutine}%
+ \section*{\flushleft #1---#2 \endflushleft}%
+ \addcontentsline{toc}{subsection}{#1}%
+ \markright{#1}}%
+\newcommand{\subsubroutine}[2]{%
+\stepcounter{subroutine}%
+ \subsection*{\flushleft #1---#2 \endflushleft}%
+ \addcontentsline{toc}{subsubsection}{#1}%
+ \markright{#1}}%
+\newcommand{\examplename}{Example}
+\newcommand{\syntaxname}{Syntax}
+\def\syntax{\@ifstar{\@ssyntax}{\@syntax}}%
+\def\@syntax{\nobreak\section*{\syntaxname}%
+ \@ssyntax}%
+\def\@ssyntax#1#2{%
+ \nobreak
+ \setbox\@tempboxa\hbox{#1\ {\em $($#2$)$}}%
+ \ifdim \wd\@tempboxa >\hsize
+ \setbox\@tempboxa\hbox{\em $($#2$)$}
+ \ifdim\wd\@tempboxa >\hsize
+ \begin{flushright}#1\ \em$($#2$)$\end{flushright}%
+ \else
+ \hbox to\hsize{#1\hfil}%
+ \hbox to\hsize{\hfil\box\@tempboxa}%
+ \fi
+ \else
+ \hbox to\hsize{\hfil\box\@tempboxa\hfil}%
+ \fi\par\vskip\baselineskip}
+\makeatother
+\newcommand{\example}{\stepcounter{example}%
+\section*{\examplename~\theexample}}
+\def\bsideways{\sidewaystable}
+\def\esideways{\endsidewaystable}
+
+\newcommand{\precdata}{\hyperlink{precdata}{{\tt mld\_prec\_type}}}
+\newcommand{\descdata}{\hyperlink{descdata}{{\tt psb\_desc\_type}}}
+\newcommand{\spdata}{\hyperlink{spdata}{{\tt psb\_spmat\_type}}}
+\newcommand{\Ref}[1]{\mbox{(\ref{#1})}}
+
+\begin{document}
+\pdfbookmark{MLD2P4 User's and Reference Guide}{title}
+\newlength{\centeroffset}
+%\setlength{\centeroffset}{-0.5\oddsidemargin}
+%\addtolength{\centeroffset}{0.5\evensidemargin}
+%\addtolength{\textwidth}{-\centeroffset}
+\thispagestyle{empty}
+\vspace*{\stretch{1}}
+\noindent\hspace*{\centeroffset}\makebox[0pt][l]{\begin{minipage}{\textwidth}
+\flushright
+{\Huge\bfseries MLD2P4\\[.8ex] User's and Reference Guide
+}
+\noindent\rule[-1ex]{\textwidth}{5pt}\\[2.5ex]
+\hfill\emph{\Large A guide for the Multi-Level Domain Decomposition \\[.6ex]
+Parallel Preconditioners Package
+based on PSBLAS}
+\end{minipage}}
+
+\vspace{\stretch{1}}
+\noindent\hspace*{\centeroffset}\makebox[0pt][l]{\begin{minipage}{\textwidth}
+\flushright
+{\large\bfseries Pasqua D'Ambra}\\
+\large ICAR-CNR, Naples, Italy\\[3ex]
+{\large\bfseries Daniela di Serafino}\\
+\large Second University of Naples, Italy\\[3ex]
+{\large\bfseries Salvatore Filippone} \\
+\large University of Rome ``Tor Vergata'', Italy
+%\\[10ex]
+%\today
+\end{minipage}}
+
+\vspace{\stretch{1}}
+\noindent\hspace*{\centeroffset}\makebox[0pt][l]{\begin{minipage}{\textwidth}
+\flushright
+\large Software version: 1.0\\
+%\today
+\large July 24, 2008
+\end{minipage}}
+%\addtolength{\textwidth}{\centeroffset}
+\vspace{\stretch{2}}
+\clearpage
+\ \\
+\thispagestyle{empty}
+\clearpage
+
+\pagenumbering{roman} % Roman numbering
+\setcounter{page}{1} % Abstract start on page i
+
+\include{abstract}
+\cleardoublepage
+
+\begingroup
+ \renewcommand*{\thepage}{toc}
+ %\pagenumbering{roman} % Roman numbering
+ %\setcounter{page}{1} % Abstract start on page ii
+ \tableofcontents
+\endgroup
+\cleardoublepage
+
+\pagenumbering{arabic} % Arabic numbering
+\setcounter{page}{1} % Chapters start on page 1
+
+\include{overview}
+\include{distribution}
+\include{building}
+\include{background}
+\include{gettingstarted}
+\include{userinterface}
+\include{errors}
+\clearpage
+\appendix
+\include{license}
+\cleardoublepage
+\include{bibliography}
+
+
+\end{document}
+%%% Local Variables:
+%%% mode: latex
+%%% TeX-master: 'userguide'
+%%% End:
diff --git a/docs/src/userhtml.tex b/docs/src/userhtml.tex
new file mode 100644
index 00000000..dc1b4784
--- /dev/null
+++ b/docs/src/userhtml.tex
@@ -0,0 +1,149 @@
+\documentclass[a4paper,twoside,11pt]{article}
+\usepackage{pstricks}
+\usepackage{fancybox}
+\usepackage{amsfonts}
+\usepackage{ifpdf}
+% \usepackage{minitoc}
+% \setcounter{minitocdepth}{2}
+\usepackage[bookmarks=true,
+ bookmarksnumbered=true,
+ bookmarksopen=false,
+ plainpages=false,
+ pdfpagelabels,
+ colorlinks,
+ linkcolor=blue]{hyperref}
+\usepackage{ifthen}
+\usepackage{graphicx}
+\newtheorem{theorem}{Theorem}
+\newtheorem{corollary}{Corollary}
+\usepackage{rotating}
+%\newboolean{mtc}
+%\setboolean{mtc}{true}
+
+\pdfoutput=0
+% \relax
+% \pdfcompresslevel=0 %-- 0 = none, 9 = best
+% \pdfinfo{ %-- Info dictionary of PDF output /Author (PD, DdS, SF)
+% /Title (MultiLevel Domain Decomposition Parallel Preconditioners Package
+% based on PSBLAS, V. 1.0)
+% /Subject (MultiLevel Domain Decomposition Parallel Preconditioners Package)
+% /Keywords (Parallel Numerical Software, Algebraic Multilevel Preconditioners, Sparse Iterative Solvers, PSBLAS, MPI)
+% /Creator (pdfLaTeX)
+% /Producer ($Id: userguide.tex 2008-04-08 Pasqua D'Ambra, Daniela di Serafino,
+% Salvatore Filippone$)
+% }
+% \pdfcatalog{ %-- Catalog dictionary of PDF output.
+% % /URI (http://ce.uniroma2.it/psblas)
+% }
+
+\setlength\textwidth{1.15\textwidth}
+% \setlength\evensidemargin{.7in}
+% \newlength{\centeroffset}
+% \setlength{\centeroffset}{0.5\oddsidemargin}
+% \addtolength{\centeroffset}{0.5\evensidemargin}
+% \addtolength{\textwidth}{-\centeroffset}
+\pagestyle{myheadings}
+
+\newcounter{subroutine}[subsection]
+\newcounter{example}[subroutine]
+\makeatletter
+\def\subroutine{\@ifstar{\@subroutine}{\clearpage\@subroutine}}%
+\def\@subroutine#1#2{%
+\stepcounter{subroutine}%
+ \section*{\flushleft #1---#2 \endflushleft}%
+ \addcontentsline{toc}{subsection}{#1}%
+ \markright{#1}}%
+\newcommand{\subsubroutine}[2]{%
+\stepcounter{subroutine}%
+ \subsection*{\flushleft #1---#2 \endflushleft}%
+ \addcontentsline{toc}{subsubsection}{#1}%
+ \markright{#1}}%
+\newcommand{\examplename}{Example}
+\newcommand{\syntaxname}{Syntax}
+\def\syntax{\@ifstar{\@ssyntax}{\@syntax}}%
+\def\@syntax{\nobreak\section*{\syntaxname}%
+ \@ssyntax}%
+\def\@ssyntax#1#2{%
+ \nobreak
+ \setbox\@tempboxa\hbox{#1\ {\em $($#2$)$}}%
+ \ifdim \wd\@tempboxa >\hsize
+ \setbox\@tempboxa\hbox{\em $($#2$)$}
+ \ifdim\wd\@tempboxa >\hsize
+ \begin{flushright}#1\ \em$($#2$)$\end{flushright}%
+ \else
+ \hbox to\hsize{#1\hfil}%
+ \hbox to\hsize{\hfil\box\@tempboxa}%
+ \fi
+ \else
+ \hbox to\hsize{\hfil\box\@tempboxa\hfil}%
+ \fi\par\vskip\baselineskip}
+\makeatother
+\newcommand{\example}{\stepcounter{example}%
+\section*{\examplename~\theexample}}
+\def\bsideways{\begin{table}}
+\def\esideways{\end{table}}
+
+\newcommand{\precdata}{\hyperlink{precdata}{{\tt mld\_prec\_type}}}
+\newcommand{\descdata}{\hyperlink{descdata}{{\tt psb\_desc\_type}}}
+\newcommand{\spdata}{\hyperlink{spdata}{{\tt psb\_spmat\_type}}}
+\newcommand{\Ref}[1]{\mbox{(\ref{#1})}}
+
+\begin{document}
+{\Large\bfseries MLD2P4\\[.8ex] User's and Reference Guide}\\[\baselineskip]
+\emph{\large A guide for the Multi-Level Domain Decomposition
+Parallel Preconditioners Package
+based on PSBLAS}\\[3ex]
+{\bfseries Pasqua D'Ambra}\\
+ ICAR-CNR, Naples, Italy\\
+{\bfseries Daniela di Serafino}\\
+ Second University of Naples, Italy\\
+{\bfseries Salvatore Filippone} \\
+ University of Rome ``Tor Vergata'', Italy\\[2ex]
+%\\[10ex]
+%\today
+Software version: 1.0\\
+%\today
+July 24, 2008
+\clearpage
+\ \\
+\thispagestyle{empty}
+\clearpage
+
+\pagenumbering{roman} % Roman numbering
+\setcounter{page}{1} % Abstract start on page i
+
+\include{abstract}
+\cleardoublepage
+
+\begingroup
+ \renewcommand*{\thepage}{toc}
+ %\pagenumbering{roman} % Roman numbering
+ %\setcounter{page}{1} % Abstract start on page ii
+ \tableofcontents
+\endgroup
+\cleardoublepage
+
+\pagenumbering{arabic} % Arabic numbering
+\setcounter{page}{1} % Chapters start on page 1
+
+\include{overview}
+\include{distribution}
+\include{building}
+\include{background}
+\include{gettingstarted}
+\include{userinterface}
+%\include{advanced}
+\include{errors}
+%\include{listofroutines}
+\cleardoublepage
+\appendix
+\include{license}
+\cleardoublepage
+\include{bibliography}
+
+
+\end{document}
+%%% Local Variables:
+%%% mode: latex
+%%% TeX-master: 'userguide'
+%%% End:
diff --git a/docs/src/userinterface.tex b/docs/src/userinterface.tex
new file mode 100644
index 00000000..e3d5cf34
--- /dev/null
+++ b/docs/src/userinterface.tex
@@ -0,0 +1,443 @@
+\section{User Interface\label{sec:userinterface}}
+\markboth{\textsc{MLD2P4 User's and Reference Guide}}
+ {\textsc{\ref{sec:userinterface} User Interface}}
+
+
+The basic user interface of MLD2P4 consists of six routines. The four routines \verb|mld_| \verb|precinit|,
+\verb|mld_precset|, \verb|mld_precbld| and \verb|mld_precaply| encapsulate all the functionalities
+for the setup and the application of any one-level and multi-level
+preconditioner implemented in the package.
+The routine \verb|mld_precfree| deallocates the preconditioner data structure, while
+\verb|mld_precdescr| prints a description of the preconditioner set up by the user.
+
+For each routine, the same user interface is overloaded with
+respect to the real/complex case and the single/double precision;
+arguments with appropriate data types must be passed to the routine,
+i.e.
+\begin{itemize}
+\item the sparse matrix data structure, containing the matrix to be
+ preconditioned, must be of type \verb|psb_|\emph{x}\verb|spmat_type|
+ with \emph{x} = \verb|s| for real single precision, \emph{x} = \verb|d|
+ for real double precision, \emph{x} = \verb|c| for complex single precision,
+ \emph{x} = \verb|z| for complex double precision;
+\item the preconditioner data structure must be of type
+ \verb|mld_|\emph{x}\verb|prec_type|, with \emph{x} =
+ \verb|s|, \verb|d|, \verb|c|, \verb|z|, according to the sparse
+ matrix data structure;
+\item the arrays containing the vectors $v$ and $w$ involved in
+ the preconditioner application $w=M^{-1}v$ must be of type
+ \emph{type}\verb|(|\emph{kind\_parameter}\verb|)|, with \emph{type} =
+ \verb|real|, \verb|complex| and \emph{kind\_parameter} = \verb|kind(1.e0)|,
+ \verb|kind(1.d0)|, according to the sparse matrix and preconditioner
+ data structure; note that the PSBLAS module \verb|psb_base_mod|
+ provides the constants \verb|psb_spk_|
+ = \verb|kind(1.e0)| and \verb|psb_dpk_| = \verb|kind(1.d0)|;
+\item real parameters defining the preconditioner must be declared
+ according to the precision of the sparse matrix and preconditioner
+ data structures (see Section~\ref{sec:precset}).
+\end{itemize}
+A description of each routine is given in the remainder of this section.
+
+\clearpage
+
+\subsection{Subroutine mld\_precinit\label{sec:precinit}}
+
+\begin{center}
+\verb|mld_precinit(p,ptype,info)| \\
+\verb|mld_precinit(p,ptype,info,nlev)| \\
+\end{center}
+
+\noindent
+This routine allocates and initializes the preconditioner data structure,
+according to the preconditioner type chosen by the user.
+
+{\vskip2\baselineskip\noindent\large\bfseries Arguments}
+
+\begin{tabular}{p{1.2cm}p{12cm}}
+\verb|p| & \verb|type(mld_|\emph{x}\verb|prec_type), intent(inout)|.\\
+ & The preconditioner data structure. Note that \emph{x}
+ must be chosen according to the real/complex, single/double
+ precision version of MLD2P4 under use.\\
+\verb|ptype| & \verb|character(len=*), intent(in)|.\\
+ & The type of preconditioner. Its values are specified
+ in Table~\ref{tab:precinit}.\\
+ & Note that the strings are case insensitive.\\
+\verb|info| & \verb|integer, intent(out)|.\\
+ & Error code. If no error, 0 is returned. See Section~\ref{sec:errors} for details.\\
+\verb|nlev| & \verb|integer, optional, intent(in)|.\\
+ & The number of levels of the multilevel preconditioner.
+ If \verb|nlev| is not present and \verb|ptype|=\verb|'ML'|, \verb|'ml'|,
+ then \verb|nlev|=2 is assumed. Otherwise, \verb|nlev| is ignored.\\
+\end{tabular}
+
+\clearpage
+
+\subsection{Subroutine mld\_precset\label{sec:precset}}
+
+\begin{center}
+\verb|mld_precset(p,what,val,info)|\\
+\end{center}
+
+\noindent
+This routine sets the parameters defining the preconditioner. More
+precisely, the parameter identified by \verb|what| is assigned the value
+contained in \verb|val|.
+
+{\vskip2\baselineskip\noindent\large\bfseries Arguments}
+
+\begin{tabular}{p{1.2cm}p{12cm}}
+\verb|p| & \verb|type(mld_|\emph{x}\verb|prec_type), intent(inout)|.\\
+ & The preconditioner data structure. Note that \emph{x} must
+ be chosen according to the real/complex, single/double precision
+ version of MLD2P4 under use.\\
+\verb|what| & \verb|integer, intent(in)|. \\
+ & The number identifying the parameter to be set.
+ A mnemonic constant has been associated with each of these
+ numbers, as reported in Tables~\ref{tab:p_type}-\ref{tab:p_coarse}.\\
+\verb|val | & \verb|integer| \emph{or} \verb|character(len=*)| \emph{or}
+ \verb|real(psb_spk_)| \emph{or} \verb|real(psb_dpk_)|,
+ \verb|intent(in)|.\\
+ & The value of the parameter to be set. The list of allowed
+ values and the corresponding data types is given in
+ Tables~\ref{tab:p_type}-\ref{tab:p_coarse}.
+ When the value is of type \verb|character(len=*)|,
+ it is also treated as case insensitive.\\
+\verb|info| & \verb|integer, intent(out)|.\\
+ & Error code. If no error, 0 is returned. See Section~\ref{sec:errors}
+ for details.\\
+%
+%\verb|ilev| & \verb|integer, optional, intent(in)|.\\
+% & For the multilevel preconditioner, the level at which the
+% preconditioner parameter has to be set.
+% The levels are numbered in increasing
+% order starting from the finest one, i.e.\ level 1 is the finest level.
+% If \verb|ilev| is not present, the parameter identified by \verb|what|
+% is set at all the appropriate levels (see Table~\ref{tab:params}).
+\end{tabular}
+
+\ \\
+A variety of (one-level and multi-level) preconditioners can be obtained
+by a suitable setting of the preconditioner parameters. These parameters
+can be logically divided into four groups, i.e.\ parameters defining
+\begin{enumerate}
+ \item the type of multi-level preconditioner;
+ \item the one-level preconditioner used as smoother;
+ \item the aggregation algorithm;
+ \item the coarse-space correction at the coarsest level.
+\end{enumerate}
+A list of the parameters that can be set, along with their allowed and
+default values, is given in Tables~\ref{tab:p_type}-\ref{tab:p_coarse}.
+For a detailed description of the meaning of the parameters, please
+refer to Section~\ref{sec:background}.
+%
+%Note that the routine allows to set different features of the
+%preconditioner at each level through the use of \verb|ilev|.
+%This should be done by users with experience in the field of
+%multi-level preconditioners. Non-expert users are recommended
+%to call \verb| mld_precset| without specifying \verb|ilev|.
+
+\bsideways
+\begin{center}
+\begin{tabular}{|l|l|p{2cm}|l|p{7cm}|}
+\hline
+\verb|what| & \textsc{data type} & \verb|val| & \textsc{default} &
+\textsc{comments} \\ \hline
+%\multicolumn{5}{|c|}{\emph{type of the multi-level preconditioner}}\\ \hline
+\verb|mld_ml_type_| & \verb|character(len=*)|
+ & \texttt{'ADD'} \ \ \ \texttt{'MULT'}
+ & \texttt{'MULT'}
+ & Basic multi-level framework: additive or multiplicative
+ among the levels (always additive inside a level). \\ \hline
+\verb|mld_smoother_type_|& \verb|character(len=*)|
+ & \texttt{'DIAG'} \ \ \ \texttt{'BJAC'} \ \ \ \texttt{'AS'}
+ & \texttt{'AS'}
+ & Basic one-level preconditioner (i.e.\ smoother): diagonal,
+ block Jacobi, AS. \\ \hline
+\verb|mld_smoother_pos_| & \verb|character(len=*)|
+ & \texttt{'PRE'} \ \ \ \texttt{'POST'} \ \ \ \texttt{'TWOSIDE'}
+ & \texttt{'POST'}
+ & ``Position'' of the smoother: pre-smoother, post-smoother,
+ pre- and post-smoother. \\
+\hline
+\end{tabular}
+\end{center}
+\caption{Parameters defining the type of multi-level preconditioner.
+\label{tab:p_type}}
+\esideways
+
+\bsideways
+\begin{center}
+\begin{tabular}{|l|l|p{3.2cm}|l|p{7cm}|}
+\hline
+\verb|what| & \textsc{data type} & \verb|val| & \textsc{default} &
+\textsc{comments} \\ \hline
+%\multicolumn{5}{|c|}{\emph{basic one-level preconditioner (smoother)}} \\ \hline
+\verb|mld_sub_ovr_| & \verb|integer|
+ & Any~int.~num.~$\ge 0$
+ & 1
+ & Number of overlap layers. \\ \hline
+\verb|mld_sub_restr_| & \verb|character(len=*)|
+ & \texttt{'HALO'} \hspace{2.5cm} \texttt{'NONE'}
+ & \texttt{'HALO'}
+ & Type of restriction operator:
+ \texttt{'HALO'} for taking into account the overlap, \texttt{'NONE'}
+ for neglecting it. \\ \hline
+\verb|mld_sub_prol_| & \verb|character(len=*)|
+ & \texttt{'SUM'} \hspace{2.5cm} \texttt{'NONE'}
+ & \texttt{'NONE'}
+ & Type of prolongation operator:
+ \texttt{'SUM'} for adding the contributions from the overlap, \texttt{'NONE'}
+ for neglecting them. \\ \hline
+\verb|mld_sub_solve_| & \verb|character(len=*)|
+ & \texttt{'ILU'} \hspace{2.5cm} \texttt{'MILU'} \hspace{2.5cm} \texttt{'ILUT'}
+ \hspace{2.5cm} \texttt{'UMF'} \hspace{2.5cm} \texttt{'SLU'}
+ & \texttt{'UMF'}
+ & Local solver: ILU($p$), MILU($p$), ILU($p,t$), LU from UMFPACK, LU from SuperLU
+ (plus triangular solve). \\ \hline
+\verb|mld_sub_fillin_| & \verb|integer|
+ & Any~int.~num.~$\ge 0$
+ & 0
+ & Fill-in level $p$ of the incomplete LU factorizations. \\ \hline
+\verb|mld_sub_iluthrs_| & \verb|real(|\emph{kind\_parameter}\verb|)|
+ & Any~real~num.~$\ge 0$
+ & 0
+ & Drop tolerance $t$ in the ILU($p,t$) factorization. \\ \hline
+\verb|mld_sub_ren_| & \verb|character(len=*)|
+ & \texttt{'RENUM\_NONE'} \texttt{'RENUM\_GLOBAL'} %, \texttt{'RENUM_GPS'}
+ & \texttt{'RENUM\_NONE'}
+ & Row and column reordering of the local submatrices: no reordering,
+ reordering according to the global numbering of the rows and columns of
+ the whole matrix. \\
+\hline
+\end{tabular}
+\end{center}
+\caption{Parameters defining the one-level preconditioner used as smoother.
+\label{tab:p_smoother}}
+\esideways
+
+\bsideways
+\begin{center}
+\begin{tabular}{|l|l|p{2.3cm}|p{2.6cm}|p{7cm}|}
+\hline
+\verb|what| & \textsc{data type} & \verb|val| & \textsc{default} &
+\textsc{comments} \\ \hline
+%\multicolumn{5}{|c|}{\emph{aggregation algorithm}} \\ \hline
+\verb|mld_aggr_alg_| & \verb|character(len=*)|
+ & \texttt{'DEC'}
+ & \texttt{'DEC'}
+ & Aggregation algorithm. Currently, only the decoupled aggregation is available. \\ \hline
+\verb|mld_aggr_kind_| & \verb|character(len=*)|
+ & \texttt{'SMOOTH'} \hspace{2.5cm} \texttt{'RAW'}
+ & \texttt{'SMOOTH'}
+ & Type of aggregation: smoothed, raw (i.e.\ using the tentative prolongator). \\ \hline
+\verb|mld_aggr_thresh_| & \verb|real(|\emph{kind\_parameter}\verb|)|
+ & Any~real~num. $\in [0, 1]$
+ & 0
+ & Threshold $\theta$ in the aggregation algorithm. \\ \hline
+\verb|mld_aggr_eig_| & \verb|character(len=*)|
+ & \texttt{'A\_NORMI'}
+ & \texttt{'A\_NORMI'}
+ & Estimate of the eigenvalue of $D^{-1}A$ with largest modulus,
+ to build the damping parameter $\omega$ in the smoothed aggregation.
+ Currently, only the infinity norm of
+ the matrix is available. \\ \hline
+\verb|mld_aggr_damp_| & \verb|real(|\emph{kind\_parameter}\verb|)|
+ & Any~real~num.
+ & $4/(3\|D^{-1}A\|_\infty)$
+ & Damping parameter $\omega$ in the smoothed aggregation algorithm.
+ If the user specifies a negative value, then $\omega$
+ is set to its default value;
+ otherwise, $\omega$ is set to the value provided by the
+ user. In the latter case no estimate of the eigenvalue of
+ $D^{-1}A$ with largest modulus is computed.\\
+\hline
+\end{tabular}
+\end{center}
+\caption{Parameters defining the aggregation algorithm.
+\label{tab:p_aggregation}}
+\esideways
+
+\bsideways
+\begin{center}
+\begin{tabular}{|l|l|p{3.2cm}|l|p{7cm}|}
+\hline
+\verb|what| & \textsc{data type} & \verb|val| & \textsc{default} &
+\textsc{comments} \\ \hline
+%\multicolumn{5}{|c|}{\emph{coarse-space correction at the coarsest level}}\\ \hline
+\verb|mld_coarse_mat_| & \verb|character(len=*)|
+ & \texttt{'DISTR'} \hspace{2.5cm} \texttt{'REPL'}
+ & \texttt{'DISTR'}
+ & Coarsest matrix: distributed among the processors or
+ replicated on each of them. \\ \hline
+\verb|mld_coarse_solve_| & \verb|character(len=*)|
+ & \texttt{'BJAC'} \hspace{2.5cm} \texttt{'UMF'} \hspace{2.5cm}
+ \texttt{'SLU'} \hspace{2.5cm} \texttt{'SLUDIST'}
+ & \texttt{'BJAC'}
+ & Solver used at the coarsest level: block Jacobi, sequential
+ LU from UMFPACK, sequential LU from SuperLU,
+ distributed LU from SuperLU\_Dist.
+ \texttt{'BJAC'} and \texttt{'SLUDIST'} require the coarsest
+ matrix to be distributed, while \texttt{'UMF'} and
+ \texttt{'SLU'} require it to be replicated. \\ \hline
+\verb|mld_coarse_subsolve_| & \verb|character(len=*)|
+ & \texttt{'ILU'} \hspace{2.5cm} \texttt{'MILU'}
+ \hspace{2.5cm} \texttt{'ILUT'}
+ \hspace{2.5cm} \texttt{'UMF'} \hspace{2.5cm} \texttt{'SLU'}
+ & \texttt{'UMF'}
+ & Solver for the diagonal blocks of the coarse matrix,
+ in case the block Jacobi solver
+ is chosen as coarsest-level solver: ILU($p$), MILU($p$),
+ ILU($p,t$), LU from UMFPACK,
+ LU from SuperLU, plus triangular solve. \\ \hline
+\verb|mld_coarse_sweeps_|& \verb|integer|
+ & Any~int.~num.~$> 0$
+ & 4
+ & Number of block Jacobi sweeps when \texttt{'BJAC'} is used as
+ coarsest-level solver. \\ \hline
+\verb|mld_coarse_fillin_| & \verb|integer|
+ & Any~int.~num.~$\ge 0$
+ & 0
+ & Fill-in level $p$ of the incomplete LU factorizations. \\ \hline
+\verb|mld_coarse_iluthrs_| & \verb|real(|\emph{kind\_parameter}\verb|)|
+ & Any~real~num.~$\ge 0$
+ & 0
+ & Drop tolerance $t$ in the ILU($p,t$) factorization. \\
+\hline
+\end{tabular}
+\end{center}
+\caption{Parameters defining the coarse-space correction at the coarsest
+level.\label{tab:p_coarse}}
+\esideways
+
+
+\clearpage
+
+\subsection{Subroutine mld\_precbld\label{sec:precbld}}
+
+\begin{center}
+\verb|mld_precbld(a,desc_a,p,info)|\\
+\end{center}
+
+\noindent
+This routine builds the preconditioner according to the requirements made by
+the user through the routines \verb|mld_precinit| and \verb|mld_precset|.
+
+{\vskip2\baselineskip\noindent\large\bfseries Arguments}
+
+\begin{tabular}{p{1.2cm}p{12cm}}
+\verb|a| & \verb|type(psb_|\emph{x}\verb|spmat_type), intent(in)|. \\
+ & The sparse matrix structure containing the local part of the
+ matrix to be preconditioned. Note that \emph{x} must be chosen according
+ to the real/complex,
+single/double precision version of MLD2P4 under use.
+ See the PSBLAS User's Guide for details \cite{PSBLASGUIDE}.\\
+\verb|desc_a| & \verb|type(psb_desc_type), intent(in)|. \\
+ & The communication descriptor of \verb|a|. See the PSBLAS User's Guide for
+ details \cite{PSBLASGUIDE}.\\
+\verb|p| & \verb|type(mld_|\emph{x}\verb|prec_type), intent(inout)|.\\
+ & The preconditioner data structure. Note that \emph{x} must be chosen according
+ to the real/complex, single/double precision version of MLD2P4 under use.\\
+\verb|info| & \verb|integer, intent(out)|.\\
+ & Error code. If no error, 0 is returned. See Section~\ref{sec:errors} for details.\\
+\end{tabular}
+
+\clearpage
+\subsection{Subroutine mld\_precaply\label{sec:precaply}}
+
+\begin{center}
+\verb|mld_precaply(p,x,y,desc_a,info)|\\
+\verb|mld_precaply(p,x,y,desc_a,info,trans,work)|\\
+\end{center}
+
+\noindent
+This routine computes $y = op(M^{-1})\, x$, where $M$ is a previously built
+preconditioner, stored into \verb|p|, and $op$
+denotes the preconditioner itself or its transpose, according to
+the value of \verb|trans|.
+Note that, when MLD2P4 is used with a Krylov solver from PSBLAS,
+\verb|mld_precaply| is called within the PSBLAS routine \verb|psb_krylov|
+and hence it is completely transparent to the user.
+
+{\vskip2\baselineskip\noindent\large\bfseries Arguments}
+
+\begin{tabular}{p{1.2cm}p{12cm}}
+\verb|p| & \verb|type(mld_|\emph{x}\verb|prec_type), intent(inout)|.\\
+ & The preconditioner data structure, containing the local part of $M$.
+ Note that \emph{x} must be chosen according
+ to the real/complex, single/double precision version of MLD2P4 under use.\\
+\verb|x| & \emph{type}\verb|(|\emph{kind\_parameter}\verb|), dimension(:), intent(in)|.\\
+ & The local part of the vector $x$. Note that \emph{type} and
+ \emph{kind\_parameter} must be chosen according
+ to the real/complex, single/double precision version of MLD2P4 under use.\\
+\verb|y| & \emph{type}\verb|(|\emph{kind\_parameter}\verb|), dimension(:), intent(out)|.\\
+ & The local part of the vector $y$. Note that \emph{type} and
+ \emph{kind\_parameter} must be chosen according
+ to the real/complex, single/double precision version of MLD2P4 under use.\\
+\verb|desc_a| & \verb|type(psb_desc_type), intent(in)|. \\
+ & The communication descriptor associated with the matrix to be
+ preconditioned.\\
+\verb|info| & \verb|integer, intent(out)|.\\
+ & Error code. If no error, 0 is returned. See Section~\ref{sec:errors} for details.\\
+\verb|trans| & \verb|character(len=1), optional, intent(in)|.\\
+ & If \verb|trans| = \verb|'N','n'| then $op(M^{-1}) = M^{-1}$;
+ if \verb|trans| = \verb|'T','t'| then $op(M^{-1}) = M^{-T}$
+ (transpose of $M^{-1}$); if \verb|trans| = \verb|'C','c'| then $op(M^{-1}) = M^{-C}$
+ (conjugate transpose of $M^{-1}$).\\
+\verb|work| & \emph{type}\verb|(|\emph{kind\_parameter}\verb|), dimension(:), optional, target|.\\
+ & Workspace. Its size should be at
+ least \verb|4 * psb_cd_get_local_| \verb|cols(desc_a)| (see the PSBLAS User's Guide).
+ Note that \emph{type} and \emph{kind\_parameter} must be chosen according
+ to the real/complex, single/double precision version of MLD2P4 under use.\\
+\end{tabular}
+
+\clearpage
+
+\subsection{Subroutine mld\_precfree\label{sec:precfree}}
+
+\begin{center}
+\verb|mld_precfree(p,info)|\\
+\end{center}
+
+\noindent
+This routine deallocates the preconditioner data structure.
+
+{\vskip2\baselineskip\noindent\large\bfseries Arguments}
+
+\begin{tabular}{p{1.2cm}p{10.5cm}}
+\verb|p| & \verb|type(mld_|\emph{x}\verb|prec_type), intent(inout)|.\\
+ & The preconditioner data structure. Note that \emph{x} must be chosen according
+ to the real/complex, single/double precision version of MLD2P4 under use.\\
+\verb|info| & \verb|integer, intent(out)|.\\
+ & Error code. If no error, 0 is returned. See Section~\ref{sec:errors} for details.\\
+\end{tabular}
+
+\clearpage
+
+\subsection{Subroutine mld\_precdescr\label{sec:precdescr}}
+
+\begin{center}
+\verb|mld_precdescr(p,info)|\\
+\verb|mld_precdescr(p,info,iout)|\\
+\end{center}
+
+\noindent
+This routine prints a description of the preconditioner to the standard output or
+to a file. It must be called after \verb|mld_precbld| has been called.
+
+{\vskip2\baselineskip\noindent\large\bfseries Arguments}
+
+\begin{tabular}{p{1.2cm}p{12cm}}
+\verb|p| & \verb|type(mld_|\emph{x}\verb|prec_type), intent(in)|.\\
+ & The preconditioner data structure. Note that \emph{x} must be chosen according
+ to the real/complex, single/double precision version of MLD2P4 under use.\\
+\verb|info| & \verb|integer, intent(out)|.\\
+ & Error code. If no error, 0 is returned. See Section~\ref{sec:errors} for details.\\
+\verb|iout| & \verb|integer, intent(in), optional|.\\
+ & The id of the file where the preconditioner description
+ will be printed; the default is the standard output.\\
+\end{tabular}
+
+%%% Local Variables:
+%%% mode: latex
+%%% TeX-master: "userguide"
+%%% End:
|