Docs updates.

7 months ago · 7e0347281e
parent 5eb4f55406
commit 7e0347281e
34 changed files with 5827 additions and 5827 deletions
--- a/docs/html/index.html
+++ b/docs/html/index.html
@ -23,7 +23,7 @@ class="pplb7t-">Alfredo Buttari </span><br
 class="newline" /><span 
 class="pplb7t-">Fabio Durastante  </span><br 
 class="newline" />Software version: 3.9.0<br 
-class="newline" />June 9th, 2025
+class="newline" />December 23rd, 2025
                                                                  

                                                                  
--- a/docs/html/psblas.png
+++ b/docs/html/psblas.png
--- a/docs/html/userhtml.html
+++ b/docs/html/userhtml.html
@ -23,7 +23,7 @@ class="pplb7t-">Alfredo Buttari </span><br
 class="newline" /><span 
 class="pplb7t-">Fabio Durastante  </span><br 
 class="newline" />Software version: 3.9.0<br 
-class="newline" />June 9th, 2025
+class="newline" />December 23rd, 2025
                                                                  

                                                                  
--- a/docs/html/userhtml22x.png
+++ b/docs/html/userhtml22x.png
--- a/docs/html/userhtml23x.png
+++ b/docs/html/userhtml23x.png
--- a/docs/html/userhtml24x.png
+++ b/docs/html/userhtml24x.png
--- a/docs/html/userhtml33x.png
+++ b/docs/html/userhtml33x.png
--- a/docs/html/userhtml6.html
+++ b/docs/html/userhtml6.html
@ -11,7 +11,7 @@
 </head><body 
 >
         <div class="footnote-text">
-  <!--l. 208--><p class="indent" >       <span class="footnote-mark"><a 
+  <!--l. 209--><p class="indent" >       <span class="footnote-mark"><a 
 id="fn1x0"><a 
 id="x6-4003x2"></a>    <sup class="textsuperscript">1</sup></a></span><span 
 class="pplr7t-x-x-80">In our prototype implementation we provide sample scatter/gather routines.</span></div>
--- a/docs/html/userhtml7.html
+++ b/docs/html/userhtml7.html
@ -11,7 +11,7 @@
 </head><body 
 >
  <div class="footnote-text">
-  <!--l. 252--><p class="noindent" ><span class="footnote-mark"><a 
+  <!--l. 251--><p class="noindent" ><span class="footnote-mark"><a 
 id="fn2x0"><a 
 id="x7-5002x2.1"></a>    <sup class="textsuperscript">2</sup></a></span><span 
 class="pplr7t-x-x-80">This is the normal situation when the pattern of the sparse matrix is symmetric, which is equivalent to</span>
--- a/docs/html/userhtml8.html
+++ b/docs/html/userhtml8.html
@ -11,7 +11,7 @@
 </head><body 
 >
  <div class="footnote-text">
-  <!--l. 420--><p class="noindent" ><span class="footnote-mark"><a 
+  <!--l. 419--><p class="noindent" ><span class="footnote-mark"><a 
 id="fn3x0"><a 
 id="x8-7020x3"></a>    <sup class="textsuperscript">3</sup></a></span><span 
 class="pplr7t-x-x-80">The subroutine style </span><span 
--- a/docs/html/userhtmlli1.html
+++ b/docs/html/userhtmlli1.html
@ -210,7 +210,7 @@ href="userhtmlse6.html#x12-930006.15" id="QQ2-12-122">psb_gefree &#8212; Frees a
 <br />   &#x00A0;&#x00A0;<span class="subsectionToc" >6.16 <a 
 href="userhtmlse6.html#x12-940006.16" id="QQ2-12-123">psb_gelp &#8212; Applies a left permutation to a dense matrix</a></span>
 <br />   &#x00A0;&#x00A0;<span class="subsectionToc" >6.17 <a 
-href="userhtmlse6.html#x12-950006.17" id="QQ2-12-124">psb_glob_to_loc &#8212; Global to local indices convertion</a></span>
+href="userhtmlse6.html#x12-950006.17" id="QQ2-12-124">psb_glob_to_loc &#8212; Global to local indices conversion</a></span>
 <br />   &#x00A0;&#x00A0;<span class="subsectionToc" >6.18 <a 
 href="userhtmlse6.html#x12-960006.18" id="QQ2-12-125">psb_loc_to_glob &#8212; Local to global indices conversion</a></span>
 <br />   &#x00A0;&#x00A0;<span class="subsectionToc" >6.19 <a 
--- a/docs/html/userhtmlli2.html
+++ b/docs/html/userhtmlli2.html
@ -34,8 +34,8 @@ base toolkit to build much more sophisticated preconditioners which can be plugg
 seamlessly into the base solvers.
 <!--l. 20--><p class="indent" >   The software architecture allows us to offer support for many alternatives in the
 implementation, including usage of heterogeneous platforms, and computations
-performed on GPUs throuh CUDA. There is support for GPU computations through
-OpenACC, but it is at this time a highly experimental version; we plan to
+performed on GPUs through CUDA. There is also support for GPU computations
+through OpenACC, but it is at this time a highly experimental version; we plan to
 also look at using accelerators through OpenMP as support from compilers
 improves.
 <!--l. 28--><p class="indent" >   The project is led by Salvatore Filippone; a number of people have been
@ -58,7 +58,7 @@ chronological order: <span class="obeylines-h">
   <br />Dario Pascucci</span>
                                                                  <div class="flushright" 
 >
-<!--l. 48--><p class="noindent" >
+<!--l. 49--><p class="noindent" >
 Salvatore Filippone<br />
                                                                  

@ -71,12 +71,12 @@ Fabio Durastante</div>
                                                                  

                                                                  
-   <!--l. 57--><div class="crosslinks"><p class="noindent">[<a 
+   <!--l. 58--><div class="crosslinks"><p class="noindent">[<a 
 href="userhtmlse1.html" >next</a>] [<a 
 href="userhtmlli1.html" >prev</a>] [<a 
 href="userhtmlli1.html#tailuserhtmlli1.html" >prev-tail</a>] [<a 
 href="userhtmlli2.html" >front</a>] [<a 
 href="userhtml.html#userhtmlli2.html" >up</a>] </p></div>
-<!--l. 57--><p class="indent" >   <a 
+<!--l. 58--><p class="indent" >   <a 
 id="tailuserhtmlli2.html"></a>  
 </body></html> 
--- a/docs/html/userhtmlse1.html
+++ b/docs/html/userhtmlse1.html
@ -10,7 +10,7 @@
 <link rel="stylesheet" type="text/css" href="userhtml.css"> 
 </head><body 
 >
-   <!--l. 57--><div class="crosslinks"><p class="noindent">[<a 
+   <!--l. 58--><div class="crosslinks"><p class="noindent">[<a 
 href="userhtmlse2.html" >next</a>] [<a 
 href="userhtmlli2.html" >prev</a>] [<a 
 href="userhtmlli2.html#tailuserhtmlli2.html" >prev-tail</a>] [<a 
@ -18,7 +18,7 @@ href="#tailuserhtmlse1.html">tail</a>] [<a
 href="userhtml.html#userhtmlse1.html" >up</a>] </p></div>
   <h3 class="sectionHead"><span class="titlemark">1    </span> <a 
 id="x4-30001"></a>Introduction</h3>
-<!--l. 59--><p class="noindent" >The PSBLAS library, developed with the aim to facilitate the parallelization of
+<!--l. 60--><p class="noindent" >The PSBLAS library, developed with the aim to facilitate the parallelization of
 computationally intensive scientific applications, is designed to address parallel
 implementation of iterative solvers for sparse linear systems through the
 distributed memory paradigm. It includes routines for multiplying sparse
@ -27,30 +27,31 @@ diagonal entries, preprocessing sparse matrices, and contains additional
 routines for dense matrix operations. The current implementation of PSBLAS
 addresses a distributed memory execution model operating with message
 passing.
-<!--l. 70--><p class="indent" >   The PSBLAS library version 3 is implemented in the Fortran&#x00A0;2008&#x00A0;<span class="cite">[<a 
+<!--l. 71--><p class="indent" >   The PSBLAS library version 3 is implemented in the Fortran&#x00A0;2008&#x00A0;<span class="cite">[<a 
 href="userhtmlli3.html#Xmetcalf">17</a>]</span>
 programming language, with reuse and/or adaptation of existing Fortran&#x00A0;77 and
 Fortran&#x00A0;95 software, plus a handful of C routines.
-<!--l. 75--><p class="indent" >   The use of Fortran&#x00A0;2008 offers a number of advantages over Fortran&#x00A0;95, mostly
+<!--l. 76--><p class="indent" >   The use of Fortran&#x00A0;2008 offers a number of advantages over Fortran&#x00A0;95, mostly
 in the handling of requirements for evolution and adaptation of the library to new
 computing architectures and integration of new algorithms. For a detailed
 discussion of our design see&#x00A0;<span class="cite">[<a 
 href="userhtmlli3.html#XSparse03">11</a>]</span>; other works discussing advanced programming in
 Fortran&#x00A0;2008 include&#x00A0;<span class="cite">[<a 
 href="userhtmlli3.html#XDesPat:11">21</a>,&#x00A0;<a 
-href="userhtmlli3.html#XRouXiaXu:11">19</a>]</span>; sufficient support for Fortran&#x00A0;2008 is now available
-from many compilers, including recent versions of the GNU Fortran compiler from
-the Free Software Foundation, and the FLANG compiler from the LLVM
-project.
-<!--l. 87--><p class="indent" >   Previous approaches have been based on mixing Fortran&#x00A0;95, with its support for
+href="userhtmlli3.html#XRouXiaXu:11">19</a>]</span>; sufficient support for Fortran&#x00A0;2008 is now
+available from many compilers, including recent versions of the GNU Fortran
+compiler from the Free Software Foundation, the FLANG compiler from the
+LLVM project, and the Intel OneAPI compiler. The README file contains
+a list of compilers against which we have successfully tested the current
+release.
+<!--l. 91--><p class="indent" >   Previous approaches have been based on mixing Fortran&#x00A0;95, with its support for
 object-based design, with other languages; these have been advocated by a number
 of authors, e.g.&#x00A0;<span class="cite">[<a 
-href="userhtmlli3.html#Xmachiels">16</a>]</span>. Moreover, the Fortran&#x00A0;95 facilities for dynamic memory
-management and interface overloading greatly enhance the usability of the PSBLAS
-subroutines. In this way, the library can take care of runtime memory requirements
-that are quite difficult or even impossible to predict at implementation or
-compilation time.
-<!--l. 97--><p class="indent" >   The presentation of the PSBLAS library follows the general structure of the
+href="userhtmlli3.html#Xmachiels">16</a>]</span>. The Fortran&#x00A0;95 facilities for dynamic memory management and
+interface overloading ensure that the library can take care of runtime memory
+requirements that are quite difficult or even impossible to predict at implementation
+or compilation time.
+<!--l. 99--><p class="indent" >   The presentation of the PSBLAS library follows the general structure of the
 proposal for serial Sparse BLAS&#x00A0;<span class="cite">[<a 
 href="userhtmlli3.html#Xsblas97">8</a>,&#x00A0;<a 
 href="userhtmlli3.html#Xsblas02">9</a>]</span>, which in its turn is based on the proposal for
@ -58,16 +59,15 @@ BLAS on dense matrices&#x00A0;<span class="cite">[<a
 href="userhtmlli3.html#XBLAS1">15</a>,&#x00A0;<a 
 href="userhtmlli3.html#XBLAS2">5</a>,&#x00A0;<a 
 href="userhtmlli3.html#XBLAS3">6</a>]</span>.
-<!--l. 102--><p class="indent" >   The applicability of sparse iterative solvers to many different areas causes
-some terminology problems because the same concept may be denoted
-through different names depending on the application area. The PSBLAS
-features presented in this document will be discussed referring to a finite
-difference discretization of a Partial Differential Equation (PDE). However,
-the scope of the library is wider than that: for example, it can be applied
-to finite element discretizations of PDEs, and even to different classes of
-problems such as nonlinear optimization, for example in optimal control
-problems.
-<!--l. 112--><p class="indent" >   The design of a solver for sparse linear systems is driven by many conflicting
+<!--l. 104--><p class="indent" >   The applicability of sparse iterative solvers to many different areas causes some
+terminology problems because the same concept may be denoted by different names
+depending on the application area. The PSBLAS features presented in this document
+will be discussed taking as a reference a finite difference discretization of a Partial
+Differential Equation (PDE). However, the scope of the library is wider than that: it
+can be applied to finite element and other discretizations of PDEs, and even to
+different classes of problems such as nonlinear optimization, for example in optimal
+control problems.
+<!--l. 114--><p class="indent" >   The design of a solver for sparse linear systems is driven by many conflicting
 objectives, such as limiting occupation of storage resources, exploiting regularities in
 the input data, exploiting hardware characteristics of the parallel platform. To
                                                                  
@ -88,12 +88,12 @@ applications.
                                                                  

                                                                  
-   <!--l. 129--><div class="crosslinks"><p class="noindent">[<a 
+   <!--l. 131--><div class="crosslinks"><p class="noindent">[<a 
 href="userhtmlse2.html" >next</a>] [<a 
 href="userhtmlli2.html" >prev</a>] [<a 
 href="userhtmlli2.html#tailuserhtmlli2.html" >prev-tail</a>] [<a 
 href="userhtmlse1.html" >front</a>] [<a 
 href="userhtml.html#userhtmlse1.html" >up</a>] </p></div>
-<!--l. 129--><p class="indent" >   <a 
+<!--l. 131--><p class="indent" >   <a 
 id="tailuserhtmlse1.html"></a>  
 </body></html> 
--- a/docs/html/userhtmlse11.html
+++ b/docs/html/userhtmlse11.html
@ -464,11 +464,12 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
 <!--l. 158--><p class="noindent" >This subroutine is a driver implementing a Richardson iteration
   <div class="math-display" >
 <img 
-src="userhtml33x.png" alt="x   = M - 1(b - Ax )+ x ,
+src="userhtml33x.png" alt="x   = M - 1(b- Ax  )+ x ,
 k+1             k    k
 " class="math-display" ></div>
 <!--l. 159--><p class="nopar" > with the preconditioner operator <span 
-class="zplmr7m-">M </span>defined in the previous section.
+class="zplmr7m-">M </span>defined in section&#x00A0;<a 
+href="userhtmlse10.html#x16-13600010">10<!--tex4ht:ref: sec:precs --></a>.
 <!--l. 162--><p class="indent" >   The stopping criterion can take the following values:
     <dl class="description"><dt class="description">
     <!--l. 164--><p class="noindent" >
--- a/docs/html/userhtmlse2.html
+++ b/docs/html/userhtmlse2.html
@ -10,7 +10,7 @@
 <link rel="stylesheet" type="text/css" href="userhtml.css"> 
 </head><body 
 >
-   <!--l. 129--><div class="crosslinks"><p class="noindent">[<a 
+   <!--l. 131--><div class="crosslinks"><p class="noindent">[<a 
 href="userhtmlse6.html" >next</a>] [<a 
 href="userhtmlse1.html" >prev</a>] [<a 
 href="userhtmlse1.html#tailuserhtmlse1.html" >prev-tail</a>] [<a 
@ -18,7 +18,7 @@ href="#tailuserhtmlse2.html">tail</a>] [<a
 href="userhtml.html#userhtmlse2.html" >up</a>] </p></div>
   <h3 class="sectionHead"><span class="titlemark">2    </span> <a 
 id="x5-40002"></a>General overview</h3>
-<!--l. 131--><p class="noindent" >The PSBLAS library is designed to handle the implementation of iterative solvers for
+<!--l. 133--><p class="noindent" >The PSBLAS library is designed to handle the implementation of iterative solvers for
 sparse linear systems on distributed memory parallel computers. The system
 coefficient matrix <span 
 class="zplmr7m-">A </span>must be square; it may be real or complex, nonsymmetric, and
@ -33,14 +33,14 @@ The ongoing discussion focuses on the Fortran&#x00A0;2008 layer immediately
 below the application layer. The serial parts of the computation on each
 process are executed through calls to the serial sparse BLAS subroutines. In a
 similar way, the inter-process message exchanges are encapsulated in an
-applicaiton layer that has been strongly inspired by the Basic Linear Algebra
+application layer that has been strongly inspired by the Basic Linear Algebra
 Communication Subroutines (BLACS) library&#x00A0;<span class="cite">[<a 
 href="userhtmlli3.html#XBLACS">7</a>]</span>. Usually there is no need to deal
 directly with MPI; however, in some cases, MPI routines are used directly
 to improve efficiency. For further details on our communication layer see
 Sec.&#x00A0;<a 
 href="userhtmlse7.html#x13-1060007">7<!--tex4ht:ref: sec:parenv --></a>.
-<!--l. 158--><p class="indent" >   <hr class="figure"><div class="figure" 
+<!--l. 160--><p class="indent" >   <hr class="figure"><div class="figure" 
 >
                                                                  

@ -52,8 +52,8 @@ href="userhtmlse7.html#x13-1060007">7<!--tex4ht:ref: sec:parenv --></a>.
                                                                  
 <div class="center" 
 >
-<!--l. 159--><p class="noindent" >
-<!--l. 161--><p class="noindent" ><img 
+<!--l. 161--><p class="noindent" >
+<!--l. 163--><p class="noindent" ><img 
 src="psblas.png" alt="PIC"  
 width="46" height="46" ></div>
 <br /> <div class="caption" 
@ -62,8 +62,8 @@ class="content">PSBLAS library components hierarchy.</span></div><!--tex4ht:labe
                                                                  

                                                                  
-<!--l. 167--><p class="indent" >   </div><hr class="endfigure">
-<!--l. 170--><p class="indent" >   The type of linear system matrices that we address typically arise in
+<!--l. 169--><p class="indent" >   </div><hr class="endfigure">
+<!--l. 172--><p class="indent" >   The types of linear system matrices that we address typically arise in
 the numerical solution of PDEs; in such a context, it is necessary to pay
 special attention to the structure of the problem from which the application
 originates. The nonzero pattern of a matrix arising from the discretization of a
@ -71,12 +71,12 @@ PDE is influenced by various factors, such as the shape of the domain, the
 discretization strategy, and the equation/unknown ordering. The matrix itself can be
 interpreted as the adjacency matrix of the graph associated with the discretization
 mesh.
-<!--l. 181--><p class="indent" >   The distribution of the coefficient matrix for the linear system is based on the
+<!--l. 183--><p class="indent" >   The distribution of the coefficient matrix for the linear system is based on the
 &#8220;owner computes&#8221; rule: the variable associated with each mesh point is assigned to a
 process that will own the corresponding row in the coefficient matrix and will
 carry out all related computations. This allocation strategy is equivalent to a
 partition of the discretization mesh into <span 
-class="pplri7t-">sub-domains</span>. Our library supports any
+class="pplri7t-">sub-domains</span>; our library supports any
 distribution that keeps together the coefficients of each matrix row; there are no
 other constraints on the variable assignment. This choice is consistent with
 simple data distributions such as <span class="obeylines-h"><span class="verb"><span 
@ -88,9 +88,10 @@ the literature, e.g. METIS&#x00A0;<span class="cite">[<a
 href="userhtmlli3.html#XMETIS">14</a>]</span>. Dense vectors conform to sparse matrices,
 that is, the entries of a vector follow the same distribution as the matrix
 rows.
-<!--l. 203--><p class="indent" >   We assume that the sparse matrix is built in parallel, where each process generates
-its own portion. We never require that the entire matrix be available on a single
-node. However, it is possible to hold the entire matrix in one process and distribute it
+<!--l. 204--><p class="indent" >   We assume that the sparse matrix is built in parallel, where each process generates
+its own portion: we never <span 
+class="pplri7t-">require </span>that the entire matrix be available on a single node.
+However, it is possible to hold the entire matrix in one process and distribute it
 explicitly<span class="footnote-mark"><a 
 href="userhtml6.html#fn1x0"><sup class="textsuperscript">1</sup></a></span><a 
 id="x5-4002f1"></a> ,
@ -98,10 +99,10 @@ even though the resulting memory bottleneck would make this option unattractive
 in most cases.
   <h4 class="subsectionHead"><span class="titlemark">2.1    </span> <a 
 id="x5-50002.1"></a>Basic Nomenclature</h4>
-<!--l. 215--><p class="noindent" >Our computational model implies that the data allocation on the parallel distributed
+<!--l. 216--><p class="noindent" >Our computational model implies that the data allocation on the parallel distributed
 memory machine is guided by the structure of the physical model, and specifically
 by the discretization mesh of the PDE.
-<!--l. 220--><p class="indent" >   Each point of the discretization mesh will have (at least) one associated
+<!--l. 221--><p class="indent" >   Each point of the discretization mesh will have (at least) one associated
 equation/variable, and therefore one index. We say that point <span 
 class="zplmr7m-">i </span><span 
 class="pplri7t-">depends </span>on point <span 
@ -117,56 +118,57 @@ class="pplri7t-">sub-domains </span>assigned
 to the parallel processes, we classify the points of a given sub-domain as
 follows.
     <dl class="description"><dt class="description">
-     <!--l. 229--><p class="noindent" >
+     <!--l. 230--><p class="noindent" >
 <span 
 class="pplb7t-">Internal.</span> </dt><dd 
 class="description">
-     <!--l. 229--><p class="noindent" >An internal point of a given domain <span 
-class="pplri7t-">depends </span>only on points of the same
-     domain.  If  all  points  of  a  domain  are  assigned  to  one  process,  then
-     a  computational  step  (e.g.,  a  matrix-vector  product)  of  the  equations
+     <!--l. 230--><p class="noindent" >An internal point of a given sub-domain <span 
+class="pplri7t-">depends </span>only on points of the
+     same  sub-domain.  If  all  points  of  a  sub-domain  are  assigned  to  one
+     process, then a computational step (e.g., a matrix-vector product) of the
                                                                  

                                                                  
-     associated  with  the  internal  points  requires  no  data  items  from  other
-     domains and no communications.
+     equations associated with the internal points requires no data items from
+     other sub-domains and no communications.
     </dd><dt class="description">
     <!--l. 238--><p class="noindent" >
 <span 
 class="pplb7t-">Boundary.</span> </dt><dd 
 class="description">
-     <!--l. 238--><p class="noindent" >A  point  of  a  given  domain  is  a  boundary  point  if  it  <span 
-class="pplri7t-">depends  </span>on  points
-     belonging to other domains.
+     <!--l. 238--><p class="noindent" >A point of a given sub-domain is a boundary point if it <span 
+class="pplri7t-">depends </span>on points
+     belonging to other sub-domains.
     </dd><dt class="description">
-     <!--l. 242--><p class="noindent" >
+     <!--l. 241--><p class="noindent" >
 <span 
 class="pplb7t-">Halo.</span> </dt><dd 
 class="description">
-     <!--l. 242--><p class="noindent" >A halo point for a given domain is a point belonging to another domain
-     such that there is a boundary point which <span 
-class="pplri7t-">depends </span>on it. Whenever performing
-     a computational step, such as a matrix-vector product, the values associated
-     with halo points are requested from other domains. A boundary point of
-     a given domain is usually a halo point for some other domain<span class="footnote-mark"><a 
+     <!--l. 241--><p class="noindent" >A  halo  point  for  a  given  sub-domain  is  a  point  belonging  to  another
+     sub-domain  such  that  there  is  a  boundary  point  which  <span 
+class="pplri7t-">depends  </span>on  it.
+     Whenever performing a computational step, such as a matrix-vector product,
+     the values associated with halo points are requested from other sub-domains.
+     A boundary point of a given sub-domain is usually a halo point for some
+     other sub-domain<span class="footnote-mark"><a 
 href="userhtml7.html#fn2x0"><sup class="textsuperscript">2</sup></a></span><a 
 id="x5-5001f2"></a> ;
     therefore the cardinality of the boundary points set determines the amount
-     of data sent to other domains.
+     of data sent to other sub-domains.
     </dd><dt class="description">
-     <!--l. 255--><p class="noindent" >
+     <!--l. 254--><p class="noindent" >
 <span 
 class="pplb7t-">Overlap.</span> </dt><dd 
 class="description">
-     <!--l. 255--><p class="noindent" >An overlap point is a boundary point assigned to multiple domains. Any
-     operation  that  involves  an  overlap  point  has  to  be  replicated  for  each
+     <!--l. 254--><p class="noindent" >An overlap point is a boundary point assigned to multiple sub-domains.
+     Any operation that involves an overlap point has to be replicated for each
     assignment.</dd></dl>
-<!--l. 259--><p class="noindent" >Overlap points do not usually exist in the basic data distributions; however they are a
+<!--l. 258--><p class="noindent" >Overlap points do not usually exist in the basic data distributions; however they are a
 feature of Domain Decomposition Schwarz preconditioners which are the subject of
 related research work&#x00A0;<span class="cite">[<a 
 href="userhtmlli3.html#X2007c">4</a>,&#x00A0;<a 
 href="userhtmlli3.html#X2007d">3</a>]</span>.
-<!--l. 264--><p class="indent" >   We denote the sets of internal, boundary and halo points for a given subdomain
+<!--l. 263--><p class="indent" >   We denote the sets of internal, boundary and halo points for a given sub-domain
 by <span 
 class="zplmr7y-"><img 
 src="zplmr7y-49.png" alt="I" class="x-x-49" /></span>, <span 
@ -203,7 +205,7 @@ class="zplmr7y-">|<img
 src="zplmr7y-48.png" alt="H" class="x-x-48" /></span><sub><span 
 class="zplmr7m-x-x-76">i</span></sub><span 
 class="zplmr7y-">|</span>.
-<!--l. 274--><p class="indent" >   <hr class="figure"><div class="figure" 
+<!--l. 273--><p class="indent" >   <hr class="figure"><div class="figure" 
 >
                                                                  

@ -215,8 +217,8 @@ class="zplmr7y-">|</span>.
                                                                  
 <div class="center" 
 >
-<!--l. 275--><p class="noindent" >
-<!--l. 278--><p class="noindent" ><img 
+<!--l. 274--><p class="noindent" >
+<!--l. 277--><p class="noindent" ><img 
 src="points.png" alt="PIC"  
 width="46" height="46" ></div>
 <br /> <div class="caption" 
@ -225,113 +227,113 @@ class="content">Point classification.</span></div><!--tex4ht:label?: x5-5003r2 --
                                                                  

                                                                  
-<!--l. 284--><p class="indent" >   </div><hr class="endfigure">
-<!--l. 286--><p class="indent" >   This classification of mesh points guides the naming scheme that we adopted in
+<!--l. 283--><p class="indent" >   </div><hr class="endfigure">
+<!--l. 285--><p class="indent" >   This classification of mesh points guides the naming scheme that we adopted in
 the library internals and in the data structures. We explicitly note that &#8220;Halo&#8221; points
 are also often called &#8220;ghost&#8221; points in the literature.
   <h4 class="subsectionHead"><span class="titlemark">2.2    </span> <a 
 id="x5-60002.2"></a>Library contents</h4>
-<!--l. 295--><p class="noindent" >The PSBLAS library consists of various classes of subroutines:
+<!--l. 294--><p class="noindent" >The PSBLAS library consists of various classes of subroutines:
     <dl class="description"><dt class="description">
-     <!--l. 297--><p class="noindent" >
+     <!--l. 296--><p class="noindent" >
 <span 
 class="pplb7t-">Computational routines</span> </dt><dd 
 class="description">
-     <!--l. 297--><p class="noindent" >comprising:
+     <!--l. 296--><p class="noindent" >comprising:
         <ul class="itemize1">
         <li class="itemize">
-         <!--l. 299--><p class="noindent" >Sparse matrix by dense matrix product;
+         <!--l. 298--><p class="noindent" >Sparse matrix by dense matrix product;
         </li>
         <li class="itemize">
-         <!--l. 300--><p class="noindent" >Sparse triangular systems solution for block diagonal matrices;
+         <!--l. 299--><p class="noindent" >Sparse triangular systems solution for block diagonal matrices;
         </li>
         <li class="itemize">
-         <!--l. 302--><p class="noindent" >Vector and matrix norms;
+         <!--l. 301--><p class="noindent" >Vector and matrix norms;
         </li>
         <li class="itemize">
-         <!--l. 303--><p class="noindent" >Dense matrix sums;
+         <!--l. 302--><p class="noindent" >Dense matrix sums;
         </li>
         <li class="itemize">
-         <!--l. 304--><p class="noindent" >Dot products.</li></ul>
+         <!--l. 303--><p class="noindent" >Dot products.</li></ul>
     </dd><dt class="description">
-     <!--l. 306--><p class="noindent" >
+     <!--l. 305--><p class="noindent" >
 <span 
 class="pplb7t-">Communication routines</span> </dt><dd 
 class="description">
-     <!--l. 306--><p class="noindent" >handling halo and overlap communications;
+     <!--l. 305--><p class="noindent" >handling halo and overlap communications;
     </dd><dt class="description">
-     <!--l. 308--><p class="noindent" >
+     <!--l. 307--><p class="noindent" >
 <span 
 class="pplb7t-">Data management and auxiliary routines</span> </dt><dd 
 class="description">
-     <!--l. 308--><p class="noindent" >including:
+     <!--l. 307--><p class="noindent" >including:
         <ul class="itemize1">
         <li class="itemize">
-         <!--l. 310--><p class="noindent" >Parallel environment management
+         <!--l. 309--><p class="noindent" >Parallel environment management
         </li>
         <li class="itemize">
-         <!--l. 311--><p class="noindent" >Communication descriptors allocation;
+         <!--l. 310--><p class="noindent" >Communication descriptors allocation;
                                                                  

                                                                  
         </li>
         <li class="itemize">
-         <!--l. 312--><p class="noindent" >Dense and sparse matrix allocation;
+         <!--l. 311--><p class="noindent" >Dense and sparse matrix allocation;
         </li>
         <li class="itemize">
-         <!--l. 313--><p class="noindent" >Dense and sparse matrix build and update;
+         <!--l. 312--><p class="noindent" >Dense and sparse matrix build and update;
         </li>
         <li class="itemize">
-         <!--l. 314--><p class="noindent" >Sparse matrix and data distribution preprocessing.</li></ul>
+         <!--l. 313--><p class="noindent" >Sparse matrix and data distribution preprocessing.</li></ul>
     </dd><dt class="description">
-     <!--l. 316--><p class="noindent" >
+     <!--l. 315--><p class="noindent" >
 <span 
 class="pplb7t-">Preconditioner routines</span> </dt><dd 
 class="description">
-     <!--l. 316--><p class="noindent" >
+     <!--l. 315--><p class="noindent" >
     </dd><dt class="description">
-     <!--l. 317--><p class="noindent" >
+     <!--l. 316--><p class="noindent" >
 <span 
 class="pplb7t-">Iterative methods</span> </dt><dd 
 class="description">
-     <!--l. 317--><p class="noindent" >a subset of classical and Krylov subspace iterative methods</dd></dl>
-<!--l. 320--><p class="noindent" >The following naming scheme has been adopted for all the symbols internally defined
+     <!--l. 316--><p class="noindent" >a subset of classical and Krylov subspace iterative methods</dd></dl>
+<!--l. 319--><p class="noindent" >The following naming scheme has been adopted for all the symbols internally defined
 in the PSBLAS software package:
     <ul class="itemize1">
     <li class="itemize">
-     <!--l. 323--><p class="noindent" >all symbols (i.e. subroutine names, data types...) are prefixed by <span class="obeylines-h"><span class="verb"><span 
+     <!--l. 322--><p class="noindent" >all symbols (i.e. subroutine names, data types...) are prefixed by <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_</span></span></span>
     </li>
     <li class="itemize">
-     <!--l. 325--><p class="noindent" >all data type names are suffixed by <span class="obeylines-h"><span class="verb"><span 
+     <!--l. 324--><p class="noindent" >all data type names are suffixed by <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">_type</span></span></span>
     </li>
     <li class="itemize">
-     <!--l. 326--><p class="noindent" >all constants are suffixed by <span class="obeylines-h"><span class="verb"><span 
+     <!--l. 325--><p class="noindent" >all constants are suffixed by <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">_</span></span></span>
     </li>
     <li class="itemize">
-     <!--l. 327--><p class="noindent" >all top-level subroutine names follow the rule <span class="obeylines-h"><span class="verb"><span 
+     <!--l. 326--><p class="noindent" >all top-level subroutine names follow the rule <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_xxname</span></span></span> where <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">xx</span></span></span> can be
     either:
         <ul class="itemize2">
         <li class="itemize">
-         <!--l. 330--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span 
+         <!--l. 329--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">ge</span></span></span>: the routine is related to dense data,
         </li>
         <li class="itemize">
-         <!--l. 331--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span 
+         <!--l. 330--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">sp</span></span></span>: the routine is related to sparse data,
         </li>
         <li class="itemize">
-         <!--l. 332--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span 
+         <!--l. 331--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">cd</span></span></span>: the routine is related to communication descriptor (see&#x00A0;<a 
 href="userhtmlse3.html#x9-100003">3<!--tex4ht:ref: sec:datastruct --></a>).</li></ul>
                                                                  

                                                                  
-     <!--l. 335--><p class="noindent" >For example the <span class="obeylines-h"><span class="verb"><span 
+     <!--l. 334--><p class="noindent" >For example the <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_geins</span></span></span>, <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_spins</span></span></span> and <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_cdins</span></span></span> perform the same
@ -339,33 +341,33 @@ class="cmtt-10">psb_cdins</span></span></span> perform the same
 href="userhtmlse6.html#x12-780006">6<!--tex4ht:ref: sec:toolsrout --></a>) on dense matrices, sparse matrices and communication
     descriptors respectively. Interface overloading allows the usage of the same
     subroutine names for both real and complex data.</li></ul>
-<!--l. 342--><p class="noindent" >In the description of the subroutines, arguments or argument entries are classified
+<!--l. 341--><p class="noindent" >In the description of the subroutines, arguments or argument entries are classified
 as:
     <dl class="description"><dt class="description">
-     <!--l. 345--><p class="noindent" >
+     <!--l. 344--><p class="noindent" >
 <span 
 class="pplb7t-">global</span> </dt><dd 
 class="description">
-     <!--l. 345--><p class="noindent" >For  input  arguments,  the  value  must  be  the  same  on  all  processes
+     <!--l. 344--><p class="noindent" >For  input  arguments,  the  value  must  be  the  same  on  all  processes
     participating  in  the  subroutine  call;  for  output  arguments  the  value  is
     guaranteed to be the same.
     </dd><dt class="description">
-     <!--l. 348--><p class="noindent" >
+     <!--l. 347--><p class="noindent" >
 <span 
 class="pplb7t-">local</span> </dt><dd 
 class="description">
-     <!--l. 348--><p class="noindent" >Each process has its own value(s) independently.</dd></dl>
-<!--l. 350--><p class="noindent" >To finish our general description, we define a version string with the constant
+     <!--l. 347--><p class="noindent" >Each process has its own value(s) independently.</dd></dl>
+<!--l. 349--><p class="noindent" >To finish our general description, we define a version string with the constant
   <div class="math-display" >
 <img 
 src="userhtml0x.png" alt="psb_version_string_
 " class="math-display" ></div>
-<!--l. 352--><p class="nopar" > whose current value is <span class="obeylines-h"><span class="verb"><span 
+<!--l. 351--><p class="nopar" > whose current value is <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">3.9.0</span></span></span>
-<!--l. 355--><p class="noindent" >
+<!--l. 354--><p class="noindent" >
   <h4 class="subsectionHead"><span class="titlemark">2.3    </span> <a 
 id="x5-70002.3"></a>Application structure</h4>
-<!--l. 358--><p class="noindent" >The main underlying principle of the PSBLAS library is that the library objects are
+<!--l. 357--><p class="noindent" >The main underlying principle of the PSBLAS library is that the library objects are
 created and exist with reference to a discretized space to which there corresponds
 an index space and a matrix sparsity pattern. As an example, consider a
 cell-centered finite-volume discretization of the Navier-Stokes equations on a
@ -375,13 +377,13 @@ class="zplmr7m-">n </span>is isomorphic to the set of cell centers,
 whereas the pattern of the associated linear system matrix is isomorphic to the
 adjacency graph imposed on the discretization mesh by the discretization
 stencil.
-<!--l. 368--><p class="indent" >   Thus the first order of business is to establish an index space, and this is done
+<!--l. 367--><p class="indent" >   Thus the first order of business is to establish an index space, and this is done
 with a call to <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_cdall</span></span></span> in which we specify the size of the index space <span 
 class="zplmr7m-">n </span>and the
 allocation of the elements of the index space to the various processes making up the
 MPI (virtual) parallel machine.
-<!--l. 374--><p class="indent" >   The index space is partitioned among processes, and this creates a mapping from
+<!--l. 373--><p class="indent" >   The index space is partitioned among processes, and this creates a mapping from
 the &#8220;global&#8221; numbering 1<span 
 class="zplmr7m-">&#x2026;</span><span 
 class="zplmr7m-">n </span>to a numbering &#8220;local&#8221; to each process; each process <span 
@ -393,14 +395,14 @@ class="zplmr7m-x-x-60">i</span></sub></sub>, each element of which corresponds t
 element of 1<span 
 class="zplmr7m-">&#x2026;</span><span 
 class="zplmr7m-">n</span>. The user does not explicitly set this mapping; when the application
-needs to indicate to which element of the index space a certain item is related,
-such as the row and column index of a matrix coefficient, it does so in the
+needs to indicate to which element of the index space a certain item is related, such
+as the row and column index of a matrix coefficient, it usually does so in the
 &#8220;global&#8221; numbering, and the library will translate into the appropriate &#8220;local&#8221;
 numbering.
                                                                  

                                                                  
-<!--l. 384--><p class="indent" >   For a given index space 1<span 
+<!--l. 383--><p class="indent" >   For a given index space 1<span 
 class="zplmr7m-">&#x2026;</span><span 
 class="zplmr7m-">n </span>there are many possible associated topologies, i.e.
 many different discretization stencils; thus the description of the index space is not
@ -423,51 +425,51 @@ class="zplmr7m-">n</span><sub>col<sub>
 class="zplmr7m-x-x-60">i</span></sub></sub>,
 denoting elements of the index space that are <span 
 class="pplri7t-">not </span>assigned to process <span 
-class="zplmr7m-">i</span>; however the
-variables associated with them are needed to complete computations associated with
+class="zplmr7m-">i</span>; the variables
+associated with them are needed to complete computations associated with
 the sparse matrix <span 
 class="zplmr7m-">A</span>, and thus they have to be fetched from (neighbouring)
 processes. The descriptor of the index space is built exactly for the purpose
 of properly sequencing the communication steps required to achieve this
 objective.
-<!--l. 400--><p class="indent" >   A simple application structure will walk through the index space allocation,
+<!--l. 399--><p class="indent" >   A simple application structure will walk through the index space allocation,
 matrix/vector creation and linear system solution as follows:
     <ol  class="enumerate1" >
 <li 
  class="enumerate" id="x5-7002x1">
-     <!--l. 404--><p class="noindent" >Initialize parallel environment with <span class="obeylines-h"><span class="verb"><span 
+     <!--l. 403--><p class="noindent" >Initialize parallel environment with <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_init</span></span></span>;
     </li>
 <li 
  class="enumerate" id="x5-7004x2">
-     <!--l. 405--><p class="noindent" >Initialize index space with <span class="obeylines-h"><span class="verb"><span 
+     <!--l. 404--><p class="noindent" >Initialize index space with <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_cdall</span></span></span>;
     </li>
 <li 
  class="enumerate" id="x5-7006x3">
-     <!--l. 406--><p class="noindent" >Allocate sparse matrix and dense vectors with <span class="obeylines-h"><span class="verb"><span 
+     <!--l. 405--><p class="noindent" >Allocate sparse matrix and dense vectors with <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_spall</span></span></span> and <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_geall</span></span></span>;
     </li>
 <li 
  class="enumerate" id="x5-7008x4">
-     <!--l. 408--><p class="noindent" >Loop over all local rows, generate matrix and vector entries, and insert
+     <!--l. 407--><p class="noindent" >Loop over all local rows, generate matrix and vector entries, and insert
     them with <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_spins</span></span></span> and <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_geins</span></span></span>
     </li>
 <li 
  class="enumerate" id="x5-7010x5">
-     <!--l. 410--><p class="noindent" >Assemble the various entities:
+     <!--l. 409--><p class="noindent" >Assemble the various entities:
         <ol  class="enumerate2" >
 <li 
  class="enumerate" id="x5-7012x1">
-         <!--l. 412--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span 
+         <!--l. 411--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_cdasb</span></span></span>,
         </li>
 <li 
  class="enumerate" id="x5-7014x2">
-         <!--l. 413--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span 
+         <!--l. 412--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_spasb</span></span></span>,
                                                                  

@ -475,12 +477,12 @@ class="cmtt-10">psb_spasb</span></span></span>,
         </li>
 <li 
  class="enumerate" id="x5-7016x3">
-         <!--l. 414--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span 
+         <!--l. 413--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_geasb</span></span></span>;</li></ol>
     </li>
 <li 
  class="enumerate" id="x5-7018x6">
-     <!--l. 416--><p class="noindent" >Choose the preconditioner to be used with <span class="obeylines-h"><span class="verb"><span 
+     <!--l. 415--><p class="noindent" >Choose the preconditioner to be used with <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">prec%init</span></span></span> and <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">prec%set</span></span></span>, and build it with
     <span class="obeylines-h"><span class="verb"><span 
@ -490,39 +492,39 @@ href="userhtml8.html#fn3x0"><sup class="textsuperscript">3</sup></a></span><a
     </li>
 <li 
  class="enumerate" id="x5-7022x7">
-     <!--l. 421--><p class="noindent" >Call one of the iterative drivers with the method of choice, e.g. <span class="obeylines-h"><span class="verb"><span 
+     <!--l. 420--><p class="noindent" >Call one of the iterative drivers with the method of choice, e.g. <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_krylov</span></span></span>
     with <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">bicgstab</span></span></span>.</li></ol>
-<!--l. 424--><p class="noindent" >This is the structure of the sample programs in the directory <span class="obeylines-h"><span class="verb"><span 
+<!--l. 423--><p class="noindent" >This is the structure of the sample programs in the directory <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">test/pargen/</span></span></span>.
-<!--l. 427--><p class="indent" >   For a simulation in which the same discretization mesh is used over multiple
+<!--l. 426--><p class="indent" >   For a simulation in which the same discretization mesh is used over multiple
 time steps, the following structure may be more appropriate:
     <ol  class="enumerate1" >
 <li 
  class="enumerate" id="x5-7024x1">
-     <!--l. 430--><p class="noindent" >Initialize parallel environment with <span class="obeylines-h"><span class="verb"><span 
+     <!--l. 429--><p class="noindent" >Initialize parallel environment with <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_init</span></span></span>
     </li>
 <li 
  class="enumerate" id="x5-7026x2">
-     <!--l. 431--><p class="noindent" >Initialize index space with <span class="obeylines-h"><span class="verb"><span 
+     <!--l. 430--><p class="noindent" >Initialize index space with <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_cdall</span></span></span>
     </li>
 <li 
  class="enumerate" id="x5-7028x3">
-     <!--l. 432--><p class="noindent" >Loop   over   the   topology   of   the   discretization   mesh   and   build   the
+     <!--l. 431--><p class="noindent" >Loop   over   the   topology   of   the   discretization   mesh   and   build   the
     descriptor with <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_cdins</span></span></span>;
     </li>
 <li 
  class="enumerate" id="x5-7030x4">
-     <!--l. 434--><p class="noindent" >Assemble the descriptor with <span class="obeylines-h"><span class="verb"><span 
+     <!--l. 433--><p class="noindent" >Assemble the descriptor with <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_cdasb</span></span></span>;
     </li>
 <li 
  class="enumerate" id="x5-7032x5">
-     <!--l. 435--><p class="noindent" >Allocate  the  sparse  matrices  and  dense  vectors  with;  <span class="obeylines-h"><span class="verb"><span 
+     <!--l. 434--><p class="noindent" >Allocate  the  sparse  matrices  and  dense  vectors  with  <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_spall</span></span></span> and
     <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_geall</span></span></span>;
@ -532,34 +534,34 @@ class="cmtt-10">psb_geall</span></span></span>;
     </li>
 <li 
  class="enumerate" id="x5-7034x6">
-     <!--l. 437--><p class="noindent" >Loop over the time steps:
+     <!--l. 436--><p class="noindent" >Loop over the time steps:
         <ol  class="enumerate2" >
 <li 
  class="enumerate" id="x5-7036x1">
-         <!--l. 439--><p class="noindent" >If after first time step, reinitialize the sparse matrix with <span class="obeylines-h"><span class="verb"><span 
+         <!--l. 438--><p class="noindent" >If after the first time step, reinitialize the sparse matrix with <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_sprn</span></span></span>;
         also zero out the dense vectors;
         </li>
 <li 
  class="enumerate" id="x5-7038x2">
-         <!--l. 442--><p class="noindent" >Loop  over  the  mesh,  generate  the  coefficients  and  insert/update
+         <!--l. 441--><p class="noindent" >Loop  over  the  mesh,  generate  the  coefficients  and  insert/update
         them with <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_spins</span></span></span> and <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_geins</span></span></span>;
         </li>
 <li 
  class="enumerate" id="x5-7040x3">
-         <!--l. 444--><p class="noindent" >Assemble with <span class="obeylines-h"><span class="verb"><span 
+         <!--l. 443--><p class="noindent" >Assemble with <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_spasb</span></span></span> and <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_geasb</span></span></span>;
         </li>
 <li 
  class="enumerate" id="x5-7042x4">
-         <!--l. 445--><p class="noindent" >
+         <!--l. 444--><p class="noindent" >
         </li>
 <li 
  class="enumerate" id="x5-7044x5">
-         <!--l. 445--><p class="noindent" >Choose   the   preconditioner   to   be   used   with   <span class="obeylines-h"><span class="verb"><span 
+         <!--l. 444--><p class="noindent" >Choose   the   preconditioner   to   be   used   with   <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">prec%init</span></span></span> and
         <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">prec%set</span></span></span>, and build it with <span class="obeylines-h"><span class="verb"><span 
@ -567,21 +569,21 @@ class="cmtt-10">prec%build</span></span></span>;
         </li>
 <li 
  class="enumerate" id="x5-7046x6">
-         <!--l. 448--><p class="noindent" >Call  one  of  the  iterative  drivers  with  the  method  of  choice,  e.g.
+         <!--l. 447--><p class="noindent" >Call  one  of  the  iterative  drivers  with  the  method  of  choice,  e.g.
         <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_krylov</span></span></span> with <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">bicgstab</span></span></span>.</li></ol>
     </li></ol>
-<!--l. 452--><p class="noindent" >The insertion routines will be called as many times as needed; they only need to be
+<!--l. 451--><p class="noindent" >The insertion routines will be called as many times as needed; they only need to be
 called on the data that is actually allocated to the current process, i.e. each process
 generates its own data.
-<!--l. 457--><p class="indent" >   In principle there is no specific order in the calls to <span class="obeylines-h"><span class="verb"><span 
+<!--l. 456--><p class="indent" >   In principle there is no specific order in the calls to <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_spins</span></span></span>, nor is there a
 requirement to build a matrix row in its entirety before calling the routine; this
 allows the application programmer to walk through the discretization mesh element
 by element, generating the main part of a given matrix row but also contributions to
 the rows corresponding to neighbouring elements.
-<!--l. 464--><p class="indent" >   From a functional point of view it is even possible to execute one call for each
+<!--l. 463--><p class="indent" >   From a functional point of view it is even possible to execute one call for each
 nonzero coefficient; however this would have a substantial computational
 overhead. It is therefore advisable to pack a certain amount of data into each
 call to the insertion routine, say touching on a few tens of rows; the best
@ -595,23 +597,23 @@ process and pass it in a single call to <span class="obeylines-h"><span class="v
 class="cmtt-10">psb_spins</span></span></span>; this, however, would entail a
 doubling of memory occupation, and thus would be almost always far from
 optimal.
-<!--l. 477--><p class="noindent" >
+<!--l. 476--><p class="noindent" >
   <h5 class="subsubsectionHead"><span class="titlemark">2.3.1    </span> <a 
 id="x5-80002.3.1"></a>User-defined index mappings</h5>
-<!--l. 479--><p class="noindent" >PSBLAS supports user-defined global to local index mappings, subject to the
+<!--l. 478--><p class="noindent" >PSBLAS supports user-defined global to local index mappings, subject to the
 constraints outlined in sec.&#x00A0;<a 
 href="#x5-70002.3">2.3<!--tex4ht:ref: sec:appstruct --></a>:
     <ol  class="enumerate1" >
 <li 
  class="enumerate" id="x5-8002x1">
-     <!--l. 482--><p class="noindent" >The set of indices owned locally must be mapped to the set 1<span 
+     <!--l. 481--><p class="noindent" >The set of indices owned locally must be mapped to the set 1<span 
 class="zplmr7m-">&#x2026;</span><span 
 class="zplmr7m-">n</span><sub>row<sub><span 
 class="zplmr7m-x-x-60">i</span></sub></sub>;
     </li>
 <li 
  class="enumerate" id="x5-8004x2">
-     <!--l. 484--><p class="noindent" >The set of halo points must be mapped to the set <span 
+     <!--l. 483--><p class="noindent" >The set of halo points must be mapped to the set <span 
 class="zplmr7m-">n</span><sub>row<sub><span 
 class="zplmr7m-x-x-60">i</span></sub></sub> <span 
 class="zplmr7t-">+ </span>1<span 
@ -619,14 +621,14 @@ class="zplmr7m-">&#x2026;</span><span
 class="zplmr7m-">n</span><sub>col<sub>
 <span 
 class="zplmr7m-x-x-60">i</span></sub></sub>;</li></ol>
-<!--l. 487--><p class="noindent" >but otherwise the mapping is arbitrary. The user application is responsible to ensure
+<!--l. 486--><p class="noindent" >but otherwise the mapping is arbitrary. The user application is responsible for ensuring
 consistency of this mapping; some errors may be caught by the library, but
 this is not guaranteed. The application structure to support this usage is as
 follows:
     <ol  class="enumerate1" >
 <li 
  class="enumerate" id="x5-8006x1">
-     <!--l. 493--><p class="noindent" >Initialize                                                                                                       index
+     <!--l. 492--><p class="noindent" >Initialize                                                                                                       index
     space with <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_cdall(ictx,desc,info,vl=vl,lidx=lidx)</span></span></span> passing the
     vectors <span class="obeylines-h"><span class="verb"><span 
@ -636,7 +638,7 @@ class="cmtt-10">lidx(:)</span></span></span> containing the corresponding local
     </li>
 <li 
  class="enumerate" id="x5-8008x2">
-     <!--l. 498--><p class="noindent" >Add  the  halo  points  <span class="obeylines-h"><span class="verb"><span 
+     <!--l. 497--><p class="noindent" >Add  the  halo  points  <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">ja(:)</span></span></span> and  their  associated  local  indices  <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">lidx(:)</span></span></span>
     with one or more calls to <span class="obeylines-h"><span class="verb"><span 
@ -644,7 +646,7 @@ class="cmtt-10">psb_cdins(nz,ja,desc,info,lidx=lidx)</span></span></span>;
     </li>
 <li 
  class="enumerate" id="x5-8010x3">
-     <!--l. 501--><p class="noindent" >Assemble the descriptor with <span class="obeylines-h"><span class="verb"><span 
+     <!--l. 500--><p class="noindent" >Assemble the descriptor with <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_cdasb</span></span></span>;
     </li>
 <li 
@ -652,7 +654,7 @@ class="cmtt-10">psb_cdasb</span></span></span>;
                                                                  

                                                                  
-     <!--l. 502--><p class="noindent" >Build   the   sparse   matrices   and   vectors,   optionally   making   use   in
+     <!--l. 501--><p class="noindent" >Build   the   sparse   matrices   and   vectors,   optionally   making   use   in
     <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_spins</span></span></span> and  <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_geins</span></span></span> of  the  <span class="obeylines-h"><span class="verb"><span 
@ -661,41 +663,41 @@ class="cmtt-10">local</span></span></span> argument  specifying  that  the
 class="cmtt-10">ia</span></span></span>, <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">ja</span></span></span> and <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">irw</span></span></span>, respectively, are already local indices.</li></ol>
-<!--l. 509--><p class="noindent" >
+<!--l. 508--><p class="noindent" >
   <h4 class="subsectionHead"><span class="titlemark">2.4    </span> <a 
 id="x5-90002.4"></a>Programming model</h4>
-<!--l. 511--><p class="noindent" >The PSBLAS librarary is based on the Single Program Multiple Data (SPMD)
+<!--l. 510--><p class="noindent" >The PSBLAS library is based on the Single Program Multiple Data (SPMD)
 programming model: each process participating in the computation performs the
 same actions on a chunk of data. Parallelism is thus data-driven.
-<!--l. 516--><p class="indent" >   Because of this structure, many subroutines coordinate their action across the
+<!--l. 515--><p class="indent" >   Because of this structure, many subroutines coordinate their action across the
 various processes, thus providing an implicit synchronization point, and therefore
 <span 
 class="pplri7t-">must </span>be called simultaneously by all processes participating in the computation. This
 is certainly true for the data allocation and assembly routines, for all the
 computational routines and for some of the tools routines.
-<!--l. 524--><p class="indent" >   However there are many cases where no synchronization, and indeed no
-communication among processes, is implied; for instance, all the routines in sec.&#x00A0;<a 
-href="userhtmlse3.html#x9-100003">3<!--tex4ht:ref: sec:datastruct --></a>
-are only acting on the local data structures, and thus may be called independently.
-The most important case is that of the coefficient insertion routines: since the number
-of coefficients in the sparse and dense matrices varies among the processors, and
-since the user is free to choose an arbitrary order in builiding the matrix entries,
-these routines cannot imply a synchronization.
-<!--l. 534--><p class="indent" >   Throughout this user&#8217;s guide each subroutine will be clearly indicated
+<!--l. 523--><p class="indent" >   However there are cases where no synchronization, and indeed no communication
+among processes, is implied; for instance, all the routines in sec.&#x00A0;<a 
+href="userhtmlse3.html#x9-100003">3<!--tex4ht:ref: sec:datastruct --></a> are only acting on
+the local data structures, and thus may be called independently. The most important
+case is that of the coefficient insertion routines: since the number of coefficients in the
+sparse and dense matrices varies among the processors, and since the user is free to
+choose an arbitrary order in building the matrix entries, these routines cannot imply
+a synchronization.
+<!--l. 533--><p class="indent" >   Throughout this user&#8217;s guide each subroutine will be clearly indicated
 as:
     <dl class="description"><dt class="description">
-     <!--l. 537--><p class="noindent" >
+     <!--l. 536--><p class="noindent" >
 <span 
 class="pplb7t-">Synchronous:</span> </dt><dd 
 class="description">
-     <!--l. 537--><p class="noindent" >must  be  called  simultaneously  by  all  the  processes  in  the  relevant
+     <!--l. 536--><p class="noindent" >must  be  called  simultaneously  by  all  the  processes  in  the  relevant
     communication context;
     </dd><dt class="description">
-     <!--l. 539--><p class="noindent" >
+     <!--l. 538--><p class="noindent" >
 <span 
 class="pplb7t-">Asynchronous:</span> </dt><dd 
 class="description">
-     <!--l. 539--><p class="noindent" >may be called in a totally independent manner.</dd></dl>
+     <!--l. 538--><p class="noindent" >may be called in a totally independent manner.</dd></dl>
                                                                  

                                                                  
--- a/docs/html/userhtmlse3.html
+++ b/docs/html/userhtmlse3.html
@ -60,8 +60,8 @@ class="pplb7t-">psb</span><span
 class="pplb7t-">_epk</span><span 
 class="pplb7t-">_</span> </dt><dd 
 class="description">
-     <!--l. 28--><p class="noindent" >Kind parameter for 8-bytes integer data, as is always used by the <code class="lstinline"><span style="color:#000000">sizeof</span></code>
-     methods;
+     <!--l. 28--><p class="noindent" >Kind  parameter  for  8-byte  integer  data,  as  is  always  returned  by  the
+     <code class="lstinline"><span style="color:#000000">sizeof</span></code> methods;
     </dd><dt class="description">
     <!--l. 30--><p class="noindent" >
 <span 
@ -94,15 +94,15 @@ documentation.
 <!--l. 48--><p class="noindent" >
   <h4 class="subsectionHead"><span class="titlemark">3.1    </span> <a 
 id="x9-110003.1"></a>Descriptor data structure</h4>
-<!--l. 50--><p class="noindent" >All the general matrix informations and elements to be exchanged among processes
-are stored within a data structure of the type <a 
+<!--l. 50--><p class="noindent" >All the general matrix information and the identification of elements to be
+exchanged among processes are stored within a data structure of the type
+<a 
 id="descdata"></a><span 
 class="cmtt-10">psb</span><span 
 class="cmtt-10">_desc</span><span 
-class="cmtt-10">_type</span>. Every structure of this
-type is associated with a discretization pattern and enables data communications
-and other operations that are necessary for implementing the various algorithms of
-interest to us.
+class="cmtt-10">_type</span>. Every structure of this type is associated with a discretization
+pattern and enables data communications and other operations that are necessary
+for implementing the various algorithms of interest to us.
 <!--l. 57--><p class="indent" >   The data structure itself <code class="lstinline"><span style="color:#000000">psb_desc_type</span></code> can be treated as an opaque object
 handled via the tools routines of Sec.&#x00A0;<a 
 href="userhtmlse6.html#x12-780006">6<!--tex4ht:ref: sec:toolsrout --></a> or the query routines detailed below;
--- a/docs/html/userhtmlse4.html
+++ b/docs/html/userhtmlse4.html
@ -3129,7 +3129,7 @@ class="zplmr7m-">x </span>and
 class="zplmr7m-">y</span>
   <div class="math-display" >
 <img 
-src="userhtml22x.png" alt="dot &#x2190; x(i)y(i).
+src="userhtml22x.png" alt="y(i) &#x2190; x(i)y(i).
 " class="math-display" ></div>
 <!--l. 1249--><p class="nopar" >
 <!--l. 1251--><p class="indent" >   <code class="lstinline"><span style="color:#000000">psb_gemlt</span><span style="color:#000000">(</span><span style="color:#000000">x</span><span style="color:#000000">,</span><span style="color:#000000"> </span><span style="color:#000000">y</span><span style="color:#000000">,</span><span style="color:#000000"> </span><span style="color:#000000">desc_a</span><span style="color:#000000">,</span><span style="color:#000000"> </span><span style="color:#000000">info</span><span style="color:#000000">)</span></code>
@ -3314,7 +3314,7 @@ class="zplmr7m-">x </span>and
 class="zplmr7m-">y</span>
   <div class="math-display" >
 <img 
-src="userhtml23x.png" alt="/ &#x2190;  x(i)/y(i).
+src="userhtml23x.png" alt="y(i) &#x2190; x(i)/y(i).
 " class="math-display" ></div>
 <!--l. 1316--><p class="nopar" >
 <!--l. 1318--><p class="indent" >   <code class="lstinline"><span style="color:#000000">psb_gediv</span><span style="color:#000000">(</span><span style="color:#000000">x</span><span style="color:#000000">,</span><span style="color:#000000"> </span><span style="color:#000000">y</span><span style="color:#000000">,</span><span style="color:#000000"> </span><span style="color:#000000">desc_a</span><span style="color:#000000">,</span><span style="color:#000000"> </span><span style="color:#000000">info</span><span style="color:#000000">,</span><span style="color:#000000"> </span><span style="color:#000000">[</span><span style="color:#000000">flag</span><span style="color:#000000">]</span><span style="color:#000000">)</span></code>
@ -3516,7 +3516,7 @@ class="zplmr7m-">x </span>and puts it into
 class="zplmr7m-">y</span>
   <div class="math-display" >
 <img 
-src="userhtml24x.png" alt="/ &#x2190;  1/x(i).
+src="userhtml24x.png" alt="y(i) &#x2190; 1/x(i).
 " class="math-display" ></div>
 <!--l. 1388--><p class="nopar" >
 <!--l. 1390--><p class="indent" >   <code class="lstinline"><span style="color:#000000">psb_geinv</span><span style="color:#000000">(</span><span style="color:#000000">x</span><span style="color:#000000">,</span><span style="color:#000000"> </span><span style="color:#000000">y</span><span style="color:#000000">,</span><span style="color:#000000"> </span><span style="color:#000000">desc_a</span><span style="color:#000000">,</span><span style="color:#000000"> </span><span style="color:#000000">info</span><span style="color:#000000">,</span><span style="color:#000000"> </span><span style="color:#000000">[</span><span style="color:#000000">flag</span><span style="color:#000000">]</span><span style="color:#000000">)</span></code>
--- a/docs/html/userhtmlse6.html
+++ b/docs/html/userhtmlse6.html
--- a/docs/html/userhtmlse7.html
+++ b/docs/html/userhtmlse7.html
--- a/docs/html/userhtmlse8.html
+++ b/docs/html/userhtmlse8.html
@ -18,10 +18,10 @@ href="userhtmlse5.html#tailuserhtmlse8.html">tail</a>] [<a
 href="userhtml.html#userhtmlse11.html" >up</a>] </p></div>
   <h3 class="sectionHead"><span class="titlemark">8    </span> <a 
 id="x14-1240008"></a>Error handling</h3>
-<!--l. 5--><p class="noindent" >The PSBLAS library error handling policy has been completely rewritten in version
-2.0. The idea behind the design of this new error handling strategy is to keep error
+<!--l. 5--><p class="noindent" >The PSBLAS library error handling policy was defined at the time version 2.0
+was written. The idea behind the design of the error handling strategy is to keep error
 messages on a stack allowing the user to trace back up to the point where the first
-error message has been generated. Every routine in the PSBLAS-2.0 library has, as
+error message has been generated. Every routine in the PSBLAS library has, as
 last non-optional argument, an integer <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">info</span></span></span> variable; whenever, inside the
 routine, an error is detected, this variable is set to a value corresponding to a
@ -38,16 +38,16 @@ execution.
 <!--l. 23--><p class="indent" >   Figure&#x00A0;<a 
 href="#x14-124025r5">5<!--tex4ht:ref: fig:routerr --></a> shows the layout of a generic <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_foo</span></span></span> routine with respect to the
-PSBLAS-2.0 error handling policy. It is possible to see how, whenever an error
-condition is detected, the <span class="obeylines-h"><span class="verb"><span 
-class="cmtt-10">info</span></span></span> variable is set to the corresponding error code which
-is, then, pushed on top of the stack by means of the <span class="obeylines-h"><span class="verb"><span 
-class="cmtt-10">psb_errpush</span></span></span>. An error condition
-may be directly detected inside a routine or indirectly checking the error code
-returned returned by a called routine. Whenever an error is encountered, after it has
-been pushed on stack, the program execution skips to a point where the error
-condition is handled; the error condition is handled either by returning control to the
-caller routine or by calling the <span class="obeylines-h"><span class="verb"><span 
+PSBLAS error handling policy. It is possible to see how, whenever an error condition
+is detected, the <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">info</span></span></span> variable is set to the corresponding error code which is, then,
+pushed on top of the stack by means of the <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_errpush</span></span></span>. An error condition may be
+directly detected inside a routine or indirectly by checking the error code
+returned by a called routine. Whenever an error is encountered, after it has been
+pushed on stack, the program execution skips to a point where the error condition is
+handled; the error condition is handled either by returning control to the caller
+routine or by calling the <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_error</span></span></span> routine which prints the content of
 the error stack and aborts the program execution, according to the choice
 made by the user with <span class="obeylines-h"><span class="verb"><span 
@ -292,14 +292,13 @@ error handling policy.</span></div><!--tex4ht:label?: x14-124025r5 -->
                                                                  
   </div><hr class="endfloat" />
 <!--l. 112--><p class="indent" >   Figure&#x00A0;<a 
-href="#x14-124026r6">6<!--tex4ht:ref: fig:errormsg --></a> reports a sample error message generated by the PSBLAS-2.0
-library. This error has been generated by the fact that the user has chosen the
-invalid &#8220;FOO&#8221; storage format to represent the sparse matrix. From this
-error message it is possible to see that the error has been detected inside
-the <span class="obeylines-h"><span class="verb"><span 
-class="cmtt-10">psb_cest</span></span></span> subroutine called by <span class="obeylines-h"><span class="verb"><span 
-class="cmtt-10">psb_spasb</span></span></span> ... by process 0 (i.e. the root
-process).
+href="#x14-124026r6">6<!--tex4ht:ref: fig:errormsg --></a> reports a sample error message generated by the PSBLAS library. This
+error has been generated by the fact that the user has chosen the invalid &#8220;FOO&#8221;
+storage format to represent the sparse matrix. From this error message it is possible
+to see that the error has been detected inside the <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_cest</span></span></span> subroutine called by
+<span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_spasb</span></span></span> ... by process 0 (i.e. the root process).
                                                                  

                                                                  
@ -331,8 +330,8 @@ Aborting...
 <!--l. 156--><p class="nopar" >                                                              </div></div>
 </div>
 <br /> <div class="caption" 
-><span class="id">Listing 6: </span><span  
-class="content">A sample PSBLAS-3.0 error message. Process 0 detected an error
+><span class="id">Listing  6:  </span><span  
+class="content">A  sample  PSBLAS  error  message.  Process  0  detected  an  error
 condition inside the psb_cest subroutine</span></div><!--tex4ht:label?: x14-124026r6 -->
                                                                  

--- a/docs/psblas-3.9.pdf
+++ b/docs/psblas-3.9.pdf
--- a/docs/src/datastruct.tex
+++ b/docs/src/datastruct.tex
@ -26,7 +26,7 @@ defined in the library as follows:
 \item[psb\_mpk\_] Kind parameter for 4-byte integer data, as is
  always used by MPI; 
 \item[psb\_epk\_] Kind parameter for 8-byte integer data, as is
-  always used by the \fortinline|sizeof| methods;
+  always returned by the \fortinline|sizeof| methods;
 \item[psb\_ipk\_] Kind parameter for ``local'' integer indices and data;
  with default build options this is a 4-byte integer;
 \item[psb\_lpk\_] Kind parameter for ``global'' integer indices and data;
@ -47,9 +47,9 @@ developer's documentation.

 \subsection{Descriptor data structure}
 \label{sec:desc}
-All the general matrix informations and elements to be
-exchanged among processes are stored within a data structure of the
-type \hypertarget{descdata}{{\tt psb\_desc\_type}}. 
+All the general matrix information and the identification of elements
+to be exchanged among processes are stored within a data structure of
+the type \hypertarget{descdata}{{\tt psb\_desc\_type}}. 
 Every structure of this type is associated with a discretization
 pattern and enables data communications and other operations that are
 necessary for implementing the various algorithms of interest to us. 
--- a/docs/src/error.tex
+++ b/docs/src/error.tex
@ -2,11 +2,11 @@

 \section{Error handling\label{sec:errors}}

-The PSBLAS library error handling policy has been completely rewritten
-in version 2.0. The idea behind the design of this new error handling
-strategy is to keep error messages on a stack allowing the user to
-trace back up to the point where the first error message has been
-generated. Every routine in the PSBLAS-2.0 library has, as last
+The PSBLAS library error handling policy was defined at the time
+version 2.0 was written. The idea behind the design of the error
+handling strategy is to keep error messages on a stack allowing the
+user to trace back up to the point where the first error message has
+been generated. Every routine in the PSBLAS library has, as last
 non-optional argument, an integer \verb|info| variable; whenever,
 inside the routine, an error is detected, this variable is set to a
 value corresponding to a specific error code. Then this error code is
@ -21,7 +21,7 @@ levels of nested calls until the level where the user decides to abort
 the program execution.

 Figure~\ref{fig:routerr} shows the layout of a generic \verb|psb_foo|
-routine with respect to the PSBLAS-2.0 error handling policy. It is
+routine with respect to the PSBLAS error handling policy. It is
 possible to see how, whenever an error condition is detected, the
 \verb|info| variable is set to the corresponding error code which is,
 then, pushed on top of the stack by means of the
@ -110,7 +110,7 @@ end subroutine psb_foo


 Figure~\ref{fig:errormsg} reports a sample error message generated by
-the PSBLAS-2.0 library. This error has been generated by the fact that
+the PSBLAS library. This error has been generated by the fact that
 the user has chosen the invalid ``FOO'' storage format to represent
 the sparse matrix. From this error message it is possible to see that
 the error has been detected inside the \verb|psb_cest| subroutine
@ -161,7 +161,7 @@ Aborting...
    \fbox{\TheSbox}
  \end{center}
 \fi
-  \caption{\label{fig:errormsg}A sample PSBLAS-3.0 error
+  \caption{\label{fig:errormsg}A sample PSBLAS error
    message. Process 0 detected an error condition inside the {\textrm
    psb\_cest} subroutine}
 \end{listing}
--- a/docs/src/figures/psblas.eps
+++ b/docs/src/figures/psblas.eps
@ -1,10 +1,9 @@
 %!PS-Adobe-3.0 EPSF-3.0
 %%Title: psblas.fig
-%%Creator: fig2dev Version 3.2 Patchlevel 5d
-%%CreationDate: Thu Dec 15 14:55:15 2011
-%%For: sfilippo@donald (Salvatore Filippone)
-%%BoundingBox: 0 0 197 215
-%Magnification: 0.5000
+%%Creator: fig2dev Version 3.2.9a
+%%CreationDate: 2025-12-23 13:28:52
+%%BoundingBox: 0 0 194 215
+%%Magnification: 0.5000
 %%EndComments
 %%BeginProlog
 /$F2psDict 200 dict def
@ -12,37 +11,7 @@ $F2psDict begin
 $F2psDict /mtrx matrix put
 /col-1 {0 setgray} bind def
 /col0 {0.000 0.000 0.000 srgb} bind def
-/col1 {0.000 0.000 1.000 srgb} bind def
-/col2 {0.000 1.000 0.000 srgb} bind def
-/col3 {0.000 1.000 1.000 srgb} bind def
-/col4 {1.000 0.000 0.000 srgb} bind def
-/col5 {1.000 0.000 1.000 srgb} bind def
-/col6 {1.000 1.000 0.000 srgb} bind def
 /col7 {1.000 1.000 1.000 srgb} bind def
-/col8 {0.000 0.000 0.560 srgb} bind def
-/col9 {0.000 0.000 0.690 srgb} bind def
-/col10 {0.000 0.000 0.820 srgb} bind def
-/col11 {0.530 0.810 1.000 srgb} bind def
-/col12 {0.000 0.560 0.000 srgb} bind def
-/col13 {0.000 0.690 0.000 srgb} bind def
-/col14 {0.000 0.820 0.000 srgb} bind def
-/col15 {0.000 0.560 0.560 srgb} bind def
-/col16 {0.000 0.690 0.690 srgb} bind def
-/col17 {0.000 0.820 0.820 srgb} bind def
-/col18 {0.560 0.000 0.000 srgb} bind def
-/col19 {0.690 0.000 0.000 srgb} bind def
-/col20 {0.820 0.000 0.000 srgb} bind def
-/col21 {0.560 0.000 0.560 srgb} bind def
-/col22 {0.690 0.000 0.690 srgb} bind def
-/col23 {0.820 0.000 0.820 srgb} bind def
-/col24 {0.500 0.190 0.000 srgb} bind def
-/col25 {0.630 0.250 0.000 srgb} bind def
-/col26 {0.750 0.380 0.000 srgb} bind def
-/col27 {1.000 0.500 0.500 srgb} bind def
-/col28 {1.000 0.630 0.630 srgb} bind def
-/col29 {1.000 0.750 0.750 srgb} bind def
-/col30 {1.000 0.880 0.880 srgb} bind def
-/col31 {1.000 0.840 0.000 srgb} bind def

 end

@ -53,6 +22,7 @@ end
 /sa {save} bind def
 /rs {restore} bind def
 /l {lineto} bind def
+/rl {rlineto} bind def
 /m {moveto} bind def
 /rm {rmoveto} bind def
 /n {newpath} bind def
@ -77,14 +47,15 @@ end
  bind def
 /shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul
  4 -2 roll mul srgb} bind def
+/xfig_image {image Data flushfile} def
 /$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def
 /$F2psEnd {$F2psEnteredState restore end} def

 /pageheader {
-save
-newpath 0 215 moveto 0 0 lineto 197 0 lineto 197 215 lineto closepath clip newpath
-5.3 221.5 translate
-1 -1 scale
+sa
+n 0 215 m 0 0 l 194 0 l 194 215 l cp clip
+-5.3 221.5 tr
+1 -1 sc
 $F2psBegin
 10 setmiterlimit
 0 slj 0 slc
@ -99,7 +70,7 @@ pageheader
 %
 % Fig objects follow
 %
-% 
+%% 
 % here starts figure with depth 50
 /Times-Roman ff 396.88 scf sf
 540 6210 m
@ -115,15 +86,13 @@ n 2025 225 m 1800 225 1800 1350 225 arcto 4 {pop} repeat
  1800 1575 4275 1575 225 arcto 4 {pop} repeat
  4500 1575 4500 450 225 arcto 4 {pop} repeat
  4500 225 2025 225 225 arcto 4 {pop} repeat
- cp gs col0 s gr 
-% Polyline
+ cp gs col0 s gr % Polyline
 gs  clippath
-3180 1765 m 3180 1560 l 3120 1560 l 3120 1765 l 3120 1765 l 3150 1615 l 3180 1765 l cp
+3143 1568 m 3157 1568 l 3180 1765 l 3150 1735 l 3120 1765 l cp
 eoclip
 n 3150 2970 m
 3150 1575 l gs col0 s gr gr
-
-% arrowhead
+%% arrowhead
 15.000 slw
 n 3180 1765 m 3150 1615 l 3120 1765 l 3150 1735 l 3180 1765 l 
 cp gs 0.00 setgray ef gr  col0 s
@ -133,24 +102,20 @@ n 2100 2925 m 1845 2925 1845 4020 255 arcto 4 {pop} repeat
  1845 4275 4425 4275 255 arcto 4 {pop} repeat
  4680 4275 4680 3180 255 arcto 4 {pop} repeat
  4680 2925 2100 2925 255 arcto 4 {pop} repeat
- cp gs col0 s gr 
-% Polyline
+ cp gs col0 s gr % Polyline
 n 405 5670 m 180 5670 180 6750 225 arcto 4 {pop} repeat
  180 6975 2655 6975 225 arcto 4 {pop} repeat
  2880 6975 2880 5895 225 arcto 4 {pop} repeat
  2880 5670 405 5670 225 arcto 4 {pop} repeat
- cp gs col0 s gr 
-% Polyline
+ cp gs col0 s gr % Polyline
 n 2880 6300 m
- 3420 6300 l gs col0 s gr 
-% Polyline
+ 3420 6300 l gs col0 s gr % Polyline
 gs  clippath
-3180 4456 m 3180 4305 l 3120 4305 l 3120 4456 l 3120 4456 l 3150 4336 l 3180 4456 l cp
+3143 4313 m 3157 4313 l 3180 4456 l 3120 4456 l cp
 eoclip
 n 3150 6300 m
 3150 4320 l gs col0 s gr gr
-
-% arrowhead
+%% arrowhead
 n 3180 4456 m 3150 4336 l 3120 4456 l 3180 4456 l  cp gs 0.00 setgray ef gr  col0 s
 % Polyline
 15.000 slw
@ -167,8 +132,7 @@ n 3645 5625 m 3420 5625 3420 6795 225 arcto 4 {pop} repeat
  3420 7020 5895 7020 225 arcto 4 {pop} repeat
  6120 7020 6120 5850 225 arcto 4 {pop} repeat
  6120 5625 3645 5625 225 arcto 4 {pop} repeat
- cp gs col0 s gr 
-/Times-Roman ff 396.88 scf sf
+ cp gs col0 s gr /Times-Roman ff 396.88 scf sf
 2295 990 m
 gs 1 -1 sc (Application) col0 sh gr
 /Times-Roman ff 396.88 scf sf
@ -191,8 +155,8 @@ gs 1 -1 sc (Message Passing) col0 sh gr
 gs 1 -1 sc (MPI) col0 sh gr
 /Times-Roman ff 396.88 scf sf
 4050 2160 m
-gs 1 -1 sc (Fortran 2003) col0 sh gr
-% here ends figure;
+gs 1 -1 sc (Fortran 2008) col0 sh gr
+%% here ends figure;
 pagefooter
 showpage
 %%Trailer
--- a/docs/src/figures/psblas.fig
+++ b/docs/src/figures/psblas.fig
@ -1,15 +1,16 @@
-#FIG 3.2  Produced by xfig version 3.2.5b
+#FIG 3.2  Produced by xfig version 3.2.9
+#encoding: UTF-8
 Landscape
 Center
 Metric
-Letter  
+Letter
 50.00
 Single
 -2
 1200 2
 6 540 5940 2430 6660
-4 0 0 50 -1 0 25 0.0000 4 375 2250 540 6210 Serial Sparse\001
-4 0 0 50 -1 0 25 0.0000 4 285 1080 1080 6660 BLAS\001
+4 0 0 50 -1 0 25 0.0000 4 376 2163 540 6210 Serial Sparse\001
+4 0 0 50 -1 0 25 0.0000 4 288 1032 1080 6660 BLAS\001
 -6
 2 4 0 1 0 7 50 -1 -1 0.000 0 0 15 0 0 5
 	 4500 1575 1800 1575 1800 225 4500 225 4500 1575
@ -31,11 +32,11 @@ Single
 	 945 4995 5490 4995
 2 4 0 1 0 7 50 -1 -1 0.000 0 0 15 0 0 5
 	 6120 7020 3420 7020 3420 5625 6120 5625 6120 7020
-4 0 0 50 -1 0 25 0.0000 4 375 2010 2295 990 Application\001
-4 0 0 50 -1 0 25 0.0000 4 285 1665 2250 3735 PSBLAS \001
-4 0 0 50 -1 0 25 0.0000 4 285 1515 4050 2565 Interface\001
-4 0 0 50 -1 0 25 0.0000 4 285 885 4140 4860 Inner\001
-4 0 0 50 -1 0 25 0.0000 4 285 1515 4140 5310 Interface\001
-4 0 0 50 -1 0 25 0.0000 4 375 2910 3420 6120 Message Passing\001
-4 0 0 50 -1 0 25 0.0000 4 285 750 4275 6660 MPI\001
-4 0 0 50 -1 0 25 0.0000 4 285 2190 4050 2160 Fortran 2003\001
+4 0 0 50 -1 0 25 0.0000 4 376 1956 2295 990 Application\001
+4 0 0 50 -1 0 25 0.0000 4 288 1524 2250 3735 PSBLAS \001
+4 0 0 50 -1 0 25 0.0000 4 288 1466 4050 2565 Interface\001
+4 0 0 50 -1 0 25 0.0000 4 280 873 4140 4860 Inner\001
+4 0 0 50 -1 0 25 0.0000 4 288 1466 4140 5310 Interface\001
+4 0 0 50 -1 0 25 0.0000 4 376 2818 3420 6120 Message Passing\001
+4 0 0 50 -1 0 25 0.0000 4 276 730 4275 6660 MPI\001
+4 0 0 50 -1 0 25 0.0000 4 288 2137 4050 2160 Fortran 2008\001
--- a/docs/src/figures/psblas.pdf
+++ b/docs/src/figures/psblas.pdf
--- a/docs/src/figures/psblas.png
+++ b/docs/src/figures/psblas.png
--- a/docs/src/intro.tex
+++ b/docs/src/intro.tex
@ -21,12 +21,13 @@ The software architecture allows us to offer support for many
 alternatives in the implementation, including usage of
 heterogeneous platforms, and computations performed on GPUs through
 CUDA. 
-There is support for GPU computations through OpenACC, but it is at
-this time a highly experimental version; we plan to also look at using 
-accelerators through OpenMP as support from compilers improves.
+There is also support for GPU computations through OpenACC, but it is
+at this time a highly experimental version; we plan to also look at
+using  accelerators through OpenMP as support from compilers improves. 

-The project is lead by Salvatore Filippone; a number of people have been contributing to this package over the
-years; contributors in roughly reverse chronological order:
+The project is led by Salvatore Filippone; a number of people have
+been contributing to this package over the years; contributors in
+roughly reverse chronological order: 
 \begin{obeylines}
 Luca       Pepè Sciarria
 Theophane  Loloum
@ -81,18 +82,19 @@ works discussing advanced programming in Fortran~2008
 include~\cite{DesPat:11,RouXiaXu:11}; sufficient support for
 Fortran~2008 is now available from many compilers, including recent
 versions of the GNU Fortran compiler from the Free Software
-Foundation, and the FLANG compiler from the LLVM project.  
+Foundation, the FLANG compiler from the LLVM project, and the Intel
+OneAPI compiler.
+The README file contains a list of compilers against which we have
+successfully tested the current release. 


 Previous approaches have been based on mixing Fortran~95, with its
 support for object-based design, with other languages; these have
 been advocated by a number of authors, 
-e.g.~\cite{machiels}.  Moreover, the Fortran~95 facilities for dynamic
-memory management and interface overloading greatly enhance the
-usability of the PSBLAS 
-subroutines. In this way, the library can take care of runtime memory
-requirements that are quite difficult or even impossible to predict at
-implementation or compilation time.  
+e.g.~\cite{machiels}.  The Fortran~95 facilities for dynamic
+memory management and interface overloading ensure that the library
+can take care of runtime memory requirements that are quite difficult
+or even impossible to predict at implementation or compilation time.  

 The presentation of the
 PSBLAS library follows the general structure of the proposal for
@ -101,13 +103,13 @@ proposal for BLAS on dense matrices~\cite{BLAS1,BLAS2,BLAS3}.

 The applicability of sparse iterative solvers to many different areas
 causes some terminology problems because the same concept may be
-denoted through different names depending on the application area. The
-PSBLAS features presented in this document will be discussed referring
-to a   finite difference discretization of a Partial Differential
-Equation (PDE). However, the scope of the library is wider than
-that: for example, it can be applied to finite element discretizations
-of PDEs, and even to different classes of problems such as nonlinear
-optimization, for example in optimal control problems.
+denoted by different names depending on the application area. The
+PSBLAS features presented in this document will be discussed taking as
+a reference a finite difference discretization of a Partial
+Differential Equation (PDE). However, the scope of the library is
+wider than that: it can be applied to finite element and other 
+discretizations of PDEs, and even to different classes of problems
+such as nonlinear optimization, for example in optimal control problems.

 The design of a solver for sparse linear systems is driven by many
 conflicting objectives, such as limiting occupation of storage
@ -145,7 +147,7 @@ application layer.
 The serial parts of the computation on each process are executed through
 calls to the serial sparse BLAS subroutines. 
 In a similar way, the inter-process message exchanges are encapsulated
-in an applicaiton layer that has been strongly inspired by the  Basic
+in an application layer that has been strongly inspired by the  Basic
 Linear Algebra Communication Subroutines (BLACS) library~\cite{BLACS}.  
 Usually  there is no need to deal directly with MPI;  however, in some
 cases, MPI routines are used directly to improve efficiency. For
@ -184,10 +186,9 @@ the variable associated to each mesh point is assigned to a process
 that will  own the corresponding row in the coefficient matrix and
 will  carry out all related computations. This allocation strategy 
 is equivalent to a partition of the discretization mesh into {\em
-sub-domains}. 
-Our library  supports any distribution that keeps together 
-the coefficients of each matrix row; there are no other constraints on
-the variable assignment. 
+sub-domains};  our library  supports any distribution that keeps
+together  the coefficients of each matrix row; there are no other
+constraints on the variable assignment. 
 This choice is consistent with simple  data distributions 
 %commonly used in ScaLAPACK 
 such as  \verb|CYCLIC(N)| and \verb|BLOCK|, 
@ -201,8 +202,8 @@ matrices, that is, the entries of a vector follow the same distribution
 of the matrix rows.  

 We assume that the sparse matrix is built in parallel, where each
-process generates its own portion. We never require that the entire
-matrix be available on a single node. However, it is possible
+process generates its own portion: we never \emph{require} that the
+entire matrix be available on a single node. However, it is possible
 to hold the entire matrix in one process and distribute it
 explicitly\footnote{In our prototype implementation  we provide 
 sample scatter/gather routines.}, even though  the resulting memory 
@ -227,33 +228,31 @@ assigned to the parallel processes,
 we classify the  points of a given sub-domain as follows.
 \begin{description}
 \item[Internal.] An internal point of
- a given domain {\em depends} only on  points of the
-same domain. 
-If all points of a domain are assigned to one
-process, then a computational step (e.g., a
-matrix-vector product) of the 
-equations associated with the internal points  requires no data
-items from other domains and no communications.
-
-\item[Boundary.] A point of
-a given domain is a boundary point if it {\em depends} on  points
-belonging to other domains.
-
-\item[Halo.] A halo point for a given domain is a point belonging to
-another domain such that there is a boundary point which {\em depends\/}
+ a given sub-domain {\em depends} only on  points of the
+same sub-domain. 
+If all points of a sub-domain are assigned to one
+process, then a computational step (e.g., a matrix-vector product) of
+the  equations associated with the internal points  requires no data 
+items from other sub-domains and no communications.
+
+\item[Boundary.] A point of a given sub-domain is a boundary point if
+  it {\em depends} on  points belonging to other sub-domains.
+
+\item[Halo.] A halo point for a given sub-domain is a point belonging to
+another sub-domain such that there is a boundary point which {\em depends\/}
 on it. Whenever performing a computational step, such as a
 matrix-vector product, the values associated with halo points are
-requested from other domains. A boundary point of a given 
-domain is usually a halo point for some other domain\footnote{This is
+requested from other sub-domains. A boundary point of a given 
+sub-domain is usually a halo point for some other sub-domain\footnote{This is
  the normal situation when the pattern of the sparse matrix is
  symmetric, which is equivalent to saying that the interaction between
  two variables is reciprocal. If the matrix pattern is non-symmetric
  we may have one-way interactions, and these could cause a situation
  in which a boundary point is not a halo point for its neighbour.}; therefore
 the cardinality of the boundary points set determines the amount of data
- sent to other domains. 
+ sent to other sub-domains. 
 \item[Overlap.] An overlap point is a boundary point assigned to
-multiple domains. Any operation that involves an overlap point
+multiple sub-domains. Any operation that involves an overlap point
 has to be replicated for each assignment. 
 \end{description}
 Overlap points do not usually exist in the basic data
@ -378,7 +377,7 @@ $1\dots n_{\hbox{row}_i}$, each element of which corresponds to a certain
 element of $1\dots n$. The user does not explicitly set this mapping;
 when the application needs to indicate to which element of the index
 space a certain item is related, such as the row and column index of a
-matrix coefficient, it does so in the ``global'' numbering, and the
+matrix coefficient, it usually does so in the ``global'' numbering, and the
 library will translate into the appropriate ``local'' numbering. 

 For  a given index space $1\dots n$ there are many possible associated
@ -390,7 +389,7 @@ with a call to \verb|psb_cdasb| and a sparse matrix with a call to
 \verb|psb_spasb|. After \verb|psb_cdasb| each process $i$ will have
 defined a set of ``halo'' (or ``ghost'') indices
 $n_{\hbox{row}_i}+1\dots n_{\hbox{col}_i}$, denoting elements of the index
-space that are \emph{not} assigned to process $i$; however the
+space that are \emph{not} assigned to process $i$; the
 variables associated with them are needed to complete computations
 associated with the sparse matrix $A$, and thus they have to be
 fetched from (neighbouring) processes. The descriptor of the index
@ -521,7 +520,7 @@ computation. This is certainly true for the data allocation and
 assembly routines, for  all the computational routines and for some of
 the tools routines.

+However, there are cases where no synchronization, and indeed no
+However there are cases where no synchronization, and indeed no
 communication among processes, is implied; for instance, all the routines in
 sec.~\ref{sec:datastruct} are only acting on the local data structures,
 and thus may be called independently. The most important case is that
--- a/docs/src/methods.tex
+++ b/docs/src/methods.tex
@ -156,8 +156,8 @@ An integer value; 0 means no error has been detected.
  Richardson Iteration Driver    Routine}

 This subroutine is a driver implementing a Richardson iteration
-\[ x_{k+1} = M^-1 (b-Ax_k) +x_k,\] 
-with the preconditioner operator $M$ defined in the previous section. 
+\[ x_{k+1} = M^{-1} (b-Ax_k) +x_k,\] 
+with the preconditioner operator $M$ defined in section~\ref{sec:precs}. 

 The stopping criterion can take the following values:
 \begin{description}
--- a/docs/src/penv.tex
+++ b/docs/src/penv.tex
@ -94,12 +94,12 @@ Specified as: an integer variable.
 Scope: {\bf local}.\\
 Type: {\bf required}.\\
 Intent: {\bf out}.\\
-Specified as: an integer value. $-1 \le iam \le np-1$\
+Returned as: an integer value. $-1 \le iam \le np-1$\
 \item[np] Number of processes in the PSBLAS virtual parallel machine.\\
 Scope: {\bf global}.\\
 Type: {\bf required}.\\
 Intent: {\bf out}.\\
-Specified as: an integer variable. \
+Returned as: an integer variable. \
 \end{description}


@ -153,8 +153,9 @@ Specified as: a logical  variable, default value: true.
  same program, or to enter and exit multiple times into the parallel
  environment, this routine may be called to 
  selectively close the contexts with \verb|close=.false.|, while on
-  the last call it should be called with \verb|close=.true.| to
-  shutdown in a clean way the entire parallel environment.
+  the last call it should cleanly close the entire
+  parallel environment with \verb|close=.true.|.
+  
 \end{enumerate}


--- a/docs/src/psbrout.tex
+++ b/docs/src/psbrout.tex
@ -1246,7 +1246,7 @@ An integer value; 0 means no error has been detected.

 This function computes the entrywise product between two vectors $x$ and
 $y$
-\[dot \leftarrow x(i) y(i).\]
+\[y(i) \leftarrow x(i) y(i).\]

 \fortinline|psb_gemlt(x, y, desc_a, info)|

@ -1313,7 +1313,7 @@ $y$

 This function computes the entrywise division between two vectors $x$ and
 $y$
-\[/ \leftarrow x(i)/y(i).\]
+\[y(i) \leftarrow x(i)/y(i).\]

 \fortinline|psb_gediv(x, y, desc_a, info, [flag])|

@ -1385,7 +1385,7 @@ $y$

 This function computes the entrywise inverse of a vector $x$ and puts it into
 $y$
-\[/ \leftarrow 1/x(i).\]
+\[y(i) \leftarrow 1/x(i).\]

 \fortinline|psb_geinv(x, y, desc_a, info, [flag])|

--- a/docs/src/toolsrout.tex
+++ b/docs/src/toolsrout.tex
@ -598,7 +598,7 @@ An integer value; 0 means no error has been detected.
  state.
 \item The descriptor may be in either the build or assembled state.
 \item Providing a good estimate for the number of nonzeroes $nnz$ in
-  the assembled matrix may substantially improve performance in the
+  the assembled matrix may improve performance in the
  matrix build phase, as it will reduce or eliminate the need for
  (potentially multiple) data reallocations;
 \item Using \verb|psb_matbld_remote_| is likely to cause  a runtime  overhead at
@ -722,7 +722,7 @@ An integer value; 0 means no error has been detected.
  entirety to a   single call to this routine: the buildup of a row
  may be split into   as many calls as desired (even in the CSR format); 
 \item Coefficients from different rows may also be mixed up freely
-  in a single call, according to the application needs; 
+  in a single call (in COO format), according to the application needs; 
 \item Coefficients from matrix rows not owned by the calling
  process are treated according to the value of \verb|bldmode|
  specified at allocation time; if 
@ -1246,7 +1246,7 @@ An integer value; 0 means no error has been detected.
 %%   psb_glob_to_loc %%
 %
 \clearpage\subsection{psb\_glob\_to\_loc --- Global to local indices
-  convertion}
+  conversion}
 %\addcontentsline{toc}{subsection}{psb\_glob\_to\_loc}

 \begin{verbatim}
@ -1261,7 +1261,7 @@ call psb_glob_to_loc(x, desc_a, info, iact,owned)
 Scope: {\bf local} \\
 Type: {\bf required}\\
 Intent: {\bf in, inout}.\\
-Specified as: a rank one integer array.\\
+Specified as: a rank one integer array of global indices, i.e. \verb|psb_lpk_|.\\
 \item[desc\_a] the communication descriptor.\\
 Scope:{\bf local}.\\
 Type:{\bf required}.\\
@ -1292,7 +1292,7 @@ Intent: {\bf inout}.\\
 Specified as: a rank one integer array.
 \item[y] If $y$ is  present,
  then $y$ is overwritten with the translated integer indices, and $x$
-  is left unchanged. 
+  is left unchanged; since $y$ contains local indices it should use  \verb|psb_ipk_|. 
 Scope: {\bf global} \\
 Type: {\bf optional}\\
 Intent: {\bf out}.\\
@ -1325,7 +1325,8 @@ call psb_loc_to_glob(x, desc_a, info, iact)
 \begin{description}
 \item[Type:] Asynchronous.
 \item[\bf On Entry]
-\item[x] An integer vector of indices to be converted.\\
+\item[x] An integer vector of indices to be converted; if $y$ is present,
+ they are local indices, i.e. \verb|psb_ipk_| \\
 Scope: {\bf local} \\
 Type: {\bf required}\\
 Intent: {\bf in, inout}.\\
@ -1346,14 +1347,14 @@ Specified as: a character variable  \verb|I|gnore, \verb|W|arning or
 \begin{description}
 \item[\bf On Return]
 \item[x] If $y$ is not present,
+  then $x$ is overwritten with the translated integer  global indices, i.e. \verb|psb_lpk_|. 
+  then $x$ is overwritten with the translated integer  global indices, i.e. \verb|psb_lpk_| 
 Scope: {\bf global} \\
 Type: {\bf required}\\
 Intent: {\bf inout}.\\
 Specified as: a rank one integer array.
-\item[y] If $y$ is not present,
-  then $y$ is overwritten with the translated integer indices, and $x$
-  is left unchanged. 
+\item[y] If $y$ is present,
+  then $y$ is overwritten with the translated global
+  indices, i.e. \verb|psb_lpk_|, and $x$ is left unchanged. 
 Scope: {\bf global} \\
 Type: {\bf optional}\\
 Intent: {\bf out}.\\
@ -1773,7 +1774,7 @@ Specified as: a preconditioner data structure \precdata.
 \item[Function value] The memory occupation of the object specified in
  the calling sequence, in bytes.\\
 Scope: {\bf local} \\
-Returned  as: an \verb|integer(psb_long_int_k_)| number.
+Returned  as: an \verb|integer(psb_lpk_)| number.
 \end{description}


@ -1873,9 +1874,10 @@ the (sorted) value of $x$ in the original sequence.
  $O(n^2)$; of the other three, in the average case quicksort will be the 
  fastest and merge-sort the slowest. However note that:
  \begin{enumerate}
-  \item The the best case running time for insertion sort is $\Omega(n)$ while the average
-    and worst case  are $O(n^2)$; however for very short input sequences this is
-    likely to be the  fastest method;
+  \item The  best case running time for insertion sort is $\Omega(n)$
+    while the average and worst case  are $O(n^2)$; moreover, for
+    very short input sequences this is likely to be the  fastest
+    method; 
 \item The worst case running time for quicksort is $O(n^2)$; the algorithm
  implemented here follows the well-known median-of-three heuristics,
  but the worst case may still apply;
@ -1885,8 +1887,8 @@ the (sorted) value of $x$ in the original sequence.
  subsequences that may be already in the desired ordering prior to
  the subroutine call; this situation is relatively common when
  dealing with groups of indices of sparse matrix entries, thus
-  merge-sort is the preferred  choice when a sorting is needed
-  by other routines in the library. 
+  merge-sort is the preferred  choice when a sorting routine is needed 
+  for preprocessing matrix data.
 \end{enumerate}
 \end{enumerate}

--- a/docs/src/userguide.tex
+++ b/docs/src/userguide.tex
@ -136,7 +136,7 @@
 by Salvatore Filippone\\
 Alfredo Buttari \\
 Fabio Durastante}\\ 
-June 9th, 2025
+December 23rd, 2025
 \end{minipage}}
 }
 %\addtolength{\textwidth}{\centeroffset}
--- a/docs/src/userhtml.tex
+++ b/docs/src/userhtml.tex
@ -106,7 +106,7 @@ Fabio Durastante } \\
 %\today
 Software version: 3.9.0\\
 %\today
-June 9th, 2025
+December 23rd, 2025
 \cleardoublepage
 \begingroup
  \renewcommand*{\thepage}{toc}