Merge branch 'development' into cmake

1 year ago · a9685267a4
parent ee64f622d2 67b2937e6e
commit a9685267a4
20 changed files with 1780 additions and 1272 deletions
--- a/Make.inc.in
+++ b/Make.inc.in
@ -26,6 +26,7 @@ MPFC=@MPIFC@
 MPCC=@MPICC@
 FLINK=@FLINK@
 CLINK=@CLINK@
 LIBS=@LIBS@
 FLIBS=@FLIBS@
--- a/cbind/test/pdegen/Makefile
+++ b/cbind/test/pdegen/Makefile
@ -20,7 +20,8 @@ EXEDIR=./runs
 all: pdegen3dc 
 pdegen3dc: pdegen3dc.o
-	$(MPFC) pdegen3dc.o  -o pdegen3dc $(PSBC_LIBS) $(PSB_LIBS) $(PSBLDLIBS) -lm -lgfortran
+	$(FLINK) pdegen3dc.o  -o pdegen3dc $(PSBC_LIBS) $(PSB_LIBS)\
 	    $(PSBLDLIBS) -lm 
 	/bin/mv pdegen3dc $(EXEDIR)
--- a/2344
+++ b/2344
--- a/configure.ac
+++ b/configure.ac
@ -576,6 +576,7 @@ FDEFINES="$psblas_cv_define_prepend-DPSB_LPK${pac_cv_lpk_size} $FDEFINES";
 dnl CDEFINES="-DPSB_IPK${pac_cv_ipk_size} -DPSB_LPK${pac_cv_lpk_size} $CDEFINES"
 FLINK="$MPIFC"
 CLINK="$MPICC"	
 PAC_ARG_OPENMP()
 if test x"$pac_cv_openmp" == x"yes" ; then
   FDEFINES="$psblas_cv_define_prepend-DPSB_OPENMP $FDEFINES";
@ -585,6 +586,7 @@ if test x"$pac_cv_openmp" == x"yes" ; then
   CCOPT="$CCOPT $pac_cv_openmp_ccopt";
   CXXOPT="$CXXOPT $pac_cv_openmp_cxxopt";	
   FLINK="$FLINK $pac_cv_openmp_fcopt";
   CLINK="$CLINK $pac_cv_openmp_fcopt";	
 fi
 #
 # Tests for support of various Fortran features; some of them are critical,
@ -1004,6 +1006,7 @@ AC_SUBST(FIFLAG)
 AC_SUBST(FMFLAG)
 AC_SUBST(MODEXT)
 AC_SUBST(FLINK)
 AC_SUBST(CLINK)
 AC_SUBST(LIBS)
 AC_SUBST(FLIBS)
 AC_SUBST(AR)
--- a/docs/html/index.html
+++ b/docs/html/index.html
@ -23,7 +23,7 @@ class="pplb7t-">Alfredo Buttari </span><br
 class="newline" /><span 
 class="pplb7t-">Fabio Durastante  </span><br 
 class="newline" />Software version: 3.9.0<br 
-class="newline" />Jun 1st, 2025
+class="newline" />June 9th, 2025
--- a/docs/html/userhtml.html
+++ b/docs/html/userhtml.html
@ -23,7 +23,7 @@ class="pplb7t-">Alfredo Buttari </span><br
 class="newline" /><span 
 class="pplb7t-">Fabio Durastante  </span><br 
 class="newline" />Software version: 3.9.0<br 
-class="newline" />Jun 1st, 2025
+class="newline" />June 9th, 2025
--- a/docs/html/userhtml30x.png
+++ b/docs/html/userhtml30x.png
--- a/docs/html/userhtml34x.png
+++ b/docs/html/userhtml34x.png
--- a/docs/html/userhtml6.html
+++ b/docs/html/userhtml6.html
@ -11,7 +11,7 @@
 </head><body 
 >
         <div class="footnote-text">
-  <!--l. 209--><p class="indent" >       <span class="footnote-mark"><a 
+  <!--l. 208--><p class="indent" >       <span class="footnote-mark"><a 
 id="fn1x0"><a 
 id="x6-4003x2"></a>    <sup class="textsuperscript">1</sup></a></span><span 
 class="pplr7t-x-x-80">In our prototype implementation we provide sample scatter/gather routines.</span></div>
--- a/docs/html/userhtml7.html
+++ b/docs/html/userhtml7.html
@ -11,7 +11,7 @@
 </head><body 
 >
  <div class="footnote-text">
-  <!--l. 253--><p class="noindent" ><span class="footnote-mark"><a 
+  <!--l. 252--><p class="noindent" ><span class="footnote-mark"><a 
 id="fn2x0"><a 
 id="x7-5002x2.1"></a>    <sup class="textsuperscript">2</sup></a></span><span 
 class="pplr7t-x-x-80">This is the normal situation when the pattern of the sparse matrix is symmetric, which is equivalent to</span>
--- a/docs/html/userhtml8.html
+++ b/docs/html/userhtml8.html
@ -11,7 +11,7 @@
 </head><body 
 >
  <div class="footnote-text">
-  <!--l. 421--><p class="noindent" ><span class="footnote-mark"><a 
+  <!--l. 420--><p class="noindent" ><span class="footnote-mark"><a 
 id="fn3x0"><a 
 id="x8-7020x3"></a>    <sup class="textsuperscript">3</sup></a></span><span 
 class="pplr7t-x-x-80">The subroutine style </span><span 
--- a/docs/html/userhtmlli2.html
+++ b/docs/html/userhtmlli2.html
@ -41,8 +41,8 @@ improves.
 <!--l. 28--><p class="indent" >   The project is led by Salvatore Filippone; a number of people have been
 contributing to this package over the years; contributors in roughly reverse
 chronological order: <span class="obeylines-h">
   <br />Luca Pepè Sciarria
   <br />Theophane Loloum
   <br />Fabio Durastante
   <br />Dimitri Walther
   <br />Andrea Di Iorio
   <br />Stefano Petrilli
@ -50,7 +50,6 @@ chronological order: <span class="obeylines-h">
   <br />Zaak Beekman
   <br />Ambra Abdullahi Hassan
   <br />Pasqua D&#8217;Ambra
   <br />Alfredo Buttari
   <br />Daniela di Serafino
   <br />Michele Martone
   <br />Michele Colajanni
@ -59,11 +58,11 @@ chronological order: <span class="obeylines-h">
   <br />Dario Pascucci</span>
                                                                  <div class="flushright" 
 >
 <!--l. 48--><p class="noindent" >
 Salvatore Filippone<br />
 <!--l. 49--><p class="noindent" >
 Salvatore Filippone<br />
 Alfredo Buttari<br />
 Fabio Durastante</div>
@ -72,12 +71,12 @@ Fabio Durastante</div>
-   <!--l. 58--><div class="crosslinks"><p class="noindent">[<a 
+   <!--l. 57--><div class="crosslinks"><p class="noindent">[<a 
 href="userhtmlse1.html" >next</a>] [<a 
 href="userhtmlli1.html" >prev</a>] [<a 
 href="userhtmlli1.html#tailuserhtmlli1.html" >prev-tail</a>] [<a 
 href="userhtmlli2.html" >front</a>] [<a 
 href="userhtml.html#userhtmlli2.html" >up</a>] </p></div>
-<!--l. 58--><p class="indent" >   <a 
+<!--l. 57--><p class="indent" >   <a 
 id="tailuserhtmlli2.html"></a>  
 </body></html> 
--- a/docs/html/userhtmlse1.html
+++ b/docs/html/userhtmlse1.html
@ -10,7 +10,7 @@
 <link rel="stylesheet" type="text/css" href="userhtml.css"> 
 </head><body 
 >
-   <!--l. 58--><div class="crosslinks"><p class="noindent">[<a 
+   <!--l. 57--><div class="crosslinks"><p class="noindent">[<a 
 href="userhtmlse2.html" >next</a>] [<a 
 href="userhtmlli2.html" >prev</a>] [<a 
 href="userhtmlli2.html#tailuserhtmlli2.html" >prev-tail</a>] [<a 
@ -18,7 +18,7 @@ href="#tailuserhtmlse1.html">tail</a>] [<a
 href="userhtml.html#userhtmlse1.html" >up</a>] </p></div>
   <h3 class="sectionHead"><span class="titlemark">1    </span> <a 
 id="x4-30001"></a>Introduction</h3>
-<!--l. 60--><p class="noindent" >The PSBLAS library, developed with the aim to facilitate the parallelization of
+<!--l. 59--><p class="noindent" >The PSBLAS library, developed with the aim to facilitate the parallelization of
 computationally intensive scientific applications, is designed to address parallel
 implementation of iterative solvers for sparse linear systems through the
 distributed memory paradigm. It includes routines for multiplying sparse
@ -27,11 +27,11 @@ diagonal entries, preprocessing sparse matrices, and contains additional
 routines for dense matrix operations. The current implementation of PSBLAS
 addresses a distributed memory execution model operating with message
 passing.
-<!--l. 71--><p class="indent" >   The PSBLAS library version 3 is implemented in the Fortran&#x00A0;2008&#x00A0;<span class="cite">[<a 
+<!--l. 70--><p class="indent" >   The PSBLAS library version 3 is implemented in the Fortran&#x00A0;2008&#x00A0;<span class="cite">[<a 
 href="userhtmlli3.html#Xmetcalf">17</a>]</span>
 programming language, with reuse and/or adaptation of existing Fortran&#x00A0;77 and
 Fortran&#x00A0;95 software, plus a handful of C routines.
-<!--l. 76--><p class="indent" >   The use of Fortran&#x00A0;2008 offers a number of advantages over Fortran&#x00A0;95, mostly
+<!--l. 75--><p class="indent" >   The use of Fortran&#x00A0;2008 offers a number of advantages over Fortran&#x00A0;95, mostly
 in the handling of requirements for evolution and adaptation of the library to new
 computing architectures and integration of new algorithms. For a detailed
 discussion of our design see&#x00A0;<span class="cite">[<a 
@ -42,7 +42,7 @@ href="userhtmlli3.html#XRouXiaXu:11">19</a>]</span>; sufficient support for Fort
 from many compilers, including recent versions of the GNU Fortran compiler from
 the Free Software Foundation, and the FLANG compiler from the LLVM
 project.
-<!--l. 88--><p class="indent" >   Previous approaches have been based on mixing Fortran&#x00A0;95, with its support for
+<!--l. 87--><p class="indent" >   Previous approaches have been based on mixing Fortran&#x00A0;95, with its support for
 object-based design, with other languages; these have been advocated by a number
 of authors, e.g.&#x00A0;<span class="cite">[<a 
 href="userhtmlli3.html#Xmachiels">16</a>]</span>. Moreover, the Fortran&#x00A0;95 facilities for dynamic memory
@ -50,7 +50,7 @@ management and interface overloading greatly enhance the usability of the PSBLAS
 subroutines. In this way, the library can take care of runtime memory requirements
 that are quite difficult or even impossible to predict at implementation or
 compilation time.
-<!--l. 98--><p class="indent" >   The presentation of the PSBLAS library follows the general structure of the
+<!--l. 97--><p class="indent" >   The presentation of the PSBLAS library follows the general structure of the
 proposal for serial Sparse BLAS&#x00A0;<span class="cite">[<a 
 href="userhtmlli3.html#Xsblas97">8</a>,&#x00A0;<a 
 href="userhtmlli3.html#Xsblas02">9</a>]</span>, which in its turn is based on the proposal for
@ -58,7 +58,7 @@ BLAS on dense matrices&#x00A0;<span class="cite">[<a
 href="userhtmlli3.html#XBLAS1">15</a>,&#x00A0;<a 
 href="userhtmlli3.html#XBLAS2">5</a>,&#x00A0;<a 
 href="userhtmlli3.html#XBLAS3">6</a>]</span>.
-<!--l. 103--><p class="indent" >   The applicability of sparse iterative solvers to many different areas causes
+<!--l. 102--><p class="indent" >   The applicability of sparse iterative solvers to many different areas causes
 some terminology problems because the same concept may be denoted
 through different names depending on the application area. The PSBLAS
 features presented in this document will be discussed referring to a finite
@ -67,7 +67,7 @@ the scope of the library is wider than that: for example, it can be applied
 to finite element discretizations of PDEs, and even to different classes of
 problems such as nonlinear optimization, for example in optimal control
 problems.
-<!--l. 113--><p class="indent" >   The design of a solver for sparse linear systems is driven by many conflicting
+<!--l. 112--><p class="indent" >   The design of a solver for sparse linear systems is driven by many conflicting
 objectives, such as limiting occupation of storage resources, exploiting regularities in
 the input data, exploiting hardware characteristics of the parallel platform. To
@ -88,12 +88,12 @@ applications.
-   <!--l. 130--><div class="crosslinks"><p class="noindent">[<a 
+   <!--l. 129--><div class="crosslinks"><p class="noindent">[<a 
 href="userhtmlse2.html" >next</a>] [<a 
 href="userhtmlli2.html" >prev</a>] [<a 
 href="userhtmlli2.html#tailuserhtmlli2.html" >prev-tail</a>] [<a 
 href="userhtmlse1.html" >front</a>] [<a 
 href="userhtml.html#userhtmlse1.html" >up</a>] </p></div>
-<!--l. 130--><p class="indent" >   <a 
+<!--l. 129--><p class="indent" >   <a 
 id="tailuserhtmlse1.html"></a>  
 </body></html> 
--- a/docs/html/userhtmlse2.html
+++ b/docs/html/userhtmlse2.html
@ -10,7 +10,7 @@
 <link rel="stylesheet" type="text/css" href="userhtml.css"> 
 </head><body 
 >
-   <!--l. 130--><div class="crosslinks"><p class="noindent">[<a 
+   <!--l. 129--><div class="crosslinks"><p class="noindent">[<a 
 href="userhtmlse6.html" >next</a>] [<a 
 href="userhtmlse1.html" >prev</a>] [<a 
 href="userhtmlse1.html#tailuserhtmlse1.html" >prev-tail</a>] [<a 
@ -18,7 +18,7 @@ href="#tailuserhtmlse2.html">tail</a>] [<a
 href="userhtml.html#userhtmlse2.html" >up</a>] </p></div>
   <h3 class="sectionHead"><span class="titlemark">2    </span> <a 
 id="x5-40002"></a>General overview</h3>
-<!--l. 132--><p class="noindent" >The PSBLAS library is designed to handle the implementation of iterative solvers for
+<!--l. 131--><p class="noindent" >The PSBLAS library is designed to handle the implementation of iterative solvers for
 sparse linear systems on distributed memory parallel computers. The system
 coefficient matrix <span 
 class="zplmr7m-">A </span>must be square; it may be real or complex, nonsymmetric, and
@ -40,7 +40,7 @@ directly with MPI; however, in some cases, MPI routines are used directly
 to improve efficiency. For further details on our communication layer see
 Sec.&#x00A0;<a 
 href="userhtmlse7.html#x13-1060007">7<!--tex4ht:ref: sec:parenv --></a>.
-<!--l. 159--><p class="indent" >   <hr class="figure"><div class="figure" 
+<!--l. 158--><p class="indent" >   <hr class="figure"><div class="figure" 
 >
@ -52,8 +52,8 @@ href="userhtmlse7.html#x13-1060007">7<!--tex4ht:ref: sec:parenv --></a>.
 <div class="center" 
 >
-<!--l. 160--><p class="noindent" >
+<!--l. 159--><p class="noindent" >
-<!--l. 162--><p class="noindent" ><img 
+<!--l. 161--><p class="noindent" ><img 
 src="psblas.png" alt="PIC"  
 width="46" height="46" ></div>
 <br /> <div class="caption" 
@ -62,8 +62,8 @@ class="content">PSBLAS library components hierarchy.</span></div><!--tex4ht:labe
-<!--l. 168--><p class="indent" >   </div><hr class="endfigure">
+<!--l. 167--><p class="indent" >   </div><hr class="endfigure">
-<!--l. 171--><p class="indent" >   The types of linear system matrices that we address typically arise in
+<!--l. 170--><p class="indent" >   The types of linear system matrices that we address typically arise in
 the numerical solution of PDEs; in such a context, it is necessary to pay
 special attention to the structure of the problem from which the application
 originates. The nonzero pattern of a matrix arising from the discretization of a
@ -71,7 +71,7 @@ PDE is influenced by various factors, such as the shape of the domain, the
 discretization strategy, and the equation/unknown ordering. The matrix itself can be
 interpreted as the adjacency matrix of the graph associated with the discretization
 mesh.
-<!--l. 182--><p class="indent" >   The distribution of the coefficient matrix for the linear system is based on the
+<!--l. 181--><p class="indent" >   The distribution of the coefficient matrix for the linear system is based on the
 &#8220;owner computes&#8221; rule: the variable associated to each mesh point is assigned to a
 process that will own the corresponding row in the coefficient matrix and will
 carry out all related computations. This allocation strategy is equivalent to a
@ -88,7 +88,7 @@ the literature, e.g. METIS&#x00A0;<span class="cite">[<a
 href="userhtmlli3.html#XMETIS">14</a>]</span>. Dense vectors conform to sparse matrices,
 that is, the entries of a vector follow the same distribution of the matrix
 rows.
-<!--l. 204--><p class="indent" >   We assume that the sparse matrix is built in parallel, where each process generates
+<!--l. 203--><p class="indent" >   We assume that the sparse matrix is built in parallel, where each process generates
 its own portion. We never require that the entire matrix be available on a single
 node. However, it is possible to hold the entire matrix in one process and distribute it
 explicitly<span class="footnote-mark"><a 
@ -98,10 +98,10 @@ even though the resulting memory bottleneck would make this option unattractive
 in most cases.
   <h4 class="subsectionHead"><span class="titlemark">2.1    </span> <a 
 id="x5-50002.1"></a>Basic Nomenclature</h4>
-<!--l. 216--><p class="noindent" >Our computational model implies that the data allocation on the parallel distributed
+<!--l. 215--><p class="noindent" >Our computational model implies that the data allocation on the parallel distributed
 memory machine is guided by the structure of the physical model, and specifically
 by the discretization mesh of the PDE.
-<!--l. 221--><p class="indent" >   Each point of the discretization mesh will have (at least) one associated
+<!--l. 220--><p class="indent" >   Each point of the discretization mesh will have (at least) one associated
 equation/variable, and therefore one index. We say that point <span 
 class="zplmr7m-">i </span><span 
 class="pplri7t-">depends </span>on point <span 
@ -117,11 +117,11 @@ class="pplri7t-">sub-domains </span>assigned
 to the parallel processes, we classify the points of a given sub-domain as
 follows.
     <dl class="description"><dt class="description">
-     <!--l. 230--><p class="noindent" >
+     <!--l. 229--><p class="noindent" >
 <span 
 class="pplb7t-">Internal.</span> </dt><dd 
 class="description">
-     <!--l. 230--><p class="noindent" >An internal point of a given domain <span 
+     <!--l. 229--><p class="noindent" >An internal point of a given domain <span 
 class="pplri7t-">depends </span>only on points of the same
     domain.  If  all  points  of  a  domain  are  assigned  to  one  process,  then
     a  computational  step  (e.g.,  a  matrix-vector  product)  of  the  equations
@ -131,19 +131,19 @@ class="pplri7t-">depends </span>only on points of the same
     associated  with  the  internal  points  requires  no  data  items  from  other
     domains and no communications.
     </dd><dt class="description">
-     <!--l. 239--><p class="noindent" >
+     <!--l. 238--><p class="noindent" >
 <span 
 class="pplb7t-">Boundary.</span> </dt><dd 
 class="description">
-     <!--l. 239--><p class="noindent" >A  point  of  a  given  domain  is  a  boundary  point  if  it  <span 
+     <!--l. 238--><p class="noindent" >A  point  of  a  given  domain  is  a  boundary  point  if  it  <span 
 class="pplri7t-">depends  </span>on  points
     belonging to other domains.
     </dd><dt class="description">
-     <!--l. 243--><p class="noindent" >
+     <!--l. 242--><p class="noindent" >
 <span 
 class="pplb7t-">Halo.</span> </dt><dd 
 class="description">
-     <!--l. 243--><p class="noindent" >A halo point for a given domain is a point belonging to another domain
+     <!--l. 242--><p class="noindent" >A halo point for a given domain is a point belonging to another domain
     such that there is a boundary point which <span 
 class="pplri7t-">depends </span>on it. Whenever performing
     a computational step, such as a matrix-vector product, the values associated
@ -151,22 +151,22 @@ class="pplri7t-">depends </span>on it. Whenever performing
     a given domain is usually a halo point for some other domain<span class="footnote-mark"><a 
 href="userhtml7.html#fn2x0"><sup class="textsuperscript">2</sup></a></span><a 
 id="x5-5001f2"></a> ;
-     therefore the cardinality of the boundary points set denotes the amount
+     therefore the cardinality of the boundary points set determines the amount
     of data sent to other domains.
     </dd><dt class="description">
-     <!--l. 256--><p class="noindent" >
+     <!--l. 255--><p class="noindent" >
 <span 
 class="pplb7t-">Overlap.</span> </dt><dd 
 class="description">
-     <!--l. 256--><p class="noindent" >An overlap point is a boundary point assigned to multiple domains. Any
+     <!--l. 255--><p class="noindent" >An overlap point is a boundary point assigned to multiple domains. Any
     operation  that  involves  an  overlap  point  has  to  be  replicated  for  each
     assignment.</dd></dl>
-<!--l. 260--><p class="noindent" >Overlap points do not usually exist in the basic data distributions; however they are a
+<!--l. 259--><p class="noindent" >Overlap points do not usually exist in the basic data distributions; however they are a
 feature of Domain Decomposition Schwarz preconditioners which are the subject of
 related research work&#x00A0;<span class="cite">[<a 
 href="userhtmlli3.html#X2007c">4</a>,&#x00A0;<a 
 href="userhtmlli3.html#X2007d">3</a>]</span>.
-<!--l. 265--><p class="indent" >   We denote the sets of internal, boundary and halo points for a given subdomain
+<!--l. 264--><p class="indent" >   We denote the sets of internal, boundary and halo points for a given subdomain
 by <span 
 class="zplmr7y-"><img 
 src="zplmr7y-49.png" alt="I" class="x-x-49" /></span>, <span 
@ -203,7 +203,7 @@ class="zplmr7y-">|<img
 src="zplmr7y-48.png" alt="H" class="x-x-48" /></span><sub><span 
 class="zplmr7m-x-x-76">i</span></sub><span 
 class="zplmr7y-">|</span>.
-<!--l. 275--><p class="indent" >   <hr class="figure"><div class="figure" 
+<!--l. 274--><p class="indent" >   <hr class="figure"><div class="figure" 
 >
@ -215,8 +215,8 @@ class="zplmr7y-">|</span>.
 <div class="center" 
 >
-<!--l. 276--><p class="noindent" >
+<!--l. 275--><p class="noindent" >
-<!--l. 279--><p class="noindent" ><img 
+<!--l. 278--><p class="noindent" ><img 
 src="points.png" alt="PIC"  
 width="46" height="46" ></div>
 <br /> <div class="caption" 
@ -225,113 +225,113 @@ class="content">Point classification.</span></div><!--tex4ht:label?: x5-5003r2 --
-<!--l. 285--><p class="indent" >   </div><hr class="endfigure">
+<!--l. 284--><p class="indent" >   </div><hr class="endfigure">
-<!--l. 287--><p class="indent" >   This classification of mesh points guides the naming scheme that we adopted in
+<!--l. 286--><p class="indent" >   This classification of mesh points guides the naming scheme that we adopted in
 the library internals and in the data structures. We explicitly note that &#8220;Halo&#8221; points
 are also often called &#8220;ghost&#8221; points in the literature.
   <h4 class="subsectionHead"><span class="titlemark">2.2    </span> <a 
 id="x5-60002.2"></a>Library contents</h4>
-<!--l. 296--><p class="noindent" >The PSBLAS library consists of various classes of subroutines:
+<!--l. 295--><p class="noindent" >The PSBLAS library consists of various classes of subroutines:
     <dl class="description"><dt class="description">
-     <!--l. 298--><p class="noindent" >
+     <!--l. 297--><p class="noindent" >
 <span 
 class="pplb7t-">Computational routines</span> </dt><dd 
 class="description">
-     <!--l. 298--><p class="noindent" >comprising:
+     <!--l. 297--><p class="noindent" >comprising:
         <ul class="itemize1">
         <li class="itemize">
-         <!--l. 300--><p class="noindent" >Sparse matrix by dense matrix product;
+         <!--l. 299--><p class="noindent" >Sparse matrix by dense matrix product;
         </li>
         <li class="itemize">
-         <!--l. 301--><p class="noindent" >Sparse triangular systems solution for block diagonal matrices;
+         <!--l. 300--><p class="noindent" >Sparse triangular systems solution for block diagonal matrices;
         </li>
         <li class="itemize">
-         <!--l. 303--><p class="noindent" >Vector and matrix norms;
+         <!--l. 302--><p class="noindent" >Vector and matrix norms;
         </li>
         <li class="itemize">
-         <!--l. 304--><p class="noindent" >Dense matrix sums;
+         <!--l. 303--><p class="noindent" >Dense matrix sums;
         </li>
         <li class="itemize">
-         <!--l. 305--><p class="noindent" >Dot products.</li></ul>
+         <!--l. 304--><p class="noindent" >Dot products.</li></ul>
     </dd><dt class="description">
-     <!--l. 307--><p class="noindent" >
+     <!--l. 306--><p class="noindent" >
 <span 
 class="pplb7t-">Communication routines</span> </dt><dd 
 class="description">
-     <!--l. 307--><p class="noindent" >handling halo and overlap communications;
+     <!--l. 306--><p class="noindent" >handling halo and overlap communications;
     </dd><dt class="description">
-     <!--l. 309--><p class="noindent" >
+     <!--l. 308--><p class="noindent" >
 <span 
 class="pplb7t-">Data management and auxiliary routines</span> </dt><dd 
 class="description">
-     <!--l. 309--><p class="noindent" >including:
+     <!--l. 308--><p class="noindent" >including:
         <ul class="itemize1">
         <li class="itemize">
-         <!--l. 311--><p class="noindent" >Parallel environment management
+         <!--l. 310--><p class="noindent" >Parallel environment management
         </li>
         <li class="itemize">
-         <!--l. 312--><p class="noindent" >Communication descriptors allocation;
+         <!--l. 311--><p class="noindent" >Communication descriptors allocation;
         </li>
         <li class="itemize">
-         <!--l. 313--><p class="noindent" >Dense and sparse matrix allocation;
+         <!--l. 312--><p class="noindent" >Dense and sparse matrix allocation;
         </li>
         <li class="itemize">
-         <!--l. 314--><p class="noindent" >Dense and sparse matrix build and update;
+         <!--l. 313--><p class="noindent" >Dense and sparse matrix build and update;
         </li>
         <li class="itemize">
-         <!--l. 315--><p class="noindent" >Sparse matrix and data distribution preprocessing.</li></ul>
+         <!--l. 314--><p class="noindent" >Sparse matrix and data distribution preprocessing.</li></ul>
     </dd><dt class="description">
-     <!--l. 317--><p class="noindent" >
+     <!--l. 316--><p class="noindent" >
 <span 
 class="pplb7t-">Preconditioner routines</span> </dt><dd 
 class="description">
-     <!--l. 317--><p class="noindent" >
+     <!--l. 316--><p class="noindent" >
     </dd><dt class="description">
-     <!--l. 318--><p class="noindent" >
+     <!--l. 317--><p class="noindent" >
 <span 
 class="pplb7t-">Iterative methods</span> </dt><dd 
 class="description">
-     <!--l. 318--><p class="noindent" >a subset of classical and Krylov subspace iterative methods</dd></dl>
+     <!--l. 317--><p class="noindent" >a subset of classical and Krylov subspace iterative methods</dd></dl>
-<!--l. 321--><p class="noindent" >The following naming scheme has been adopted for all the symbols internally defined
+<!--l. 320--><p class="noindent" >The following naming scheme has been adopted for all the symbols internally defined
 in the PSBLAS software package:
     <ul class="itemize1">
     <li class="itemize">
-     <!--l. 324--><p class="noindent" >all symbols (i.e. subroutine names, data types...) are prefixed by <span class="obeylines-h"><span class="verb"><span 
+     <!--l. 323--><p class="noindent" >all symbols (i.e. subroutine names, data types...) are prefixed by <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_</span></span></span>
     </li>
     <li class="itemize">
-     <!--l. 326--><p class="noindent" >all data type names are suffixed by <span class="obeylines-h"><span class="verb"><span 
+     <!--l. 325--><p class="noindent" >all data type names are suffixed by <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">_type</span></span></span>
     </li>
     <li class="itemize">
-     <!--l. 327--><p class="noindent" >all constants are suffixed by <span class="obeylines-h"><span class="verb"><span 
+     <!--l. 326--><p class="noindent" >all constants are suffixed by <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">_</span></span></span>
     </li>
     <li class="itemize">
-     <!--l. 328--><p class="noindent" >all top-level subroutine names follow the rule <span class="obeylines-h"><span class="verb"><span 
+     <!--l. 327--><p class="noindent" >all top-level subroutine names follow the rule <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_xxname</span></span></span> where <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">xx</span></span></span> can be
     either:
         <ul class="itemize2">
         <li class="itemize">
-         <!--l. 331--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span 
+         <!--l. 330--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">ge</span></span></span>: the routine is related to dense data,
         </li>
         <li class="itemize">
-         <!--l. 332--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span 
+         <!--l. 331--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">sp</span></span></span>: the routine is related to sparse data,
         </li>
         <li class="itemize">
-         <!--l. 333--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span 
+         <!--l. 332--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">cd</span></span></span>: the routine is related to communication descriptor (see&#x00A0;<a 
 href="userhtmlse3.html#x9-100003">3<!--tex4ht:ref: sec:datastruct --></a>).</li></ul>
-     <!--l. 336--><p class="noindent" >For example the <span class="obeylines-h"><span class="verb"><span 
+     <!--l. 335--><p class="noindent" >For example the <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_geins</span></span></span>, <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_spins</span></span></span> and <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_cdins</span></span></span> perform the same
@ -339,33 +339,33 @@ class="cmtt-10">psb_cdins</span></span></span> perform the same
 href="userhtmlse6.html#x12-780006">6<!--tex4ht:ref: sec:toolsrout --></a>) on dense matrices, sparse matrices and communication
     descriptors respectively. Interface overloading allows the usage of the same
     subroutine names for both real and complex data.</li></ul>
-<!--l. 343--><p class="noindent" >In the description of the subroutines, arguments or argument entries are classified
+<!--l. 342--><p class="noindent" >In the description of the subroutines, arguments or argument entries are classified
 as:
     <dl class="description"><dt class="description">
-     <!--l. 346--><p class="noindent" >
+     <!--l. 345--><p class="noindent" >
 <span 
 class="pplb7t-">global</span> </dt><dd 
 class="description">
-     <!--l. 346--><p class="noindent" >For  input  arguments,  the  value  must  be  the  same  on  all  processes
+     <!--l. 345--><p class="noindent" >For  input  arguments,  the  value  must  be  the  same  on  all  processes
     participating  in  the  subroutine  call;  for  output  arguments  the  value  is
     guaranteed to be the same.
     </dd><dt class="description">
-     <!--l. 349--><p class="noindent" >
+     <!--l. 348--><p class="noindent" >
 <span 
 class="pplb7t-">local</span> </dt><dd 
 class="description">
-     <!--l. 349--><p class="noindent" >Each process has its own value(s) independently.</dd></dl>
+     <!--l. 348--><p class="noindent" >Each process has its own value(s) independently.</dd></dl>
-<!--l. 351--><p class="noindent" >To finish our general description, we define a version string with the constant
+<!--l. 350--><p class="noindent" >To finish our general description, we define a version string with the constant
   <div class="math-display" >
 <img 
 src="userhtml0x.png" alt="psb_version_string_
 " class="math-display" ></div>
-<!--l. 353--><p class="nopar" > whose current value is <span class="obeylines-h"><span class="verb"><span 
+<!--l. 352--><p class="nopar" > whose current value is <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">3.9.0</span></span></span>
-<!--l. 356--><p class="noindent" >
+<!--l. 355--><p class="noindent" >
   <h4 class="subsectionHead"><span class="titlemark">2.3    </span> <a 
 id="x5-70002.3"></a>Application structure</h4>
-<!--l. 359--><p class="noindent" >The main underlying principle of the PSBLAS library is that the library objects are
+<!--l. 358--><p class="noindent" >The main underlying principle of the PSBLAS library is that the library objects are
 created and exist with reference to a discretized space to which there corresponds
 an index space and a matrix sparsity pattern. As an example, consider a
 cell-centered finite-volume discretization of the Navier-Stokes equations on a
@ -375,13 +375,13 @@ class="zplmr7m-">n </span>is isomorphic to the set of cell centers,
 whereas the pattern of the associated linear system matrix is isomorphic to the
 adjacency graph imposed on the discretization mesh by the discretization
 stencil.
-<!--l. 369--><p class="indent" >   Thus the first order of business is to establish an index space, and this is done
+<!--l. 368--><p class="indent" >   Thus the first order of business is to establish an index space, and this is done
 with a call to <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_cdall</span></span></span> in which we specify the size of the index space <span 
 class="zplmr7m-">n </span>and the
 allocation of the elements of the index space to the various processes making up the
 MPI (virtual) parallel machine.
-<!--l. 375--><p class="indent" >   The index space is partitioned among processes, and this creates a mapping from
+<!--l. 374--><p class="indent" >   The index space is partitioned among processes, and this creates a mapping from
 the &#8220;global&#8221; numbering 1<span 
 class="zplmr7m-">&#x2026;</span><span 
 class="zplmr7m-">n </span>to a numbering &#8220;local&#8221; to each process; each process <span 
@ -400,7 +400,7 @@ numbering.
-<!--l. 385--><p class="indent" >   For a given index space 1<span 
+<!--l. 384--><p class="indent" >   For a given index space 1<span 
 class="zplmr7m-">&#x2026;</span><span 
 class="zplmr7m-">n </span>there are many possible associated topologies, i.e.
 many different discretization stencils; thus the description of the index space is not
@ -430,44 +430,44 @@ class="zplmr7m-">A</span>, and thus they have to be fetched from (neighbouring)
 processes. The descriptor of the index space is built exactly for the purpose
 of properly sequencing the communication steps required to achieve this
 objective.
-<!--l. 401--><p class="indent" >   A simple application structure will walk through the index space allocation,
+<!--l. 400--><p class="indent" >   A simple application structure will walk through the index space allocation,
 matrix/vector creation and linear system solution as follows:
     <ol  class="enumerate1" >
 <li 
  class="enumerate" id="x5-7002x1">
-     <!--l. 405--><p class="noindent" >Initialize parallel environment with <span class="obeylines-h"><span class="verb"><span 
+     <!--l. 404--><p class="noindent" >Initialize parallel environment with <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_init</span></span></span>;
     </li>
 <li 
  class="enumerate" id="x5-7004x2">
-     <!--l. 406--><p class="noindent" >Initialize index space with <span class="obeylines-h"><span class="verb"><span 
+     <!--l. 405--><p class="noindent" >Initialize index space with <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_cdall</span></span></span>;
     </li>
 <li 
  class="enumerate" id="x5-7006x3">
-     <!--l. 407--><p class="noindent" >Allocate sparse matrix and dense vectors with <span class="obeylines-h"><span class="verb"><span 
+     <!--l. 406--><p class="noindent" >Allocate sparse matrix and dense vectors with <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_spall</span></span></span> and <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_geall</span></span></span>;
     </li>
 <li 
  class="enumerate" id="x5-7008x4">
-     <!--l. 409--><p class="noindent" >Loop over all local rows, generate matrix and vector entries, and insert
+     <!--l. 408--><p class="noindent" >Loop over all local rows, generate matrix and vector entries, and insert
     them with <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_spins</span></span></span> and <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_geins</span></span></span>
     </li>
 <li 
  class="enumerate" id="x5-7010x5">
-     <!--l. 411--><p class="noindent" >Assemble the various entities:
+     <!--l. 410--><p class="noindent" >Assemble the various entities:
         <ol  class="enumerate2" >
 <li 
  class="enumerate" id="x5-7012x1">
-         <!--l. 413--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span 
+         <!--l. 412--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_cdasb</span></span></span>,
         </li>
 <li 
  class="enumerate" id="x5-7014x2">
-         <!--l. 414--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span 
+         <!--l. 413--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_spasb</span></span></span>,
@ -475,12 +475,12 @@ class="cmtt-10">psb_spasb</span></span></span>,
         </li>
 <li 
  class="enumerate" id="x5-7016x3">
-         <!--l. 415--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span 
+         <!--l. 414--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_geasb</span></span></span>;</li></ol>
     </li>
 <li 
  class="enumerate" id="x5-7018x6">
-     <!--l. 417--><p class="noindent" >Choose the preconditioner to be used with <span class="obeylines-h"><span class="verb"><span 
+     <!--l. 416--><p class="noindent" >Choose the preconditioner to be used with <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">prec%init</span></span></span> and <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">prec%set</span></span></span>, and build it with
     <span class="obeylines-h"><span class="verb"><span 
@ -490,39 +490,39 @@ href="userhtml8.html#fn3x0"><sup class="textsuperscript">3</sup></a></span><a
     </li>
 <li 
  class="enumerate" id="x5-7022x7">
-     <!--l. 422--><p class="noindent" >Call one of the iterative drivers with the method of choice, e.g. <span class="obeylines-h"><span class="verb"><span 
+     <!--l. 421--><p class="noindent" >Call one of the iterative drivers with the method of choice, e.g. <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_krylov</span></span></span>
     with <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">bicgstab</span></span></span>.</li></ol>
-<!--l. 425--><p class="noindent" >This is the structure of the sample programs in the directory <span class="obeylines-h"><span class="verb"><span 
+<!--l. 424--><p class="noindent" >This is the structure of the sample programs in the directory <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">test/pargen/</span></span></span>.
-<!--l. 428--><p class="indent" >   For a simulation in which the same discretization mesh is used over multiple
+<!--l. 427--><p class="indent" >   For a simulation in which the same discretization mesh is used over multiple
 time steps, the following structure may be more appropriate:
     <ol  class="enumerate1" >
 <li 
  class="enumerate" id="x5-7024x1">
-     <!--l. 431--><p class="noindent" >Initialize parallel environment with <span class="obeylines-h"><span class="verb"><span 
+     <!--l. 430--><p class="noindent" >Initialize parallel environment with <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_init</span></span></span>
     </li>
 <li 
  class="enumerate" id="x5-7026x2">
-     <!--l. 432--><p class="noindent" >Initialize index space with <span class="obeylines-h"><span class="verb"><span 
+     <!--l. 431--><p class="noindent" >Initialize index space with <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_cdall</span></span></span>
     </li>
 <li 
  class="enumerate" id="x5-7028x3">
-     <!--l. 433--><p class="noindent" >Loop   over   the   topology   of   the   discretization   mesh   and   build   the
+     <!--l. 432--><p class="noindent" >Loop   over   the   topology   of   the   discretization   mesh   and   build   the
     descriptor with <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_cdins</span></span></span>;
     </li>
 <li 
  class="enumerate" id="x5-7030x4">
-     <!--l. 435--><p class="noindent" >Assemble the descriptor with <span class="obeylines-h"><span class="verb"><span 
+     <!--l. 434--><p class="noindent" >Assemble the descriptor with <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_cdasb</span></span></span>;
     </li>
 <li 
  class="enumerate" id="x5-7032x5">
-     <!--l. 436--><p class="noindent" >Allocate  the  sparse  matrices  and  dense  vectors  with;  <span class="obeylines-h"><span class="verb"><span 
+     <!--l. 435--><p class="noindent" >Allocate  the  sparse  matrices  and  dense  vectors  with  <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_spall</span></span></span> and
     <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_geall</span></span></span>;
@ -532,34 +532,34 @@ class="cmtt-10">psb_geall</span></span></span>;
     </li>
 <li 
  class="enumerate" id="x5-7034x6">
-     <!--l. 438--><p class="noindent" >Loop over the time steps:
+     <!--l. 437--><p class="noindent" >Loop over the time steps:
         <ol  class="enumerate2" >
 <li 
  class="enumerate" id="x5-7036x1">
-         <!--l. 440--><p class="noindent" >If after first time step, reinitialize the sparse matrix with <span class="obeylines-h"><span class="verb"><span 
+         <!--l. 439--><p class="noindent" >If after the first time step, reinitialize the sparse matrix with <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_sprn</span></span></span>;
         also zero out the dense vectors;
         </li>
 <li 
  class="enumerate" id="x5-7038x2">
-         <!--l. 443--><p class="noindent" >Loop  over  the  mesh,  generate  the  coefficients  and  insert/update
+         <!--l. 442--><p class="noindent" >Loop  over  the  mesh,  generate  the  coefficients  and  insert/update
         them with <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_spins</span></span></span> and <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_geins</span></span></span>;
         </li>
 <li 
  class="enumerate" id="x5-7040x3">
-         <!--l. 445--><p class="noindent" >Assemble with <span class="obeylines-h"><span class="verb"><span 
+         <!--l. 444--><p class="noindent" >Assemble with <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_spasb</span></span></span> and <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_geasb</span></span></span>;
         </li>
 <li 
  class="enumerate" id="x5-7042x4">
-         <!--l. 446--><p class="noindent" >
+         <!--l. 445--><p class="noindent" >
         </li>
 <li 
  class="enumerate" id="x5-7044x5">
-         <!--l. 446--><p class="noindent" >Choose   the   preconditioner   to   be   used   with   <span class="obeylines-h"><span class="verb"><span 
+         <!--l. 445--><p class="noindent" >Choose   the   preconditioner   to   be   used   with   <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">prec%init</span></span></span> and
         <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">prec%set</span></span></span>, and build it with <span class="obeylines-h"><span class="verb"><span 
@ -567,21 +567,21 @@ class="cmtt-10">prec%build</span></span></span>;
         </li>
 <li 
  class="enumerate" id="x5-7046x6">
-         <!--l. 449--><p class="noindent" >Call  one  of  the  iterative  drivers  with  the  method  of  choice,  e.g.
+         <!--l. 448--><p class="noindent" >Call  one  of  the  iterative  drivers  with  the  method  of  choice,  e.g.
         <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_krylov</span></span></span> with <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">bicgstab</span></span></span>.</li></ol>
     </li></ol>
-<!--l. 453--><p class="noindent" >The insertion routines will be called as many times as needed; they only need to be
+<!--l. 452--><p class="noindent" >The insertion routines will be called as many times as needed; they only need to be
 called on the data that is actually allocated to the current process, i.e. each process
 generates its own data.
-<!--l. 458--><p class="indent" >   In principle there is no specific order in the calls to <span class="obeylines-h"><span class="verb"><span 
+<!--l. 457--><p class="indent" >   In principle there is no specific order in the calls to <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_spins</span></span></span>, nor is there a
 requirement to build a matrix row in its entirety before calling the routine; this
 allows the application programmer to walk through the discretization mesh element
 by element, generating the main part of a given matrix row but also contributions to
 the rows corresponding to neighbouring elements.
-<!--l. 465--><p class="indent" >   From a functional point of view it is even possible to execute one call for each
+<!--l. 464--><p class="indent" >   From a functional point of view it is even possible to execute one call for each
 nonzero coefficient; however this would have a substantial computational
 overhead. It is therefore advisable to pack a certain amount of data into each
 call to the insertion routine, say touching on a few tens of rows; the best
@ -595,23 +595,23 @@ process and pass it in a single call to <span class="obeylines-h"><span class="v
 class="cmtt-10">psb_spins</span></span></span>; this, however, would entail a
 doubling of memory occupation, and thus would be almost always far from
 optimal.
-<!--l. 478--><p class="noindent" >
+<!--l. 477--><p class="noindent" >
   <h5 class="subsubsectionHead"><span class="titlemark">2.3.1    </span> <a 
 id="x5-80002.3.1"></a>User-defined index mappings</h5>
-<!--l. 480--><p class="noindent" >PSBLAS supports user-defined global to local index mappings, subject to the
+<!--l. 479--><p class="noindent" >PSBLAS supports user-defined global to local index mappings, subject to the
 constraints outlined in sec.&#x00A0;<a 
 href="#x5-70002.3">2.3<!--tex4ht:ref: sec:appstruct --></a>:
     <ol  class="enumerate1" >
 <li 
  class="enumerate" id="x5-8002x1">
-     <!--l. 483--><p class="noindent" >The set of indices owned locally must be mapped to the set 1<span 
+     <!--l. 482--><p class="noindent" >The set of indices owned locally must be mapped to the set 1<span 
 class="zplmr7m-">&#x2026;</span><span 
 class="zplmr7m-">n</span><sub>row<sub><span 
 class="zplmr7m-x-x-60">i</span></sub></sub>;
     </li>
 <li 
  class="enumerate" id="x5-8004x2">
-     <!--l. 485--><p class="noindent" >The set of halo points must be mapped to the set <span 
+     <!--l. 484--><p class="noindent" >The set of halo points must be mapped to the set <span 
 class="zplmr7m-">n</span><sub>row<sub><span 
 class="zplmr7m-x-x-60">i</span></sub></sub> <span 
 class="zplmr7t-">+ </span>1<span 
@ -619,14 +619,14 @@ class="zplmr7m-">&#x2026;</span><span
 class="zplmr7m-">n</span><sub>col<sub>
 <span 
 class="zplmr7m-x-x-60">i</span></sub></sub>;</li></ol>
-<!--l. 488--><p class="noindent" >but otherwise the mapping is arbitrary. The user application is responsible to ensure
+<!--l. 487--><p class="noindent" >but otherwise the mapping is arbitrary. The user application is responsible for ensuring
 consistency of this mapping; some errors may be caught by the library, but
 this is not guaranteed. The application structure to support this usage is as
 follows:
     <ol  class="enumerate1" >
 <li 
  class="enumerate" id="x5-8006x1">
-     <!--l. 494--><p class="noindent" >Initialize                                                                                                       index
+     <!--l. 493--><p class="noindent" >Initialize                                                                                                       index
     space with <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_cdall(ictx,desc,info,vl=vl,lidx=lidx)</span></span></span> passing the
     vectors <span class="obeylines-h"><span class="verb"><span 
@ -636,7 +636,7 @@ class="cmtt-10">lidx(:)</span></span></span> containing the corresponding local
     </li>
 <li 
  class="enumerate" id="x5-8008x2">
-     <!--l. 499--><p class="noindent" >Add  the  halo  points  <span class="obeylines-h"><span class="verb"><span 
+     <!--l. 498--><p class="noindent" >Add  the  halo  points  <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">ja(:)</span></span></span> and  their  associated  local  indices  <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">lidx(:)</span></span></span>
     with one or more calls to <span class="obeylines-h"><span class="verb"><span 
@ -644,7 +644,7 @@ class="cmtt-10">psb_cdins(nz,ja,desc,info,lidx=lidx)</span></span></span>;
     </li>
 <li 
  class="enumerate" id="x5-8010x3">
-     <!--l. 502--><p class="noindent" >Assemble the descriptor with <span class="obeylines-h"><span class="verb"><span 
+     <!--l. 501--><p class="noindent" >Assemble the descriptor with <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_cdasb</span></span></span>;
     </li>
 <li 
@ -652,7 +652,7 @@ class="cmtt-10">psb_cdasb</span></span></span>;
-     <!--l. 503--><p class="noindent" >Build   the   sparse   matrices   and   vectors,   optionally   making   use   in
+     <!--l. 502--><p class="noindent" >Build   the   sparse   matrices   and   vectors,   optionally   making   use   in
     <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_spins</span></span></span> and  <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_geins</span></span></span> of  the  <span class="obeylines-h"><span class="verb"><span 
@ -661,19 +661,19 @@ class="cmtt-10">local</span></span></span> argument  specifying  that  the
 class="cmtt-10">ia</span></span></span>, <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">ja</span></span></span> and <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">irw</span></span></span>, respectively, are already local indices.</li></ol>
-<!--l. 510--><p class="noindent" >
+<!--l. 509--><p class="noindent" >
   <h4 class="subsectionHead"><span class="titlemark">2.4    </span> <a 
 id="x5-90002.4"></a>Programming model</h4>
-<!--l. 512--><p class="noindent" >The PSBLAS librarary is based on the Single Program Multiple Data (SPMD)
+<!--l. 511--><p class="noindent" >The PSBLAS library is based on the Single Program Multiple Data (SPMD)
 programming model: each process participating in the computation performs the
 same actions on a chunk of data. Parallelism is thus data-driven.
-<!--l. 517--><p class="indent" >   Because of this structure, many subroutines coordinate their action across the
+<!--l. 516--><p class="indent" >   Because of this structure, many subroutines coordinate their action across the
 various processes, thus providing an implicit synchronization point, and therefore
 <span 
 class="pplri7t-">must </span>be called simultaneously by all processes participating in the computation. This
 is certainly true for the data allocation and assembly routines, for all the
 computational routines and for some of the tools routines.
-<!--l. 525--><p class="indent" >   However there are many cases where no synchronization, and indeed no
+<!--l. 524--><p class="indent" >   However there are many cases where no synchronization, and indeed no
 communication among processes, is implied; for instance, all the routines in sec.&#x00A0;<a 
 href="userhtmlse3.html#x9-100003">3<!--tex4ht:ref: sec:datastruct --></a>
 are only acting on the local data structures, and thus may be called independently.
@ -681,21 +681,21 @@ The most important case is that of the coefficient insertion routines: since the
 of coefficients in the sparse and dense matrices varies among the processors, and
 since the user is free to choose an arbitrary order in building the matrix entries,
 these routines cannot imply a synchronization.
-<!--l. 535--><p class="indent" >   Throughout this user&#8217;s guide each subroutine will be clearly indicated
+<!--l. 534--><p class="indent" >   Throughout this user&#8217;s guide each subroutine will be clearly indicated
 as:
     <dl class="description"><dt class="description">
-     <!--l. 538--><p class="noindent" >
+     <!--l. 537--><p class="noindent" >
 <span 
 class="pplb7t-">Synchronous:</span> </dt><dd 
 class="description">
-     <!--l. 538--><p class="noindent" >must  be  called  simultaneously  by  all  the  processes  in  the  relevant
+     <!--l. 537--><p class="noindent" >must  be  called  simultaneously  by  all  the  processes  in  the  relevant
     communication context;
     </dd><dt class="description">
-     <!--l. 540--><p class="noindent" >
+     <!--l. 539--><p class="noindent" >
 <span 
 class="pplb7t-">Asynchronous:</span> </dt><dd 
 class="description">
-     <!--l. 540--><p class="noindent" >may be called in a totally independent manner.</dd></dl>
+     <!--l. 539--><p class="noindent" >may be called in a totally independent manner.</dd></dl>
--- a/docs/html/userhtmlse5.html
+++ b/docs/html/userhtmlse5.html
@ -21,7 +21,7 @@ href="userhtml.html#userhtmlse8.html" >up</a>] </p></div>
 <!--l. 7--><p class="noindent" >The routines in this chapter implement various global communication operators on
 vectors associated with a discretization mesh. For auxiliary communication routines
 not tied to a discretization space see&#x00A0;<a 
-href="userhtmlse6.html#x12-780006">6<!--tex4ht:ref: sec:toolsrout --></a>.
+href="userhtmlse7.html#x13-1060007">7<!--tex4ht:ref: sec:parenv --></a>.
--- a/docs/psblas-3.9.pdf
+++ b/docs/psblas-3.9.pdf
--- a/docs/src/commrout.tex
+++ b/docs/src/commrout.tex
@ -6,7 +6,7 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 The routines in this chapter implement various global communication operators
 on vectors associated with a discretization mesh. For auxiliary communication
-routines not tied to a discretization space see~\ref{sec:toolsrout}.
+routines not tied to a discretization space see~\ref{sec:parenv}.
 \clearpage\subsection{psb\_halo --- Halo Data Communication}
--- a/docs/src/intro.tex
+++ b/docs/src/intro.tex
@ -28,8 +28,8 @@ accelerators through OpenMP as support from compilers improves.
 The project is led by Salvatore Filippone; a number of people have been contributing to this package over the
 years; contributors in roughly reverse chronological order:
 \begin{obeylines}
 Luca       Pepè Sciarria
 Theophane  Loloum
 Fabio      Durastante
 Dimitri    Walther
 Andrea     Di Iorio
 Stefano    Petrilli
@ -37,7 +37,6 @@ years; contributors in roughly reverse chronological order:
 Zaak       Beekman
 Ambra	   Abdullahi Hassan
 Pasqua	   D'Ambra
 Alfredo    Buttari
 Daniela    di Serafino
 Michele    Martone
 Michele    Colajanni
@ -251,7 +250,7 @@ domain is usually a halo point for some other domain\footnote{This is
  two variables is reciprocal. If the matrix pattern is non-symmetric
  we may have one-way interactions, and these could cause a situation
  in which a boundary point is not a halo point for its neighbour.}; therefore
-the cardinality of the boundary points set denotes the amount of data
+the cardinality of the boundary points set determines the amount of data
 sent to other domains. 
 \item[Overlap.] An overlap point is a boundary point assigned to
 multiple domains. Any operation that involves an overlap point
--- a/docs/src/userguide.tex
+++ b/docs/src/userguide.tex
@ -136,7 +136,7 @@
 by Salvatore Filippone\\
 Alfredo Buttari \\
 Fabio Durastante}\\ 
-Jun 1st, 2025
+June 9th, 2025
 \end{minipage}}
 }
 %\addtolength{\textwidth}{\centeroffset}
--- a/docs/src/userhtml.tex
+++ b/docs/src/userhtml.tex
@ -106,7 +106,7 @@ Fabio Durastante } \\
 %\today
 Software version: 3.9.0\\
 %\today
-Jun  1st, 2025
+June 9th, 2025
 \cleardoublepage
 \begingroup
  \renewcommand*{\thepage}{toc}