<!--l. 4--><p class="noindent">For computing with CUDA we define a dual memorization strategy in which each
variable on the CPU (“host”) side has a GPU (“device”) side. When a GPU-type
variable is initialized, the data contained is (usually) the same on both sides. Each
operator invoked on the variable may change the data so that only the host side or
the device side is up-to-date.
<!--l. 11--><p class="indent"> Keeping track of the updates to data in the variables is essential: we want to
perform most computations on the GPU, but we cannot afford the time needed to
move data between the host memory and the device memory because the bandwidth
of the interconnection bus would become the main bottleneck of the computation.
Thus, each and every computational routine in the library is built according to the
following principles:
<ul class="itemize1">
<li class="itemize">
<!--l. 18--><p class="noindent">If the data type being handled is GPU-enabled, make sure that its device
copy is up to date and run the computation on the device side; if the data is
altered, the host copy becomes outdated. The host copy is updated only when
requested, either
<dl class="description"><dt class="description">
<!--l. 25--><p class="noindent">
<span
class="cmbx-10">explicitly</span></dt><dd
class="description">
<!--l. 25--><p class="noindent">by invoking a synchronization method;
</dd><dt class="description">
<!--l. 26--><p class="noindent">
<span
class="cmbx-10">implicitly</span></dt><dd
class="description">
<!--l. 26--><p class="noindent">by invoking a method that involves other data items that are not
GPU-enabled, e.g., by assignment of a vector to a normal array.</dd></dl>
</li></ul>
<!--l. 31--><p class="noindent">In this way, data items are put on the GPU memory “on demand” and remain there as
long as “normal” computations are carried out. As an example, the following call to a
matrix-vector product
<div class="center">
<!--l. 39--><p class="noindent">
<code class="lstinline"><span style="color:#000000">call psb_spmm(alpha,a,x,beta,y,desc_a,info)</span></code>
</div>
<p class="noindent">will be executed on the GPU whenever all the data items involved are GPU-enabled;
if a program performs the same call repeatedly, then:
<ul class="itemize1">
<li class="itemize">
<!--l. 52--><p class="noindent">The first kernel invocation will find the data in main memory, and will
copy it to the GPU memory, thus incurring a significant overhead; the
result is however <span
class="cmti-10">not </span>copied back, and therefore:
</li>
<li class="itemize">
<!--l. 56--><p class="noindent">Subsequent kernel invocations involving the same vector will find the data
on the GPU side so that they will run at full speed.</li></ul>
<!--l. 60--><p class="noindent">For all invocations after the first, the only data that will have to be transferred to/from
the main memory will be the scalars <code class="lstinline"><span style="color:#000000">alpha</span></code> and <code class="lstinline"><span style="color:#000000">beta</span></code>, and the return code
<code class="lstinline"><span style="color:#000000">info</span></code>.
<p class="noindent">The GPU-enabled data types currently available are the following:
<dl class="description"><dt class="description">
<!--l. 65--><p class="noindent">
<span
class="cmbx-10">Vectors:</span></dt><dd
class="description">
<!--l. 65--><p class="noindent">The data type <code class="lstinline"><span style="color:#000000">psb_T_vect_gpu</span></code> provides a GPU-enabled extension of
the inner type <code class="lstinline"><span style="color:#000000">psb_T_base_vect_type</span></code>, and must be used together with
the GPU-enabled sparse matrix types;
</dd><dt class="description">
<!--l. 69--><p class="noindent">
<span
class="cmbx-10">CSR:</span></dt><dd
class="description">
<!--l. 69--><p class="noindent">The data type <code class="lstinline"><span style="color:#000000">psb_T_csrg_sparse_mat</span></code> provides an interface to the GPU
version of CSR available in the NVIDIA CuSPARSE library;
</dd><dt class="description">
<!--l. 72--><p class="noindent">
<span
class="cmbx-10">HYB:</span></dt><dd
class="description">
<!--l. 72--><p class="noindent">The data type <code class="lstinline"><span style="color:#000000">psb_T_hybg_sparse_mat</span></code> provides an interface to the HYB
GPU storage available in the NVIDIA CuSPARSE library. The internal
structure is opaque, hence the host-side copy is kept in CSR format;
</dd><dt class="description">
<!--l. 77--><p class="noindent">
<span
class="cmbx-10">ELL:</span></dt><dd
class="description">
<!--l. 77--><p class="noindent">The data type <code class="lstinline"><span style="color:#000000">psb_T_elg_sparse_mat</span></code> provides an interface to the
ELLPACK implementation from SPGPU;
</dd><dt class="description">
<!--l. 80--><p class="noindent">
<span
class="cmbx-10">HLL:</span></dt><dd
class="description">
<!--l. 80--><p class="noindent">The data type <code class="lstinline"><span style="color:#000000">psb_T_hlg_sparse_mat</span></code> provides an interface to the Hacked
ELLPACK implementation from SPGPU;
</dd><dt class="description">
<!--l. 82--><p class="noindent">
<span
class="cmbx-10">HDIA:</span></dt><dd
class="description">
<!--l. 82--><p class="noindent">The data type <code class="lstinline"><span style="color:#000000">psb_T_hdiag_sparse_mat</span></code> provides an interface to the
Hacked DIAgonals implementation from SPGPU;</dd></dl>
<!--l. 112--><p class="noindent">ID of CUDA device to attach to.<br
class="newline" />Scope: <span
class="cmbx-10">local</span>.<br
class="newline" />Type: <span
class="cmbx-10">optional</span>.<br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Specified as: an integer value. Default: use <code class="lstinline"><span style="color:#000000">mod</span><span style="color:#000000">(</span><span style="color:#000000">iam</span><span style="color:#000000">,</span><span style="color:#000000">ngpu</span><span style="color:#000000">)</span></code> where <code class="lstinline"><span style="color:#000000">iam</span></code> is
the calling process index and <code class="lstinline"><span style="color:#000000">ngpu</span></code> is the total number of CUDA devices
available on the current node.</dd></dl>
<!--l. 123--><p class="noindent"><span
class="cmbx-12">Notes</span>
<ol class="enumerate1">
<li
class="enumerate" id="x20-155002x1">
<!--l. 125--><p class="noindent">A call to this routine must precede any other PSBLAS-CUDA call.</li></ol>
<!--l. 129--><p class="noindent">
<h4 class="likesubsectionHead"><a
id="x20-156000"></a>psb_cuda_exit — Exit from PSBLAS-CUDA environment</h4>
class="newline" />An integer value; 0 means no error has been detected.</dd></dl>
<!--l. 209--><p class="noindent"><span
class="cmbx-12">Notes</span>
<!--l. 211--><p class="indent"> If this function is called on a matrix <code class="lstinline"><span style="color:#000000">a</span></code> on a distributed communicator, only the
local part is written in output. To get a single MatrixMarket file with the whole
matrix when appropriate, e.g. for debugging purposes, one could <span
class="cmti-10">gather </span>the whole
matrix on a single rank and then write it. Consider the following example for a
<!--l. 282--><p class="noindent">The Fortran file unit number.<br
class="newline" />Type: <span
class="cmbx-10">optional</span>.<br
class="newline" />Specified as: an integer value. Only meaningful if filename is not <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>.</dd></dl>
<!--l. 287--><p class="noindent">
<dl class="description"><dt class="description">
<!--l. 288--><p class="noindent">
<span
class="cmbx-10">On Return</span></dt><dd
class="description">
<!--l. 288--><p class="noindent">
</dd><dt class="description">
<!--l. 289--><p class="noindent">
<span
class="cmbx-10">iret</span></dt><dd
class="description">
<!--l. 289--><p class="noindent">Error code.<br
class="newline" />Type: <span
class="cmbx-10">required </span><br
class="newline" />An integer value; 0 means no error has been detected.</dd></dl>
<!--l. 294--><p class="noindent"><span
class="cmbx-12">Notes</span>
<!--l. 296--><p class="indent"> If this function is called on a vector <code class="lstinline"><span style="color:#000000">v</span></code> on a distributed communicator, only the
local part is written in output. To get a single MatrixMarket file with the whole
vector when appropriate, e.g. for debugging purposes, one could <span
class="cmti-10">gather </span>the whole
vector on a single rank and then write it. Consider the following example for a <span