Merge branch 'repackage' of github.com:sfilippone/psblas3 into repackage

openacc
sfilippone 3 months ago
commit f825bf37a1

@ -1,7 +1,7 @@
all: guide all: guide
guide: guide:
cd src && $(MAKE) cd src && $(MAKE) clean all
doxy: doxy:
doxygen doxypsb doxygen doxypsb

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 58 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 61 KiB

@ -10,16 +10,16 @@
<link rel="stylesheet" type="text/css" href="userhtml.css"> <link rel="stylesheet" type="text/css" href="userhtml.css">
</head><body </head><body
> >
<!--l. 91--><p class="noindent" ><span <!--l. 99--><p class="noindent" ><span
class="cmbx-12x-x-144">PSBLAS</span><br class="pplb7t-x-x-172">PSBLAS</span><br
class="newline" /> <span class="newline" /> <span
class="cmbx-12x-x-144">User&#8217;s and Reference Guide</span><br class="pplb7t-x-x-172">User&#8217;s and Reference Guide</span><br
class="newline" /> <span class="newline" /> <span
class="cmti-12">A reference guide for the Parallel Sparse BLAS library</span><br class="pplri7t-x-x-120">A reference guide for the Parallel Sparse BLAS library</span><br
class="newline" /> <span class="newline" /> <span
class="cmbx-10">Salvatore Filippone</span><br class="pplb7t-">Salvatore Filippone</span><br
class="newline" /><span class="newline" /><span
class="cmbx-10">Alfredo Buttari </span><br class="pplb7t-">Alfredo Buttari </span><br
class="newline" />Software version: 3.9.0<br class="newline" />Software version: 3.9.0<br
class="newline" />Aug 1st, 2024 class="newline" />Aug 1st, 2024
@ -52,13 +52,13 @@ href="userhtmlse9.html#x14-1280009" id="QQ2-14-158">Utilities</a></span>
<br /> &#x00A0;<span class="sectionToc" >10 <a <br /> &#x00A0;<span class="sectionToc" >10 <a
href="userhtmlse10.html#x15-13500010" id="QQ2-15-165">Preconditioner routines</a></span> href="userhtmlse10.html#x15-13500010" id="QQ2-15-165">Preconditioner routines</a></span>
<br /> &#x00A0;<span class="sectionToc" >11 <a <br /> &#x00A0;<span class="sectionToc" >11 <a
href="userhtmlse11.html#x17-14200011" id="QQ2-17-172">Iterative Methods</a></span> href="userhtmlse11.html#x17-14300011" id="QQ2-17-174">Iterative Methods</a></span>
<br /> &#x00A0;<span class="sectionToc" >12 <a <br /> &#x00A0;<span class="sectionToc" >12 <a
href="userhtmlse12.html#x19-14500012" id="QQ2-19-175">Extensions</a></span> href="userhtmlse12.html#x19-14600012" id="QQ2-19-177">Extensions</a></span>
<br /> &#x00A0;<span class="sectionToc" >13 <a <br /> &#x00A0;<span class="sectionToc" >13 <a
href="userhtmlse13.html#x20-15400013" id="QQ2-20-190">CUDA Environment Routines</a></span> href="userhtmlse13.html#x20-15500013" id="QQ2-20-192">CUDA Environment Routines</a></span>
<br /> &#x00A0;<span class="likesectionToc" ><a <br /> &#x00A0;<span class="likesectionToc" ><a
href="userhtmlli2.html#x21-169000" id="QQ2-21-219">References</a></span> href="userhtmlli2.html#x21-170000" id="QQ2-21-221">References</a></span>
</div> </div>

Binary file not shown.

After

Width:  |  Height:  |  Size: 90 KiB

@ -1,33 +1,62 @@
/* start css.sty */ /* start css.sty */
.cmr-7{font-size:70%;} .pplb7t-x-x-172{font-size:172%;font-weight: bold;}
.cmmi-5{font-size:50%;font-style: italic;} .pplb7t-x-x-172{font-weight: bold;}
.cmmi-7{font-size:70%;font-style: italic;} .pplb7t-x-x-172{font-weight: bold;}
.cmmi-10{font-style: italic;} .pplri7t-{font-style: italic;}
.cmsy-7{font-size:70%;} .pplri7t-{font-style: italic;}
.cmbx-12x-x-144{font-size:172%; font-weight: bold;} .pplri7t-x-x-120{font-size:120%;font-style: italic;}
.cmbx-12x-x-144{ font-weight: bold;} .pplri7t-x-x-120{font-style: italic;}
.cmbx-12x-x-144{ font-weight: bold;} .pplb7t-{font-weight: bold;}
.cmti-10{ font-style: italic;} .pplb7t-{font-weight: bold;}
.cmti-12{font-size:120%; font-style: italic;} .pplb7t-{font-weight: bold;}
.cmbx-10{ font-weight: bold;}
.cmbx-10{ font-weight: bold;}
.cmbx-10{ font-weight: bold;}
.cmtt-10{font-family: monospace,monospace;} .cmtt-10{font-family: monospace,monospace;}
.cmtt-10{font-family: monospace,monospace;} .cmtt-10{font-family: monospace,monospace;}
.cmtt-10{font-family: monospace,monospace;} .cmtt-10{font-family: monospace,monospace;}
.cmr-9{font-size:90%;} .pplr7t-x-x-76{font-size:76%;}
.cmr-8{font-size:80%;} .zplmr7m-{font-style: italic;}
.cmbx-12{font-size:120%; font-weight: bold;} .zplmr7m-{font-style: italic;}
.cmbx-12{ font-weight: bold;} .zplmr7m-{font-style: italic;}
.cmbx-12{ font-weight: bold;} .zplmr7m-{font-style: italic;}
.zplmr7m-{font-style: italic;}
.zplmr7m-x-x-76{font-size:76%;font-style: italic;}
.zplmr7m-x-x-76{font-style: italic;}
.zplmr7m-x-x-76{font-style: italic;}
.zplmr7m-x-x-76{font-style: italic;}
.zplmr7m-x-x-76{font-style: italic;}
.zplmr7m-x-x-60{font-size:60%;font-style: italic;}
.zplmr7m-x-x-60{font-style: italic;}
.zplmr7m-x-x-60{font-style: italic;}
.zplmr7m-x-x-60{font-style: italic;}
.zplmr7m-x-x-60{font-style: italic;}
.zplmr7y-x-x-76{font-size:76%;}
.zplmr7t-x-x-76{font-size:76%;}
.pplr7t-x-x-90{font-size:90%;}
.pplr7t-x-x-80{font-size:80%;}
.pplb7t-x-x-120{font-size:120%;font-weight: bold;}
.pplb7t-x-x-120{font-weight: bold;}
.pplb7t-x-x-120{font-weight: bold;}
.cmtt-8{font-size:80%;font-family: monospace,monospace;} .cmtt-8{font-size:80%;font-family: monospace,monospace;}
.cmtt-8{font-family: monospace,monospace;} .cmtt-8{font-family: monospace,monospace;}
.cmtt-8{font-family: monospace,monospace;} .cmtt-8{font-family: monospace,monospace;}
.cmtt-9{font-size:90%;font-family: monospace,monospace;} .cmtt-9{font-size:90%;font-family: monospace,monospace;}
.cmtt-9{font-family: monospace,monospace;} .cmtt-9{font-family: monospace,monospace;}
.cmtt-9{font-family: monospace,monospace;} .cmtt-9{font-family: monospace,monospace;}
.cmmi-8{font-size:80%;font-style: italic;} .pplr7t-x-x-70{font-size:70%;}
.zplmr7m-x-x-90{font-size:90%;font-style: italic;}
.zplmr7m-x-x-90{font-style: italic;}
.zplmr7m-x-x-90{font-style: italic;}
.zplmr7m-x-x-90{font-style: italic;}
.zplmr7m-x-x-90{font-style: italic;}
.zplmr7y-x-x-90{font-size:90%;}
.zplmr7m-x-x-80{font-size:80%;font-style: italic;}
.zplmr7m-x-x-80{font-style: italic;}
.zplmr7m-x-x-80{font-style: italic;}
.zplmr7m-x-x-80{font-style: italic;}
.zplmr7m-x-x-80{font-style: italic;}
.zplmr7t-x-x-80{font-size:80%;}
.pplrc7t-x-x-90{font-size:90%;}
.small-caps{font-variant: small-caps; }
p{margin-top:0;margin-bottom:0} p{margin-top:0;margin-bottom:0}
p.indent{text-indent:0;} p.indent{text-indent:0;}
p + p{margin-top:1em;} p + p{margin-top:1em;}
@ -158,5 +187,11 @@ pre.listings{font-family: monospace,monospace; white-space: pre-wrap; margin-top
pre.lstlisting{font-family: monospace,monospace; white-space: pre-wrap; margin-top:0.5em; margin-bottom:0.5em; } pre.lstlisting{font-family: monospace,monospace; white-space: pre-wrap; margin-top:0.5em; margin-bottom:0.5em; }
pre.lstinputlisting{ font-family: monospace,monospace; white-space: pre-wrap; } pre.lstinputlisting{ font-family: monospace,monospace; white-space: pre-wrap; }
.lstinputlisting .label{margin-right:0.5em;} .lstinputlisting .label{margin-right:0.5em;}
#TBL-24-1{border-left: 1px solid black;}
#TBL-24-1{border-right:1px solid black;}
#TBL-24-2{border-right:1px solid black;}
#TBL-24-3{border-right:1px solid black;}
#TBL-24-4{border-right:1px solid black;}
#TBL-24-5{border-right:1px solid black;}
/* end css.sty */ /* end css.sty */

@ -10,16 +10,16 @@
<link rel="stylesheet" type="text/css" href="userhtml.css"> <link rel="stylesheet" type="text/css" href="userhtml.css">
</head><body </head><body
> >
<!--l. 91--><p class="noindent" ><span <!--l. 99--><p class="noindent" ><span
class="cmbx-12x-x-144">PSBLAS</span><br class="pplb7t-x-x-172">PSBLAS</span><br
class="newline" /> <span class="newline" /> <span
class="cmbx-12x-x-144">User&#8217;s and Reference Guide</span><br class="pplb7t-x-x-172">User&#8217;s and Reference Guide</span><br
class="newline" /> <span class="newline" /> <span
class="cmti-12">A reference guide for the Parallel Sparse BLAS library</span><br class="pplri7t-x-x-120">A reference guide for the Parallel Sparse BLAS library</span><br
class="newline" /> <span class="newline" /> <span
class="cmbx-10">Salvatore Filippone</span><br class="pplb7t-">Salvatore Filippone</span><br
class="newline" /><span class="newline" /><span
class="cmbx-10">Alfredo Buttari </span><br class="pplb7t-">Alfredo Buttari </span><br
class="newline" />Software version: 3.9.0<br class="newline" />Software version: 3.9.0<br
class="newline" />Aug 1st, 2024 class="newline" />Aug 1st, 2024
@ -52,13 +52,13 @@ href="userhtmlse9.html#x14-1280009" id="QQ2-14-158">Utilities</a></span>
<br /> &#x00A0;<span class="sectionToc" >10 <a <br /> &#x00A0;<span class="sectionToc" >10 <a
href="userhtmlse10.html#x15-13500010" id="QQ2-15-165">Preconditioner routines</a></span> href="userhtmlse10.html#x15-13500010" id="QQ2-15-165">Preconditioner routines</a></span>
<br /> &#x00A0;<span class="sectionToc" >11 <a <br /> &#x00A0;<span class="sectionToc" >11 <a
href="userhtmlse11.html#x17-14200011" id="QQ2-17-172">Iterative Methods</a></span> href="userhtmlse11.html#x17-14300011" id="QQ2-17-174">Iterative Methods</a></span>
<br /> &#x00A0;<span class="sectionToc" >12 <a <br /> &#x00A0;<span class="sectionToc" >12 <a
href="userhtmlse12.html#x19-14500012" id="QQ2-19-175">Extensions</a></span> href="userhtmlse12.html#x19-14600012" id="QQ2-19-177">Extensions</a></span>
<br /> &#x00A0;<span class="sectionToc" >13 <a <br /> &#x00A0;<span class="sectionToc" >13 <a
href="userhtmlse13.html#x20-15400013" id="QQ2-20-190">CUDA Environment Routines</a></span> href="userhtmlse13.html#x20-15500013" id="QQ2-20-192">CUDA Environment Routines</a></span>
<br /> &#x00A0;<span class="likesectionToc" ><a <br /> &#x00A0;<span class="likesectionToc" ><a
href="userhtmlli2.html#x21-169000" id="QQ2-21-219">References</a></span> href="userhtmlli2.html#x21-170000" id="QQ2-21-221">References</a></span>
</div> </div>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.8 KiB

After

Width:  |  Height:  |  Size: 1.9 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.1 KiB

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.7 KiB

After

Width:  |  Height:  |  Size: 1.7 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 KiB

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 KiB

After

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 KiB

After

Width:  |  Height:  |  Size: 1.4 KiB

@ -10,10 +10,10 @@
<link rel="stylesheet" type="text/css" href="userhtml.css"> <link rel="stylesheet" type="text/css" href="userhtml.css">
</head><body </head><body
> >
<div class="footnote-text"> <div class="footnote-text">
<!--l. 72--><p class="indent" > <span class="footnote-mark"><a <!--l. 72--><p class="indent" > <span class="footnote-mark"><a
id="fn4x0"><a id="fn4x0"><a
id="x16-136002x10.1"></a> <sup class="textsuperscript">4</sup></a></span><span id="x16-136002x10.1"></a> <sup class="textsuperscript">4</sup></a></span><span
class="cmr-8">The string is case-insensitive</span></div> class="pplr7t-x-x-80">The string is case-insensitive</span></div>
</body></html> </body></html>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 968 B

After

Width:  |  Height:  |  Size: 1021 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.1 KiB

After

Width:  |  Height:  |  Size: 1.1 KiB

@ -13,8 +13,11 @@
<div class="footnote-text"> <div class="footnote-text">
<!--l. 53--><p class="noindent" ><span class="footnote-mark"><a <!--l. 53--><p class="noindent" ><span class="footnote-mark"><a
id="fn5x0"><a id="fn5x0"><a
id="x18-143004x11.1"></a> <sup class="textsuperscript">5</sup></a></span><span id="x18-144004x11.1"></a> <sup class="textsuperscript">5</sup></a></span><span
class="cmr-8">Note: the implementation is for </span><span class="pplr7t-x-x-80">Note: the implementation is for </span><span
class="cmmi-8">FCG</span><span class="zplmr7m-x-x-80">FCG</span><span
class="cmr-8">(1).</span></div> class="zplmr7t-x-x-80">(</span><span
class="pplr7t-x-x-80">1</span><span
class="zplmr7t-x-x-80">)</span><span
class="pplr7t-x-x-80">.</span></div>
</body></html> </body></html>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.1 KiB

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 KiB

After

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.0 KiB

After

Width:  |  Height:  |  Size: 1.0 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 KiB

After

Width:  |  Height:  |  Size: 1.4 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.5 KiB

After

Width:  |  Height:  |  Size: 8.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 KiB

After

Width:  |  Height:  |  Size: 1.4 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.0 KiB

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 970 B

After

Width:  |  Height:  |  Size: 978 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 420 B

After

Width:  |  Height:  |  Size: 399 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 710 B

After

Width:  |  Height:  |  Size: 700 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.7 KiB

After

Width:  |  Height:  |  Size: 1.8 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.8 KiB

After

Width:  |  Height:  |  Size: 2.0 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.0 KiB

After

Width:  |  Height:  |  Size: 1.9 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1016 B

After

Width:  |  Height:  |  Size: 1.0 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.1 KiB

After

Width:  |  Height:  |  Size: 2.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.5 KiB

After

Width:  |  Height:  |  Size: 1.6 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.5 KiB

After

Width:  |  Height:  |  Size: 1.6 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.8 KiB

After

Width:  |  Height:  |  Size: 1.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.0 KiB

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.6 KiB

After

Width:  |  Height:  |  Size: 1.7 KiB

@ -10,10 +10,10 @@
<link rel="stylesheet" type="text/css" href="userhtml.css"> <link rel="stylesheet" type="text/css" href="userhtml.css">
</head><body </head><body
> >
<div class="footnote-text"> <div class="footnote-text">
<!--l. 151--><p class="indent" > <span class="footnote-mark"><a <!--l. 151--><p class="indent" > <span class="footnote-mark"><a
id="fn1x0"><a id="fn1x0"><a
id="x5-3003x2"></a> <sup class="textsuperscript">1</sup></a></span><span id="x5-3003x2"></a> <sup class="textsuperscript">1</sup></a></span><span
class="cmr-8">In our prototype implementation we provide sample scatter/gather routines.</span></div> class="pplr7t-x-x-80">In our prototype implementation we provide sample scatter/gather routines.</span></div>
</body></html> </body></html>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 KiB

After

Width:  |  Height:  |  Size: 1.3 KiB

@ -13,12 +13,12 @@
<div class="footnote-text"> <div class="footnote-text">
<!--l. 195--><p class="noindent" ><span class="footnote-mark"><a <!--l. 195--><p class="noindent" ><span class="footnote-mark"><a
id="fn2x0"><a id="fn2x0"><a
id="x6-4002x2.1"></a> <sup class="textsuperscript">2</sup></a></span><span id="x6-4002x2.1"></a> <sup class="textsuperscript">2</sup></a></span><span
class="cmr-8">This is the normal situation when the pattern of the sparse matrix is symmetric, which is</span> class="pplr7t-x-x-80">This is the normal situation when the pattern of the sparse matrix is symmetric, which is equivalent to</span>
<span <span
class="cmr-8">equivalent to say that the interaction between two variables is reciprocal. If the matrix pattern is</span> class="pplr7t-x-x-80">say that the interaction between two variables is reciprocal. If the matrix pattern is non-symmetric we may</span>
<span <span
class="cmr-8">non-symmetric we may have one-way interactions, and these could cause a situation in which a</span> class="pplr7t-x-x-80">have one-way interactions, and these could cause a situation in which a boundary point is not a halo point</span>
<span <span
class="cmr-8">boundary point is not a halo point for its neighbour.</span></div> class="pplr7t-x-x-80">for its neighbour.</span></div>
</body></html> </body></html>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.1 KiB

After

Width:  |  Height:  |  Size: 2.3 KiB

@ -11,16 +11,16 @@
</head><body </head><body
> >
<div class="footnote-text"> <div class="footnote-text">
<!--l. 362--><p class="noindent" ><span class="footnote-mark"><a <!--l. 363--><p class="noindent" ><span class="footnote-mark"><a
id="fn3x0"><a id="fn3x0"><a
id="x7-6020x3"></a> <sup class="textsuperscript">3</sup></a></span><span id="x7-6020x3"></a> <sup class="textsuperscript">3</sup></a></span><span
class="cmr-8">The subroutine style </span><span class="pplr7t-x-x-80">The subroutine style </span><span
class="cmtt-8">psb</span><span class="cmtt-8">psb</span><span
class="cmtt-8">_precinit </span><span class="cmtt-8">_precinit </span><span
class="cmr-8">and </span><span class="pplr7t-x-x-80">and </span><span
class="cmtt-8">psb</span><span class="cmtt-8">psb</span><span
class="cmtt-8">_precbl </span><span class="cmtt-8">_precbld </span><span
class="cmr-8">are still supported for backward</span> class="pplr7t-x-x-80">are still supported for backward</span>
<span <span
class="cmr-8">compatibility</span></div> class="pplr7t-x-x-80">compatibility</span></div>
</body></html> </body></html>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.8 KiB

After

Width:  |  Height:  |  Size: 1.9 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.0 KiB

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.7 KiB

After

Width:  |  Height:  |  Size: 1.8 KiB

@ -10,7 +10,7 @@
<link rel="stylesheet" type="text/css" href="userhtml.css"> <link rel="stylesheet" type="text/css" href="userhtml.css">
</head><body </head><body
> >
<!--l. 106--><div class="crosslinks"><p class="noindent">[<a <!--l. 114--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlse1.html" >next</a>] [<a href="userhtmlse1.html" >next</a>] [<a
href="#tailuserhtmlli1.html">tail</a>] [<a href="#tailuserhtmlli1.html">tail</a>] [<a
href="userhtml.html#userhtmlli1.html" >up</a>] </p></div> href="userhtml.html#userhtmlli1.html" >up</a>] </p></div>
@ -297,61 +297,63 @@ href="userhtmlse10.html#x15-13500010">Preconditioner routines</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >10.1 <a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" >10.1 <a
href="userhtmlse10.html#x15-13600010.1" id="QQ2-15-166">init &#8212; Initialize a preconditioner</a></span> href="userhtmlse10.html#x15-13600010.1" id="QQ2-15-166">init &#8212; Initialize a preconditioner</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >10.2 <a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" >10.2 <a
href="userhtmlse10.html#x15-13700010.2" id="QQ2-15-167">build &#8212; Builds a preconditioner</a></span> href="userhtmlse10.html#x15-13700010.2" id="QQ2-15-167">Set &#8212; set preconditioner parameters</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >10.3 <a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" >10.3 <a
href="userhtmlse10.html#x15-13800010.3" id="QQ2-15-168">apply &#8212; Preconditioner application routine</a></span> href="userhtmlse10.html#x15-13800010.3" id="QQ2-15-169">build &#8212; Builds a preconditioner</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >10.4 <a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" >10.4 <a
href="userhtmlse10.html#x15-13900010.4" id="QQ2-15-169">descr &#8212; Prints a description of current preconditioner</a></span> href="userhtmlse10.html#x15-13900010.4" id="QQ2-15-170">apply &#8212; Preconditioner application routine</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >10.5 <a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" >10.5 <a
href="userhtmlse10.html#x15-14000010.5" id="QQ2-15-170">clone &#8212; clone current preconditioner</a></span> href="userhtmlse10.html#x15-14000010.5" id="QQ2-15-171">descr &#8212; Prints a description of current preconditioner</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >10.6 <a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" >10.6 <a
href="userhtmlse10.html#x15-14100010.6" id="QQ2-15-171">free &#8212; Free a preconditioner</a></span> href="userhtmlse10.html#x15-14100010.6" id="QQ2-15-172">clone &#8212; clone current preconditioner</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >10.7 <a
href="userhtmlse10.html#x15-14200010.7" id="QQ2-15-173">free &#8212; Free a preconditioner</a></span>
<br /> &#x00A0;<span class="sectionToc" >11 <a <br /> &#x00A0;<span class="sectionToc" >11 <a
href="userhtmlse11.html#x17-14200011">Iterative Methods</a></span> href="userhtmlse11.html#x17-14300011">Iterative Methods</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >11.1 <a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" >11.1 <a
href="userhtmlse11.html#x17-14300011.1" id="QQ2-17-173">psb_krylov &#8212; Krylov Methods Driver Routine</a></span> href="userhtmlse11.html#x17-14400011.1" id="QQ2-17-175">psb_krylov &#8212; Krylov Methods Driver Routine</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >11.2 <a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" >11.2 <a
href="userhtmlse11.html#x17-14400011.2" id="QQ2-17-174">psb_richardson &#8212; Richardson Iteration Driver Routine</a></span> href="userhtmlse11.html#x17-14500011.2" id="QQ2-17-176">psb_richardson &#8212; Richardson Iteration Driver Routine</a></span>
<br /> &#x00A0;<span class="sectionToc" >12 <a <br /> &#x00A0;<span class="sectionToc" >12 <a
href="userhtmlse12.html#x19-14500012">Extensions</a></span> href="userhtmlse12.html#x19-14600012">Extensions</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >12.1 <a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" >12.1 <a
href="userhtmlse12.html#x19-14600012.1" id="QQ2-19-176">Using the extensions</a></span> href="userhtmlse12.html#x19-14700012.1" id="QQ2-19-178">Using the extensions</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >12.2 <a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" >12.2 <a
href="userhtmlse12.html#x19-14700012.2" id="QQ2-19-177">Extensions&#8217; Data Structures</a></span> href="userhtmlse12.html#x19-14800012.2" id="QQ2-19-179">Extensions&#8217; Data Structures</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >12.3 <a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" >12.3 <a
href="userhtmlse12.html#x19-14800012.3" id="QQ2-19-180">CPU-class extensions</a></span> href="userhtmlse12.html#x19-14900012.3" id="QQ2-19-182">CPU-class extensions</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >12.4 <a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" >12.4 <a
href="userhtmlse12.html#x19-15300012.4" id="QQ2-19-189">CUDA-class extensions</a></span> href="userhtmlse12.html#x19-15400012.4" id="QQ2-19-191">CUDA-class extensions</a></span>
<br /> &#x00A0;<span class="sectionToc" >13 <a <br /> &#x00A0;<span class="sectionToc" >13 <a
href="userhtmlse13.html#x20-15400013">CUDA Environment Routines</a></span> href="userhtmlse13.html#x20-15500013">CUDA Environment Routines</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-192">psb_cuda_init</a></span> href="userhtmlse13.html#Q1-20-194">psb_cuda_init</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-194">psb_cuda_exit</a></span> href="userhtmlse13.html#Q1-20-196">psb_cuda_exit</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-196">psb_cuda_DeviceSync</a></span> href="userhtmlse13.html#Q1-20-198">psb_cuda_DeviceSync</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-198">psb_cuda_getDeviceCount</a></span> href="userhtmlse13.html#Q1-20-200">psb_cuda_getDeviceCount</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-200">psb_cuda_getDevice</a></span> href="userhtmlse13.html#Q1-20-202">psb_cuda_getDevice</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-202">psb_cuda_setDevice</a></span> href="userhtmlse13.html#Q1-20-204">psb_cuda_setDevice</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-204">psb_cuda_DeviceHasUVA</a></span> href="userhtmlse13.html#Q1-20-206">psb_cuda_DeviceHasUVA</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-206">psb_cuda_WarpSize</a></span> href="userhtmlse13.html#Q1-20-208">psb_cuda_WarpSize</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-208">psb_cuda_MultiProcessors</a></span> href="userhtmlse13.html#Q1-20-210">psb_cuda_MultiProcessors</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-210">psb_cuda_MaxThreadsPerMP</a></span> href="userhtmlse13.html#Q1-20-212">psb_cuda_MaxThreadsPerMP</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-212">psb_cuda_MaxRegisterPerBlock</a></span> href="userhtmlse13.html#Q1-20-214">psb_cuda_MaxRegisterPerBlock</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-214">psb_cuda_MemoryClockRate</a></span> href="userhtmlse13.html#Q1-20-216">psb_cuda_MemoryClockRate</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-216">psb_cuda_MemoryBusWidth</a></span> href="userhtmlse13.html#Q1-20-218">psb_cuda_MemoryBusWidth</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-218">psb_cuda_MemoryPeakBandwidth</a></span> href="userhtmlse13.html#Q1-20-220">psb_cuda_MemoryPeakBandwidth</a></span>
</div> </div>

@ -16,194 +16,210 @@ href="userhtmlse13.html#tailuserhtmlse13.html" >prev-tail</a>] [<a
href="#tailuserhtmlli2.html">tail</a>] [<a href="#tailuserhtmlli2.html">tail</a>] [<a
href="userhtml.html# " >up</a>] </p></div> href="userhtml.html# " >up</a>] </p></div>
<h3 class="likesectionHead"><a <h3 class="likesectionHead"><a
id="x21-169000"></a>References</h3> id="x21-170000"></a>References</h3>
<!--l. 2--><p class="noindent" > <!--l. 2--><p class="noindent" >
<div class="thebibliography"> <div class="thebibliography">
<p class="bibitem" ><span class="biblabel"> <p class="bibitem" ><span class="biblabel">
[1]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a [1]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XPARA04FOREST"></a>G.&#x00A0;Bella, S.&#x00A0;Filippone, A.&#x00A0;De Maio and M.&#x00A0;Testa, <span id="XPARA04FOREST"></a>G.&#x00A0;Bella, S.&#x00A0;Filippone, A.&#x00A0;De Maio and M.&#x00A0;Testa, <span
class="cmti-10">A Simulation Model</span> class="pplri7t-">A Simulation Model</span>
<span <span
class="cmti-10">for Forest Fires</span>, in J.&#x00A0;Dongarra, K.&#x00A0;Madsen, J.&#x00A0;Wasniewski, editors, class="pplri7t-">for Forest Fires</span>, in J.&#x00A0;Dongarra, K.&#x00A0;Madsen, J.&#x00A0;Wasniewski, editors,
Proceedings of PARA&#x00A0;04 Workshop on State of the Art in Scientific Proceedings of PARA&#x00A0;04 Workshop on State of the Art in Scientific
Computing, pp.&#x00A0;546&#8211;553, Lecture Notes in Computer Science, Springer, Computing, pp.&#x00A0;546&#8211;553, Lecture Notes in Computer Science, Springer,
2005. 2005.
</p> </p>
<p class="bibitem" ><span class="biblabel"> <p class="bibitem" ><span class="biblabel">
[2]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a [2]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="X2007d"></a>A. Buttari, D. di Serafino, P. D&#8217;Ambra, S. Filippone, 2LEV-D2P4: id="XBERTACCINIFILIPPONE"></a>D. Bertaccini&#x00A0;and&#x00A0;S. Filippone, <span
a package of high-performance preconditioners, Applicable Algebra in class="pplri7t-">Sparse approximate</span>
Engineering, Communications and Computing, Volume 18, Number 3, May, <span
2007, pp. 223-239 class="pplri7t-">inverse preconditioners on high performance GPU platforms</span>, Comput. Math.
Appl., 71, (2016), no.&#x00A0;3, 693&#8211;711.
</p> </p>
<p class="bibitem" ><span class="biblabel"> <p class="bibitem" ><span class="biblabel">
[3]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a [3]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="X2007c"></a>P. D&#8217;Ambra, S. Filippone, D. Di Serafino On the Development id="X2007d"></a>A. Buttari, D. di Serafino, P. D&#8217;Ambra, S. Filippone, 2LEV-D2P4:
of PSBLAS-based Parallel Two-level Schwarz Preconditioners Applied a package of high-performance preconditioners, Applicable Algebra in
Numerical Mathematics, Elsevier Science, Volume 57, Issues 11-12, Engineering, Communications and Computing, Volume 18, Number 3,
November-December 2007, Pages 1181-1196. May, 2007, pp. 223-239
</p> </p>
<p class="bibitem" ><span class="biblabel"> <p class="bibitem" ><span class="biblabel">
[4]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a [4]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="X2007c"></a>P. D&#8217;Ambra, S. Filippone, D. Di Serafino, On the Development of
PSBLAS-based Parallel Two-level Schwarz Preconditioners, Applied
Numerical Mathematics, Elsevier Science, Volume 57, Issues 11-12,
November-December 2007, Pages 1181-1196.
</p>
<p class="bibitem" ><span class="biblabel">
[5]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XBLAS2"></a>Dongarra, J. J., DuCroz, J., Hammarling, S. and Hanson, R., An id="XBLAS2"></a>Dongarra, J. J., DuCroz, J., Hammarling, S. and Hanson, R., An
Extended Set of Fortran Basic Linear Algebra Subprograms, ACM Trans. Extended Set of Fortran Basic Linear Algebra Subprograms, ACM Trans.
Math. Softw. vol.&#x00A0;14, 1&#8211;17, 1988. Math. Softw. vol.&#x00A0;14, 1&#8211;17, 1988.
</p> </p>
<p class="bibitem" ><span class="biblabel"> <p class="bibitem" ><span class="biblabel">
[5]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a [6]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XBLAS3"></a>Dongarra, J., DuCroz, J., Hammarling, S. and Duff, I., A Set of level id="XBLAS3"></a>Dongarra, J., DuCroz, J., Hammarling, S. and Duff, I., A Set of level
3 Basic Linear Algebra Subprograms, ACM Trans. Math. Softw. vol.&#x00A0;16, 3 Basic Linear Algebra Subprograms, ACM Trans. Math. Softw. vol.&#x00A0;16,
1&#8211;17, 1990. 1&#8211;17, 1990.
</p>
<p class="bibitem" ><span class="biblabel">
[6]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XBLACS"></a>J.&#x00A0;J.&#x00A0;Dongarra and R.&#x00A0;C.&#x00A0;Whaley, <span
class="cmti-10">A User&#8217;s Guide to the BLACS</span>
<span
class="cmti-10">v.</span><span
class="cmti-10">&#x00A0;1.1</span>, Lapack Working Note 94, Tech.&#x00A0;Rep.&#x00A0;UT-CS-95-281, University of
Tennessee, March 1995 (updated May 1997).
</p> </p>
<p class="bibitem" ><span class="biblabel"> <p class="bibitem" ><span class="biblabel">
[7]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a [7]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XBLACS"></a>J.&#x00A0;J.&#x00A0;Dongarra and R.&#x00A0;C.&#x00A0;Whaley, <span
class="pplri7t-">A User&#8217;s Guide to the BLACS</span>
<span
class="pplri7t-">v.</span><span
class="pplri7t-">&#x00A0;1.1</span>, Lapack Working Note 94, Tech.&#x00A0;Rep.&#x00A0;UT-CS-95-281, University of
Tennessee, March 1995 (updated May 1997).
</p>
<p class="bibitem" ><span class="biblabel">
[8]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="Xsblas97"></a>I.&#x00A0;Duff, M.&#x00A0;Marrone, G.&#x00A0;Radicati and C.&#x00A0;Vittoli, <span id="Xsblas97"></a>I.&#x00A0;Duff, M.&#x00A0;Marrone, G.&#x00A0;Radicati and C.&#x00A0;Vittoli, <span
class="cmti-10">Level 3 Basic Linear</span> class="pplri7t-">Level 3 Basic Linear</span>
<span <span
class="cmti-10">Algebra Subprograms for Sparse Matrices: a User Level Interface</span>, ACM class="pplri7t-">Algebra Subprograms for Sparse Matrices: a User Level Interface</span>, ACM
Transactions on Mathematical Software, 23(3), pp.&#x00A0;379&#8211;401, 1997. Transactions on Mathematical Software, 23(3), pp.&#x00A0;379&#8211;401, 1997.
</p> </p>
<p class="bibitem" ><span class="biblabel"> <p class="bibitem" ><span class="biblabel">
[8]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a [9]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="Xsblas02"></a>I.&#x00A0;Duff, M.&#x00A0;Heroux and R.&#x00A0;Pozo, <span id="Xsblas02"></a>I.&#x00A0;Duff, M.&#x00A0;Heroux and R.&#x00A0;Pozo, <span
class="cmti-10">An Overview of the Sparse Basic</span> class="pplri7t-">An Overview of the Sparse Basic Linear</span>
<span
class="cmti-10">Linear Algebra Subprograms: the New Standard from the BLAS Technical</span>
<span <span
class="cmti-10">Forum</span>, ACM Transactions on Mathematical Software, 28(2), pp.&#x00A0;239&#8211;267, class="pplri7t-">Algebra Subprograms: the New Standard from the BLAS Technical Forum</span>, ACM
2002. Transactions on Mathematical Software, 28(2), pp.&#x00A0;239&#8211;267, 2002.
</p> </p>
<p class="bibitem" ><span class="biblabel"> <p class="bibitem" ><span class="biblabel">
[9]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a [10]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XPSBLAS"></a>S.&#x00A0;Filippone and M.&#x00A0;Colajanni, <span id="XPSBLAS"></a>S.&#x00A0;Filippone and M.&#x00A0;Colajanni, <span
class="cmti-10">PSBLAS: A Library for Parallel</span> class="pplri7t-">PSBLAS: A Library for Parallel</span>
<span <span
class="cmti-10">Linear Algebra Computation on Sparse Matrices</span>, ACM Transactions on class="pplri7t-">Linear Algebra Computation on Sparse Matrices</span>, ACM Transactions on
Mathematical Software, 26(4), pp.&#x00A0;527&#8211;550, 2000. Mathematical Software, 26(4), pp.&#x00A0;527&#8211;550, 2000.
</p> </p>
<p class="bibitem" ><span class="biblabel"> <p class="bibitem" ><span class="biblabel">
[10]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a [11]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XSparse03"></a>S.&#x00A0;Filippone and A.&#x00A0;Buttari, <span id="XSparse03"></a>S.&#x00A0;Filippone and A.&#x00A0;Buttari, <span
class="cmti-10">Object-Oriented Techniques for Sparse</span> class="pplri7t-">Object-Oriented Techniques for Sparse</span>
<span <span
class="cmti-10">Matrix Computations in Fortran 2003</span>, ACM Transactions on Mathematical class="pplri7t-">Matrix Computations in Fortran 2003</span>, ACM Transactions on Mathematical
Software, 38(4), 2012. Software, 38(4), 2012.
</p> </p>
<p class="bibitem" ><span class="biblabel"> <p class="bibitem" ><span class="biblabel">
[11]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a [12]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XKIVA3PSBLAS"></a>S.&#x00A0;Filippone, P.&#x00A0;D&#8217;Ambra, M.&#x00A0;Colajanni, <span id="XKIVA3PSBLAS"></a>S.&#x00A0;Filippone, P.&#x00A0;D&#8217;Ambra, M.&#x00A0;Colajanni, <span
class="cmti-10">Using a Parallel Library</span> class="pplri7t-">Using a Parallel Library of</span>
<span <span
class="cmti-10">of Sparse Linear Algebra in a Fluid Dynamics Applications Code on</span> class="pplri7t-">Sparse Linear Algebra in a Fluid Dynamics Applications Code on Linux</span>
<span <span
class="cmti-10">Linux Clusters</span>, in G.&#x00A0;Joubert, A.&#x00A0;Murli, F.&#x00A0;Peters, M.&#x00A0;Vanneschi, editors, class="pplri7t-">Clusters</span>, in G.&#x00A0;Joubert, A.&#x00A0;Murli, F.&#x00A0;Peters, M.&#x00A0;Vanneschi, editors,
Parallel Computing - Advances &amp; Current Issues, pp.&#x00A0;441&#8211;448, Imperial Parallel Computing - Advances &amp; Current Issues, pp.&#x00A0;441&#8211;448, Imperial
College Press, 2002. College Press, 2002.
</p> </p>
<p class="bibitem" ><span class="biblabel"> <p class="bibitem" ><span class="biblabel">
[12]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a [13]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XDesignPatterns"></a> Gamma, E., Helm, R., Johnson, R., and Vlissides, J. 1995. <span id="XDesignPatterns"></a> Gamma, E., Helm, R., Johnson, R., and Vlissides, J. 1995. <span
class="cmti-10">Design</span> class="pplri7t-">Design</span>
<span <span
class="cmti-10">Patterns: Elements of Reusable Object-Oriented Software</span>. Addison-Wesley. class="pplri7t-">Patterns: Elements of Reusable Object-Oriented Software</span>. Addison-Wesley.
</p> </p>
<p class="bibitem" ><span class="biblabel"> <p class="bibitem" ><span class="biblabel">
[13]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a [14]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XMETIS"></a>Karypis, G. and Kumar, V., <span id="XMETIS"></a>Karypis, G. and Kumar, V., <span
class="cmti-10">METIS: Unstructured Graph Partitioning</span> class="pplri7t-">METIS: Unstructured Graph Partitioning</span>
<span <span
class="cmti-10">and Sparse Matrix Ordering System</span>. Minneapolis, MN 55455: University class="pplri7t-">and Sparse Matrix Ordering System</span>. Minneapolis, MN 55455: University
of Minnesota, Department of Computer Science, 1995. Internet Address: of Minnesota, Department of Computer Science, 1995. Internet Address:
<span class="obeylines-h"><span class="verb"><span <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">http://www.cs.umn.edu/~karypis</span></span></span>. class="cmtt-10">http://www.cs.umn.edu/~karypis</span></span></span>.
</p> </p>
<p class="bibitem" ><span class="biblabel"> <p class="bibitem" ><span class="biblabel">
[14]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a [15]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XBLAS1"></a>Lawson, C., Hanson, R., Kincaid, D. and Krogh, F., Basic Linear id="XBLAS1"></a>Lawson, C., Hanson, R., Kincaid, D. and Krogh, F., Basic Linear
Algebra Subprograms for Fortran usage, ACM Trans. Math. Softw. vol.&#x00A0;5, Algebra Subprograms for Fortran usage, ACM Trans. Math. Softw. vol.&#x00A0;5,
308&#8211;323, 1979. 308&#8211;323, 1979.
</p> </p>
<p class="bibitem" ><span class="biblabel"> <p class="bibitem" ><span class="biblabel">
[15]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a [16]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="Xmachiels"></a>Machiels, L. and Deville, M. <span id="Xmachiels"></a>Machiels, L. and Deville, M. <span
class="cmti-10">Fortran 90: An entry to object-oriented</span> class="pplri7t-">Fortran 90: An entry to object-oriented</span>
<span <span
class="cmti-10">programming for the solution of partial differential equations. </span>ACM Trans. class="pplri7t-">programming for the solution of partial differential equations. </span>ACM Trans.
Math. Softw. vol.&#x00A0;23, 32&#8211;49, 1997. Math. Softw. vol.&#x00A0;23, 32&#8211;49, 1997.
</p> </p>
<p class="bibitem" ><span class="biblabel"> <p class="bibitem" ><span class="biblabel">
[16]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a [17]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="Xmetcalf"></a>Metcalf, M., Reid, J. and Cohen, M. <span id="Xmetcalf"></a>Metcalf, M., Reid, J. and Cohen, M. <span
class="cmti-10">Fortran 95/2003 explained. </span>Oxford class="pplri7t-">Fortran 95/2003 explained. </span>Oxford
University Press, 2004. University Press, 2004.
</p> </p>
<p class="bibitem" ><span class="biblabel"> <p class="bibitem" ><span class="biblabel">
[17]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a [18]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XMRC:11"></a>Metcalf, M., Reid, J. and Cohen, M. <span id="XMRC:11"></a>Metcalf, M., Reid, J. and Cohen, M. <span
class="cmti-10">Modern Fortran explained. </span>Oxford class="pplri7t-">Modern Fortran explained. </span>Oxford
University Press, 2011. University Press, 2011.
</p> </p>
<p class="bibitem" ><span class="biblabel"> <p class="bibitem" ><span class="biblabel">
[18]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a [19]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XRouXiaXu:11"></a>Rouson, D.W.I., Xia, J., Xu, X.: Scientific Software Design: The id="XRouXiaXu:11"></a>Rouson, D.W.I., Xia, J., Xu, X.: Scientific Software Design: The
Object-Oriented Way. Cambridge University Press (2011) Object-Oriented Way. Cambridge University Press (2011)
</p> </p>
<p class="bibitem" ><span class="biblabel"> <p class="bibitem" ><span class="biblabel">
[19]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a [20]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XMPI1"></a>M.&#x00A0;Snir, S.&#x00A0;Otto, S.&#x00A0;Huss-Lederman, D.&#x00A0;Walker and J.&#x00A0;Dongarra, id="XMPI1"></a>M.&#x00A0;Snir, S.&#x00A0;Otto, S.&#x00A0;Huss-Lederman, D.&#x00A0;Walker and J.&#x00A0;Dongarra,
<span <span
class="cmti-10">MPI: The Complete Reference. Volume 1 - The MPI Core</span>, second edition, class="pplri7t-">MPI: The Complete Reference. Volume 1 - The MPI Core</span>, second edition, MIT
MIT Press, 1998. Press, 1998.
</p> </p>
<p class="bibitem" ><span class="biblabel"> <p class="bibitem" ><span class="biblabel">
[20]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span> [21]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
<a id="XDesPat:11"></a>D.&#x00A0;Barbieri, V.&#x00A0;Cardellini,
id="XDesPat:11"></a>D.&#x00A0;Barbieri, V.&#x00A0;Cardellini, S.&#x00A0;Filippone and D.&#x00A0;Rouson <span S.&#x00A0;Filippone and D.&#x00A0;Rouson <span
class="cmti-10">Design Patterns</span> class="pplri7t-">Design Patterns for Scientific Computations</span>
<span <span
class="cmti-10">for Scientific Computations on Sparse Matrices</span>, HPSS 2011, Algorithms class="pplri7t-">on Sparse Matrices</span>, HPSS 2011, Algorithms and Programming Tools for
and Programming Tools for Next-Generation High-Performance Scientific Next-Generation High-Performance Scientific Software, Bordeaux, Sep.
Software, Bordeaux, Sep. 2011 2011
</p> </p>
<p class="bibitem" ><span class="biblabel"> <p class="bibitem" ><span class="biblabel">
[21]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a [22]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XCaFiRo:2014"></a> Cardellini, V., Filippone, S., and Rouson, D. 2014, Design patterns id="XCaFiRo:2014"></a> Cardellini, V., Filippone, S., and Rouson, D. 2014, Design patterns
for sparse-matrix computations on hybrid CPU/GPU platforms, <span for sparse-matrix computations on hybrid CPU/GPU platforms, <span
class="cmti-10">Scientific</span> class="pplri7t-">Scientific</span>
<span <span
class="cmti-10">Programming</span>&#x00A0;<span class="pplri7t-">Programming</span>&#x00A0;<span
class="cmti-10">22,</span>&#x00A0;1, 1&#8211;19. class="pplri7t-">22,</span>&#x00A0;1, 1&#8211;19.
</p> </p>
<p class="bibitem" ><span class="biblabel"> <p class="bibitem" ><span class="biblabel">
[22]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a [23]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XOurTechRep"></a>D.&#x00A0;Barbieri, V.&#x00A0;Cardellini, A.&#x00A0;Fanfarillo, S.&#x00A0;Filippone, Three storage id="XOurTechRep"></a>D.&#x00A0;Barbieri, V.&#x00A0;Cardellini, A.&#x00A0;Fanfarillo, S.&#x00A0;Filippone, Three storage
formats for sparse matrices on GPGPUs, Tech. Rep. DICII RR-15.6, formats for sparse matrices on GPGPUs, Tech. Rep. DICII RR-15.6,
Università di Roma Tor Vergata (February 2015). Università di Roma Tor Vergata (February 2015).
</p>
<p class="bibitem" ><span class="biblabel">
[24]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XFilippone:2017:SMM:3034774.3017994"></a>S.&#x00A0;Filippone, V.&#x00A0;Cardellini, D.&#x00A0;Barbieri, and A.&#x00A0;Fanfarillo. Sparse
matrix-vector multiplication on GPGPUs. <span
class="pplri7t-">ACM Trans. Math. Softw.</span>,
43(4):30:1&#8211;30:49, 2017.
</p> </p>
</div> </div>
<!--l. 130--><div class="crosslinks"><p class="noindent">[<a <!--l. 138--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlse13.html" >prev</a>] [<a href="userhtmlse13.html" >prev</a>] [<a
href="userhtmlse13.html#tailuserhtmlse13.html" >prev-tail</a>] [<a href="userhtmlse13.html#tailuserhtmlse13.html" >prev-tail</a>] [<a
href="userhtmlli2.html" >front</a>] [<a href="userhtmlli2.html" >front</a>] [<a
href="userhtml.html# " >up</a>] </p></div> href="userhtml.html# " >up</a>] </p></div>
<!--l. 130--><p class="indent" > <a <!--l. 138--><p class="indent" > <a
id="tailuserhtmlli2.html"></a> id="tailuserhtmlli2.html"></a>
</body></html> </body></html>

@ -16,64 +16,66 @@ href="userhtmlli1.html" >prev</a>] [<a
href="userhtmlli1.html#tailuserhtmlli1.html" >prev-tail</a>] [<a href="userhtmlli1.html#tailuserhtmlli1.html" >prev-tail</a>] [<a
href="#tailuserhtmlse1.html">tail</a>] [<a href="#tailuserhtmlse1.html">tail</a>] [<a
href="userhtml.html#userhtmlse1.html" >up</a>] </p></div> href="userhtml.html#userhtmlse1.html" >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">1 </span> <a <h3 class="sectionHead"><span class="titlemark">1 </span> <a
id="x3-20001"></a>Introduction</h3> id="x3-20001"></a>Introduction</h3>
<!--l. 3--><p class="noindent" >The PSBLAS library, developed with the aim to facilitate the parallelization of <!--l. 3--><p class="noindent" >The PSBLAS library, developed with the aim to facilitate the parallelization of
computationally intensive scientific applications, is designed to address parallel computationally intensive scientific applications, is designed to address parallel
implementation of iterative solvers for sparse linear systems through the distributed implementation of iterative solvers for sparse linear systems through the
memory paradigm. It includes routines for multiplying sparse matrices by dense distributed memory paradigm. It includes routines for multiplying sparse
matrices, solving block diagonal systems with triangular diagonal entries, matrices by dense matrices, solving block diagonal systems with triangular
preprocessing sparse matrices, and contains additional routines for dense matrix diagonal entries, preprocessing sparse matrices, and contains additional
operations. The current implementation of PSBLAS addresses a distributed memory routines for dense matrix operations. The current implementation of PSBLAS
execution model operating with message passing. addresses a distributed memory execution model operating with message
passing.
<!--l. 14--><p class="indent" > The PSBLAS library version 3 is implemented in the Fortran&#x00A0;2003&#x00A0;<span class="cite">[<a <!--l. 14--><p class="indent" > The PSBLAS library version 3 is implemented in the Fortran&#x00A0;2003&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#Xmetcalf">16</a>]</span> href="userhtmlli2.html#Xmetcalf">17</a>]</span>
programming language, with reuse and/or adaptation of existing Fortran&#x00A0;77 and programming language, with reuse and/or adaptation of existing Fortran&#x00A0;77 and
Fortran&#x00A0;95 software, plus a handful of C routines. Fortran&#x00A0;95 software, plus a handful of C routines.
<!--l. 19--><p class="indent" > The use of Fortran&#x00A0;2003 offers a number of advantages over Fortran&#x00A0;95, mostly in <!--l. 19--><p class="indent" > The use of Fortran&#x00A0;2003 offers a number of advantages over Fortran&#x00A0;95, mostly
the handling of requirements for evolution and adaptation of the library to new in the handling of requirements for evolution and adaptation of the library to new
computing architectures and integration of new algorithms. For a detailed discussion computing architectures and integration of new algorithms. For a detailed
of our design see&#x00A0;<span class="cite">[<a discussion of our design see&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#XSparse03">10</a>]</span>; other works discussing advanced programming in Fortran&#x00A0;2003 href="userhtmlli2.html#XSparse03">11</a>]</span>; other works discussing advanced programming in
include&#x00A0;<span class="cite">[<a Fortran&#x00A0;2003 include&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#XDesPat:11">20</a>,&#x00A0;<a href="userhtmlli2.html#XDesPat:11">21</a>,&#x00A0;<a
href="userhtmlli2.html#XRouXiaXu:11">18</a>]</span>; sufficient support for Fortran&#x00A0;2003 is now available from many href="userhtmlli2.html#XRouXiaXu:11">19</a>]</span>; sufficient support for Fortran&#x00A0;2003 is now available
compilers, including the GNU Fortran compiler from the Free Software Foundation from many compilers, including the GNU Fortran compiler from the Free Software
(as of version 4.8). Foundation (as of version 4.8).
<!--l. 30--><p class="indent" > Previous approaches have been based on mixing Fortran&#x00A0;95, with its support for <!--l. 30--><p class="indent" > Previous approaches have been based on mixing Fortran&#x00A0;95, with its support for
object-based design, with other languages; these have been advocated by a number of object-based design, with other languages; these have been advocated by a number
authors, e.g.&#x00A0;<span class="cite">[<a of authors, e.g.&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#Xmachiels">15</a>]</span>. Moreover, the Fortran&#x00A0;95 facilities for dynamic memory href="userhtmlli2.html#Xmachiels">16</a>]</span>. Moreover, the Fortran&#x00A0;95 facilities for dynamic memory
management and interface overloading greatly enhance the usability of the PSBLAS management and interface overloading greatly enhance the usability of the PSBLAS
subroutines. In this way, the library can take care of runtime memory requirements subroutines. In this way, the library can take care of runtime memory requirements
that are quite difficult or even impossible to predict at implementation or that are quite difficult or even impossible to predict at implementation or
compilation time. compilation time.
<!--l. 40--><p class="indent" > The presentation of the PSBLAS library follows the general structure of the <!--l. 40--><p class="indent" > The presentation of the PSBLAS library follows the general structure of the
proposal for serial Sparse BLAS&#x00A0;<span class="cite">[<a proposal for serial Sparse BLAS&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#Xsblas97">7</a>,&#x00A0;<a href="userhtmlli2.html#Xsblas97">8</a>,&#x00A0;<a
href="userhtmlli2.html#Xsblas02">8</a>]</span>, which in turn is based on the proposal for href="userhtmlli2.html#Xsblas02">9</a>]</span>, which in turn is based on the proposal for
BLAS on dense matrices&#x00A0;<span class="cite">[<a BLAS on dense matrices&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#XBLAS1">14</a>,&#x00A0;<a href="userhtmlli2.html#XBLAS1">15</a>,&#x00A0;<a
href="userhtmlli2.html#XBLAS2">4</a>,&#x00A0;<a href="userhtmlli2.html#XBLAS2">5</a>,&#x00A0;<a
href="userhtmlli2.html#XBLAS3">5</a>]</span>. href="userhtmlli2.html#XBLAS3">6</a>]</span>.
<!--l. 45--><p class="indent" > The applicability of sparse iterative solvers to many different areas causes some <!--l. 45--><p class="indent" > The applicability of sparse iterative solvers to many different areas causes
terminology problems because the same concept may be denoted through different some terminology problems because the same concept may be denoted
names depending on the application area. The PSBLAS features presented in this through different names depending on the application area. The PSBLAS
document will be discussed referring to a finite difference discretization of a Partial features presented in this document will be discussed referring to a finite
Differential Equation (PDE). However, the scope of the library is wider than that: for difference discretization of a Partial Differential Equation (PDE). However,
example, it can be applied to finite element discretizations of PDEs, and even to the scope of the library is wider than that: for example, it can be applied
different classes of problems such as nonlinear optimization, for example in optimal to finite element discretizations of PDEs, and even to different classes of
control problems. problems such as nonlinear optimization, for example in optimal control
problems.
<!--l. 55--><p class="indent" > The design of a solver for sparse linear systems is driven by many conflicting <!--l. 55--><p class="indent" > The design of a solver for sparse linear systems is driven by many conflicting
objectives, such as limiting occupation of storage resources, exploiting regularities in objectives, such as limiting occupation of storage resources, exploiting regularities in
the input data, exploiting hardware characteristics of the parallel platform. To the input data, exploiting hardware characteristics of the parallel platform. To
achieve an optimal communication to computation ratio on distributed memory achieve an optimal communication to computation ratio on distributed memory
machines it is essential to keep the <span
class="cmti-10">data locality </span>as high as possible; this can be
done through an appropriate data allocation strategy. The choice of the
machines it is essential to keep the <span
class="pplri7t-">data locality </span>as high as possible; this can
be done through an appropriate data allocation strategy. The choice of the
preconditioner is another very important factor that affects efficiency of the preconditioner is another very important factor that affects efficiency of the
implemented application. Optimal data distribution requirements for a given implemented application. Optimal data distribution requirements for a given
preconditioner may conflict with distribution requirements of the rest of the solver. preconditioner may conflict with distribution requirements of the rest of the solver.

File diff suppressed because it is too large Load Diff

@ -15,48 +15,48 @@ href="userhtmlse10.html" >prev</a>] [<a
href="userhtmlse10.html#tailuserhtmlse10.html" >prev-tail</a>] [<a href="userhtmlse10.html#tailuserhtmlse10.html" >prev-tail</a>] [<a
href="userhtmlse8.html#tailuserhtmlse11.html">tail</a>] [<a href="userhtmlse8.html#tailuserhtmlse11.html">tail</a>] [<a
href="userhtml.html# " >up</a>] </p></div> href="userhtml.html# " >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">11 </span> <a <h3 class="sectionHead"><span class="titlemark">11 </span> <a
id="x17-14200011"></a>Iterative Methods</h3> id="x17-14300011"></a>Iterative Methods</h3>
<!--l. 4--><p class="noindent" >In this chapter we provide routines for preconditioners and iterative methods. The <!--l. 4--><p class="noindent" >In this chapter we provide routines for preconditioners and iterative methods. The
interfaces for iterative methods are available in the module <span class="obeylines-h"><span class="verb"><span interfaces for iterative methods are available in the module <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_linsolve_mod</span></span></span>. class="cmtt-10">psb_linsolve_mod</span></span></span>.
<h4 class="subsectionHead"><span class="titlemark">11.1 </span> <a <h4 class="subsectionHead"><span class="titlemark">11.1 </span> <a
id="x17-14300011.1"></a>psb_krylov &#8212; Krylov Methods Driver Routine</h4> id="x17-14400011.1"></a>psb_krylov &#8212; Krylov Methods Driver Routine</h4>
<!--l. 17--><p class="noindent" >This subroutine is a driver that provides a general interface for all the Krylov-Subspace <!--l. 17--><p class="noindent" >This subroutine is a driver that provides a general interface for all the Krylov-Subspace
family methods implemented in PSBLAS version 3. family methods implemented in PSBLAS version 3.
<!--l. 20--><p class="indent" > The stopping criterion can take the following values: <!--l. 20--><p class="indent" > The stopping criterion can take the following values:
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 22--><p class="noindent" > <!--l. 22--><p class="noindent" >
<span <span
class="cmbx-10">1</span> </dt><dd class="pplb7t-">1</span> </dt><dd
class="description"> class="description">
<!--l. 22--><p class="noindent" >normwise backward error in the infinity norm; the iteration is stopped <!--l. 22--><p class="noindent" >normwise backward error in the infinity norm; the iteration is stopped
when when
<div class="math-display" > <div class="math-display" >
<img <img
src="userhtml30x.png" alt=" -----&#x2225;ri&#x2225;------ src="userhtml30x.png" alt=" ------&#x2225;ri&#x2225;------
err = (&#x2225;A&#x2225;&#x2225;xi&#x2225;+ &#x2225;b&#x2225;) &#x003C; eps err = (&#x2225;A &#x2225;&#x2225;xi&#x2225; + &#x2225;b&#x2225;) &#x003C; eps
" class="math-display" ></div> " class="math-display" ></div>
<!--l. 24--><p class="nopar" > <!--l. 24--><p class="nopar" >
</dd><dt class="description"> </dd><dt class="description">
<!--l. 25--><p class="noindent" > <!--l. 25--><p class="noindent" >
<span <span
class="cmbx-10">2</span> </dt><dd class="pplb7t-">2</span> </dt><dd
class="description"> class="description">
<!--l. 25--><p class="noindent" >Relative residual in the 2-norm; the iteration is stopped when <!--l. 25--><p class="noindent" >Relative residual in the 2-norm; the iteration is stopped when
<div class="math-display" > <div class="math-display" >
<img <img
src="userhtml31x.png" alt=" &#x2225;ri&#x2225;- src="userhtml31x.png" alt=" &#x2225;ri&#x2225;
err = &#x2225;b&#x2225;2 &#x003C; eps err = &#x2225;b&#x2225;-2 &#x003C; eps
" class="math-display" ></div> " class="math-display" ></div>
<!--l. 27--><p class="nopar" > <!--l. 27--><p class="nopar" >
</dd><dt class="description"> </dd><dt class="description">
<!--l. 28--><p class="noindent" > <!--l. 28--><p class="noindent" >
<span <span
class="cmbx-10">3</span> </dt><dd class="pplb7t-">3</span> </dt><dd
class="description"> class="description">
<!--l. 28--><p class="noindent" >Relative residual reduction in the 2-norm; the iteration is stopped when <!--l. 28--><p class="noindent" >Relative residual reduction in the 2-norm; the iteration is stopped when
<div class="math-display" > <div class="math-display" >
@ -66,23 +66,24 @@ err = &#x2225;r0&#x2225;2 &#x003C; eps
" class="math-display" ></div> " class="math-display" ></div>
<!--l. 30--><p class="nopar" ></dd></dl> <!--l. 30--><p class="nopar" ></dd></dl>
<!--l. 32--><p class="noindent" >The behaviour is controlled by the istop argument (see later). In the above formulae, <span <!--l. 32--><p class="noindent" >The behaviour is controlled by the istop argument (see later). In the above formulae, <span
class="cmmi-10">x</span><sub><span class="zplmr7m-">x</span><sub><span
class="cmmi-7">i</span></sub> class="zplmr7m-x-x-76">i</span></sub>
is the tentative solution and <span is the tentative solution and <span
class="cmmi-10">r</span><sub><span class="zplmr7m-">r</span><sub><span
class="cmmi-7">i</span></sub> = <span class="zplmr7m-x-x-76">i</span></sub> <span
class="cmmi-10">b </span><span class="zplmr7t-">= </span><span
class="cmsy-10">- </span><span class="zplmr7m-">b</span><span
class="cmmi-10">Ax</span><sub><span class="zplmr7y-">-</span><span
class="cmmi-7">i</span></sub> the corresponding residual at the <span class="zplmr7m-">Ax</span><sub><span
class="cmmi-10">i</span>-th class="zplmr7m-x-x-76">i</span></sub> the corresponding residual at the <span
class="zplmr7m-">i</span>-th
iteration. iteration.
<!--l. 37--> <!--l. 37-->
<pre class="lstlisting" id="listing-167"><span class="label"><a <pre class="lstlisting" id="listing-218"><span class="label"><a
id="x17-143001r1"></a></span><span style="color:#000000"><span id="x17-144001r1"></a></span><span style="color:#000000"><span
class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">psb_krylov</span></span><span style="color:#000000"><span class="cmtt-10">psb_krylov</span></span><span style="color:#000000"><span
class="cmtt-10">(</span></span><span style="color:#000000"><span class="cmtt-10">(</span></span><span style="color:#000000"><span
@ -103,7 +104,7 @@ class="cmtt-10">,</span></span><span style="color:#000000"><span
class="cmtt-10">info</span></span><span style="color:#000000"><span class="cmtt-10">info</span></span><span style="color:#000000"><span
class="cmtt-10">,&amp;</span></span> class="cmtt-10">,&amp;</span></span>
<span class="label"><a <span class="label"><a
id="x17-143002r2"></a></span><span id="x17-144002r2"></a></span><span
class="cmtt-10">&#x00A0;</span><span class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span class="cmtt-10">&#x00A0;</span><span
@ -128,58 +129,58 @@ class="cmtt-10">)</span></span></pre>
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 43--><p class="noindent" > <!--l. 43--><p class="noindent" >
<span <span
class="cmbx-10">Type:</span> </dt><dd class="pplb7t-">Type:</span> </dt><dd
class="description"> class="description">
<!--l. 43--><p class="noindent" >Synchronous. <!--l. 43--><p class="noindent" >Synchronous.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 44--><p class="noindent" > <!--l. 44--><p class="noindent" >
<span <span
class="cmbx-10">On Entry</span> </dt><dd class="pplb7t-">On Entry</span> </dt><dd
class="description"> class="description">
<!--l. 44--><p class="noindent" > <!--l. 44--><p class="noindent" >
</dd><dt class="description"> </dd><dt class="description">
<!--l. 45--><p class="noindent" > <!--l. 45--><p class="noindent" >
<span <span
class="cmbx-10">method</span> </dt><dd class="pplb7t-">method</span> </dt><dd
class="description"> class="description">
<!--l. 45--><p class="noindent" >a string that defines the iterative method to be used. Supported values <!--l. 45--><p class="noindent" >a string that defines the iterative method to be used. Supported values
are: are:
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 48--><p class="noindent" > <!--l. 48--><p class="noindent" >
<span <span
class="cmbx-10">CG:</span> </dt><dd class="pplb7t-">CG:</span> </dt><dd
class="description"> class="description">
<!--l. 48--><p class="noindent" >the Conjugate Gradient method; <!--l. 48--><p class="noindent" >the Conjugate Gradient method;
</dd><dt class="description"> </dd><dt class="description">
<!--l. 49--><p class="noindent" > <!--l. 49--><p class="noindent" >
<span <span
class="cmbx-10">CGS:</span> </dt><dd class="pplb7t-">CGS:</span> </dt><dd
class="description"> class="description">
<!--l. 49--><p class="noindent" >the Conjugate Gradient Squared method; <!--l. 49--><p class="noindent" >the Conjugate Gradient Squared method;
</dd><dt class="description"> </dd><dt class="description">
<!--l. 51--><p class="noindent" > <!--l. 51--><p class="noindent" >
<span <span
class="cmbx-10">GCR:</span> </dt><dd class="pplb7t-">GCR:</span> </dt><dd
class="description"> class="description">
<!--l. 51--><p class="noindent" >the Generalized Conjugate Residual method; <!--l. 51--><p class="noindent" >the Generalized Conjugate Residual method;
</dd><dt class="description"> </dd><dt class="description">
<!--l. 52--><p class="noindent" > <!--l. 52--><p class="noindent" >
<span <span
class="cmbx-10">FCG:</span> </dt><dd class="pplb7t-">FCG:</span> </dt><dd
class="description"> class="description">
<!--l. 52--><p class="noindent" >the Flexible Conjugate Gradient method<span class="footnote-mark"><a <!--l. 52--><p class="noindent" >the Flexible Conjugate Gradient method<span class="footnote-mark"><a
href="userhtml18.html#fn5x0"><sup class="textsuperscript">5</sup></a></span><a href="userhtml18.html#fn5x0"><sup class="textsuperscript">5</sup></a></span><a
id="x17-143003f5"></a> ; id="x17-144003f5"></a> ;
</dd><dt class="description"> </dd><dt class="description">
<!--l. 55--><p class="noindent" > <!--l. 55--><p class="noindent" >
<span <span
class="cmbx-10">BICG:</span> </dt><dd class="pplb7t-">BICG:</span> </dt><dd
class="description"> class="description">
<!--l. 55--><p class="noindent" >the Bi-Conjugate Gradient method; <!--l. 55--><p class="noindent" >the Bi-Conjugate Gradient method;
</dd><dt class="description"> </dd><dt class="description">
<!--l. 56--><p class="noindent" > <!--l. 56--><p class="noindent" >
<span <span
class="cmbx-10">BICGSTAB:</span> </dt><dd class="pplb7t-">BICGSTAB:</span> </dt><dd
class="description"> class="description">
<!--l. 56--><p class="noindent" >the Bi-Conjugate Gradient Stabilized method; <!--l. 56--><p class="noindent" >the Bi-Conjugate Gradient Stabilized method;
@ -188,28 +189,28 @@ class="description">
</dd><dt class="description"> </dd><dt class="description">
<!--l. 57--><p class="noindent" > <!--l. 57--><p class="noindent" >
<span <span
class="cmbx-10">BICGSTABL:</span> </dt><dd class="pplb7t-">BICGSTABL:</span> </dt><dd
class="description"> class="description">
<!--l. 57--><p class="noindent" >the Bi-Conjugate Gradient Stabilized method with restarting; <!--l. 57--><p class="noindent" >the Bi-Conjugate Gradient Stabilized method with restarting;
</dd><dt class="description"> </dd><dt class="description">
<!--l. 58--><p class="noindent" > <!--l. 58--><p class="noindent" >
<span <span
class="cmbx-10">RGMRES:</span> </dt><dd class="pplb7t-">RGMRES:</span> </dt><dd
class="description"> class="description">
<!--l. 58--><p class="noindent" >the Generalized Minimal Residual method with restarting.</dd></dl> <!--l. 58--><p class="noindent" >the Generalized Minimal Residual method with restarting.</dd></dl>
</dd><dt class="description"> </dd><dt class="description">
<!--l. 60--><p class="noindent" > <!--l. 60--><p class="noindent" >
<span <span
class="cmbx-10">a</span> </dt><dd class="pplb7t-">a</span> </dt><dd
class="description"> class="description">
<!--l. 60--><p class="noindent" >the local portion of global sparse matrix <span <!--l. 60--><p class="noindent" >the local portion of global sparse matrix <span
class="cmmi-10">A</span>. <br class="zplmr7m-">A</span>. <br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">local </span><br class="pplb7t-">local </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">required</span><br class="pplb7t-">required</span><br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">in</span>.<br class="pplb7t-">in</span>.<br
class="newline" />Specified as: a structured data of type <a class="newline" />Specified as: a structured data of type <a
href="userhtmlse3.html#spdata"><span href="userhtmlse3.html#spdata"><span
class="cmtt-10">psb</span><span class="cmtt-10">psb</span><span
@ -218,32 +219,32 @@ class="cmtt-10">_type</span></a>.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 66--><p class="noindent" > <!--l. 66--><p class="noindent" >
<span <span
class="cmbx-10">prec</span> </dt><dd class="pplb7t-">prec</span> </dt><dd
class="description"> class="description">
<!--l. 66--><p class="noindent" >The data structure containing the preconditioner.<br <!--l. 66--><p class="noindent" >The data structure containing the preconditioner.<br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">local </span><br class="pplb7t-">local </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">required</span><br class="pplb7t-">required</span><br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">in</span>.<br class="pplb7t-">in</span>.<br
class="newline" />Specified as: a structured data of type <a class="newline" />Specified as: a structured data of type <a
href="userhtmlse3.html#precdata"><span href="userhtmlse3.html#precdata"><span
class="cmtt-10">psb</span><span class="cmtt-10">psb</span><span
class="cmtt-10">_prec</span><span class="cmtt-10">_Tprec</span><span
class="cmtt-10">_type</span></a>. class="cmtt-10">_type</span></a>.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 71--><p class="noindent" > <!--l. 71--><p class="noindent" >
<span <span
class="cmbx-10">b</span> </dt><dd class="pplb7t-">b</span> </dt><dd
class="description"> class="description">
<!--l. 71--><p class="noindent" >The RHS vector. <br <!--l. 71--><p class="noindent" >The RHS vector. <br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">local </span><br class="pplb7t-">local </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">required</span><br class="pplb7t-">required</span><br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">in</span>.<br class="pplb7t-">in</span>.<br
class="newline" />Specified as: a rank one array or an object of type <a class="newline" />Specified as: a rank one array or an object of type <a
href="userhtmlse3.html#vdata"><span href="userhtmlse3.html#vdata"><span
class="cmtt-10">psb</span><span class="cmtt-10">psb</span><span
@ -253,15 +254,15 @@ class="cmtt-10">_type</span></a>.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 76--><p class="noindent" > <!--l. 76--><p class="noindent" >
<span <span
class="cmbx-10">x</span> </dt><dd class="pplb7t-">x</span> </dt><dd
class="description"> class="description">
<!--l. 76--><p class="noindent" >The initial guess. <br <!--l. 76--><p class="noindent" >The initial guess. <br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">local </span><br class="pplb7t-">local </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">required</span><br class="pplb7t-">required</span><br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">inout</span>.<br class="pplb7t-">inout</span>.<br
class="newline" />Specified as: a rank one array or an object of type <a class="newline" />Specified as: a rank one array or an object of type <a
href="userhtmlse3.html#vdata"><span href="userhtmlse3.html#vdata"><span
class="cmtt-10">psb</span><span class="cmtt-10">psb</span><span
@ -271,32 +272,32 @@ class="cmtt-10">_type</span></a>.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 81--><p class="noindent" > <!--l. 81--><p class="noindent" >
<span <span
class="cmbx-10">eps</span> </dt><dd class="pplb7t-">eps</span> </dt><dd
class="description"> class="description">
<!--l. 81--><p class="noindent" >The stopping tolerance. <br <!--l. 81--><p class="noindent" >The stopping tolerance. <br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">global </span><br class="pplb7t-">global </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">required</span><br class="pplb7t-">required</span><br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">in</span>.<br class="pplb7t-">in</span>.<br
class="newline" />Specified as: a real number. class="newline" />Specified as: a real number.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 86--><p class="noindent" > <!--l. 86--><p class="noindent" >
<span <span
class="cmbx-10">desc</span><span class="pplb7t-">desc</span><span
class="cmbx-10">_a</span> </dt><dd class="pplb7t-">_a</span> </dt><dd
class="description"> class="description">
<!--l. 86--><p class="noindent" >contains data structures for communications.<br <!--l. 86--><p class="noindent" >contains data structures for communications.<br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">local </span><br class="pplb7t-">local </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">required</span><br class="pplb7t-">required</span><br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">in</span>.<br class="pplb7t-">in</span>.<br
class="newline" />Specified as: a structured data of type <a class="newline" />Specified as: a structured data of type <a
href="userhtmlse3.html#descdata"><span href="userhtmlse3.html#descdata"><span
class="cmtt-10">psb</span><span class="cmtt-10">psb</span><span
@ -305,90 +306,93 @@ class="cmtt-10">_type</span></a>.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 91--><p class="noindent" > <!--l. 91--><p class="noindent" >
<span <span
class="cmbx-10">itmax</span> </dt><dd class="pplb7t-">itmax</span> </dt><dd
class="description"> class="description">
<!--l. 91--><p class="noindent" >The maximum number of iterations to perform.<br <!--l. 91--><p class="noindent" >The maximum number of iterations to perform.<br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">global </span><br class="pplb7t-">global </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">optional</span><br class="pplb7t-">optional</span><br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">in</span>.<br class="pplb7t-">in</span>.<br
class="newline" />Default: <span class="newline" />Default: <span
class="cmmi-10">itmax </span>= 1000.<br class="zplmr7m-">itmax </span><span
class="zplmr7t-">= </span>1000.<br
class="newline" />Specified as: an integer variable <span class="newline" />Specified as: an integer variable <span
class="cmmi-10">itmax </span><span class="zplmr7m-">itmax </span><span
class="cmsy-10">&#x2265; </span>1. class="zplmr7y-">&#x2265; </span>1.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 97--><p class="noindent" > <!--l. 97--><p class="noindent" >
<span <span
class="cmbx-10">itrace</span> </dt><dd class="pplb7t-">itrace</span> </dt><dd
class="description"> class="description">
<!--l. 97--><p class="noindent" >If <span <!--l. 97--><p class="noindent" >If <span
class="cmmi-10">&#x003E; </span>0 print out an informational message about convergence every <span class="zplmr7m-">&#x003E; </span>0 print out an informational message about convergence every <span
class="cmmi-10">itrace</span> class="zplmr7m-">itrace</span>
iterations. If = 0 print a message in case of convergence failure.<br iterations. If <span
class="zplmr7t-">= </span>0 print a message in case of convergence failure.<br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">global </span><br class="pplb7t-">global </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">optional</span><br class="pplb7t-">optional</span><br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">in</span>.<br class="pplb7t-">in</span>.<br
class="newline" />Default: <span class="newline" />Default: <span
class="cmmi-10">itrace </span>= <span class="zplmr7m-">itrace </span><span
class="cmsy-10">-</span>1.<br class="zplmr7t-">= </span><span
class="zplmr7y-">-</span>1.<br
class="newline" /> class="newline" />
</dd><dt class="description"> </dd><dt class="description">
<!--l. 104--><p class="noindent" > <!--l. 104--><p class="noindent" >
<span <span
class="cmbx-10">irst</span> </dt><dd class="pplb7t-">irst</span> </dt><dd
class="description"> class="description">
<!--l. 104--><p class="noindent" >An integer specifying the restart parameter.<br <!--l. 104--><p class="noindent" >An integer specifying the restart parameter.<br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">global </span><br class="pplb7t-">global </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">optional</span>.<br class="pplb7t-">optional</span>.<br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">in</span>.<br class="pplb7t-">in</span>.<br
class="newline" />Values: <span class="newline" />Values: <span
class="cmmi-10">irst &#x003E; </span>0. This is employed for the BiCGSTABL or RGMRES methods, class="zplmr7m-">irst &#x003E; </span>0. This is employed for the BiCGSTABL or RGMRES methods,
otherwise it is ignored. otherwise it is ignored.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 111--><p class="noindent" > <!--l. 111--><p class="noindent" >
<span <span
class="cmbx-10">istop</span> </dt><dd class="pplb7t-">istop</span> </dt><dd
class="description"> class="description">
<!--l. 111--><p class="noindent" >An integer specifying the stopping criterion.<br <!--l. 111--><p class="noindent" >An integer specifying the stopping criterion.<br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">global </span><br class="pplb7t-">global </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">optional</span>.<br class="pplb7t-">optional</span>.<br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">in</span>.<br class="pplb7t-">in</span>.<br
class="newline" />Values: 1: use the normwise backward error, 2: use the scaled 2-norm class="newline" />Values: 1: use the normwise backward error, 2: use the scaled 2-norm
of the residual, 3: use the residual reduction in the 2-norm. Default: of the residual, 3: use the residual reduction in the 2-norm. Default:
2. 2.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 117--><p class="noindent" > <!--l. 117--><p class="noindent" >
<span <span
class="cmbx-10">On Return</span> </dt><dd class="pplb7t-">On Return</span> </dt><dd
class="description"> class="description">
<!--l. 117--><p class="noindent" > <!--l. 117--><p class="noindent" >
</dd><dt class="description"> </dd><dt class="description">
<!--l. 118--><p class="noindent" > <!--l. 118--><p class="noindent" >
<span <span
class="cmbx-10">x</span> </dt><dd class="pplb7t-">x</span> </dt><dd
class="description"> class="description">
<!--l. 118--><p class="noindent" >The computed solution. <br <!--l. 118--><p class="noindent" >The computed solution. <br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">local </span><br class="pplb7t-">local </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">required</span><br class="pplb7t-">required</span><br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">inout</span>.<br class="pplb7t-">inout</span>.<br
class="newline" />Specified as: a rank one array or an object of type <a class="newline" />Specified as: a rank one array or an object of type <a
href="userhtmlse3.html#vdata"><span href="userhtmlse3.html#vdata"><span
class="cmtt-10">psb</span><span class="cmtt-10">psb</span><span
@ -398,65 +402,65 @@ class="cmtt-10">_type</span></a>.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 123--><p class="noindent" > <!--l. 123--><p class="noindent" >
<span <span
class="cmbx-10">iter</span> </dt><dd class="pplb7t-">iter</span> </dt><dd
class="description"> class="description">
<!--l. 123--><p class="noindent" >The number of iterations performed.<br <!--l. 123--><p class="noindent" >The number of iterations performed.<br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">global </span><br class="pplb7t-">global </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">optional</span><br class="pplb7t-">optional</span><br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">out</span>.<br class="pplb7t-">out</span>.<br
class="newline" />Returned as: an integer variable. class="newline" />Returned as: an integer variable.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 128--><p class="noindent" > <!--l. 128--><p class="noindent" >
<span <span
class="cmbx-10">err</span> </dt><dd class="pplb7t-">err</span> </dt><dd
class="description"> class="description">
<!--l. 128--><p class="noindent" >The convergence estimate on exit.<br <!--l. 128--><p class="noindent" >The convergence estimate on exit.<br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">global </span><br class="pplb7t-">global </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">optional</span><br class="pplb7t-">optional</span><br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">out</span>.<br class="pplb7t-">out</span>.<br
class="newline" />Returned as: a real number. class="newline" />Returned as: a real number.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 133--><p class="noindent" > <!--l. 133--><p class="noindent" >
<span <span
class="cmbx-10">cond</span> </dt><dd class="pplb7t-">cond</span> </dt><dd
class="description"> class="description">
<!--l. 133--><p class="noindent" >An estimate of the condition number of matrix <span <!--l. 133--><p class="noindent" >An estimate of the condition number of matrix <span
class="cmmi-10">A</span>; only available with the <span class="zplmr7m-">A</span>; only available with the <span
class="cmmi-10">CG</span> class="zplmr7m-">CG</span>
method on real data.<br method on real data.<br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">global </span><br class="pplb7t-">global </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">optional</span><br class="pplb7t-">optional</span><br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">out</span>.<br class="pplb7t-">out</span>.<br
class="newline" />Returned as: a real number. A correct result will be greater than or class="newline" />Returned as: a real number. A correct result will be greater than or
equal to one; if specified for non-real data, or an error occurred, zero is equal to one; if specified for non-real data, or an error occurred, zero is
returned. returned.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 141--><p class="noindent" > <!--l. 141--><p class="noindent" >
<span <span
class="cmbx-10">info</span> </dt><dd class="pplb7t-">info</span> </dt><dd
class="description"> class="description">
<!--l. 141--><p class="noindent" >Error code.<br <!--l. 141--><p class="noindent" >Error code.<br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">local </span><br class="pplb7t-">local </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">required </span><br class="pplb7t-">required </span><br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">out</span>.<br class="pplb7t-">out</span>.<br
class="newline" />An integer value; 0 means no error has been detected.</dd></dl> class="newline" />An integer value; 0 means no error has been detected.</dd></dl>
<h4 class="subsectionHead"><span class="titlemark">11.2 </span> <a <h4 class="subsectionHead"><span class="titlemark">11.2 </span> <a
id="x17-14400011.2"></a>psb_richardson &#8212; Richardson Iteration Driver Routine</h4> id="x17-14500011.2"></a>psb_richardson &#8212; Richardson Iteration Driver Routine</h4>
<!--l. 158--><p class="noindent" >This subroutine is a driver implementing a Richardson iteration <!--l. 158--><p class="noindent" >This subroutine is a driver implementing a Richardson iteration
<div class="math-display" > <div class="math-display" >
<img <img
@ -464,37 +468,37 @@ src="userhtml33x.png" alt="x_{k+1} = M^{-1}(b - A x_k) + x_k,
" class="math-display" ></div> " class="math-display" ></div>
<!--l. 159--><p class="nopar" > with the preconditioner operator <span <!--l. 159--><p class="nopar" > with the preconditioner operator <span
class="cmmi-10">M </span>defined in the previous section. class="zplmr7m-">M </span>defined in the previous section.
<!--l. 162--><p class="indent" > The stopping criterion can take the following values: <!--l. 162--><p class="indent" > The stopping criterion can take the following values:
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 164--><p class="noindent" > <!--l. 164--><p class="noindent" >
<span <span
class="cmbx-10">1</span> </dt><dd class="pplb7t-">1</span> </dt><dd
class="description"> class="description">
<!--l. 164--><p class="noindent" >normwise backward error in the infinity norm; the iteration is stopped <!--l. 164--><p class="noindent" >normwise backward error in the infinity norm; the iteration is stopped
when when
<div class="math-display" > <div class="math-display" >
<img <img
src="userhtml34x.png" alt="err = \frac{\|r_i\|}{\|A\|\|x_i\| + \|b\|} &#x003C; eps src="userhtml34x.png" alt="err = \frac{\|r_i\|}{\|A\|\|x_i\| + \|b\|} &#x003C; eps
" class="math-display" ></div> " class="math-display" ></div>
<!--l. 166--><p class="nopar" > <!--l. 166--><p class="nopar" >
</dd><dt class="description"> </dd><dt class="description">
<!--l. 167--><p class="noindent" > <!--l. 167--><p class="noindent" >
<span <span
class="cmbx-10">2</span> </dt><dd class="pplb7t-">2</span> </dt><dd
class="description"> class="description">
<!--l. 167--><p class="noindent" >Relative residual in the 2-norm; the iteration is stopped when <!--l. 167--><p class="noindent" >Relative residual in the 2-norm; the iteration is stopped when
<div class="math-display" > <div class="math-display" >
<img <img
src="userhtml35x.png" alt="err = \frac{\|r_i\|}{\|b\|_2} &#x003C; eps src="userhtml35x.png" alt="err = \frac{\|r_i\|}{\|b\|_2} &#x003C; eps
" class="math-display" ></div> " class="math-display" ></div>
<!--l. 169--><p class="nopar" > <!--l. 169--><p class="nopar" >
</dd><dt class="description"> </dd><dt class="description">
<!--l. 170--><p class="noindent" > <!--l. 170--><p class="noindent" >
<span <span
class="cmbx-10">3</span> </dt><dd class="pplb7t-">3</span> </dt><dd
class="description"> class="description">
<!--l. 170--><p class="noindent" >Relative residual reduction in the 2-norm; the iteration is stopped when <!--l. 170--><p class="noindent" >Relative residual reduction in the 2-norm; the iteration is stopped when
<div class="math-display" > <div class="math-display" >
@ -507,20 +511,21 @@ err = &#x2225;r0&#x2225;2 &#x003C; eps
<!--l. 174--><p class="noindent" >The behaviour is controlled by the istop argument (see later). In the above formulae, <span <!--l. 174--><p class="noindent" >The behaviour is controlled by the istop argument (see later). In the above formulae, <span
class="cmmi-10">x</span><sub><span class="zplmr7m-">x</span><sub><span
class="cmmi-7">i</span></sub> class="zplmr7m-x-x-76">i</span></sub>
is the tentative solution and <span is the tentative solution and <span
class="cmmi-10">r</span><sub><span class="zplmr7m-">r</span><sub><span
class="cmmi-7">i</span></sub> = <span class="zplmr7m-x-x-76">i</span></sub> <span
class="cmmi-10">b </span><span class="zplmr7t-">= </span><span
class="cmsy-10">- </span><span class="zplmr7m-">b</span><span
class="cmmi-10">Ax</span><sub><span class="zplmr7y-">-</span><span
class="cmmi-7">i</span></sub> the corresponding residual at the <span class="zplmr7m-">Ax</span><sub><span
class="cmmi-10">i</span>-th class="zplmr7m-x-x-76">i</span></sub> the corresponding residual at the <span
class="zplmr7m-">i</span>-th
iteration. iteration.
<!--l. 179--> <!--l. 179-->
<pre class="lstlisting" id="listing-168"><span class="label"><a <pre class="lstlisting" id="listing-219"><span class="label"><a
id="x17-144001r1"></a></span><span style="color:#000000"><span id="x17-145001r1"></a></span><span style="color:#000000"><span
class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">psb_richardson</span></span><span style="color:#000000"><span class="cmtt-10">psb_richardson</span></span><span style="color:#000000"><span
class="cmtt-10">(</span></span><span style="color:#000000"><span class="cmtt-10">(</span></span><span style="color:#000000"><span
@ -539,7 +544,7 @@ class="cmtt-10">,</span></span><span style="color:#000000"><span
class="cmtt-10">info</span></span><span style="color:#000000"><span class="cmtt-10">info</span></span><span style="color:#000000"><span
class="cmtt-10">,&amp;</span></span> class="cmtt-10">,&amp;</span></span>
<span class="label"><a <span class="label"><a
id="x17-144002r2"></a></span><span id="x17-145002r2"></a></span><span
class="cmtt-10">&#x00A0;</span><span class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span class="cmtt-10">&#x00A0;</span><span
@ -560,28 +565,28 @@ class="cmtt-10">)</span></span></pre>
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 185--><p class="noindent" > <!--l. 185--><p class="noindent" >
<span <span
class="cmbx-10">Type:</span> </dt><dd class="pplb7t-">Type:</span> </dt><dd
class="description"> class="description">
<!--l. 185--><p class="noindent" >Synchronous. <!--l. 185--><p class="noindent" >Synchronous.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 186--><p class="noindent" > <!--l. 186--><p class="noindent" >
<span <span
class="cmbx-10">On Entry</span> </dt><dd class="pplb7t-">On Entry</span> </dt><dd
class="description"> class="description">
<!--l. 186--><p class="noindent" > <!--l. 186--><p class="noindent" >
</dd><dt class="description"> </dd><dt class="description">
<!--l. 187--><p class="noindent" > <!--l. 187--><p class="noindent" >
<span <span
class="cmbx-10">a</span> </dt><dd class="pplb7t-">a</span> </dt><dd
class="description"> class="description">
<!--l. 187--><p class="noindent" >the local portion of global sparse matrix <span <!--l. 187--><p class="noindent" >the local portion of global sparse matrix <span
class="cmmi-10">A</span>. <br class="zplmr7m-">A</span>. <br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">local </span><br class="pplb7t-">local </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">required</span><br class="pplb7t-">required</span><br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">in</span>.<br class="pplb7t-">in</span>.<br
class="newline" />Specified as: a structured data of type <a class="newline" />Specified as: a structured data of type <a
href="userhtmlse3.html#spdata"><span href="userhtmlse3.html#spdata"><span
class="cmtt-10">psb</span><span class="cmtt-10">psb</span><span
@ -590,32 +595,32 @@ class="cmtt-10">_type</span></a>.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 193--><p class="noindent" > <!--l. 193--><p class="noindent" >
<span <span
class="cmbx-10">prec</span> </dt><dd class="pplb7t-">prec</span> </dt><dd
class="description"> class="description">
<!--l. 193--><p class="noindent" >The data structure containing the preconditioner.<br <!--l. 193--><p class="noindent" >The data structure containing the preconditioner.<br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">local </span><br class="pplb7t-">local </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">required</span><br class="pplb7t-">required</span><br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">in</span>.<br class="pplb7t-">in</span>.<br
class="newline" />Specified as: a structured data of type <a class="newline" />Specified as: a structured data of type <a
href="userhtmlse3.html#precdata"><span href="userhtmlse3.html#precdata"><span
class="cmtt-10">psb</span><span class="cmtt-10">psb</span><span
class="cmtt-10">_prec</span><span class="cmtt-10">_Tprec</span><span
class="cmtt-10">_type</span></a>. class="cmtt-10">_type</span></a>.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 198--><p class="noindent" > <!--l. 198--><p class="noindent" >
<span <span
class="cmbx-10">b</span> </dt><dd class="pplb7t-">b</span> </dt><dd
class="description"> class="description">
<!--l. 198--><p class="noindent" >The RHS vector. <br <!--l. 198--><p class="noindent" >The RHS vector. <br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">local </span><br class="pplb7t-">local </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">required</span><br class="pplb7t-">required</span><br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">in</span>.<br class="pplb7t-">in</span>.<br
class="newline" />Specified as: a rank one array or an object of type <a class="newline" />Specified as: a rank one array or an object of type <a
href="userhtmlse3.html#vdata"><span href="userhtmlse3.html#vdata"><span
class="cmtt-10">psb</span><span class="cmtt-10">psb</span><span
@ -625,18 +630,18 @@ class="cmtt-10">_type</span></a>.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 203--><p class="noindent" > <!--l. 203--><p class="noindent" >
<span <span
class="cmbx-10">x</span> </dt><dd class="pplb7t-">x</span> </dt><dd
class="description"> class="description">
<!--l. 203--><p class="noindent" >The initial guess. <br <!--l. 203--><p class="noindent" >The initial guess. <br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">local </span><br class="pplb7t-">local </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">required</span><br class="pplb7t-">required</span><br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">inout</span>.<br class="pplb7t-">inout</span>.<br
class="newline" />Specified as: a rank one array or an object of type <a class="newline" />Specified as: a rank one array or an object of type <a
href="userhtmlse3.html#vdata"><span href="userhtmlse3.html#vdata"><span
class="cmtt-10">psb</span><span class="cmtt-10">psb</span><span
@ -646,29 +651,29 @@ class="cmtt-10">_type</span></a>.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 208--><p class="noindent" > <!--l. 208--><p class="noindent" >
<span <span
class="cmbx-10">eps</span> </dt><dd class="pplb7t-">eps</span> </dt><dd
class="description"> class="description">
<!--l. 208--><p class="noindent" >The stopping tolerance. <br <!--l. 208--><p class="noindent" >The stopping tolerance. <br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">global </span><br class="pplb7t-">global </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">required</span><br class="pplb7t-">required</span><br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">in</span>.<br class="pplb7t-">in</span>.<br
class="newline" />Specified as: a real number. class="newline" />Specified as: a real number.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 213--><p class="noindent" > <!--l. 213--><p class="noindent" >
<span <span
class="cmbx-10">desc</span><span class="pplb7t-">desc</span><span
class="cmbx-10">_a</span> </dt><dd class="pplb7t-">_a</span> </dt><dd
class="description"> class="description">
<!--l. 213--><p class="noindent" >contains data structures for communications.<br <!--l. 213--><p class="noindent" >contains data structures for communications.<br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">local </span><br class="pplb7t-">local </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">required</span><br class="pplb7t-">required</span><br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">in</span>.<br class="pplb7t-">in</span>.<br
class="newline" />Specified as: a structured data of type <a class="newline" />Specified as: a structured data of type <a
href="userhtmlse3.html#descdata"><span href="userhtmlse3.html#descdata"><span
class="cmtt-10">psb</span><span class="cmtt-10">psb</span><span
@ -677,74 +682,77 @@ class="cmtt-10">_type</span></a>.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 218--><p class="noindent" > <!--l. 218--><p class="noindent" >
<span <span
class="cmbx-10">itmax</span> </dt><dd class="pplb7t-">itmax</span> </dt><dd
class="description"> class="description">
<!--l. 218--><p class="noindent" >The maximum number of iterations to perform.<br <!--l. 218--><p class="noindent" >The maximum number of iterations to perform.<br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">global </span><br class="pplb7t-">global </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">optional</span><br class="pplb7t-">optional</span><br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">in</span>.<br class="pplb7t-">in</span>.<br
class="newline" />Default: <span class="newline" />Default: <span
class="cmmi-10">itmax </span>= 1000.<br class="zplmr7m-">itmax </span><span
class="zplmr7t-">= </span>1000.<br
class="newline" />Specified as: an integer variable <span class="newline" />Specified as: an integer variable <span
class="cmmi-10">itmax </span><span class="zplmr7m-">itmax </span><span
class="cmsy-10">&#x2265; </span>1. class="zplmr7y-">&#x2265; </span>1.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 224--><p class="noindent" > <!--l. 224--><p class="noindent" >
<span <span
class="cmbx-10">itrace</span> </dt><dd class="pplb7t-">itrace</span> </dt><dd
class="description"> class="description">
<!--l. 224--><p class="noindent" >If <span <!--l. 224--><p class="noindent" >If <span
class="cmmi-10">&#x003E; </span>0 print out an informational message about convergence every <span class="zplmr7m-">&#x003E; </span>0 print out an informational message about convergence every <span
class="cmmi-10">itrace</span> class="zplmr7m-">itrace</span>
iterations. If = 0 print a message in case of convergence failure.<br iterations. If <span
class="zplmr7t-">= </span>0 print a message in case of convergence failure.<br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">global </span><br class="pplb7t-">global </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">optional</span><br class="pplb7t-">optional</span><br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">in</span>.<br class="pplb7t-">in</span>.<br
class="newline" />Default: <span class="newline" />Default: <span
class="cmmi-10">itrace </span>= <span class="zplmr7m-">itrace </span><span
class="cmsy-10">-</span>1.<br class="zplmr7t-">= </span><span
class="zplmr7y-">-</span>1.<br
class="newline" /> class="newline" />
</dd><dt class="description"> </dd><dt class="description">
<!--l. 232--><p class="noindent" > <!--l. 232--><p class="noindent" >
<span <span
class="cmbx-10">istop</span> </dt><dd class="pplb7t-">istop</span> </dt><dd
class="description"> class="description">
<!--l. 232--><p class="noindent" >An integer specifying the stopping criterion.<br <!--l. 232--><p class="noindent" >An integer specifying the stopping criterion.<br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">global </span><br class="pplb7t-">global </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">optional</span>.<br class="pplb7t-">optional</span>.<br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">in</span>.<br class="pplb7t-">in</span>.<br
class="newline" />Values: 1: use the normwise backward error, 2: use the scaled 2-norm of class="newline" />Values: 1: use the normwise backward error, 2: use the scaled 2-norm of
the residual, 3: use the residual reduction in the 2-norm. Default: 2. the residual, 3: use the residual reduction in the 2-norm. Default: 2.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 238--><p class="noindent" > <!--l. 238--><p class="noindent" >
<span <span
class="cmbx-10">On Return</span> </dt><dd class="pplb7t-">On Return</span> </dt><dd
class="description"> class="description">
<!--l. 238--><p class="noindent" > <!--l. 238--><p class="noindent" >
</dd><dt class="description"> </dd><dt class="description">
<!--l. 239--><p class="noindent" > <!--l. 239--><p class="noindent" >
<span <span
class="cmbx-10">x</span> </dt><dd class="pplb7t-">x</span> </dt><dd
class="description"> class="description">
<!--l. 239--><p class="noindent" >The computed solution. <br <!--l. 239--><p class="noindent" >The computed solution. <br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">local </span><br class="pplb7t-">local </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">required</span><br class="pplb7t-">required</span><br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">inout</span>.<br class="pplb7t-">inout</span>.<br
class="newline" />Specified as: a rank one array or an object of type <a class="newline" />Specified as: a rank one array or an object of type <a
href="userhtmlse3.html#vdata"><span href="userhtmlse3.html#vdata"><span
class="cmtt-10">psb</span><span class="cmtt-10">psb</span><span
@ -754,41 +762,41 @@ class="cmtt-10">_type</span></a>.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 244--><p class="noindent" > <!--l. 244--><p class="noindent" >
<span <span
class="cmbx-10">iter</span> </dt><dd class="pplb7t-">iter</span> </dt><dd
class="description"> class="description">
<!--l. 244--><p class="noindent" >The number of iterations performed.<br <!--l. 244--><p class="noindent" >The number of iterations performed.<br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">global </span><br class="pplb7t-">global </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">optional</span><br class="pplb7t-">optional</span><br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">out</span>.<br class="pplb7t-">out</span>.<br
class="newline" />Returned as: an integer variable. class="newline" />Returned as: an integer variable.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 249--><p class="noindent" > <!--l. 249--><p class="noindent" >
<span <span
class="cmbx-10">err</span> </dt><dd class="pplb7t-">err</span> </dt><dd
class="description"> class="description">
<!--l. 249--><p class="noindent" >The convergence estimate on exit.<br <!--l. 249--><p class="noindent" >The convergence estimate on exit.<br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">global </span><br class="pplb7t-">global </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">optional</span><br class="pplb7t-">optional</span><br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">out</span>.<br class="pplb7t-">out</span>.<br
class="newline" />Returned as: a real number. class="newline" />Returned as: a real number.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 254--><p class="noindent" > <!--l. 254--><p class="noindent" >
<span <span
class="cmbx-10">info</span> </dt><dd class="pplb7t-">info</span> </dt><dd
class="description"> class="description">
<!--l. 254--><p class="noindent" >Error code.<br <!--l. 254--><p class="noindent" >Error code.<br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">local </span><br class="pplb7t-">local </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">required </span><br class="pplb7t-">required </span><br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">out</span>.<br class="pplb7t-">out</span>.<br
class="newline" />An integer value; 0 means no error has been detected.</dd></dl> class="newline" />An integer value; 0 means no error has been detected.</dd></dl>

@ -15,17 +15,17 @@ href="userhtmlse11.html" >prev</a>] [<a
href="userhtmlse11.html#tailuserhtmlse11.html" >prev-tail</a>] [<a href="userhtmlse11.html#tailuserhtmlse11.html" >prev-tail</a>] [<a
href="userhtmlse9.html#tailuserhtmlse12.html">tail</a>] [<a href="userhtmlse9.html#tailuserhtmlse12.html">tail</a>] [<a
href="userhtml.html# " >up</a>] </p></div> href="userhtml.html# " >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">12 </span> <a <h3 class="sectionHead"><span class="titlemark">12 </span> <a
id="x19-14500012"></a>Extensions</h3> id="x19-14600012"></a>Extensions</h3>
<!--l. 3--><p class="noindent" >The EXT, CUDA and RSB subdirectories contain a set of extensions to the base <!--l. 3--><p class="noindent" >The EXT, CUDA and RSB subdirectories contain a set of extensions to the base
library. The extensions provide additional storage formats beyond the ones already library. The extensions provide additional storage formats beyond the ones already
contained in the base library, as well as interfaces to: contained in the base library, as well as interfaces to:
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 8--><p class="noindent" > <!--l. 8--><p class="noindent" >
<span <span
class="cmbx-10">SPGPU</span> </dt><dd class="pplb7t-">SPGPU</span> </dt><dd
class="description"> class="description">
<!--l. 8--><p class="noindent" >a CUDA library originally <!--l. 8--><p class="noindent" >a CUDA library originally
published as <a published as <a
href="https://code.google.com/p/spgpu/" class="url" ><span href="https://code.google.com/p/spgpu/" class="url" ><span
class="cmtt-10">https://code.google.com/p/spgpu/</span></a> and now included class="cmtt-10">https://code.google.com/p/spgpu/</span></a> and now included
@ -34,22 +34,22 @@ class="cmtt-10">cuda</span></span></span> subdir, for computations on NVIDIA GPU
</dd><dt class="description"> </dd><dt class="description">
<!--l. 11--><p class="noindent" > <!--l. 11--><p class="noindent" >
<span <span
class="cmbx-10">LIBRSB</span> </dt><dd class="pplb7t-">LIBRSB</span> </dt><dd
class="description"> class="description">
<!--l. 11--><p class="noindent" ><a <!--l. 11--><p class="noindent" ><a
href="http://sourceforge.net/projects/librsb/" class="url" ><span href="http://sourceforge.net/projects/librsb/" class="url" ><span
class="cmtt-10">http://sourceforge.net/projects/librsb/</span></a>, for computations on class="cmtt-10">http://sourceforge.net/projects/librsb/</span></a>, for computations on
multicore parallel machines.</dd></dl> multicore parallel machines.</dd></dl>
<!--l. 14--><p class="noindent" >The infrastructure laid out in the base library to allow for these extensions is detailed in <!--l. 14--><p class="noindent" >The infrastructure laid out in the base library to allow for these extensions is detailed in
the references&#x00A0;<span class="cite">[<a the references&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#XDesPat:11">20</a>,&#x00A0;<a href="userhtmlli2.html#XDesPat:11">21</a>,&#x00A0;<a
href="userhtmlli2.html#XCaFiRo:2014">21</a>,&#x00A0;<a href="userhtmlli2.html#XCaFiRo:2014">22</a>,&#x00A0;<a
href="userhtmlli2.html#XSparse03">10</a>]</span>; the CUDA-specific data formats are described href="userhtmlli2.html#XSparse03">11</a>]</span>; the CUDA-specific data formats are described
in&#x00A0;<span class="cite">[<a in&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#XOurTechRep">22</a>]</span>. href="userhtmlli2.html#XOurTechRep">23</a>]</span>.
<!--l. 19--><p class="noindent" > <!--l. 19--><p class="noindent" >
<h4 class="subsectionHead"><span class="titlemark">12.1 </span> <a <h4 class="subsectionHead"><span class="titlemark">12.1 </span> <a
id="x19-14600012.1"></a>Using the extensions</h4> id="x19-14700012.1"></a>Using the extensions</h4>
<!--l. 21--><p class="noindent" >A sample application using the PSBLAS extensions will contain the following <!--l. 21--><p class="noindent" >A sample application using the PSBLAS extensions will contain the following
steps: steps:
<ul class="itemize1"> <ul class="itemize1">
@ -60,16 +60,16 @@ class="cmtt-10">psb_ext_mod</span></span></span>, <span class="obeylines-h"><spa
class="cmtt-10">psb_cuda_mod</span></span></span>); class="cmtt-10">psb_cuda_mod</span></span></span>);
</li> </li>
<li class="itemize"> <li class="itemize">
<!--l. 26--><p class="noindent" >Declare a <span <!--l. 26--><p class="noindent" >Declare a <span
class="cmti-10">mold </span>variable of the necessary type (e.g. class="pplri7t-">mold </span>variable of the necessary type (e.g.
<span class="obeylines-h"><span class="verb"><span <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_d_ell_sparse_mat</span></span></span>, <span class="obeylines-h"><span class="verb"><span class="cmtt-10">psb_d_ell_sparse_mat</span></span></span>, <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_d_hlg_sparse_mat</span></span></span>, <span class="obeylines-h"><span class="verb"><span class="cmtt-10">psb_d_hlg_sparse_mat</span></span></span>, <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_d_vect_cuda</span></span></span>); class="cmtt-10">psb_d_vect_cuda</span></span></span>);
</li> </li>
<li class="itemize"> <li class="itemize">
<!--l. 29--><p class="noindent" >Pass the mold variable to the base library interface where needed to ensure <!--l. 29--><p class="noindent" >Pass the mold variable to the base library interface where needed to
the appropriate dynamic type.</li></ul> ensure the appropriate dynamic type.</li></ul>
@ -141,126 +141,128 @@ class="cmtt-10">test/cuda/kernel </span>subdirectories, where we provide sample
speed of the sparse matrix-vector product with the various data structures included speed of the sparse matrix-vector product with the various data structures included
in the library. in the library.
<!--l. 146--><p class="noindent" > <!--l. 146--><p class="noindent" >
<h4 class="subsectionHead"><span class="titlemark">12.2 </span> <a <h4 class="subsectionHead"><span class="titlemark">12.2 </span> <a
id="x19-14700012.2"></a>Extensions&#8217; Data Structures</h4> id="x19-14800012.2"></a>Extensions&#8217; Data Structures</h4>
<!--l. 150--><p class="noindent" >Access to the facilities provided by the EXT library is mainly achieved through <!--l. 150--><p class="noindent" >Access to the facilities provided by the EXT library is mainly achieved through
the data types that are provided within. The data classes are derived from the data types that are provided within. The data classes are derived from
the base classes in PSBLAS, through the Fortran&#x00A0;2003 mechanism of <span the base classes in PSBLAS, through the Fortran&#x00A0;2003 mechanism of <span
class="cmti-10">type</span> class="pplri7t-">type</span>
<span <span
class="cmti-10">extension</span>&#x00A0;<span class="cite">[<a class="pplri7t-">extension</span>&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#XMRC:11">17</a>]</span>. href="userhtmlli2.html#XMRC:11">18</a>]</span>.
<!--l. 155--><p class="indent" > The data classes are divided between the general purpose CPU extensions, the <!--l. 155--><p class="indent" > The data classes are divided between the general purpose CPU extensions, the
GPU interfaces and the RSB interfaces. In the description we will make use of the GPU interfaces and the RSB interfaces. In the description we will make use of the
notation introduced in Table&#x00A0;<a notation introduced in Table&#x00A0;<a
href="#x19-147001r21">21<!--tex4ht:ref: tab:notation --></a>. href="#x19-148001r22">22<!--tex4ht:ref: tab:notation --></a>.
<div class="table"> <div class="table">
<!--l. 160--><p class="indent" > <a <!--l. 160--><p class="indent" > <a
id="x19-147001r21"></a><hr class="float"><div class="float" id="x19-148001r22"></a><hr class="float"><div class="float"
> >
<div class="caption" <div class="caption"
><span class="id">Table&#x00A0;21: </span><span ><span class="id">Table&#x00A0;22: </span><span
class="content">Notation for parameters describing a sparse matrix</span></div><!--tex4ht:label?: x19-147001r21 --> class="content">Notation for parameters describing a sparse matrix</span></div><!--tex4ht:label?: x19-148001r22 -->
<div class="center" <div class="center"
> >
<!--l. 162--><p class="noindent" > <!--l. 162--><p class="noindent" >
<div class="tabular"> <table id="TBL-23" class="tabular" <div class="tabular"> <table id="TBL-25" class="tabular"
><colgroup id="TBL-23-1g"><col ><colgroup id="TBL-25-1g"><col
id="TBL-23-1"><col id="TBL-25-1"><col
id="TBL-23-2"></colgroup><tr id="TBL-25-2"></colgroup><tr
class="hline"><td><hr></td><td><hr></td></tr><tr class="hline"><td><hr></td><td><hr></td></tr><tr
style="vertical-align:baseline;" id="TBL-23-1-"><td style="white-space:nowrap; text-align:left;" id="TBL-23-1-1" style="vertical-align:baseline;" id="TBL-25-1-"><td style="white-space:nowrap; text-align:left;" id="TBL-25-1-1"
class="td11"><span class="td11"><span
class="cmr-8">Name </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-23-1-2" class="pplr7t-x-x-80">Name </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-25-1-2"
class="td11"><span class="td11"><span
class="cmr-8">Description </span></td> class="pplr7t-x-x-80">Description </span></td>
</tr><tr </tr><tr
class="hline"><td><hr></td><td><hr></td></tr><tr class="hline"><td><hr></td><td><hr></td></tr><tr
style="vertical-align:baseline;" id="TBL-23-2-"><td style="white-space:nowrap; text-align:left;" id="TBL-23-2-1" style="vertical-align:baseline;" id="TBL-25-2-"><td style="white-space:nowrap; text-align:left;" id="TBL-25-2-1"
class="td11"><span class="td11"><span
class="cmr-8">M </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-23-2-2" class="pplr7t-x-x-80">M </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-25-2-2"
class="td11"><span class="td11"><span
class="cmr-8">Number of rows in matrix </span></td></tr><tr class="pplr7t-x-x-80">Number of rows in matrix </span></td></tr><tr
style="vertical-align:baseline;" id="TBL-23-3-"><td style="white-space:nowrap; text-align:left;" id="TBL-23-3-1" style="vertical-align:baseline;" id="TBL-25-3-"><td style="white-space:nowrap; text-align:left;" id="TBL-25-3-1"
class="td
11"><span
class="pplr7t-x-x-80">N </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-25-3-2"
class="td11"><span class="td11"><span
class="cmr-8">N </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-23-3-2" class="pplr7t-x-x-80">Number of columns in matrix </span></td></tr><tr
style="vertical-align:baseline;" id="TBL-25-4-"><td style="white-space:nowrap; text-align:left;" id="TBL-25-4-1"
class="td
11"><span
class="pplr7t-x-x-80">NZ </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-25-4-2"
class="td11"><span class="td11"><span
class="cmr-8">Number of columns in matrix</span></td> class="pplr7t-x-x-80">Number of nonzeros in matrix </span></td></tr><tr
</tr><tr style="vertical-align:baseline;" id="TBL-25-5-"><td style="white-space:nowrap; text-align:left;" id="TBL-25-5-1"
style="vertical-align:baseline;" id="TBL-23-4-"><td style="white-space:nowrap; text-align:left;" id="TBL-23-4-1" class="td
class="td11"><span 11"><span
class="cmr-8">NZ </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-23-4-2" class="pplr7t-x-x-80">AVGNZR </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-25-5-2"
class="td11"><span
class="cmr-8">Number of nonzeros in matrix </span></td></tr><tr
style="vertical-align:baseline;" id="TBL-23-5-"><td style="white-space:nowrap; text-align:left;" id="TBL-23-5-1"
class="td11"><span class="td11"><span
class="cmr-8">AVGNZR </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-23-5-2" class="pplr7t-x-x-80">Average number of nonzeros per row </span></td>
class="td11"><span
class="cmr-8">Average number of nonzeros per row</span></td>
</tr><tr </tr><tr
style="vertical-align:baseline;" id="TBL-23-6-"><td style="white-space:nowrap; text-align:left;" id="TBL-23-6-1" style="vertical-align:baseline;" id="TBL-25-6-"><td style="white-space:nowrap; text-align:left;" id="TBL-25-6-1"
class="td11"><span class="td11"><span
class="cmr-8">MAXNZR</span></td><td style="white-space:nowrap; text-align:left;" id="TBL-23-6-2" class="pplr7t-x-x-80">MAXNZR</span></td><td style="white-space:nowrap; text-align:left;" id="TBL-25-6-2"
class="td11"><span class="td11"><span
class="cmr-8">Maximum number of nonzeros per row</span></td> class="pplr7t-x-x-80">Maximum number of nonzeros per row</span></td>
</tr><tr </tr><tr
style="vertical-align:baseline;" id="TBL-23-7-"><td style="white-space:nowrap; text-align:left;" id="TBL-23-7-1" style="vertical-align:baseline;" id="TBL-25-7-"><td style="white-space:nowrap; text-align:left;" id="TBL-25-7-1"
class="td11"><span class="td11"><span
class="cmr-8">NDIAG </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-23-7-2" class="pplr7t-x-x-80">NDIAG </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-25-7-2"
class="td11"><span class="td11"><span
class="cmr-8">Number of nonzero diagonals </span></td> class="pplr7t-x-x-80">Number of nonzero diagonals </span></td>
</tr><tr </tr><tr
style="vertical-align:baseline;" id="TBL-23-8-"><td style="white-space:nowrap; text-align:left;" id="TBL-23-8-1" style="vertical-align:baseline;" id="TBL-25-8-"><td style="white-space:nowrap; text-align:left;" id="TBL-25-8-1"
class="td11"><span class="td11"><span
class="cmr-8">AS </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-23-8-2" class="pplr7t-x-x-80">AS </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-25-8-2"
class="td11"><span class="td11"><span
class="cmr-8">Coefficients array </span></td> class="pplr7t-x-x-80">Coefficients array </span></td>
</tr><tr </tr><tr
style="vertical-align:baseline;" id="TBL-23-9-"><td style="white-space:nowrap; text-align:left;" id="TBL-23-9-1" style="vertical-align:baseline;" id="TBL-25-9-"><td style="white-space:nowrap; text-align:left;" id="TBL-25-9-1"
class="td11"><span class="td11"><span
class="cmr-8">IA </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-23-9-2" class="pplr7t-x-x-80">IA </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-25-9-2"
class="td11"><span class="td11"><span
class="cmr-8">Row indices array </span></td> class="pplr7t-x-x-80">Row indices array </span></td>
</tr><tr </tr><tr
style="vertical-align:baseline;" id="TBL-23-10-"><td style="white-space:nowrap; text-align:left;" id="TBL-23-10-1" style="vertical-align:baseline;" id="TBL-25-10-"><td style="white-space:nowrap; text-align:left;" id="TBL-25-10-1"
class="td11"><span class="td11"><span
class="cmr-8">JA </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-23-10-2" class="pplr7t-x-x-80">JA </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-25-10-2"
class="td11"><span class="td11"><span
class="cmr-8">Column indices array </span></td> class="pplr7t-x-x-80">Column indices array </span></td>
</tr><tr </tr><tr
style="vertical-align:baseline;" id="TBL-23-11-"><td style="white-space:nowrap; text-align:left;" id="TBL-23-11-1" style="vertical-align:baseline;" id="TBL-25-11-"><td style="white-space:nowrap; text-align:left;" id="TBL-25-11-1"
class="td11"><span class="td11"><span
class="cmr-8">IRP </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-23-11-2" class="pplr7t-x-x-80">IRP </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-25-11-2"
class="td11"><span class="td11"><span
class="cmr-8">Row start pointers array </span></td> class="pplr7t-x-x-80">Row start pointers array </span></td>
</tr><tr </tr><tr
style="vertical-align:baseline;" id="TBL-23-12-"><td style="white-space:nowrap; text-align:left;" id="TBL-23-12-1" style="vertical-align:baseline;" id="TBL-25-12-"><td style="white-space:nowrap; text-align:left;" id="TBL-25-12-1"
class="td11"><span class="td11"><span
class="cmr-8">JCP </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-23-12-2" class="pplr7t-x-x-80">JCP </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-25-12-2"
class="td11"><span class="td11"><span
class="cmr-8">Column start pointers array </span></td> class="pplr7t-x-x-80">Column start pointers array </span></td>
</tr><tr </tr><tr
style="vertical-align:baseline;" id="TBL-23-13-"><td style="white-space:nowrap; text-align:left;" id="TBL-23-13-1" style="vertical-align:baseline;" id="TBL-25-13-"><td style="white-space:nowrap; text-align:left;" id="TBL-25-13-1"
class="td11"><span class="td11"><span
class="cmr-8">NZR </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-23-13-2" class="pplr7t-x-x-80">NZR </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-25-13-2"
class="td11"><span class="td11"><span
class="cmr-8">Number of nonzeros per row array </span></td> class="pplr7t-x-x-80">Number of nonzeros per row array </span></td>
</tr><tr </tr><tr
style="vertical-align:baseline;" id="TBL-23-14-"><td style="white-space:nowrap; text-align:left;" id="TBL-23-14-1" style="vertical-align:baseline;" id="TBL-25-14-"><td style="white-space:nowrap; text-align:left;" id="TBL-25-14-1"
class="td11"><span class="td11"><span
class="cmr-8">OFFSET </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-23-14-2" class="pplr7t-x-x-80">OFFSET </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-25-14-2"
class="td11"><span class="td11"><span
class="cmr-8">Offset for diagonals </span></td> class="pplr7t-x-x-80">Offset for diagonals </span></td>
</tr><tr </tr><tr
class="hline"><td><hr></td><td><hr></td></tr><tr class="hline"><td><hr></td><td><hr></td></tr><tr
style="vertical-align:baseline;" id="TBL-23-15-"><td style="white-space:nowrap; text-align:left;" id="TBL-23-15-1" style="vertical-align:baseline;" id="TBL-25-15-"><td style="white-space:nowrap; text-align:left;" id="TBL-25-15-1"
class="td11"> </td></tr></table> </div> class="td11"> </td></tr></table> </div>
</div> </div>
@ -274,7 +276,7 @@ class="td11"> </td></tr></table>
<a <a
id="x19-147002r5"></a> id="x19-148002r5"></a>
@ -283,18 +285,18 @@ src="mat.png" alt="PIC"
width="147" height="147" > width="147" height="147" >
<br /> <div class="caption" <br /> <div class="caption"
><span class="id">Figure&#x00A0;5: </span><span ><span class="id">Figure&#x00A0;5: </span><span
class="content">Example of sparse matrix</span></div><!--tex4ht:label?: x19-147002r5 --> class="content">Example of sparse matrix</span></div><!--tex4ht:label?: x19-148002r5 -->
<!--l. 198--><p class="indent" > </div><hr class="endfigure"> <!--l. 198--><p class="indent" > </div><hr class="endfigure">
<h4 class="subsectionHead"><span class="titlemark">12.3 </span> <a <h4 class="subsectionHead"><span class="titlemark">12.3 </span> <a
id="x19-14800012.3"></a>CPU-class extensions</h4> id="x19-14900012.3"></a>CPU-class extensions</h4>
<!--l. 203--><p class="noindent" > <!--l. 203--><p class="noindent" >
<h5 class="likesubsubsectionHead"><a <h5 class="likesubsubsectionHead"><a
id="x19-149000"></a>ELLPACK</h5> id="x19-150000"></a>ELLPACK</h5>
<!--l. 205--><p class="noindent" >The ELLPACK/ITPACK format (shown in Figure&#x00A0;<a <!--l. 205--><p class="noindent" >The ELLPACK/ITPACK format (shown in Figure&#x00A0;<a
href="#x19-149001r6">6<!--tex4ht:ref: fig:ell --></a>) comprises two 2-dimensional href="#x19-150001r6">6<!--tex4ht:ref: fig:ell --></a>) comprises two 2-dimensional
arrays <span class="obeylines-h"><span class="verb"><span arrays <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">AS</span></span></span> and <span class="obeylines-h"><span class="verb"><span class="cmtt-10">AS</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">JA</span></span></span> with <span class="obeylines-h"><span class="verb"><span class="cmtt-10">JA</span></span></span> with <span class="obeylines-h"><span class="verb"><span
@ -302,7 +304,7 @@ class="cmtt-10">M</span></span></span> rows and <span class="obeylines-h"><span
class="cmtt-10">MAXNZR</span></span></span> columns, where <span class="obeylines-h"><span class="verb"><span class="cmtt-10">MAXNZR</span></span></span> columns, where <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">MAXNZR</span></span></span> is the maximum class="cmtt-10">MAXNZR</span></span></span> is the maximum
number of nonzeros in any row&#x00A0;<span class="cite">[<span number of nonzeros in any row&#x00A0;<span class="cite">[<span
class="cmbx-10">?</span>]</span>. Each row of the arrays <span class="obeylines-h"><span class="verb"><span class="pplb7t-">?</span>]</span>. Each row of the arrays <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">AS</span></span></span> and <span class="obeylines-h"><span class="verb"><span class="cmtt-10">AS</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">JA</span></span></span> contains the class="cmtt-10">JA</span></span></span> contains the
coefficients and column indices; rows shorter than <span class="obeylines-h"><span class="verb"><span coefficients and column indices; rows shorter than <span class="obeylines-h"><span class="verb"><span
@ -315,7 +317,7 @@ row.
<a <a
id="x19-149001r6"></a> id="x19-150001r6"></a>
@ -325,13 +327,13 @@ width="233" height="233" >
<br /> <div class="caption" <br /> <div class="caption"
><span class="id">Figure&#x00A0;6: </span><span ><span class="id">Figure&#x00A0;6: </span><span
class="content">ELLPACK compression of matrix in Figure&#x00A0;<a class="content">ELLPACK compression of matrix in Figure&#x00A0;<a
href="#x19-147002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-149001r6 --> href="#x19-148002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-150001r6 -->
<!--l. 225--><p class="indent" > </div><hr class="endfigure"> <!--l. 225--><p class="indent" > </div><hr class="endfigure">
<a <a
id="x19-149002r1"></a> id="x19-150002r1"></a>
@ -341,9 +343,8 @@ href="#x19-147002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:l
<!--l. 231--> <!--l. 231-->
<pre class="lstlisting" id="listing-169"><span class="label"><a <pre class="lstlisting" id="listing-220"><span class="label"><a
id="x19-149003r1"></a></span><span id="x19-150003r1"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
@ -352,8 +353,7 @@ class="cmtt-9">i</span></span><span style="color:#000000"><span
class="cmtt-9">=1,</span></span><span style="color:#000000"><span class="cmtt-9">=1,</span></span><span style="color:#000000"><span
class="cmtt-9">n</span></span> class="cmtt-9">n</span></span>
<span class="label"><a <span class="label"><a
id="x19-149004r2"></a></span><span id="x19-150004r2"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
@ -362,8 +362,7 @@ class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">t</span></span><span style="color:#000000"><span class="cmtt-9">t</span></span><span style="color:#000000"><span
class="cmtt-9">=0</span></span> class="cmtt-9">=0</span></span>
<span class="label"><a <span class="label"><a
id="x19-149005r3"></a></span><span id="x19-150005r3"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
@ -374,8 +373,7 @@ class="cmtt-9">j</span></span><span style="color:#000000"><span
class="cmtt-9">=1,</span></span><span style="color:#000000"><span class="cmtt-9">=1,</span></span><span style="color:#000000"><span
class="cmtt-9">maxnzr</span></span> class="cmtt-9">maxnzr</span></span>
<span class="label"><a <span class="label"><a
id="x19-149006r4"></a></span><span id="x19-150006r4"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
@ -403,8 +401,7 @@ class="cmtt-9">,</span></span><span style="color:#000000"><span
class="cmtt-9">j</span></span><span style="color:#000000"><span class="cmtt-9">j</span></span><span style="color:#000000"><span
class="cmtt-9">))</span></span> class="cmtt-9">))</span></span>
<span class="label"><a <span class="label"><a
id="x19-149007r5"></a></span><span id="x19-150007r5"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
@ -413,8 +410,7 @@ class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">end</span></span><span style="color:#000000"> </span><span style="color:#000000"><span class="cmtt-9">end</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">do</span></span> class="cmtt-9">do</span></span>
<span class="label"><a <span class="label"><a
id="x19-149008r6"></a></span><span id="x19-150008r6"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
@ -427,8 +423,7 @@ class="cmtt-9">)</span></span><span style="color:#000000"> </span><span style="c
class="cmtt-9">=</span></span><span style="color:#000000"> </span><span style="color:#000000"><span class="cmtt-9">=</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">t</span></span> class="cmtt-9">t</span></span>
<span class="label"><a <span class="label"><a
id="x19-149009r7"></a></span><span id="x19-150009r7"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
@ -436,35 +431,36 @@ class="cmtt-9">end</span></span><span style="color:#000000"> </span><span style=
class="cmtt-9">do</span></span></pre> class="cmtt-9">do</span></span></pre>
<a <a
id="x19-149010r1"></a> id="x19-150010r1"></a>
<a <a
id="x19-149011"></a> id="x19-150011"></a>
<span <span
class="cmbx-10">Algorithm</span><span class="pplb7t-">Algorithm</span><span
class="cmbx-10">&#x00A0;1:</span>&#x00A0; Matrix-Vector product in ELL format class="pplb7t-">&#x00A0;1:</span>&#x00A0; Matrix-Vector product in ELL format
</div><hr class="endfloat" /> </div><hr class="endfloat" />
<!--l. 242--><p class="indent" > The matrix-vector product <span <!--l. 242--><p class="indent" > The matrix-vector product <span
class="cmmi-10">y </span>= <span class="zplmr7m-">y </span><span
class="cmmi-10">Ax </span>can be computed with the code shown in class="zplmr7t-">= </span><span
class="zplmr7m-">Ax </span>can be computed with the code shown in
Alg.&#x00A0;<a Alg.&#x00A0;<a
href="#x19-149010r1">1<!--tex4ht:ref: alg:ell --></a>; it costs one memory write per outer iteration, plus three memory reads and href="#x19-150010r1">1<!--tex4ht:ref: alg:ell --></a>; it costs one memory write per outer iteration, plus three memory reads and
two floating-point operations per inner iteration. two floating-point operations per inner iteration.
<!--l. 247--><p class="indent" > Unless all rows have exactly the same number of nonzeros, some of the coefficients <!--l. 247--><p class="indent" > Unless all rows have exactly the same number of nonzeros, some of the
in the <span class="obeylines-h"><span class="verb"><span coefficients in the <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">AS</span></span></span> array will be zeros; therefore this data structure will have an overhead both class="cmtt-10">AS</span></span></span> array will be zeros; therefore this data structure will have an
in terms of memory space and redundant operations (multiplications by zero). The overhead both in terms of memory space and redundant operations (multiplications
overhead can be acceptable if: by zero). The overhead can be acceptable if:
<ol class="enumerate1" > <ol class="enumerate1" >
<li <li
class="enumerate" id="x19-149013x1"> class="enumerate" id="x19-150013x1">
<!--l. 253--><p class="noindent" >The maximum number of nonzeros per row is not much larger than the <!--l. 253--><p class="noindent" >The maximum number of nonzeros per row is not much larger than the
average; average;
</li> </li>
<li <li
class="enumerate" id="x19-149015x2"> class="enumerate" id="x19-150015x2">
<!--l. 255--><p class="noindent" >The regularity of the data structure allows for faster code, e.g. by allowing <!--l. 255--><p class="noindent" >The regularity of the data structure allows for faster code, e.g. by allowing
vectorization, thereby offsetting the additional storage requirements.</li></ol> vectorization, thereby offsetting the additional storage requirements.</li></ol>
<!--l. 259--><p class="noindent" >In the extreme case where the input matrix has one full row, the ELLPACK <!--l. 259--><p class="noindent" >In the extreme case where the input matrix has one full row, the ELLPACK
@ -492,62 +488,72 @@ class="cmtt-10">psb_T_ell_sparse_mat</span></span></span>:
</pre> </pre>
<!--l. 295--><p class="nopar" > </div></div> <!--l. 295--><p class="nopar" > </div></div>
<h5 class="likesubsubsectionHead"><a <h5 class="likesubsubsectionHead"><a
id="x19-150000"></a>Hacked ELLPACK</h5> id="x19-151000"></a>Hacked ELLPACK</h5>
<!--l. 303--><p class="noindent" >The <span
class="cmti-10">hacked ELLPACK </span>(<span
class="cmbx-10">HLL</span>) format alleviates the main problem of the ELLPACK
format, that is, the amount of memory required by padding for sparse matrices in
which the maximum row length is larger than the average.
<!--l. 303--><p class="noindent" >The <span
class="pplri7t-">hacked ELLPACK </span>(<span
class="pplb7t-">HLL</span>) format alleviates the main problem of the ELLPACK
format, that is, the amount of memory required by padding for sparse matrices in
which the maximum row length is larger than the average.
<!--l. 308--><p class="indent" > The number of elements allocated to padding is <!--l. 308--><p class="indent" > The number of elements allocated to padding is
[(<span <span
class="cmmi-10">m</span><span class="zplmr7t-">[(</span><span
class="cmsy-10">*</span><span class="zplmr7m-">m</span><span
class="cmmi-10">maxNR</span>) <span class="zplmr7y-">*</span><span
class="cmsy-10">- </span>(<span class="zplmr7m-">maxNR</span><span
class="cmmi-10">m</span><span class="zplmr7t-">) </span><span
class="cmsy-10">*</span><span class="zplmr7y-">- </span><span
class="cmmi-10">avgNR</span>) = <span class="zplmr7t-">(</span><span
class="cmmi-10">m</span><span class="zplmr7m-">m</span><span
class="cmsy-10">* </span>(<span class="zplmr7y-">*</span><span
class="cmmi-10">maxNR</span><span class="zplmr7m-">avgNR</span><span
class="cmsy-10">-</span><span class="zplmr7t-">) = </span><span
class="cmmi-10">avgNR</span>)] for both <span class="obeylines-h"><span class="verb"><span class="zplmr7m-">m</span><span
class="zplmr7y-">* </span><span
class="zplmr7t-">(</span><span
class="zplmr7m-">maxNR</span><span
class="zplmr7y-">-</span><span
class="zplmr7m-">avgNR</span><span
class="zplmr7t-">)] </span>for both <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">AS</span></span></span> and <span class="obeylines-h"><span class="verb"><span class="cmtt-10">AS</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">JA</span></span></span> arrays, class="cmtt-10">JA</span></span></span> arrays,
where <span where <span
class="cmmi-10">m </span>is equal to the number of rows of the matrix, <span class="zplmr7m-">m </span>is equal to the number of rows of the matrix, <span
class="cmmi-10">maxNR </span>is the maximum class="zplmr7m-">maxNR </span>is the maximum
number of nonzero elements in any row and <span number of nonzero elements in any row and <span
class="cmmi-10">avgNR </span>is the average number of class="zplmr7m-">avgNR </span>is the average number of
nonzeros. Therefore a single densely populated row can seriously affect the total size nonzeros. Therefore a single densely populated row can seriously affect the total size
of the allocation. of the allocation.
<!--l. 317--><p class="indent" > To limit this effect, in the HLL format we break the original matrix into equally <!--l. 317--><p class="indent" > To limit this effect, in the HLL format we break the original matrix into equally
sized groups of rows (called <span sized groups of rows (called <span
class="cmti-10">hacks</span>), and then store these groups as independent class="pplri7t-">hacks</span>), and then store these groups as independent
matrices in ELLPACK format. The groups can be arranged selecting rows in an matrices in ELLPACK format. The groups can be arranged selecting rows in an
arbitrary manner; indeed, if the rows are sorted by decreasing number of nonzeros arbitrary manner; indeed, if the rows are sorted by decreasing number of nonzeros
we obtain essentially the JAgged Diagonals format. If the rows are not in the original we obtain essentially the JAgged Diagonals format. If the rows are not in the original
order, then an additional vector <span order, then an additional vector <span
class="cmti-10">rIdx </span>is required, storing the actual row index for class="pplri7t-">rIdx </span>is required, storing the actual row index for each
each row in the data structure. row in the data structure.
<!--l. 327--><p class="indent" > The multiple ELLPACK-like buffers are stacked together inside a single, one <!--l. 327--><p class="indent" > The multiple ELLPACK-like buffers are stacked together inside a single, one
dimensional array; an additional vector <span dimensional array; an additional vector <span
class="cmti-10">hackOffsets </span>is provided to keep track of the class="pplri7t-">hackOffsets </span>is provided to keep track of the
individual submatrices. All hacks have the same number of rows <span individual submatrices. All hacks have the same number of rows <span
class="cmti-10">hackSize</span>; hence, the class="pplri7t-">hackSize</span>; hence, the
<span <span
class="cmti-10">hackOffsets </span>vector is an array of (<span class="pplri7t-">hackOffsets </span>vector is an array of <span
class="cmmi-10">m&#x2215;hackSize</span>) + 1 elements, each one pointing to class="zplmr7t-">(</span><span
the first index of a submatrix inside the stacked <span class="zplmr7m-">m</span><span
class="cmti-10">cM</span>/<span class="zplmr7t-">/</span><span
class="cmti-10">rP </span>buffers, plus an additional class="zplmr7m-">hackSize</span><span
class="zplmr7t-">) + </span>1 elements, each one pointing to the
first index of a submatrix inside the stacked <span
class="pplri7t-">cM</span>/<span
class="pplri7t-">rP </span>buffers, plus an additional
element pointing past the end of the last block, where the next one would begin. We element pointing past the end of the last block, where the next one would begin. We
thus have the property that the elements of the <span thus have the property that the elements of the <span
class="cmmi-10">k</span>-th <span class="zplmr7m-">k</span>-th <span
class="cmti-10">hack </span>are stored between class="pplri7t-">hack </span>are stored between
<span class="obeylines-h"><span class="verb"><span <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">hackOffsets[k]</span></span></span> and <span class="obeylines-h"><span class="verb"><span class="cmtt-10">hackOffsets[k]</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">hackOffsets[k+1]</span></span></span>, similarly to what happens in the CSR class="cmtt-10">hackOffsets[k+1]</span></span></span>, similarly to what happens in the CSR
@ -558,7 +564,7 @@ format.
<a <a
id="x19-150001r7"></a> id="x19-151001r7"></a>
@ -568,7 +574,7 @@ width="248" height="248" >
<br /> <div class="caption" <br /> <div class="caption"
><span class="id">Figure&#x00A0;7: </span><span ><span class="id">Figure&#x00A0;7: </span><span
class="content">Hacked ELLPACK compression of matrix in Figure&#x00A0;<a class="content">Hacked ELLPACK compression of matrix in Figure&#x00A0;<a
href="#x19-147002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-150001r7 --> href="#x19-148002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-151001r7 -->
@ -595,9 +601,9 @@ class="cmtt-10">psb_T_hll_sparse_mat</span></span></span>:
</pre> </pre>
<!--l. 388--><p class="nopar" > </div></div> <!--l. 388--><p class="nopar" > </div></div>
<h5 class="likesubsubsectionHead"><a <h5 class="likesubsubsectionHead"><a
id="x19-151000"></a>Diagonal storage</h5> id="x19-152000"></a>Diagonal storage</h5>
<!--l. 396--><p class="noindent" >The DIAgonal (DIA) format (shown in Figure&#x00A0;<a <!--l. 396--><p class="noindent" >The DIAgonal (DIA) format (shown in Figure&#x00A0;<a
href="#x19-151001r8">8<!--tex4ht:ref: fig:dia --></a>) has a 2-dimensional array <span class="obeylines-h"><span class="verb"><span href="#x19-152001r8">8<!--tex4ht:ref: fig:dia --></a>) has a 2-dimensional array <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">AS</span></span></span> class="cmtt-10">AS</span></span></span>
containing in each column the coefficients along a diagonal of the matrix, and an containing in each column the coefficients along a diagonal of the matrix, and an
integer array <span class="obeylines-h"><span class="verb"><span integer array <span class="obeylines-h"><span class="verb"><span
@ -605,9 +611,10 @@ class="cmtt-10">OFFSET</span></span></span> that determines where each diagonal
class="cmtt-10">AS</span></span></span> class="cmtt-10">AS</span></span></span>
are padded with zeros as necessary. are padded with zeros as necessary.
<!--l. 402--><p class="indent" > The code to compute the matrix-vector product <span <!--l. 402--><p class="indent" > The code to compute the matrix-vector product <span
class="cmmi-10">y </span>= <span class="zplmr7m-">y </span><span
class="cmmi-10">Ax </span>is shown in Alg.&#x00A0;<a class="zplmr7t-">= </span><span
href="#x19-151003r2">2<!--tex4ht:ref: alg:dia --></a>; it class="zplmr7m-">Ax </span>is shown in Alg.&#x00A0;<a
href="#x19-152003r2">2<!--tex4ht:ref: alg:dia --></a>; it
costs one memory read per outer iteration, plus three memory reads, one memory costs one memory read per outer iteration, plus three memory reads, one memory
write and two floating-point operations per inner iteration. The accesses to write and two floating-point operations per inner iteration. The accesses to
<span class="obeylines-h"><span class="verb"><span <span class="obeylines-h"><span class="verb"><span
@ -620,7 +627,7 @@ required.
<a <a
id="x19-151001r8"></a> id="x19-152001r8"></a>
@ -630,13 +637,13 @@ width="248" height="248" >
<br /> <div class="caption" <br /> <div class="caption"
><span class="id">Figure&#x00A0;8: </span><span ><span class="id">Figure&#x00A0;8: </span><span
class="content">DIA compression of matrix in Figure&#x00A0;<a class="content">DIA compression of matrix in Figure&#x00A0;<a
href="#x19-147002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-151001r8 --> href="#x19-148002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-152001r8 -->
<!--l. 419--><p class="indent" > </div><hr class="endfigure"> <!--l. 419--><p class="indent" > </div><hr class="endfigure">
<a <a
id="x19-151002r2"></a> id="x19-152002r2"></a>
@ -662,12 +669,12 @@ href="#x19-147002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:l
</pre> </pre>
<!--l. 450--><p class="nopar" > </div></div> <!--l. 450--><p class="nopar" > </div></div>
<a <a
id="x19-151003r2"></a> id="x19-152003r2"></a>
<a <a
id="x19-151004"></a> id="x19-152004"></a>
<span <span
class="cmbx-10">Algorithm</span><span class="pplb7t-">Algorithm</span><span
class="cmbx-10">&#x00A0;2:</span>&#x00A0; Matrix-Vector product in DIA format class="pplb7t-">&#x00A0;2:</span>&#x00A0; Matrix-Vector product in DIA format
@ -691,7 +698,7 @@ class="cmtt-10">psb_T_dia_sparse_mat</span></span></span>:
</pre> </pre>
<!--l. 486--><p class="nopar" > </div></div> <!--l. 486--><p class="nopar" > </div></div>
<h5 class="likesubsubsectionHead"><a <h5 class="likesubsubsectionHead"><a
id="x19-152000"></a>Hacked DIA</h5> id="x19-153000"></a>Hacked DIA</h5>
<!--l. 495--><p class="noindent" >Storage by DIAgonals is an attractive option for matrices whose coefficients are <!--l. 495--><p class="noindent" >Storage by DIAgonals is an attractive option for matrices whose coefficients are
located on a small set of diagonals, since they do away with storing explicitly the located on a small set of diagonals, since they do away with storing explicitly the
indices and therefore reduce significantly memory traffic. However, having a few indices and therefore reduce significantly memory traffic. However, having a few
@ -705,40 +712,44 @@ class="cmtt-10">y</span></span></span> is too large to remain
in the cache memory, the associated cache miss penalty is paid multiple in the cache memory, the associated cache miss penalty is paid multiple
times. times.
<!--l. 507--><p class="indent" > The <span <!--l. 507--><p class="indent" > The <span
class="cmti-10">hacked DIA </span>(<span class="pplri7t-">hacked DIA </span>(<span
class="cmbx-10">HDIA</span>) format was designed to contain the amount of padding, class="pplb7t-">HDIA</span>) format was designed to contain the amount of padding,
by breaking the original matrix into equally sized groups of rows (<span by breaking the original matrix into equally sized groups of rows (<span
class="cmti-10">hacks</span>), and then class="pplri7t-">hacks</span>), and then
storing these groups as independent matrices in DIA format. This approach is similar storing these groups as independent matrices in DIA format. This approach is
to that of HLL, and requires using an offset vector for each submatrix. Again, similar to that of HLL, and requires using an offset vector for each submatrix. Again,
similarly to HLL, the various submatrices are stacked inside a linear array to similarly to HLL, the various submatrices are stacked inside a linear array to
improve memory management. The fact that the matrix is accessed in slices improve memory management. The fact that the matrix is accessed in slices
helps in reducing cache misses, especially regarding accesses to the vector helps in reducing cache misses, especially regarding accesses to the vector
<span class="obeylines-h"><span class="verb"><span <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">y</span></span></span>. class="cmtt-10">y</span></span></span>.
<!--l. 519--><p class="indent" > An additional vector <span <!--l. 519--><p class="indent" > An additional vector <span
class="cmti-10">hackOffsets </span>is provided to complete the matrix format; given class="pplri7t-">hackOffsets </span>is provided to complete the matrix format; given
that <span that <span
class="cmti-10">hackSize </span>is the number of rows of each hack, the <span class="pplri7t-">hackSize </span>is the number of rows of each hack, the <span
class="cmti-10">hackOffsets </span>vector is class="pplri7t-">hackOffsets </span>vector is an
an array of (<span array of <span
class="cmmi-10">m&#x2215;hackSize</span>) + 1 elements, pointing to the first diagonal offset of a class="zplmr7t-">(</span><span
class="zplmr7m-">m</span><span
class="zplmr7t-">/</span><span
class="zplmr7m-">hackSize</span><span
class="zplmr7t-">) + </span>1 elements, pointing to the first diagonal offset of a
submatrix inside the stacked <span submatrix inside the stacked <span
class="cmti-10">offsets </span>buffers, plus an additional element equal to the class="pplri7t-">offsets </span>buffers, plus an additional element equal to the
number of nonzero diagonals in the whole matrix. We thus have the property that number of nonzero diagonals in the whole matrix. We thus have the property
the number of diagonals of the <span that the number of diagonals of the <span
class="cmmi-10">k</span>-th <span class="zplmr7m-">k</span>-th <span
class="cmti-10">hack </span>is given by <span class="pplri7t-">hack </span>is given by <span
class="cmti-10">hackOffsets[k+1] -</span> class="pplri7t-">hackOffsets[k+1] -</span>
<span <span
class="cmti-10">hackOffsets[k]</span>. class="pplri7t-">hackOffsets[k]</span>.
<!--l. 529--><p class="indent" > <hr class="figure"><div class="figure" <!--l. 529--><p class="indent" > <hr class="figure"><div class="figure"
> >
<a <a
id="x19-152001r9"></a> id="x19-153001r9"></a>
@ -748,7 +759,7 @@ width="248" height="248" >
<br /> <div class="caption" <br /> <div class="caption"
><span class="id">Figure&#x00A0;9: </span><span ><span class="id">Figure&#x00A0;9: </span><span
class="content">Hacked DIA compression of matrix in Figure&#x00A0;<a class="content">Hacked DIA compression of matrix in Figure&#x00A0;<a
href="#x19-147002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-152001r9 --> href="#x19-148002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-153001r9 -->
@ -792,8 +803,8 @@ class="cmtt-10">psb_T_hdia_sparse_mat</span></span></span>:
<h4 class="subsectionHead"><span class="titlemark">12.4 </span> <a <h4 class="subsectionHead"><span class="titlemark">12.4 </span> <a
id="x19-15300012.4"></a>CUDA-class extensions</h4> id="x19-15400012.4"></a>CUDA-class extensions</h4>
<!--l. 4--><p class="noindent" >For computing with CUDA we define a dual memorization strategy in which each <!--l. 4--><p class="noindent" >For computing with CUDA we define a dual memorization strategy in which each
variable on the CPU (&#8220;host&#8221;) side has a GPU (&#8220;device&#8221;) side. When a GPU-type variable on the CPU (&#8220;host&#8221;) side has a GPU (&#8220;device&#8221;) side. When a GPU-type
variable is initialized, the data contained is (usually) the same on both sides. Each variable is initialized, the data contained is (usually) the same on both sides. Each
@ -801,10 +812,10 @@ operator invoked on the variable may change the data so that only the host side
the device side are up-to-date. the device side are up-to-date.
<!--l. 11--><p class="indent" > Keeping track of the updates to data in the variables is essential: we want to <!--l. 11--><p class="indent" > Keeping track of the updates to data in the variables is essential: we want to
perform most computations on the GPU, but we cannot afford the time needed to perform most computations on the GPU, but we cannot afford the time needed to
move data between the host memory and the device memory because the bandwidth move data between the host memory and the device memory because the
of the interconnection bus would become the main bottleneck of the computation. bandwidth of the interconnection bus would become the main bottleneck of the
Thus, each and every computational routine in the library is built according to the computation. Thus, each and every computational routine in the library is built
following principles: according to the following principles:
<ul class="itemize1"> <ul class="itemize1">
<li class="itemize"> <li class="itemize">
<!--l. 18--><p class="noindent" >If the data type being handled is GPU-enabled, make sure that its device <!--l. 18--><p class="noindent" >If the data type being handled is GPU-enabled, make sure that its device
@ -818,20 +829,20 @@ following principles:
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 25--><p class="noindent" > <!--l. 25--><p class="noindent" >
<span <span
class="cmbx-10">explicitly</span> </dt><dd class="pplb7t-">explicitly</span> </dt><dd
class="description"> class="description">
<!--l. 25--><p class="noindent" >by invoking a synchronization method; <!--l. 25--><p class="noindent" >by invoking a synchronization method;
</dd><dt class="description"> </dd><dt class="description">
<!--l. 26--><p class="noindent" > <!--l. 26--><p class="noindent" >
<span <span
class="cmbx-10">implicitly</span> </dt><dd class="pplb7t-">implicitly</span> </dt><dd
class="description"> class="description">
<!--l. 26--><p class="noindent" >by invoking a method that involves other data items that are not <!--l. 26--><p class="noindent" >by invoking a method that involves other data items that are not
GPU-enabled, e.g., by assignment of a vector to a normal array.</dd></dl> GPU-enabled, e.g., by assignment of a vector to a normal array.</dd></dl>
</li></ul> </li></ul>
<!--l. 31--><p class="noindent" >In this way, data items are put on the GPU memory &#8220;on demand&#8221; and remain there as <!--l. 31--><p class="noindent" >In this way, data items are put on the GPU memory &#8220;on demand&#8221; and remain there as
long as &#8220;normal&#8221; computations are carried out. As an example, the following call to a long as &#8220;normal&#8221; computations are carried out. As an example, the following call to
matrix-vector product a matrix-vector product
<div class="center" <div class="center"
> >
<!--l. 39--><p class="noindent" > <!--l. 39--><p class="noindent" >
@ -850,11 +861,11 @@ then
<!--l. 52--><p class="noindent" >The first kernel invocation will find the data in main memory, and will <!--l. 52--><p class="noindent" >The first kernel invocation will find the data in main memory, and will
copy it to the GPU memory, thus incurring a significant overhead; the copy it to the GPU memory, thus incurring a significant overhead; the
result is however <span result is however <span
class="cmti-10">not </span>copied back, and therefore: class="pplri7t-">not </span>copied back, and therefore:
</li> </li>
<li class="itemize"> <li class="itemize">
<!--l. 56--><p class="noindent" >Subsequent kernel invocations involving the same vector will find the data <!--l. 56--><p class="noindent" >Subsequent kernel invocations involving the same vector will find the
on the GPU side so that they will run at full speed.</li></ul> data on the GPU side so that they will run at full speed.</li></ul>
<!--l. 60--><p class="noindent" >For all invocations after the first the only data that will have to be transferred to/from <!--l. 60--><p class="noindent" >For all invocations after the first the only data that will have to be transferred to/from
the main memory will be the scalars <code class="lstinline"><span style="color:#000000">alpha</span></code> and <code class="lstinline"><span style="color:#000000">beta</span></code>, and the return code the main memory will be the scalars <code class="lstinline"><span style="color:#000000">alpha</span></code> and <code class="lstinline"><span style="color:#000000">beta</span></code>, and the return code
<code class="lstinline"><span style="color:#000000">info</span></code>. <code class="lstinline"><span style="color:#000000">info</span></code>.
@ -862,7 +873,7 @@ the main memory will be the scalars <code class="lstinline"><span style="color:#
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 65--><p class="noindent" > <!--l. 65--><p class="noindent" >
<span <span
class="cmbx-10">Vectors:</span> </dt><dd class="pplb7t-">Vectors:</span> </dt><dd
class="description"> class="description">
<!--l. 65--><p class="noindent" >The data type <code class="lstinline"><span style="color:#000000">psb_T_vect_gpu</span></code> provides a GPU-enabled extension of <!--l. 65--><p class="noindent" >The data type <code class="lstinline"><span style="color:#000000">psb_T_vect_gpu</span></code> provides a GPU-enabled extension of
the inner type <code class="lstinline"><span style="color:#000000">psb_T_base_vect_type</span></code>, and must be used together with the inner type <code class="lstinline"><span style="color:#000000">psb_T_base_vect_type</span></code>, and must be used together with
@ -871,23 +882,23 @@ class="description">
</dd><dt class="description"> </dd><dt class="description">
<!--l. 69--><p class="noindent" > <!--l. 69--><p class="noindent" >
<span <span
class="cmbx-10">CSR:</span> </dt><dd class="pplb7t-">CSR:</span> </dt><dd
class="description"> class="description">
<!--l. 69--><p class="noindent" >The data type <code class="lstinline"><span style="color:#000000">psb_T_csrg_sparse_mat</span></code> provides an interface to the GPU <!--l. 69--><p class="noindent" >The data type <code class="lstinline"><span style="color:#000000">psb_T_csrg_sparse_mat</span></code> provides an interface to the GPU
version of CSR available in the NVIDIA CuSPARSE library; version of CSR available in the NVIDIA CuSPARSE library;
</dd><dt class="description"> </dd><dt class="description">
<!--l. 72--><p class="noindent" > <!--l. 72--><p class="noindent" >
<span <span
class="cmbx-10">HYB:</span> </dt><dd class="pplb7t-">HYB:</span> </dt><dd
class="description"> class="description">
<!--l. 72--><p class="noindent" >The data type <code class="lstinline"><span style="color:#000000">psb_T_hybg_sparse_mat</span></code> provides an interface to the HYB <!--l. 72--><p class="noindent" >The data type <code class="lstinline"><span style="color:#000000">psb_T_hybg_sparse_mat</span></code> provides an interface to the HYB
GPU storage available in the NVIDIA CuSPARSE library. The internal GPU storage available in the NVIDIA CuSPARSE library. The internal
structure is opaque, hence the host side is just CSR; the HYB data format structure is opaque, hence the host side is just CSR; the HYB data format
is only available up to CUDA version 10. is only available up to CUDA version 10.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 77--><p class="noindent" > <!--l. 77--><p class="noindent" >
<span <span
class="cmbx-10">ELL:</span> </dt><dd class="pplb7t-">ELL:</span> </dt><dd
class="description"> class="description">
<!--l. 77--><p class="noindent" >The data type <code class="lstinline"><span style="color:#000000">psb_T_elg_sparse_mat</span></code> provides an interface to the <!--l. 77--><p class="noindent" >The data type <code class="lstinline"><span style="color:#000000">psb_T_elg_sparse_mat</span></code> provides an interface to the
ELLPACK implementation from SPGPU; ELLPACK implementation from SPGPU;
@ -897,14 +908,14 @@ class="description">
</dd><dt class="description"> </dd><dt class="description">
<!--l. 80--><p class="noindent" > <!--l. 80--><p class="noindent" >
<span <span
class="cmbx-10">HLL:</span> </dt><dd class="pplb7t-">HLL:</span> </dt><dd
class="description"> class="description">
<!--l. 80--><p class="noindent" >The data type <code class="lstinline"><span style="color:#000000">psb_T_hlg_sparse_mat</span></code> provides an interface to the Hacked <!--l. 80--><p class="noindent" >The data type <code class="lstinline"><span style="color:#000000">psb_T_hlg_sparse_mat</span></code> provides an interface to the
ELLPACK implementation from SPGPU; Hacked ELLPACK implementation from SPGPU;
</dd><dt class="description"> </dd><dt class="description">
<!--l. 82--><p class="noindent" > <!--l. 82--><p class="noindent" >
<span <span
class="cmbx-10">HDIA:</span> </dt><dd class="pplb7t-">HDIA:</span> </dt><dd
class="description"> class="description">
<!--l. 82--><p class="noindent" >The data type <code class="lstinline"><span style="color:#000000">psb_T_hdiag_sparse_mat</span></code> provides an interface to the <!--l. 82--><p class="noindent" >The data type <code class="lstinline"><span style="color:#000000">psb_T_hdiag_sparse_mat</span></code> provides an interface to the
Hacked DIAgonals implementation from SPGPU;</dd></dl> Hacked DIAgonals implementation from SPGPU;</dd></dl>

@ -15,13 +15,13 @@ href="userhtmlse12.html" >prev</a>] [<a
href="userhtmlse12.html#tailuserhtmlse12.html" >prev-tail</a>] [<a href="userhtmlse12.html#tailuserhtmlse12.html" >prev-tail</a>] [<a
href="userhtmlse10.html#tailuserhtmlse13.html">tail</a>] [<a href="userhtmlse10.html#tailuserhtmlse13.html">tail</a>] [<a
href="userhtml.html# " >up</a>] </p></div> href="userhtml.html# " >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">13 </span> <a <h3 class="sectionHead"><span class="titlemark">13 </span> <a
id="x20-15400013"></a>CUDA Environment Routines</h3> id="x20-15500013"></a>CUDA Environment Routines</h3>
<!--l. 91--><p class="noindent" > <!--l. 91--><p class="noindent" >
<h4 class="likesubsectionHead"><a <h4 class="likesubsectionHead"><a
id="x20-155000"></a>psb_cuda_init &#8212; Initializes PSBLAS-CUDA environment</h4> id="x20-156000"></a>psb_cuda_init &#8212; Initializes PSBLAS-CUDA environment</h4>
<a <a
id="Q1-20-192"></a> id="Q1-20-194"></a>
<div class="center" <div class="center"
> >
<!--l. 99--><p class="noindent" > <!--l. 99--><p class="noindent" >
@ -33,44 +33,44 @@ call&#x00A0;psb_cuda_init(ctxt&#x00A0;[,&#x00A0;device])
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 110--><p class="noindent" > <!--l. 110--><p class="noindent" >
<span <span
class="cmbx-10">Type:</span> </dt><dd class="pplb7t-">Type:</span> </dt><dd
class="description"> class="description">
<!--l. 110--><p class="noindent" >Synchronous. <!--l. 110--><p class="noindent" >Synchronous.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 111--><p class="noindent" > <!--l. 111--><p class="noindent" >
<span <span
class="cmbx-10">On Entry</span> </dt><dd class="pplb7t-">On Entry</span> </dt><dd
class="description"> class="description">
<!--l. 111--><p class="noindent" > <!--l. 111--><p class="noindent" >
</dd><dt class="description"> </dd><dt class="description">
<!--l. 112--><p class="noindent" > <!--l. 112--><p class="noindent" >
<span <span
class="cmbx-10">device</span> </dt><dd class="pplb7t-">device</span> </dt><dd
class="description"> class="description">
<!--l. 112--><p class="noindent" >ID of CUDA device to attach to.<br <!--l. 112--><p class="noindent" >ID of CUDA device to attach to.<br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">local</span>.<br class="pplb7t-">local</span>.<br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">optional</span>.<br class="pplb7t-">optional</span>.<br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">in</span>.<br class="pplb7t-">in</span>.<br
class="newline" />Specified as: an integer value. &#x00A0;Default: use <code class="lstinline"><span style="color:#000000">mod</span><span style="color:#000000">(</span><span style="color:#000000">iam</span><span style="color:#000000">,</span><span style="color:#000000">ngpu</span><span style="color:#000000">)</span></code> where <code class="lstinline"><span style="color:#000000">iam</span></code> is class="newline" />Specified as: an integer value. &#x00A0;Default: use <code class="lstinline"><span style="color:#000000">mod</span><span style="color:#000000">(</span><span style="color:#000000">iam</span><span style="color:#000000">,</span><span style="color:#000000">ngpu</span><span style="color:#000000">)</span></code> where <code class="lstinline"><span style="color:#000000">iam</span></code> is
the calling process index and <code class="lstinline"><span style="color:#000000">ngpu</span></code> is the total number of CUDA devices the calling process index and <code class="lstinline"><span style="color:#000000">ngpu</span></code> is the total number of CUDA devices
available on the current node.</dd></dl> available on the current node.</dd></dl>
<!--l. 123--><p class="noindent" ><span <!--l. 123--><p class="noindent" ><span
class="cmbx-12">Notes</span> class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" > <ol class="enumerate1" >
<li <li
class="enumerate" id="x20-155002x1"> class="enumerate" id="x20-156002x1">
<!--l. 125--><p class="noindent" >A call to this routine must precede any other PSBLAS-CUDA call.</li></ol> <!--l. 125--><p class="noindent" >A call to this routine must precede any other PSBLAS-CUDA call.</li></ol>
<!--l. 129--><p class="noindent" > <!--l. 129--><p class="noindent" >
<h4 class="likesubsectionHead"><a <h4 class="likesubsectionHead"><a
id="x20-156000"></a>psb_cuda_exit &#8212; Exit from PSBLAS-CUDA environment</h4> id="x20-157000"></a>psb_cuda_exit &#8212; Exit from PSBLAS-CUDA environment</h4>
<a <a
id="Q1-20-194"></a> id="Q1-20-196"></a>
<div class="center" <div class="center"
> >
<!--l. 137--><p class="noindent" > <!--l. 137--><p class="noindent" >
@ -82,33 +82,33 @@ call&#x00A0;psb_cuda_exit(ctxt)
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 148--><p class="noindent" > <!--l. 148--><p class="noindent" >
<span <span
class="cmbx-10">Type:</span> </dt><dd class="pplb7t-">Type:</span> </dt><dd
class="description"> class="description">
<!--l. 148--><p class="noindent" >Synchronous. <!--l. 148--><p class="noindent" >Synchronous.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 149--><p class="noindent" > <!--l. 149--><p class="noindent" >
<span <span
class="cmbx-10">On Entry</span> </dt><dd class="pplb7t-">On Entry</span> </dt><dd
class="description"> class="description">
<!--l. 149--><p class="noindent" > <!--l. 149--><p class="noindent" >
</dd><dt class="description"> </dd><dt class="description">
<!--l. 150--><p class="noindent" > <!--l. 150--><p class="noindent" >
<span <span
class="cmbx-10">ctxt</span> </dt><dd class="pplb7t-">ctxt</span> </dt><dd
class="description"> class="description">
<!--l. 150--><p class="noindent" >the communication context identifying the virtual parallel machine.<br <!--l. 150--><p class="noindent" >the communication context identifying the virtual parallel machine.<br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">global</span>.<br class="pplb7t-">global</span>.<br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">required</span>.<br class="pplb7t-">required</span>.<br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">in</span>.<br class="pplb7t-">in</span>.<br
class="newline" />Specified as: an integer variable.</dd></dl> class="newline" />Specified as: an integer variable.</dd></dl>
<!--l. 161--><p class="noindent" > <!--l. 161--><p class="noindent" >
<h4 class="likesubsectionHead"><a <h4 class="likesubsectionHead"><a
id="x20-157000"></a>psb_cuda_DeviceSync &#8212; Synchronize CUDA device</h4> id="x20-158000"></a>psb_cuda_DeviceSync &#8212; Synchronize CUDA device</h4>
<a <a
id="Q1-20-196"></a> id="Q1-20-198"></a>
@ -123,9 +123,9 @@ call&#x00A0;psb_cuda_DeviceSync()
CUDA-side code, have completed. CUDA-side code, have completed.
<!--l. 182--><p class="noindent" > <!--l. 182--><p class="noindent" >
<h4 class="likesubsectionHead"><a <h4 class="likesubsectionHead"><a
id="x20-158000"></a>psb_cuda_getDeviceCount </h4> id="x20-159000"></a>psb_cuda_getDeviceCount </h4>
<a <a
id="Q1-20-198"></a> id="Q1-20-200"></a>
<div class="center" <div class="center"
> >
<!--l. 190--><p class="noindent" > <!--l. 190--><p class="noindent" >
@ -136,9 +136,9 @@ ngpus&#x00A0;=&#x00A0;&#x00A0;psb_cuda_getDeviceCount()
<!--l. 199--><p class="noindent" >Get number of devices available on current computing node. <!--l. 199--><p class="noindent" >Get number of devices available on current computing node.
<!--l. 201--><p class="noindent" > <!--l. 201--><p class="noindent" >
<h4 class="likesubsectionHead"><a <h4 class="likesubsectionHead"><a
id="x20-159000"></a>psb_cuda_getDevice </h4> id="x20-160000"></a>psb_cuda_getDevice </h4>
<a <a
id="Q1-20-200"></a> id="Q1-20-202"></a>
<div class="center" <div class="center"
> >
<!--l. 209--><p class="noindent" > <!--l. 209--><p class="noindent" >
@ -147,14 +147,14 @@ ngpus&#x00A0;=&#x00A0;&#x00A0;psb_cuda_getDevice()
</pre> </pre>
<!--l. 213--><p class="nopar" > </div></div> <!--l. 213--><p class="nopar" > </div></div>
<!--l. 218--><p class="noindent" >Get device in use by current process. <!--l. 218--><p class="noindent" >Get device in use by current process.
<!--l. 220--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-160000"></a>psb_cuda_setDevice </h4>
<a
id="Q1-20-202"></a>
<!--l. 220--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-161000"></a>psb_cuda_setDevice </h4>
<a
id="Q1-20-204"></a>
<div class="center" <div class="center"
> >
<!--l. 228--><p class="noindent" > <!--l. 228--><p class="noindent" >
@ -165,9 +165,9 @@ info&#x00A0;=&#x00A0;psb_cuda_setDevice(dev)
<!--l. 237--><p class="noindent" >Set device to be used by current process. <!--l. 237--><p class="noindent" >Set device to be used by current process.
<!--l. 239--><p class="noindent" > <!--l. 239--><p class="noindent" >
<h4 class="likesubsectionHead"><a <h4 class="likesubsectionHead"><a
id="x20-161000"></a>psb_cuda_DeviceHasUVA </h4> id="x20-162000"></a>psb_cuda_DeviceHasUVA </h4>
<a <a
id="Q1-20-204"></a> id="Q1-20-206"></a>
<div class="center" <div class="center"
> >
<!--l. 247--><p class="noindent" > <!--l. 247--><p class="noindent" >
@ -178,9 +178,9 @@ hasUva&#x00A0;=&#x00A0;psb_cuda_DeviceHasUVA()
<!--l. 256--><p class="noindent" >Returns true if device currently in use supports UVA (Unified Virtual Addressing). <!--l. 256--><p class="noindent" >Returns true if device currently in use supports UVA (Unified Virtual Addressing).
<!--l. 259--><p class="noindent" > <!--l. 259--><p class="noindent" >
<h4 class="likesubsectionHead"><a <h4 class="likesubsectionHead"><a
id="x20-162000"></a>psb_cuda_WarpSize </h4> id="x20-163000"></a>psb_cuda_WarpSize </h4>
<a <a
id="Q1-20-206"></a> id="Q1-20-208"></a>
<div class="center" <div class="center"
> >
<!--l. 267--><p class="noindent" > <!--l. 267--><p class="noindent" >
@ -189,14 +189,14 @@ nw&#x00A0;=&#x00A0;psb_cuda_WarpSize()
</pre> </pre>
<!--l. 271--><p class="nopar" > </div></div> <!--l. 271--><p class="nopar" > </div></div>
<!--l. 276--><p class="noindent" >Returns the warp size. <!--l. 276--><p class="noindent" >Returns the warp size.
<!--l. 279--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-163000"></a>psb_cuda_MultiProcessors </h4>
<a
id="Q1-20-208"></a>
<!--l. 279--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-164000"></a>psb_cuda_MultiProcessors </h4>
<a
id="Q1-20-210"></a>
<div class="center" <div class="center"
> >
<!--l. 287--><p class="noindent" > <!--l. 287--><p class="noindent" >
@ -207,9 +207,9 @@ nmp&#x00A0;=&#x00A0;psb_cuda_MultiProcessors()
<!--l. 296--><p class="noindent" >Returns the number of multiprocessors in the CUDA device. <!--l. 296--><p class="noindent" >Returns the number of multiprocessors in the CUDA device.
<!--l. 298--><p class="noindent" > <!--l. 298--><p class="noindent" >
<h4 class="likesubsectionHead"><a <h4 class="likesubsectionHead"><a
id="x20-164000"></a>psb_cuda_MaxThreadsPerMP </h4> id="x20-165000"></a>psb_cuda_MaxThreadsPerMP </h4>
<a <a
id="Q1-20-210"></a> id="Q1-20-212"></a>
<div class="center" <div class="center"
> >
<!--l. 306--><p class="noindent" > <!--l. 306--><p class="noindent" >
@ -220,9 +220,9 @@ nt&#x00A0;=&#x00A0;psb_cuda_MaxThreadsPerMP()
<!--l. 315--><p class="noindent" >Returns the maximum number of threads per multiprocessor. <!--l. 315--><p class="noindent" >Returns the maximum number of threads per multiprocessor.
<!--l. 318--><p class="noindent" > <!--l. 318--><p class="noindent" >
<h4 class="likesubsectionHead"><a <h4 class="likesubsectionHead"><a
id="x20-165000"></a>psb_cuda_MaxRegistersPerBlock </h4> id="x20-166000"></a>psb_cuda_MaxRegistersPerBlock </h4>
<a <a
id="Q1-20-212"></a> id="Q1-20-214"></a>
<div class="center" <div class="center"
> >
<!--l. 326--><p class="noindent" > <!--l. 326--><p class="noindent" >
@ -231,14 +231,14 @@ nr&#x00A0;=&#x00A0;psb_cuda_MaxRegistersPerBlock()
</pre> </pre>
<!--l. 330--><p class="nopar" > </div></div> <!--l. 330--><p class="nopar" > </div></div>
<!--l. 335--><p class="noindent" >Returns the maximum number of registers per thread block. <!--l. 335--><p class="noindent" >Returns the maximum number of registers per thread block.
<!--l. 338--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-166000"></a>psb_cuda_MemoryClockRate </h4>
<a
id="Q1-20-214"></a>
<!--l. 338--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-167000"></a>psb_cuda_MemoryClockRate </h4>
<a
id="Q1-20-216"></a>
<div class="center" <div class="center"
> >
<!--l. 346--><p class="noindent" > <!--l. 346--><p class="noindent" >
@ -249,9 +249,9 @@ cl&#x00A0;=&#x00A0;psb_cuda_MemoryClockRate()
<!--l. 355--><p class="noindent" >Returns the memory clock rate in KHz, as an integer. <!--l. 355--><p class="noindent" >Returns the memory clock rate in KHz, as an integer.
<!--l. 357--><p class="noindent" > <!--l. 357--><p class="noindent" >
<h4 class="likesubsectionHead"><a <h4 class="likesubsectionHead"><a
id="x20-167000"></a>psb_cuda_MemoryBusWidth </h4> id="x20-168000"></a>psb_cuda_MemoryBusWidth </h4>
<a <a
id="Q1-20-216"></a> id="Q1-20-218"></a>
<div class="center" <div class="center"
> >
<!--l. 365--><p class="noindent" > <!--l. 365--><p class="noindent" >
@ -262,9 +262,9 @@ nb&#x00A0;=&#x00A0;psb_cuda_MemoryBusWidth()
<!--l. 374--><p class="noindent" >Returns the memory bus width in bits. <!--l. 374--><p class="noindent" >Returns the memory bus width in bits.
<!--l. 376--><p class="noindent" > <!--l. 376--><p class="noindent" >
<h4 class="likesubsectionHead"><a <h4 class="likesubsectionHead"><a
id="x20-168000"></a>psb_cuda_MemoryPeakBandwidth </h4> id="x20-169000"></a>psb_cuda_MemoryPeakBandwidth </h4>
<a <a
id="Q1-20-218"></a> id="Q1-20-220"></a>
<div class="center" <div class="center"
> >
<!--l. 384--><p class="noindent" > <!--l. 384--><p class="noindent" >
@ -282,7 +282,7 @@ bw&#x00A0;=&#x00A0;psb_cuda_MemoryPeakBandwidth()
<!--l. 126--><p class="indent" > <!--l. 134--><p class="indent" >

@ -16,26 +16,26 @@ href="userhtmlse1.html" >prev</a>] [<a
href="userhtmlse1.html#tailuserhtmlse1.html" >prev-tail</a>] [<a href="userhtmlse1.html#tailuserhtmlse1.html" >prev-tail</a>] [<a
href="#tailuserhtmlse2.html">tail</a>] [<a href="#tailuserhtmlse2.html">tail</a>] [<a
href="userhtml.html#userhtmlse2.html" >up</a>] </p></div> href="userhtml.html#userhtmlse2.html" >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">2 </span> <a <h3 class="sectionHead"><span class="titlemark">2 </span> <a
id="x4-30002"></a>General overview</h3> id="x4-30002"></a>General overview</h3>
<!--l. 74--><p class="noindent" >The PSBLAS library is designed to handle the implementation of iterative solvers for <!--l. 74--><p class="noindent" >The PSBLAS library is designed to handle the implementation of iterative solvers for
sparse linear systems on distributed memory parallel computers. The system sparse linear systems on distributed memory parallel computers. The system
coefficient matrix <span coefficient matrix <span
class="cmmi-10">A </span>must be square; it may be real or complex, nonsymmetric, and class="zplmr7m-">A </span>must be square; it may be real or complex, nonsymmetric, and
its sparsity pattern need not be symmetric. The serial computation parts are its sparsity pattern need not be symmetric. The serial computation parts are
based on the serial sparse BLAS, so that any extension made to the data structures based on the serial sparse BLAS, so that any extension made to the data structures of
of the serial kernels is available to the parallel version. The overall design and the serial kernels is available to the parallel version. The overall design and
parallelization strategy have been influenced by the structure of the ScaLAPACK parallelization strategy have been influenced by the structure of the ScaLAPACK
parallel library. The layered structure of the PSBLAS library is shown in figure&#x00A0;<a parallel library. The layered structure of the PSBLAS library is shown in figure&#x00A0;<a
href="#x4-3001r1">1<!--tex4ht:ref: fig:psblas --></a>; href="#x4-3001r1">1<!--tex4ht:ref: fig:psblas --></a>;
lower layers of the library indicate an encapsulation relationship with upper lower layers of the library indicate an encapsulation relationship with upper layers.
layers. The ongoing discussion focuses on the Fortran&#x00A0;2003 layer immediately The ongoing discussion focuses on the Fortran&#x00A0;2003 layer immediately
below the application layer. The serial parts of the computation on each below the application layer. The serial parts of the computation on each
process are executed through calls to the serial sparse BLAS subroutines. In a process are executed through calls to the serial sparse BLAS subroutines. In a
similar way, the inter-process message exchanges are encapsulated in an similar way, the inter-process message exchanges are encapsulated in an
application layer that has been strongly inspired by the Basic Linear Algebra application layer that has been strongly inspired by the Basic Linear Algebra
Communication Subroutines (BLACS) library&#x00A0;<span class="cite">[<a Communication Subroutines (BLACS) library&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#XBLACS">6</a>]</span>. Usually there is no need to deal href="userhtmlli2.html#XBLACS">7</a>]</span>. Usually there is no need to deal
directly with MPI; however, in some cases, MPI routines are used directly directly with MPI; however, in some cases, MPI routines are used directly
to improve efficiency. For further details on our communication layer see to improve efficiency. For further details on our communication layer see
Sec.&#x00A0;<a Sec.&#x00A0;<a
@ -76,7 +76,7 @@ mesh.
process that will own the corresponding row in the coefficient matrix and will process that will own the corresponding row in the coefficient matrix and will
carry out all related computations. This allocation strategy is equivalent to a carry out all related computations. This allocation strategy is equivalent to a
partition of the discretization mesh into <span partition of the discretization mesh into <span
class="cmti-10">sub-domains</span>. Our library supports any class="pplri7t-">sub-domains</span>. Our library supports any
distribution that keeps together the coefficients of each matrix row; there are no distribution that keeps together the coefficients of each matrix row; there are no
other constraints on the variable assignment. This choice is consistent with other constraints on the variable assignment. This choice is consistent with
simple data distributions such as <span class="obeylines-h"><span class="verb"><span simple data distributions such as <span class="obeylines-h"><span class="verb"><span
@ -85,7 +85,7 @@ class="cmtt-10">BLOCK</span></span></span>, as well as completely
arbitrary assignments of equation indices to processes. In particular it is arbitrary assignments of equation indices to processes. In particular it is
consistent with the usage of graph partitioning tools commonly available in consistent with the usage of graph partitioning tools commonly available in
the literature, e.g. METIS&#x00A0;<span class="cite">[<a the literature, e.g. METIS&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#XMETIS">13</a>]</span>. Dense vectors conform to sparse matrices, href="userhtmlli2.html#XMETIS">14</a>]</span>. Dense vectors conform to sparse matrices,
that is, the entries of a vector follow the same distribution of the matrix that is, the entries of a vector follow the same distribution of the matrix
rows. rows.
<!--l. 146--><p class="indent" > We assume that the sparse matrix is built in parallel, where each process generates <!--l. 146--><p class="indent" > We assume that the sparse matrix is built in parallel, where each process generates
@ -94,35 +94,35 @@ node. However, it is possible to hold the entire matrix in one process and distr
explicitly<span class="footnote-mark"><a explicitly<span class="footnote-mark"><a
href="userhtml5.html#fn1x0"><sup class="textsuperscript">1</sup></a></span><a href="userhtml5.html#fn1x0"><sup class="textsuperscript">1</sup></a></span><a
id="x4-3002f1"></a> , id="x4-3002f1"></a> ,
even though the resulting memory bottleneck would make this option unattractive in even though the resulting memory bottleneck would make this option unattractive
most cases. in most cases.
<h4 class="subsectionHead"><span class="titlemark">2.1 </span> <a <h4 class="subsectionHead"><span class="titlemark">2.1 </span> <a
id="x4-40002.1"></a>Basic Nomenclature</h4> id="x4-40002.1"></a>Basic Nomenclature</h4>
<!--l. 158--><p class="noindent" >Our computational model implies that the data allocation on the parallel distributed <!--l. 158--><p class="noindent" >Our computational model implies that the data allocation on the parallel distributed
memory machine is guided by the structure of the physical model, and specifically by memory machine is guided by the structure of the physical model, and specifically
the discretization mesh of the PDE. by the discretization mesh of the PDE.
<!--l. 163--><p class="indent" > Each point of the discretization mesh will have (at least) one associated <!--l. 163--><p class="indent" > Each point of the discretization mesh will have (at least) one associated
equation/variable, and therefore one index. We say that point <span equation/variable, and therefore one index. We say that point <span
class="cmmi-10">i </span><span class="zplmr7m-">i </span><span
class="cmti-10">depends </span>on point <span class="pplri7t-">depends </span>on point <span
class="cmmi-10">j </span>if class="zplmr7m-">j </span>if
the equation for a variable associated with <span the equation for a variable associated with <span
class="cmmi-10">i </span>contains a term in <span class="zplmr7m-">i </span>contains a term in <span
class="cmmi-10">j</span>, or equivalently if class="zplmr7m-">j</span>, or equivalently if
<span <span
class="cmmi-10">a</span><sub><span class="zplmr7m-">a</span><sub><span
class="cmmi-7">ij</span></sub><span class="zplmr7m-x-x-76">ij</span></sub><span
class="cmmi-10">&#x2260;</span>0. After the partition of the discretization mesh into <span class="zplmr7m-">&#x2260;</span>0. After the partition of the discretization mesh into <span
class="cmti-10">sub-domains </span>assigned class="pplri7t-">sub-domains </span>assigned
to the parallel processes, we classify the points of a given sub-domain as to the parallel processes, we classify the points of a given sub-domain as
follows. follows.
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 172--><p class="noindent" > <!--l. 172--><p class="noindent" >
<span <span
class="cmbx-10">Internal.</span> </dt><dd class="pplb7t-">Internal.</span> </dt><dd
class="description"> class="description">
<!--l. 172--><p class="noindent" >An internal point of a given domain <span <!--l. 172--><p class="noindent" >An internal point of a given domain <span
class="cmti-10">depends </span>only on points of the same class="pplri7t-">depends </span>only on points of the same
domain. If all points of a domain are assigned to one process, then domain. If all points of a domain are assigned to one process, then
a computational step (e.g., a matrix-vector product) of the equations a computational step (e.g., a matrix-vector product) of the equations
@ -133,22 +133,22 @@ class="cmti-10">depends </span>only on points of the same
</dd><dt class="description"> </dd><dt class="description">
<!--l. 181--><p class="noindent" > <!--l. 181--><p class="noindent" >
<span <span
class="cmbx-10">Boundary.</span> </dt><dd class="pplb7t-">Boundary.</span> </dt><dd
class="description"> class="description">
<!--l. 181--><p class="noindent" >A point of a given domain is a boundary point if it <span <!--l. 181--><p class="noindent" >A point of a given domain is a boundary point if it <span
class="cmti-10">depends </span>on points class="pplri7t-">depends </span>on points
belonging to other domains. belonging to other domains.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 185--><p class="noindent" > <!--l. 185--><p class="noindent" >
<span <span
class="cmbx-10">Halo.</span> </dt><dd class="pplb7t-">Halo.</span> </dt><dd
class="description"> class="description">
<!--l. 185--><p class="noindent" >A halo point for a given domain is a point belonging to another domain <!--l. 185--><p class="noindent" >A halo point for a given domain is a point belonging to another domain
such that there is a boundary point which <span such that there is a boundary point which <span
class="cmti-10">depends </span>on it. Whenever performing class="pplri7t-">depends </span>on it. Whenever performing
a computational step, such as a matrix-vector product, the values associated a computational step, such as a matrix-vector product, the values associated
with halo points are requested from other domains. A boundary point of a with halo points are requested from other domains. A boundary point of
given domain is usually a halo point for some other domain<span class="footnote-mark"><a a given domain is usually a halo point for some other domain<span class="footnote-mark"><a
href="userhtml6.html#fn2x0"><sup class="textsuperscript">2</sup></a></span><a href="userhtml6.html#fn2x0"><sup class="textsuperscript">2</sup></a></span><a
id="x4-4001f2"></a> ; id="x4-4001f2"></a> ;
therefore the cardinality of the boundary points set denotes the amount therefore the cardinality of the boundary points set denotes the amount
@ -156,50 +156,53 @@ href="userhtml6.html#fn2x0"><sup class="textsuperscript">2</sup></a></span><a
</dd><dt class="description"> </dd><dt class="description">
<!--l. 198--><p class="noindent" > <!--l. 198--><p class="noindent" >
<span <span
class="cmbx-10">Overlap.</span> </dt><dd class="pplb7t-">Overlap.</span> </dt><dd
class="description"> class="description">
<!--l. 198--><p class="noindent" >An overlap point is a boundary point assigned to multiple domains. Any <!--l. 198--><p class="noindent" >An overlap point is a boundary point assigned to multiple domains. Any
operation that involves an overlap point has to be replicated for each operation that involves an overlap point has to be replicated for each
assignment.</dd></dl> assignment.</dd></dl>
<!--l. 202--><p class="noindent" >Overlap points do not usually exist in the basic data distributions; however they are a <!--l. 202--><p class="noindent" >Overlap points do not usually exist in the basic data distributions; however they are a
feature of Domain Decomposition Schwarz preconditioners which are the subject of feature of Domain Decomposition Schwarz preconditioners which are the subject of
related research work&#x00A0;<span class="cite">[<a related research work&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#X2007c">3</a>,&#x00A0;<a href="userhtmlli2.html#X2007c">4</a>,&#x00A0;<a
href="userhtmlli2.html#X2007d">2</a>]</span>. href="userhtmlli2.html#X2007d">3</a>]</span>.
<!--l. 207--><p class="indent" > We denote the sets of internal, boundary and halo points for a given subdomain <!--l. 207--><p class="indent" > We denote the sets of internal, boundary and halo points for a given subdomain
by <span by <span
class="cmsy-10"><img class="zplmr7y-"><img
src="cmsy10-49.png" alt="I" class="10x-x-49" /></span>, <span src="zplmr7y-49.png" alt="I" class="x-x-49" /></span>, <span
class="cmsy-10"><img class="zplmr7y-"><img
src="cmsy10-42.png" alt="B" class="10x-x-42" /> </span>and <span src="zplmr7y-42.png" alt="B" class="x-x-42" /> </span>and <span
class="cmsy-10"><img class="zplmr7y-"><img
src="cmsy10-48.png" alt="H" class="10x-x-48" /></span>. Each subdomain is assigned to one process; each process usually owns src="zplmr7y-48.png" alt="H" class="x-x-48" /></span>. Each subdomain is assigned to one process; each process usually
one subdomain, although the user may choose to assign more than one subdomain to owns one subdomain, although the user may choose to assign more than one
a process. If each process <span subdomain to a process. If each process <span
class="cmmi-10">i </span>owns one subdomain, the number of rows in class="zplmr7m-">i </span>owns one subdomain, the number of rows
the local sparse matrix is <span in the local sparse matrix is <span
class="cmsy-10">|<img class="zplmr7y-">|<img
src="cmsy10-49.png" alt="I" class="10x-x-49" /></span><sub><span src="zplmr7y-49.png" alt="I" class="x-x-49" /></span><sub><span
class="cmmi-7">i</span></sub><span class="zplmr7m-x-x-76">i</span></sub><span
class="cmsy-10">| </span>+ <span class="zplmr7y-">| </span><span
class="cmsy-10">|<img class="zplmr7t-">+ </span><span
src="cmsy10-42.png" alt="B" class="10x-x-42" /></span><sub><span class="zplmr7y-">|<img
class="cmmi-7">i</span></sub><span src="zplmr7y-42.png" alt="B" class="x-x-42" /></span><sub><span
class="cmsy-10">|</span>, and the number of local columns (i.e. class="zplmr7m-x-x-76">i</span></sub><span
class="zplmr7y-">|</span>, and the number of local columns (i.e.
those for which there exists at least one non-zero entry in the local rows) is those for which there exists at least one non-zero entry in the local rows) is
<span <span
class="cmsy-10">|<img class="zplmr7y-">|<img
src="cmsy10-49.png" alt="I" class="10x-x-49" /></span><sub><span src="zplmr7y-49.png" alt="I" class="x-x-49" /></span><sub><span
class="cmmi-7">i</span></sub><span class="zplmr7m-x-x-76">i</span></sub><span
class="cmsy-10">| </span>+ <span class="zplmr7y-">| </span><span
class="cmsy-10">|<img class="zplmr7t-">+ </span><span
src="cmsy10-42.png" alt="B" class="10x-x-42" /></span><sub><span class="zplmr7y-">|<img
class="cmmi-7">i</span></sub><span src="zplmr7y-42.png" alt="B" class="x-x-42" /></span><sub><span
class="cmsy-10">| </span>+ <span class="zplmr7m-x-x-76">i</span></sub><span
class="cmsy-10">|<img class="zplmr7y-">| </span><span
src="cmsy10-48.png" alt="H" class="10x-x-48" /></span><sub><span class="zplmr7t-">+ </span><span
class="cmmi-7">i</span></sub><span class="zplmr7y-">|<img
class="cmsy-10">|</span>. src="zplmr7y-48.png" alt="H" class="x-x-48" /></span><sub><span
class="zplmr7m-x-x-76">i</span></sub><span
class="zplmr7y-">|</span>.
<!--l. 217--><p class="indent" > <hr class="figure"><div class="figure" <!--l. 217--><p class="indent" > <hr class="figure"><div class="figure"
> >
@ -226,13 +229,13 @@ class="content">Point classification.</span></div><!--tex4ht:label?: x4-4003r2 --
<!--l. 229--><p class="indent" > This classification of mesh points guides the naming scheme that we adopted in <!--l. 229--><p class="indent" > This classification of mesh points guides the naming scheme that we adopted in
the library internals and in the data structures. We explicitly note that &#8220;Halo&#8221; points the library internals and in the data structures. We explicitly note that &#8220;Halo&#8221; points
are also often called &#8220;ghost&#8221; points in the literature. are also often called &#8220;ghost&#8221; points in the literature.
<h4 class="subsectionHead"><span class="titlemark">2.2 </span> <a <h4 class="subsectionHead"><span class="titlemark">2.2 </span> <a
id="x4-50002.2"></a>Library contents</h4> id="x4-50002.2"></a>Library contents</h4>
<!--l. 238--><p class="noindent" >The PSBLAS library consists of various classes of subroutines: <!--l. 238--><p class="noindent" >The PSBLAS library consists of various classes of subroutines:
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 240--><p class="noindent" > <!--l. 240--><p class="noindent" >
<span <span
class="cmbx-10">Computational routines</span> </dt><dd class="pplb7t-">Computational routines</span> </dt><dd
class="description"> class="description">
<!--l. 240--><p class="noindent" >comprising: <!--l. 240--><p class="noindent" >comprising:
<ul class="itemize1"> <ul class="itemize1">
@ -253,13 +256,13 @@ class="description">
</dd><dt class="description"> </dd><dt class="description">
<!--l. 249--><p class="noindent" > <!--l. 249--><p class="noindent" >
<span <span
class="cmbx-10">Communication routines</span> </dt><dd class="pplb7t-">Communication routines</span> </dt><dd
class="description"> class="description">
<!--l. 249--><p class="noindent" >handling halo and overlap communications; <!--l. 249--><p class="noindent" >handling halo and overlap communications;
</dd><dt class="description"> </dd><dt class="description">
<!--l. 251--><p class="noindent" > <!--l. 251--><p class="noindent" >
<span <span
class="cmbx-10">Data management and auxiliary routines</span> </dt><dd class="pplb7t-">Data management and auxiliary routines</span> </dt><dd
class="description"> class="description">
<!--l. 251--><p class="noindent" >including: <!--l. 251--><p class="noindent" >including:
<ul class="itemize1"> <ul class="itemize1">
@ -283,17 +286,17 @@ class="description">
</dd><dt class="description"> </dd><dt class="description">
<!--l. 259--><p class="noindent" > <!--l. 259--><p class="noindent" >
<span <span
class="cmbx-10">Preconditioner routines</span> </dt><dd class="pplb7t-">Preconditioner routines</span> </dt><dd
class="description"> class="description">
<!--l. 259--><p class="noindent" > <!--l. 259--><p class="noindent" >
</dd><dt class="description"> </dd><dt class="description">
<!--l. 260--><p class="noindent" > <!--l. 260--><p class="noindent" >
<span <span
class="cmbx-10">Iterative methods</span> </dt><dd class="pplb7t-">Iterative methods</span> </dt><dd
class="description"> class="description">
<!--l. 260--><p class="noindent" >a subset of Krylov subspace iterative methods</dd></dl> <!--l. 260--><p class="noindent" >a subset of Krylov subspace iterative methods</dd></dl>
<!--l. 263--><p class="noindent" >The following naming scheme has been adopted for all the symbols internally defined in <!--l. 263--><p class="noindent" >The following naming scheme has been adopted for all the symbols internally defined
the PSBLAS software package: in the PSBLAS software package:
<ul class="itemize1"> <ul class="itemize1">
<li class="itemize"> <li class="itemize">
<!--l. 266--><p class="noindent" >all symbols (i.e. subroutine names, data types...) are prefixed by <span class="obeylines-h"><span class="verb"><span <!--l. 266--><p class="noindent" >all symbols (i.e. subroutine names, data types...) are prefixed by <span class="obeylines-h"><span class="verb"><span
@ -341,15 +344,15 @@ as:
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 288--><p class="noindent" > <!--l. 288--><p class="noindent" >
<span <span
class="cmbx-10">global</span> </dt><dd class="pplb7t-">global</span> </dt><dd
class="description"> class="description">
<!--l. 288--><p class="noindent" >For input arguments, the value must be the same on all processes <!--l. 288--><p class="noindent" >For input arguments, the value must be the same on all processes
participating in the subroutine call; for output arguments the value is participating in the subroutine call; for output arguments the value is
guaranteed to be the same. guaranteed to be the same.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 291--><p class="noindent" > <!--l. 291--><p class="noindent" >
<span <span
class="cmbx-10">local</span> </dt><dd class="pplb7t-">local</span> </dt><dd
class="description"> class="description">
<!--l. 291--><p class="noindent" >Each process has its own value(s) independently.</dd></dl> <!--l. 291--><p class="noindent" >Each process has its own value(s) independently.</dd></dl>
<!--l. 293--><p class="noindent" >To finish our general description, we define a version string with the constant <!--l. 293--><p class="noindent" >To finish our general description, we define a version string with the constant
@ -360,36 +363,36 @@ src="userhtml0x.png" alt="psb_version_string_
<!--l. 295--><p class="nopar" > whose current value is <span class="obeylines-h"><span class="verb"><span <!--l. 295--><p class="nopar" > whose current value is <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">3.8.0</span></span></span> class="cmtt-10">3.8.0</span></span></span>
<!--l. 298--><p class="noindent" > <!--l. 298--><p class="noindent" >
<h4 class="subsectionHead"><span class="titlemark">2.3 </span> <a <h4 class="subsectionHead"><span class="titlemark">2.3 </span> <a
id="x4-60002.3"></a>Application structure</h4> id="x4-60002.3"></a>Application structure</h4>
<!--l. 301--><p class="noindent" >The main underlying principle of the PSBLAS library is that the library objects are <!--l. 301--><p class="noindent" >The main underlying principle of the PSBLAS library is that the library objects are
created and exist with reference to a discretized space to which there corresponds created and exist with reference to a discretized space to which there corresponds
an index space and a matrix sparsity pattern. As an example, consider a an index space and a matrix sparsity pattern. As an example, consider a
cell-centered finite-volume discretization of the Navier-Stokes equations on a cell-centered finite-volume discretization of the Navier-Stokes equations on a
simulation domain; the index space 1<span simulation domain; the index space 1<span
class="cmmi-10">&#x2026;</span><span class="zplmr7m-">&#x2026;</span><span
class="cmmi-10">n </span>is isomorphic to the set of cell centers, class="zplmr7m-">n </span>is isomorphic to the set of cell centers,
whereas the pattern of the associated linear system matrix is isomorphic to the whereas the pattern of the associated linear system matrix is isomorphic to the
adjacency graph imposed on the discretization mesh by the discretization adjacency graph imposed on the discretization mesh by the discretization
stencil. stencil.
<!--l. 311--><p class="indent" > Thus the first order of business is to establish an index space, and this is done <!--l. 311--><p class="indent" > Thus the first order of business is to establish an index space, and this is done
with a call to <span class="obeylines-h"><span class="verb"><span with a call to <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdall</span></span></span> in which we specify the size of the index space <span class="cmtt-10">psb_cdall</span></span></span> in which we specify the size of the index space <span
class="cmmi-10">n </span>and the class="zplmr7m-">n </span>and the
allocation of the elements of the index space to the various processes making up the allocation of the elements of the index space to the various processes making up the
MPI (virtual) parallel machine. MPI (virtual) parallel machine.
<!--l. 317--><p class="indent" > The index space is partitioned among processes, and this creates a mapping from <!--l. 317--><p class="indent" > The index space is partitioned among processes, and this creates a mapping from
the &#8220;global&#8221; numbering 1<span the &#8220;global&#8221; numbering 1<span
class="cmmi-10">&#x2026;</span><span class="zplmr7m-">&#x2026;</span><span
class="cmmi-10">n </span>to a numbering &#8220;local&#8221; to each process; each process <span class="zplmr7m-">n </span>to a numbering &#8220;local&#8221; to each process; each process <span
class="cmmi-10">i</span> class="zplmr7m-">i</span>
will own a certain subset 1<span will own a certain subset 1<span
class="cmmi-10">&#x2026;</span><span class="zplmr7m-">&#x2026;</span><span
class="cmmi-10">n</span><sub>row<sub><span class="zplmr7m-">n</span><sub>row<sub><span
class="cmmi-5">i</span></sub></sub>, each element of which corresponds to a certain class="zplmr7m-x-x-60">i</span></sub></sub>, each element of which corresponds to a certain
element of 1<span element of 1<span
class="cmmi-10">&#x2026;</span><span class="zplmr7m-">&#x2026;</span><span
class="cmmi-10">n</span>. The user does not explicitly set this mapping; when the application class="zplmr7m-">n</span>. The user does not explicitly set this mapping; when the application
needs to indicate to which element of the index space a certain item is related, needs to indicate to which element of the index space a certain item is related,
such as the row and column index of a matrix coefficient, it does so in the such as the row and column index of a matrix coefficient, it does so in the
&#8220;global&#8221; numbering, and the library will translate into the appropriate &#8220;local&#8221; &#8220;global&#8221; numbering, and the library will translate into the appropriate &#8220;local&#8221;
@ -398,8 +401,8 @@ numbering.
<!--l. 327--><p class="indent" > For a given index space 1<span <!--l. 327--><p class="indent" > For a given index space 1<span
class="cmmi-10">&#x2026;</span><span class="zplmr7m-">&#x2026;</span><span
class="cmmi-10">n </span>there are many possible associated topologies, i.e. class="zplmr7m-">n </span>there are many possible associated topologies, i.e.
many different discretization stencils; thus the description of the index space is not many different discretization stencils; thus the description of the index space is not
completed until the user has defined a sparsity pattern, either explicitly through completed until the user has defined a sparsity pattern, either explicitly through
<span class="obeylines-h"><span class="verb"><span <span class="obeylines-h"><span class="verb"><span
@ -410,19 +413,20 @@ class="cmtt-10">psb_cdasb</span></span></span> and a sparse matrix with a call t
class="cmtt-10">psb_spasb</span></span></span>. After <span class="obeylines-h"><span class="verb"><span class="cmtt-10">psb_spasb</span></span></span>. After <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdasb</span></span></span> each class="cmtt-10">psb_cdasb</span></span></span> each
process <span process <span
class="cmmi-10">i </span>will have defined a set of &#8220;halo&#8221; (or &#8220;ghost&#8221;) indices <span class="zplmr7m-">i </span>will have defined a set of &#8220;halo&#8221; (or &#8220;ghost&#8221;) indices <span
class="cmmi-10">n</span><sub>row<sub><span class="zplmr7m-">n</span><sub>row<sub><span
class="cmmi-5">i</span></sub></sub> + 1<span class="zplmr7m-x-x-60">i</span></sub></sub> <span
class="cmmi-10">&#x2026;</span><span class="zplmr7t-">+ </span>1<span
class="cmmi-10">n</span><sub>col<sub> class="zplmr7m-">&#x2026;</span><span
class="zplmr7m-">n</span><sub>col<sub>
<span <span
class="cmmi-5">i</span></sub></sub>, class="zplmr7m-x-x-60">i</span></sub></sub>,
denoting elements of the index space that are <span denoting elements of the index space that are <span
class="cmti-10">not </span>assigned to process <span class="pplri7t-">not </span>assigned to process <span
class="cmmi-10">i</span>; however the class="zplmr7m-">i</span>; however the
variables associated with them are needed to complete computations associated with variables associated with them are needed to complete computations associated with
the sparse matrix <span the sparse matrix <span
class="cmmi-10">A</span>, and thus they have to be fetched from (neighbouring) class="zplmr7m-">A</span>, and thus they have to be fetched from (neighbouring)
processes. The descriptor of the index space is built exactly for the purpose processes. The descriptor of the index space is built exactly for the purpose
of properly sequencing the communication steps required to achieve this of properly sequencing the communication steps required to achieve this
objective. objective.
@ -432,18 +436,18 @@ matrix/vector creation and linear system solution as follows:
<li <li
class="enumerate" id="x4-6002x1"> class="enumerate" id="x4-6002x1">
<!--l. 347--><p class="noindent" >Initialize parallel environment with <span class="obeylines-h"><span class="verb"><span <!--l. 347--><p class="noindent" >Initialize parallel environment with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_init</span></span></span> class="cmtt-10">psb_init</span></span></span>;
</li> </li>
<li <li
class="enumerate" id="x4-6004x2"> class="enumerate" id="x4-6004x2">
<!--l. 348--><p class="noindent" >Initialize index space with <span class="obeylines-h"><span class="verb"><span <!--l. 348--><p class="noindent" >Initialize index space with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdall</span></span></span> class="cmtt-10">psb_cdall</span></span></span>;
</li> </li>
<li <li
class="enumerate" id="x4-6006x3"> class="enumerate" id="x4-6006x3">
<!--l. 349--><p class="noindent" >Allocate sparse matrix and dense vectors with <span class="obeylines-h"><span class="verb"><span <!--l. 349--><p class="noindent" >Allocate sparse matrix and dense vectors with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spall</span></span></span> and <span class="obeylines-h"><span class="verb"><span class="cmtt-10">psb_spall</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geall</span></span></span> class="cmtt-10">psb_geall</span></span></span>;
</li> </li>
<li <li
class="enumerate" id="x4-6008x4"> class="enumerate" id="x4-6008x4">
@ -459,12 +463,12 @@ class="cmtt-10">psb_geins</span></span></span>
<li <li
class="enumerate" id="x4-6012x1"> class="enumerate" id="x4-6012x1">
<!--l. 355--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span <!--l. 355--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdasb</span></span></span> class="cmtt-10">psb_cdasb</span></span></span>,
</li> </li>
<li <li
class="enumerate" id="x4-6014x2"> class="enumerate" id="x4-6014x2">
<!--l. 356--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span <!--l. 356--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spasb</span></span></span> class="cmtt-10">psb_spasb</span></span></span>,
@ -472,147 +476,157 @@ class="cmtt-10">psb_spasb</span></span></span>
<li <li
class="enumerate" id="x4-6016x3"> class="enumerate" id="x4-6016x3">
<!--l. 357--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span <!--l. 357--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geasb</span></span></span></li></ol> class="cmtt-10">psb_geasb</span></span></span>;</li></ol>
</li> </li>
<li <li
class="enumerate" id="x4-6018x6"> class="enumerate" id="x4-6018x6">
<!--l. 359--><p class="noindent" >Choose the preconditioner to be used with <span class="obeylines-h"><span class="verb"><span <!--l. 359--><p class="noindent" >Choose the preconditioner to be used with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">prec%init</span></span></span> and build it with class="cmtt-10">prec%init</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">prec%set</span></span></span>, and build it with
<span class="obeylines-h"><span class="verb"><span <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">prec%build</span></span></span><span class="footnote-mark"><a class="cmtt-10">prec%build</span></span></span><span class="footnote-mark"><a
href="userhtml7.html#fn3x0"><sup class="textsuperscript">3</sup></a></span><a href="userhtml7.html#fn3x0"><sup class="textsuperscript">3</sup></a></span><a
id="x4-6019f3"></a> . id="x4-6019f3"></a> ;
</li> </li>
<li <li
class="enumerate" id="x4-6022x7"> class="enumerate" id="x4-6022x7">
<!--l. 363--><p class="noindent" >Call the iterative driver <span class="obeylines-h"><span class="verb"><span <!--l. 364--><p class="noindent" >Call one of the iterative drivers with the method of choice, e.g. <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_krylov</span></span></span> with the method of choice, e.g. class="cmtt-10">psb_krylov</span></span></span>
<span class="obeylines-h"><span class="verb"><span with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">bicgstab</span></span></span>.</li></ol> class="cmtt-10">bicgstab</span></span></span>.</li></ol>
<!--l. 366--><p class="noindent" >This is the structure of the sample programs in the directory <span class="obeylines-h"><span class="verb"><span <!--l. 367--><p class="noindent" >This is the structure of the sample programs in the directory <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">test/pargen/</span></span></span>. class="cmtt-10">test/pargen/</span></span></span>.
<!--l. 369--><p class="indent" > For a simulation in which the same discretization mesh is used over multiple time <!--l. 370--><p class="indent" > For a simulation in which the same discretization mesh is used over multiple
steps, the following structure may be more appropriate: time steps, the following structure may be more appropriate:
<ol class="enumerate1" > <ol class="enumerate1" >
<li <li
class="enumerate" id="x4-6024x1"> class="enumerate" id="x4-6024x1">
<!--l. 372--><p class="noindent" >Initialize parallel environment with <span class="obeylines-h"><span class="verb"><span <!--l. 373--><p class="noindent" >Initialize parallel environment with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_init</span></span></span> class="cmtt-10">psb_init</span></span></span>
</li> </li>
<li <li
class="enumerate" id="x4-6026x2"> class="enumerate" id="x4-6026x2">
<!--l. 373--><p class="noindent" >Initialize index space with <span class="obeylines-h"><span class="verb"><span <!--l. 374--><p class="noindent" >Initialize index space with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdall</span></span></span> class="cmtt-10">psb_cdall</span></span></span>
</li> </li>
<li <li
class="enumerate" id="x4-6028x3"> class="enumerate" id="x4-6028x3">
<!--l. 374--><p class="noindent" >Loop over the topology of the discretization mesh and build the descriptor <!--l. 375--><p class="noindent" >Loop over the topology of the discretization mesh and build the
with <span class="obeylines-h"><span class="verb"><span descriptor with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdins</span></span></span> class="cmtt-10">psb_cdins</span></span></span>;
</li> </li>
<li <li
class="enumerate" id="x4-6030x4"> class="enumerate" id="x4-6030x4">
<!--l. 376--><p class="noindent" >Assemble the descriptor with <span class="obeylines-h"><span class="verb"><span <!--l. 377--><p class="noindent" >Assemble the descriptor with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdasb</span></span></span> class="cmtt-10">psb_cdasb</span></span></span>;
</li> </li>
<li <li
class="enumerate" id="x4-6032x5"> class="enumerate" id="x4-6032x5">
<!--l. 377--><p class="noindent" >Allocate the sparse matrices and dense vectors with <span class="obeylines-h"><span class="verb"><span <!--l. 378--><p class="noindent" >Allocate the sparse matrices and dense vectors with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spall</span></span></span> and class="cmtt-10">psb_spall</span></span></span> and
<span class="obeylines-h"><span class="verb"><span <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geall</span></span></span> class="cmtt-10">psb_geall</span></span></span>;
</li> </li>
<li <li
class="enumerate" id="x4-6034x6"> class="enumerate" id="x4-6034x6">
<!--l. 379--><p class="noindent" >Loop over the time steps: <!--l. 380--><p class="noindent" >Loop over the time steps:
<ol class="enumerate2" > <ol class="enumerate2" >
<li <li
class="enumerate" id="x4-6036x1"> class="enumerate" id="x4-6036x1">
<!--l. 381--><p class="noindent" >If after first time step, reinitialize the sparse matrix with <span class="obeylines-h"><span class="verb"><span <!--l. 382--><p class="noindent" >If after first time step, reinitialize the sparse matrix with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_sprn</span></span></span>; class="cmtt-10">psb_sprn</span></span></span>;
also zero out the dense vectors; also zero out the dense vectors;
</li> </li>
<li <li
class="enumerate" id="x4-6038x2"> class="enumerate" id="x4-6038x2">
<!--l. 384--><p class="noindent" >Loop over the mesh, generate the coefficients and insert/update them <!--l. 385--><p class="noindent" >Loop over the mesh, generate the coefficients and insert/update
with <span class="obeylines-h"><span class="verb"><span them with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spins</span></span></span> and <span class="obeylines-h"><span class="verb"><span class="cmtt-10">psb_spins</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geins</span></span></span> class="cmtt-10">psb_geins</span></span></span>;
</li> </li>
<li <li
class="enumerate" id="x4-6040x3"> class="enumerate" id="x4-6040x3">
<!--l. 386--><p class="noindent" >Assemble with <span class="obeylines-h"><span class="verb"><span <!--l. 387--><p class="noindent" >Assemble with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spasb</span></span></span> and <span class="obeylines-h"><span class="verb"><span class="cmtt-10">psb_spasb</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geasb</span></span></span> class="cmtt-10">psb_geasb</span></span></span>;
</li> </li>
<li <li
class="enumerate" id="x4-6042x4"> class="enumerate" id="x4-6042x4">
<!--l. 387--><p class="noindent" >Choose and build preconditioner with <span class="obeylines-h"><span class="verb"><span <!--l. 388--><p class="noindent" >
class="cmtt-10">prec%init</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">prec%build</span></span></span>
</li> </li>
<li <li
class="enumerate" id="x4-6044x5"> class="enumerate" id="x4-6044x5">
<!--l. 389--><p class="noindent" >Call the iterative method of choice, e.g. <span class="obeylines-h"><span class="verb"><span <!--l. 388--><p class="noindent" >Choose the preconditioner to be used with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_bicgstab</span></span></span></li></ol> class="cmtt-10">prec%init</span></span></span> and
<span class="obeylines-h"><span class="verb"><span
class="cmtt-10">prec%set</span></span></span>, and build it with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">prec%build</span></span></span>;
</li>
<li
class="enumerate" id="x4-6046x6">
<!--l. 391--><p class="noindent" >Call one of the iterative drivers with the method of choice, e.g.
<span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_krylov</span></span></span> with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">bicgstab</span></span></span>.</li></ol>
</li></ol> </li></ol>
<!--l. 392--><p class="noindent" >The insertion routines will be called as many times as needed; they only need to be <!--l. 395--><p class="noindent" >The insertion routines will be called as many times as needed; they only need to be
called on the data that is actually allocated to the current process, i.e. each process called on the data that is actually allocated to the current process, i.e. each process
generates its own data. generates its own data.
<!--l. 397--><p class="indent" > In principle there is no specific order in the calls to <span class="obeylines-h"><span class="verb"><span <!--l. 400--><p class="indent" > In principle there is no specific order in the calls to <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spins</span></span></span>, nor is there a class="cmtt-10">psb_spins</span></span></span>, nor is there a
requirement to build a matrix row in its entirety before calling the routine; this requirement to build a matrix row in its entirety before calling the routine; this
allows the application programmer to walk through the discretization mesh element allows the application programmer to walk through the discretization mesh element
by element, generating the main part of a given matrix row but also contributions to by element, generating the main part of a given matrix row but also contributions to
the rows corresponding to neighbouring elements. the rows corresponding to neighbouring elements.
<!--l. 404--><p class="indent" > From a functional point of view it is even possible to execute one call for each <!--l. 407--><p class="indent" > From a functional point of view it is even possible to execute one call for each
nonzero coefficient; however this would have a substantial computational nonzero coefficient; however this would have a substantial computational
overhead. It is therefore advisable to pack a certain amount of data into each overhead. It is therefore advisable to pack a certain amount of data into each
call to the insertion routine, say touching on a few tens of rows; the best call to the insertion routine, say touching on a few tens of rows; the best
performing value would depend on both the architecture of the computer being
used and on the problem structure. At the opposite extreme, it would be
possible to generate the entire part of a coefficient matrix residing on a
performing value would depend on both the architecture of the computer
being used and on the problem structure. At the opposite extreme, it would
be possible to generate the entire part of a coefficient matrix residing on a
process and pass it in a single call to <span class="obeylines-h"><span class="verb"><span process and pass it in a single call to <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spins</span></span></span>; this, however, would entail a class="cmtt-10">psb_spins</span></span></span>; this, however, would entail a
doubling of memory occupation, and thus would be almost always far from doubling of memory occupation, and thus would be almost always far from
optimal. optimal.
<!--l. 420--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">2.3.1 </span> <a
<!--l. 417--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">2.3.1 </span> <a
id="x4-70002.3.1"></a>User-defined index mappings</h5> id="x4-70002.3.1"></a>User-defined index mappings</h5>
<!--l. 419--><p class="noindent" >PSBLAS supports user-defined global to local index mappings, subject to the <!--l. 422--><p class="noindent" >PSBLAS supports user-defined global to local index mappings, subject to the
constraints outlined in sec.&#x00A0;<a constraints outlined in sec.&#x00A0;<a
href="#x4-60002.3">2.3<!--tex4ht:ref: sec:appstruct --></a>: href="#x4-60002.3">2.3<!--tex4ht:ref: sec:appstruct --></a>:
<ol class="enumerate1" > <ol class="enumerate1" >
<li <li
class="enumerate" id="x4-7002x1"> class="enumerate" id="x4-7002x1">
<!--l. 422--><p class="noindent" >The set of indices owned locally must be mapped to the set 1<span <!--l. 425--><p class="noindent" >The set of indices owned locally must be mapped to the set 1<span
class="cmmi-10">&#x2026;</span><span class="zplmr7m-">&#x2026;</span><span
class="cmmi-10">n</span><sub>row<sub><span class="zplmr7m-">n</span><sub>row<sub><span
class="cmmi-5">i</span></sub></sub>; class="zplmr7m-x-x-60">i</span></sub></sub>;
</li> </li>
<li <li
class="enumerate" id="x4-7004x2"> class="enumerate" id="x4-7004x2">
<!--l. 424--><p class="noindent" >The set of halo points must be mapped to the set <span <!--l. 427--><p class="noindent" >The set of halo points must be mapped to the set <span
class="cmmi-10">n</span><sub>row<sub><span class="zplmr7m-">n</span><sub>row<sub><span
class="cmmi-5">i</span></sub></sub> + 1<span class="zplmr7m-x-x-60">i</span></sub></sub> <span
class="cmmi-10">&#x2026;</span><span class="zplmr7t-">+ </span>1<span
class="cmmi-10">n</span><sub>col<sub> class="zplmr7m-">&#x2026;</span><span
class="zplmr7m-">n</span><sub>col<sub>
<span <span
class="cmmi-5">i</span></sub></sub>;</li></ol> class="zplmr7m-x-x-60">i</span></sub></sub>;</li></ol>
<!--l. 427--><p class="noindent" >but otherwise the mapping is arbitrary. The user application is responsible for ensuring <!--l. 430--><p class="noindent" >but otherwise the mapping is arbitrary. The user application is responsible for ensuring
consistency of this mapping; some errors may be caught by the library, but consistency of this mapping; some errors may be caught by the library, but
this is not guaranteed. The application structure to support this usage is as this is not guaranteed. The application structure to support this usage is as
follows: follows:
<ol class="enumerate1" > <ol class="enumerate1" >
<li <li
class="enumerate" id="x4-7006x1"> class="enumerate" id="x4-7006x1">
<!--l. 433--><p class="noindent" >Initialize index <!--l. 436--><p class="noindent" >Initialize index
space with <span class="obeylines-h"><span class="verb"><span space with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdall(ictx,desc,info,vl=vl,lidx=lidx)</span></span></span> passing the class="cmtt-10">psb_cdall(ictx,desc,info,vl=vl,lidx=lidx)</span></span></span> passing the
vectors <span class="obeylines-h"><span class="verb"><span vectors <span class="obeylines-h"><span class="verb"><span
@ -622,66 +636,66 @@ class="cmtt-10">lidx(:)</span></span></span> containing the corresponding local
</li> </li>
<li <li
class="enumerate" id="x4-7008x2"> class="enumerate" id="x4-7008x2">
<!--l. 438--><p class="noindent" >Add the halo points <span class="obeylines-h"><span class="verb"><span <!--l. 441--><p class="noindent" >Add the halo points <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">ja(:)</span></span></span> and their associated local indices <span class="obeylines-h"><span class="verb"><span class="cmtt-10">ja(:)</span></span></span> and their associated local indices <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">lidx(:)</span></span></span> with class="cmtt-10">lidx(:)</span></span></span>
a(some) call(s) to <span class="obeylines-h"><span class="verb"><span with a(some) call(s) to <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdins(nz,ja,desc,info,lidx=lidx)</span></span></span>; class="cmtt-10">psb_cdins(nz,ja,desc,info,lidx=lidx)</span></span></span>;
</li> </li>
<li <li
class="enumerate" id="x4-7010x3"> class="enumerate" id="x4-7010x3">
<!--l. 441--><p class="noindent" >Assemble the descriptor with <span class="obeylines-h"><span class="verb"><span <!--l. 444--><p class="noindent" >Assemble the descriptor with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdasb</span></span></span>; class="cmtt-10">psb_cdasb</span></span></span>;
</li> </li>
<li <li
class="enumerate" id="x4-7012x4"> class="enumerate" id="x4-7012x4">
<!--l. 442--><p class="noindent" >Build the sparse matrices and vectors, optionally making use in <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spins</span></span></span>
and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geins</span></span></span> of the <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">local</span></span></span> argument specifying that the indices in <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">ia</span></span></span>,
<span class="obeylines-h"><span class="verb"><span
class="cmtt-10">ja</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">irw</span></span></span>, respectively, are already local indices.</li></ol>
<!--l. 449--><p class="noindent" > <!--l. 445--><p class="noindent" >Build the sparse matrices and vectors, optionally making use in
<h4 class="subsectionHead"><span class="titlemark">2.4 </span> <a <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spins</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geins</span></span></span> of the <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">local</span></span></span> argument specifying that the
indices in <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">ia</span></span></span>, <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">ja</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">irw</span></span></span>, respectively, are already local indices.</li></ol>
<!--l. 452--><p class="noindent" >
<h4 class="subsectionHead"><span class="titlemark">2.4 </span> <a
id="x4-80002.4"></a>Programming model</h4> id="x4-80002.4"></a>Programming model</h4>
<!--l. 451--><p class="noindent" >The PSBLAS library is based on the Single Program Multiple Data (SPMD) <!--l. 454--><p class="noindent" >The PSBLAS library is based on the Single Program Multiple Data (SPMD)
programming model: each process participating in the computation performs the programming model: each process participating in the computation performs the
same actions on a chunk of data. Parallelism is thus data-driven. same actions on a chunk of data. Parallelism is thus data-driven.
<!--l. 456--><p class="indent" > Because of this structure, many subroutines coordinate their action across the <!--l. 459--><p class="indent" > Because of this structure, many subroutines coordinate their action across the
various processes, thus providing an implicit synchronization point, and therefore various processes, thus providing an implicit synchronization point, and therefore
<span <span
class="cmti-10">must </span>be called simultaneously by all processes participating in the computation. This class="pplri7t-">must </span>be called simultaneously by all processes participating in the computation. This
is certainly true for the data allocation and assembly routines, for all the is certainly true for the data allocation and assembly routines, for all the
computational routines and for some of the tools routines. computational routines and for some of the tools routines.
<!--l. 464--><p class="indent" > However there are many cases where no synchronization, and indeed no <!--l. 467--><p class="indent" > However there are many cases where no synchronization, and indeed no
communication among processes, is implied; for instance, all the routines in sec.&#x00A0;<a communication among processes, is implied; for instance, all the routines in sec.&#x00A0;<a
href="userhtmlse3.html#x8-90003">3<!--tex4ht:ref: sec:datastruct --></a> href="userhtmlse3.html#x8-90003">3<!--tex4ht:ref: sec:datastruct --></a>
are only acting on the local data structures, and thus may be called independently. are only acting on the local data structures, and thus may be called independently.
The most important case is that of the coefficient insertion routines: since the The most important case is that of the coefficient insertion routines: since the number
number of coefficients in the sparse and dense matrices varies among the processors, of coefficients in the sparse and dense matrices varies among the processors, and
and since the user is free to choose an arbitrary order in building the matrix entries, since the user is free to choose an arbitrary order in building the matrix entries,
these routines cannot imply a synchronization. these routines cannot imply a synchronization.
<!--l. 474--><p class="indent" > Throughout this user&#8217;s guide each subroutine will be clearly indicated <!--l. 477--><p class="indent" > Throughout this user&#8217;s guide each subroutine will be clearly indicated
as: as:
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 477--><p class="noindent" > <!--l. 480--><p class="noindent" >
<span <span
class="cmbx-10">Synchronous:</span> </dt><dd class="pplb7t-">Synchronous:</span> </dt><dd
class="description"> class="description">
<!--l. 477--><p class="noindent" >must be called simultaneously by all the processes in the relevant <!--l. 480--><p class="noindent" >must be called simultaneously by all the processes in the relevant
communication context; communication context;
</dd><dt class="description"> </dd><dt class="description">
<!--l. 479--><p class="noindent" > <!--l. 482--><p class="noindent" >
<span <span
class="cmbx-10">Asynchronous:</span> </dt><dd class="pplb7t-">Asynchronous:</span> </dt><dd
class="description"> class="description">
<!--l. 479--><p class="noindent" >may be called in a totally independent manner.</dd></dl> <!--l. 482--><p class="noindent" >may be called in a totally independent manner.</dd></dl>

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -16,21 +16,21 @@ href="userhtmlse7.html" >prev</a>] [<a
href="userhtmlse7.html#tailuserhtmlse7.html" >prev-tail</a>] [<a href="userhtmlse7.html#tailuserhtmlse7.html" >prev-tail</a>] [<a
href="userhtmlse5.html#tailuserhtmlse8.html">tail</a>] [<a href="userhtmlse5.html#tailuserhtmlse8.html">tail</a>] [<a
href="userhtml.html#userhtmlse11.html" >up</a>] </p></div> href="userhtml.html#userhtmlse11.html" >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">8 </span> <a <h3 class="sectionHead"><span class="titlemark">8 </span> <a
id="x13-1230008"></a>Error handling</h3> id="x13-1230008"></a>Error handling</h3>
<!--l. 5--><p class="noindent" >The PSBLAS library error handling policy has been completely rewritten in version <!--l. 5--><p class="noindent" >The PSBLAS library error handling policy has been completely rewritten in version
2.0. The idea behind the design of this new error handling strategy is to keep error 2.0. The idea behind the design of this new error handling strategy is to keep error
messages on a stack allowing the user to trace back up to the point where the first messages on a stack allowing the user to trace back up to the point where the first
error message has been generated. Every routine in the PSBLAS-2.0 library has, as error message has been generated. Every routine in the PSBLAS-2.0 library has, as
last non-optional argument, an integer <span class="obeylines-h"><span class="verb"><span last non-optional argument, an integer <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">info</span></span></span> variable; whenever, inside the routine, an class="cmtt-10">info</span></span></span> variable; whenever, inside the
error is detected, this variable is set to a value corresponding to a specific routine, an error is detected, this variable is set to a value corresponding to a
error code. Then this error code is also pushed on the error stack and then specific error code. Then this error code is also pushed on the error stack
either control is returned to the caller routine or the execution is aborted, and then either control is returned to the caller routine or the execution is
depending on the user&#8217;s choice. At the time when the execution is aborted, aborted, depending on the user&#8217;s choice. At the time when the execution is
an error message is printed on standard output with a level of verbosity aborted, an error message is printed on standard output with a level of
that can be chosen by the user. If the execution is not aborted, then, the verbosity that can be chosen by the user. If the execution is not aborted, then,
caller routine checks the value returned in the <span class="obeylines-h"><span class="verb"><span the caller routine checks the value returned in the <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">info</span></span></span> variable and, if not class="cmtt-10">info</span></span></span> variable and, if not
zero, an error condition is raised. This process continues on all the levels of zero, an error condition is raised. This process continues on all the levels of
nested calls until the level where the user decides to abort the program nested calls until the level where the user decides to abort the program
@ -100,7 +100,6 @@ class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">info</span></span><span style="color:#000000"><span class="cmtt-9">info</span></span><span style="color:#000000"><span
class="cmtt-9">=</span></span><span style="color:#000000"><span class="cmtt-9">=</span></span><span style="color:#000000"><span
@ -111,7 +110,6 @@ class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span class="cmtt-9">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">psb_errpush</span></span><span style="color:#000000"><span class="cmtt-9">psb_errpush</span></span><span style="color:#000000"><span
@ -128,7 +126,6 @@ class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">goto</span></span><span style="color:#000000"> </span><span style="color:#000000"><span class="cmtt-9">goto</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">9999</span></span> class="cmtt-9">9999</span></span>
@ -178,7 +175,6 @@ class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">info</span></span><span style="color:#000000"><span class="cmtt-9">info</span></span><span style="color:#000000"><span
class="cmtt-9">=</span></span><span style="color:#000000"><span class="cmtt-9">=</span></span><span style="color:#000000"><span
@ -189,7 +185,6 @@ class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span class="cmtt-9">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">psb_errpush</span></span><span style="color:#000000"><span class="cmtt-9">psb_errpush</span></span><span style="color:#000000"><span
@ -206,7 +201,6 @@ class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">goto</span></span><span style="color:#000000"> </span><span style="color:#000000"><span class="cmtt-9">goto</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">9999</span></span> class="cmtt-9">9999</span></span>
@ -246,7 +240,6 @@ class="cmtt-9">then</span></span>
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span class="cmtt-9">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">psb_error</span></span><span style="color:#000000"><span class="cmtt-9">psb_error</span></span><span style="color:#000000"><span
@ -258,7 +251,6 @@ class="cmtt-9">)</span></span>
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">return</span></span> class="cmtt-9">return</span></span>
<span class="label"><a <span class="label"><a
@ -272,7 +264,6 @@ class="cmtt-9">else</span></span>
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">return</span></span> class="cmtt-9">return</span></span>
<span class="label"><a <span class="label"><a
@ -337,7 +328,7 @@ Format&#x00A0;FOO&#x00A0;is&#x00A0;unknown
========================================================== ==========================================================
Aborting... Aborting...
</pre> </pre>
<!--l. 156--><p class="nopar" > </div> </div> <!--l. 156--><p class="nopar" > </div></div>
</div> </div>
<br /> <div class="caption" <br /> <div class="caption"
><span class="id">Listing 6: </span><span ><span class="id">Listing 6: </span><span
@ -350,7 +341,7 @@ condition inside the psb_cest subroutine</span></div><!--tex4ht:label?: x13-1230
<h4 class="subsectionHead"><span class="titlemark">8.1 </span> <a <h4 class="subsectionHead"><span class="titlemark">8.1 </span> <a
id="x13-1240008.1"></a>psb_errpush &#8212; Pushes an error code onto the error stack</h4> id="x13-1240008.1"></a>psb_errpush &#8212; Pushes an error code onto the error stack</h4>
<!--l. 174--> <!--l. 174-->
<pre class="lstlisting" id="listing-155"><span class="label"><a <pre class="lstlisting" id="listing-155"><span class="label"><a
@ -371,77 +362,77 @@ class="cmtt-10">)</span></span></pre>
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 179--><p class="noindent" > <!--l. 179--><p class="noindent" >
<span <span
class="cmbx-10">Type:</span> </dt><dd class="pplb7t-">Type:</span> </dt><dd
class="description"> class="description">
<!--l. 179--><p class="noindent" >Asynchronous. <!--l. 179--><p class="noindent" >Asynchronous.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 180--><p class="noindent" > <!--l. 180--><p class="noindent" >
<span <span
class="cmbx-10">On Entry</span> </dt><dd class="pplb7t-">On Entry</span> </dt><dd
class="description"> class="description">
<!--l. 180--><p class="noindent" > <!--l. 180--><p class="noindent" >
</dd><dt class="description"> </dd><dt class="description">
<!--l. 181--><p class="noindent" > <!--l. 181--><p class="noindent" >
<span <span
class="cmbx-10">err</span><span class="pplb7t-">err</span><span
class="cmbx-10">_c</span> </dt><dd class="pplb7t-">_c</span> </dt><dd
class="description"> class="description">
<!--l. 181--><p class="noindent" >the error code<br <!--l. 181--><p class="noindent" >the error code<br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">local </span><br class="pplb7t-">local </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">required</span><br class="pplb7t-">required</span><br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">in</span>.<br class="pplb7t-">in</span>.<br
class="newline" />Specified as: an integer. class="newline" />Specified as: an integer.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 186--><p class="noindent" > <!--l. 186--><p class="noindent" >
<span <span
class="cmbx-10">r</span><span class="pplb7t-">r</span><span
class="cmbx-10">_name</span> </dt><dd class="pplb7t-">_name</span> </dt><dd
class="description"> class="description">
<!--l. 186--><p class="noindent" >the routine where the error has been caught.<br <!--l. 186--><p class="noindent" >the routine where the error has been caught.<br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">local </span><br class="pplb7t-">local </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">required</span><br class="pplb7t-">required</span><br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">in</span>.<br class="pplb7t-">in</span>.<br
class="newline" />Specified as: a string.<br class="newline" />Specified as: a string.<br
class="newline" /> class="newline" />
</dd><dt class="description"> </dd><dt class="description">
<!--l. 191--><p class="noindent" > <!--l. 191--><p class="noindent" >
<span <span
class="cmbx-10">i</span><span class="pplb7t-">i</span><span
class="cmbx-10">_err</span> </dt><dd class="pplb7t-">_err</span> </dt><dd
class="description"> class="description">
<!--l. 191--><p class="noindent" >additional info for error code<br <!--l. 191--><p class="noindent" >additional info for error code<br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">local </span><br class="pplb7t-">local </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">optional</span><br class="pplb7t-">optional</span><br
class="newline" />Specified as: an integer array<br class="newline" />Specified as: an integer array<br
class="newline" /> class="newline" />
</dd><dt class="description"> </dd><dt class="description">
<!--l. 195--><p class="noindent" > <!--l. 195--><p class="noindent" >
<span <span
class="cmbx-10">a</span><span class="pplb7t-">a</span><span
class="cmbx-10">_err</span> </dt><dd class="pplb7t-">_err</span> </dt><dd
class="description"> class="description">
<!--l. 195--><p class="noindent" >additional info for error code<br <!--l. 195--><p class="noindent" >additional info for error code<br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">local </span><br class="pplb7t-">local </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">optional</span><br class="pplb7t-">optional</span><br
class="newline" />Specified as: a string.<br class="newline" />Specified as: a string.<br
class="newline" /></dd></dl> class="newline" /></dd></dl>
<h4 class="subsectionHead"><span class="titlemark">8.2 </span> <a <h4 class="subsectionHead"><span class="titlemark">8.2 </span> <a
id="x13-1250008.2"></a>psb_error &#8212; Prints the error stack content and aborts execution</h4> id="x13-1250008.2"></a>psb_error &#8212; Prints the error stack content and aborts execution</h4>
<!--l. 204--> <!--l. 204-->
<pre class="lstlisting" id="listing-156"><span class="label"><a <pre class="lstlisting" id="listing-156"><span class="label"><a
@ -456,32 +447,32 @@ class="cmtt-10">)</span></span></pre>
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 209--><p class="noindent" > <!--l. 209--><p class="noindent" >
<span <span
class="cmbx-10">Type:</span> </dt><dd class="pplb7t-">Type:</span> </dt><dd
class="description"> class="description">
<!--l. 209--><p class="noindent" >Asynchronous. <!--l. 209--><p class="noindent" >Asynchronous.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 210--><p class="noindent" > <!--l. 210--><p class="noindent" >
<span <span
class="cmbx-10">On Entry</span> </dt><dd class="pplb7t-">On Entry</span> </dt><dd
class="description"> class="description">
<!--l. 210--><p class="noindent" > <!--l. 210--><p class="noindent" >
</dd><dt class="description"> </dd><dt class="description">
<!--l. 211--><p class="noindent" > <!--l. 211--><p class="noindent" >
<span <span
class="cmbx-10">icontxt</span> </dt><dd class="pplb7t-">icontxt</span> </dt><dd
class="description"> class="description">
<!--l. 211--><p class="noindent" >the communication context.<br <!--l. 211--><p class="noindent" >the communication context.<br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">global </span><br class="pplb7t-">global </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">optional</span><br class="pplb7t-">optional</span><br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">in</span>.<br class="pplb7t-">in</span>.<br
class="newline" />Specified as: an integer.</dd></dl> class="newline" />Specified as: an integer.</dd></dl>
<h4 class="subsectionHead"><span class="titlemark">8.3 </span> <a <h4 class="subsectionHead"><span class="titlemark">8.3 </span> <a
id="x13-1260008.3"></a>psb_set_errverbosity &#8212; Sets the verbosity of error messages</h4> id="x13-1260008.3"></a>psb_set_errverbosity &#8212; Sets the verbosity of error messages</h4>
<!--l. 224--> <!--l. 224-->
<pre class="lstlisting" id="listing-157"><span class="label"><a <pre class="lstlisting" id="listing-157"><span class="label"><a
@ -496,32 +487,32 @@ class="cmtt-10">)</span></span></pre>
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 229--><p class="noindent" > <!--l. 229--><p class="noindent" >
<span <span
class="cmbx-10">Type:</span> </dt><dd class="pplb7t-">Type:</span> </dt><dd
class="description"> class="description">
<!--l. 229--><p class="noindent" >Asynchronous. <!--l. 229--><p class="noindent" >Asynchronous.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 230--><p class="noindent" > <!--l. 230--><p class="noindent" >
<span <span
class="cmbx-10">On Entry</span> </dt><dd class="pplb7t-">On Entry</span> </dt><dd
class="description"> class="description">
<!--l. 230--><p class="noindent" > <!--l. 230--><p class="noindent" >
</dd><dt class="description"> </dd><dt class="description">
<!--l. 231--><p class="noindent" > <!--l. 231--><p class="noindent" >
<span <span
class="cmbx-10">v</span> </dt><dd class="pplb7t-">v</span> </dt><dd
class="description"> class="description">
<!--l. 231--><p class="noindent" >the verbosity level<br <!--l. 231--><p class="noindent" >the verbosity level<br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">global</span><br class="pplb7t-">global</span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">required</span><br class="pplb7t-">required</span><br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">in</span>.<br class="pplb7t-">in</span>.<br
class="newline" />Specified as: an integer.</dd></dl> class="newline" />Specified as: an integer.</dd></dl>
<h4 class="subsectionHead"><span class="titlemark">8.4 </span> <a <h4 class="subsectionHead"><span class="titlemark">8.4 </span> <a
id="x13-1270008.4"></a>psb_set_erraction &#8212; Set the type of action to be taken upon error id="x13-1270008.4"></a>psb_set_erraction &#8212; Set the type of action to be taken upon error
condition</h4> condition</h4>
<!--l. 241--> <!--l. 241-->
@ -537,28 +528,28 @@ class="cmtt-10">)</span></span></pre>
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 246--><p class="noindent" > <!--l. 246--><p class="noindent" >
<span <span
class="cmbx-10">Type:</span> </dt><dd class="pplb7t-">Type:</span> </dt><dd
class="description"> class="description">
<!--l. 246--><p class="noindent" >Asynchronous. <!--l. 246--><p class="noindent" >Asynchronous.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 247--><p class="noindent" > <!--l. 247--><p class="noindent" >
<span <span
class="cmbx-10">On Entry</span> </dt><dd class="pplb7t-">On Entry</span> </dt><dd
class="description"> class="description">
<!--l. 247--><p class="noindent" > <!--l. 247--><p class="noindent" >
</dd><dt class="description"> </dd><dt class="description">
<!--l. 248--><p class="noindent" > <!--l. 248--><p class="noindent" >
<span <span
class="cmbx-10">err</span><span class="pplb7t-">err</span><span
class="cmbx-10">_act</span> </dt><dd class="pplb7t-">_act</span> </dt><dd
class="description"> class="description">
<!--l. 248--><p class="noindent" >the type of action.<br <!--l. 248--><p class="noindent" >the type of action.<br
class="newline" />Scope: <span class="newline" />Scope: <span
class="cmbx-10">global </span><br class="pplb7t-">global </span><br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">required</span><br class="pplb7t-">required</span><br
class="newline" />Intent: <span class="newline" />Intent: <span
class="cmbx-10">in</span>.<br class="pplb7t-">in</span>.<br
class="newline" />Specified as: an integer. Possible values: <span class="obeylines-h"><span class="verb"><span class="newline" />Specified as: an integer. Possible values: <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_act_ret</span></span></span>, <span class="obeylines-h"><span class="verb"><span class="cmtt-10">psb_act_ret</span></span></span>, <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_act_abort</span></span></span>.</dd></dl> class="cmtt-10">psb_act_abort</span></span></span>.</dd></dl>

@ -16,7 +16,7 @@ href="userhtmlse8.html" >prev</a>] [<a
href="userhtmlse8.html#tailuserhtmlse8.html" >prev-tail</a>] [<a href="userhtmlse8.html#tailuserhtmlse8.html" >prev-tail</a>] [<a
href="userhtmlse6.html#tailuserhtmlse9.html">tail</a>] [<a href="userhtmlse6.html#tailuserhtmlse9.html">tail</a>] [<a
href="userhtml.html#userhtmlse12.html" >up</a>] </p></div> href="userhtml.html#userhtmlse12.html" >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">9 </span> <a <h3 class="sectionHead"><span class="titlemark">9 </span> <a
id="x14-1280009"></a>Utilities</h3> id="x14-1280009"></a>Utilities</h3>
<!--l. 4--><p class="noindent" >We have some utilities available for input and output of sparse matrices; the <!--l. 4--><p class="noindent" >We have some utilities available for input and output of sparse matrices; the
interfaces to these routines are available in the module <span class="obeylines-h"><span class="verb"><span interfaces to these routines are available in the module <span class="obeylines-h"><span class="verb"><span
@ -24,7 +24,7 @@ class="cmtt-10">psb_util_mod</span></span></span>.
<h4 class="subsectionHead"><span class="titlemark">9.1 </span> <a <h4 class="subsectionHead"><span class="titlemark">9.1 </span> <a
id="x14-1290009.1"></a> hb_read &#8212; Read a sparse matrix from a file in the Harwell&#8211;Boeing id="x14-1290009.1"></a> hb_read &#8212; Read a sparse matrix from a file in the Harwell&#8211;Boeing
format</h4> format</h4>
<!--l. 16--> <!--l. 16-->
@ -50,53 +50,53 @@ class="cmtt-10">)</span></span></pre>
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 21--><p class="noindent" > <!--l. 21--><p class="noindent" >
<span <span
class="cmbx-10">Type:</span> </dt><dd class="pplb7t-">Type:</span> </dt><dd
class="description"> class="description">
<!--l. 21--><p class="noindent" >Asynchronous. <!--l. 21--><p class="noindent" >Asynchronous.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 22--><p class="noindent" > <!--l. 22--><p class="noindent" >
<span <span
class="cmbx-10">On Entry</span> </dt><dd class="pplb7t-">On Entry</span> </dt><dd
class="description"> class="description">
<!--l. 22--><p class="noindent" > <!--l. 22--><p class="noindent" >
</dd><dt class="description"> </dd><dt class="description">
<!--l. 23--><p class="noindent" > <!--l. 23--><p class="noindent" >
<span <span
class="cmbx-10">filename</span> </dt><dd class="pplb7t-">filename</span> </dt><dd
class="description"> class="description">
<!--l. 23--><p class="noindent" >The name of the file to be read.<br <!--l. 23--><p class="noindent" >The name of the file to be read.<br
class="newline" />Type:<span class="newline" />Type:<span
class="cmbx-10">optional</span>.<br class="pplb7t-">optional</span>.<br
class="newline" />Specified as: a character variable containing a valid file name, or <span class="obeylines-h"><span class="verb"><span class="newline" />Specified as: a character variable containing a valid file name, or <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>, in class="cmtt-10">-</span></span></span>, in
which case the default input unit 5 (i.e. standard input in Unix jargon) is which case the default input unit 5 (i.e. standard input in Unix jargon) is
used. Default: <span class="obeylines-h"><span class="verb"><span used. Default: <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>. class="cmtt-10">-</span></span></span>.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 28--><p class="noindent" > <!--l. 28--><p class="noindent" >
<span <span
class="cmbx-10">iunit</span> </dt><dd class="pplb7t-">iunit</span> </dt><dd
class="description"> class="description">
<!--l. 28--><p class="noindent" >The Fortran file unit number.<br <!--l. 28--><p class="noindent" >The Fortran file unit number.<br
class="newline" />Type:<span class="newline" />Type:<span
class="cmbx-10">optional</span>.<br class="pplb7t-">optional</span>.<br
class="newline" />Specified as: an integer value. Only meaningful if filename is not <span class="obeylines-h"><span class="verb"><span class="newline" />Specified as: an integer value. Only meaningful if filename is not <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>.</dd></dl> class="cmtt-10">-</span></span></span>.</dd></dl>
<!--l. 33--><p class="noindent" > <!--l. 33--><p class="noindent" >
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 34--><p class="noindent" > <!--l. 34--><p class="noindent" >
<span <span
class="cmbx-10">On Return</span> </dt><dd class="pplb7t-">On Return</span> </dt><dd
class="description"> class="description">
<!--l. 34--><p class="noindent" > <!--l. 34--><p class="noindent" >
</dd><dt class="description"> </dd><dt class="description">
<!--l. 35--><p class="noindent" > <!--l. 35--><p class="noindent" >
<span <span
class="cmbx-10">a</span> </dt><dd class="pplb7t-">a</span> </dt><dd
class="description"> class="description">
<!--l. 35--><p class="noindent" >the sparse matrix read from file.<br <!--l. 35--><p class="noindent" >the sparse matrix read from file.<br
class="newline" />Type:<span class="newline" />Type:<span
class="cmbx-10">required</span>.<br class="pplb7t-">required</span>.<br
class="newline" />Specified as: a structured data of type <a class="newline" />Specified as: a structured data of type <a
href="userhtmlse3.html#spdata"><span href="userhtmlse3.html#spdata"><span
class="cmtt-10">psb</span><span class="cmtt-10">psb</span><span
@ -108,37 +108,37 @@ class="cmtt-10">_type</span></a>.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 38--><p class="noindent" > <!--l. 38--><p class="noindent" >
<span <span
class="cmbx-10">b</span> </dt><dd class="pplb7t-">b</span> </dt><dd
class="description"> class="description">
<!--l. 38--><p class="noindent" >Right hand side(s).<br <!--l. 38--><p class="noindent" >Right hand side(s).<br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">Optional </span><br class="pplb7t-">Optional </span><br
class="newline" />An array of type real or complex, rank 2 and having the ALLOCATABLE class="newline" />An array of type real or complex, rank 2 and having the ALLOCATABLE
attribute; will be allocated and filled in if the input file contains a right attribute; will be allocated and filled in if the input file contains a right
hand side, otherwise will be left in the UNALLOCATED state. hand side, otherwise will be left in the UNALLOCATED state.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 43--><p class="noindent" > <!--l. 43--><p class="noindent" >
<span <span
class="cmbx-10">mtitle</span> </dt><dd class="pplb7t-">mtitle</span> </dt><dd
class="description"> class="description">
<!--l. 43--><p class="noindent" >Matrix title.<br <!--l. 43--><p class="noindent" >Matrix title.<br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">Optional </span><br class="pplb7t-">Optional </span><br
class="newline" />A character variable of length 72 holding a copy of the matrix title as class="newline" />A character variable of length 72 holding a copy of the matrix title as
specified by the Harwell-Boeing format and contained in the input file. specified by the Harwell-Boeing format and contained in the input file.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 48--><p class="noindent" > <!--l. 48--><p class="noindent" >
<span <span
class="cmbx-10">iret</span> </dt><dd class="pplb7t-">iret</span> </dt><dd
class="description"> class="description">
<!--l. 48--><p class="noindent" >Error code.<br <!--l. 48--><p class="noindent" >Error code.<br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">required </span><br class="pplb7t-">required </span><br
class="newline" />An integer value; 0 means no error has been detected.</dd></dl> class="newline" />An integer value; 0 means no error has been detected.</dd></dl>
<h4 class="subsectionHead"><span class="titlemark">9.2 </span> <a <h4 class="subsectionHead"><span class="titlemark">9.2 </span> <a
id="x14-1300009.2"></a>hb_write &#8212; Write a sparse matrix to a file in the Harwell&#8211;Boeing id="x14-1300009.2"></a>hb_write &#8212; Write a sparse matrix to a file in the Harwell&#8211;Boeing
format</h4> format</h4>
<!--l. 59--> <!--l. 59-->
@ -166,23 +166,23 @@ class="cmtt-10">)</span></span></pre>
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 66--><p class="noindent" > <!--l. 66--><p class="noindent" >
<span <span
class="cmbx-10">Type:</span> </dt><dd class="pplb7t-">Type:</span> </dt><dd
class="description"> class="description">
<!--l. 66--><p class="noindent" >Asynchronous. <!--l. 66--><p class="noindent" >Asynchronous.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 67--><p class="noindent" > <!--l. 67--><p class="noindent" >
<span <span
class="cmbx-10">On Entry</span> </dt><dd class="pplb7t-">On Entry</span> </dt><dd
class="description"> class="description">
<!--l. 67--><p class="noindent" > <!--l. 67--><p class="noindent" >
</dd><dt class="description"> </dd><dt class="description">
<!--l. 68--><p class="noindent" > <!--l. 68--><p class="noindent" >
<span <span
class="cmbx-10">a</span> </dt><dd class="pplb7t-">a</span> </dt><dd
class="description"> class="description">
<!--l. 68--><p class="noindent" >the sparse matrix to be written.<br <!--l. 68--><p class="noindent" >the sparse matrix to be written.<br
class="newline" />Type:<span class="newline" />Type:<span
class="cmbx-10">required</span>.<br class="pplb7t-">required</span>.<br
class="newline" />Specified as: a structured data of type <a class="newline" />Specified as: a structured data of type <a
href="userhtmlse3.html#spdata"><span href="userhtmlse3.html#spdata"><span
class="cmtt-10">psb</span><span class="cmtt-10">psb</span><span
@ -191,35 +191,35 @@ class="cmtt-10">_type</span></a>.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 71--><p class="noindent" > <!--l. 71--><p class="noindent" >
<span <span
class="cmbx-10">b</span> </dt><dd class="pplb7t-">b</span> </dt><dd
class="description"> class="description">
<!--l. 71--><p class="noindent" >Right hand side.<br <!--l. 71--><p class="noindent" >Right hand side.<br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">Optional </span><br class="pplb7t-">Optional </span><br
class="newline" />An array of type real or complex, rank 1 and having the ALLOCATABLE class="newline" />An array of type real or complex, rank 1 and having the ALLOCATABLE
attribute; will be allocated and filled in if the input file contains a right attribute; will be allocated and filled in if the input file contains a right
hand side. hand side.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 76--><p class="noindent" > <!--l. 76--><p class="noindent" >
<span <span
class="cmbx-10">filename</span> </dt><dd class="pplb7t-">filename</span> </dt><dd
class="description"> class="description">
<!--l. 76--><p class="noindent" >The name of the file to be written to.<br <!--l. 76--><p class="noindent" >The name of the file to be written to.<br
class="newline" />Type:<span class="newline" />Type:<span
class="cmbx-10">optional</span>.<br class="pplb7t-">optional</span>.<br
class="newline" />Specified as: a character variable containing a valid file name, or <span class="obeylines-h"><span class="verb"><span class="newline" />Specified as: a character variable containing a valid file name, or <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>, in class="cmtt-10">-</span></span></span>, in
which case the default output unit 6 (i.e. standard output in Unix jargon) which case the default output unit 6 (i.e. standard output in Unix jargon)
is used. Default: <span class="obeylines-h"><span class="verb"><span is used. Default: <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>. class="cmtt-10">-</span></span></span>.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 81--><p class="noindent" > <!--l. 81--><p class="noindent" >
<span <span
class="cmbx-10">iunit</span> </dt><dd class="pplb7t-">iunit</span> </dt><dd
class="description"> class="description">
<!--l. 81--><p class="noindent" >The Fortran file unit number.<br <!--l. 81--><p class="noindent" >The Fortran file unit number.<br
class="newline" />Type:<span class="newline" />Type:<span
class="cmbx-10">optional</span>.<br class="pplb7t-">optional</span>.<br
class="newline" />Specified as: an integer value. Only meaningful if filename is not <span class="obeylines-h"><span class="verb"><span class="newline" />Specified as: an integer value. Only meaningful if filename is not <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>. class="cmtt-10">-</span></span></span>.
@ -228,43 +228,43 @@ class="cmtt-10">-</span></span></span>.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 84--><p class="noindent" > <!--l. 84--><p class="noindent" >
<span <span
class="cmbx-10">key</span> </dt><dd class="pplb7t-">key</span> </dt><dd
class="description"> class="description">
<!--l. 84--><p class="noindent" >Matrix key.<br <!--l. 84--><p class="noindent" >Matrix key.<br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">Optional </span><br class="pplb7t-">Optional </span><br
class="newline" />A character variable of length 8 holding the matrix key as specified by class="newline" />A character variable of length 8 holding the matrix key as specified by
the Harwell-Boeing format and to be written to file. the Harwell-Boeing format and to be written to file.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 89--><p class="noindent" > <!--l. 89--><p class="noindent" >
<span <span
class="cmbx-10">mtitle</span> </dt><dd class="pplb7t-">mtitle</span> </dt><dd
class="description"> class="description">
<!--l. 89--><p class="noindent" >Matrix title.<br <!--l. 89--><p class="noindent" >Matrix title.<br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">Optional </span><br class="pplb7t-">Optional </span><br
class="newline" />A character variable of length 72 holding the matrix title as specified by class="newline" />A character variable of length 72 holding the matrix title as specified by
the Harwell-Boeing format and to be written to file.</dd></dl> the Harwell-Boeing format and to be written to file.</dd></dl>
<!--l. 96--><p class="noindent" > <!--l. 96--><p class="noindent" >
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 97--><p class="noindent" > <!--l. 97--><p class="noindent" >
<span <span
class="cmbx-10">On Return</span> </dt><dd class="pplb7t-">On Return</span> </dt><dd
class="description"> class="description">
<!--l. 97--><p class="noindent" > <!--l. 97--><p class="noindent" >
</dd><dt class="description"> </dd><dt class="description">
<!--l. 98--><p class="noindent" > <!--l. 98--><p class="noindent" >
<span <span
class="cmbx-10">iret</span> </dt><dd class="pplb7t-">iret</span> </dt><dd
class="description"> class="description">
<!--l. 98--><p class="noindent" >Error code.<br <!--l. 98--><p class="noindent" >Error code.<br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">required </span><br class="pplb7t-">required </span><br
class="newline" />An integer value; 0 means no error has been detected.</dd></dl> class="newline" />An integer value; 0 means no error has been detected.</dd></dl>
<h4 class="subsectionHead"><span class="titlemark">9.3 </span> <a <h4 class="subsectionHead"><span class="titlemark">9.3 </span> <a
id="x14-1310009.3"></a>mm_mat_read &#8212; Read a sparse matrix from a file in the MatrixMarket id="x14-1310009.3"></a>mm_mat_read &#8212; Read a sparse matrix from a file in the MatrixMarket
format</h4> format</h4>
<!--l. 111--> <!--l. 111-->
@ -286,53 +286,53 @@ class="cmtt-10">)</span></span></pre>
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 116--><p class="noindent" > <!--l. 116--><p class="noindent" >
<span <span
class="cmbx-10">Type:</span> </dt><dd class="pplb7t-">Type:</span> </dt><dd
class="description"> class="description">
<!--l. 116--><p class="noindent" >Asynchronous. <!--l. 116--><p class="noindent" >Asynchronous.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 117--><p class="noindent" > <!--l. 117--><p class="noindent" >
<span <span
class="cmbx-10">On Entry</span> </dt><dd class="pplb7t-">On Entry</span> </dt><dd
class="description"> class="description">
<!--l. 117--><p class="noindent" > <!--l. 117--><p class="noindent" >
</dd><dt class="description"> </dd><dt class="description">
<!--l. 118--><p class="noindent" > <!--l. 118--><p class="noindent" >
<span <span
class="cmbx-10">filename</span> </dt><dd class="pplb7t-">filename</span> </dt><dd
class="description"> class="description">
<!--l. 118--><p class="noindent" >The name of the file to be read.<br <!--l. 118--><p class="noindent" >The name of the file to be read.<br
class="newline" />Type:<span class="newline" />Type:<span
class="cmbx-10">optional</span>.<br class="pplb7t-">optional</span>.<br
class="newline" />Specified as: a character variable containing a valid file name, or <span class="obeylines-h"><span class="verb"><span class="newline" />Specified as: a character variable containing a valid file name, or <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>, in class="cmtt-10">-</span></span></span>, in
which case the default input unit 5 (i.e. standard input in Unix jargon) is which case the default input unit 5 (i.e. standard input in Unix jargon) is
used. Default: <span class="obeylines-h"><span class="verb"><span used. Default: <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>. class="cmtt-10">-</span></span></span>.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 123--><p class="noindent" > <!--l. 123--><p class="noindent" >
<span <span
class="cmbx-10">iunit</span> </dt><dd class="pplb7t-">iunit</span> </dt><dd
class="description"> class="description">
<!--l. 123--><p class="noindent" >The Fortran file unit number.<br <!--l. 123--><p class="noindent" >The Fortran file unit number.<br
class="newline" />Type:<span class="newline" />Type:<span
class="cmbx-10">optional</span>.<br class="pplb7t-">optional</span>.<br
class="newline" />Specified as: an integer value. Only meaningful if filename is not <span class="obeylines-h"><span class="verb"><span class="newline" />Specified as: an integer value. Only meaningful if filename is not <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>.</dd></dl> class="cmtt-10">-</span></span></span>.</dd></dl>
<!--l. 128--><p class="noindent" > <!--l. 128--><p class="noindent" >
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 129--><p class="noindent" > <!--l. 129--><p class="noindent" >
<span <span
class="cmbx-10">On Return</span> </dt><dd class="pplb7t-">On Return</span> </dt><dd
class="description"> class="description">
<!--l. 129--><p class="noindent" > <!--l. 129--><p class="noindent" >
</dd><dt class="description"> </dd><dt class="description">
<!--l. 130--><p class="noindent" > <!--l. 130--><p class="noindent" >
<span <span
class="cmbx-10">a</span> </dt><dd class="pplb7t-">a</span> </dt><dd
class="description"> class="description">
<!--l. 130--><p class="noindent" >the sparse matrix read from file.<br <!--l. 130--><p class="noindent" >the sparse matrix read from file.<br
class="newline" />Type:<span class="newline" />Type:<span
class="cmbx-10">required</span>.<br class="pplb7t-">required</span>.<br
class="newline" />Specified as: a structured data of type <a class="newline" />Specified as: a structured data of type <a
href="userhtmlse3.html#spdata"><span href="userhtmlse3.html#spdata"><span
class="cmtt-10">psb</span><span class="cmtt-10">psb</span><span
@ -344,16 +344,16 @@ class="cmtt-10">_type</span></a>.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 133--><p class="noindent" > <!--l. 133--><p class="noindent" >
<span <span
class="cmbx-10">iret</span> </dt><dd class="pplb7t-">iret</span> </dt><dd
class="description"> class="description">
<!--l. 133--><p class="noindent" >Error code.<br <!--l. 133--><p class="noindent" >Error code.<br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">required </span><br class="pplb7t-">required </span><br
class="newline" />An integer value; 0 means no error has been detected.</dd></dl> class="newline" />An integer value; 0 means no error has been detected.</dd></dl>
<h4 class="subsectionHead"><span class="titlemark">9.4 </span> <a <h4 class="subsectionHead"><span class="titlemark">9.4 </span> <a
id="x14-1320009.4"></a>mm_array_read &#8212; Read a dense array from a file in the MatrixMarket id="x14-1320009.4"></a>mm_array_read &#8212; Read a dense array from a file in the MatrixMarket
format</h4> format</h4>
<!--l. 142--> <!--l. 142-->
@ -375,54 +375,54 @@ class="cmtt-10">)</span></span></pre>
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 147--><p class="noindent" > <!--l. 147--><p class="noindent" >
<span <span
class="cmbx-10">Type:</span> </dt><dd class="pplb7t-">Type:</span> </dt><dd
class="description"> class="description">
<!--l. 147--><p class="noindent" >Asynchronous. <!--l. 147--><p class="noindent" >Asynchronous.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 148--><p class="noindent" > <!--l. 148--><p class="noindent" >
<span <span
class="cmbx-10">On Entry</span> </dt><dd class="pplb7t-">On Entry</span> </dt><dd
class="description"> class="description">
<!--l. 148--><p class="noindent" > <!--l. 148--><p class="noindent" >
</dd><dt class="description"> </dd><dt class="description">
<!--l. 149--><p class="noindent" > <!--l. 149--><p class="noindent" >
<span <span
class="cmbx-10">filename</span> </dt><dd class="pplb7t-">filename</span> </dt><dd
class="description"> class="description">
<!--l. 149--><p class="noindent" >The name of the file to be read.<br <!--l. 149--><p class="noindent" >The name of the file to be read.<br
class="newline" />Type:<span class="newline" />Type:<span
class="cmbx-10">optional</span>.<br class="pplb7t-">optional</span>.<br
class="newline" />Specified as: a character variable containing a valid file name, or <span class="obeylines-h"><span class="verb"><span class="newline" />Specified as: a character variable containing a valid file name, or <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>, in class="cmtt-10">-</span></span></span>, in
which case the default input unit 5 (i.e. standard input in Unix jargon) is which case the default input unit 5 (i.e. standard input in Unix jargon) is
used. Default: <span class="obeylines-h"><span class="verb"><span used. Default: <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>. class="cmtt-10">-</span></span></span>.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 154--><p class="noindent" > <!--l. 154--><p class="noindent" >
<span <span
class="cmbx-10">iunit</span> </dt><dd class="pplb7t-">iunit</span> </dt><dd
class="description"> class="description">
<!--l. 154--><p class="noindent" >The Fortran file unit number.<br <!--l. 154--><p class="noindent" >The Fortran file unit number.<br
class="newline" />Type:<span class="newline" />Type:<span
class="cmbx-10">optional</span>.<br class="pplb7t-">optional</span>.<br
class="newline" />Specified as: an integer value. Only meaningful if filename is not <span class="obeylines-h"><span class="verb"><span class="newline" />Specified as: an integer value. Only meaningful if filename is not <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>.</dd></dl> class="cmtt-10">-</span></span></span>.</dd></dl>
<!--l. 159--><p class="noindent" > <!--l. 159--><p class="noindent" >
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 160--><p class="noindent" > <!--l. 160--><p class="noindent" >
<span <span
class="cmbx-10">On Return</span> </dt><dd class="pplb7t-">On Return</span> </dt><dd
class="description"> class="description">
<!--l. 160--><p class="noindent" > <!--l. 160--><p class="noindent" >
</dd><dt class="description"> </dd><dt class="description">
<!--l. 161--><p class="noindent" > <!--l. 161--><p class="noindent" >
<span <span
class="cmbx-10">b</span> </dt><dd class="pplb7t-">b</span> </dt><dd
class="description"> class="description">
<!--l. 161--><p class="noindent" >Right hand side(s).<br <!--l. 161--><p class="noindent" >Right hand side(s).<br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">required </span><br class="pplb7t-">required </span><br
class="newline" />An array of type real or complex, rank 1 or 2 and having the class="newline" />An array of type real or complex, rank 1 or 2 and having the
@ -431,24 +431,24 @@ href="userhtmlse3.html#vdata"><span
class="cmtt-10">psb</span><span class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>, of class="cmtt-10">_type</span></a>, of type
type real or complex.<br real or complex.<br
class="newline" />Will be allocated and filled in if the input file contains a right hand side, class="newline" />Will be allocated and filled in if the input file contains a right hand side,
otherwise will be left in the UNALLOCATED state. <br otherwise will be left in the UNALLOCATED state. <br
class="newline" /> class="newline" />
</dd><dt class="description"> </dd><dt class="description">
<!--l. 168--><p class="noindent" > <!--l. 168--><p class="noindent" >
<span <span
class="cmbx-10">iret</span> </dt><dd class="pplb7t-">iret</span> </dt><dd
class="description"> class="description">
<!--l. 168--><p class="noindent" >Error code.<br <!--l. 168--><p class="noindent" >Error code.<br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">required </span><br class="pplb7t-">required </span><br
class="newline" />An integer value; 0 means no error has been detected.</dd></dl> class="newline" />An integer value; 0 means no error has been detected.</dd></dl>
<h4 class="subsectionHead"><span class="titlemark">9.5 </span> <a <h4 class="subsectionHead"><span class="titlemark">9.5 </span> <a
id="x14-1330009.5"></a>mm_mat_write &#8212; Write a sparse matrix to a file in the MatrixMarket id="x14-1330009.5"></a>mm_mat_write &#8212; Write a sparse matrix to a file in the MatrixMarket
format</h4> format</h4>
<!--l. 179--> <!--l. 179-->
@ -472,23 +472,23 @@ class="cmtt-10">)</span></span></pre>
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 183--><p class="noindent" > <!--l. 183--><p class="noindent" >
<span <span
class="cmbx-10">Type:</span> </dt><dd class="pplb7t-">Type:</span> </dt><dd
class="description"> class="description">
<!--l. 183--><p class="noindent" >Asynchronous. <!--l. 183--><p class="noindent" >Asynchronous.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 184--><p class="noindent" > <!--l. 184--><p class="noindent" >
<span <span
class="cmbx-10">On Entry</span> </dt><dd class="pplb7t-">On Entry</span> </dt><dd
class="description"> class="description">
<!--l. 184--><p class="noindent" > <!--l. 184--><p class="noindent" >
</dd><dt class="description"> </dd><dt class="description">
<!--l. 185--><p class="noindent" > <!--l. 185--><p class="noindent" >
<span <span
class="cmbx-10">a</span> </dt><dd class="pplb7t-">a</span> </dt><dd
class="description"> class="description">
<!--l. 185--><p class="noindent" >the sparse matrix to be written.<br <!--l. 185--><p class="noindent" >the sparse matrix to be written.<br
class="newline" />Type:<span class="newline" />Type:<span
class="cmbx-10">required</span>.<br class="pplb7t-">required</span>.<br
class="newline" />Specified as: a structured data of type <a class="newline" />Specified as: a structured data of type <a
href="userhtmlse3.html#spdata"><span href="userhtmlse3.html#spdata"><span
class="cmtt-10">psb</span><span class="cmtt-10">psb</span><span
@ -497,34 +497,34 @@ class="cmtt-10">_type</span></a>.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 188--><p class="noindent" > <!--l. 188--><p class="noindent" >
<span <span
class="cmbx-10">mtitle</span> </dt><dd class="pplb7t-">mtitle</span> </dt><dd
class="description"> class="description">
<!--l. 188--><p class="noindent" >Matrix title.<br <!--l. 188--><p class="noindent" >Matrix title.<br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">required </span><br class="pplb7t-">required </span><br
class="newline" />A character variable holding a descriptive title for the matrix to be class="newline" />A character variable holding a descriptive title for the matrix to be
written to file. written to file.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 192--><p class="noindent" > <!--l. 192--><p class="noindent" >
<span <span
class="cmbx-10">filename</span> </dt><dd class="pplb7t-">filename</span> </dt><dd
class="description"> class="description">
<!--l. 192--><p class="noindent" >The name of the file to be written to.<br <!--l. 192--><p class="noindent" >The name of the file to be written to.<br
class="newline" />Type:<span class="newline" />Type:<span
class="cmbx-10">optional</span>.<br class="pplb7t-">optional</span>.<br
class="newline" />Specified as: a character variable containing a valid file name, or <span class="obeylines-h"><span class="verb"><span class="newline" />Specified as: a character variable containing a valid file name, or <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>, in class="cmtt-10">-</span></span></span>, in
which case the default output unit 6 (i.e. standard output in Unix jargon) which case the default output unit 6 (i.e. standard output in Unix jargon)
is used. Default: <span class="obeylines-h"><span class="verb"><span is used. Default: <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>. class="cmtt-10">-</span></span></span>.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 197--><p class="noindent" > <!--l. 197--><p class="noindent" >
<span <span
class="cmbx-10">iunit</span> </dt><dd class="pplb7t-">iunit</span> </dt><dd
class="description"> class="description">
<!--l. 197--><p class="noindent" >The Fortran file unit number.<br <!--l. 197--><p class="noindent" >The Fortran file unit number.<br
class="newline" />Type:<span class="newline" />Type:<span
class="cmbx-10">optional</span>.<br class="pplb7t-">optional</span>.<br
class="newline" />Specified as: an integer value. Only meaningful if filename is not <span class="obeylines-h"><span class="verb"><span class="newline" />Specified as: an integer value. Only meaningful if filename is not <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>.</dd></dl> class="cmtt-10">-</span></span></span>.</dd></dl>
@ -534,27 +534,27 @@ class="cmtt-10">-</span></span></span>.</dd></dl>
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 203--><p class="noindent" > <!--l. 203--><p class="noindent" >
<span <span
class="cmbx-10">On Return</span> </dt><dd class="pplb7t-">On Return</span> </dt><dd
class="description"> class="description">
<!--l. 203--><p class="noindent" > <!--l. 203--><p class="noindent" >
</dd><dt class="description"> </dd><dt class="description">
<!--l. 204--><p class="noindent" > <!--l. 204--><p class="noindent" >
<span <span
class="cmbx-10">iret</span> </dt><dd class="pplb7t-">iret</span> </dt><dd
class="description"> class="description">
<!--l. 204--><p class="noindent" >Error code.<br <!--l. 204--><p class="noindent" >Error code.<br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">required </span><br class="pplb7t-">required </span><br
class="newline" />An integer value; 0 means no error has been detected.</dd></dl> class="newline" />An integer value; 0 means no error has been detected.</dd></dl>
<!--l. 209--><p class="noindent" ><span <!--l. 209--><p class="noindent" ><span
class="cmbx-12">Notes</span> class="pplb7t-x-x-120">Notes</span>
<!--l. 211--><p class="indent" > If this function is called on a matrix <code class="lstinline"><span style="color:#000000">a</span></code> on a distributed communicator only the <!--l. 211--><p class="indent" > If this function is called on a matrix <code class="lstinline"><span style="color:#000000">a</span></code> on a distributed communicator only the
local part is written in output. To get a single MatrixMarket file with the whole local part is written in output. To get a single MatrixMarket file with the whole
matrix when appropriate, e.g. for debugging purposes, one could <span matrix when appropriate, e.g. for debugging purposes, one could <span
class="cmti-10">gather </span>the whole class="pplri7t-">gather </span>the whole
matrix on a single rank and then write it. Consider the following example for a matrix on a single rank and then write it. Consider the following example for a
<span <span
class="cmti-10">double </span>precision matrix class="pplri7t-">double </span>precision matrix
<div class="center" <div class="center"
> >
<!--l. 227--><p class="noindent" > <!--l. 227--><p class="noindent" >
@ -581,7 +581,7 @@ psb_i_t&#x00A0;psb_c_&#x003C;s,d,c,z&#x003E;global_mat_write(ah,cdh);
<h4 class="subsectionHead"><span class="titlemark">9.6 </span> <a <h4 class="subsectionHead"><span class="titlemark">9.6 </span> <a
id="x14-1340009.6"></a>mm_array_write &#8212; Write a dense array to a file in the MatrixMarket id="x14-1340009.6"></a>mm_array_write &#8212; Write a dense array to a file in the MatrixMarket
format</h4> format</h4>
<!--l. 261--> <!--l. 261-->
@ -605,23 +605,23 @@ class="cmtt-10">)</span></span></pre>
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 266--><p class="noindent" > <!--l. 266--><p class="noindent" >
<span <span
class="cmbx-10">Type:</span> </dt><dd class="pplb7t-">Type:</span> </dt><dd
class="description"> class="description">
<!--l. 266--><p class="noindent" >Asynchronous. <!--l. 266--><p class="noindent" >Asynchronous.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 267--><p class="noindent" > <!--l. 267--><p class="noindent" >
<span <span
class="cmbx-10">On Entry</span> </dt><dd class="pplb7t-">On Entry</span> </dt><dd
class="description"> class="description">
<!--l. 267--><p class="noindent" > <!--l. 267--><p class="noindent" >
</dd><dt class="description"> </dd><dt class="description">
<!--l. 268--><p class="noindent" > <!--l. 268--><p class="noindent" >
<span <span
class="cmbx-10">b</span> </dt><dd class="pplb7t-">b</span> </dt><dd
class="description"> class="description">
<!--l. 268--><p class="noindent" >Right hand side(s).<br <!--l. 268--><p class="noindent" >Right hand side(s).<br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">required </span><br class="pplb7t-">required </span><br
class="newline" />An array of type real or complex, rank 1 or 2, or an object of type class="newline" />An array of type real or complex, rank 1 or 2, or an object of type
<a <a
href="userhtmlse3.html#vdata"><span href="userhtmlse3.html#vdata"><span
@ -634,63 +634,63 @@ class="newline" />
</dd><dt class="description"> </dd><dt class="description">
<!--l. 273--><p class="noindent" > <!--l. 273--><p class="noindent" >
<span <span
class="cmbx-10">filename</span> </dt><dd class="pplb7t-">filename</span> </dt><dd
class="description"> class="description">
<!--l. 273--><p class="noindent" >The name of the file to be written.<br <!--l. 273--><p class="noindent" >The name of the file to be written.<br
class="newline" /> class="newline" />
</dd><dt class="description"> </dd><dt class="description">
<!--l. 274--><p class="noindent" > <!--l. 274--><p class="noindent" >
<span <span
class="cmbx-10">vtitle</span> </dt><dd class="pplb7t-">vtitle</span> </dt><dd
class="description"> class="description">
<!--l. 274--><p class="noindent" >Matrix title.<br <!--l. 274--><p class="noindent" >Matrix title.<br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">required </span><br class="pplb7t-">required </span><br
class="newline" />A character variable holding a descriptive title for the vector to be written class="newline" />A character variable holding a descriptive title for the vector to be
to file. Type:<span written to file. Type:<span
class="cmbx-10">optional</span>.<br class="pplb7t-">optional</span>.<br
class="newline" />Specified as: a character variable containing a valid file name, or <span class="obeylines-h"><span class="verb"><span class="newline" />Specified as: a character variable containing a valid file name, or <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>, in class="cmtt-10">-</span></span></span>, in
which case the default input unit 5 (i.e. standard input in Unix jargon) is which case the default input unit 5 (i.e. standard input in Unix jargon) is
used. Default: <span class="obeylines-h"><span class="verb"><span used. Default: <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>. class="cmtt-10">-</span></span></span>.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 282--><p class="noindent" > <!--l. 282--><p class="noindent" >
<span <span
class="cmbx-10">iunit</span> </dt><dd class="pplb7t-">iunit</span> </dt><dd
class="description"> class="description">
<!--l. 282--><p class="noindent" >The Fortran file unit number.<br <!--l. 282--><p class="noindent" >The Fortran file unit number.<br
class="newline" />Type:<span class="newline" />Type:<span
class="cmbx-10">optional</span>.<br class="pplb7t-">optional</span>.<br
class="newline" />Specified as: an integer value. Only meaningful if filename is not <span class="obeylines-h"><span class="verb"><span class="newline" />Specified as: an integer value. Only meaningful if filename is not <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>.</dd></dl> class="cmtt-10">-</span></span></span>.</dd></dl>
<!--l. 287--><p class="noindent" > <!--l. 287--><p class="noindent" >
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 288--><p class="noindent" > <!--l. 288--><p class="noindent" >
<span <span
class="cmbx-10">On Return</span> </dt><dd class="pplb7t-">On Return</span> </dt><dd
class="description"> class="description">
<!--l. 288--><p class="noindent" > <!--l. 288--><p class="noindent" >
</dd><dt class="description"> </dd><dt class="description">
<!--l. 289--><p class="noindent" > <!--l. 289--><p class="noindent" >
<span <span
class="cmbx-10">iret</span> </dt><dd class="pplb7t-">iret</span> </dt><dd
class="description"> class="description">
<!--l. 289--><p class="noindent" >Error code.<br <!--l. 289--><p class="noindent" >Error code.<br
class="newline" />Type: <span class="newline" />Type: <span
class="cmbx-10">required </span><br class="pplb7t-">required </span><br
class="newline" />An integer value; 0 means no error has been detected.</dd></dl> class="newline" />An integer value; 0 means no error has been detected.</dd></dl>
<!--l. 294--><p class="noindent" ><span <!--l. 294--><p class="noindent" ><span
class="cmbx-12">Notes</span> class="pplb7t-x-x-120">Notes</span>
<!--l. 296--><p class="indent" > If this function is called on a vector <code class="lstinline"><span style="color:#000000">v</span></code> on a distributed communicator only the <!--l. 296--><p class="indent" > If this function is called on a vector <code class="lstinline"><span style="color:#000000">v</span></code> on a distributed communicator only the
local part is written in output. To get a single MatrixMarket file with the whole local part is written in output. To get a single MatrixMarket file with the whole
vector when appropriate, e.g. for debugging purposes, one could <span vector when appropriate, e.g. for debugging purposes, one could <span
class="cmti-10">gather </span>the whole class="pplri7t-">gather </span>the whole
vector on a single rank and then write it. Consider the following example for a <span vector on a single rank and then write it. Consider the following example for a <span
class="cmti-10">double</span> class="pplri7t-">double</span>
precision vector precision vector
<div class="center" <div class="center"
> >

Binary file not shown.

After

Width:  |  Height:  |  Size: 325 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 378 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 290 B

File diff suppressed because one or more lines are too long

@ -8,6 +8,11 @@ in J.~Dongarra, K.~Madsen, J.~Wasniewski, editors,
Proceedings of PARA~04 Workshop on State of the Art Proceedings of PARA~04 Workshop on State of the Art
in Scientific Computing, pp.~546--553, Lecture Notes in Computer Science, in Scientific Computing, pp.~546--553, Lecture Notes in Computer Science,
Springer, 2005. Springer, 2005.
\bibitem{BERTACCINIFILIPPONE}
D. Bertaccini\ and\ S. Filippone,
{\em Sparse approximate inverse preconditioners on high performance GPU platforms},
Comput. Math. Appl., 71, (2016), no.~3, 693--711.
%
\bibitem{2007d} A. Buttari, D. di Serafino, P. D'Ambra, S. Filippone,\newblock \bibitem{2007d} A. Buttari, D. di Serafino, P. D'Ambra, S. Filippone,\newblock
2LEV-D2P4: a package of high-performance preconditioners,\newblock 2LEV-D2P4: a package of high-performance preconditioners,\newblock
Applicable Algebra in Engineering, Communications and Computing, Applicable Algebra in Engineering, Communications and Computing,
@ -183,7 +188,14 @@ MIT Press, 1998.
{\em Scientific Programming\/}~{\em 22,\/}~1, 1--19. {\em Scientific Programming\/}~{\em 22,\/}~1, 1--19.
\bibitem{OurTechRep} \bibitem{OurTechRep}
D.~Barbieri, V.~Cardellini, A.~Fanfarillo, S.~Filippone, Three storage formats D.~Barbieri, V.~Cardellini, A.~Fanfarillo, S.~Filippone, Three storage formats
for sparse matrices on {GPGPUs}, Tech. Rep. DICII RR-15.6, Universit\`a di for sparse matrices on {GPGPUs}, Tech. Rep. DICII RR-15.6,
Universit\`a di
Roma Tor Vergata (February 2015). Roma Tor Vergata (February 2015).
\bibitem{Filippone:2017:SMM:3034774.3017994}
S.~Filippone, V.~Cardellini, D.~Barbieri, and A.~Fanfarillo.
Sparse matrix-vector multiplication on {GPGPUs}.
{\em ACM Trans. Math. Softw.}, 43(4):30:1--30:49, 2017.
\end{thebibliography} \end{thebibliography}

@ -1317,13 +1317,14 @@ like Diagonal Scaling or Block Jacobi with incomplete
factorization ILU(0). factorization ILU(0).
A preconditioner is held in the \hypertarget{precdata}{{\tt A preconditioner is held in the \hypertarget{precdata}{{\tt
psb\_prec\_type}} data structure reported in psb\_Tprec\_type}} data structure reported in
figure~\ref{fig:prectype}. The \fortinline|psb_prec_type| figure~\ref{fig:prectype}. The \fortinline|psb_Tprec_type|
data type may contain a simple preconditioning matrix with the data type may contain a simple preconditioning matrix with the
associated communication descriptor.%% which may be different than the associated communication descriptor.
%% which may be different from the
%% system communication descriptor in the case of parallel %% system communication descriptor in the case of parallel
%% preconditioners like the Additive Schwarz one. Then the %% preconditioners like the Additive Schwarz one. Then the
%% \fortinline|psb_prec_type| may contain more than one preconditioning matrix %% \fortinline|psb_Tprec_type| may contain more than one preconditioning matrix
%% like in the case of Two-Level (in general Multi-Level) preconditioners. %% like in the case of Two-Level (in general Multi-Level) preconditioners.
%% The user can choose the type of preconditioner to be used by means of %% The user can choose the type of preconditioner to be used by means of
%% the \fortinline|psb_precset| subroutine; once the type of preconditioning %% the \fortinline|psb_precset| subroutine; once the type of preconditioning
@ -1407,8 +1408,8 @@ Given a heap object, the following methods are defined on it:
\item[dump] Print on file; \item[dump] Print on file;
\item[free] Release memory. \item[free] Release memory.
\end{description} \end{description}
These objects are used in AMG4PSBLAS to implement the factorization These objects are used to implement the factorization
algorithms. and approximate inversion algorithms.
%%% Local Variables: %%% Local Variables:
%%% mode: latex %%% mode: latex

@ -1,6 +1,6 @@
\section{Error handling} \section{Error handling\label{sec:errors}}
The PSBLAS library error handling policy has been completely rewritten The PSBLAS library error handling policy has been completely rewritten
in version 2.0. The idea behind the design of this new error handling in version 2.0. The idea behind the design of this new error handling

@ -344,24 +344,25 @@ A simple application structure will walk through the index space
allocation, matrix/vector creation and linear system solution as allocation, matrix/vector creation and linear system solution as
follows: follows:
\begin{enumerate} \begin{enumerate}
\item Initialize parallel environment with \verb|psb_init| \item Initialize parallel environment with \verb|psb_init|;
\item Initialize index space with \verb|psb_cdall| \item Initialize index space with \verb|psb_cdall|;
\item Allocate sparse matrix and dense vectors with \verb|psb_spall| \item Allocate sparse matrix and dense vectors with \verb|psb_spall|
and \verb|psb_geall| and \verb|psb_geall|;
\item Loop over all local rows, generate matrix and vector entries, \item Loop over all local rows, generate matrix and vector entries,
and insert them with \verb|psb_spins| and \verb|psb_geins| and insert them with \verb|psb_spins| and \verb|psb_geins|
\item Assemble the various entities: \item Assemble the various entities:
\begin{enumerate} \begin{enumerate}
\item \verb|psb_cdasb| \item \verb|psb_cdasb|,
\item \verb|psb_spasb| \item \verb|psb_spasb|,
\item \verb|psb_geasb| \item \verb|psb_geasb|;
\end{enumerate} \end{enumerate}
\item Choose the preconditioner to be used with \verb|prec%init| and \item Choose the preconditioner to be used with \verb|prec%init| and
\verb|prec%set|, and
build it with \verb|prec%build|\footnote{The subroutine style {\tt build it with \verb|prec%build|\footnote{The subroutine style {\tt
psb\_precinit} and {\tt psb\_precbl} are still supported for psb\_precinit} and {\tt psb\_precbld} are still supported for
backward compatibility}. backward compatibility};
\item Call the iterative driver \verb|psb_krylov| with the method of \item Call one of the iterative drivers with the method of
choice, e.g. \verb|bicgstab|. choice, e.g. \verb|psb_krylov| with \verb|bicgstab|.
\end{enumerate} \end{enumerate}
This is the structure of the sample programs in the directory This is the structure of the sample programs in the directory
\verb|test/pargen/|. \verb|test/pargen/|.
@ -372,21 +373,23 @@ multiple time steps, the following structure may be more appropriate:
\item Initialize parallel environment with \verb|psb_init| \item Initialize parallel environment with \verb|psb_init|
\item Initialize index space with \verb|psb_cdall| \item Initialize index space with \verb|psb_cdall|
\item Loop over the topology of the discretization mesh and build the \item Loop over the topology of the discretization mesh and build the
descriptor with \verb|psb_cdins| descriptor with \verb|psb_cdins|;
\item Assemble the descriptor with \verb|psb_cdasb| \item Assemble the descriptor with \verb|psb_cdasb|;
\item Allocate the sparse matrices and dense vectors with \item Allocate the sparse matrices and dense vectors with
\verb|psb_spall| and \verb|psb_geall| \verb|psb_spall| and \verb|psb_geall|;
\item Loop over the time steps: \item Loop over the time steps:
\begin{enumerate} \begin{enumerate}
\item If after first time step, \item If after first time step,
reinitialize the sparse matrix with \verb|psb_sprn|; also zero out reinitialize the sparse matrix with \verb|psb_sprn|; also zero out
the dense vectors; the dense vectors;
\item Loop over the mesh, generate the coefficients and insert/update \item Loop over the mesh, generate the coefficients and insert/update
them with \verb|psb_spins| and \verb|psb_geins| them with \verb|psb_spins| and \verb|psb_geins|;
\item Assemble with \verb|psb_spasb| and \verb|psb_geasb| \item Assemble with \verb|psb_spasb| and \verb|psb_geasb|;
\item Choose and build preconditioner with \verb|prec%init| and \item Choose the preconditioner to be used with \verb|prec%init| and
\verb|prec%build| \verb|prec%set|, and
\item Call the iterative method of choice, e.g. \verb|psb_bicgstab| build it with \verb|prec%build|;
\item Call one of the iterative drivers with the method of
choice, e.g. \verb|psb_krylov| with \verb|bicgstab|.
\end{enumerate} \end{enumerate}
\end{enumerate} \end{enumerate}
The insertion routines will be called as many times as needed; The insertion routines will be called as many times as needed;

@ -76,14 +76,125 @@ $ptype$ string as follows\footnote{The string is case-insensitive}:
\item[DIAG] Diagonal scaling; each entry of the input vector is \item[DIAG] Diagonal scaling; each entry of the input vector is
multiplied by the reciprocal of the sum of the absolute values of multiplied by the reciprocal of the sum of the absolute values of
the coefficients in the corresponding row of matrix $A$; the coefficients in the corresponding row of matrix $A$;
\item[BJAC] Precondition by a factorization of the \item[BJAC] Precondition by a factorization or an approximate inverse
block-diagonal of matrix $A$, where block boundaries are determined of the block-diagonal of matrix $A$, where block boundaries are
by the data allocation boundaries for each process; requires no determined by the data allocation boundaries for each process;
communication. Only the incomplete factorization $ILU(0)$ is requires no communication. See also Table-\ref{tab:p_subsolve_1}.
currently implemented.
\end{description} \end{description}
\clearpage
\subsection{Set\label{sec:precset} --- set preconditioner parameters}
\begin{center}
\fortinline|call p%set(what,val,info)|
\end{center}
\noindent
This method sets the parameters defining the subdomain solver when the
preconditioner type is \verb|BJAC|. More precisely, the parameter
identified by \fortinline|what| is assigned the value
contained in \fortinline|val|.
{\vskip1.5\baselineskip\noindent\large\bfseries Arguments} \smallskip
\begin{tabular}{p{1.2cm}p{12cm}}
\fortinline|what| & \fortinline|character(len=*)|. \\
& The parameter to be set. It can be specified through its name;
the string is case-insensitive. See
Table~\ref{tab:p_subsolve_1}.\\
\fortinline|val | & \fortinline|integer| \emph{or} \fortinline|character(len=*)| \emph{or}
\fortinline|real(psb_spk_)| \emph{or} \fortinline|real(psb_dpk_)|,
\fortinline|intent(in)|.\\
& The value of the parameter to be set. The list of allowed
values and the corresponding data types is given in
Table~\ref{tab:p_subsolve_1}.
When the value is of type \fortinline|character(len=*)|,
it is also treated as case insensitive.\\
\fortinline|info| & \fortinline|integer, intent(out)|.\\
& Error code. If no error, 0 is returned. See Section~\ref{sec:errors}
for details.
\end{tabular}
\noindent
A number of subdomain solvers can be chosen with this method;
a list of the parameters that can be set, along with their allowed and
default values, is given in Table~\ref{tab:p_subsolve_1}.\\
\bsideways
\begin{center}
\small
% \begin{tabular}{|p{3.6cm}|l|p{1.9cm}|p{3.6cm}|p{6.5cm}|}
\begin{tabular}{|p{3.2cm}|l|p{2.6cm}|p{2.6cm}|p{6.7cm}|}
\hline
\fortinline|what| & \textsc{data type} & \fortinline|val| & \textsc{default} &
\textsc{comments} \\ \hline
\fortinline|'SUB_SOLVE'| & \fortinline|character(len=*)|
& \fortinline|'ILU'| \par
\fortinline|'ILUT'| \par
\par \fortinline|'INVT'| \par \fortinline|'INVK'| \par \fortinline|'AINV'|
&
& The local solver to be used with the smoother or one-level
preconditioner ILU($p$), ILU($p,t$),
Approximate Inverses INVK($p,q$),
INVT($p_1,p_2,t_1,t_2$) and
AINV($t$); note that approximate inverses
are specifically suited for GPUs since they
do not employ triangular system solve
kernels,
see~\cite{BERTACCINIFILIPPONE}.\\ \hline
\fortinline|'SUB_FILLIN'| & \fortinline|integer|
& Any integer \par number~$\ge 0$
& 0
& Fill-in level $p$ of the incomplete LU factorizations. \\ \hline
\fortinline|'SUB_ILUTHRS'| & \fortinline|real(kind_parameter)|
& Any real number~$\ge 0$
& 0
& Drop tolerance $t$ in the ILU($p,t$) factorization. \\ \hline
\fortinline|'ILU_ALG'| & \fortinline|character(len=*)|
& \fortinline|'MILU'|
& \fortinline|'NONE'|
& ILU algorithmic variant \\ \hline
\fortinline|'ILUT_SCALE'| & \fortinline|character(len=*)|
& \fortinline|'MAXVAL'| \par
\fortinline|'DIAG'| \par
\fortinline|'ARWSUM'| \par
\fortinline|'ARCSUM'| \par
\fortinline|'ACLSUM'| \par
\fortinline|'NONE'|
& \fortinline|'NONE'|
& ILU scaling strategy \\ \hline
\fortinline|'INV_FILLIN'| & \fortinline|integer|
& Any integer \par number~$\ge 0$
& 0
& Second fill-in level $q$ of the INVK($p,q$)
approximate inverse. \\ \hline
\fortinline|'INV_ILUTHRS'| & \fortinline|real(kind_parameter)|
& Any real number~$\ge 0$
& 0
& Second drop tolerance $s$ in the
INVT($t,s$) approximate inverse. \\ \hline
\fortinline|'AINV_ALG'| & \fortinline|character(len=*)|
& \fortinline|'LLK'| \par
\fortinline|'SYM-LLK'| \par
\fortinline|'STAB-LLK'| \par
\fortinline|'MLK,LMX'|
& \fortinline|'LLK'|
& AINV algorithmic strategy. \\ \hline
\end{tabular}
\end{center}
\caption{Parameters defining the solver of the BJAC
preconditioner.\label{tab:p_subsolve_1}}
\esideways
\clearpage\subsection{build --- Builds a preconditioner} \clearpage\subsection{build --- Builds a preconditioner}
\begin{verbatim} \begin{verbatim}

@ -17,6 +17,8 @@
\newtheorem{theorem}{Theorem} \newtheorem{theorem}{Theorem}
\newtheorem{corollary}{Corollary} \newtheorem{corollary}{Corollary}
\usepackage{listings} \usepackage{listings}
\usepackage{rotating}
\usepackage{microtype}
\usepackage{algorithm2e} \usepackage{algorithm2e}
\usepackage{minted} \usepackage{minted}
\usemintedstyle{friendly} \usemintedstyle{friendly}
@ -91,12 +93,14 @@
\newcommand{\example}{\stepcounter{example}% \newcommand{\example}{\stepcounter{example}%
\section*{\examplename~\theexample}} \section*{\examplename~\theexample}}
\newcommand{\precdata}{\hyperlink{precdata}{{\tt psb\_prec\_type}}} \newcommand{\precdata}{\hyperlink{precdata}{{\tt psb\_Tprec\_type}}}
\newcommand{\descdata}{\hyperlink{descdata}{{\tt psb\_desc\_type}}} \newcommand{\descdata}{\hyperlink{descdata}{{\tt psb\_desc\_type}}}
\newcommand{\spdata}{\hyperlink{spdata}{{\tt psb\_Tspmat\_type}}} \newcommand{\spdata}{\hyperlink{spdata}{{\tt psb\_Tspmat\_type}}}
\newcommand{\vdata}{\hyperlink{vdata}{{\tt psb\_T\_vect\_type}}} \newcommand{\vdata}{\hyperlink{vdata}{{\tt psb\_T\_vect\_type}}}
\newcommand{\spbasedata}{\hypertarget{spbasedata}{{\tt psb\_T\_base\_sparse\_mat}}} \newcommand{\spbasedata}{\hypertarget{spbasedata}{{\tt psb\_T\_base\_sparse\_mat}}}
\newcommand{\vbasedata}{\hypertarget{vbasedata}{{\tt psb\_T\_base\_vect\_type}}} \newcommand{\vbasedata}{\hypertarget{vbasedata}{{\tt psb\_T\_base\_vect\_type}}}
\def\bsideways{\begin{sidewaystable}}
\def\esideways{\end{sidewaystable}}
\begin{document} \begin{document}
{ {

@ -17,8 +17,14 @@
\newtheorem{theorem}{Theorem} \newtheorem{theorem}{Theorem}
\newtheorem{corollary}{Corollary} \newtheorem{corollary}{Corollary}
\usepackage{listings} \usepackage{listings}
\usepackage{algorithm2e} \usepackage{rotating}
\usepackage{microtype} \usepackage{microtype}
\usepackage{algorithm2e}
\definecolor{bg}{rgb}{0.95,0.95,0.95}
\usepackage{breakurl}
\usepackage{mathpazo}
\usepackage[english]{babel}
\ifpdf \ifpdf
\newmintinline[fortinline]{fortran}{} \newmintinline[fortinline]{fortran}{}
\else% \else%
@ -78,12 +84,14 @@
\newcommand{\example}{\stepcounter{example}% \newcommand{\example}{\stepcounter{example}%
\section*{\examplename~\theexample}} \section*{\examplename~\theexample}}
\newcommand{\precdata}{\hyperlink{precdata}{{\tt psb\_prec\_type}}} \newcommand{\precdata}{\hyperlink{precdata}{{\tt psb\_Tprec\_type}}}
\newcommand{\descdata}{\hyperlink{descdata}{{\tt psb\_desc\_type}}} \newcommand{\descdata}{\hyperlink{descdata}{{\tt psb\_desc\_type}}}
\newcommand{\spdata}{\hyperlink{spdata}{{\tt psb\_Tspmat\_type}}} \newcommand{\spdata}{\hyperlink{spdata}{{\tt psb\_Tspmat\_type}}}
\newcommand{\vdata}{\hyperlink{vdata}{{\tt psb\_T\_vect\_type}}} \newcommand{\vdata}{\hyperlink{vdata}{{\tt psb\_T\_vect\_type}}}
\newcommand{\spbasedata}{\hypertarget{spbasedata}{{\tt psb\_T\_base\_sparse\_mat}}} \newcommand{\spbasedata}{\hypertarget{spbasedata}{{\tt psb\_T\_base\_sparse\_mat}}}
\newcommand{\vbasedata}{\hypertarget{vbasedata}{{\tt psb\_T\_base\_vect\_type}}} \newcommand{\vbasedata}{\hypertarget{vbasedata}{{\tt psb\_T\_base\_vect\_type}}}
\def\bsideways{\begin{table}}
\def\esideways{\end{table}}
\begin{document} \begin{document}
\lstset{language=Fortran} \lstset{language=Fortran}

@ -5,7 +5,7 @@ include $(INCDIR)/Make.inc.psblas
# #
# Libraries used # Libraries used
LIBDIR=$(BASEDIR)/lib LIBDIR=$(BASEDIR)/lib
PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base
LDLIBS=$(PSBLDLIBS) LDLIBS=$(PSBLDLIBS)
# #
# Compilers and such # Compilers and such

@ -6,7 +6,7 @@ INCDIR=$(INSTALLDIR)/include/
MODDIR=$(INSTALLDIR)/modules/ MODDIR=$(INSTALLDIR)/modules/
include $(INCDIR)/Make.inc.psblas include $(INCDIR)/Make.inc.psblas
LIBDIR=$(INSTALLDIR)/lib/ LIBDIR=$(INSTALLDIR)/lib/
PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base
LDLIBS=$(PSBLDLIBS) LDLIBS=$(PSBLDLIBS)
FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG). FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG).

@ -5,7 +5,7 @@ include $(INCDIR)/Make.inc.psblas
# #
# Libraries used # Libraries used
LIBDIR=$(INSTALLDIR)/lib LIBDIR=$(INSTALLDIR)/lib
PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base
LDLIBS=$(PSBLDLIBS) LDLIBS=$(PSBLDLIBS)
# #
# Compilers and such # Compilers and such

@ -658,7 +658,7 @@ end module psb_d_pde3d_mod
program psb_d_pde3d program psb_d_pde3d
use psb_base_mod use psb_base_mod
use psb_prec_mod use psb_prec_mod
use psb_krylov_mod use psb_linsolve_mod
use psb_util_mod use psb_util_mod
use psb_d_pde3d_mod use psb_d_pde3d_mod
#if defined(OPENMP) #if defined(OPENMP)

@ -8,11 +8,11 @@ CSR Storage format for matrix A: CSR COO
0200 MAXIT 0200 MAXIT
10 ITRACE 10 ITRACE
002 IRST restart for RGMRES and BiCGSTABL 002 IRST restart for RGMRES and BiCGSTABL
INVK Block Solver ILU,ILUT,INVK,AINVT,AORTH INVK Block Solver ILU,ILUT,INVK,INVT,AINV
NONE If ILU : MILU or NONE otherwise ignored NONE If ILU : MILU or NONE otherwise ignored
NONE Scaling if ILUT: NONE, MAXVAL otherwise ignored NONE Scaling if ILUT: NONE, MAXVAL otherwise ignored
0 Level of fill for forward factorization 0 Level of fill for forward factorization
1 Level of fill for inverse factorization (only INVK) 1 Level of fill for inverse factorization (only INVK,INVT)
1E-1 Threshold for forward factorization 1E-1 Threshold for forward factorization
1E-1 Threshold for inverse factorization (Only INVK, AINVT) 1E-1 Threshold for inverse factorization (Only INVK, INVT)
LLK What orthogonalization algorithm? (Only AINVT) LLK What orthogonalization algorithm? (Only AINV)

@ -6,7 +6,7 @@ INCDIR=$(INSTALLDIR)/include/
MODDIR=$(INSTALLDIR)/modules/ MODDIR=$(INSTALLDIR)/modules/
include $(INCDIR)/Make.inc.psblas include $(INCDIR)/Make.inc.psblas
LIBDIR=$(INSTALLDIR)/lib/ LIBDIR=$(INSTALLDIR)/lib/
PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base
LDLIBS=$(PSBLDLIBS) LDLIBS=$(PSBLDLIBS)
FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG). FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG).

@ -3,7 +3,7 @@ INCDIR=$(INSTALLDIR)/include/
MODDIR=$(INSTALLDIR)/modules/ MODDIR=$(INSTALLDIR)/modules/
include $(INCDIR)/Make.inc.psblas include $(INCDIR)/Make.inc.psblas
LIBDIR=$(INSTALLDIR)/lib/ LIBDIR=$(INSTALLDIR)/lib/
PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base
LDLIBS=$(PSBLDLIBS) LDLIBS=$(PSBLDLIBS)
CCOPT= -g CCOPT= -g
FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG). FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG).

@ -6,7 +6,7 @@ include $(INCDIR)/Make.inc.psblas
# Libraries used # Libraries used
# #
LIBDIR=$(INSTALLDIR)/lib/ LIBDIR=$(INSTALLDIR)/lib/
PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base
LDLIBS=$(PSBLDLIBS) LDLIBS=$(PSBLDLIBS)
FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG). FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG).

Loading…
Cancel
Save