Update docs for allocate/deallocate _wrk

development
sfilippone 1 week ago
parent 40f683effd
commit 65aac92c16

@ -52,13 +52,13 @@ href="userhtmlse9.html#x14-1280009" id="QQ2-14-158">Utilities</a></span>
<br /> &#x00A0;<span class="sectionToc" >10 <a <br /> &#x00A0;<span class="sectionToc" >10 <a
href="userhtmlse10.html#x15-13500010" id="QQ2-15-165">Preconditioner routines</a></span> href="userhtmlse10.html#x15-13500010" id="QQ2-15-165">Preconditioner routines</a></span>
<br /> &#x00A0;<span class="sectionToc" >11 <a <br /> &#x00A0;<span class="sectionToc" >11 <a
href="userhtmlse11.html#x17-14300011" id="QQ2-17-174">Iterative Methods</a></span> href="userhtmlse11.html#x17-14500011" id="QQ2-17-176">Iterative Methods</a></span>
<br /> &#x00A0;<span class="sectionToc" >12 <a <br /> &#x00A0;<span class="sectionToc" >12 <a
href="userhtmlse12.html#x19-14600012" id="QQ2-19-177">Extensions</a></span> href="userhtmlse12.html#x19-14800012" id="QQ2-19-179">Extensions</a></span>
<br /> &#x00A0;<span class="sectionToc" >13 <a <br /> &#x00A0;<span class="sectionToc" >13 <a
href="userhtmlse13.html#x20-15500013" id="QQ2-20-192">CUDA Environment Routines</a></span> href="userhtmlse13.html#x20-15700013" id="QQ2-20-194">CUDA Environment Routines</a></span>
<br /> &#x00A0;<span class="likesectionToc" ><a <br /> &#x00A0;<span class="likesectionToc" ><a
href="userhtmlli2.html#x21-170000" id="QQ2-21-221">References</a></span> href="userhtmlli2.html#x21-172000" id="QQ2-21-223">References</a></span>
</div> </div>

@ -52,13 +52,13 @@ href="userhtmlse9.html#x14-1280009" id="QQ2-14-158">Utilities</a></span>
<br /> &#x00A0;<span class="sectionToc" >10 <a <br /> &#x00A0;<span class="sectionToc" >10 <a
href="userhtmlse10.html#x15-13500010" id="QQ2-15-165">Preconditioner routines</a></span> href="userhtmlse10.html#x15-13500010" id="QQ2-15-165">Preconditioner routines</a></span>
<br /> &#x00A0;<span class="sectionToc" >11 <a <br /> &#x00A0;<span class="sectionToc" >11 <a
href="userhtmlse11.html#x17-14300011" id="QQ2-17-174">Iterative Methods</a></span> href="userhtmlse11.html#x17-14500011" id="QQ2-17-176">Iterative Methods</a></span>
<br /> &#x00A0;<span class="sectionToc" >12 <a <br /> &#x00A0;<span class="sectionToc" >12 <a
href="userhtmlse12.html#x19-14600012" id="QQ2-19-177">Extensions</a></span> href="userhtmlse12.html#x19-14800012" id="QQ2-19-179">Extensions</a></span>
<br /> &#x00A0;<span class="sectionToc" >13 <a <br /> &#x00A0;<span class="sectionToc" >13 <a
href="userhtmlse13.html#x20-15500013" id="QQ2-20-192">CUDA Environment Routines</a></span> href="userhtmlse13.html#x20-15700013" id="QQ2-20-194">CUDA Environment Routines</a></span>
<br /> &#x00A0;<span class="likesectionToc" ><a <br /> &#x00A0;<span class="likesectionToc" ><a
href="userhtmlli2.html#x21-170000" id="QQ2-21-221">References</a></span> href="userhtmlli2.html#x21-172000" id="QQ2-21-223">References</a></span>
</div> </div>

@ -11,7 +11,7 @@
</head><body </head><body
> >
<div class="footnote-text"> <div class="footnote-text">
<!--l. 72--><p class="indent" > <span class="footnote-mark"><a <!--l. 73--><p class="indent" > <span class="footnote-mark"><a
id="fn4x0"><a id="fn4x0"><a
id="x16-136002x10.1"></a> <sup class="textsuperscript">4</sup></a></span><span id="x16-136002x10.1"></a> <sup class="textsuperscript">4</sup></a></span><span
class="pplr7t-x-x-80">The string is case-insensitive</span></div> class="pplr7t-x-x-80">The string is case-insensitive</span></div>

@ -13,7 +13,7 @@
<div class="footnote-text"> <div class="footnote-text">
<!--l. 53--><p class="noindent" ><span class="footnote-mark"><a <!--l. 53--><p class="noindent" ><span class="footnote-mark"><a
id="fn5x0"><a id="fn5x0"><a
id="x18-144004x11.1"></a> <sup class="textsuperscript">5</sup></a></span><span id="x18-146004x11.1"></a> <sup class="textsuperscript">5</sup></a></span><span
class="pplr7t-x-x-80">Note: the implementation is for </span><span class="pplr7t-x-x-80">Note: the implementation is for </span><span
class="zplmr7m-x-x-80">FCG</span><span class="zplmr7m-x-x-80">FCG</span><span
class="zplmr7t-x-x-80">(</span><span class="zplmr7t-x-x-80">(</span><span

@ -308,52 +308,56 @@ href="userhtmlse10.html#x15-14000010.5" id="QQ2-15-171">descr &#8212; Prints a d
href="userhtmlse10.html#x15-14100010.6" id="QQ2-15-172">clone &#8212; clone current preconditioner</a></span> href="userhtmlse10.html#x15-14100010.6" id="QQ2-15-172">clone &#8212; clone current preconditioner</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >10.7 <a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" >10.7 <a
href="userhtmlse10.html#x15-14200010.7" id="QQ2-15-173">free &#8212; Free a preconditioner</a></span> href="userhtmlse10.html#x15-14200010.7" id="QQ2-15-173">free &#8212; Free a preconditioner</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >10.8 <a
href="userhtmlse10.html#x15-14300010.8" id="QQ2-15-174">allocate_wrk &#8212; preconditioner</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >10.9 <a
href="userhtmlse10.html#x15-14400010.9" id="QQ2-15-175">deallocate_wrk &#8212; preconditioner</a></span>
<br /> &#x00A0;<span class="sectionToc" >11 <a <br /> &#x00A0;<span class="sectionToc" >11 <a
href="userhtmlse11.html#x17-14300011">Iterative Methods</a></span> href="userhtmlse11.html#x17-14500011">Iterative Methods</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >11.1 <a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" >11.1 <a
href="userhtmlse11.html#x17-14400011.1" id="QQ2-17-175">psb_krylov &#8212; Krylov Methods Driver Routine</a></span> href="userhtmlse11.html#x17-14600011.1" id="QQ2-17-177">psb_krylov &#8212; Krylov Methods Driver Routine</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >11.2 <a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" >11.2 <a
href="userhtmlse11.html#x17-14500011.2" id="QQ2-17-176">psb_richardson &#8212; Richardson Iteration Driver Routine</a></span> href="userhtmlse11.html#x17-14700011.2" id="QQ2-17-178">psb_richardson &#8212; Richardson Iteration Driver Routine</a></span>
<br /> &#x00A0;<span class="sectionToc" >12 <a <br /> &#x00A0;<span class="sectionToc" >12 <a
href="userhtmlse12.html#x19-14600012">Extensions</a></span> href="userhtmlse12.html#x19-14800012">Extensions</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >12.1 <a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" >12.1 <a
href="userhtmlse12.html#x19-14700012.1" id="QQ2-19-178">Using the extensions</a></span> href="userhtmlse12.html#x19-14900012.1" id="QQ2-19-180">Using the extensions</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >12.2 <a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" >12.2 <a
href="userhtmlse12.html#x19-14800012.2" id="QQ2-19-179">Extensions&#8217; Data Structures</a></span> href="userhtmlse12.html#x19-15000012.2" id="QQ2-19-181">Extensions&#8217; Data Structures</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >12.3 <a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" >12.3 <a
href="userhtmlse12.html#x19-14900012.3" id="QQ2-19-182">CPU-class extensions</a></span> href="userhtmlse12.html#x19-15100012.3" id="QQ2-19-184">CPU-class extensions</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >12.4 <a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" >12.4 <a
href="userhtmlse12.html#x19-15400012.4" id="QQ2-19-191">CUDA-class extensions</a></span> href="userhtmlse12.html#x19-15600012.4" id="QQ2-19-193">CUDA-class extensions</a></span>
<br /> &#x00A0;<span class="sectionToc" >13 <a <br /> &#x00A0;<span class="sectionToc" >13 <a
href="userhtmlse13.html#x20-15500013">CUDA Environment Routines</a></span> href="userhtmlse13.html#x20-15700013">CUDA Environment Routines</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-194">psb_cuda_init</a></span> href="userhtmlse13.html#Q1-20-196">psb_cuda_init</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-196">psb_cuda_exit</a></span> href="userhtmlse13.html#Q1-20-198">psb_cuda_exit</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-198">psb_cuda_DeviceSync</a></span> href="userhtmlse13.html#Q1-20-200">psb_cuda_DeviceSync</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-200">psb_cuda_getDeviceCount</a></span> href="userhtmlse13.html#Q1-20-202">psb_cuda_getDeviceCount</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-202">psb_cuda_getDevice</a></span> href="userhtmlse13.html#Q1-20-204">psb_cuda_getDevice</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-204">psb_cuda_setDevice</a></span> href="userhtmlse13.html#Q1-20-206">psb_cuda_setDevice</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-206">psb_cuda_DeviceHasUVA</a></span> href="userhtmlse13.html#Q1-20-208">psb_cuda_DeviceHasUVA</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-208">psb_cuda_WarpSize</a></span> href="userhtmlse13.html#Q1-20-210">psb_cuda_WarpSize</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-210">psb_cuda_MultiProcessors</a></span> href="userhtmlse13.html#Q1-20-212">psb_cuda_MultiProcessors</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-212">psb_cuda_MaxThreadsPerMP</a></span> href="userhtmlse13.html#Q1-20-214">psb_cuda_MaxThreadsPerMP</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-214">psb_cuda_MaxRegisterPerBlock</a></span> href="userhtmlse13.html#Q1-20-216">psb_cuda_MaxRegisterPerBlock</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-216">psb_cuda_MemoryClockRate</a></span> href="userhtmlse13.html#Q1-20-218">psb_cuda_MemoryClockRate</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-218">psb_cuda_MemoryBusWidth</a></span> href="userhtmlse13.html#Q1-20-220">psb_cuda_MemoryBusWidth</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a <br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-220">psb_cuda_MemoryPeakBandwidth</a></span> href="userhtmlse13.html#Q1-20-222">psb_cuda_MemoryPeakBandwidth</a></span>
</div> </div>

@ -16,7 +16,7 @@ href="userhtmlse13.html#tailuserhtmlse13.html" >prev-tail</a>] [<a
href="#tailuserhtmlli2.html">tail</a>] [<a href="#tailuserhtmlli2.html">tail</a>] [<a
href="userhtml.html# " >up</a>] </p></div> href="userhtml.html# " >up</a>] </p></div>
<h3 class="likesectionHead"><a <h3 class="likesectionHead"><a
id="x21-170000"></a>References</h3> id="x21-172000"></a>References</h3>
<!--l. 2--><p class="noindent" > <!--l. 2--><p class="noindent" >
<div class="thebibliography"> <div class="thebibliography">
<p class="bibitem" ><span class="biblabel"> <p class="bibitem" ><span class="biblabel">

File diff suppressed because it is too large Load Diff

@ -16,7 +16,7 @@ href="userhtmlse10.html#tailuserhtmlse10.html" >prev-tail</a>] [<a
href="userhtmlse8.html#tailuserhtmlse11.html">tail</a>] [<a href="userhtmlse8.html#tailuserhtmlse11.html">tail</a>] [<a
href="userhtml.html# " >up</a>] </p></div> href="userhtml.html# " >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">11 </span> <a <h3 class="sectionHead"><span class="titlemark">11 </span> <a
id="x17-14300011"></a>Iterative Methods</h3> id="x17-14500011"></a>Iterative Methods</h3>
<!--l. 4--><p class="noindent" >In this chapter we provide routines for preconditioners and iterative methods. The <!--l. 4--><p class="noindent" >In this chapter we provide routines for preconditioners and iterative methods. The
interfaces for iterative methods are available in the module <span class="obeylines-h"><span class="verb"><span interfaces for iterative methods are available in the module <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_linsolve_mod</span></span></span>. class="cmtt-10">psb_linsolve_mod</span></span></span>.
@ -24,7 +24,7 @@ class="cmtt-10">psb_linsolve_mod</span></span></span>.
<h4 class="subsectionHead"><span class="titlemark">11.1 </span> <a <h4 class="subsectionHead"><span class="titlemark">11.1 </span> <a
id="x17-14400011.1"></a>psb_krylov &#8212; Krylov Methods Driver Routine</h4> id="x17-14600011.1"></a>psb_krylov &#8212; Krylov Methods Driver Routine</h4>
<!--l. 17--><p class="noindent" >This subroutine is a driver that provides a general interface for all the Krylov-Subspace <!--l. 17--><p class="noindent" >This subroutine is a driver that provides a general interface for all the Krylov-Subspace
family methods implemented in PSBLAS version 2. family methods implemented in PSBLAS version 2.
<!--l. 20--><p class="indent" > The stopping criterion can take the following values: <!--l. 20--><p class="indent" > The stopping criterion can take the following values:
@ -82,8 +82,8 @@ iteration.
<pre class="lstlisting" id="listing-218"><span class="label"><a <pre class="lstlisting" id="listing-219"><span class="label"><a
id="x17-144001r1"></a></span><span style="color:#000000"><span id="x17-146001r1"></a></span><span style="color:#000000"><span
class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">psb_krylov</span></span><span style="color:#000000"><span class="cmtt-10">psb_krylov</span></span><span style="color:#000000"><span
class="cmtt-10">(</span></span><span style="color:#000000"><span class="cmtt-10">(</span></span><span style="color:#000000"><span
@ -104,7 +104,7 @@ class="cmtt-10">,</span></span><span style="color:#000000"><span
class="cmtt-10">info</span></span><span style="color:#000000"><span class="cmtt-10">info</span></span><span style="color:#000000"><span
class="cmtt-10">,&amp;</span></span> class="cmtt-10">,&amp;</span></span>
<span class="label"><a <span class="label"><a
id="x17-144002r2"></a></span><span id="x17-146002r2"></a></span><span
class="cmtt-10">&#x00A0;</span><span class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span class="cmtt-10">&#x00A0;</span><span
@ -170,7 +170,7 @@ class="pplb7t-">FCG:</span> </dt><dd
class="description"> class="description">
<!--l. 52--><p class="noindent" >the Flexible Conjugate Gradient method<span class="footnote-mark"><a <!--l. 52--><p class="noindent" >the Flexible Conjugate Gradient method<span class="footnote-mark"><a
href="userhtml18.html#fn5x0"><sup class="textsuperscript">5</sup></a></span><a href="userhtml18.html#fn5x0"><sup class="textsuperscript">5</sup></a></span><a
id="x17-144003f5"></a> ; id="x17-146003f5"></a> ;
</dd><dt class="description"> </dd><dt class="description">
<!--l. 55--><p class="noindent" > <!--l. 55--><p class="noindent" >
<span <span
@ -460,7 +460,7 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
<h4 class="subsectionHead"><span class="titlemark">11.2 </span> <a <h4 class="subsectionHead"><span class="titlemark">11.2 </span> <a
id="x17-14500011.2"></a>psb_richardson &#8212; Richardson Iteration Driver Routine</h4> id="x17-14700011.2"></a>psb_richardson &#8212; Richardson Iteration Driver Routine</h4>
<!--l. 158--><p class="noindent" >This subroutine is a driver implementing a Richardson iteration <!--l. 158--><p class="noindent" >This subroutine is a driver implementing a Richardson iteration
<div class="math-display" > <div class="math-display" >
<img <img
@ -524,8 +524,8 @@ class="zplmr7m-x-x-76">i</span></sub> the corresponding residual at the <span
class="zplmr7m-">i</span>-th class="zplmr7m-">i</span>-th
iteration. iteration.
<!--l. 179--> <!--l. 179-->
<pre class="lstlisting" id="listing-219"><span class="label"><a <pre class="lstlisting" id="listing-220"><span class="label"><a
id="x17-145001r1"></a></span><span style="color:#000000"><span id="x17-147001r1"></a></span><span style="color:#000000"><span
class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">psb_richardson</span></span><span style="color:#000000"><span class="cmtt-10">psb_richardson</span></span><span style="color:#000000"><span
class="cmtt-10">(</span></span><span style="color:#000000"><span class="cmtt-10">(</span></span><span style="color:#000000"><span
@ -544,7 +544,7 @@ class="cmtt-10">,</span></span><span style="color:#000000"><span
class="cmtt-10">info</span></span><span style="color:#000000"><span class="cmtt-10">info</span></span><span style="color:#000000"><span
class="cmtt-10">,&amp;</span></span> class="cmtt-10">,&amp;</span></span>
<span class="label"><a <span class="label"><a
id="x17-145002r2"></a></span><span id="x17-147002r2"></a></span><span
class="cmtt-10">&#x00A0;</span><span class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span class="cmtt-10">&#x00A0;</span><span

@ -16,7 +16,7 @@ href="userhtmlse11.html#tailuserhtmlse11.html" >prev-tail</a>] [<a
href="userhtmlse9.html#tailuserhtmlse12.html">tail</a>] [<a href="userhtmlse9.html#tailuserhtmlse12.html">tail</a>] [<a
href="userhtml.html# " >up</a>] </p></div> href="userhtml.html# " >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">12 </span> <a <h3 class="sectionHead"><span class="titlemark">12 </span> <a
id="x19-14600012"></a>Extensions</h3> id="x19-14800012"></a>Extensions</h3>
<!--l. 3--><p class="noindent" >The EXT, CUDA and RSB subdirectories contain a set of extensions to the base <!--l. 3--><p class="noindent" >The EXT, CUDA and RSB subdirectories contain a set of extensions to the base
library. The extensions provide additional storage formats beyond the ones already library. The extensions provide additional storage formats beyond the ones already
contained in the base library, as well as interfaces to: contained in the base library, as well as interfaces to:
@ -49,7 +49,7 @@ in&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#XOurTechRep">23</a>]</span>. href="userhtmlli2.html#XOurTechRep">23</a>]</span>.
<!--l. 19--><p class="noindent" > <!--l. 19--><p class="noindent" >
<h4 class="subsectionHead"><span class="titlemark">12.1 </span> <a <h4 class="subsectionHead"><span class="titlemark">12.1 </span> <a
id="x19-14700012.1"></a>Using the extensions</h4> id="x19-14900012.1"></a>Using the extensions</h4>
<!--l. 21--><p class="noindent" >A sample application using the PSBLAS extensions will contain the following <!--l. 21--><p class="noindent" >A sample application using the PSBLAS extensions will contain the following
steps: steps:
<ul class="itemize1"> <ul class="itemize1">
@ -82,7 +82,7 @@ matrices):
<div class="minipage"><pre class="verbatim" id="verbatim-103"> <div class="minipage"><pre class="verbatim" id="verbatim-105">
program&#x00A0;my_cuda_test program&#x00A0;my_cuda_test
&#x00A0;&#x00A0;use&#x00A0;psb_base_mod &#x00A0;&#x00A0;use&#x00A0;psb_base_mod
&#x00A0;&#x00A0;use&#x00A0;psb_util_mod &#x00A0;&#x00A0;use&#x00A0;psb_util_mod
@ -142,7 +142,7 @@ speed of the sparse matrix-vector product with the various data structures inclu
in the library. in the library.
<!--l. 146--><p class="noindent" > <!--l. 146--><p class="noindent" >
<h4 class="subsectionHead"><span class="titlemark">12.2 </span> <a <h4 class="subsectionHead"><span class="titlemark">12.2 </span> <a
id="x19-14800012.2"></a>Extensions&#8217; Data Structures</h4> id="x19-15000012.2"></a>Extensions&#8217; Data Structures</h4>
<!--l. 150--><p class="noindent" >Access to the facilities provided by the EXT library is mainly achieved through <!--l. 150--><p class="noindent" >Access to the facilities provided by the EXT library is mainly achieved through
the data types that are provided within. The data classes are derived from the data types that are provided within. The data classes are derived from
the base classes in PSBLAS, through the Fortran&#x00A0;2003 mechanism of <span the base classes in PSBLAS, through the Fortran&#x00A0;2003 mechanism of <span
@ -153,20 +153,20 @@ href="userhtmlli2.html#XMRC:11">18</a>]</span>.
<!--l. 155--><p class="indent" > The data classes are divided between the general purpose CPU extensions, the <!--l. 155--><p class="indent" > The data classes are divided between the general purpose CPU extensions, the
GPU interfaces and the RSB interfaces. In the description we will make use of the GPU interfaces and the RSB interfaces. In the description we will make use of the
notation introduced in Table&#x00A0;<a notation introduced in Table&#x00A0;<a
href="#x19-148001r22">22<!--tex4ht:ref: tab:notation --></a>. href="#x19-150001r22">22<!--tex4ht:ref: tab:notation --></a>.
<div class="table"> <div class="table">
<!--l. 160--><p class="indent" > <a <!--l. 160--><p class="indent" > <a
id="x19-148001r22"></a><hr class="float"><div class="float" id="x19-150001r22"></a><hr class="float"><div class="float"
> >
<div class="caption" <div class="caption"
><span class="id">Table&#x00A0;22: </span><span ><span class="id">Table&#x00A0;22: </span><span
class="content">Notation for parameters describing a sparse matrix</span></div><!--tex4ht:label?: x19-148001r22 --> class="content">Notation for parameters describing a sparse matrix</span></div><!--tex4ht:label?: x19-150001r22 -->
<div class="center" <div class="center"
> >
<!--l. 162--><p class="noindent" > <!--l. 162--><p class="noindent" >
@ -276,7 +276,7 @@ class="td11"> </td></tr></table>
<a <a
id="x19-148002r5"></a> id="x19-150002r5"></a>
@ -285,18 +285,18 @@ src="mat.png" alt="PIC"
width="147" height="147" > width="147" height="147" >
<br /> <div class="caption" <br /> <div class="caption"
><span class="id">Figure&#x00A0;5: </span><span ><span class="id">Figure&#x00A0;5: </span><span
class="content">Example of sparse matrix</span></div><!--tex4ht:label?: x19-148002r5 --> class="content">Example of sparse matrix</span></div><!--tex4ht:label?: x19-150002r5 -->
<!--l. 198--><p class="indent" > </div><hr class="endfigure"> <!--l. 198--><p class="indent" > </div><hr class="endfigure">
<h4 class="subsectionHead"><span class="titlemark">12.3 </span> <a <h4 class="subsectionHead"><span class="titlemark">12.3 </span> <a
id="x19-14900012.3"></a>CPU-class extensions</h4> id="x19-15100012.3"></a>CPU-class extensions</h4>
<!--l. 203--><p class="noindent" > <!--l. 203--><p class="noindent" >
<h5 class="likesubsubsectionHead"><a <h5 class="likesubsubsectionHead"><a
id="x19-150000"></a>ELLPACK</h5> id="x19-152000"></a>ELLPACK</h5>
<!--l. 205--><p class="noindent" >The ELLPACK/ITPACK format (shown in Figure&#x00A0;<a <!--l. 205--><p class="noindent" >The ELLPACK/ITPACK format (shown in Figure&#x00A0;<a
href="#x19-150001r6">6<!--tex4ht:ref: fig:ell --></a>) comprises two 2-dimensional href="#x19-152001r6">6<!--tex4ht:ref: fig:ell --></a>) comprises two 2-dimensional
arrays <span class="obeylines-h"><span class="verb"><span arrays <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">AS</span></span></span> and <span class="obeylines-h"><span class="verb"><span class="cmtt-10">AS</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">JA</span></span></span> with <span class="obeylines-h"><span class="verb"><span class="cmtt-10">JA</span></span></span> with <span class="obeylines-h"><span class="verb"><span
@ -317,7 +317,7 @@ row.
<a <a
id="x19-150001r6"></a> id="x19-152001r6"></a>
@ -327,13 +327,13 @@ width="233" height="233" >
<br /> <div class="caption" <br /> <div class="caption"
><span class="id">Figure&#x00A0;6: </span><span ><span class="id">Figure&#x00A0;6: </span><span
class="content">ELLPACK compression of matrix in Figure&#x00A0;<a class="content">ELLPACK compression of matrix in Figure&#x00A0;<a
href="#x19-148002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-150001r6 --> href="#x19-150002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-152001r6 -->
<!--l. 225--><p class="indent" > </div><hr class="endfigure"> <!--l. 225--><p class="indent" > </div><hr class="endfigure">
<a <a
id="x19-150002r1"></a> id="x19-152002r1"></a>
@ -343,8 +343,8 @@ href="#x19-148002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:l
<!--l. 231--> <!--l. 231-->
<pre class="lstlisting" id="listing-220"><span class="label"><a <pre class="lstlisting" id="listing-221"><span class="label"><a
id="x19-150003r1"></a></span><span id="x19-152003r1"></a></span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
@ -353,7 +353,7 @@ class="cmtt-9">i</span></span><span style="color:#000000"><span
class="cmtt-9">=1,</span></span><span style="color:#000000"><span class="cmtt-9">=1,</span></span><span style="color:#000000"><span
class="cmtt-9">n</span></span> class="cmtt-9">n</span></span>
<span class="label"><a <span class="label"><a
id="x19-150004r2"></a></span><span id="x19-152004r2"></a></span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
@ -362,7 +362,7 @@ class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">t</span></span><span style="color:#000000"><span class="cmtt-9">t</span></span><span style="color:#000000"><span
class="cmtt-9">=0</span></span> class="cmtt-9">=0</span></span>
<span class="label"><a <span class="label"><a
id="x19-150005r3"></a></span><span id="x19-152005r3"></a></span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
@ -373,7 +373,7 @@ class="cmtt-9">j</span></span><span style="color:#000000"><span
class="cmtt-9">=1,</span></span><span style="color:#000000"><span class="cmtt-9">=1,</span></span><span style="color:#000000"><span
class="cmtt-9">maxnzr</span></span> class="cmtt-9">maxnzr</span></span>
<span class="label"><a <span class="label"><a
id="x19-150006r4"></a></span><span id="x19-152006r4"></a></span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
@ -401,7 +401,7 @@ class="cmtt-9">,</span></span><span style="color:#000000"><span
class="cmtt-9">j</span></span><span style="color:#000000"><span class="cmtt-9">j</span></span><span style="color:#000000"><span
class="cmtt-9">))</span></span> class="cmtt-9">))</span></span>
<span class="label"><a <span class="label"><a
id="x19-150007r5"></a></span><span id="x19-152007r5"></a></span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
@ -410,7 +410,7 @@ class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">end</span></span><span style="color:#000000"> </span><span style="color:#000000"><span class="cmtt-9">end</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">do</span></span> class="cmtt-9">do</span></span>
<span class="label"><a <span class="label"><a
id="x19-150008r6"></a></span><span id="x19-152008r6"></a></span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
@ -423,7 +423,7 @@ class="cmtt-9">)</span></span><span style="color:#000000"> </span><span style="c
class="cmtt-9">=</span></span><span style="color:#000000"> </span><span style="color:#000000"><span class="cmtt-9">=</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">t</span></span> class="cmtt-9">t</span></span>
<span class="label"><a <span class="label"><a
id="x19-150009r7"></a></span><span id="x19-152009r7"></a></span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
@ -431,9 +431,9 @@ class="cmtt-9">end</span></span><span style="color:#000000"> </span><span style=
class="cmtt-9">do</span></span></pre> class="cmtt-9">do</span></span></pre>
<a <a
id="x19-150010r1"></a> id="x19-152010r1"></a>
<a <a
id="x19-150011"></a> id="x19-152011"></a>
<span <span
class="pplb7t-">Algorithm</span><span class="pplb7t-">Algorithm</span><span
class="pplb7t-">&#x00A0;1:</span>&#x00A0; Matrix-Vector product in ELL format class="pplb7t-">&#x00A0;1:</span>&#x00A0; Matrix-Vector product in ELL format
@ -446,7 +446,7 @@ class="zplmr7m-">y </span><span
class="zplmr7t-">= </span><span class="zplmr7t-">= </span><span
class="zplmr7m-">Ax </span>can be computed with the code shown in class="zplmr7m-">Ax </span>can be computed with the code shown in
Alg.&#x00A0;<a Alg.&#x00A0;<a
href="#x19-150010r1">1<!--tex4ht:ref: alg:ell --></a>; it costs one memory write per outer iteration, plus three memory reads and href="#x19-152010r1">1<!--tex4ht:ref: alg:ell --></a>; it costs one memory write per outer iteration, plus three memory reads and
two floating-point operations per inner iteration. two floating-point operations per inner iteration.
<!--l. 247--><p class="indent" > Unless all rows have exactly the same number of nonzeros, some of the <!--l. 247--><p class="indent" > Unless all rows have exactly the same number of nonzeros, some of the
coefficients in the <span class="obeylines-h"><span class="verb"><span coefficients in the <span class="obeylines-h"><span class="verb"><span
@ -455,12 +455,12 @@ overhead both in terms of memory space and redundant operations (multiplications
by zero). The overhead can be acceptable if: by zero). The overhead can be acceptable if:
<ol class="enumerate1" > <ol class="enumerate1" >
<li <li
class="enumerate" id="x19-150013x1"> class="enumerate" id="x19-152013x1">
<!--l. 253--><p class="noindent" >The maximum number of nonzeros per row is not much larger than the <!--l. 253--><p class="noindent" >The maximum number of nonzeros per row is not much larger than the
average; average;
</li> </li>
<li <li
class="enumerate" id="x19-150015x2"> class="enumerate" id="x19-152015x2">
<!--l. 255--><p class="noindent" >The regularity of the data structure allows for faster code, e.g. by allowing <!--l. 255--><p class="noindent" >The regularity of the data structure allows for faster code, e.g. by allowing
vectorization, thereby offsetting the additional storage requirements.</li></ol> vectorization, thereby offsetting the additional storage requirements.</li></ol>
<!--l. 259--><p class="noindent" >In the extreme case where the input matrix has one full row, the ELLPACK <!--l. 259--><p class="noindent" >In the extreme case where the input matrix has one full row, the ELLPACK
@ -473,7 +473,7 @@ class="cmtt-10">psb_T_ell_sparse_mat</span></span></span>:
<div class="center" <div class="center"
> >
<!--l. 281--><p class="noindent" > <!--l. 281--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-104"> <div class="minipage"><pre class="verbatim" id="verbatim-106">
&#x00A0;&#x00A0;type,&#x00A0;extends(psb_d_base_sparse_mat)&#x00A0;::&#x00A0;psb_d_ell_sparse_mat &#x00A0;&#x00A0;type,&#x00A0;extends(psb_d_base_sparse_mat)&#x00A0;::&#x00A0;psb_d_ell_sparse_mat
&#x00A0;&#x00A0;&#x00A0;&#x00A0;! &#x00A0;&#x00A0;&#x00A0;&#x00A0;!
&#x00A0;&#x00A0;&#x00A0;&#x00A0;!&#x00A0;ITPACK/ELL&#x00A0;format,&#x00A0;extended. &#x00A0;&#x00A0;&#x00A0;&#x00A0;!&#x00A0;ITPACK/ELL&#x00A0;format,&#x00A0;extended.
@ -488,7 +488,7 @@ class="cmtt-10">psb_T_ell_sparse_mat</span></span></span>:
</pre> </pre>
<!--l. 295--><p class="nopar" > </div></div> <!--l. 295--><p class="nopar" > </div></div>
<h5 class="likesubsubsectionHead"><a <h5 class="likesubsubsectionHead"><a
id="x19-151000"></a>Hacked ELLPACK</h5> id="x19-153000"></a>Hacked ELLPACK</h5>
@ -564,7 +564,7 @@ format.
<a <a
id="x19-151001r7"></a> id="x19-153001r7"></a>
@ -574,7 +574,7 @@ width="248" height="248" >
<br /> <div class="caption" <br /> <div class="caption"
><span class="id">Figure&#x00A0;7: </span><span ><span class="id">Figure&#x00A0;7: </span><span
class="content">Hacked ELLPACK compression of matrix in Figure&#x00A0;<a class="content">Hacked ELLPACK compression of matrix in Figure&#x00A0;<a
href="#x19-148002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-151001r7 --> href="#x19-150002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-153001r7 -->
@ -586,7 +586,7 @@ class="cmtt-10">psb_T_hll_sparse_mat</span></span></span>:
<div class="center" <div class="center"
> >
<!--l. 374--><p class="noindent" > <!--l. 374--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-105"> <div class="minipage"><pre class="verbatim" id="verbatim-107">
&#x00A0;&#x00A0;type,&#x00A0;extends(psb_d_base_sparse_mat)&#x00A0;::&#x00A0;psb_d_hll_sparse_mat &#x00A0;&#x00A0;type,&#x00A0;extends(psb_d_base_sparse_mat)&#x00A0;::&#x00A0;psb_d_hll_sparse_mat
&#x00A0;&#x00A0;&#x00A0;&#x00A0;! &#x00A0;&#x00A0;&#x00A0;&#x00A0;!
&#x00A0;&#x00A0;&#x00A0;&#x00A0;!&#x00A0;HLL&#x00A0;format.&#x00A0;(Hacked&#x00A0;ELL) &#x00A0;&#x00A0;&#x00A0;&#x00A0;!&#x00A0;HLL&#x00A0;format.&#x00A0;(Hacked&#x00A0;ELL)
@ -601,9 +601,9 @@ class="cmtt-10">psb_T_hll_sparse_mat</span></span></span>:
</pre> </pre>
<!--l. 388--><p class="nopar" > </div></div> <!--l. 388--><p class="nopar" > </div></div>
<h5 class="likesubsubsectionHead"><a <h5 class="likesubsubsectionHead"><a
id="x19-152000"></a>Diagonal storage</h5> id="x19-154000"></a>Diagonal storage</h5>
<!--l. 396--><p class="noindent" >The DIAgonal (DIA) format (shown in Figure&#x00A0;<a <!--l. 396--><p class="noindent" >The DIAgonal (DIA) format (shown in Figure&#x00A0;<a
href="#x19-152001r8">8<!--tex4ht:ref: fig:dia --></a>) has a 2-dimensional array <span class="obeylines-h"><span class="verb"><span href="#x19-154001r8">8<!--tex4ht:ref: fig:dia --></a>) has a 2-dimensional array <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">AS</span></span></span> class="cmtt-10">AS</span></span></span>
containing in each column the coefficients along a diagonal of the matrix, and an containing in each column the coefficients along a diagonal of the matrix, and an
integer array <span class="obeylines-h"><span class="verb"><span integer array <span class="obeylines-h"><span class="verb"><span
@ -614,7 +614,7 @@ are padded with zeros as necessary.
class="zplmr7m-">y </span><span class="zplmr7m-">y </span><span
class="zplmr7t-">= </span><span class="zplmr7t-">= </span><span
class="zplmr7m-">Ax </span>is shown in Alg.&#x00A0;<a class="zplmr7m-">Ax </span>is shown in Alg.&#x00A0;<a
href="#x19-152003r2">2<!--tex4ht:ref: alg:dia --></a>; it href="#x19-154003r2">2<!--tex4ht:ref: alg:dia --></a>; it
costs one memory read per outer iteration, plus three memory reads, one memory costs one memory read per outer iteration, plus three memory reads, one memory
write and two floating-point operations per inner iteration. The accesses to write and two floating-point operations per inner iteration. The accesses to
<span class="obeylines-h"><span class="verb"><span <span class="obeylines-h"><span class="verb"><span
@ -627,7 +627,7 @@ required.
<a <a
id="x19-152001r8"></a> id="x19-154001r8"></a>
@ -637,13 +637,13 @@ width="248" height="248" >
<br /> <div class="caption" <br /> <div class="caption"
><span class="id">Figure&#x00A0;8: </span><span ><span class="id">Figure&#x00A0;8: </span><span
class="content">DIA compression of matrix in Figure&#x00A0;<a class="content">DIA compression of matrix in Figure&#x00A0;<a
href="#x19-148002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-152001r8 --> href="#x19-150002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-154001r8 -->
<!--l. 419--><p class="indent" > </div><hr class="endfigure"> <!--l. 419--><p class="indent" > </div><hr class="endfigure">
<a <a
id="x19-152002r2"></a> id="x19-154002r2"></a>
@ -655,7 +655,7 @@ href="#x19-148002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:l
<div class="center" <div class="center"
> >
<!--l. 437--><p class="noindent" > <!--l. 437--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-106"> <div class="minipage"><pre class="verbatim" id="verbatim-108">
&#x00A0;&#x00A0;&#x00A0;&#x00A0;do&#x00A0;j=1,ndiag &#x00A0;&#x00A0;&#x00A0;&#x00A0;do&#x00A0;j=1,ndiag
&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;if&#x00A0;(offset(j)&#x00A0;&#x003E;&#x00A0;0)&#x00A0;then &#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;if&#x00A0;(offset(j)&#x00A0;&#x003E;&#x00A0;0)&#x00A0;then
&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;ir1&#x00A0;=&#x00A0;1;&#x00A0;ir2&#x00A0;=&#x00A0;m&#x00A0;-&#x00A0;offset(j); &#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;ir1&#x00A0;=&#x00A0;1;&#x00A0;ir2&#x00A0;=&#x00A0;m&#x00A0;-&#x00A0;offset(j);
@ -669,9 +669,9 @@ href="#x19-148002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:l
</pre> </pre>
<!--l. 450--><p class="nopar" > </div></div> <!--l. 450--><p class="nopar" > </div></div>
<a <a
id="x19-152003r2"></a> id="x19-154003r2"></a>
<a <a
id="x19-152004"></a> id="x19-154004"></a>
<span <span
class="pplb7t-">Algorithm</span><span class="pplb7t-">Algorithm</span><span
class="pplb7t-">&#x00A0;2:</span>&#x00A0; Matrix-Vector product in DIA format class="pplb7t-">&#x00A0;2:</span>&#x00A0; Matrix-Vector product in DIA format
@ -684,7 +684,7 @@ class="cmtt-10">psb_T_dia_sparse_mat</span></span></span>:
<div class="center" <div class="center"
> >
<!--l. 473--><p class="noindent" > <!--l. 473--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-107"> <div class="minipage"><pre class="verbatim" id="verbatim-109">
&#x00A0;&#x00A0;type,&#x00A0;extends(psb_d_base_sparse_mat)&#x00A0;::&#x00A0;psb_d_dia_sparse_mat &#x00A0;&#x00A0;type,&#x00A0;extends(psb_d_base_sparse_mat)&#x00A0;::&#x00A0;psb_d_dia_sparse_mat
&#x00A0;&#x00A0;&#x00A0;&#x00A0;! &#x00A0;&#x00A0;&#x00A0;&#x00A0;!
&#x00A0;&#x00A0;&#x00A0;&#x00A0;!&#x00A0;DIA&#x00A0;format,&#x00A0;extended. &#x00A0;&#x00A0;&#x00A0;&#x00A0;!&#x00A0;DIA&#x00A0;format,&#x00A0;extended.
@ -698,7 +698,7 @@ class="cmtt-10">psb_T_dia_sparse_mat</span></span></span>:
</pre> </pre>
<!--l. 486--><p class="nopar" > </div></div> <!--l. 486--><p class="nopar" > </div></div>
<h5 class="likesubsubsectionHead"><a <h5 class="likesubsubsectionHead"><a
id="x19-153000"></a>Hacked DIA</h5> id="x19-155000"></a>Hacked DIA</h5>
<!--l. 495--><p class="noindent" >Storage by DIAgonals is an attractive option for matrices whose coefficients are <!--l. 495--><p class="noindent" >Storage by DIAgonals is an attractive option for matrices whose coefficients are
located on a small set of diagonals, since they do away with storing explicitly the located on a small set of diagonals, since they do away with storing explicitly the
indices and therefore reduce significantly memory traffic. However, having a few indices and therefore reduce significantly memory traffic. However, having a few
@ -749,7 +749,7 @@ class="pplri7t-">hackOffsets[k]</span>.
<a <a
id="x19-153001r9"></a> id="x19-155001r9"></a>
@ -759,7 +759,7 @@ width="248" height="248" >
<br /> <div class="caption" <br /> <div class="caption"
><span class="id">Figure&#x00A0;9: </span><span ><span class="id">Figure&#x00A0;9: </span><span
class="content">Hacked DIA compression of matrix in Figure&#x00A0;<a class="content">Hacked DIA compression of matrix in Figure&#x00A0;<a
href="#x19-148002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-153001r9 --> href="#x19-150002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-155001r9 -->
@ -769,7 +769,7 @@ class="cmtt-10">psb_T_hdia_sparse_mat</span></span></span>:
<div class="center" <div class="center"
> >
<!--l. 568--><p class="noindent" > <!--l. 568--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-108"> <div class="minipage"><pre class="verbatim" id="verbatim-110">
&#x00A0;&#x00A0;type&#x00A0;pm &#x00A0;&#x00A0;type&#x00A0;pm
&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;real(psb_dpk_),&#x00A0;allocatable&#x00A0;&#x00A0;::&#x00A0;data(:,:) &#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;real(psb_dpk_),&#x00A0;allocatable&#x00A0;&#x00A0;::&#x00A0;data(:,:)
&#x00A0;&#x00A0;end&#x00A0;type&#x00A0;pm &#x00A0;&#x00A0;end&#x00A0;type&#x00A0;pm
@ -804,7 +804,7 @@ class="cmtt-10">psb_T_hdia_sparse_mat</span></span></span>:
<h4 class="subsectionHead"><span class="titlemark">12.4 </span> <a <h4 class="subsectionHead"><span class="titlemark">12.4 </span> <a
id="x19-15400012.4"></a>CUDA-class extensions</h4> id="x19-15600012.4"></a>CUDA-class extensions</h4>
<!--l. 4--><p class="noindent" >For computing with CUDA we define a dual memorization strategy in which each <!--l. 4--><p class="noindent" >For computing with CUDA we define a dual memorization strategy in which each
variable on the CPU (&#8220;host&#8221;) side has a GPU (&#8220;device&#8221;) side. When a GPU-type variable on the CPU (&#8220;host&#8221;) side has a GPU (&#8220;device&#8221;) side. When a GPU-type
variable is initialized, the data contained is (usually) the same on both sides. Each variable is initialized, the data contained is (usually) the same on both sides. Each
@ -846,7 +846,7 @@ a matrix-vector product
<div class="center" <div class="center"
> >
<!--l. 39--><p class="noindent" > <!--l. 39--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-109"> <div class="minipage"><pre class="verbatim" id="verbatim-111">
&#x00A0;&#x00A0;&#x00A0;&#x00A0;call&#x00A0;psb_spmm(alpha,a,x,beta,y,desc_a,info) &#x00A0;&#x00A0;&#x00A0;&#x00A0;call&#x00A0;psb_spmm(alpha,a,x,beta,y,desc_a,info)
</pre> </pre>
<!--l. 43--><p class="nopar" > </div></div> <!--l. 43--><p class="nopar" > </div></div>

@ -16,16 +16,16 @@ href="userhtmlse12.html#tailuserhtmlse12.html" >prev-tail</a>] [<a
href="userhtmlse10.html#tailuserhtmlse13.html">tail</a>] [<a href="userhtmlse10.html#tailuserhtmlse13.html">tail</a>] [<a
href="userhtml.html# " >up</a>] </p></div> href="userhtml.html# " >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">13 </span> <a <h3 class="sectionHead"><span class="titlemark">13 </span> <a
id="x20-15500013"></a>CUDA Environment Routines</h3> id="x20-15700013"></a>CUDA Environment Routines</h3>
<!--l. 91--><p class="noindent" > <!--l. 91--><p class="noindent" >
<h4 class="likesubsectionHead"><a <h4 class="likesubsectionHead"><a
id="x20-156000"></a>psb_cuda_init &#8212; Initializes PSBLAS-CUDA environment</h4> id="x20-158000"></a>psb_cuda_init &#8212; Initializes PSBLAS-CUDA environment</h4>
<a <a
id="Q1-20-194"></a> id="Q1-20-196"></a>
<div class="center" <div class="center"
> >
<!--l. 99--><p class="noindent" > <!--l. 99--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-110"> <div class="minipage"><pre class="verbatim" id="verbatim-112">
call&#x00A0;psb_cuda_init(ctxt&#x00A0;[,&#x00A0;device]) call&#x00A0;psb_cuda_init(ctxt&#x00A0;[,&#x00A0;device])
</pre> </pre>
<!--l. 103--><p class="nopar" > </div></div> <!--l. 103--><p class="nopar" > </div></div>
@ -64,17 +64,17 @@ class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" > <ol class="enumerate1" >
<li <li
class="enumerate" id="x20-156002x1"> class="enumerate" id="x20-158002x1">
<!--l. 125--><p class="noindent" >A call to this routine must precede any other PSBLAS-CUDA call.</li></ol> <!--l. 125--><p class="noindent" >A call to this routine must precede any other PSBLAS-CUDA call.</li></ol>
<!--l. 129--><p class="noindent" > <!--l. 129--><p class="noindent" >
<h4 class="likesubsectionHead"><a <h4 class="likesubsectionHead"><a
id="x20-157000"></a>psb_cuda_exit &#8212; Exit from PSBLAS-CUDA environment</h4> id="x20-159000"></a>psb_cuda_exit &#8212; Exit from PSBLAS-CUDA environment</h4>
<a <a
id="Q1-20-196"></a> id="Q1-20-198"></a>
<div class="center" <div class="center"
> >
<!--l. 137--><p class="noindent" > <!--l. 137--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-111"> <div class="minipage"><pre class="verbatim" id="verbatim-113">
call&#x00A0;psb_cuda_exit(ctxt) call&#x00A0;psb_cuda_exit(ctxt)
</pre> </pre>
<!--l. 141--><p class="nopar" > </div></div> <!--l. 141--><p class="nopar" > </div></div>
@ -106,16 +106,16 @@ class="pplb7t-">in</span>.<br
class="newline" />Specified as: an integer variable.</dd></dl> class="newline" />Specified as: an integer variable.</dd></dl>
<!--l. 161--><p class="noindent" > <!--l. 161--><p class="noindent" >
<h4 class="likesubsectionHead"><a <h4 class="likesubsectionHead"><a
id="x20-158000"></a>psb_cuda_DeviceSync &#8212; Synchronize CUDA device</h4> id="x20-160000"></a>psb_cuda_DeviceSync &#8212; Synchronize CUDA device</h4>
<a <a
id="Q1-20-198"></a> id="Q1-20-200"></a>
<div class="center" <div class="center"
> >
<!--l. 169--><p class="noindent" > <!--l. 169--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-112"> <div class="minipage"><pre class="verbatim" id="verbatim-114">
call&#x00A0;psb_cuda_DeviceSync() call&#x00A0;psb_cuda_DeviceSync()
</pre> </pre>
<!--l. 173--><p class="nopar" > </div></div> <!--l. 173--><p class="nopar" > </div></div>
@ -123,26 +123,26 @@ call&#x00A0;psb_cuda_DeviceSync()
CUDA-side code, have completed. CUDA-side code, have completed.
<!--l. 182--><p class="noindent" > <!--l. 182--><p class="noindent" >
<h4 class="likesubsectionHead"><a <h4 class="likesubsectionHead"><a
id="x20-159000"></a>psb_cuda_getDeviceCount </h4> id="x20-161000"></a>psb_cuda_getDeviceCount </h4>
<a <a
id="Q1-20-200"></a> id="Q1-20-202"></a>
<div class="center" <div class="center"
> >
<!--l. 190--><p class="noindent" > <!--l. 190--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-113"> <div class="minipage"><pre class="verbatim" id="verbatim-115">
ngpus&#x00A0;=&#x00A0;&#x00A0;psb_cuda_getDeviceCount() ngpus&#x00A0;=&#x00A0;&#x00A0;psb_cuda_getDeviceCount()
</pre> </pre>
<!--l. 194--><p class="nopar" > </div></div> <!--l. 194--><p class="nopar" > </div></div>
<!--l. 199--><p class="noindent" >Get number of devices available on current computing node. <!--l. 199--><p class="noindent" >Get number of devices available on current computing node.
<!--l. 201--><p class="noindent" > <!--l. 201--><p class="noindent" >
<h4 class="likesubsectionHead"><a <h4 class="likesubsectionHead"><a
id="x20-160000"></a>psb_cuda_getDevice </h4> id="x20-162000"></a>psb_cuda_getDevice </h4>
<a <a
id="Q1-20-202"></a> id="Q1-20-204"></a>
<div class="center" <div class="center"
> >
<!--l. 209--><p class="noindent" > <!--l. 209--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-114"> <div class="minipage"><pre class="verbatim" id="verbatim-116">
ngpus&#x00A0;=&#x00A0;&#x00A0;psb_cuda_getDevice() ngpus&#x00A0;=&#x00A0;&#x00A0;psb_cuda_getDevice()
</pre> </pre>
<!--l. 213--><p class="nopar" > </div></div> <!--l. 213--><p class="nopar" > </div></div>
@ -152,39 +152,39 @@ ngpus&#x00A0;=&#x00A0;&#x00A0;psb_cuda_getDevice()
<!--l. 220--><p class="noindent" > <!--l. 220--><p class="noindent" >
<h4 class="likesubsectionHead"><a <h4 class="likesubsectionHead"><a
id="x20-161000"></a>psb_cuda_setDevice </h4> id="x20-163000"></a>psb_cuda_setDevice </h4>
<a <a
id="Q1-20-204"></a> id="Q1-20-206"></a>
<div class="center" <div class="center"
> >
<!--l. 228--><p class="noindent" > <!--l. 228--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-115"> <div class="minipage"><pre class="verbatim" id="verbatim-117">
info&#x00A0;=&#x00A0;psb_cuda_setDevice(dev) info&#x00A0;=&#x00A0;psb_cuda_setDevice(dev)
</pre> </pre>
<!--l. 232--><p class="nopar" > </div></div> <!--l. 232--><p class="nopar" > </div></div>
<!--l. 237--><p class="noindent" >Set device to be used by current process. <!--l. 237--><p class="noindent" >Set device to be used by current process.
<!--l. 239--><p class="noindent" > <!--l. 239--><p class="noindent" >
<h4 class="likesubsectionHead"><a <h4 class="likesubsectionHead"><a
id="x20-162000"></a>psb_cuda_DeviceHasUVA </h4> id="x20-164000"></a>psb_cuda_DeviceHasUVA </h4>
<a <a
id="Q1-20-206"></a> id="Q1-20-208"></a>
<div class="center" <div class="center"
> >
<!--l. 247--><p class="noindent" > <!--l. 247--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-116"> <div class="minipage"><pre class="verbatim" id="verbatim-118">
hasUva&#x00A0;=&#x00A0;psb_cuda_DeviceHasUVA() hasUva&#x00A0;=&#x00A0;psb_cuda_DeviceHasUVA()
</pre> </pre>
<!--l. 251--><p class="nopar" > </div></div> <!--l. 251--><p class="nopar" > </div></div>
<!--l. 256--><p class="noindent" >Returns true if device currently in use supports UVA (Unified Virtual Addressing). <!--l. 256--><p class="noindent" >Returns true if device currently in use supports UVA (Unified Virtual Addressing).
<!--l. 259--><p class="noindent" > <!--l. 259--><p class="noindent" >
<h4 class="likesubsectionHead"><a <h4 class="likesubsectionHead"><a
id="x20-163000"></a>psb_cuda_WarpSize </h4> id="x20-165000"></a>psb_cuda_WarpSize </h4>
<a <a
id="Q1-20-208"></a> id="Q1-20-210"></a>
<div class="center" <div class="center"
> >
<!--l. 267--><p class="noindent" > <!--l. 267--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-117"> <div class="minipage"><pre class="verbatim" id="verbatim-119">
nw&#x00A0;=&#x00A0;psb_cuda_WarpSize() nw&#x00A0;=&#x00A0;psb_cuda_WarpSize()
</pre> </pre>
<!--l. 271--><p class="nopar" > </div></div> <!--l. 271--><p class="nopar" > </div></div>
@ -194,39 +194,39 @@ nw&#x00A0;=&#x00A0;psb_cuda_WarpSize()
<!--l. 279--><p class="noindent" > <!--l. 279--><p class="noindent" >
<h4 class="likesubsectionHead"><a <h4 class="likesubsectionHead"><a
id="x20-164000"></a>psb_cuda_MultiProcessors </h4> id="x20-166000"></a>psb_cuda_MultiProcessors </h4>
<a <a
id="Q1-20-210"></a> id="Q1-20-212"></a>
<div class="center" <div class="center"
> >
<!--l. 287--><p class="noindent" > <!--l. 287--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-118"> <div class="minipage"><pre class="verbatim" id="verbatim-120">
nmp&#x00A0;=&#x00A0;psb_cuda_MultiProcessors() nmp&#x00A0;=&#x00A0;psb_cuda_MultiProcessors()
</pre> </pre>
<!--l. 291--><p class="nopar" > </div></div> <!--l. 291--><p class="nopar" > </div></div>
<!--l. 296--><p class="noindent" >Returns the number of multiprocessors in the CUDA device. <!--l. 296--><p class="noindent" >Returns the number of multiprocessors in the CUDA device.
<!--l. 298--><p class="noindent" > <!--l. 298--><p class="noindent" >
<h4 class="likesubsectionHead"><a <h4 class="likesubsectionHead"><a
id="x20-165000"></a>psb_cuda_MaxThreadsPerMP </h4> id="x20-167000"></a>psb_cuda_MaxThreadsPerMP </h4>
<a <a
id="Q1-20-212"></a> id="Q1-20-214"></a>
<div class="center" <div class="center"
> >
<!--l. 306--><p class="noindent" > <!--l. 306--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-119"> <div class="minipage"><pre class="verbatim" id="verbatim-121">
nt&#x00A0;=&#x00A0;psb_cuda_MaxThreadsPerMP() nt&#x00A0;=&#x00A0;psb_cuda_MaxThreadsPerMP()
</pre> </pre>
<!--l. 310--><p class="nopar" > </div></div> <!--l. 310--><p class="nopar" > </div></div>
<!--l. 315--><p class="noindent" >Returns the maximum number of threads per multiprocessor. <!--l. 315--><p class="noindent" >Returns the maximum number of threads per multiprocessor.
<!--l. 318--><p class="noindent" > <!--l. 318--><p class="noindent" >
<h4 class="likesubsectionHead"><a <h4 class="likesubsectionHead"><a
id="x20-166000"></a>psb_cuda_MaxRegistersPerBlock </h4> id="x20-168000"></a>psb_cuda_MaxRegistersPerBlock </h4>
<a <a
id="Q1-20-214"></a> id="Q1-20-216"></a>
<div class="center" <div class="center"
> >
<!--l. 326--><p class="noindent" > <!--l. 326--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-120"> <div class="minipage"><pre class="verbatim" id="verbatim-122">
nr&#x00A0;=&#x00A0;psb_cuda_MaxRegistersPerBlock() nr&#x00A0;=&#x00A0;psb_cuda_MaxRegistersPerBlock()
</pre> </pre>
<!--l. 330--><p class="nopar" > </div></div> <!--l. 330--><p class="nopar" > </div></div>
@ -236,39 +236,39 @@ nr&#x00A0;=&#x00A0;psb_cuda_MaxRegistersPerBlock()
<!--l. 338--><p class="noindent" > <!--l. 338--><p class="noindent" >
<h4 class="likesubsectionHead"><a <h4 class="likesubsectionHead"><a
id="x20-167000"></a>psb_cuda_MemoryClockRate </h4> id="x20-169000"></a>psb_cuda_MemoryClockRate </h4>
<a <a
id="Q1-20-216"></a> id="Q1-20-218"></a>
<div class="center" <div class="center"
> >
<!--l. 346--><p class="noindent" > <!--l. 346--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-121"> <div class="minipage"><pre class="verbatim" id="verbatim-123">
cl&#x00A0;=&#x00A0;psb_cuda_MemoryClockRate() cl&#x00A0;=&#x00A0;psb_cuda_MemoryClockRate()
</pre> </pre>
<!--l. 350--><p class="nopar" > </div></div> <!--l. 350--><p class="nopar" > </div></div>
<!--l. 355--><p class="noindent" >Returns the memory clock rate in KHz, as an integer. <!--l. 355--><p class="noindent" >Returns the memory clock rate in KHz, as an integer.
<!--l. 357--><p class="noindent" > <!--l. 357--><p class="noindent" >
<h4 class="likesubsectionHead"><a <h4 class="likesubsectionHead"><a
id="x20-168000"></a>psb_cuda_MemoryBusWidth </h4> id="x20-170000"></a>psb_cuda_MemoryBusWidth </h4>
<a <a
id="Q1-20-218"></a> id="Q1-20-220"></a>
<div class="center" <div class="center"
> >
<!--l. 365--><p class="noindent" > <!--l. 365--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-122"> <div class="minipage"><pre class="verbatim" id="verbatim-124">
nb&#x00A0;=&#x00A0;psb_cuda_MemoryBusWidth() nb&#x00A0;=&#x00A0;psb_cuda_MemoryBusWidth()
</pre> </pre>
<!--l. 369--><p class="nopar" > </div></div> <!--l. 369--><p class="nopar" > </div></div>
<!--l. 374--><p class="noindent" >Returns the memory bus width in bits. <!--l. 374--><p class="noindent" >Returns the memory bus width in bits.
<!--l. 376--><p class="noindent" > <!--l. 376--><p class="noindent" >
<h4 class="likesubsectionHead"><a <h4 class="likesubsectionHead"><a
id="x20-169000"></a>psb_cuda_MemoryPeakBandwidth </h4> id="x20-171000"></a>psb_cuda_MemoryPeakBandwidth </h4>
<a <a
id="Q1-20-220"></a> id="Q1-20-222"></a>
<div class="center" <div class="center"
> >
<!--l. 384--><p class="noindent" > <!--l. 384--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-123"> <div class="minipage"><pre class="verbatim" id="verbatim-125">
bw&#x00A0;=&#x00A0;psb_cuda_MemoryPeakBandwidth() bw&#x00A0;=&#x00A0;psb_cuda_MemoryPeakBandwidth()
</pre> </pre>
<!--l. 388--><p class="nopar" > </div></div> <!--l. 388--><p class="nopar" > </div></div>

File diff suppressed because one or more lines are too long

@ -1,6 +1,6 @@
\subsection{CUDA-class extensions} \subsection{CUDA-class extensions}
\label{sec:cudastruct}
For computing with CUDA we define a dual memorization strategy in For computing with CUDA we define a dual memorization strategy in
which each variable on the CPU (``host'') side has a GPU (``device'') which each variable on the CPU (``host'') side has a GPU (``device'')
side. When a GPU-type variable is initialized, the data contained is side. When a GPU-type variable is initialized, the data contained is

@ -3,11 +3,12 @@
% \section{Preconditioners} % \section{Preconditioners}
\label{sec:psprecs} \label{sec:psprecs}
The base PSBLAS library contains the implementation of two simple The base PSBLAS library contains the implementation of some simple
preconditioning techniques: preconditioning techniques:
\begin{itemize} \begin{itemize}
\item Diagonal Scaling \item Diagonal Scaling
\item Block Jacobi with ILU(0) factorization \item Block Jacobi with ILU(0) factorization
\item Block Jacobi with an approximate inverse
%% \item Additive Schwarz with the Restricted Additive Schwarz and %% \item Additive Schwarz with the Restricted Additive Schwarz and
%% Additive Schwarz with Harmonic extensions; %% Additive Schwarz with Harmonic extensions;
\end{itemize} \end{itemize}
@ -311,7 +312,9 @@ Type: {\bf required} \\
Intent: {\bf out}.\\ Intent: {\bf out}.\\
An integer value; 0 means no error has been detected. An integer value; 0 means no error has been detected.
\end{description} \end{description}
{\par\noindent\large\bfseries Notes}
This method is almost always called by the iterative methods of
Sec.~\ref{sec:methods}, and practically never directly by the user.
\clearpage\subsection{descr --- Prints a description of current \clearpage\subsection{descr --- Prints a description of current
@ -417,6 +420,99 @@ Error code: if no error, 0 is returned.
%% installed; see~\cite{SUPERLU,UMFPACK}. %% installed; see~\cite{SUPERLU,UMFPACK}.
Releases all internal storage. Releases all internal storage.
\clearpage\subsection{allocate\_wrk --- preconditioner}
\label{sec:allocatewrk}
\begin{verbatim}
call prec%allocate_wrk(info[,vmold])
\end{verbatim}
\begin{description}
\item[Type:] Synchronous.
\item[\bf On Entry]
\item[prec] the preconditioner.\\
Scope: {\bf local}.\\
Type: {\bf required}\\
Intent: {\bf inout}.\\
Specified as: a preconditioner data structure \precdata.
\item[vmold] The desired dynamic type for the internal vector storage.\\
Scope: {\bf local}.\\
Type: {\bf optional}.\\
Intent: {\bf in}.\\
Specified as: an object of a class derived from \vbasedata.
\item[\bf On Exit]
\item[prec]
Scope: {\bf local} \\
Type: {\bf required}\\
Intent: {\bf inout}.\\
Specified as: a preconditioner data structure \precdata.
\item[info]
Scope: {\bf global} \\
Type: {\bf required}\\
Intent: {\bf out}.\\
Error code: if no error, 0 is returned.
\end{description}
{\par\noindent\large\bfseries Notes}
%% The PSBLAS 2.0 contains a number of preconditioners, ranging from a
%% simple diagonal scaling to 2-level domain decomposition. These
%% preconditioners may use the SuperLU or the UMFPACK software, if
%% installed; see~\cite{SUPERLU,UMFPACK}.
Preconditioners often need internal work storage during their
application at each iteration of a linear solver method: in many
situations this can be accomplished by allocating and releasing
memory ``on the fly''. However, when running on an accelerator through
e.g.\ the CUDA-enabled data structures of Secs.~\ref{sec:cudastruct}
and~\ref{sec:cudaenv}, memory allocation and deallocation usually have a
much larger overhead, significantly affecting performance. To
alleviate this problem we define this method that preallocates
internal storage; it is intended to be invoked prior to the iterative
solver method, so that the necessary internal scratch storage is
available throughout the iterative method application.
When using GPUs or other specialized devices, the \fortinline|vmold|
argument is also necessary to ensure the internal work vectors are of
the appropriate dynamic type to exploit the accelerator hardware.
\clearpage\subsection{deallocate\_wrk --- preconditioner}
\begin{verbatim}
call prec%free_wrk(info)
\end{verbatim}
\begin{description}
\item[Type:] Synchronous.
\item[\bf On Entry]
\item[prec] the preconditioner.\\
Scope: {\bf local}.\\
Type: {\bf required}\\
Intent: {\bf inout}.\\
Specified as: a preconditioner data structure \precdata.
\item[\bf On Exit]
\item[prec]
Scope: {\bf local} \\
Type: {\bf required}\\
Intent: {\bf inout}.\\
Specified as: a preconditioner data structure \precdata.
\item[info]
Scope: {\bf global} \\
Type: {\bf required}\\
Intent: {\bf out}.\\
Error code: if no error, 0 is returned.
\end{description}
{\par\noindent\large\bfseries Notes}
%% The PSBLAS 2.0 contains a number of preconditioners, ranging from a
%% simple diagonal scaling to 2-level domain decomposition. These
%% preconditioners may use the SuperLU or the UMFPACK software, if
%% installed; see~\cite{SUPERLU,UMFPACK}.
Deallocates preconditioner internal work storage; to be invoked after an
iterative solver has completed execution, see the discussion in
Sec.~\ref{sec:allocatewrk}.
%%% Local Variables: %%% Local Variables:
%%% mode: latex %%% mode: latex
%%% TeX-master: "userguide" %%% TeX-master: "userguide"

Loading…
Cancel
Save