Update docs for linsolve

repack-newsolve
sfilippone 3 months ago
parent 4f4006cf6b
commit a02440afff

@ -9,7 +9,7 @@ FINCLUDES=$(FMFLAG). $(FMFLAG)$(HERE) $(FMFLAG)$(MODDIR)
CINCLUDES=-I. -I$(HERE) -I$(INCLUDEDIR)
PSBC_LIBS= -L$(LIBDIR) -lpsb_cbind
PSB_LIBS=-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base -L$(LIBDIR)
PSB_LIBS=-lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base -L$(LIBDIR)
#
# Compilers and such

@ -54,11 +54,11 @@ href="userhtmlse10.html#x15-13500010" id="QQ2-15-165">Preconditioner routines</a
<br /> &#x00A0;<span class="sectionToc" >11 <a
href="userhtmlse11.html#x17-14200011" id="QQ2-17-172">Iterative Methods</a></span>
<br /> &#x00A0;<span class="sectionToc" >12 <a
href="userhtmlse12.html#x19-14400012" id="QQ2-19-174">Extensions</a></span>
href="userhtmlse12.html#x19-14500012" id="QQ2-19-175">Extensions</a></span>
<br /> &#x00A0;<span class="sectionToc" >13 <a
href="userhtmlse13.html#x20-15300013" id="QQ2-20-189">CUDA Environment Routines</a></span>
href="userhtmlse13.html#x20-15400013" id="QQ2-20-190">CUDA Environment Routines</a></span>
<br /> &#x00A0;<span class="likesectionToc" ><a
href="userhtmlli2.html#x21-168000" id="QQ2-21-218">References</a></span>
href="userhtmlli2.html#x21-169000" id="QQ2-21-219">References</a></span>
</div>

@ -54,11 +54,11 @@ href="userhtmlse10.html#x15-13500010" id="QQ2-15-165">Preconditioner routines</a
<br /> &#x00A0;<span class="sectionToc" >11 <a
href="userhtmlse11.html#x17-14200011" id="QQ2-17-172">Iterative Methods</a></span>
<br /> &#x00A0;<span class="sectionToc" >12 <a
href="userhtmlse12.html#x19-14400012" id="QQ2-19-174">Extensions</a></span>
href="userhtmlse12.html#x19-14500012" id="QQ2-19-175">Extensions</a></span>
<br /> &#x00A0;<span class="sectionToc" >13 <a
href="userhtmlse13.html#x20-15300013" id="QQ2-20-189">CUDA Environment Routines</a></span>
href="userhtmlse13.html#x20-15400013" id="QQ2-20-190">CUDA Environment Routines</a></span>
<br /> &#x00A0;<span class="likesectionToc" ><a
href="userhtmlli2.html#x21-168000" id="QQ2-21-218">References</a></span>
href="userhtmlli2.html#x21-169000" id="QQ2-21-219">References</a></span>
</div>

@ -310,46 +310,48 @@ href="userhtmlse10.html#x15-14100010.6" id="QQ2-15-171">free &#8212; Free a prec
href="userhtmlse11.html#x17-14200011">Iterative Methods</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >11.1 <a
href="userhtmlse11.html#x17-14300011.1" id="QQ2-17-173">psb_krylov &#8212; Krylov Methods Driver Routine</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >11.2 <a
href="userhtmlse11.html#x17-14400011.2" id="QQ2-17-174">psb_richardson &#8212; Richardson Iteration Driver Routine</a></span>
<br /> &#x00A0;<span class="sectionToc" >12 <a
href="userhtmlse12.html#x19-14400012">Extensions</a></span>
href="userhtmlse12.html#x19-14500012">Extensions</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >12.1 <a
href="userhtmlse12.html#x19-14500012.1" id="QQ2-19-175">Using the extensions</a></span>
href="userhtmlse12.html#x19-14600012.1" id="QQ2-19-176">Using the extensions</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >12.2 <a
href="userhtmlse12.html#x19-14600012.2" id="QQ2-19-176">Extensions&#8217; Data Structures</a></span>
href="userhtmlse12.html#x19-14700012.2" id="QQ2-19-177">Extensions&#8217; Data Structures</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >12.3 <a
href="userhtmlse12.html#x19-14700012.3" id="QQ2-19-179">CPU-class extensions</a></span>
href="userhtmlse12.html#x19-14800012.3" id="QQ2-19-180">CPU-class extensions</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >12.4 <a
href="userhtmlse12.html#x19-15200012.4" id="QQ2-19-188">CUDA-class extensions</a></span>
href="userhtmlse12.html#x19-15300012.4" id="QQ2-19-189">CUDA-class extensions</a></span>
<br /> &#x00A0;<span class="sectionToc" >13 <a
href="userhtmlse13.html#x20-15300013">CUDA Environment Routines</a></span>
href="userhtmlse13.html#x20-15400013">CUDA Environment Routines</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-191">psb_cuda_init</a></span>
href="userhtmlse13.html#Q1-20-192">psb_cuda_init</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-193">psb_cuda_exit</a></span>
href="userhtmlse13.html#Q1-20-194">psb_cuda_exit</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-195">psb_cuda_DeviceSync</a></span>
href="userhtmlse13.html#Q1-20-196">psb_cuda_DeviceSync</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-197">psb_cuda_getDeviceCount</a></span>
href="userhtmlse13.html#Q1-20-198">psb_cuda_getDeviceCount</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-199">psb_cuda_getDevice</a></span>
href="userhtmlse13.html#Q1-20-200">psb_cuda_getDevice</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-201">psb_cuda_setDevice</a></span>
href="userhtmlse13.html#Q1-20-202">psb_cuda_setDevice</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-203">psb_cuda_DeviceHasUVA</a></span>
href="userhtmlse13.html#Q1-20-204">psb_cuda_DeviceHasUVA</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-205">psb_cuda_WarpSize</a></span>
href="userhtmlse13.html#Q1-20-206">psb_cuda_WarpSize</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-207">psb_cuda_MultiProcessors</a></span>
href="userhtmlse13.html#Q1-20-208">psb_cuda_MultiProcessors</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-209">psb_cuda_MaxThreadsPerMP</a></span>
href="userhtmlse13.html#Q1-20-210">psb_cuda_MaxThreadsPerMP</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-211">psb_cuda_MaxRegisterPerBlock</a></span>
href="userhtmlse13.html#Q1-20-212">psb_cuda_MaxRegisterPerBlock</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-213">psb_cuda_MemoryClockRate</a></span>
href="userhtmlse13.html#Q1-20-214">psb_cuda_MemoryClockRate</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-215">psb_cuda_MemoryBusWidth</a></span>
href="userhtmlse13.html#Q1-20-216">psb_cuda_MemoryBusWidth</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-217">psb_cuda_MemoryPeakBandwidth</a></span>
href="userhtmlse13.html#Q1-20-218">psb_cuda_MemoryPeakBandwidth</a></span>
</div>

@ -16,7 +16,7 @@ href="userhtmlse13.html#tailuserhtmlse13.html" >prev-tail</a>] [<a
href="#tailuserhtmlli2.html">tail</a>] [<a
href="userhtml.html# " >up</a>] </p></div>
<h3 class="likesectionHead"><a
id="x21-168000"></a>References</h3>
id="x21-169000"></a>References</h3>
<!--l. 2--><p class="noindent" >
<div class="thebibliography">
<p class="bibitem" ><span class="biblabel">

@ -17,10 +17,9 @@ href="userhtmlse8.html#tailuserhtmlse11.html">tail</a>] [<a
href="userhtml.html# " >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">11 </span> <a
id="x17-14200011"></a>Iterative Methods</h3>
<!--l. 4--><p class="noindent" >In this chapter we provide routines for preconditioners and iterative methods.
The interfaces for Krylov subspace methods are available in the module
<span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_krylov_mod</span></span></span>.
<!--l. 4--><p class="noindent" >In this chapter we provide routines for preconditioners and iterative methods. The
interfaces for iterative methods are available in the module <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_linsolve_mod</span></span></span>.
@ -456,6 +455,344 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
<h4 class="subsectionHead"><span class="titlemark">11.2 </span> <a
id="x17-14400011.2"></a>psb_richardson &#8212; Richardson Iteration Driver Routine</h4>
<!--l. 158--><p class="noindent" >This subroutine is a driver implementing a Richardson iteration
<div class="math-display" >
<img
src="userhtml33x.png" alt="x_{k+1} = M^{-1}(b - A x_k) + x_k," class="math-display" ></div>
<!--l. 159--><p class="nopar" > with the preconditioner operator <span
class="cmmi-10">M </span>defined in the previous section.
<!--l. 162--><p class="indent" > The stopping criterion can take the following values:
<dl class="description"><dt class="description">
<!--l. 164--><p class="noindent" >
<span
class="cmbx-10">1</span> </dt><dd
class="description">
<!--l. 164--><p class="noindent" >normwise backward error in the infinity norm; the iteration is stopped
when
<div class="math-display" >
<img
src="userhtml34x.png" alt="err = \frac{\|r_i\|}{(\|A\| \|x_i\| + \|b\|)} &#x003C; eps" class="math-display" ></div>
<!--l. 166--><p class="nopar" >
</dd><dt class="description">
<!--l. 167--><p class="noindent" >
<span
class="cmbx-10">2</span> </dt><dd
class="description">
<!--l. 167--><p class="noindent" >Relative residual in the 2-norm; the iteration is stopped when
<div class="math-display" >
<img
src="userhtml35x.png" alt="err = \frac{\|r_i\|}{\|b\|_2} &#x003C; eps" class="math-display" ></div>
<!--l. 169--><p class="nopar" >
</dd><dt class="description">
<!--l. 170--><p class="noindent" >
<span
class="cmbx-10">3</span> </dt><dd
class="description">
<!--l. 170--><p class="noindent" >Relative residual reduction in the 2-norm; the iteration is stopped when
<div class="math-display" >
<img
src="userhtml36x.png" alt="err = \frac{\|r_i\|}{\|r_0\|_2} &#x003C; eps" class="math-display" ></div>
<!--l. 172--><p class="nopar" ></dd></dl>
<!--l. 174--><p class="noindent" >The behaviour is controlled by the istop argument (see later). In the above formulae, <span
class="cmmi-10">x</span><sub><span
class="cmmi-7">i</span></sub>
is the tentative solution and <span
class="cmmi-10">r</span><sub><span
class="cmmi-7">i</span></sub> = <span
class="cmmi-10">b </span><span
class="cmsy-10">- </span><span
class="cmmi-10">Ax</span><sub><span
class="cmmi-7">i</span></sub> the corresponding residual at the <span
class="cmmi-10">i</span>-th
iteration.
<!--l. 179-->
<pre class="lstlisting" id="listing-168"><span class="label"><a
id="x17-144001r1"></a></span><span style="color:#000000"><span
class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">psb_richardson</span></span><span style="color:#000000"><span
class="cmtt-10">(</span></span><span style="color:#000000"><span
class="cmtt-10">a</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"><span
class="cmtt-10">prec</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"><span
class="cmtt-10">b</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"><span
class="cmtt-10">x</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"><span
class="cmtt-10">eps</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"><span
class="cmtt-10">desc_a</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"><span
class="cmtt-10">info</span></span><span style="color:#000000"><span
class="cmtt-10">,&amp;</span></span>
<span class="label"><a
id="x17-144002r2"></a></span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-10">&amp;</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">itmax</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"><span
class="cmtt-10">iter</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"><span
class="cmtt-10">err</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"><span
class="cmtt-10">itrace</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"><span
class="cmtt-10">istop</span></span><span style="color:#000000"><span
class="cmtt-10">)</span></span></pre>
<!--l. 184--><p class="indent" >
<dl class="description"><dt class="description">
<!--l. 185--><p class="noindent" >
<span
class="cmbx-10">Type:</span> </dt><dd
class="description">
<!--l. 185--><p class="noindent" >Synchronous.
</dd><dt class="description">
<!--l. 186--><p class="noindent" >
<span
class="cmbx-10">On Entry</span> </dt><dd
class="description">
<!--l. 186--><p class="noindent" >
</dd><dt class="description">
<!--l. 187--><p class="noindent" >
<span
class="cmbx-10">a</span> </dt><dd
class="description">
<!--l. 187--><p class="noindent" >the local portion of global sparse matrix <span
class="cmmi-10">A</span>. <br
class="newline" />Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Specified as: a structured data of type <a
href="userhtmlse3.html#spdata"><span
class="cmtt-10">psb</span><span
class="cmtt-10">_Tspmat</span><span
class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 193--><p class="noindent" >
<span
class="cmbx-10">prec</span> </dt><dd
class="description">
<!--l. 193--><p class="noindent" >The data structure containing the preconditioner.<br
class="newline" />Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Specified as: a structured data of type <a
href="userhtmlse3.html#precdata"><span
class="cmtt-10">psb</span><span
class="cmtt-10">_prec</span><span
class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 198--><p class="noindent" >
<span
class="cmbx-10">b</span> </dt><dd
class="description">
<!--l. 198--><p class="noindent" >The RHS vector. <br
class="newline" />Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Specified as: a rank one array or an object of type <a
href="userhtmlse3.html#vdata"><span
class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 203--><p class="noindent" >
<span
class="cmbx-10">x</span> </dt><dd
class="description">
<!--l. 203--><p class="noindent" >The initial guess. <br
class="newline" />Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">inout</span>.<br
class="newline" />Specified as: a rank one array or an object of type <a
href="userhtmlse3.html#vdata"><span
class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 208--><p class="noindent" >
<span
class="cmbx-10">eps</span> </dt><dd
class="description">
<!--l. 208--><p class="noindent" >The stopping tolerance. <br
class="newline" />Scope: <span
class="cmbx-10">global </span><br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Specified as: a real number.
</dd><dt class="description">
<!--l. 213--><p class="noindent" >
<span
class="cmbx-10">desc</span><span
class="cmbx-10">_a</span> </dt><dd
class="description">
<!--l. 213--><p class="noindent" >contains data structures for communications.<br
class="newline" />Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Specified as: a structured data of type <a
href="userhtmlse3.html#descdata"><span
class="cmtt-10">psb</span><span
class="cmtt-10">_desc</span><span
class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 218--><p class="noindent" >
<span
class="cmbx-10">itmax</span> </dt><dd
class="description">
<!--l. 218--><p class="noindent" >The maximum number of iterations to perform.<br
class="newline" />Scope: <span
class="cmbx-10">global </span><br
class="newline" />Type: <span
class="cmbx-10">optional</span><br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Default: <span
class="cmmi-10">itmax </span>= 1000.<br
class="newline" />Specified as: an integer variable <span
class="cmmi-10">itmax </span><span
class="cmsy-10">&#x2265; </span>1.
</dd><dt class="description">
<!--l. 224--><p class="noindent" >
<span
class="cmbx-10">itrace</span> </dt><dd
class="description">
<!--l. 224--><p class="noindent" >If <span
class="cmmi-10">&#x003E; </span>0 print out an informational message about convergence every <span
class="cmmi-10">itrace</span>
iterations. If = 0 print a message in case of convergence failure.<br
class="newline" />Scope: <span
class="cmbx-10">global </span><br
class="newline" />Type: <span
class="cmbx-10">optional</span><br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Default: <span
class="cmmi-10">itrace </span>= <span
class="cmsy-10">-</span>1.<br
class="newline" />
</dd><dt class="description">
<!--l. 232--><p class="noindent" >
<span
class="cmbx-10">istop</span> </dt><dd
class="description">
<!--l. 232--><p class="noindent" >An integer specifying the stopping criterion.<br
class="newline" />Scope: <span
class="cmbx-10">global </span><br
class="newline" />Type: <span
class="cmbx-10">optional</span>.<br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Values: 1: use the normwise backward error, 2: use the scaled 2-norm of
the residual, 3: use the residual reduction in the 2-norm. Default: 2.
</dd><dt class="description">
<!--l. 238--><p class="noindent" >
<span
class="cmbx-10">On Return</span> </dt><dd
class="description">
<!--l. 238--><p class="noindent" >
</dd><dt class="description">
<!--l. 239--><p class="noindent" >
<span
class="cmbx-10">x</span> </dt><dd
class="description">
<!--l. 239--><p class="noindent" >The computed solution. <br
class="newline" />Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">inout</span>.<br
class="newline" />Specified as: a rank one array or an object of type <a
href="userhtmlse3.html#vdata"><span
class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 244--><p class="noindent" >
<span
class="cmbx-10">iter</span> </dt><dd
class="description">
<!--l. 244--><p class="noindent" >The number of iterations performed.<br
class="newline" />Scope: <span
class="cmbx-10">global </span><br
class="newline" />Type: <span
class="cmbx-10">optional</span><br
class="newline" />Intent: <span
class="cmbx-10">out</span>.<br
class="newline" />Returned as: an integer variable.
</dd><dt class="description">
<!--l. 249--><p class="noindent" >
<span
class="cmbx-10">err</span> </dt><dd
class="description">
<!--l. 249--><p class="noindent" >The convergence estimate on exit.<br
class="newline" />Scope: <span
class="cmbx-10">global </span><br
class="newline" />Type: <span
class="cmbx-10">optional</span><br
class="newline" />Intent: <span
class="cmbx-10">out</span>.<br
class="newline" />Returned as: a real number.
</dd><dt class="description">
<!--l. 254--><p class="noindent" >
<span
class="cmbx-10">info</span> </dt><dd
class="description">
<!--l. 254--><p class="noindent" >Error code.<br
class="newline" />Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">required </span><br
class="newline" />Intent: <span
class="cmbx-10">out</span>.<br
class="newline" />An integer value; 0 means no error has been detected.</dd></dl>

@ -16,7 +16,7 @@ href="userhtmlse11.html#tailuserhtmlse11.html" >prev-tail</a>] [<a
href="userhtmlse9.html#tailuserhtmlse12.html">tail</a>] [<a
href="userhtml.html# " >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">12 </span> <a
id="x19-14400012"></a>Extensions</h3>
id="x19-14500012"></a>Extensions</h3>
<!--l. 3--><p class="noindent" >The EXT, CUDA and RSB subdirectories contain a set of extensions to the base
library. The extensions provide additional storage formats beyond the ones already
contained in the base library, as well as interfaces to:
@ -49,7 +49,7 @@ in&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#XOurTechRep">22</a>]</span>.
<!--l. 19--><p class="noindent" >
<h4 class="subsectionHead"><span class="titlemark">12.1 </span> <a
id="x19-14500012.1"></a>Using the extensions</h4>
id="x19-14600012.1"></a>Using the extensions</h4>
<!--l. 21--><p class="noindent" >A sample application using the PSBLAS extensions will contain the following
steps:
<ul class="itemize1">
@ -142,7 +142,7 @@ speed of the sparse matrix-vector product with the various data structures inclu
in the library.
<!--l. 146--><p class="noindent" >
<h4 class="subsectionHead"><span class="titlemark">12.2 </span> <a
id="x19-14600012.2"></a>Extensions&#8217; Data Structures</h4>
id="x19-14700012.2"></a>Extensions&#8217; Data Structures</h4>
<!--l. 150--><p class="noindent" >Access to the facilities provided by the EXT library is mainly achieved through
the data types that are provided within. The data classes are derived from
the base classes in PSBLAS, through the Fortran&#x00A0;2003 mechanism of <span
@ -153,20 +153,20 @@ href="userhtmlli2.html#XMRC:11">17</a>]</span>.
<!--l. 155--><p class="indent" > The data classes are divided between the general purpose CPU extensions, the
GPU interfaces and the RSB interfaces. In the description we will make use of the
notation introduced in Table&#x00A0;<a
href="#x19-146001r21">21<!--tex4ht:ref: tab:notation --></a>.
href="#x19-147001r21">21<!--tex4ht:ref: tab:notation --></a>.
<div class="table">
<!--l. 160--><p class="indent" > <a
id="x19-146001r21"></a><hr class="float"><div class="float"
id="x19-147001r21"></a><hr class="float"><div class="float"
>
<div class="caption"
><span class="id">Table&#x00A0;21: </span><span
class="content">Notation for parameters describing a sparse matrix</span></div><!--tex4ht:label?: x19-146001r21 -->
class="content">Notation for parameters describing a sparse matrix</span></div><!--tex4ht:label?: x19-147001r21 -->
<div class="center"
>
<!--l. 162--><p class="noindent" >
@ -274,7 +274,7 @@ class="td11"> </td></tr></table>
<a
id="x19-146002r5"></a>
id="x19-147002r5"></a>
@ -283,18 +283,18 @@ src="mat.png" alt="PIC"
width="147" height="147" >
<br /> <div class="caption"
><span class="id">Figure&#x00A0;5: </span><span
class="content">Example of sparse matrix</span></div><!--tex4ht:label?: x19-146002r5 -->
class="content">Example of sparse matrix</span></div><!--tex4ht:label?: x19-147002r5 -->
<!--l. 198--><p class="indent" > </div><hr class="endfigure">
<h4 class="subsectionHead"><span class="titlemark">12.3 </span> <a
id="x19-14700012.3"></a>CPU-class extensions</h4>
id="x19-14800012.3"></a>CPU-class extensions</h4>
<!--l. 203--><p class="noindent" >
<h5 class="likesubsubsectionHead"><a
id="x19-148000"></a>ELLPACK</h5>
id="x19-149000"></a>ELLPACK</h5>
<!--l. 205--><p class="noindent" >The ELLPACK/ITPACK format (shown in Figure&#x00A0;<a
href="#x19-148001r6">6<!--tex4ht:ref: fig:ell --></a>) comprises two 2-dimensional
href="#x19-149001r6">6<!--tex4ht:ref: fig:ell --></a>) comprises two 2-dimensional
arrays <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">AS</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">JA</span></span></span> with <span class="obeylines-h"><span class="verb"><span
@ -315,7 +315,7 @@ row.
<a
id="x19-148001r6"></a>
id="x19-149001r6"></a>
@ -325,13 +325,13 @@ width="233" height="233" >
<br /> <div class="caption"
><span class="id">Figure&#x00A0;6: </span><span
class="content">ELLPACK compression of matrix in Figure&#x00A0;<a
href="#x19-146002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-148001r6 -->
href="#x19-147002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-149001r6 -->
<!--l. 225--><p class="indent" > </div><hr class="endfigure">
<a
id="x19-148002r1"></a>
id="x19-149002r1"></a>
@ -341,8 +341,8 @@ href="#x19-146002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:l
<!--l. 231-->
<pre class="lstlisting" id="listing-168"><span class="label"><a
id="x19-148003r1"></a></span><span
<pre class="lstlisting" id="listing-169"><span class="label"><a
id="x19-149003r1"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
@ -352,7 +352,7 @@ class="cmtt-9">i</span></span><span style="color:#000000"><span
class="cmtt-9">=1,</span></span><span style="color:#000000"><span
class="cmtt-9">n</span></span>
<span class="label"><a
id="x19-148004r2"></a></span><span
id="x19-149004r2"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
@ -362,7 +362,7 @@ class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">t</span></span><span style="color:#000000"><span
class="cmtt-9">=0</span></span>
<span class="label"><a
id="x19-148005r3"></a></span><span
id="x19-149005r3"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
@ -374,7 +374,7 @@ class="cmtt-9">j</span></span><span style="color:#000000"><span
class="cmtt-9">=1,</span></span><span style="color:#000000"><span
class="cmtt-9">maxnzr</span></span>
<span class="label"><a
id="x19-148006r4"></a></span><span
id="x19-149006r4"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
@ -403,7 +403,7 @@ class="cmtt-9">,</span></span><span style="color:#000000"><span
class="cmtt-9">j</span></span><span style="color:#000000"><span
class="cmtt-9">))</span></span>
<span class="label"><a
id="x19-148007r5"></a></span><span
id="x19-149007r5"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
@ -413,7 +413,7 @@ class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">end</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">do</span></span>
<span class="label"><a
id="x19-148008r6"></a></span><span
id="x19-149008r6"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
@ -427,7 +427,7 @@ class="cmtt-9">)</span></span><span style="color:#000000"> </span><span style="c
class="cmtt-9">=</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">t</span></span>
<span class="label"><a
id="x19-148009r7"></a></span><span
id="x19-149009r7"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
@ -436,9 +436,9 @@ class="cmtt-9">end</span></span><span style="color:#000000"> </span><span style=
class="cmtt-9">do</span></span></pre>
<a
id="x19-148010r1"></a>
id="x19-149010r1"></a>
<a
id="x19-148011"></a>
id="x19-149011"></a>
<span
class="cmbx-10">Algorithm</span><span
class="cmbx-10">&#x00A0;1:</span>&#x00A0; Matrix-Vector product in ELL format
@ -450,7 +450,7 @@ class="cmbx-10">&#x00A0;1:</span>&#x00A0; Matrix-Vector product in ELL format
class="cmmi-10">y </span>= <span
class="cmmi-10">Ax </span>can be computed with the code shown in
Alg.&#x00A0;<a
href="#x19-148010r1">1<!--tex4ht:ref: alg:ell --></a>; it costs one memory write per outer iteration, plus three memory reads and
href="#x19-149010r1">1<!--tex4ht:ref: alg:ell --></a>; it costs one memory write per outer iteration, plus three memory reads and
two floating-point operations per inner iteration.
<!--l. 247--><p class="indent" > Unless all rows have exactly the same number of nonzeros, some of the coefficients
in the <span class="obeylines-h"><span class="verb"><span
@ -459,12 +459,12 @@ in terms of memory space and redundant operations (multiplications by zero). The
overhead can be acceptable if:
<ol class="enumerate1" >
<li
class="enumerate" id="x19-148013x1">
class="enumerate" id="x19-149013x1">
<!--l. 253--><p class="noindent" >The maximum number of nonzeros per row is not much larger than the
average;
</li>
<li
class="enumerate" id="x19-148015x2">
class="enumerate" id="x19-149015x2">
<!--l. 255--><p class="noindent" >The regularity of the data structure allows for faster code, e.g. by allowing
vectorization, thereby offsetting the additional storage requirements.</li></ol>
<!--l. 259--><p class="noindent" >In the extreme case where the input matrix has one full row, the ELLPACK
@ -492,7 +492,7 @@ class="cmtt-10">psb_T_ell_sparse_mat</span></span></span>:
</pre>
<!--l. 295--><p class="nopar" > </div></div>
<h5 class="likesubsubsectionHead"><a
id="x19-149000"></a>Hacked ELLPACK</h5>
id="x19-150000"></a>Hacked ELLPACK</h5>
<!--l. 303--><p class="noindent" >The <span
class="cmti-10">hacked ELLPACK </span>(<span
class="cmbx-10">HLL</span>) format alleviates the main problem of the ELLPACK
@ -558,7 +558,7 @@ format.
<a
id="x19-149001r7"></a>
id="x19-150001r7"></a>
@ -568,7 +568,7 @@ width="248" height="248" >
<br /> <div class="caption"
><span class="id">Figure&#x00A0;7: </span><span
class="content">Hacked ELLPACK compression of matrix in Figure&#x00A0;<a
href="#x19-146002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-149001r7 -->
href="#x19-147002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-150001r7 -->
@ -595,9 +595,9 @@ class="cmtt-10">psb_T_hll_sparse_mat</span></span></span>:
</pre>
<!--l. 388--><p class="nopar" > </div></div>
<h5 class="likesubsubsectionHead"><a
id="x19-150000"></a>Diagonal storage</h5>
id="x19-151000"></a>Diagonal storage</h5>
<!--l. 396--><p class="noindent" >The DIAgonal (DIA) format (shown in Figure&#x00A0;<a
href="#x19-150001r8">8<!--tex4ht:ref: fig:dia --></a>) has a 2-dimensional array <span class="obeylines-h"><span class="verb"><span
href="#x19-151001r8">8<!--tex4ht:ref: fig:dia --></a>) has a 2-dimensional array <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">AS</span></span></span>
containing in each column the coefficients along a diagonal of the matrix, and an
integer array <span class="obeylines-h"><span class="verb"><span
@ -607,7 +607,7 @@ are padded with zeros as necessary.
<!--l. 402--><p class="indent" > The code to compute the matrix-vector product <span
class="cmmi-10">y </span>= <span
class="cmmi-10">Ax </span>is shown in Alg.&#x00A0;<a
href="#x19-150003r2">2<!--tex4ht:ref: alg:dia --></a>; it
href="#x19-151003r2">2<!--tex4ht:ref: alg:dia --></a>; it
costs one memory read per outer iteration, plus three memory reads, one memory
write and two floating-point operations per inner iteration. The accesses to
<span class="obeylines-h"><span class="verb"><span
@ -620,7 +620,7 @@ required.
<a
id="x19-150001r8"></a>
id="x19-151001r8"></a>
@ -630,13 +630,13 @@ width="248" height="248" >
<br /> <div class="caption"
><span class="id">Figure&#x00A0;8: </span><span
class="content">DIA compression of matrix in Figure&#x00A0;<a
href="#x19-146002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-150001r8 -->
href="#x19-147002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-151001r8 -->
<!--l. 419--><p class="indent" > </div><hr class="endfigure">
<a
id="x19-150002r2"></a>
id="x19-151002r2"></a>
@ -662,9 +662,9 @@ href="#x19-146002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:l
</pre>
<!--l. 450--><p class="nopar" > </div></div>
<a
id="x19-150003r2"></a>
id="x19-151003r2"></a>
<a
id="x19-150004"></a>
id="x19-151004"></a>
<span
class="cmbx-10">Algorithm</span><span
class="cmbx-10">&#x00A0;2:</span>&#x00A0; Matrix-Vector product in DIA format
@ -691,7 +691,7 @@ class="cmtt-10">psb_T_dia_sparse_mat</span></span></span>:
</pre>
<!--l. 486--><p class="nopar" > </div></div>
<h5 class="likesubsubsectionHead"><a
id="x19-151000"></a>Hacked DIA</h5>
id="x19-152000"></a>Hacked DIA</h5>
<!--l. 495--><p class="noindent" >Storage by DIAgonals is an attractive option for matrices whose coefficients are
located on a small set of diagonals, since they do away with storing explicitly the
indices and therefore reduce significantly memory traffic. However, having a few
@ -738,7 +738,7 @@ class="cmti-10">hackOffsets[k]</span>.
<a
id="x19-151001r9"></a>
id="x19-152001r9"></a>
@ -748,7 +748,7 @@ width="248" height="248" >
<br /> <div class="caption"
><span class="id">Figure&#x00A0;9: </span><span
class="content">Hacked DIA compression of matrix in Figure&#x00A0;<a
href="#x19-146002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-151001r9 -->
href="#x19-147002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-152001r9 -->
@ -793,7 +793,7 @@ class="cmtt-10">psb_T_hdia_sparse_mat</span></span></span>:
<h4 class="subsectionHead"><span class="titlemark">12.4 </span> <a
id="x19-15200012.4"></a>CUDA-class extensions</h4>
id="x19-15300012.4"></a>CUDA-class extensions</h4>
<!--l. 4--><p class="noindent" >For computing with CUDA we define a dual storage strategy in which each
variable on the CPU (&#8220;host&#8221;) side has a GPU (&#8220;device&#8221;) side counterpart. When a GPU-type
variable is initialized, the data contained is (usually) the same on both sides. Each

@ -16,12 +16,12 @@ href="userhtmlse12.html#tailuserhtmlse12.html" >prev-tail</a>] [<a
href="userhtmlse10.html#tailuserhtmlse13.html">tail</a>] [<a
href="userhtml.html# " >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">13 </span> <a
id="x20-15300013"></a>CUDA Environment Routines</h3>
id="x20-15400013"></a>CUDA Environment Routines</h3>
<!--l. 91--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-154000"></a>psb_cuda_init &#8212; Initializes PSBLAS-CUDA environment</h4>
id="x20-155000"></a>psb_cuda_init &#8212; Initializes PSBLAS-CUDA environment</h4>
<a
id="Q1-20-191"></a>
id="Q1-20-192"></a>
<div class="center"
>
<!--l. 99--><p class="noindent" >
@ -64,13 +64,13 @@ class="cmbx-12">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x20-154002x1">
class="enumerate" id="x20-155002x1">
<!--l. 125--><p class="noindent" >A call to this routine must precede any other PSBLAS-CUDA call.</li></ol>
<!--l. 129--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-155000"></a>psb_cuda_exit &#8212; Exit from PSBLAS-CUDA environment</h4>
id="x20-156000"></a>psb_cuda_exit &#8212; Exit from PSBLAS-CUDA environment</h4>
<a
id="Q1-20-193"></a>
id="Q1-20-194"></a>
<div class="center"
>
<!--l. 137--><p class="noindent" >
@ -106,9 +106,9 @@ class="cmbx-10">in</span>.<br
class="newline" />Specified as: an integer variable.</dd></dl>
<!--l. 161--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-156000"></a>psb_cuda_DeviceSync &#8212; Synchronize CUDA device</h4>
id="x20-157000"></a>psb_cuda_DeviceSync &#8212; Synchronize CUDA device</h4>
<a
id="Q1-20-195"></a>
id="Q1-20-196"></a>
@ -123,9 +123,9 @@ call&#x00A0;psb_cuda_DeviceSync()
CUDA-side code, have completed.
<!--l. 182--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-157000"></a>psb_cuda_getDeviceCount </h4>
id="x20-158000"></a>psb_cuda_getDeviceCount </h4>
<a
id="Q1-20-197"></a>
id="Q1-20-198"></a>
<div class="center"
>
<!--l. 190--><p class="noindent" >
@ -136,9 +136,9 @@ ngpus&#x00A0;=&#x00A0;&#x00A0;psb_cuda_getDeviceCount()
<!--l. 199--><p class="noindent" >Get number of devices available on current computing node.
<!--l. 201--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-158000"></a>psb_cuda_getDevice </h4>
id="x20-159000"></a>psb_cuda_getDevice </h4>
<a
id="Q1-20-199"></a>
id="Q1-20-200"></a>
<div class="center"
>
<!--l. 209--><p class="noindent" >
@ -149,9 +149,9 @@ ngpus&#x00A0;=&#x00A0;&#x00A0;psb_cuda_getDevice()
<!--l. 218--><p class="noindent" >Get device in use by current process.
<!--l. 220--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-159000"></a>psb_cuda_setDevice </h4>
id="x20-160000"></a>psb_cuda_setDevice </h4>
<a
id="Q1-20-201"></a>
id="Q1-20-202"></a>
@ -165,9 +165,9 @@ info&#x00A0;=&#x00A0;psb_cuda_setDevice(dev)
<!--l. 237--><p class="noindent" >Set device to be used by current process.
<!--l. 239--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-160000"></a>psb_cuda_DeviceHasUVA </h4>
id="x20-161000"></a>psb_cuda_DeviceHasUVA </h4>
<a
id="Q1-20-203"></a>
id="Q1-20-204"></a>
<div class="center"
>
<!--l. 247--><p class="noindent" >
@ -178,9 +178,9 @@ hasUva&#x00A0;=&#x00A0;psb_cuda_DeviceHasUVA()
<!--l. 256--><p class="noindent" >Returns true if device currently in use supports UVA (Unified Virtual Addressing).
<!--l. 259--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-161000"></a>psb_cuda_WarpSize </h4>
id="x20-162000"></a>psb_cuda_WarpSize </h4>
<a
id="Q1-20-205"></a>
id="Q1-20-206"></a>
<div class="center"
>
<!--l. 267--><p class="noindent" >
@ -191,9 +191,9 @@ nw&#x00A0;=&#x00A0;psb_cuda_WarpSize()
<!--l. 276--><p class="noindent" >Returns the warp size.
<!--l. 279--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-162000"></a>psb_cuda_MultiProcessors </h4>
id="x20-163000"></a>psb_cuda_MultiProcessors </h4>
<a
id="Q1-20-207"></a>
id="Q1-20-208"></a>
@ -207,9 +207,9 @@ nmp&#x00A0;=&#x00A0;psb_cuda_MultiProcessors()
<!--l. 296--><p class="noindent" >Returns the number of multiprocessors in the CUDA device.
<!--l. 298--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-163000"></a>psb_cuda_MaxThreadsPerMP </h4>
id="x20-164000"></a>psb_cuda_MaxThreadsPerMP </h4>
<a
id="Q1-20-209"></a>
id="Q1-20-210"></a>
<div class="center"
>
<!--l. 306--><p class="noindent" >
@ -220,9 +220,9 @@ nt&#x00A0;=&#x00A0;psb_cuda_MaxThreadsPerMP()
<!--l. 315--><p class="noindent" >Returns the maximum number of threads per multiprocessor.
<!--l. 318--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-164000"></a>psb_cuda_MaxRegistersPerBlock </h4>
id="x20-165000"></a>psb_cuda_MaxRegistersPerBlock </h4>
<a
id="Q1-20-211"></a>
id="Q1-20-212"></a>
<div class="center"
>
<!--l. 326--><p class="noindent" >
@ -233,9 +233,9 @@ nr&#x00A0;=&#x00A0;psb_cuda_MaxRegistersPerBlock()
<!--l. 335--><p class="noindent" >Returns the maximum number of registers per thread block.
<!--l. 338--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-165000"></a>psb_cuda_MemoryClockRate </h4>
id="x20-166000"></a>psb_cuda_MemoryClockRate </h4>
<a
id="Q1-20-213"></a>
id="Q1-20-214"></a>
@ -249,9 +249,9 @@ cl&#x00A0;=&#x00A0;psb_cuda_MemoryClockRate()
<!--l. 355--><p class="noindent" >Returns the memory clock rate in KHz, as an integer.
<!--l. 357--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-166000"></a>psb_cuda_MemoryBusWidth </h4>
id="x20-167000"></a>psb_cuda_MemoryBusWidth </h4>
<a
id="Q1-20-215"></a>
id="Q1-20-216"></a>
<div class="center"
>
<!--l. 365--><p class="noindent" >
@ -262,9 +262,9 @@ nb&#x00A0;=&#x00A0;psb_cuda_MemoryBusWidth()
<!--l. 374--><p class="noindent" >Returns the memory bus width in bits.
<!--l. 376--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-167000"></a>psb_cuda_MemoryPeakBandwidth </h4>
id="x20-168000"></a>psb_cuda_MemoryPeakBandwidth </h4>
<a
id="Q1-20-217"></a>
id="Q1-20-218"></a>
<div class="center"
>
<!--l. 384--><p class="noindent" >

File diff suppressed because one or more lines are too long

@ -2,8 +2,8 @@
\label{sec:methods}
In this chapter we provide routines for preconditioners and iterative
methods. The interfaces for Krylov subspace methods are available in
the module \verb|psb_krylov_mod|.
methods. The interfaces for iterative methods are available in
the module \verb|psb_linsolve_mod|.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
@ -146,6 +146,119 @@ An integer value; 0 means no error has been detected.
\end{description}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Richardson driver routine
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\clearpage\subsection{psb\_richardson \label{richardson} ---
Richardson Iteration Driver Routine}
This subroutine is a driver implementing a Richardson iteration
\[ x_{k+1} = M^{-1} (b-Ax_k) +x_k,\]
with the preconditioner operator $M$ defined in the previous section.
The stopping criterion can take the following values:
\begin{description}
\item[1] normwise backward error in the infinity
norm; the iteration is stopped when
\[ err = \frac{\|r_i\|}{(\|A\|\|x_i\|+\|b\|)} < eps \]
\item[2] Relative residual in the 2-norm; the iteration is stopped
when
\[ err = \frac{\|r_i\|_2}{\|b\|_2} < eps \]
\item[3] Relative residual reduction in the 2-norm; the iteration is stopped
when
\[ err = \frac{\|r_i\|_2}{\|r_0\|_2} < eps \]
\end{description}
The behaviour is controlled by the istop argument (see
later). In the above formulae, $x_i$ is the tentative solution and
$r_i=b-Ax_i$ the corresponding residual at the $i$-th iteration.
\begin{lstlisting}
call psb_richardson(a,prec,b,x,eps,desc_a,info,&
& itmax,iter,err,itrace,istop)
\end{lstlisting}
\begin{description}
\item[Type:] Synchronous.
\item[\bf On Entry]
\item[a] the local portion of global sparse matrix
$A$. \\
Scope: {\bf local} \\
Type: {\bf required}\\
Intent: {\bf in}.\\
Specified as: a structured data of type \spdata.
\item[prec] The data structure containing the preconditioner.\\
Scope: {\bf local} \\
Type: {\bf required}\\
Intent: {\bf in}.\\
Specified as: a structured data of type \precdata.
\item[b] The RHS vector. \\
Scope: {\bf local} \\
Type: {\bf required}\\
Intent: {\bf in}.\\
Specified as: a rank one array or an object of type \vdata.
\item[x] The initial guess. \\
Scope: {\bf local} \\
Type: {\bf required}\\
Intent: {\bf inout}.\\
Specified as: a rank one array or an object of type \vdata.
\item[eps] The stopping tolerance. \\
Scope: {\bf global} \\
Type: {\bf required}\\
Intent: {\bf in}.\\
Specified as: a real number.
\item[desc\_a] contains data structures for communications.\\
Scope: {\bf local} \\
Type: {\bf required}\\
Intent: {\bf in}.\\
Specified as: a structured data of type \descdata.
\item[itmax] The maximum number of iterations to perform.\\
Scope: {\bf global} \\
Type: {\bf optional}\\
Intent: {\bf in}.\\
Default: $itmax = 1000$.\\
Specified as: an integer variable $itmax \ge 1$.
\item[itrace] If $>0$ print out an informational message about
convergence every $itrace$ iterations. If $=0$ print a message in
case of convergence failure.\\
Scope: {\bf global} \\
Type: {\bf optional}\\
Intent: {\bf in}.\\
Default: $itrace = -1$.
\item[istop] An integer specifying the stopping criterion.\\
Scope: {\bf global} \\
Type: {\bf optional}\\
Intent: {\bf in}.\\
Values: 1: use the normwise backward error, 2: use the scaled 2-norm
of the residual, 3: use the residual reduction in the 2-norm. Default: 2.
\item[\bf On Return]
\item[x] The computed solution. \\
Scope: {\bf local} \\
Type: {\bf required}\\
Intent: {\bf inout}.\\
Specified as: a rank one array or an object of type \vdata.
\item[iter] The number of iterations performed.\\
Scope: {\bf global} \\
Type: {\bf optional}\\
Intent: {\bf out}.\\
Returned as: an integer variable.
\item[err] The convergence estimate on exit.\\
Scope: {\bf global} \\
Type: {\bf optional}\\
Intent: {\bf out}.\\
Returned as: a real number.
\item[info] Error code.\\
Scope: {\bf local} \\
Type: {\bf required} \\
Intent: {\bf out}.\\
An integer value; 0 means no error has been detected.
\end{description}
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "userguide"

Loading…
Cancel
Save