Docs and krylov->linsolve changes

openacc
Salvatore Filippone 2 months ago
parent ef1cc4b321
commit eefc67bbdd

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.8 KiB

After

Width:  |  Height:  |  Size: 1.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 KiB

@ -11,7 +11,7 @@
</head><body
>
<div class="footnote-text">
<!--l. 362--><p class="noindent" ><span class="footnote-mark"><a
<!--l. 363--><p class="noindent" ><span class="footnote-mark"><a
id="fn3x0"><a
id="x7-6020x3"></a> <sup class="textsuperscript">3</sup></a></span><span
class="cmr-8">The subroutine style </span><span
@ -19,7 +19,7 @@ class="cmtt-8">psb</span><span
class="cmtt-8">_precinit </span><span
class="cmr-8">and </span><span
class="cmtt-8">psb</span><span
class="cmtt-8">_precbl </span><span
class="cmtt-8">_precbld </span><span
class="cmr-8">are still supported for backward</span>
<span
class="cmr-8">compatibility</span></div>

@ -432,18 +432,18 @@ matrix/vector creation and linear system solution as follows:
<li
class="enumerate" id="x4-6002x1">
<!--l. 347--><p class="noindent" >Initialize parallel environment with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_init</span></span></span>
class="cmtt-10">psb_init</span></span></span>;
</li>
<li
class="enumerate" id="x4-6004x2">
<!--l. 348--><p class="noindent" >Initialize index space with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdall</span></span></span>
class="cmtt-10">psb_cdall</span></span></span>;
</li>
<li
class="enumerate" id="x4-6006x3">
<!--l. 349--><p class="noindent" >Allocate sparse matrix and dense vectors with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spall</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geall</span></span></span>
class="cmtt-10">psb_geall</span></span></span>;
</li>
<li
class="enumerate" id="x4-6008x4">
@ -459,12 +459,12 @@ class="cmtt-10">psb_geins</span></span></span>
<li
class="enumerate" id="x4-6012x1">
<!--l. 355--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdasb</span></span></span>
class="cmtt-10">psb_cdasb</span></span></span>,
</li>
<li
class="enumerate" id="x4-6014x2">
<!--l. 356--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spasb</span></span></span>
class="cmtt-10">psb_spasb</span></span></span>,
@ -472,106 +472,118 @@ class="cmtt-10">psb_spasb</span></span></span>
<li
class="enumerate" id="x4-6016x3">
<!--l. 357--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geasb</span></span></span></li></ol>
class="cmtt-10">psb_geasb</span></span></span>;</li></ol>
</li>
<li
class="enumerate" id="x4-6018x6">
<!--l. 359--><p class="noindent" >Choose the preconditioner to be used with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">prec%init</span></span></span> and build it with
class="cmtt-10">prec%init</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">prec%set</span></span></span>, and build it with
<span class="obeylines-h"><span class="verb"><span
class="cmtt-10">prec%build</span></span></span><span class="footnote-mark"><a
href="userhtml7.html#fn3x0"><sup class="textsuperscript">3</sup></a></span><a
id="x4-6019f3"></a> .
id="x4-6019f3"></a> ;
</li>
<li
class="enumerate" id="x4-6022x7">
<!--l. 363--><p class="noindent" >Call the iterative driver <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_krylov</span></span></span> with the method of choice, e.g.
<span class="obeylines-h"><span class="verb"><span
<!--l. 364--><p class="noindent" >Call one of the iterative drivers with the method of choice, e.g. <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_krylov</span></span></span>
with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">bicgstab</span></span></span>.</li></ol>
<!--l. 366--><p class="noindent" >This is the structure of the sample programs in the directory <span class="obeylines-h"><span class="verb"><span
<!--l. 367--><p class="noindent" >This is the structure of the sample programs in the directory <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">test/pargen/</span></span></span>.
<!--l. 369--><p class="indent" > For a simulation in which the same discretization mesh is used over multiple time
<!--l. 370--><p class="indent" > For a simulation in which the same discretization mesh is used over multiple time
steps, the following structure may be more appropriate:
<ol class="enumerate1" >
<li
class="enumerate" id="x4-6024x1">
<!--l. 372--><p class="noindent" >Initialize parallel environment with <span class="obeylines-h"><span class="verb"><span
<!--l. 373--><p class="noindent" >Initialize parallel environment with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_init</span></span></span>
</li>
<li
class="enumerate" id="x4-6026x2">
<!--l. 373--><p class="noindent" >Initialize index space with <span class="obeylines-h"><span class="verb"><span
<!--l. 374--><p class="noindent" >Initialize index space with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdall</span></span></span>
</li>
<li
class="enumerate" id="x4-6028x3">
<!--l. 374--><p class="noindent" >Loop over the topology of the discretization mesh and build the descriptor
<!--l. 375--><p class="noindent" >Loop over the topology of the discretization mesh and build the descriptor
with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdins</span></span></span>
class="cmtt-10">psb_cdins</span></span></span>;
</li>
<li
class="enumerate" id="x4-6030x4">
<!--l. 376--><p class="noindent" >Assemble the descriptor with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdasb</span></span></span>
<!--l. 377--><p class="noindent" >Assemble the descriptor with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdasb</span></span></span>;
</li>
<li
class="enumerate" id="x4-6032x5">
<!--l. 377--><p class="noindent" >Allocate the sparse matrices and dense vectors with <span class="obeylines-h"><span class="verb"><span
<!--l. 378--><p class="noindent" >Allocate the sparse matrices and dense vectors with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spall</span></span></span> and
<span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geall</span></span></span>
class="cmtt-10">psb_geall</span></span></span>;
</li>
<li
class="enumerate" id="x4-6034x6">
<!--l. 379--><p class="noindent" >Loop over the time steps:
<!--l. 380--><p class="noindent" >Loop over the time steps:
<ol class="enumerate2" >
<li
class="enumerate" id="x4-6036x1">
<!--l. 381--><p class="noindent" >If after first time step, reinitialize the sparse matrix with <span class="obeylines-h"><span class="verb"><span
<!--l. 382--><p class="noindent" >If after first time step, reinitialize the sparse matrix with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_sprn</span></span></span>;
also zero out the dense vectors;
</li>
<li
class="enumerate" id="x4-6038x2">
<!--l. 384--><p class="noindent" >Loop over the mesh, generate the coefficients and insert/update them
<!--l. 385--><p class="noindent" >Loop over the mesh, generate the coefficients and insert/update them
with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spins</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geins</span></span></span>
class="cmtt-10">psb_geins</span></span></span>;
</li>
<li
class="enumerate" id="x4-6040x3">
<!--l. 386--><p class="noindent" >Assemble with <span class="obeylines-h"><span class="verb"><span
<!--l. 387--><p class="noindent" >Assemble with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spasb</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geasb</span></span></span>
class="cmtt-10">psb_geasb</span></span></span>;
</li>
<li
class="enumerate" id="x4-6042x4">
<!--l. 387--><p class="noindent" >Choose and build preconditioner with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">prec%init</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">prec%build</span></span></span>
<!--l. 388--><p class="noindent" >
</li>
<li
class="enumerate" id="x4-6044x5">
<!--l. 389--><p class="noindent" >Call the iterative method of choice, e.g. <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_bicgstab</span></span></span></li></ol>
<!--l. 388--><p class="noindent" >Choose the preconditioner to be used with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">prec%init</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">prec%set</span></span></span>,
and build it with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">prec%build</span></span></span>;
</li>
<li
class="enumerate" id="x4-6046x6">
<!--l. 391--><p class="noindent" >Call one of the iterative drivers with the method of choice, e.g.
<span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_krylov</span></span></span> with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">bicgstab</span></span></span>.</li></ol>
</li></ol>
<!--l. 392--><p class="noindent" >The insertion routines will be called as many times as needed; they only need to be
<!--l. 395--><p class="noindent" >The insertion routines will be called as many times as needed; they only need to be
called on the data that is actually allocated to the current process, i.e. each process
generates its own data.
<!--l. 397--><p class="indent" > In principle there is no specific order in the calls to <span class="obeylines-h"><span class="verb"><span
<!--l. 400--><p class="indent" > In principle there is no specific order in the calls to <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spins</span></span></span>, nor is there a
requirement to build a matrix row in its entirety before calling the routine; this
allows the application programmer to walk through the discretization mesh element
by element, generating the main part of a given matrix row but also contributions to
the rows corresponding to neighbouring elements.
<!--l. 404--><p class="indent" > From a functional point of view it is even possible to execute one call for each
<!--l. 407--><p class="indent" > From a functional point of view it is even possible to execute one call for each
nonzero coefficient; however this would have a substantial computational
overhead. It is therefore advisable to pack a certain amount of data into each
call to the insertion routine, say touching on a few tens of rows; the best
performing value would depend on both the architecture of the computer being
used and on the problem structure. At the opposite extreme, it would be
possible to generate the entire part of a coefficient matrix residing on a
@ -579,40 +591,37 @@ process and pass it in a single call to <span class="obeylines-h"><span class="v
class="cmtt-10">psb_spins</span></span></span>; this, however, would entail a
doubling of memory occupation, and thus would be almost always far from
optimal.
<!--l. 417--><p class="noindent" >
<!--l. 420--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">2.3.1 </span> <a
id="x4-70002.3.1"></a>User-defined index mappings</h5>
<!--l. 419--><p class="noindent" >PSBLAS supports user-defined global to local index mappings, subject to the
<!--l. 422--><p class="noindent" >PSBLAS supports user-defined global to local index mappings, subject to the
constraints outlined in sec.&#x00A0;<a
href="#x4-60002.3">2.3<!--tex4ht:ref: sec:appstruct --></a>:
<ol class="enumerate1" >
<li
class="enumerate" id="x4-7002x1">
<!--l. 422--><p class="noindent" >The set of indices owned locally must be mapped to the set 1<span
<!--l. 425--><p class="noindent" >The set of indices owned locally must be mapped to the set 1<span
class="cmmi-10">&#x2026;</span><span
class="cmmi-10">n</span><sub>row<sub><span
class="cmmi-5">i</span></sub></sub>;
</li>
<li
class="enumerate" id="x4-7004x2">
<!--l. 424--><p class="noindent" >The set of halo points must be mapped to the set <span
<!--l. 427--><p class="noindent" >The set of halo points must be mapped to the set <span
class="cmmi-10">n</span><sub>row<sub><span
class="cmmi-5">i</span></sub></sub> + 1<span
class="cmmi-10">&#x2026;</span><span
class="cmmi-10">n</span><sub>col<sub>
<span
class="cmmi-5">i</span></sub></sub>;</li></ol>
<!--l. 427--><p class="noindent" >but otherwise the mapping is arbitrary. The user application is responsible to ensure
<!--l. 430--><p class="noindent" >but otherwise the mapping is arbitrary. The user application is responsible for ensuring
consistency of this mapping; some errors may be caught by the library, but
this is not guaranteed. The application structure to support this usage is as
follows:
<ol class="enumerate1" >
<li
class="enumerate" id="x4-7006x1">
<!--l. 433--><p class="noindent" >Initialize index
<!--l. 436--><p class="noindent" >Initialize index
space with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdall(ictx,desc,info,vl=vl,lidx=lidx)</span></span></span> passing the
vectors <span class="obeylines-h"><span class="verb"><span
@ -622,7 +631,7 @@ class="cmtt-10">lidx(:)</span></span></span> containing the corresponding local
</li>
<li
class="enumerate" id="x4-7008x2">
<!--l. 438--><p class="noindent" >Add the halo points <span class="obeylines-h"><span class="verb"><span
<!--l. 441--><p class="noindent" >Add the halo points <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">ja(:)</span></span></span> and their associated local indices <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">lidx(:)</span></span></span> with
a(some) call(s) to <span class="obeylines-h"><span class="verb"><span
@ -630,12 +639,15 @@ class="cmtt-10">psb_cdins(nz,ja,desc,info,lidx=lidx)</span></span></span>;
</li>
<li
class="enumerate" id="x4-7010x3">
<!--l. 441--><p class="noindent" >Assemble the descriptor with <span class="obeylines-h"><span class="verb"><span
<!--l. 444--><p class="noindent" >Assemble the descriptor with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdasb</span></span></span>;
</li>
<li
class="enumerate" id="x4-7012x4">
<!--l. 442--><p class="noindent" >Build the sparse matrices and vectors, optionally making use in <span class="obeylines-h"><span class="verb"><span
<!--l. 445--><p class="noindent" >Build the sparse matrices and vectors, optionally making use in <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spins</span></span></span>
and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geins</span></span></span> of the <span class="obeylines-h"><span class="verb"><span
@ -644,22 +656,19 @@ class="cmtt-10">ia</span></span></span>,
<span class="obeylines-h"><span class="verb"><span
class="cmtt-10">ja</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">irw</span></span></span>, respectively, are already local indices.</li></ol>
<!--l. 449--><p class="noindent" >
<!--l. 452--><p class="noindent" >
<h4 class="subsectionHead"><span class="titlemark">2.4 </span> <a
id="x4-80002.4"></a>Programming model</h4>
<!--l. 451--><p class="noindent" >The PSBLAS librarary is based on the Single Program Multiple Data (SPMD)
<!--l. 454--><p class="noindent" >The PSBLAS library is based on the Single Program Multiple Data (SPMD)
programming model: each process participating in the computation performs the
same actions on a chunk of data. Parallelism is thus data-driven.
<!--l. 456--><p class="indent" > Because of this structure, many subroutines coordinate their action across the
<!--l. 459--><p class="indent" > Because of this structure, many subroutines coordinate their action across the
various processes, thus providing an implicit synchronization point, and therefore
<span
class="cmti-10">must </span>be called simultaneously by all processes participating in the computation. This
is certainly true for the data allocation and assembly routines, for all the
computational routines and for some of the tools routines.
<!--l. 464--><p class="indent" > However there are many cases where no synchronization, and indeed no
<!--l. 467--><p class="indent" > However there are many cases where no synchronization, and indeed no
communication among processes, is implied; for instance, all the routines in sec.&#x00A0;<a
href="userhtmlse3.html#x8-90003">3<!--tex4ht:ref: sec:datastruct --></a>
are only acting on the local data structures, and thus may be called independently.
@ -667,21 +676,21 @@ The most important case is that of the coefficient insertion routines: since the
number of coefficients in the sparse and dense matrices varies among the processors,
and since the user is free to choose an arbitrary order in building the matrix entries,
these routines cannot imply a synchronization.
<!--l. 474--><p class="indent" > Throughout this user&#8217;s guide each subroutine will be clearly indicated
<!--l. 477--><p class="indent" > Throughout this user&#8217;s guide each subroutine will be clearly indicated
as:
<dl class="description"><dt class="description">
<!--l. 477--><p class="noindent" >
<!--l. 480--><p class="noindent" >
<span
class="cmbx-10">Synchronous:</span> </dt><dd
class="description">
<!--l. 477--><p class="noindent" >must be called simultaneously by all the processes in the relevant
<!--l. 480--><p class="noindent" >must be called simultaneously by all the processes in the relevant
communication context;
</dd><dt class="description">
<!--l. 479--><p class="noindent" >
<!--l. 482--><p class="noindent" >
<span
class="cmbx-10">Asynchronous:</span> </dt><dd
class="description">
<!--l. 479--><p class="noindent" >may be called in a totally independent manner.</dd></dl>
<!--l. 482--><p class="noindent" >may be called in a totally independent manner.</dd></dl>

@ -2559,17 +2559,17 @@ Scaling or Block Jacobi with incomplete factorization ILU(0).
<!--l. 1319--><p class="indent" > A preconditioner is held in the <a
id="precdata"></a><span
class="cmtt-10">psb</span><span
class="cmtt-10">_prec</span><span
class="cmtt-10">_Tprec</span><span
class="cmtt-10">_type </span>data structure reported in
figure&#x00A0;<a
href="#x8-53001r4">4<!--tex4ht:ref: fig:prectype --></a>. The <code class="lstinline"><span style="color:#000000">psb_prec_type</span></code> data type may contain a simple preconditioning matrix
with the associated communication descriptor.The internal preconditioner is
href="#x8-53001r4">4<!--tex4ht:ref: fig:prectype --></a>. The <code class="lstinline"><span style="color:#000000">psb_Tprec_type</span></code> data type may contain a simple preconditioning
matrix with the associated communication descriptor. The internal preconditioner is
allocated appropriately with the dynamic type corresponding to the desired
preconditioner.
<!--l. 1337--><p class="indent" > <a
<!--l. 1338--><p class="indent" > <a
id="x8-53001r4"></a><hr class="float"><div class="float"
>
@ -2577,7 +2577,7 @@ preconditioner.
<div class="center"
>
<!--l. 1347--><p class="noindent" >
<!--l. 1348--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-30">
&#x00A0;&#x00A0;type&#x00A0;psb_Tprec_type
@ -2585,7 +2585,7 @@ preconditioner.
&#x00A0;&#x00A0;end&#x00A0;type&#x00A0;psb_Tprec_type
</pre>
<!--l. 1355--><p class="nopar" > </div></div>
<!--l. 1356--><p class="nopar" > </div></div>
<br /> <div class="caption"
><span class="id">Listing 4: </span><span
class="content">The PSBLAS defined data type that contains a preconditioner.</span></div><!--tex4ht:label?: x8-53001r4 -->
@ -2595,70 +2595,71 @@ class="content">The PSBLAS defined data type that contains a preconditioner.</sp
</div><hr class="endfloat" />
<h4 class="subsectionHead"><span class="titlemark">3.5 </span> <a
id="x8-540003.5"></a>Heap data structure</h4>
<!--l. 1391--><p class="noindent" >Among the tools routines of sec.&#x00A0;<a
<!--l. 1392--><p class="noindent" >Among the tools routines of sec.&#x00A0;<a
href="userhtmlse6.html#x11-770006">6<!--tex4ht:ref: sec:toolsrout --></a>, we have a number of sorting utilities; the heap
sort is implemented in terms of heaps having the following signatures:
<dl class="description"><dt class="description">
<!--l. 1395--><p class="noindent" >
<!--l. 1396--><p class="noindent" >
<span
class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_heap</span> </dt><dd
class="description">
<!--l. 1395--><p class="noindent" >: a heap containing elements of type T, where T can be <code class="lstinline"><span style="color:#000000">i</span><span style="color:#000000">,</span><span style="color:#000000">s</span><span style="color:#000000">,</span><span style="color:#000000">c</span><span style="color:#000000">,</span><span style="color:#000000">d</span><span style="color:#000000">,</span><span style="color:#000000">z</span></code> for
<!--l. 1396--><p class="noindent" >: a heap containing elements of type T, where T can be <code class="lstinline"><span style="color:#000000">i</span><span style="color:#000000">,</span><span style="color:#000000">s</span><span style="color:#000000">,</span><span style="color:#000000">c</span><span style="color:#000000">,</span><span style="color:#000000">d</span><span style="color:#000000">,</span><span style="color:#000000">z</span></code> for
integer, real and complex data;
</dd><dt class="description">
<!--l. 1397--><p class="noindent" >
<!--l. 1398--><p class="noindent" >
<span
class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_idx</span><span
class="cmtt-10">_heap</span> </dt><dd
class="description">
<!--l. 1397--><p class="noindent" >: a heap containing elements of type T, as above, together with an integer
<!--l. 1398--><p class="noindent" >: a heap containing elements of type T, as above, together with an integer
index.</dd></dl>
<!--l. 1400--><p class="noindent" >Given a heap object, the following methods are defined on it:
<!--l. 1401--><p class="noindent" >Given a heap object, the following methods are defined on it:
<dl class="description"><dt class="description">
<!--l. 1402--><p class="noindent" >
<!--l. 1403--><p class="noindent" >
<span
class="cmbx-10">init</span> </dt><dd
class="description">
<!--l. 1402--><p class="noindent" >Initialize memory; also choose ascending or descending order;
<!--l. 1403--><p class="noindent" >Initialize memory; also choose ascending or descending order;
</dd><dt class="description">
<!--l. 1404--><p class="noindent" >
<!--l. 1405--><p class="noindent" >
<span
class="cmbx-10">howmany</span> </dt><dd
class="description">
<!--l. 1404--><p class="noindent" >Current heap occupancy;
<!--l. 1405--><p class="noindent" >Current heap occupancy;
</dd><dt class="description">
<!--l. 1405--><p class="noindent" >
<!--l. 1406--><p class="noindent" >
<span
class="cmbx-10">insert</span> </dt><dd
class="description">
<!--l. 1405--><p class="noindent" >Add an item (or an item and its index);
<!--l. 1406--><p class="noindent" >Add an item (or an item and its index);
</dd><dt class="description">
<!--l. 1406--><p class="noindent" >
<!--l. 1407--><p class="noindent" >
<span
class="cmbx-10">get</span><span
class="cmbx-10">_first</span> </dt><dd
class="description">
<!--l. 1406--><p class="noindent" >Remove and return the first element;
<!--l. 1407--><p class="noindent" >Remove and return the first element;
</dd><dt class="description">
<!--l. 1407--><p class="noindent" >
<!--l. 1408--><p class="noindent" >
<span
class="cmbx-10">dump</span> </dt><dd
class="description">
<!--l. 1407--><p class="noindent" >Print on file;
<!--l. 1408--><p class="noindent" >Print on file;
</dd><dt class="description">
<!--l. 1408--><p class="noindent" >
<!--l. 1409--><p class="noindent" >
<span
class="cmbx-10">free</span> </dt><dd
class="description">
<!--l. 1408--><p class="noindent" >Release memory.</dd></dl>
<!--l. 1410--><p class="noindent" >These objects are used in AMG4PSBLAS to implement the factorization algorithms.
<!--l. 1409--><p class="noindent" >Release memory.</dd></dl>
<!--l. 1411--><p class="noindent" >These objects are used to implement the factorization and approximate inversion
algorithms.

File diff suppressed because it is too large Load Diff

@ -1317,13 +1317,14 @@ like Diagonal Scaling or Block Jacobi with incomplete
factorization ILU(0).
A preconditioner is held in the \hypertarget{precdata}{{\tt
psb\_prec\_type}} data structure reported in
figure~\ref{fig:prectype}. The \fortinline|psb_prec_type|
psb\_Tprec\_type}} data structure reported in
figure~\ref{fig:prectype}. The \fortinline|psb_Tprec_type|
data type may contain a simple preconditioning matrix with the
associated communication descriptor.%% which may be different than the
associated communication descriptor.
%% which may be different from the
%% system communication descriptor in the case of parallel
%% preconditioners like the Additive Schwarz one. Then the
%% \fortinline|psb_prec_type| may contain more than one preconditioning matrix
%% \fortinline|psb_Tprec_type| may contain more than one preconditioning matrix
%% like in the case of Two-Level (in general Multi-Level) preconditioners.
%% The user can choose the type of preconditioner to be used by means of
%% the \fortinline|psb_precset| subroutine; once the type of preconditioning
@ -1407,8 +1408,8 @@ Given a heap object, the following methods are defined on it:
\item[dump] Print on file;
\item[free] Release memory.
\end{description}
These objects are used in AMG4PSBLAS to implement the factorization
algorithms.
These objects are used to implement the factorization
and approximate inversion algorithms.
%%% Local Variables:
%%% mode: latex

@ -344,24 +344,25 @@ A simple application structure will walk through the index space
allocation, matrix/vector creation and linear system solution as
follows:
\begin{enumerate}
\item Initialize parallel environment with \verb|psb_init|
\item Initialize index space with \verb|psb_cdall|
\item Initialize parallel environment with \verb|psb_init|;
\item Initialize index space with \verb|psb_cdall|;
\item Allocate sparse matrix and dense vectors with \verb|psb_spall|
and \verb|psb_geall|
and \verb|psb_geall|;
\item Loop over all local rows, generate matrix and vector entries,
and insert them with \verb|psb_spins| and \verb|psb_geins|
\item Assemble the various entities:
\begin{enumerate}
\item \verb|psb_cdasb|
\item \verb|psb_spasb|
\item \verb|psb_geasb|
\item \verb|psb_cdasb|,
\item \verb|psb_spasb|,
\item \verb|psb_geasb|;
\end{enumerate}
\item Choose the preconditioner to be used with \verb|prec%init| and
\verb|prec%set|, and
build it with \verb|prec%build|\footnote{The subroutine style {\tt
psb\_precinit} and {\tt psb\_precbl} are still supported for
backward compatibility}.
\item Call the iterative driver \verb|psb_krylov| with the method of
choice, e.g. \verb|bicgstab|.
psb\_precinit} and {\tt psb\_precbld} are still supported for
backward compatibility};
\item Call one of the iterative drivers with the method of
choice, e.g. \verb|psb_krylov| with \verb|bicgstab|.
\end{enumerate}
This is the structure of the sample programs in the directory
\verb|test/pargen/|.
@ -372,21 +373,23 @@ multiple time steps, the following structure may be more appropriate:
\item Initialize parallel environment with \verb|psb_init|
\item Initialize index space with \verb|psb_cdall|
\item Loop over the topology of the discretization mesh and build the
descriptor with \verb|psb_cdins|
\item Assemble the descriptor with \verb|psb_cdasb|
\item Allocate the sparse matrices and dense vectors with
\verb|psb_spall| and \verb|psb_geall|
descriptor with \verb|psb_cdins|;
\item Assemble the descriptor with \verb|psb_cdasb|;
\item Allocate the sparse matrices and dense vectors with
\verb|psb_spall| and \verb|psb_geall|;
\item Loop over the time steps:
\begin{enumerate}
\item If after first time step,
reinitialize the sparse matrix with \verb|psb_sprn|; also zero out
the dense vectors;
\item Loop over the mesh, generate the coefficients and insert/update
them with \verb|psb_spins| and \verb|psb_geins|
\item Assemble with \verb|psb_spasb| and \verb|psb_geasb|
\item Choose and build preconditioner with \verb|prec%init| and
\verb|prec%build|
\item Call the iterative method of choice, e.g. \verb|psb_bicgstab|
them with \verb|psb_spins| and \verb|psb_geins|;
\item Assemble with \verb|psb_spasb| and \verb|psb_geasb|;
\item Choose the preconditioner to be used with \verb|prec%init| and
\verb|prec%set|, and
build it with \verb|prec%build|;
\item Call one of the iterative drivers with the method of
choice, e.g. \verb|psb_krylov| with \verb|bicgstab|.
\end{enumerate}
\end{enumerate}
The insertion routines will be called as many times as needed;

@ -5,7 +5,7 @@ include $(INCDIR)/Make.inc.psblas
#
# Libraries used
LIBDIR=$(BASEDIR)/lib
PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base
PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base
LDLIBS=$(PSBLDLIBS)
#
# Compilers and such

@ -6,7 +6,7 @@ INCDIR=$(INSTALLDIR)/include/
MODDIR=$(INSTALLDIR)/modules/
include $(INCDIR)/Make.inc.psblas
LIBDIR=$(INSTALLDIR)/lib/
PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base
PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base
LDLIBS=$(PSBLDLIBS)
FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG).

@ -5,7 +5,7 @@ include $(INCDIR)/Make.inc.psblas
#
# Libraries used
LIBDIR=$(INSTALLDIR)/lib
PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base
PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base
LDLIBS=$(PSBLDLIBS)
#
# Compilers and such

@ -658,7 +658,7 @@ end module psb_d_pde3d_mod
program psb_d_pde3d
use psb_base_mod
use psb_prec_mod
use psb_krylov_mod
use psb_linsolve_mod
use psb_util_mod
use psb_d_pde3d_mod
#if defined(OPENMP)

@ -9,7 +9,7 @@ INCDIR=$(TOPDIR)/include
MODDIR=$(TOPDIR)/modules
EXEDIR=./runs
PSBLAS_LIB= -L$(LIBDIR) -L$(PSBLIBDIR) -lpsb_openacc -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base
PSBLAS_LIB= -L$(LIBDIR) -L$(PSBLIBDIR) -lpsb_openacc -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base
LDLIBS=$(PSBGPULDLIBS)
FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FMFLAG). $(FMFLAG)$(PSBMODDIR) $(FMFLAG)$(PSBINCDIR) $(LIBRSB_DEFINES)

@ -653,7 +653,7 @@ end module psb_d_pde3d_mod
program psb_d_oacc_pde3d
use psb_base_mod
use psb_prec_mod
use psb_krylov_mod
use psb_linsolve_mod
use psb_util_mod
use psb_d_pde3d_mod
#if defined(OPENACC)

@ -6,7 +6,7 @@ INCDIR=$(INSTALLDIR)/include/
MODDIR=$(INSTALLDIR)/modules/
include $(INCDIR)/Make.inc.psblas
LIBDIR=$(INSTALLDIR)/lib/
PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base
PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base
LDLIBS=$(PSBLDLIBS)
FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG).

@ -3,7 +3,7 @@ INCDIR=$(INSTALLDIR)/include/
MODDIR=$(INSTALLDIR)/modules/
include $(INCDIR)/Make.inc.psblas
LIBDIR=$(INSTALLDIR)/lib/
PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base
PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base
LDLIBS=$(PSBLDLIBS)
CCOPT= -g
FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG).

@ -6,7 +6,7 @@ include $(INCDIR)/Make.inc.psblas
# Libraries used
#
LIBDIR=$(INSTALLDIR)/lib/
PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base
PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base
LDLIBS=$(PSBLDLIBS)
FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG).

Loading…
Cancel
Save