Updates to user guide

pull/28/head
sfilippone 1 year ago
parent b71459a38f
commit 695f31aa81

@ -17,11 +17,13 @@ class="pplb7t-x-x-172">User&#8217;s and Reference Guide</span><br
class="newline" /> <span
class="pplri7t-x-x-120">A reference guide for the Parallel Sparse BLAS library</span><br
class="newline" /> <span
class="pplb7t-">Salvatore Filippone</span><br
class="pplb7t-">by Salvatore Filippone</span><br
class="newline" /><span
class="pplb7t-">Alfredo Buttari </span><br
class="pplb7t-">Alfredo Buttari </span><br
class="newline" /><span
class="pplb7t-">Fabio Durastante </span><br
class="newline" />Software version: 3.9.0<br
class="newline" />Aug 1st, 2024
class="newline" />Jun 1st, 2025
@ -31,34 +33,38 @@ class="newline" />Aug 1st, 2024
<div class="tableofcontents">
&#x00A0;<span class="likesectionToc" ><a
href="userhtmlli1.html#x2-1000" id="QQ2-2-1">Contents</a></span>
<br /> &#x00A0;<span class="likesectionToc" ><a
href="userhtmlli2.html#x3-2000" id="QQ2-3-2">Preface</a></span>
<br /> &#x00A0;<span class="sectionToc" ><a
href="userhtmlli2.html#Q1-3-3">Preface</a></span>
<br /> &#x00A0;<span class="sectionToc" >1 <a
href="userhtmlse1.html#x3-20001" id="QQ2-3-2">Introduction</a></span>
href="userhtmlse1.html#x4-30001" id="QQ2-4-4">Introduction</a></span>
<br /> &#x00A0;<span class="sectionToc" >2 <a
href="userhtmlse2.html#x4-30002" id="QQ2-4-3">General overview</a></span>
href="userhtmlse2.html#x5-40002" id="QQ2-5-5">General overview</a></span>
<br /> &#x00A0;<span class="sectionToc" >3 <a
href="userhtmlse3.html#x8-90003" id="QQ2-8-11">Data Structures and Classes</a></span>
href="userhtmlse3.html#x9-100003" id="QQ2-9-13">Data Structures and Classes</a></span>
<br /> &#x00A0;<span class="sectionToc" >4 <a
href="userhtmlse4.html#x9-550004" id="QQ2-9-61">Computational routines</a></span>
href="userhtmlse4.html#x10-560004" id="QQ2-10-63">Computational routines</a></span>
<br /> &#x00A0;<span class="sectionToc" >5 <a
href="userhtmlse5.html#x10-720005" id="QQ2-10-94">Communication routines</a></span>
href="userhtmlse5.html#x11-730005" id="QQ2-11-96">Communication routines</a></span>
<br /> &#x00A0;<span class="sectionToc" >6 <a
href="userhtmlse6.html#x11-770006" id="QQ2-11-105">Data management routines</a></span>
href="userhtmlse6.html#x12-780006" id="QQ2-12-107">Data management routines</a></span>
<br /> &#x00A0;<span class="sectionToc" >7 <a
href="userhtmlse7.html#x12-1050007" id="QQ2-12-133">Parallel environment routines</a></span>
href="userhtmlse7.html#x13-1060007" id="QQ2-13-135">Parallel environment routines</a></span>
<br /> &#x00A0;<span class="sectionToc" >8 <a
href="userhtmlse8.html#x13-1230008" id="QQ2-13-151">Error handling</a></span>
href="userhtmlse8.html#x14-1240008" id="QQ2-14-153">Error handling</a></span>
<br /> &#x00A0;<span class="sectionToc" >9 <a
href="userhtmlse9.html#x14-1280009" id="QQ2-14-158">Utilities</a></span>
href="userhtmlse9.html#x15-1290009" id="QQ2-15-160">Utilities</a></span>
<br /> &#x00A0;<span class="sectionToc" >10 <a
href="userhtmlse10.html#x15-13500010" id="QQ2-15-165">Preconditioner routines</a></span>
href="userhtmlse10.html#x16-13600010" id="QQ2-16-167">Preconditioner routines</a></span>
<br /> &#x00A0;<span class="sectionToc" >11 <a
href="userhtmlse11.html#x17-14500011" id="QQ2-17-176">Iterative Methods</a></span>
href="userhtmlse11.html#x18-14600011" id="QQ2-18-178">Iterative Methods</a></span>
<br /> &#x00A0;<span class="sectionToc" >12 <a
href="userhtmlse12.html#x19-14800012" id="QQ2-19-179">Extensions</a></span>
href="userhtmlse12.html#x20-14900012" id="QQ2-20-181">Extensions</a></span>
<br /> &#x00A0;<span class="sectionToc" >13 <a
href="userhtmlse13.html#x20-15700013" id="QQ2-20-194">CUDA Environment Routines</a></span>
href="userhtmlse13.html#x21-15800013" id="QQ2-21-196">CUDA Environment Routines</a></span>
<br /> &#x00A0;<span class="likesectionToc" ><a
href="userhtmlli2.html#x21-172000" id="QQ2-21-223">References</a></span>
href="userhtmlli3.html#x22-173000" id="QQ2-22-225">References</a></span>
</div>
@ -73,6 +79,7 @@ href="userhtmlli2.html#x21-172000" id="QQ2-21-223">References</a></span>
</body></html>

@ -17,11 +17,13 @@ class="pplb7t-x-x-172">User&#8217;s and Reference Guide</span><br
class="newline" /> <span
class="pplri7t-x-x-120">A reference guide for the Parallel Sparse BLAS library</span><br
class="newline" /> <span
class="pplb7t-">Salvatore Filippone</span><br
class="pplb7t-">by Salvatore Filippone</span><br
class="newline" /><span
class="pplb7t-">Alfredo Buttari </span><br
class="pplb7t-">Alfredo Buttari </span><br
class="newline" /><span
class="pplb7t-">Fabio Durastante </span><br
class="newline" />Software version: 3.9.0<br
class="newline" />Aug 1st, 2024
class="newline" />Jun 1st, 2025
@ -31,34 +33,38 @@ class="newline" />Aug 1st, 2024
<div class="tableofcontents">
&#x00A0;<span class="likesectionToc" ><a
href="userhtmlli1.html#x2-1000" id="QQ2-2-1">Contents</a></span>
<br /> &#x00A0;<span class="likesectionToc" ><a
href="userhtmlli2.html#x3-2000" id="QQ2-3-2">Preface</a></span>
<br /> &#x00A0;<span class="sectionToc" ><a
href="userhtmlli2.html#Q1-3-3">Preface</a></span>
<br /> &#x00A0;<span class="sectionToc" >1 <a
href="userhtmlse1.html#x3-20001" id="QQ2-3-2">Introduction</a></span>
href="userhtmlse1.html#x4-30001" id="QQ2-4-4">Introduction</a></span>
<br /> &#x00A0;<span class="sectionToc" >2 <a
href="userhtmlse2.html#x4-30002" id="QQ2-4-3">General overview</a></span>
href="userhtmlse2.html#x5-40002" id="QQ2-5-5">General overview</a></span>
<br /> &#x00A0;<span class="sectionToc" >3 <a
href="userhtmlse3.html#x8-90003" id="QQ2-8-11">Data Structures and Classes</a></span>
href="userhtmlse3.html#x9-100003" id="QQ2-9-13">Data Structures and Classes</a></span>
<br /> &#x00A0;<span class="sectionToc" >4 <a
href="userhtmlse4.html#x9-550004" id="QQ2-9-61">Computational routines</a></span>
href="userhtmlse4.html#x10-560004" id="QQ2-10-63">Computational routines</a></span>
<br /> &#x00A0;<span class="sectionToc" >5 <a
href="userhtmlse5.html#x10-720005" id="QQ2-10-94">Communication routines</a></span>
href="userhtmlse5.html#x11-730005" id="QQ2-11-96">Communication routines</a></span>
<br /> &#x00A0;<span class="sectionToc" >6 <a
href="userhtmlse6.html#x11-770006" id="QQ2-11-105">Data management routines</a></span>
href="userhtmlse6.html#x12-780006" id="QQ2-12-107">Data management routines</a></span>
<br /> &#x00A0;<span class="sectionToc" >7 <a
href="userhtmlse7.html#x12-1050007" id="QQ2-12-133">Parallel environment routines</a></span>
href="userhtmlse7.html#x13-1060007" id="QQ2-13-135">Parallel environment routines</a></span>
<br /> &#x00A0;<span class="sectionToc" >8 <a
href="userhtmlse8.html#x13-1230008" id="QQ2-13-151">Error handling</a></span>
href="userhtmlse8.html#x14-1240008" id="QQ2-14-153">Error handling</a></span>
<br /> &#x00A0;<span class="sectionToc" >9 <a
href="userhtmlse9.html#x14-1280009" id="QQ2-14-158">Utilities</a></span>
href="userhtmlse9.html#x15-1290009" id="QQ2-15-160">Utilities</a></span>
<br /> &#x00A0;<span class="sectionToc" >10 <a
href="userhtmlse10.html#x15-13500010" id="QQ2-15-165">Preconditioner routines</a></span>
href="userhtmlse10.html#x16-13600010" id="QQ2-16-167">Preconditioner routines</a></span>
<br /> &#x00A0;<span class="sectionToc" >11 <a
href="userhtmlse11.html#x17-14500011" id="QQ2-17-176">Iterative Methods</a></span>
href="userhtmlse11.html#x18-14600011" id="QQ2-18-178">Iterative Methods</a></span>
<br /> &#x00A0;<span class="sectionToc" >12 <a
href="userhtmlse12.html#x19-14800012" id="QQ2-19-179">Extensions</a></span>
href="userhtmlse12.html#x20-14900012" id="QQ2-20-181">Extensions</a></span>
<br /> &#x00A0;<span class="sectionToc" >13 <a
href="userhtmlse13.html#x20-15700013" id="QQ2-20-194">CUDA Environment Routines</a></span>
href="userhtmlse13.html#x21-15800013" id="QQ2-21-196">CUDA Environment Routines</a></span>
<br /> &#x00A0;<span class="likesectionToc" ><a
href="userhtmlli2.html#x21-172000" id="QQ2-21-223">References</a></span>
href="userhtmlli3.html#x22-173000" id="QQ2-22-225">References</a></span>
</div>
@ -73,6 +79,7 @@ href="userhtmlli2.html#x21-172000" id="QQ2-21-223">References</a></span>
</body></html>

@ -0,0 +1,19 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<html >
<head><title></title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="generator" content="TeX4ht (https://tug.org/tex4ht/)">
<meta name="originator" content="TeX4ht (https://tug.org/tex4ht/)">
<!-- html,3 -->
<meta name="src" content="userhtml.tex">
<link rel="stylesheet" type="text/css" href="userhtml.css">
</head><body
>
<div class="footnote-text">
<!--l. 73--><p class="indent" > <span class="footnote-mark"><a
id="fn4x0"><a
id="x17-137002x10.1"></a> <sup class="textsuperscript">4</sup></a></span><span
class="pplr7t-x-x-80">The string is case-insensitive</span></div>
</body></html>

@ -0,0 +1,23 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<html >
<head><title></title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="generator" content="TeX4ht (https://tug.org/tex4ht/)">
<meta name="originator" content="TeX4ht (https://tug.org/tex4ht/)">
<!-- html,3 -->
<meta name="src" content="userhtml.tex">
<link rel="stylesheet" type="text/css" href="userhtml.css">
</head><body
>
<div class="footnote-text">
<!--l. 53--><p class="noindent" ><span class="footnote-mark"><a
id="fn5x0"><a
id="x19-147004x11.1"></a> <sup class="textsuperscript">5</sup></a></span><span
class="pplr7t-x-x-80">Note: the implementation is for </span><span
class="zplmr7m-x-x-80">FCG</span><span
class="zplmr7t-x-x-80">(</span><span
class="pplr7t-x-x-80">1</span><span
class="zplmr7t-x-x-80">)</span><span
class="pplr7t-x-x-80">.</span></div>
</body></html>

@ -10,15 +10,10 @@
<link rel="stylesheet" type="text/css" href="userhtml.css">
</head><body
>
<div class="footnote-text">
<!--l. 195--><p class="noindent" ><span class="footnote-mark"><a
id="fn2x0"><a
id="x6-4002x2.1"></a> <sup class="textsuperscript">2</sup></a></span><span
class="pplr7t-x-x-80">This is the normal situation when the pattern of the sparse matrix is symmetric, which is equivalent to</span>
<span
class="pplr7t-x-x-80">say that the interaction between two variables is reciprocal. If the matrix pattern is non-symmetric we may</span>
<span
class="pplr7t-x-x-80">have one-way interactions, and these could cause a situation in which a boundary point is not a halo point</span>
<span
class="pplr7t-x-x-80">for its neighbour.</span></div>
<div class="footnote-text">
<!--l. 209--><p class="indent" > <span class="footnote-mark"><a
id="fn1x0"><a
id="x6-4003x2"></a> <sup class="textsuperscript">1</sup></a></span><span
class="pplr7t-x-x-80">In our prototype implementation we provide sample scatter/gather routines.</span></div>
</body></html>

@ -11,16 +11,14 @@
</head><body
>
<div class="footnote-text">
<!--l. 363--><p class="noindent" ><span class="footnote-mark"><a
id="fn3x0"><a
id="x7-6020x3"></a> <sup class="textsuperscript">3</sup></a></span><span
class="pplr7t-x-x-80">The subroutine style </span><span
class="cmtt-8">psb</span><span
class="cmtt-8">_precinit </span><span
class="pplr7t-x-x-80">and </span><span
class="cmtt-8">psb</span><span
class="cmtt-8">_precbld </span><span
class="pplr7t-x-x-80">are still supported for backward</span>
<!--l. 253--><p class="noindent" ><span class="footnote-mark"><a
id="fn2x0"><a
id="x7-5002x2.1"></a> <sup class="textsuperscript">2</sup></a></span><span
class="pplr7t-x-x-80">This is the normal situation when the pattern of the sparse matrix is symmetric, which is equivalent to</span>
<span
class="pplr7t-x-x-80">compatibility</span></div>
class="pplr7t-x-x-80">say that the interaction between two variables is reciprocal. If the matrix pattern is non-symmetric we may</span>
<span
class="pplr7t-x-x-80">have one-way interactions, and these could cause a situation in which a boundary point is not a halo point</span>
<span
class="pplr7t-x-x-80">for its neighbour.</span></div>
</body></html>

@ -0,0 +1,26 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<html >
<head><title></title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="generator" content="TeX4ht (https://tug.org/tex4ht/)">
<meta name="originator" content="TeX4ht (https://tug.org/tex4ht/)">
<!-- html,3 -->
<meta name="src" content="userhtml.tex">
<link rel="stylesheet" type="text/css" href="userhtml.css">
</head><body
>
<div class="footnote-text">
<!--l. 421--><p class="noindent" ><span class="footnote-mark"><a
id="fn3x0"><a
id="x8-7020x3"></a> <sup class="textsuperscript">3</sup></a></span><span
class="pplr7t-x-x-80">The subroutine style </span><span
class="cmtt-8">psb</span><span
class="cmtt-8">_precinit </span><span
class="pplr7t-x-x-80">and </span><span
class="cmtt-8">psb</span><span
class="cmtt-8">_precbld </span><span
class="pplr7t-x-x-80">are still supported for backward</span>
<span
class="pplr7t-x-x-80">compatibility</span></div>
</body></html>

@ -10,354 +10,356 @@
<link rel="stylesheet" type="text/css" href="userhtml.css">
</head><body
>
<!--l. 114--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlse1.html" >next</a>] [<a
<!--l. 115--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlli2.html" >next</a>] [<a
href="#tailuserhtmlli1.html">tail</a>] [<a
href="userhtml.html#userhtmlli1.html" >up</a>] </p></div>
<h3 class="likesectionHead"><a
id="x2-1000"></a>Contents</h3>
<div class="tableofcontents">
&#x00A0;<span class="sectionToc" >1 <a
href="userhtmlse1.html#x3-20001">Introduction</a></span>
&#x00A0;<span class="sectionToc" ><a
href="userhtmlli2.html#Q1-3-3">Preface</a></span>
<br /> &#x00A0;<span class="sectionToc" >1 <a
href="userhtmlse1.html#x4-30001">Introduction</a></span>
<br /> &#x00A0;<span class="sectionToc" >2 <a
href="userhtmlse2.html#x4-30002">General overview</a></span>
href="userhtmlse2.html#x5-40002">General overview</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >2.1 <a
href="userhtmlse2.html#x4-40002.1" id="QQ2-4-5">Basic Nomenclature</a></span>
href="userhtmlse2.html#x5-50002.1" id="QQ2-5-7">Basic Nomenclature</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >2.2 <a
href="userhtmlse2.html#x4-50002.2" id="QQ2-4-7">Library contents</a></span>
href="userhtmlse2.html#x5-60002.2" id="QQ2-5-9">Library contents</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >2.3 <a
href="userhtmlse2.html#x4-60002.3" id="QQ2-4-8">Application structure</a></span>
href="userhtmlse2.html#x5-70002.3" id="QQ2-5-10">Application structure</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >2.3.1 <a
href="userhtmlse2.html#x4-70002.3.1" id="QQ2-4-9">User-defined index mappings</a></span>
href="userhtmlse2.html#x5-80002.3.1" id="QQ2-5-11">User-defined index mappings</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >2.4 <a
href="userhtmlse2.html#x4-80002.4" id="QQ2-4-10">Programming model</a></span>
href="userhtmlse2.html#x5-90002.4" id="QQ2-5-12">Programming model</a></span>
<br /> &#x00A0;<span class="sectionToc" >3 <a
href="userhtmlse3.html#x8-90003">Data Structures and Classes</a></span>
href="userhtmlse3.html#x9-100003">Data Structures and Classes</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >3.1 <a
href="userhtmlse3.html#x8-100003.1" id="QQ2-8-12">Descriptor data structure</a></span>
href="userhtmlse3.html#x9-110003.1" id="QQ2-9-14">Descriptor data structure</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.1 <a
href="userhtmlse3.html#x8-110003.1.1" id="QQ2-8-14">Descriptor Methods</a></span>
href="userhtmlse3.html#x9-120003.1.1" id="QQ2-9-16">Descriptor Methods</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.2 <a
href="userhtmlse3.html#x8-120003.1.2" id="QQ2-8-15">get_local_rows &#8212; Get number of local rows</a></span>
href="userhtmlse3.html#x9-130003.1.2" id="QQ2-9-17">get_local_rows &#8212; Get number of local rows</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.3 <a
href="userhtmlse3.html#x8-130003.1.3" id="QQ2-8-16">get_local_cols &#8212; Get number of local cols</a></span>
href="userhtmlse3.html#x9-140003.1.3" id="QQ2-9-18">get_local_cols &#8212; Get number of local cols</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.4 <a
href="userhtmlse3.html#x8-140003.1.4" id="QQ2-8-17">get_global_rows &#8212; Get number of global rows</a></span>
href="userhtmlse3.html#x9-150003.1.4" id="QQ2-9-19">get_global_rows &#8212; Get number of global rows</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.5 <a
href="userhtmlse3.html#x8-150003.1.5" id="QQ2-8-18">get_global_cols &#8212; Get number of global cols</a></span>
href="userhtmlse3.html#x9-160003.1.5" id="QQ2-9-20">get_global_cols &#8212; Get number of global cols</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.6 <a
href="userhtmlse3.html#x8-160003.1.6" id="QQ2-8-19">get_global_indices &#8212; Get vector of global indices</a></span>
href="userhtmlse3.html#x9-170003.1.6" id="QQ2-9-21">get_global_indices &#8212; Get vector of global indices</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.7 <a
href="userhtmlse3.html#x8-170003.1.7" id="QQ2-8-20">get_context &#8212; Get communication context</a></span>
href="userhtmlse3.html#x9-180003.1.7" id="QQ2-9-22">get_context &#8212; Get communication context</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.8 <a
href="userhtmlse3.html#x8-180003.1.8" id="QQ2-8-21">Clone &#8212; clone current object</a></span>
href="userhtmlse3.html#x9-190003.1.8" id="QQ2-9-23">Clone &#8212; clone current object</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.9 <a
href="userhtmlse3.html#x8-190003.1.9" id="QQ2-8-22">CNV &#8212; convert internal storage format</a></span>
href="userhtmlse3.html#x9-200003.1.9" id="QQ2-9-24">CNV &#8212; convert internal storage format</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.10 <a
href="userhtmlse3.html#x8-200003.1.10" id="QQ2-8-23">psb_cd_get_hash_threshold &#8212; Get threshold for index mapping switch</a></span>
href="userhtmlse3.html#x9-210003.1.10" id="QQ2-9-25">psb_cd_get_hash_threshold &#8212; Get threshold for index mapping switch</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.11 <a
href="userhtmlse3.html#x8-210003.1.11" id="QQ2-8-24">psb_cd_set_hash_threshold &#8212; Set threshold for index mapping switch</a></span>
href="userhtmlse3.html#x9-220003.1.11" id="QQ2-9-26">psb_cd_set_hash_threshold &#8212; Set threshold for index mapping switch</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.12 <a
href="userhtmlse3.html#x8-220003.1.12" id="QQ2-8-25">get_p_adjcncy &#8212; Get process adjacency list</a></span>
href="userhtmlse3.html#x9-230003.1.12" id="QQ2-9-27">get_p_adjcncy &#8212; Get process adjacency list</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.13 <a
href="userhtmlse3.html#x8-230003.1.13" id="QQ2-8-26">set_p_adjcncy &#8212; Set process adjacency list</a></span>
href="userhtmlse3.html#x9-240003.1.13" id="QQ2-9-28">set_p_adjcncy &#8212; Set process adjacency list</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.14 <a
href="userhtmlse3.html#x8-240003.1.14" id="QQ2-8-27">fnd_owner &#8212; Find the owner process of a set of indices</a></span>
href="userhtmlse3.html#x9-250003.1.14" id="QQ2-9-29">fnd_owner &#8212; Find the owner process of a set of indices</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.15 <a
href="userhtmlse3.html#x8-250003.1.15" id="QQ2-8-28">Named Constants</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >3.2 <a
href="userhtmlse3.html#x8-260003.2" id="QQ2-8-29">Sparse Matrix class</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.1 <a
href="userhtmlse3.html#x8-270003.2.1" id="QQ2-8-31">Sparse Matrix Methods</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.2 <a
href="userhtmlse3.html#x8-280003.2.2" id="QQ2-8-32">get_nrows &#8212; Get number of rows in a sparse matrix</a></span>
href="userhtmlse3.html#x9-260003.1.15" id="QQ2-9-30">Named Constants</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >3.2 <a
href="userhtmlse3.html#x9-270003.2" id="QQ2-9-31">Sparse Matrix class</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.1 <a
href="userhtmlse3.html#x9-280003.2.1" id="QQ2-9-33">Sparse Matrix Methods</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.2 <a
href="userhtmlse3.html#x9-290003.2.2" id="QQ2-9-34">get_nrows &#8212; Get number of rows in a sparse matrix</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.3 <a
href="userhtmlse3.html#x8-290003.2.3" id="QQ2-8-33">get_ncols &#8212; Get number of columns in a sparse matrix</a></span>
href="userhtmlse3.html#x9-300003.2.3" id="QQ2-9-35">get_ncols &#8212; Get number of columns in a sparse matrix</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.4 <a
href="userhtmlse3.html#x8-300003.2.4" id="QQ2-8-34">get_nnzeros &#8212; Get number of nonzero elements in a sparse matrix</a></span>
href="userhtmlse3.html#x9-310003.2.4" id="QQ2-9-36">get_nnzeros &#8212; Get number of nonzero elements in a sparse matrix</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.5 <a
href="userhtmlse3.html#x8-310003.2.5" id="QQ2-8-35">get_size &#8212; Get maximum number of nonzero elements in a sparse matrix</a></span>
href="userhtmlse3.html#x9-320003.2.5" id="QQ2-9-37">get_size &#8212; Get maximum number of nonzero elements in a sparse matrix</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.6 <a
href="userhtmlse3.html#x8-320003.2.6" id="QQ2-8-36">sizeof &#8212; Get memory occupation in bytes of a sparse matrix</a></span>
href="userhtmlse3.html#x9-330003.2.6" id="QQ2-9-38">sizeof &#8212; Get memory occupation in bytes of a sparse matrix</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.7 <a
href="userhtmlse3.html#x8-330003.2.7" id="QQ2-8-37">get_fmt &#8212; Short description of the dynamic type</a></span>
href="userhtmlse3.html#x9-340003.2.7" id="QQ2-9-39">get_fmt &#8212; Short description of the dynamic type</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.8 <a
href="userhtmlse3.html#x8-340003.2.8" id="QQ2-8-38">is_bld, is_upd, is_asb &#8212; Status check</a></span>
href="userhtmlse3.html#x9-350003.2.8" id="QQ2-9-40">is_bld, is_upd, is_asb &#8212; Status check</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.9 <a
href="userhtmlse3.html#x8-350003.2.9" id="QQ2-8-39">is_lower, is_upper, is_triangle, is_unit &#8212; Format check</a></span>
href="userhtmlse3.html#x9-360003.2.9" id="QQ2-9-41">is_lower, is_upper, is_triangle, is_unit &#8212; Format check</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.10 <a
href="userhtmlse3.html#x8-360003.2.10" id="QQ2-8-40">cscnv &#8212; Convert to a different storage format</a></span>
href="userhtmlse3.html#x9-370003.2.10" id="QQ2-9-42">cscnv &#8212; Convert to a different storage format</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.11 <a
href="userhtmlse3.html#x8-370003.2.11" id="QQ2-8-41">csclip &#8212; Reduce to a submatrix</a></span>
href="userhtmlse3.html#x9-380003.2.11" id="QQ2-9-43">csclip &#8212; Reduce to a submatrix</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.12 <a
href="userhtmlse3.html#x8-380003.2.12" id="QQ2-8-42">clean_zeros &#8212; Eliminate zero coefficients</a></span>
href="userhtmlse3.html#x9-390003.2.12" id="QQ2-9-44">clean_zeros &#8212; Eliminate zero coefficients</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.13 <a
href="userhtmlse3.html#x8-390003.2.13" id="QQ2-8-43">get_diag &#8212; Get main diagonal</a></span>
href="userhtmlse3.html#x9-400003.2.13" id="QQ2-9-45">get_diag &#8212; Get main diagonal</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.14 <a
href="userhtmlse3.html#x8-400003.2.14" id="QQ2-8-44">clip_diag &#8212; Cut out main diagonal</a></span>
href="userhtmlse3.html#x9-410003.2.14" id="QQ2-9-46">clip_diag &#8212; Cut out main diagonal</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.15 <a
href="userhtmlse3.html#x8-410003.2.15" id="QQ2-8-45">tril &#8212; Return the lower triangle</a></span>
href="userhtmlse3.html#x9-420003.2.15" id="QQ2-9-47">tril &#8212; Return the lower triangle</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.16 <a
href="userhtmlse3.html#x8-420003.2.16" id="QQ2-8-46">triu &#8212; Return the upper triangle</a></span>
href="userhtmlse3.html#x9-430003.2.16" id="QQ2-9-48">triu &#8212; Return the upper triangle</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.17 <a
href="userhtmlse3.html#x8-430003.2.17" id="QQ2-8-47">psb_set_mat_default &#8212; Set default storage format</a></span>
href="userhtmlse3.html#x9-440003.2.17" id="QQ2-9-49">psb_set_mat_default &#8212; Set default storage format</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.18 <a
href="userhtmlse3.html#x8-440003.2.18" id="QQ2-8-48">clone &#8212; Clone current object</a></span>
href="userhtmlse3.html#x9-450003.2.18" id="QQ2-9-50">clone &#8212; Clone current object</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.19 <a
href="userhtmlse3.html#x8-450003.2.19" id="QQ2-8-49">Named Constants</a></span>
href="userhtmlse3.html#x9-460003.2.19" id="QQ2-9-51">Named Constants</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >3.3 <a
href="userhtmlse3.html#x8-460003.3" id="QQ2-8-50">Dense Vector Data Structure</a></span>
href="userhtmlse3.html#x9-470003.3" id="QQ2-9-52">Dense Vector Data Structure</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.3.1 <a
href="userhtmlse3.html#x8-470003.3.1" id="QQ2-8-52">Vector Methods</a></span>
href="userhtmlse3.html#x9-480003.3.1" id="QQ2-9-54">Vector Methods</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.3.2 <a
href="userhtmlse3.html#x8-480003.3.2" id="QQ2-8-53">get_nrows &#8212; Get number of rows in a dense vector</a></span>
href="userhtmlse3.html#x9-490003.3.2" id="QQ2-9-55">get_nrows &#8212; Get number of rows in a dense vector</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.3.3 <a
href="userhtmlse3.html#x8-490003.3.3" id="QQ2-8-54">sizeof &#8212; Get memory occupation in bytes of a dense vector</a></span>
href="userhtmlse3.html#x9-500003.3.3" id="QQ2-9-56">sizeof &#8212; Get memory occupation in bytes of a dense vector</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.3.4 <a
href="userhtmlse3.html#x8-500003.3.4" id="QQ2-8-55">set &#8212; Set contents of the vector</a></span>
href="userhtmlse3.html#x9-510003.3.4" id="QQ2-9-57">set &#8212; Set contents of the vector</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.3.5 <a
href="userhtmlse3.html#x8-510003.3.5" id="QQ2-8-56">get_vect &#8212; Get a copy of the vector contents</a></span>
href="userhtmlse3.html#x9-520003.3.5" id="QQ2-9-58">get_vect &#8212; Get a copy of the vector contents</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.3.6 <a
href="userhtmlse3.html#x8-520003.3.6" id="QQ2-8-57">clone &#8212; Clone current object</a></span>
href="userhtmlse3.html#x9-530003.3.6" id="QQ2-9-59">clone &#8212; Clone current object</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >3.4 <a
href="userhtmlse3.html#x8-530003.4" id="QQ2-8-58">Preconditioner data structure</a></span>
href="userhtmlse3.html#x9-540003.4" id="QQ2-9-60">Preconditioner data structure</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >3.5 <a
href="userhtmlse3.html#x8-540003.5" id="QQ2-8-60">Heap data structure</a></span>
href="userhtmlse3.html#x9-550003.5" id="QQ2-9-62">Heap data structure</a></span>
<br /> &#x00A0;<span class="sectionToc" >4 <a
href="userhtmlse4.html#x9-550004">Computational routines</a></span>
href="userhtmlse4.html#x10-560004">Computational routines</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >4.1 <a
href="userhtmlse4.html#x9-560004.1" id="QQ2-9-62">psb_geaxpby &#8212; General Dense Matrix Sum</a></span>
href="userhtmlse4.html#x10-570004.1" id="QQ2-10-64">psb_geaxpby &#8212; General Dense Matrix Sum</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >4.2 <a
href="userhtmlse4.html#x9-570004.2" id="QQ2-9-64">psb_gedot &#8212; Dot Product</a></span>
href="userhtmlse4.html#x10-580004.2" id="QQ2-10-66">psb_gedot &#8212; Dot Product</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >4.3 <a
href="userhtmlse4.html#x9-580004.3" id="QQ2-9-66">psb_gedots &#8212; Generalized Dot Product</a></span>
href="userhtmlse4.html#x10-590004.3" id="QQ2-10-68">psb_gedots &#8212; Generalized Dot Product</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >4.4 <a
href="userhtmlse4.html#x9-590004.4" id="QQ2-9-68">psb_normi &#8212; Infinity-Norm of Vector</a></span>
href="userhtmlse4.html#x10-600004.4" id="QQ2-10-70">psb_normi &#8212; Infinity-Norm of Vector</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >4.5 <a
href="userhtmlse4.html#x9-600004.5" id="QQ2-9-70">psb_geamaxs &#8212; Generalized Infinity Norm</a></span>
href="userhtmlse4.html#x10-610004.5" id="QQ2-10-72">psb_geamaxs &#8212; Generalized Infinity Norm</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >4.6 <a
href="userhtmlse4.html#x9-610004.6" id="QQ2-9-72">psb_norm1 &#8212; 1-Norm of Vector</a></span>
href="userhtmlse4.html#x10-620004.6" id="QQ2-10-74">psb_norm1 &#8212; 1-Norm of Vector</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >4.7 <a
href="userhtmlse4.html#x9-620004.7" id="QQ2-9-74">psb_geasums &#8212; Generalized 1-Norm of Vector</a></span>
href="userhtmlse4.html#x10-630004.7" id="QQ2-10-76">psb_geasums &#8212; Generalized 1-Norm of Vector</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >4.8 <a
href="userhtmlse4.html#x9-630004.8" id="QQ2-9-76">psb_norm2 &#8212; 2-Norm of Vector</a></span>
href="userhtmlse4.html#x10-640004.8" id="QQ2-10-78">psb_norm2 &#8212; 2-Norm of Vector</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >4.9 <a
href="userhtmlse4.html#x9-640004.9" id="QQ2-9-78">psb_genrm2s &#8212; Generalized 2-Norm of Vector</a></span>
href="userhtmlse4.html#x10-650004.9" id="QQ2-10-80">psb_genrm2s &#8212; Generalized 2-Norm of Vector</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >4.10 <a
href="userhtmlse4.html#x9-650004.10" id="QQ2-9-80">psb_norm1 &#8212; 1-Norm of Sparse Matrix</a></span>
href="userhtmlse4.html#x10-660004.10" id="QQ2-10-82">psb_norm1 &#8212; 1-Norm of Sparse Matrix</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >4.11 <a
href="userhtmlse4.html#x9-660004.11" id="QQ2-9-82">psb_normi &#8212; Infinity Norm of Sparse Matrix</a></span>
href="userhtmlse4.html#x10-670004.11" id="QQ2-10-84">psb_normi &#8212; Infinity Norm of Sparse Matrix</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >4.12 <a
href="userhtmlse4.html#x9-670004.12" id="QQ2-9-84">psb_spmm &#8212; Sparse Matrix by Dense Matrix Product</a></span>
href="userhtmlse4.html#x10-680004.12" id="QQ2-10-86">psb_spmm &#8212; Sparse Matrix by Dense Matrix Product</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >4.13 <a
href="userhtmlse4.html#x9-680004.13" id="QQ2-9-86">psb_spsm &#8212; Triangular System Solve</a></span>
href="userhtmlse4.html#x10-690004.13" id="QQ2-10-88">psb_spsm &#8212; Triangular System Solve</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >4.14 <a
href="userhtmlse4.html#x9-690004.14" id="QQ2-9-88">psb_gemlt &#8212; Entrywise Product</a></span>
href="userhtmlse4.html#x10-700004.14" id="QQ2-10-90">psb_gemlt &#8212; Entrywise Product</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >4.15 <a
href="userhtmlse4.html#x9-700004.15" id="QQ2-9-90">psb_gediv &#8212; Entrywise Division</a></span>
href="userhtmlse4.html#x10-710004.15" id="QQ2-10-92">psb_gediv &#8212; Entrywise Division</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >4.16 <a
href="userhtmlse4.html#x9-710004.16" id="QQ2-9-92">psb_geinv &#8212; Entrywise Inversion</a></span>
href="userhtmlse4.html#x10-720004.16" id="QQ2-10-94">psb_geinv &#8212; Entrywise Inversion</a></span>
<br /> &#x00A0;<span class="sectionToc" >5 <a
href="userhtmlse5.html#x10-720005">Communication routines</a></span>
href="userhtmlse5.html#x11-730005">Communication routines</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >5.1 <a
href="userhtmlse5.html#x10-730005.1" id="QQ2-10-95">psb_halo &#8212; Halo Data Communication</a></span>
href="userhtmlse5.html#x11-740005.1" id="QQ2-11-97">psb_halo &#8212; Halo Data Communication</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >5.2 <a
href="userhtmlse5.html#x10-740005.2" id="QQ2-10-98">psb_ovrl &#8212; Overlap Update</a></span>
href="userhtmlse5.html#x11-750005.2" id="QQ2-11-100">psb_ovrl &#8212; Overlap Update</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >5.3 <a
href="userhtmlse5.html#x10-750005.3" id="QQ2-10-101">psb_gather &#8212; Gather Global Dense Matrix</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >5.4 <a
href="userhtmlse5.html#x10-760005.4" id="QQ2-10-103">psb_scatter &#8212; Scatter Global Dense Matrix</a></span>
<br /> &#x00A0;<span class="sectionToc" >6 <a
href="userhtmlse6.html#x11-770006">Data management routines</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.1 <a
href="userhtmlse6.html#x11-780006.1" id="QQ2-11-106">psb_cdall &#8212; Allocates a communication descriptor</a></span>
href="userhtmlse5.html#x11-760005.3" id="QQ2-11-103">psb_gather &#8212; Gather Global Dense Matrix</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >5.4 <a
href="userhtmlse5.html#x11-770005.4" id="QQ2-11-105">psb_scatter &#8212; Scatter Global Dense Matrix</a></span>
<br /> &#x00A0;<span class="sectionToc" >6 <a
href="userhtmlse6.html#x12-780006">Data management routines</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.1 <a
href="userhtmlse6.html#x12-790006.1" id="QQ2-12-108">psb_cdall &#8212; Allocates a communication descriptor</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.2 <a
href="userhtmlse6.html#x11-790006.2" id="QQ2-11-107">psb_cdins &#8212; Communication descriptor insert routine</a></span>
href="userhtmlse6.html#x12-800006.2" id="QQ2-12-109">psb_cdins &#8212; Communication descriptor insert routine</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.3 <a
href="userhtmlse6.html#x11-800006.3" id="QQ2-11-108">psb_cdasb &#8212; Communication descriptor assembly routine</a></span>
href="userhtmlse6.html#x12-810006.3" id="QQ2-12-110">psb_cdasb &#8212; Communication descriptor assembly routine</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.4 <a
href="userhtmlse6.html#x11-810006.4" id="QQ2-11-109">psb_cdcpy &#8212; Copies a communication descriptor</a></span>
href="userhtmlse6.html#x12-820006.4" id="QQ2-12-111">psb_cdcpy &#8212; Copies a communication descriptor</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.5 <a
href="userhtmlse6.html#x11-820006.5" id="QQ2-11-110">psb_cdfree &#8212; Frees a communication descriptor</a></span>
href="userhtmlse6.html#x12-830006.5" id="QQ2-12-112">psb_cdfree &#8212; Frees a communication descriptor</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.6 <a
href="userhtmlse6.html#x11-830006.6" id="QQ2-11-111">psb_cdbldext &#8212; Build an extended communication descriptor</a></span>
href="userhtmlse6.html#x12-840006.6" id="QQ2-12-113">psb_cdbldext &#8212; Build an extended communication descriptor</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.7 <a
href="userhtmlse6.html#x11-840006.7" id="QQ2-11-112">psb_spall &#8212; Allocates a sparse matrix</a></span>
href="userhtmlse6.html#x12-850006.7" id="QQ2-12-114">psb_spall &#8212; Allocates a sparse matrix</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.8 <a
href="userhtmlse6.html#x11-850006.8" id="QQ2-11-113">psb_spins &#8212; Insert a set of coefficients into a sparse matrix</a></span>
href="userhtmlse6.html#x12-860006.8" id="QQ2-12-115">psb_spins &#8212; Insert a set of coefficients into a sparse matrix</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.9 <a
href="userhtmlse6.html#x11-860006.9" id="QQ2-11-114">psb_spasb &#8212; Sparse matrix assembly routine</a></span>
href="userhtmlse6.html#x12-870006.9" id="QQ2-12-116">psb_spasb &#8212; Sparse matrix assembly routine</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.10 <a
href="userhtmlse6.html#x11-870006.10" id="QQ2-11-115">psb_spfree &#8212; Frees a sparse matrix</a></span>
href="userhtmlse6.html#x12-880006.10" id="QQ2-12-117">psb_spfree &#8212; Frees a sparse matrix</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.11 <a
href="userhtmlse6.html#x11-880006.11" id="QQ2-11-116">psb_sprn &#8212; Reinit sparse matrix structure for psblas routines.</a></span>
href="userhtmlse6.html#x12-890006.11" id="QQ2-12-118">psb_sprn &#8212; Reinit sparse matrix structure for psblas routines.</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.12 <a
href="userhtmlse6.html#x11-890006.12" id="QQ2-11-117">psb_geall &#8212; Allocates a dense matrix</a></span>
href="userhtmlse6.html#x12-900006.12" id="QQ2-12-119">psb_geall &#8212; Allocates a dense matrix</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.13 <a
href="userhtmlse6.html#x11-900006.13" id="QQ2-11-118">psb_geins &#8212; Dense matrix insertion routine</a></span>
href="userhtmlse6.html#x12-910006.13" id="QQ2-12-120">psb_geins &#8212; Dense matrix insertion routine</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.14 <a
href="userhtmlse6.html#x11-910006.14" id="QQ2-11-119">psb_geasb &#8212; Assembles a dense matrix</a></span>
href="userhtmlse6.html#x12-920006.14" id="QQ2-12-121">psb_geasb &#8212; Assembles a dense matrix</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.15 <a
href="userhtmlse6.html#x11-920006.15" id="QQ2-11-120">psb_gefree &#8212; Frees a dense matrix</a></span>
href="userhtmlse6.html#x12-930006.15" id="QQ2-12-122">psb_gefree &#8212; Frees a dense matrix</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.16 <a
href="userhtmlse6.html#x11-930006.16" id="QQ2-11-121">psb_gelp &#8212; Applies a left permutation to a dense matrix</a></span>
href="userhtmlse6.html#x12-940006.16" id="QQ2-12-123">psb_gelp &#8212; Applies a left permutation to a dense matrix</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.17 <a
href="userhtmlse6.html#x11-940006.17" id="QQ2-11-122">psb_glob_to_loc &#8212; Global to local indices conversion</a></span>
href="userhtmlse6.html#x12-950006.17" id="QQ2-12-124">psb_glob_to_loc &#8212; Global to local indices conversion</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.18 <a
href="userhtmlse6.html#x11-950006.18" id="QQ2-11-123">psb_loc_to_glob &#8212; Local to global indices conversion</a></span>
href="userhtmlse6.html#x12-960006.18" id="QQ2-12-125">psb_loc_to_glob &#8212; Local to global indices conversion</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.19 <a
href="userhtmlse6.html#x11-960006.19" id="QQ2-11-124">psb_is_owned &#8212; </a></span>
href="userhtmlse6.html#x12-970006.19" id="QQ2-12-126">psb_is_owned &#8212; </a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.20 <a
href="userhtmlse6.html#x11-970006.20" id="QQ2-11-125">psb_owned_index &#8212; </a></span>
href="userhtmlse6.html#x12-980006.20" id="QQ2-12-127">psb_owned_index &#8212; </a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.21 <a
href="userhtmlse6.html#x11-980006.21" id="QQ2-11-126">psb_is_local &#8212; </a></span>
href="userhtmlse6.html#x12-990006.21" id="QQ2-12-128">psb_is_local &#8212; </a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.22 <a
href="userhtmlse6.html#x11-990006.22" id="QQ2-11-127">psb_local_index &#8212; </a></span>
href="userhtmlse6.html#x12-1000006.22" id="QQ2-12-129">psb_local_index &#8212; </a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.23 <a
href="userhtmlse6.html#x11-1000006.23" id="QQ2-11-128">psb_get_boundary &#8212; Extract list of boundary elements</a></span>
href="userhtmlse6.html#x12-1010006.23" id="QQ2-12-130">psb_get_boundary &#8212; Extract list of boundary elements</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.24 <a
href="userhtmlse6.html#x11-1010006.24" id="QQ2-11-129">psb_get_overlap &#8212; Extract list of overlap elements</a></span>
href="userhtmlse6.html#x12-1020006.24" id="QQ2-12-131">psb_get_overlap &#8212; Extract list of overlap elements</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.25 <a
href="userhtmlse6.html#x11-1020006.25" id="QQ2-11-130">psb_sp_getrow &#8212; Extract row(s) from a sparse matrix</a></span>
href="userhtmlse6.html#x12-1030006.25" id="QQ2-12-132">psb_sp_getrow &#8212; Extract row(s) from a sparse matrix</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.26 <a
href="userhtmlse6.html#x11-1030006.26" id="QQ2-11-131">psb_sizeof &#8212; Memory occupation</a></span>
href="userhtmlse6.html#x12-1040006.26" id="QQ2-12-133">psb_sizeof &#8212; Memory occupation</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.27 <a
href="userhtmlse6.html#x11-1040006.27" id="QQ2-11-132">Sorting utilities &#8212; </a></span>
href="userhtmlse6.html#x12-1050006.27" id="QQ2-12-134">Sorting utilities &#8212; </a></span>
<br /> &#x00A0;<span class="sectionToc" >7 <a
href="userhtmlse7.html#x12-1050007">Parallel environment routines</a></span>
href="userhtmlse7.html#x13-1060007">Parallel environment routines</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.1 <a
href="userhtmlse7.html#x12-1060007.1" id="QQ2-12-134">psb_init &#8212; Initializes PSBLAS parallel environment</a></span>
href="userhtmlse7.html#x13-1070007.1" id="QQ2-13-136">psb_init &#8212; Initializes PSBLAS parallel environment</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.2 <a
href="userhtmlse7.html#x12-1070007.2" id="QQ2-12-135">psb_info &#8212; Return information about PSBLAS parallel environment</a></span>
href="userhtmlse7.html#x13-1080007.2" id="QQ2-13-137">psb_info &#8212; Return information about PSBLAS parallel environment</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.3 <a
href="userhtmlse7.html#x12-1080007.3" id="QQ2-12-136">psb_exit &#8212; Exit from PSBLAS parallel environment</a></span>
href="userhtmlse7.html#x13-1090007.3" id="QQ2-13-138">psb_exit &#8212; Exit from PSBLAS parallel environment</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.4 <a
href="userhtmlse7.html#x12-1090007.4" id="QQ2-12-137">psb_get_mpi_comm &#8212; Get the MPI communicator</a></span>
href="userhtmlse7.html#x13-1100007.4" id="QQ2-13-139">psb_get_mpi_comm &#8212; Get the MPI communicator</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.5 <a
href="userhtmlse7.html#x12-1100007.5" id="QQ2-12-138">psb_get_mpi_rank &#8212; Get the MPI rank</a></span>
href="userhtmlse7.html#x13-1110007.5" id="QQ2-13-140">psb_get_mpi_rank &#8212; Get the MPI rank</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.6 <a
href="userhtmlse7.html#x12-1110007.6" id="QQ2-12-139">psb_wtime &#8212; Wall clock timing</a></span>
href="userhtmlse7.html#x13-1120007.6" id="QQ2-13-141">psb_wtime &#8212; Wall clock timing</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.7 <a
href="userhtmlse7.html#x12-1120007.7" id="QQ2-12-140">psb_barrier &#8212; Synchronization point parallel environment</a></span>
href="userhtmlse7.html#x13-1130007.7" id="QQ2-13-142">psb_barrier &#8212; Synchronization point parallel environment</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.8 <a
href="userhtmlse7.html#x12-1130007.8" id="QQ2-12-141">psb_abort &#8212; Abort a computation</a></span>
href="userhtmlse7.html#x13-1140007.8" id="QQ2-13-143">psb_abort &#8212; Abort a computation</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.9 <a
href="userhtmlse7.html#x12-1140007.9" id="QQ2-12-142">psb_bcast &#8212; Broadcast data</a></span>
href="userhtmlse7.html#x13-1150007.9" id="QQ2-13-144">psb_bcast &#8212; Broadcast data</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.10 <a
href="userhtmlse7.html#x12-1150007.10" id="QQ2-12-143">psb_sum &#8212; Global sum</a></span>
href="userhtmlse7.html#x13-1160007.10" id="QQ2-13-145">psb_sum &#8212; Global sum</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.11 <a
href="userhtmlse7.html#x12-1160007.11" id="QQ2-12-144">psb_max &#8212; Global maximum</a></span>
href="userhtmlse7.html#x13-1170007.11" id="QQ2-13-146">psb_max &#8212; Global maximum</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.12 <a
href="userhtmlse7.html#x12-1170007.12" id="QQ2-12-145">psb_min &#8212; Global minimum</a></span>
href="userhtmlse7.html#x13-1180007.12" id="QQ2-13-147">psb_min &#8212; Global minimum</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.13 <a
href="userhtmlse7.html#x12-1180007.13" id="QQ2-12-146">psb_amx &#8212; Global maximum absolute value</a></span>
href="userhtmlse7.html#x13-1190007.13" id="QQ2-13-148">psb_amx &#8212; Global maximum absolute value</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.14 <a
href="userhtmlse7.html#x12-1190007.14" id="QQ2-12-147">psb_amn &#8212; Global minimum absolute value</a></span>
href="userhtmlse7.html#x13-1200007.14" id="QQ2-13-149">psb_amn &#8212; Global minimum absolute value</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.15 <a
href="userhtmlse7.html#x12-1200007.15" id="QQ2-12-148">psb_nrm2 &#8212; Global 2-norm reduction</a></span>
href="userhtmlse7.html#x13-1210007.15" id="QQ2-13-150">psb_nrm2 &#8212; Global 2-norm reduction</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.16 <a
href="userhtmlse7.html#x12-1210007.16" id="QQ2-12-149">psb_snd &#8212; Send data</a></span>
href="userhtmlse7.html#x13-1220007.16" id="QQ2-13-151">psb_snd &#8212; Send data</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.17 <a
href="userhtmlse7.html#x12-1220007.17" id="QQ2-12-150">psb_rcv &#8212; Receive data</a></span>
href="userhtmlse7.html#x13-1230007.17" id="QQ2-13-152">psb_rcv &#8212; Receive data</a></span>
<br /> &#x00A0;<span class="sectionToc" >8 <a
href="userhtmlse8.html#x13-1230008">Error handling</a></span>
href="userhtmlse8.html#x14-1240008">Error handling</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >8.1 <a
href="userhtmlse8.html#x13-1240008.1" id="QQ2-13-154">psb_errpush &#8212; Pushes an error code onto the error stack</a></span>
href="userhtmlse8.html#x14-1250008.1" id="QQ2-14-156">psb_errpush &#8212; Pushes an error code onto the error stack</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >8.2 <a
href="userhtmlse8.html#x13-1250008.2" id="QQ2-13-155">psb_error &#8212; Prints the error stack content and aborts execution</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >8.3 <a
href="userhtmlse8.html#x13-1260008.3" id="QQ2-13-156">psb_set_errverbosity &#8212; Sets the verbosity of error messages</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >8.4 <a
href="userhtmlse8.html#x13-1270008.4" id="QQ2-13-157">psb_set_erraction &#8212; Set the type of action to be taken upon error condition</a></span>
<br /> &#x00A0;<span class="sectionToc" >9 <a
href="userhtmlse9.html#x14-1280009">Utilities</a></span>
href="userhtmlse8.html#x14-1260008.2" id="QQ2-14-157">psb_error &#8212; Prints the error stack content and aborts execution</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >8.3 <a
href="userhtmlse8.html#x14-1270008.3" id="QQ2-14-158">psb_set_errverbosity &#8212; Sets the verbosity of error messages</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >8.4 <a
href="userhtmlse8.html#x14-1280008.4" id="QQ2-14-159">psb_set_erraction &#8212; Set the type of action to be taken upon error condition</a></span>
<br /> &#x00A0;<span class="sectionToc" >9 <a
href="userhtmlse9.html#x15-1290009">Utilities</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >9.1 <a
href="userhtmlse9.html#x14-1290009.1" id="QQ2-14-159"> hb_read &#8212; Read a sparse matrix from a file in the Harwell&#8211;Boeing format</a></span>
href="userhtmlse9.html#x15-1300009.1" id="QQ2-15-161"> hb_read &#8212; Read a sparse matrix from a file in the Harwell&#8211;Boeing format</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >9.2 <a
href="userhtmlse9.html#x14-1300009.2" id="QQ2-14-160">hb_write &#8212; Write a sparse matrix to a file in the Harwell&#8211;Boeing format</a></span>
href="userhtmlse9.html#x15-1310009.2" id="QQ2-15-162">hb_write &#8212; Write a sparse matrix to a file in the Harwell&#8211;Boeing format</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >9.3 <a
href="userhtmlse9.html#x14-1310009.3" id="QQ2-14-161">mm_mat_read &#8212; Read a sparse matrix from a file in the MatrixMarket format</a></span>
href="userhtmlse9.html#x15-1320009.3" id="QQ2-15-163">mm_mat_read &#8212; Read a sparse matrix from a file in the MatrixMarket format</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >9.4 <a
href="userhtmlse9.html#x14-1320009.4" id="QQ2-14-162">mm_array_read &#8212; Read a dense array from a file in the MatrixMarket format</a></span>
href="userhtmlse9.html#x15-1330009.4" id="QQ2-15-164">mm_array_read &#8212; Read a dense array from a file in the MatrixMarket format</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >9.5 <a
href="userhtmlse9.html#x14-1330009.5" id="QQ2-14-163">mm_mat_write &#8212; Write a sparse matrix to a file in the MatrixMarket format</a></span>
href="userhtmlse9.html#x15-1340009.5" id="QQ2-15-165">mm_mat_write &#8212; Write a sparse matrix to a file in the MatrixMarket format</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >9.6 <a
href="userhtmlse9.html#x14-1340009.6" id="QQ2-14-164">mm_array_write &#8212; Write a dense array to a file in the MatrixMarket format</a></span>
href="userhtmlse9.html#x15-1350009.6" id="QQ2-15-166">mm_array_write &#8212; Write a dense array to a file in the MatrixMarket format</a></span>
<br /> &#x00A0;<span class="sectionToc" >10 <a
href="userhtmlse10.html#x15-13500010">Preconditioner routines</a></span>
href="userhtmlse10.html#x16-13600010">Preconditioner routines</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >10.1 <a
href="userhtmlse10.html#x15-13600010.1" id="QQ2-15-166">init &#8212; Initialize a preconditioner</a></span>
href="userhtmlse10.html#x16-13700010.1" id="QQ2-16-168">init &#8212; Initialize a preconditioner</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >10.2 <a
href="userhtmlse10.html#x15-13700010.2" id="QQ2-15-167">Set &#8212; set preconditioner parameters</a></span>
href="userhtmlse10.html#x16-13800010.2" id="QQ2-16-169">Set &#8212; set preconditioner parameters</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >10.3 <a
href="userhtmlse10.html#x15-13800010.3" id="QQ2-15-169">build &#8212; Builds a preconditioner</a></span>
href="userhtmlse10.html#x16-13900010.3" id="QQ2-16-171">build &#8212; Builds a preconditioner</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >10.4 <a
href="userhtmlse10.html#x15-13900010.4" id="QQ2-15-170">apply &#8212; Preconditioner application routine</a></span>
href="userhtmlse10.html#x16-14000010.4" id="QQ2-16-172">apply &#8212; Preconditioner application routine</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >10.5 <a
href="userhtmlse10.html#x15-14000010.5" id="QQ2-15-171">descr &#8212; Prints a description of current preconditioner</a></span>
href="userhtmlse10.html#x16-14100010.5" id="QQ2-16-173">descr &#8212; Prints a description of current preconditioner</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >10.6 <a
href="userhtmlse10.html#x15-14100010.6" id="QQ2-15-172">clone &#8212; clone current preconditioner</a></span>
href="userhtmlse10.html#x16-14200010.6" id="QQ2-16-174">clone &#8212; clone current preconditioner</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >10.7 <a
href="userhtmlse10.html#x15-14200010.7" id="QQ2-15-173">free &#8212; Free a preconditioner</a></span>
href="userhtmlse10.html#x16-14300010.7" id="QQ2-16-175">free &#8212; Free a preconditioner</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >10.8 <a
href="userhtmlse10.html#x15-14300010.8" id="QQ2-15-174">allocate_wrk &#8212; preconditioner</a></span>
href="userhtmlse10.html#x16-14400010.8" id="QQ2-16-176">allocate_wrk &#8212; preconditioner</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >10.9 <a
href="userhtmlse10.html#x15-14400010.9" id="QQ2-15-175">deallocate_wrk &#8212; preconditioner</a></span>
href="userhtmlse10.html#x16-14500010.9" id="QQ2-16-177">deallocate_wrk &#8212; preconditioner</a></span>
<br /> &#x00A0;<span class="sectionToc" >11 <a
href="userhtmlse11.html#x17-14500011">Iterative Methods</a></span>
href="userhtmlse11.html#x18-14600011">Iterative Methods</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >11.1 <a
href="userhtmlse11.html#x17-14600011.1" id="QQ2-17-177">psb_krylov &#8212; Krylov Methods Driver Routine</a></span>
href="userhtmlse11.html#x18-14700011.1" id="QQ2-18-179">psb_krylov &#8212; Krylov Methods Driver Routine</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >11.2 <a
href="userhtmlse11.html#x17-14700011.2" id="QQ2-17-178">psb_richardson &#8212; Richardson Iteration Driver Routine</a></span>
href="userhtmlse11.html#x18-14800011.2" id="QQ2-18-180">psb_richardson &#8212; Richardson Iteration Driver Routine</a></span>
<br /> &#x00A0;<span class="sectionToc" >12 <a
href="userhtmlse12.html#x19-14800012">Extensions</a></span>
href="userhtmlse12.html#x20-14900012">Extensions</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >12.1 <a
href="userhtmlse12.html#x19-14900012.1" id="QQ2-19-180">Using the extensions</a></span>
href="userhtmlse12.html#x20-15000012.1" id="QQ2-20-182">Using the extensions</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >12.2 <a
href="userhtmlse12.html#x19-15000012.2" id="QQ2-19-181">Extensions&#8217; Data Structures</a></span>
href="userhtmlse12.html#x20-15100012.2" id="QQ2-20-183">Extensions&#8217; Data Structures</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >12.3 <a
href="userhtmlse12.html#x19-15100012.3" id="QQ2-19-184">CPU-class extensions</a></span>
href="userhtmlse12.html#x20-15200012.3" id="QQ2-20-186">CPU-class extensions</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >12.4 <a
href="userhtmlse12.html#x19-15600012.4" id="QQ2-19-193">CUDA-class extensions</a></span>
href="userhtmlse12.html#x20-15700012.4" id="QQ2-20-195">CUDA-class extensions</a></span>
<br /> &#x00A0;<span class="sectionToc" >13 <a
href="userhtmlse13.html#x20-15700013">CUDA Environment Routines</a></span>
href="userhtmlse13.html#x21-15800013">CUDA Environment Routines</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-196">psb_cuda_init</a></span>
href="userhtmlse13.html#Q1-21-198">psb_cuda_init</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-198">psb_cuda_exit</a></span>
href="userhtmlse13.html#Q1-21-200">psb_cuda_exit</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-200">psb_cuda_DeviceSync</a></span>
href="userhtmlse13.html#Q1-21-202">psb_cuda_DeviceSync</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-202">psb_cuda_getDeviceCount</a></span>
href="userhtmlse13.html#Q1-21-204">psb_cuda_getDeviceCount</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-204">psb_cuda_getDevice</a></span>
href="userhtmlse13.html#Q1-21-206">psb_cuda_getDevice</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-206">psb_cuda_setDevice</a></span>
href="userhtmlse13.html#Q1-21-208">psb_cuda_setDevice</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-208">psb_cuda_DeviceHasUVA</a></span>
href="userhtmlse13.html#Q1-21-210">psb_cuda_DeviceHasUVA</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-210">psb_cuda_WarpSize</a></span>
href="userhtmlse13.html#Q1-21-212">psb_cuda_WarpSize</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-212">psb_cuda_MultiProcessors</a></span>
href="userhtmlse13.html#Q1-21-214">psb_cuda_MultiProcessors</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-214">psb_cuda_MaxThreadsPerMP</a></span>
href="userhtmlse13.html#Q1-21-216">psb_cuda_MaxThreadsPerMP</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-216">psb_cuda_MaxRegisterPerBlock</a></span>
href="userhtmlse13.html#Q1-21-218">psb_cuda_MaxRegisterPerBlock</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-218">psb_cuda_MemoryClockRate</a></span>
href="userhtmlse13.html#Q1-21-220">psb_cuda_MemoryClockRate</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-220">psb_cuda_MemoryBusWidth</a></span>
href="userhtmlse13.html#Q1-21-222">psb_cuda_MemoryBusWidth</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-222">psb_cuda_MemoryPeakBandwidth</a></span>
href="userhtmlse13.html#Q1-21-224">psb_cuda_MemoryPeakBandwidth</a></span>
</div>
@ -369,7 +371,7 @@ href="userhtmlse13.html#Q1-20-222">psb_cuda_MemoryPeakBandwidth</a></span>
<!--l. 1--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlse1.html" >next</a>] [<a
href="userhtmlli2.html" >next</a>] [<a
href="userhtmlli1.html" >front</a>] [<a
href="userhtml.html#userhtmlli1.html" >up</a>] </p></div>
<!--l. 1--><p class="indent" > <a

@ -1,7 +1,7 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<html >
<head><title>References</title>
<head><title>Preface</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="generator" content="TeX4ht (https://tug.org/tex4ht/)">
<meta name="originator" content="TeX4ht (https://tug.org/tex4ht/)">
@ -10,216 +10,74 @@
<link rel="stylesheet" type="text/css" href="userhtml.css">
</head><body
>
<!--l. 2--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlse13.html" >prev</a>] [<a
href="userhtmlse13.html#tailuserhtmlse13.html" >prev-tail</a>] [<a
<!--l. 1--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlse1.html" >next</a>] [<a
href="userhtmlli1.html" >prev</a>] [<a
href="userhtmlli1.html#tailuserhtmlli1.html" >prev-tail</a>] [<a
href="#tailuserhtmlli2.html">tail</a>] [<a
href="userhtml.html# " >up</a>] </p></div>
href="userhtml.html#userhtmlli2.html" >up</a>] </p></div>
<h3 class="likesectionHead"><a
id="x21-172000"></a>References</h3>
<!--l. 2--><p class="noindent" >
<div class="thebibliography">
<p class="bibitem" ><span class="biblabel">
[1]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XPARA04FOREST"></a>G.&#x00A0;Bella, S.&#x00A0;Filippone, A.&#x00A0;De Maio and M.&#x00A0;Testa, <span
class="pplri7t-">A Simulation Model</span>
<span
class="pplri7t-">for Forest Fires</span>, in J.&#x00A0;Dongarra, K.&#x00A0;Madsen, J.&#x00A0;Wasniewski, editors,
Proceedings of PARA&#x00A0;04 Workshop on State of the Art in Scientific
Computing, pp.&#x00A0;546&#8211;553, Lecture Notes in Computer Science, Springer,
2005.
</p>
<p class="bibitem" ><span class="biblabel">
[2]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XBERTACCINIFILIPPONE"></a>D. Bertaccini&#x00A0;and&#x00A0;S. Filippone, <span
class="pplri7t-">Sparse approximate</span>
<span
class="pplri7t-">inverse preconditioners on high performance GPU platforms</span>, Comput. Math.
Appl., 71, (2016), no.&#x00A0;3, 693&#8211;711.
</p>
<p class="bibitem" ><span class="biblabel">
[3]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="X2007d"></a>A. Buttari, D. di Serafino, P. D&#8217;Ambra, S. Filippone, 2LEV-D2P4:
a package of high-performance preconditioners, Applicable Algebra in
Engineering, Communications and Computing, Volume 18, Number 3,
May, 2007, pp. 223-239
</p>
<p class="bibitem" ><span class="biblabel">
[4]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="X2007c"></a>P. D&#8217;Ambra, S. Filippone, D. Di Serafino On the Development of
PSBLAS-based Parallel Two-level Schwarz Preconditioners Applied
Numerical Mathematics, Elsevier Science, Volume 57, Issues 11-12,
November-December 2007, Pages 1181-1196.
</p>
<p class="bibitem" ><span class="biblabel">
[5]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XBLAS2"></a>Dongarra, J. J., DuCroz, J., Hammarling, S. and Hanson, R., An
Extended Set of Fortran Basic Linear Algebra Subprograms, ACM Trans.
Math. Softw. vol.&#x00A0;14, 1&#8211;17, 1988.
</p>
<p class="bibitem" ><span class="biblabel">
[6]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XBLAS3"></a>Dongarra, J., DuCroz, J., Hammarling, S. and Duff, I., A Set of level
3 Basic Linear Algebra Subprograms, ACM Trans. Math. Softw. vol.&#x00A0;16,
1&#8211;17, 1990.
</p>
<p class="bibitem" ><span class="biblabel">
[7]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XBLACS"></a>J.&#x00A0;J.&#x00A0;Dongarra and R.&#x00A0;C.&#x00A0;Whaley, <span
class="pplri7t-">A User&#8217;s Guide to the BLACS</span>
<span
class="pplri7t-">v.</span><span
class="pplri7t-">&#x00A0;1.1</span>, Lapack Working Note 94, Tech.&#x00A0;Rep.&#x00A0;UT-CS-95-281, University of
Tennessee, March 1995 (updated May 1997).
</p>
<p class="bibitem" ><span class="biblabel">
[8]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="Xsblas97"></a>I.&#x00A0;Duff, M.&#x00A0;Marrone, G.&#x00A0;Radicati and C.&#x00A0;Vittoli, <span
class="pplri7t-">Level 3 Basic Linear</span>
<span
class="pplri7t-">Algebra Subprograms for Sparse Matrices: a User Level Interface</span>, ACM
Transactions on Mathematical Software, 23(3), pp.&#x00A0;379&#8211;401, 1997.
</p>
<p class="bibitem" ><span class="biblabel">
[9]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="Xsblas02"></a>I.&#x00A0;Duff, M.&#x00A0;Heroux and R.&#x00A0;Pozo, <span
class="pplri7t-">An Overview of the Sparse Basic Linear</span>
<span
class="pplri7t-">Algebra Subprograms: the New Standard from the BLAS Technical Forum</span>, ACM
Transactions on Mathematical Software, 28(2), pp.&#x00A0;239&#8211;267, 2002.
</p>
<p class="bibitem" ><span class="biblabel">
[10]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XPSBLAS"></a>S.&#x00A0;Filippone and M.&#x00A0;Colajanni, <span
class="pplri7t-">PSBLAS: A Library for Parallel</span>
<span
class="pplri7t-">Linear Algebra Computation on Sparse Matrices</span>, ACM Transactions on
Mathematical Software, 26(4), pp.&#x00A0;527&#8211;550, 2000.
</p>
<p class="bibitem" ><span class="biblabel">
[11]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XSparse03"></a>S.&#x00A0;Filippone and A.&#x00A0;Buttari, <span
class="pplri7t-">Object-Oriented Techniques for Sparse</span>
<span
class="pplri7t-">Matrix Computations in Fortran 2003</span>, ACM Transactions on Mathematical
Software, 38(4), 2012.
</p>
<p class="bibitem" ><span class="biblabel">
[12]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XKIVA3PSBLAS"></a>S.&#x00A0;Filippone, P.&#x00A0;D&#8217;Ambra, M.&#x00A0;Colajanni, <span
class="pplri7t-">Using a Parallel Library of</span>
<span
class="pplri7t-">Sparse Linear Algebra in a Fluid Dynamics Applications Code on Linux</span>
<span
class="pplri7t-">Clusters</span>, in G.&#x00A0;Joubert, A.&#x00A0;Murli, F.&#x00A0;Peters, M.&#x00A0;Vanneschi, editors,
Parallel Computing - Advances &amp; Current Issues, pp.&#x00A0;441&#8211;448, Imperial
College Press, 2002.
</p>
<p class="bibitem" ><span class="biblabel">
[13]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XDesignPatterns"></a> Gamma, E., Helm, R., Johnson, R., and Vlissides, J. 1995. <span
class="pplri7t-">Design</span>
<span
class="pplri7t-">Patterns: Elements of Reusable Object-Oriented Software</span>. Addison-Wesley.
</p>
<p class="bibitem" ><span class="biblabel">
[14]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XMETIS"></a>Karypis, G. and Kumar, V., <span
class="pplri7t-">METIS: Unstructured Graph Partitioning</span>
<span
class="pplri7t-">and Sparse Matrix Ordering System</span>. Minneapolis, MN 55455: University
of Minnesota, Department of Computer Science, 1995. Internet Address:
<span class="obeylines-h"><span class="verb"><span
class="cmtt-10">http://www.cs.umn.edu/~karypis</span></span></span>.
id="x3-2000"></a>Preface</h3>
<a
id="Q1-3-3"></a>
<!--l. 3--><p class="noindent" >This manual describes the main features of PSBLAS, a library for parallel sparse
computations that has been developed over a number of years.
<!--l. 7--><p class="indent" > Our work has been mainly devoted to providing a foundational toolkit on which
many algorithms can be implemented; the toolkit has proven its effectiveness and
flexibility in many ways. The PSBLAS component deals mostly with the
computational kernels and environment handling; it supports computations on
normal CPUs, including the usage of OpenMP for parallelizing across multiple
cores.
<!--l. 15--><p class="indent" > This foundational package provides linear solvers and some very simple
preconditioners; the companion package AMG4PSBLAS explores how to use the
base toolkit to build much more sophisticated preconditioners which can be plugged
seamlessly into the base solvers.
<!--l. 20--><p class="indent" > The software architecture allows us to offer support for many alternatives in the
implementation, including usage of heterogeneous platforms, and computations
performed on GPUs through CUDA. There is support for GPU computations through
OpenACC, but it is at this time a highly experimental version; we plan to
also look at using accelerators through OpenMP as support from compilers
improves.
<!--l. 28--><p class="indent" > The project is led by Salvatore Filippone; a number of people have been
contributing to this package over the years; contributors in roughly reverse
chronological order: <span class="obeylines-h">
<br />Theophane Loloum
<br />Fabio Durastante
<br />Dimitri Walther
<br />Andrea Di Iorio
<br />Stefano Petrilli
<br />Soren Rasmussen
<br />Zaak Beekman
<br />Ambra Abdullahi Hassan
<br />Pasqua D&#8217;Ambra
<br />Alfredo Buttari
<br />Daniela di Serafino
<br />Michele Martone
<br />Michele Colajanni
<br />Fabio Cerioni
<br />Stefano Maiolatesi
<br />Dario Pascucci</span>
<div class="flushright"
>
</p>
<p class="bibitem" ><span class="biblabel">
[15]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XBLAS1"></a>Lawson, C., Hanson, R., Kincaid, D. and Krogh, F., Basic Linear
Algebra Subprograms for Fortran usage, ACM Trans. Math. Softw. vol.&#x00A0;5,
38&#8211;329, 1979.
</p>
<p class="bibitem" ><span class="biblabel">
[16]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="Xmachiels"></a>Machiels, L. and Deville, M. <span
class="pplri7t-">Fortran 90: An entry to object-oriented</span>
<span
class="pplri7t-">programming for the solution of partial differential equations. </span>ACM Trans.
Math. Softw. vol.&#x00A0;23, 32&#8211;49.
</p>
<p class="bibitem" ><span class="biblabel">
[17]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="Xmetcalf"></a>Metcalf, M., Reid, J. and Cohen, M. <span
class="pplri7t-">Fortran 95/2003 explained. </span>Oxford
University Press, 2004.
</p>
<p class="bibitem" ><span class="biblabel">
[18]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XMRC:11"></a>Metcalf, M., Reid, J. and Cohen, M. <span
class="pplri7t-">Modern Fortran explained. </span>Oxford
University Press, 2011.
</p>
<p class="bibitem" ><span class="biblabel">
[19]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XRouXiaXu:11"></a>Rouson, D.W.I., Xia, J., Xu, X.: Scientific Software Design: The
Object-Oriented Way. Cambridge University Press (2011)
</p>
<p class="bibitem" ><span class="biblabel">
[20]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XMPI1"></a>M.&#x00A0;Snir, S.&#x00A0;Otto, S.&#x00A0;Huss-Lederman, D.&#x00A0;Walker and J.&#x00A0;Dongarra,
<span
class="pplri7t-">MPI: The Complete Reference. Volume 1 - The MPI Core</span>, second edition, MIT
Press, 1998.
</p>
<p class="bibitem" ><span class="biblabel">
[21]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XDesPat:11"></a>D.&#x00A0;Barbieri, V.&#x00A0;Cardellini,
S.&#x00A0;Filippone and D.&#x00A0;Rouson <span
class="pplri7t-">Design Patterns for Scientific Computations</span>
<span
class="pplri7t-">on Sparse Matrices</span>, HPSS 2011, Algorithms and Programming Tools for
Next-Generation High-Performance Scientific Software, Bordeaux, Sep.
2011
</p>
<p class="bibitem" ><span class="biblabel">
[22]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XCaFiRo:2014"></a> Cardellini, V., Filippone, S., and Rouson, D. 2014, Design patterns
for sparse-matrix computations on hybrid CPU/GPU platforms, <span
class="pplri7t-">Scientific</span>
<span
class="pplri7t-">Programming</span>&#x00A0;<span
class="pplri7t-">22,</span>&#x00A0;1, 1&#8211;19.
</p>
<!--l. 49--><p class="noindent" >
Salvatore Filippone<br />
Alfredo Buttari<br />
Fabio Durastante</div>
<p class="bibitem" ><span class="biblabel">
[23]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XOurTechRep"></a>D.&#x00A0;Barbieri, V.&#x00A0;Cardellini, A.&#x00A0;Fanfarillo, S.&#x00A0;Filippone, Three storage
formats for sparse matrices on GPGPUs, Tech. Rep. DICII RR-15.6,
Università di Roma Tor Vergata (February 2015).
</p>
<p class="bibitem" ><span class="biblabel">
[24]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XFilippone:2017:SMM:3034774.3017994"></a>S.&#x00A0;Filippone, V.&#x00A0;Cardellini, D.&#x00A0;Barbieri, and A.&#x00A0;Fanfarillo. Sparse
matrix-vector multiplication on GPGPUs. <span
class="pplri7t-">ACM Trans. Math. Softw.</span>,
43(4):30:1&#8211;30:49, 2017.
</p>
</div>
<!--l. 138--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlse13.html" >prev</a>] [<a
href="userhtmlse13.html#tailuserhtmlse13.html" >prev-tail</a>] [<a
<!--l. 58--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlse1.html" >next</a>] [<a
href="userhtmlli1.html" >prev</a>] [<a
href="userhtmlli1.html#tailuserhtmlli1.html" >prev-tail</a>] [<a
href="userhtmlli2.html" >front</a>] [<a
href="userhtml.html# " >up</a>] </p></div>
<!--l. 138--><p class="indent" > <a
id="tailuserhtmlli2.html"></a>
href="userhtml.html#userhtmlli2.html" >up</a>] </p></div>
<!--l. 58--><p class="indent" > <a
id="tailuserhtmlli2.html"></a>
</body></html>

@ -0,0 +1,225 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<html >
<head><title>References</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="generator" content="TeX4ht (https://tug.org/tex4ht/)">
<meta name="originator" content="TeX4ht (https://tug.org/tex4ht/)">
<!-- html,3 -->
<meta name="src" content="userhtml.tex">
<link rel="stylesheet" type="text/css" href="userhtml.css">
</head><body
>
<!--l. 2--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlse13.html" >prev</a>] [<a
href="userhtmlse13.html#tailuserhtmlse13.html" >prev-tail</a>] [<a
href="#tailuserhtmlli3.html">tail</a>] [<a
href="userhtml.html# " >up</a>] </p></div>
<h3 class="likesectionHead"><a
id="x22-173000"></a>References</h3>
<!--l. 2--><p class="noindent" >
<div class="thebibliography">
<p class="bibitem" ><span class="biblabel">
[1]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XPARA04FOREST"></a>G.&#x00A0;Bella, S.&#x00A0;Filippone, A.&#x00A0;De Maio and M.&#x00A0;Testa, <span
class="pplri7t-">A Simulation Model</span>
<span
class="pplri7t-">for Forest Fires</span>, in J.&#x00A0;Dongarra, K.&#x00A0;Madsen, J.&#x00A0;Wasniewski, editors,
Proceedings of PARA&#x00A0;04 Workshop on State of the Art in Scientific
Computing, pp.&#x00A0;546&#8211;553, Lecture Notes in Computer Science, Springer,
2005.
</p>
<p class="bibitem" ><span class="biblabel">
[2]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XBERTACCINIFILIPPONE"></a>D. Bertaccini&#x00A0;and&#x00A0;S. Filippone, <span
class="pplri7t-">Sparse approximate</span>
<span
class="pplri7t-">inverse preconditioners on high performance GPU platforms</span>, Comput. Math.
Appl., 71, (2016), no.&#x00A0;3, 693&#8211;711.
</p>
<p class="bibitem" ><span class="biblabel">
[3]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="X2007d"></a>A. Buttari, D. di Serafino, P. D&#8217;Ambra, S. Filippone, 2LEV-D2P4:
a package of high-performance preconditioners, Applicable Algebra in
Engineering, Communications and Computing, Volume 18, Number 3,
May, 2007, pp. 223-239
</p>
<p class="bibitem" ><span class="biblabel">
[4]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="X2007c"></a>P. D&#8217;Ambra, S. Filippone, D. Di Serafino, On the Development of
PSBLAS-based Parallel Two-level Schwarz Preconditioners, Applied
Numerical Mathematics, Elsevier Science, Volume 57, Issues 11-12,
November-December 2007, Pages 1181-1196.
</p>
<p class="bibitem" ><span class="biblabel">
[5]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XBLAS2"></a>Dongarra, J. J., DuCroz, J., Hammarling, S. and Hanson, R., An
Extended Set of Fortran Basic Linear Algebra Subprograms, ACM Trans.
Math. Softw. vol.&#x00A0;14, 1&#8211;17, 1988.
</p>
<p class="bibitem" ><span class="biblabel">
[6]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XBLAS3"></a>Dongarra, J., DuCroz, J., Hammarling, S. and Duff, I., A Set of level
3 Basic Linear Algebra Subprograms, ACM Trans. Math. Softw. vol.&#x00A0;16,
1&#8211;17, 1990.
</p>
<p class="bibitem" ><span class="biblabel">
[7]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XBLACS"></a>J.&#x00A0;J.&#x00A0;Dongarra and R.&#x00A0;C.&#x00A0;Whaley, <span
class="pplri7t-">A User&#8217;s Guide to the BLACS</span>
<span
class="pplri7t-">v.</span><span
class="pplri7t-">&#x00A0;1.1</span>, Lapack Working Note 94, Tech.&#x00A0;Rep.&#x00A0;UT-CS-95-281, University of
Tennessee, March 1995 (updated May 1997).
</p>
<p class="bibitem" ><span class="biblabel">
[8]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="Xsblas97"></a>I.&#x00A0;Duff, M.&#x00A0;Marrone, G.&#x00A0;Radicati and C.&#x00A0;Vittoli, <span
class="pplri7t-">Level 3 Basic Linear</span>
<span
class="pplri7t-">Algebra Subprograms for Sparse Matrices: a User Level Interface</span>, ACM
Transactions on Mathematical Software, 23(3), pp.&#x00A0;379&#8211;401, 1997.
</p>
<p class="bibitem" ><span class="biblabel">
[9]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="Xsblas02"></a>I.&#x00A0;Duff, M.&#x00A0;Heroux and R.&#x00A0;Pozo, <span
class="pplri7t-">An Overview of the Sparse Basic Linear</span>
<span
class="pplri7t-">Algebra Subprograms: the New Standard from the BLAS Technical Forum</span>, ACM
Transactions on Mathematical Software, 28(2), pp.&#x00A0;239&#8211;267, 2002.
</p>
<p class="bibitem" ><span class="biblabel">
[10]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XPSBLAS"></a>S.&#x00A0;Filippone and M.&#x00A0;Colajanni, <span
class="pplri7t-">PSBLAS: A Library for Parallel</span>
<span
class="pplri7t-">Linear Algebra Computation on Sparse Matrices</span>, ACM Transactions on
Mathematical Software, 26(4), pp.&#x00A0;527&#8211;550, 2000.
</p>
<p class="bibitem" ><span class="biblabel">
[11]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XSparse03"></a>S.&#x00A0;Filippone and A.&#x00A0;Buttari, <span
class="pplri7t-">Object-Oriented Techniques for Sparse</span>
<span
class="pplri7t-">Matrix Computations in Fortran 2003</span>, ACM Transactions on Mathematical
Software, 38(4), 2012.
</p>
<p class="bibitem" ><span class="biblabel">
[12]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XKIVA3PSBLAS"></a>S.&#x00A0;Filippone, P.&#x00A0;D&#8217;Ambra, M.&#x00A0;Colajanni, <span
class="pplri7t-">Using a Parallel Library of</span>
<span
class="pplri7t-">Sparse Linear Algebra in a Fluid Dynamics Applications Code on Linux</span>
<span
class="pplri7t-">Clusters</span>, in G.&#x00A0;Joubert, A.&#x00A0;Murli, F.&#x00A0;Peters, M.&#x00A0;Vanneschi, editors,
Parallel Computing - Advances &amp; Current Issues, pp.&#x00A0;441&#8211;448, Imperial
College Press, 2002.
</p>
<p class="bibitem" ><span class="biblabel">
[13]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XDesignPatterns"></a> Gamma, E., Helm, R., Johnson, R., and Vlissides, J. 1995. <span
class="pplri7t-">Design</span>
<span
class="pplri7t-">Patterns: Elements of Reusable Object-Oriented Software</span>. Addison-Wesley.
</p>
<p class="bibitem" ><span class="biblabel">
[14]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XMETIS"></a>Karypis, G. and Kumar, V., <span
class="pplri7t-">METIS: Unstructured Graph Partitioning</span>
<span
class="pplri7t-">and Sparse Matrix Ordering System</span>. Minneapolis, MN 55455: University
of Minnesota, Department of Computer Science, 1995. Internet Address:
<span class="obeylines-h"><span class="verb"><span
class="cmtt-10">http://www.cs.umn.edu/~karypis</span></span></span>.
</p>
<p class="bibitem" ><span class="biblabel">
[15]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XBLAS1"></a>Lawson, C., Hanson, R., Kincaid, D. and Krogh, F., Basic Linear
Algebra Subprograms for Fortran usage, ACM Trans. Math. Softw. vol.&#x00A0;5,
308&#8211;323, 1979.
</p>
<p class="bibitem" ><span class="biblabel">
[16]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="Xmachiels"></a>Machiels, L. and Deville, M. <span
class="pplri7t-">Fortran 90: An entry to object-oriented</span>
<span
class="pplri7t-">programming for the solution of partial differential equations. </span>ACM Trans.
Math. Softw. vol.&#x00A0;23, 32&#8211;49.
</p>
<p class="bibitem" ><span class="biblabel">
[17]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="Xmetcalf"></a>Metcalf, M., Reid, J., Cohen, M., Bader, R. <span
class="pplri7t-">Modern Fortran explained.</span>
Oxford University Press, 2024.
</p>
<p class="bibitem" ><span class="biblabel">
[18]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XMRC:11"></a>Metcalf, M., Reid, J. and Cohen, M. <span
class="pplri7t-">Modern Fortran explained. </span>Oxford
University Press, 2011.
</p>
<p class="bibitem" ><span class="biblabel">
[19]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XRouXiaXu:11"></a>Rouson, D.W.I., Xia, J., Xu, X.: Scientific Software Design: The
Object-Oriented Way. Cambridge University Press (2011)
</p>
<p class="bibitem" ><span class="biblabel">
[20]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XMPI1"></a>M.&#x00A0;Snir, S.&#x00A0;Otto, S.&#x00A0;Huss-Lederman, D.&#x00A0;Walker and J.&#x00A0;Dongarra,
<span
class="pplri7t-">MPI: The Complete Reference. Volume 1 - The MPI Core</span>, second edition, MIT
Press, 1998.
</p>
<p class="bibitem" ><span class="biblabel">
[21]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XDesPat:11"></a>D.&#x00A0;Barbieri, V.&#x00A0;Cardellini,
S.&#x00A0;Filippone and D.&#x00A0;Rouson <span
class="pplri7t-">Design Patterns for Scientific Computations</span>
<span
class="pplri7t-">on Sparse Matrices</span>, HPSS 2011, Algorithms and Programming Tools for
Next-Generation High-Performance Scientific Software, Bordeaux, Sep.
2011
</p>
<p class="bibitem" ><span class="biblabel">
[22]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XCaFiRo:2014"></a> Cardellini, V., Filippone, S., and Rouson, D. 2014, Design patterns
for sparse-matrix computations on hybrid CPU/GPU platforms, <span
class="pplri7t-">Scientific</span>
<span
class="pplri7t-">Programming</span>&#x00A0;<span
class="pplri7t-">22,</span>&#x00A0;1, 1&#8211;19.
</p>
<p class="bibitem" ><span class="biblabel">
[23]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XOurTechRep"></a>D.&#x00A0;Barbieri, V.&#x00A0;Cardellini, A.&#x00A0;Fanfarillo, S.&#x00A0;Filippone, Three storage
formats for sparse matrices on GPGPUs, Tech. Rep. DICII RR-15.6,
Università di Roma Tor Vergata (February 2015).
</p>
<p class="bibitem" ><span class="biblabel">
[24]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XFilippone:2017:SMM:3034774.3017994"></a>S.&#x00A0;Filippone, V.&#x00A0;Cardellini, D.&#x00A0;Barbieri, and A.&#x00A0;Fanfarillo. Sparse
matrix-vector multiplication on GPGPUs. <span
class="pplri7t-">ACM Trans. Math. Softw.</span>,
43(4):30:1&#8211;30:49, 2017.
</p>
</div>
<!--l. 139--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlse13.html" >prev</a>] [<a
href="userhtmlse13.html#tailuserhtmlse13.html" >prev-tail</a>] [<a
href="userhtmlli3.html" >front</a>] [<a
href="userhtml.html# " >up</a>] </p></div>
<!--l. 139--><p class="indent" > <a
id="tailuserhtmlli3.html"></a>
</body></html>

@ -10,15 +10,15 @@
<link rel="stylesheet" type="text/css" href="userhtml.css">
</head><body
>
<!--l. 1--><div class="crosslinks"><p class="noindent">[<a
<!--l. 58--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlse2.html" >next</a>] [<a
href="userhtmlli1.html" >prev</a>] [<a
href="userhtmlli1.html#tailuserhtmlli1.html" >prev-tail</a>] [<a
href="userhtmlli2.html" >prev</a>] [<a
href="userhtmlli2.html#tailuserhtmlli2.html" >prev-tail</a>] [<a
href="#tailuserhtmlse1.html">tail</a>] [<a
href="userhtml.html#userhtmlse1.html" >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">1 </span> <a
id="x3-20001"></a>Introduction</h3>
<!--l. 3--><p class="noindent" >The PSBLAS library, developed with the aim to facilitate the parallelization of
id="x4-30001"></a>Introduction</h3>
<!--l. 60--><p class="noindent" >The PSBLAS library, developed with the aim to facilitate the parallelization of
computationally intensive scientific applications, is designed to address parallel
implementation of iterative solvers for sparse linear systems through the
distributed memory paradigm. It includes routines for multiplying sparse
@ -27,37 +27,38 @@ diagonal entries, preprocessing sparse matrices, and contains additional
routines for dense matrix operations. The current implementation of PSBLAS
addresses a distributed memory execution model operating with message
passing.
<!--l. 14--><p class="indent" > The PSBLAS library version 3 is implemented in the Fortran&#x00A0;2003&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#Xmetcalf">17</a>]</span>
<!--l. 71--><p class="indent" > The PSBLAS library version 3 is implemented in the Fortran&#x00A0;2008&#x00A0;<span class="cite">[<a
href="userhtmlli3.html#Xmetcalf">17</a>]</span>
programming language, with reuse and/or adaptation of existing Fortran&#x00A0;77 and
Fortran&#x00A0;95 software, plus a handful of C routines.
<!--l. 19--><p class="indent" > The use of Fortran&#x00A0;2003 offers a number of advantages over Fortran&#x00A0;95, mostly
<!--l. 76--><p class="indent" > The use of Fortran&#x00A0;2008 offers a number of advantages over Fortran&#x00A0;95, mostly
in the handling of requirements for evolution and adaptation of the library to new
computing architectures and integration of new algorithms. For a detailed
discussion of our design see&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#XSparse03">11</a>]</span>; other works discussing advanced programming in
Fortran&#x00A0;2003 include&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#XDesPat:11">21</a>,&#x00A0;<a
href="userhtmlli2.html#XRouXiaXu:11">19</a>]</span>; sufficient support for Fortran&#x00A0;2003 is now available
from many compilers, including the GNU Fortran compiler from the Free Software
Foundation (as of version 4.8).
<!--l. 30--><p class="indent" > Previous approaches have been based on mixing Fortran&#x00A0;95, with its support for
href="userhtmlli3.html#XSparse03">11</a>]</span>; other works discussing advanced programming in
Fortran&#x00A0;2008 include&#x00A0;<span class="cite">[<a
href="userhtmlli3.html#XDesPat:11">21</a>,&#x00A0;<a
href="userhtmlli3.html#XRouXiaXu:11">19</a>]</span>; sufficient support for Fortran&#x00A0;2008 is now available
from many compilers, including recent versions of the GNU Fortran compiler from
the Free Software Foundation, and the FLANG compiler from the LLVM
project.
<!--l. 88--><p class="indent" > Previous approaches have been based on mixing Fortran&#x00A0;95, with its support for
object-based design, with other languages; these have been advocated by a number
of authors, e.g.&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#Xmachiels">16</a>]</span>. Moreover, the Fortran&#x00A0;95 facilities for dynamic memory
href="userhtmlli3.html#Xmachiels">16</a>]</span>. Moreover, the Fortran&#x00A0;95 facilities for dynamic memory
management and interface overloading greatly enhance the usability of the PSBLAS
subroutines. In this way, the library can take care of runtime memory requirements
that are quite difficult or even impossible to predict at implementation or
compilation time.
<!--l. 40--><p class="indent" > The presentation of the PSBLAS library follows the general structure of the
<!--l. 98--><p class="indent" > The presentation of the PSBLAS library follows the general structure of the
proposal for serial Sparse BLAS&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#Xsblas97">8</a>,&#x00A0;<a
href="userhtmlli2.html#Xsblas02">9</a>]</span>, which in its turn is based on the proposal for
href="userhtmlli3.html#Xsblas97">8</a>,&#x00A0;<a
href="userhtmlli3.html#Xsblas02">9</a>]</span>, which in its turn is based on the proposal for
BLAS on dense matrices&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#XBLAS1">15</a>,&#x00A0;<a
href="userhtmlli2.html#XBLAS2">5</a>,&#x00A0;<a
href="userhtmlli2.html#XBLAS3">6</a>]</span>.
<!--l. 45--><p class="indent" > The applicability of sparse iterative solvers to many different areas causes
href="userhtmlli3.html#XBLAS1">15</a>,&#x00A0;<a
href="userhtmlli3.html#XBLAS2">5</a>,&#x00A0;<a
href="userhtmlli3.html#XBLAS3">6</a>]</span>.
<!--l. 103--><p class="indent" > The applicability of sparse iterative solvers to many different areas causes
some terminology problems because the same concept may be denoted
through different names depending on the application area. The PSBLAS
features presented in this document will be discussed referring to a finite
@ -66,13 +67,13 @@ the scope of the library is wider than that: for example, it can be applied
to finite element discretizations of PDEs, and even to different classes of
problems such as nonlinear optimization, for example in optimal control
problems.
<!--l. 55--><p class="indent" > The design of a solver for sparse linear systems is driven by many conflicting
<!--l. 113--><p class="indent" > The design of a solver for sparse linear systems is driven by many conflicting
objectives, such as limiting occupation of storage resources, exploiting regularities in
the input data, exploiting hardware characteristics of the parallel platform. To
achieve an optimal communication to computation ratio on distributed memory
achieve an optimal communication to computation ratio on distributed memory
machines it is essential to keep the <span
class="pplri7t-">data locality </span>as high as possible; this can
be done through an appropriate data allocation strategy. The choice of the
@ -87,12 +88,12 @@ applications.
<!--l. 72--><div class="crosslinks"><p class="noindent">[<a
<!--l. 130--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlse2.html" >next</a>] [<a
href="userhtmlli1.html" >prev</a>] [<a
href="userhtmlli1.html#tailuserhtmlli1.html" >prev-tail</a>] [<a
href="userhtmlli2.html" >prev</a>] [<a
href="userhtmlli2.html#tailuserhtmlli2.html" >prev-tail</a>] [<a
href="userhtmlse1.html" >front</a>] [<a
href="userhtml.html#userhtmlse1.html" >up</a>] </p></div>
<!--l. 72--><p class="indent" > <a
<!--l. 130--><p class="indent" > <a
id="tailuserhtmlse1.html"></a>
</body></html>

@ -16,7 +16,7 @@ href="userhtmlse9.html#tailuserhtmlse9.html" >prev-tail</a>] [<a
href="userhtmlse7.html#tailuserhtmlse10.html">tail</a>] [<a
href="userhtml.html#userhtmlse13.html" >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">10 </span> <a
id="x15-13500010"></a>Preconditioner routines</h3>
id="x16-13600010"></a>Preconditioner routines</h3>
<!--l. 6--><p class="noindent" >The base PSBLAS library contains the implementation of some simple preconditioning
techniques:
<ul class="itemize1">
@ -38,7 +38,7 @@ for backward compatibility
<h4 class="subsectionHead"><span class="titlemark">10.1 </span> <a
id="x15-13600010.1"></a>init &#8212; Initialize a preconditioner</h4>
id="x16-13700010.1"></a>init &#8212; Initialize a preconditioner</h4>
@ -125,8 +125,8 @@ class="newline" />Error code: if no error, 0 is returned.</dd></dl>
class="pplb7t-x-x-120">Notes </span>Legal inputs to this subroutine are interpreted depending on the <span
class="zplmr7m-">ptype </span>string as
follows<span class="footnote-mark"><a
href="userhtml16.html#fn4x0"><sup class="textsuperscript">4</sup></a></span><a
id="x15-136001f4"></a> :
href="userhtml17.html#fn4x0"><sup class="textsuperscript">4</sup></a></span><a
id="x16-137001f4"></a> :
<dl class="description"><dt class="description">
<!--l. 75--><p class="noindent" >
<span
@ -152,12 +152,12 @@ class="description">
class="zplmr7m-">A</span>, where block boundaries are determined
by the data allocation boundaries for each process; requires no
communication. See also Table-<a
href="#x15-137001r21">21<!--tex4ht:ref: tab:p_subsolve_1 --></a>.</dd></dl>
href="#x16-138001r21">21<!--tex4ht:ref: tab:p_subsolve_1 --></a>.</dd></dl>
<h4 class="subsectionHead"><span class="titlemark">10.2 </span> <a
id="x15-13700010.2"></a>Set &#8212; set preconditioner parameters</h4>
id="x16-13800010.2"></a>Set &#8212; set preconditioner parameters</h4>
<div class="center"
>
<!--l. 92--><p class="noindent" >
@ -181,7 +181,7 @@ class="td11"><!--l. 105--><p class="noindent" ><code class="lstinline"><span sty
class="td11"><!--l. 106--><p class="noindent" > </td><td style="white-space:normal; text-align:left;" id="TBL-23-2-2"
class="td11"><!--l. 106--><p class="noindent" >The parameter to be set. It can be specified through its name; the string is
case-insensitive. See Table&#x00A0;<a
href="#x15-137001r21">21<!--tex4ht:ref: tab:p_subsolve_1 --></a>. </td>
href="#x16-138001r21">21<!--tex4ht:ref: tab:p_subsolve_1 --></a>. </td>
</tr><tr
style="vertical-align:baseline;" id="TBL-23-3-"><td style="white-space:normal; text-align:left;" id="TBL-23-3-1"
class="td11"><!--l. 109--><p class="noindent" ><code class="lstinline"><span style="color:#000000">val</span><span style="color:#000000"> </span></code> </td><td style="white-space:normal; text-align:left;" id="TBL-23-3-2"
@ -195,7 +195,7 @@ class="pplri7t-">or </span><code class="lstinline"><span style="color:#000000"
class="td11"><!--l. 112--><p class="noindent" > </td><td style="white-space:normal; text-align:left;" id="TBL-23-4-2"
class="td11"><!--l. 112--><p class="noindent" >The value of the parameter to be set. The list of allowed values and the
corresponding data types is given in Table&#x00A0;<a
href="#x15-137001r21">21<!--tex4ht:ref: tab:p_subsolve_1 --></a>. When the value is of type
href="#x16-138001r21">21<!--tex4ht:ref: tab:p_subsolve_1 --></a>. When the value is of type
<code class="lstinline"><span style="color:#000000">character</span><span style="color:#000000">(</span><span style="color:#000000">len</span><span style="color:#000000">=*)</span></code>, it is also treated as case insensitive. </td>
</tr><tr
style="vertical-align:baseline;" id="TBL-23-5-"><td style="white-space:normal; text-align:left;" id="TBL-23-5-1"
@ -205,18 +205,18 @@ class="td11"><!--l. 117--><p class="noindent" ><code class="lstinline"><span sty
style="vertical-align:baseline;" id="TBL-23-6-"><td style="white-space:normal; text-align:left;" id="TBL-23-6-1"
class="td11"><!--l. 118--><p class="noindent" > </td><td style="white-space:normal; text-align:left;" id="TBL-23-6-2"
class="td11"><!--l. 118--><p class="noindent" >Error code. If no error, 0 is returned. See Section&#x00A0;<a
href="userhtmlse8.html#x13-1230008">8<!--tex4ht:ref: sec:errors --></a> for details. </td> </tr></table></div>
href="userhtmlse8.html#x14-1240008">8<!--tex4ht:ref: sec:errors --></a> for details. </td> </tr></table></div>
<!--l. 123--><p class="noindent" >A number of subdomain solvers can be chosen with this method; a list of the
parameters that can be set, along with their allowed and default values, is given in
Table-<a
href="#x15-137001r21">21<!--tex4ht:ref: tab:p_subsolve_1 --></a>.<br
href="#x16-138001r21">21<!--tex4ht:ref: tab:p_subsolve_1 --></a>.<br
class="newline" />
<div class="table">
<!--l. 130--><p class="indent" > <a
id="x15-137001r21"></a><hr class="float"><div class="float"
id="x16-138001r21"></a><hr class="float"><div class="float"
>
@ -315,7 +315,7 @@ class="pplr7t-x-x-90">suited for GPUs since they do not employ</sp
class="pplr7t-x-x-90">triangular system solve kernels, see</span><span
class="pplr7t-x-x-90">&#x00A0;</span><span class="cite"><span
class="pplr7t-x-x-90">[</span><a
href="userhtmlli2.html#XBERTACCINIFILIPPONE"><span
href="userhtmlli3.html#XBERTACCINIFILIPPONE"><span
class="pplr7t-x-x-90">2</span></a><span
class="pplr7t-x-x-90">]</span></span><span
class="pplr7t-x-x-90">.</span> </td>
@ -450,7 +450,7 @@ class="hline"><td><hr></td><td><hr></td><td><hr></td><td><hr></td><td><hr></td><
class="td11"> </td></tr></table> </div></div>
<br /> <div class="caption"
><span class="id">Table&#x00A0;21: </span><span
class="content">Parameters defining the solver of the BJAC preconditioner.</span></div><!--tex4ht:label?: x15-137001r21 -->
class="content">Parameters defining the solver of the BJAC preconditioner.</span></div><!--tex4ht:label?: x16-138001r21 -->
@ -460,7 +460,7 @@ class="content">Parameters defining the solver of the BJAC preconditioner.</span
<h4 class="subsectionHead"><span class="titlemark">10.3 </span> <a
id="x15-13800010.3"></a>build &#8212; Builds a preconditioner</h4>
id="x16-13900010.3"></a>build &#8212; Builds a preconditioner</h4>
@ -644,7 +644,7 @@ devices, such as GPUs and other accelerators.
<h4 class="subsectionHead"><span class="titlemark">10.4 </span> <a
id="x15-13900010.4"></a>apply &#8212; Preconditioner application routine</h4>
id="x16-14000010.4"></a>apply &#8212; Preconditioner application routine</h4>
@ -780,14 +780,15 @@ class="newline" />Intent: <span
class="pplb7t-">out</span>.<br
class="newline" />An integer value; 0 means no error has been detected.</dd></dl>
<!--l. 315--><p class="noindent" ><span
class="pplb7t-x-x-120">Notes </span>This method is almost always called by the iterative methods of Sec.&#x00A0;<a
href="userhtmlse11.html#x17-14500011">11<!--tex4ht:ref: sec:methods --></a>, and
practically never directly by the user.
class="pplb7t-x-x-120">Notes </span>This method is almost always called by the iterative methods of
Sec.&#x00A0;<a
href="userhtmlse11.html#x18-14600011">11<!--tex4ht:ref: sec:methods --></a>; it is extremely unlikely to be needed directly by the application
developer.
<h4 class="subsectionHead"><span class="titlemark">10.5 </span> <a
id="x15-14000010.5"></a>descr &#8212; Prints a description of current preconditioner</h4>
id="x16-14100010.5"></a>descr &#8212; Prints a description of current preconditioner</h4>
@ -795,26 +796,26 @@ practically never directly by the user.
call&#x00A0;prec%descr(info)
call&#x00A0;prec%descr(info,iout,&#x00A0;root)
</pre>
<!--l. 326--><p class="nopar" >
<!--l. 328--><p class="indent" >
<!--l. 327--><p class="nopar" >
<!--l. 329--><p class="indent" >
<dl class="description"><dt class="description">
<!--l. 329--><p class="noindent" >
<!--l. 330--><p class="noindent" >
<span
class="pplb7t-">Type:</span> </dt><dd
class="description">
<!--l. 329--><p class="noindent" >Asynchronous.
<!--l. 330--><p class="noindent" >Asynchronous.
</dd><dt class="description">
<!--l. 330--><p class="noindent" >
<!--l. 331--><p class="noindent" >
<span
class="pplb7t-">On Entry</span> </dt><dd
class="description">
<!--l. 330--><p class="noindent" >
</dd><dt class="description">
<!--l. 331--><p class="noindent" >
</dd><dt class="description">
<!--l. 332--><p class="noindent" >
<span
class="pplb7t-">prec</span> </dt><dd
class="description">
<!--l. 331--><p class="noindent" >the preconditioner. Scope: <span
<!--l. 332--><p class="noindent" >the preconditioner. Scope: <span
class="pplb7t-">local </span><br
class="newline" />Type: <span
class="pplb7t-">required</span><br
@ -826,11 +827,11 @@ class="cmtt-10">psb</span><span
class="cmtt-10">_Tprec</span><span
class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 336--><p class="noindent" >
<!--l. 337--><p class="noindent" >
<span
class="pplb7t-">iout</span> </dt><dd
class="description">
<!--l. 336--><p class="noindent" >output unit. Scope: <span
<!--l. 337--><p class="noindent" >output unit. Scope: <span
class="pplb7t-">local </span><br
class="newline" />Type: <span
class="pplb7t-">optional</span><br
@ -838,11 +839,11 @@ class="newline" />Intent: <span
class="pplb7t-">in</span>.<br
class="newline" />Specified as: an integer number. Default: default output unit.
</dd><dt class="description">
<!--l. 341--><p class="noindent" >
<!--l. 342--><p class="noindent" >
<span
class="pplb7t-">root</span> </dt><dd
class="description">
<!--l. 341--><p class="noindent" >Process from which to print Scope: <span
<!--l. 342--><p class="noindent" >Process from which to print. Scope: <span
class="pplb7t-">local </span><br
class="newline" />Type: <span
class="pplb7t-">optional</span><br
@ -855,20 +856,20 @@ class="zplmr7y-">- </span>1, in which case
class="zplmr7y-">-</span>1, in which case all
processes will print. Default: 0.
</dd><dt class="description">
<!--l. 348--><p class="noindent" >
<!--l. 349--><p class="noindent" >
<span
class="pplb7t-">On Return</span> </dt><dd
class="description">
<!--l. 348--><p class="noindent" >
<!--l. 349--><p class="noindent" >
</dd><dt class="description">
<!--l. 349--><p class="noindent" >
<!--l. 350--><p class="noindent" >
<span
class="pplb7t-">info</span> </dt><dd
class="description">
<!--l. 349--><p class="noindent" >Error code.<br
<!--l. 350--><p class="noindent" >Error code.<br
class="newline" />Scope: <span
class="pplb7t-">local </span><br
class="newline" />Type: <span
@ -880,86 +881,86 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
<h4 class="subsectionHead"><span class="titlemark">10.6 </span> <a
id="x15-14100010.6"></a>clone &#8212; clone current preconditioner</h4>
id="x16-14200010.6"></a>clone &#8212; clone current preconditioner</h4>
<pre class="verbatim" id="verbatim-101">
call&#x00A0;&#x00A0;prec%clone(precout,info)
</pre>
<!--l. 362--><p class="nopar" >
<!--l. 364--><p class="indent" >
<!--l. 363--><p class="nopar" >
<!--l. 365--><p class="indent" >
<dl class="description"><dt class="description">
<!--l. 365--><p class="noindent" >
<!--l. 366--><p class="noindent" >
<span
class="pplb7t-">Type:</span> </dt><dd
class="description">
<!--l. 365--><p class="noindent" >Asynchronous.
<!--l. 366--><p class="noindent" >Asynchronous.
</dd><dt class="description">
<!--l. 366--><p class="noindent" >
<!--l. 367--><p class="noindent" >
<span
class="pplb7t-">On Entry</span> </dt><dd
class="description">
<!--l. 366--><p class="noindent" >
</dd><dt class="description">
<!--l. 367--><p class="noindent" >
</dd><dt class="description">
<!--l. 368--><p class="noindent" >
<span
class="pplb7t-">prec</span> </dt><dd
class="description">
<!--l. 367--><p class="noindent" >the preconditioner.<br
<!--l. 368--><p class="noindent" >the preconditioner.<br
class="newline" />Scope: <span
class="pplb7t-">local</span>.<br
class="newline" /></dd></dl>
<!--l. 374--><p class="indent" >
<!--l. 375--><p class="indent" >
<dl class="description"><dt class="description">
<!--l. 375--><p class="noindent" >
<!--l. 376--><p class="noindent" >
<span
class="pplb7t-">On Return</span> </dt><dd
class="description">
<!--l. 375--><p class="noindent" >
</dd><dt class="description">
<!--l. 376--><p class="noindent" >
</dd><dt class="description">
<!--l. 377--><p class="noindent" >
<span
class="pplb7t-">precout</span> </dt><dd
class="description">
<!--l. 376--><p class="noindent" >A copy of the input object.
<!--l. 377--><p class="noindent" >A copy of the input object.
</dd><dt class="description">
<!--l. 377--><p class="noindent" >
<!--l. 378--><p class="noindent" >
<span
class="pplb7t-">info</span> </dt><dd
class="description">
<!--l. 377--><p class="noindent" >Return code.</dd></dl>
<!--l. 378--><p class="noindent" >Return code.</dd></dl>
<h4 class="subsectionHead"><span class="titlemark">10.7 </span> <a
id="x15-14200010.7"></a>free &#8212; Free a preconditioner</h4>
id="x16-14300010.7"></a>free &#8212; Free a preconditioner</h4>
<pre class="verbatim" id="verbatim-102">
call&#x00A0;prec%free(info)
</pre>
<!--l. 385--><p class="nopar" >
<!--l. 387--><p class="indent" >
<!--l. 386--><p class="nopar" >
<!--l. 388--><p class="indent" >
<dl class="description"><dt class="description">
<!--l. 388--><p class="noindent" >
<!--l. 389--><p class="noindent" >
<span
class="pplb7t-">Type:</span> </dt><dd
class="description">
<!--l. 388--><p class="noindent" >Asynchronous.
<!--l. 389--><p class="noindent" >Asynchronous.
</dd><dt class="description">
<!--l. 389--><p class="noindent" >
<!--l. 390--><p class="noindent" >
<span
class="pplb7t-">On Entry</span> </dt><dd
class="description">
<!--l. 389--><p class="noindent" >
</dd><dt class="description">
<!--l. 390--><p class="noindent" >
</dd><dt class="description">
<!--l. 391--><p class="noindent" >
<span
class="pplb7t-">prec</span> </dt><dd
class="description">
<!--l. 390--><p class="noindent" >the preconditioner.<br
<!--l. 391--><p class="noindent" >the preconditioner.<br
class="newline" />Scope: <span
class="pplb7t-">local</span>.<br
class="newline" />Type: <span
@ -972,17 +973,17 @@ class="cmtt-10">psb</span><span
class="cmtt-10">_Tprec</span><span
class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 403--><p class="noindent" >
<!--l. 404--><p class="noindent" >
<span
class="pplb7t-">On Exit</span> </dt><dd
class="description">
<!--l. 403--><p class="noindent" >
<!--l. 404--><p class="noindent" >
</dd><dt class="description">
<!--l. 405--><p class="noindent" >
<!--l. 406--><p class="noindent" >
<span
class="pplb7t-">prec</span> </dt><dd
class="description">
<!--l. 405--><p class="noindent" >Scope: <span
<!--l. 406--><p class="noindent" >Scope: <span
class="pplb7t-">local </span><br
class="newline" />Type: <span
class="pplb7t-">required</span><br
@ -994,50 +995,50 @@ class="cmtt-10">psb</span><span
class="cmtt-10">_Tprec</span><span
class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 410--><p class="noindent" >
<!--l. 411--><p class="noindent" >
<span
class="pplb7t-">info</span> </dt><dd
class="description">
<!--l. 410--><p class="noindent" >Scope: <span
<!--l. 411--><p class="noindent" >Scope: <span
class="pplb7t-">global </span><br
class="newline" />Type: <span
class="pplb7t-">required</span><br
class="newline" />Intent: <span
class="pplb7t-">out</span>.<br
class="newline" />Error code: if no error, 0 is returned.</dd></dl>
<!--l. 416--><p class="noindent" ><span
<!--l. 417--><p class="noindent" ><span
class="pplb7t-x-x-120">Notes </span>Releases all internal storage.
<h4 class="subsectionHead"><span class="titlemark">10.8 </span> <a
id="x15-14300010.8"></a>allocate_wrk &#8212; preconditioner</h4>
id="x16-14400010.8"></a>allocate_wrk &#8212; preconditioner</h4>
<pre class="verbatim" id="verbatim-103">
call&#x00A0;prec%allocate_wrk(info[,vmold])
</pre>
<!--l. 428--><p class="nopar" >
<!--l. 430--><p class="indent" >
<!--l. 429--><p class="nopar" >
<!--l. 431--><p class="indent" >
<dl class="description"><dt class="description">
<!--l. 431--><p class="noindent" >
<!--l. 432--><p class="noindent" >
<span
class="pplb7t-">Type:</span> </dt><dd
class="description">
<!--l. 431--><p class="noindent" >Synchronous.
<!--l. 432--><p class="noindent" >Synchronous.
</dd><dt class="description">
<!--l. 432--><p class="noindent" >
<!--l. 433--><p class="noindent" >
<span
class="pplb7t-">On Entry</span> </dt><dd
class="description">
<!--l. 432--><p class="noindent" >
</dd><dt class="description">
<!--l. 433--><p class="noindent" >
</dd><dt class="description">
<!--l. 434--><p class="noindent" >
<span
class="pplb7t-">prec</span> </dt><dd
class="description">
<!--l. 433--><p class="noindent" >the preconditioner.<br
<!--l. 434--><p class="noindent" >the preconditioner.<br
class="newline" />Scope: <span
class="pplb7t-">local</span>.<br
class="newline" />Type: <span
@ -1050,11 +1051,11 @@ class="cmtt-10">psb</span><span
class="cmtt-10">_Tprec</span><span
class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 438--><p class="noindent" >
<!--l. 439--><p class="noindent" >
<span
class="pplb7t-">vmold</span> </dt><dd
class="description">
<!--l. 438--><p class="noindent" >The desired dynamic type for the internal vector storage.<br
<!--l. 439--><p class="noindent" >The desired dynamic type for the internal vector storage.<br
class="newline" />Scope: <span
class="pplb7t-">local</span>.<br
class="newline" />Type: <span
@ -1069,17 +1070,17 @@ class="cmtt-10">_base</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span>.
</dd><dt class="description">
<!--l. 443--><p class="noindent" >
<!--l. 444--><p class="noindent" >
<span
class="pplb7t-">On Exit</span> </dt><dd
class="description">
<!--l. 443--><p class="noindent" >
<!--l. 444--><p class="noindent" >
</dd><dt class="description">
<!--l. 445--><p class="noindent" >
<!--l. 446--><p class="noindent" >
<span
class="pplb7t-">prec</span> </dt><dd
class="description">
<!--l. 445--><p class="noindent" >Scope: <span
<!--l. 446--><p class="noindent" >Scope: <span
class="pplb7t-">local </span><br
class="newline" />Type: <span
class="pplb7t-">required</span><br
@ -1094,37 +1095,37 @@ class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 450--><p class="noindent" >
<!--l. 451--><p class="noindent" >
<span
class="pplb7t-">info</span> </dt><dd
class="description">
<!--l. 450--><p class="noindent" >Scope: <span
<!--l. 451--><p class="noindent" >Scope: <span
class="pplb7t-">global </span><br
class="newline" />Type: <span
class="pplb7t-">required</span><br
class="newline" />Intent: <span
class="pplb7t-">out</span>.<br
class="newline" />Error code: if no error, 0 is returned.</dd></dl>
<!--l. 456--><p class="noindent" ><span
<!--l. 457--><p class="noindent" ><span
class="pplb7t-x-x-120">Notes </span>Preconditioners often need internal work storage during their application at
each iteration of a linear solver method: in many situations this can be accomplished
by allocating and releasing memory &#8220;on the fly&#8221;. However, when running on an
accelerator through e.g. the CUDA-enabled data structures of Sec.&#x00A0;<a
href="userhtmlse12.html#x19-15600012.4">12.4<!--tex4ht:ref: sec:cudastruct --></a> and &#x00A0;<a
href="userhtmlse13.html#x20-15700013">13<!--tex4ht:ref: sec:cudaenv --></a>,
href="userhtmlse12.html#x20-15700012.4">12.4<!--tex4ht:ref: sec:cudastruct --></a> and &#x00A0;<a
href="userhtmlse13.html#x21-15800013">13<!--tex4ht:ref: sec:cudaenv --></a>,
memory allocation and deallocation usually have a much larger overhead,
significantly affecting performance. To alleviate this problem we define this method
that preallocates internal storage; it is intended to be invoked prior to the iterative
solver method, so that the necessary internal work storage is available throughout
the iterative method application.
<!--l. 473--><p class="indent" > When using GPUs or other specialized devices, the <code class="lstinline"><span style="color:#000000">vmold</span></code> argument is also
<!--l. 474--><p class="indent" > When using GPUs or other specialized devices, the <code class="lstinline"><span style="color:#000000">vmold</span></code> argument is also
necessary to ensure the internal work vectors are of the appropriate dynamic type to
exploit the accelerator hardware.
<h4 class="subsectionHead"><span class="titlemark">10.9 </span> <a
id="x15-14400010.9"></a>deallocate_wrk &#8212; preconditioner</h4>
id="x16-14500010.9"></a>deallocate_wrk &#8212; preconditioner</h4>
@ -1132,26 +1133,26 @@ exploit the accelerator hardware.
call&#x00A0;prec%allocate_wrk(info)
call&#x00A0;prec%free_wrk(info)
</pre>
<!--l. 483--><p class="nopar" >
<!--l. 485--><p class="indent" >
<!--l. 484--><p class="nopar" >
<!--l. 486--><p class="indent" >
<dl class="description"><dt class="description">
<!--l. 486--><p class="noindent" >
<!--l. 487--><p class="noindent" >
<span
class="pplb7t-">Type:</span> </dt><dd
class="description">
<!--l. 486--><p class="noindent" >Synchronous.
<!--l. 487--><p class="noindent" >Synchronous.
</dd><dt class="description">
<!--l. 487--><p class="noindent" >
<!--l. 488--><p class="noindent" >
<span
class="pplb7t-">On Entry</span> </dt><dd
class="description">
<!--l. 487--><p class="noindent" >
</dd><dt class="description">
<!--l. 488--><p class="noindent" >
</dd><dt class="description">
<!--l. 489--><p class="noindent" >
<span
class="pplb7t-">prec</span> </dt><dd
class="description">
<!--l. 488--><p class="noindent" >the preconditioner.<br
<!--l. 489--><p class="noindent" >the preconditioner.<br
class="newline" />Scope: <span
class="pplb7t-">local</span>.<br
class="newline" />Type: <span
@ -1164,17 +1165,17 @@ class="cmtt-10">psb</span><span
class="cmtt-10">_Tprec</span><span
class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 493--><p class="noindent" >
<!--l. 494--><p class="noindent" >
<span
class="pplb7t-">On Exit</span> </dt><dd
class="description">
<!--l. 493--><p class="noindent" >
<!--l. 494--><p class="noindent" >
</dd><dt class="description">
<!--l. 495--><p class="noindent" >
<!--l. 496--><p class="noindent" >
<span
class="pplb7t-">prec</span> </dt><dd
class="description">
<!--l. 495--><p class="noindent" >Scope: <span
<!--l. 496--><p class="noindent" >Scope: <span
class="pplb7t-">local </span><br
class="newline" />Type: <span
class="pplb7t-">required</span><br
@ -1186,11 +1187,11 @@ class="cmtt-10">psb</span><span
class="cmtt-10">_Tprec</span><span
class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 500--><p class="noindent" >
<!--l. 501--><p class="noindent" >
<span
class="pplb7t-">info</span> </dt><dd
class="description">
<!--l. 500--><p class="noindent" >Scope: <span
<!--l. 501--><p class="noindent" >Scope: <span
class="pplb7t-">global </span><br
class="newline" />Type: <span
class="pplb7t-">required</span><br
@ -1200,10 +1201,10 @@ class="newline" />Error code: if no error, 0 is returned.</dd></dl>
<!--l. 506--><p class="noindent" ><span
<!--l. 507--><p class="noindent" ><span
class="pplb7t-x-x-120">Notes </span>Deallocates preconditioner internal work storage; to be invoked after an
iterative solver has completed execution, see the discussion in Sec.&#x00A0;<a
href="#x15-14300010.8">10.8<!--tex4ht:ref: sec:allocatewrk --></a>.
href="#x16-14400010.8">10.8<!--tex4ht:ref: sec:allocatewrk --></a>.

@ -16,7 +16,7 @@ href="userhtmlse10.html#tailuserhtmlse10.html" >prev-tail</a>] [<a
href="userhtmlse8.html#tailuserhtmlse11.html">tail</a>] [<a
href="userhtml.html# " >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">11 </span> <a
id="x17-14500011"></a>Iterative Methods</h3>
id="x18-14600011"></a>Iterative Methods</h3>
<!--l. 4--><p class="noindent" >In this chapter we provide routines for preconditioners and iterative methods. The
interfaces for iterative methods are available in the module <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_linsolve_mod</span></span></span>.
@ -24,7 +24,7 @@ class="cmtt-10">psb_linsolve_mod</span></span></span>.
<h4 class="subsectionHead"><span class="titlemark">11.1 </span> <a
id="x17-14600011.1"></a>psb_krylov &#8212; Krylov Methods Driver Routine</h4>
id="x18-14700011.1"></a>psb_krylov &#8212; Krylov Methods Driver Routine</h4>
<!--l. 17--><p class="noindent" >This subroutine is a driver that provides a general interface for all the Krylov-Subspace
family methods implemented in PSBLAS version 2.
<!--l. 20--><p class="indent" > The stopping criterion can take the following values:
@ -83,7 +83,7 @@ iteration.
<pre class="lstlisting" id="listing-219"><span class="label"><a
id="x17-146001r1"></a></span><span style="color:#000000"><span
id="x18-147001r1"></a></span><span style="color:#000000"><span
class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">psb_krylov</span></span><span style="color:#000000"><span
class="cmtt-10">(</span></span><span style="color:#000000"><span
@ -104,7 +104,7 @@ class="cmtt-10">,</span></span><span style="color:#000000"><span
class="cmtt-10">info</span></span><span style="color:#000000"><span
class="cmtt-10">,&amp;</span></span>
<span class="label"><a
id="x17-146002r2"></a></span><span
id="x18-147002r2"></a></span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span
@ -169,8 +169,8 @@ class="description">
class="pplb7t-">FCG:</span> </dt><dd
class="description">
<!--l. 52--><p class="noindent" >the Flexible Conjugate Gradient method<span class="footnote-mark"><a
href="userhtml18.html#fn5x0"><sup class="textsuperscript">5</sup></a></span><a
id="x17-146003f5"></a> ;
href="userhtml19.html#fn5x0"><sup class="textsuperscript">5</sup></a></span><a
id="x18-147003f5"></a> ;
</dd><dt class="description">
<!--l. 55--><p class="noindent" >
<span
@ -460,7 +460,7 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
<h4 class="subsectionHead"><span class="titlemark">11.2 </span> <a
id="x17-14700011.2"></a>psb_richardson &#8212; Richardson Iteration Driver Routine</h4>
id="x18-14800011.2"></a>psb_richardson &#8212; Richardson Iteration Driver Routine</h4>
<!--l. 158--><p class="noindent" >This subroutine is a driver implementing a Richardson iteration
<div class="math-display" >
<img
@ -525,7 +525,7 @@ class="zplmr7m-">i</span>-th
iteration.
<!--l. 179-->
<pre class="lstlisting" id="listing-220"><span class="label"><a
id="x17-147001r1"></a></span><span style="color:#000000"><span
id="x18-148001r1"></a></span><span style="color:#000000"><span
class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">psb_richardson</span></span><span style="color:#000000"><span
class="cmtt-10">(</span></span><span style="color:#000000"><span
@ -544,7 +544,7 @@ class="cmtt-10">,</span></span><span style="color:#000000"><span
class="cmtt-10">info</span></span><span style="color:#000000"><span
class="cmtt-10">,&amp;</span></span>
<span class="label"><a
id="x17-147002r2"></a></span><span
id="x18-148002r2"></a></span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span

@ -16,7 +16,7 @@ href="userhtmlse11.html#tailuserhtmlse11.html" >prev-tail</a>] [<a
href="userhtmlse9.html#tailuserhtmlse12.html">tail</a>] [<a
href="userhtml.html# " >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">12 </span> <a
id="x19-14800012"></a>Extensions</h3>
id="x20-14900012"></a>Extensions</h3>
<!--l. 3--><p class="noindent" >The EXT, CUDA and RSB subdirectories contain a set of extensions to the base
library. The extensions provide additional storage formats beyond the ones already
contained in the base library, as well as interfaces to:
@ -42,14 +42,14 @@ class="cmtt-10">http://sourceforge.net/projects/librsb/</span></a>, for comput
multicore parallel machines.</dd></dl>
<!--l. 14--><p class="noindent" >The infrastructure laid out in the base library to allow for these extensions is detailed in
the references&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#XDesPat:11">21</a>,&#x00A0;<a
href="userhtmlli2.html#XCaFiRo:2014">22</a>,&#x00A0;<a
href="userhtmlli2.html#XSparse03">11</a>]</span>; the CUDA-specific data formats are described
href="userhtmlli3.html#XDesPat:11">21</a>,&#x00A0;<a
href="userhtmlli3.html#XCaFiRo:2014">22</a>,&#x00A0;<a
href="userhtmlli3.html#XSparse03">11</a>]</span>; the CUDA-specific data formats are described
in&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#XOurTechRep">23</a>]</span>.
href="userhtmlli3.html#XOurTechRep">23</a>]</span>.
<!--l. 19--><p class="noindent" >
<h4 class="subsectionHead"><span class="titlemark">12.1 </span> <a
id="x19-14900012.1"></a>Using the extensions</h4>
id="x20-15000012.1"></a>Using the extensions</h4>
<!--l. 21--><p class="noindent" >A sample application using the PSBLAS extensions will contain the following
steps:
<ul class="itemize1">
@ -142,31 +142,31 @@ speed of the sparse matrix-vector product with the various data structures inclu
in the library.
<!--l. 146--><p class="noindent" >
<h4 class="subsectionHead"><span class="titlemark">12.2 </span> <a
id="x19-15000012.2"></a>Extensions&#8217; Data Structures</h4>
id="x20-15100012.2"></a>Extensions&#8217; Data Structures</h4>
<!--l. 150--><p class="noindent" >Access to the facilities provided by the EXT library is mainly achieved through
the data types that are provided within. The data classes are derived from
the base classes in PSBLAS, through the Fortran&#x00A0;2003 mechanism of <span
class="pplri7t-">type</span>
<span
class="pplri7t-">extension</span>&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#XMRC:11">18</a>]</span>.
href="userhtmlli3.html#XMRC:11">18</a>]</span>.
<!--l. 155--><p class="indent" > The data classes are divided between the general purpose CPU extensions, the
GPU interfaces and the RSB interfaces. In the description we will make use of the
notation introduced in Table&#x00A0;<a
href="#x19-150001r22">22<!--tex4ht:ref: tab:notation --></a>.
href="#x20-151001r22">22<!--tex4ht:ref: tab:notation --></a>.
<div class="table">
<!--l. 160--><p class="indent" > <a
id="x19-150001r22"></a><hr class="float"><div class="float"
id="x20-151001r22"></a><hr class="float"><div class="float"
>
<div class="caption"
><span class="id">Table&#x00A0;22: </span><span
class="content">Notation for parameters describing a sparse matrix</span></div><!--tex4ht:label?: x19-150001r22 -->
class="content">Notation for parameters describing a sparse matrix</span></div><!--tex4ht:label?: x20-151001r22 -->
<div class="center"
>
<!--l. 162--><p class="noindent" >
@ -276,7 +276,7 @@ class="td11"> </td></tr></table>
<a
id="x19-150002r5"></a>
id="x20-151002r5"></a>
@ -285,18 +285,18 @@ src="mat.png" alt="PIC"
width="147" height="147" >
<br /> <div class="caption"
><span class="id">Figure&#x00A0;5: </span><span
class="content">Example of sparse matrix</span></div><!--tex4ht:label?: x19-150002r5 -->
class="content">Example of sparse matrix</span></div><!--tex4ht:label?: x20-151002r5 -->
<!--l. 198--><p class="indent" > </div><hr class="endfigure">
<h4 class="subsectionHead"><span class="titlemark">12.3 </span> <a
id="x19-15100012.3"></a>CPU-class extensions</h4>
id="x20-15200012.3"></a>CPU-class extensions</h4>
<!--l. 203--><p class="noindent" >
<h5 class="likesubsubsectionHead"><a
id="x19-152000"></a>ELLPACK</h5>
id="x20-153000"></a>ELLPACK</h5>
<!--l. 205--><p class="noindent" >The ELLPACK/ITPACK format (shown in Figure&#x00A0;<a
href="#x19-152001r6">6<!--tex4ht:ref: fig:ell --></a>) comprises two 2-dimensional
href="#x20-153001r6">6<!--tex4ht:ref: fig:ell --></a>) comprises two 2-dimensional
arrays <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">AS</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">JA</span></span></span> with <span class="obeylines-h"><span class="verb"><span
@ -317,7 +317,7 @@ row.
<a
id="x19-152001r6"></a>
id="x20-153001r6"></a>
@ -327,13 +327,13 @@ width="233" height="233" >
<br /> <div class="caption"
><span class="id">Figure&#x00A0;6: </span><span
class="content">ELLPACK compression of matrix in Figure&#x00A0;<a
href="#x19-150002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-152001r6 -->
href="#x20-151002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x20-153001r6 -->
<!--l. 225--><p class="indent" > </div><hr class="endfigure">
<a
id="x19-152002r1"></a>
id="x20-153002r1"></a>
@ -344,7 +344,7 @@ href="#x19-150002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:l
<!--l. 231-->
<pre class="lstlisting" id="listing-221"><span class="label"><a
id="x19-152003r1"></a></span><span
id="x20-153003r1"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
@ -353,7 +353,7 @@ class="cmtt-9">i</span></span><span style="color:#000000"><span
class="cmtt-9">=1,</span></span><span style="color:#000000"><span
class="cmtt-9">n</span></span>
<span class="label"><a
id="x19-152004r2"></a></span><span
id="x20-153004r2"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
@ -362,7 +362,7 @@ class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">t</span></span><span style="color:#000000"><span
class="cmtt-9">=0</span></span>
<span class="label"><a
id="x19-152005r3"></a></span><span
id="x20-153005r3"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
@ -373,7 +373,7 @@ class="cmtt-9">j</span></span><span style="color:#000000"><span
class="cmtt-9">=1,</span></span><span style="color:#000000"><span
class="cmtt-9">maxnzr</span></span>
<span class="label"><a
id="x19-152006r4"></a></span><span
id="x20-153006r4"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
@ -401,7 +401,7 @@ class="cmtt-9">,</span></span><span style="color:#000000"><span
class="cmtt-9">j</span></span><span style="color:#000000"><span
class="cmtt-9">))</span></span>
<span class="label"><a
id="x19-152007r5"></a></span><span
id="x20-153007r5"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
@ -410,7 +410,7 @@ class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">end</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">do</span></span>
<span class="label"><a
id="x19-152008r6"></a></span><span
id="x20-153008r6"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
@ -423,7 +423,7 @@ class="cmtt-9">)</span></span><span style="color:#000000"> </span><span style="c
class="cmtt-9">=</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">t</span></span>
<span class="label"><a
id="x19-152009r7"></a></span><span
id="x20-153009r7"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
@ -431,9 +431,9 @@ class="cmtt-9">end</span></span><span style="color:#000000"> </span><span style=
class="cmtt-9">do</span></span></pre>
<a
id="x19-152010r1"></a>
id="x20-153010r1"></a>
<a
id="x19-152011"></a>
id="x20-153011"></a>
<span
class="pplb7t-">Algorithm</span><span
class="pplb7t-">&#x00A0;1:</span>&#x00A0; Matrix-Vector product in ELL format
@ -446,7 +446,7 @@ class="zplmr7m-">y </span><span
class="zplmr7t-">= </span><span
class="zplmr7m-">Ax </span>can be computed with the code shown in
Alg.&#x00A0;<a
href="#x19-152010r1">1<!--tex4ht:ref: alg:ell --></a>; it costs one memory write per outer iteration, plus three memory reads and
href="#x20-153010r1">1<!--tex4ht:ref: alg:ell --></a>; it costs one memory write per outer iteration, plus three memory reads and
two floating-point operations per inner iteration.
<!--l. 247--><p class="indent" > Unless all rows have exactly the same number of nonzeros, some of the
coefficients in the <span class="obeylines-h"><span class="verb"><span
@ -455,12 +455,12 @@ overhead both in terms of memory space and redundant operations (multiplications
by zero). The overhead can be acceptable if:
<ol class="enumerate1" >
<li
class="enumerate" id="x19-152013x1">
class="enumerate" id="x20-153013x1">
<!--l. 253--><p class="noindent" >The maximum number of nonzeros per row is not much larger than the
average;
</li>
<li
class="enumerate" id="x19-152015x2">
class="enumerate" id="x20-153015x2">
<!--l. 255--><p class="noindent" >The regularity of the data structure allows for faster code, e.g. by allowing
vectorization, thereby offsetting the additional storage requirements.</li></ol>
<!--l. 259--><p class="noindent" >In the extreme case where the input matrix has one full row, the ELLPACK
@ -488,7 +488,7 @@ class="cmtt-10">psb_T_ell_sparse_mat</span></span></span>:
</pre>
<!--l. 295--><p class="nopar" > </div></div>
<h5 class="likesubsubsectionHead"><a
id="x19-153000"></a>Hacked ELLPACK</h5>
id="x20-154000"></a>Hacked ELLPACK</h5>
@ -564,7 +564,7 @@ format.
<a
id="x19-153001r7"></a>
id="x20-154001r7"></a>
@ -574,7 +574,7 @@ width="248" height="248" >
<br /> <div class="caption"
><span class="id">Figure&#x00A0;7: </span><span
class="content">Hacked ELLPACK compression of matrix in Figure&#x00A0;<a
href="#x19-150002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-153001r7 -->
href="#x20-151002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x20-154001r7 -->
@ -601,9 +601,9 @@ class="cmtt-10">psb_T_hll_sparse_mat</span></span></span>:
</pre>
<!--l. 388--><p class="nopar" > </div></div>
<h5 class="likesubsubsectionHead"><a
id="x19-154000"></a>Diagonal storage</h5>
id="x20-155000"></a>Diagonal storage</h5>
<!--l. 396--><p class="noindent" >The DIAgonal (DIA) format (shown in Figure&#x00A0;<a
href="#x19-154001r8">8<!--tex4ht:ref: fig:dia --></a>) has a 2-dimensional array <span class="obeylines-h"><span class="verb"><span
href="#x20-155001r8">8<!--tex4ht:ref: fig:dia --></a>) has a 2-dimensional array <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">AS</span></span></span>
containing in each column the coefficients along a diagonal of the matrix, and an
integer array <span class="obeylines-h"><span class="verb"><span
@ -614,7 +614,7 @@ are padded with zeros as necessary.
class="zplmr7m-">y </span><span
class="zplmr7t-">= </span><span
class="zplmr7m-">Ax </span>is shown in Alg.&#x00A0;<a
href="#x19-154003r2">2<!--tex4ht:ref: alg:dia --></a>; it
href="#x20-155003r2">2<!--tex4ht:ref: alg:dia --></a>; it
costs one memory read per outer iteration, plus three memory reads, one memory
write and two floating-point operations per inner iteration. The accesses to
<span class="obeylines-h"><span class="verb"><span
@ -627,7 +627,7 @@ required.
<a
id="x19-154001r8"></a>
id="x20-155001r8"></a>
@ -637,13 +637,13 @@ width="248" height="248" >
<br /> <div class="caption"
><span class="id">Figure&#x00A0;8: </span><span
class="content">DIA compression of matrix in Figure&#x00A0;<a
href="#x19-150002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-154001r8 -->
href="#x20-151002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x20-155001r8 -->
<!--l. 419--><p class="indent" > </div><hr class="endfigure">
<a
id="x19-154002r2"></a>
id="x20-155002r2"></a>
@ -669,9 +669,9 @@ href="#x19-150002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:l
</pre>
<!--l. 450--><p class="nopar" > </div></div>
<a
id="x19-154003r2"></a>
id="x20-155003r2"></a>
<a
id="x19-154004"></a>
id="x20-155004"></a>
<span
class="pplb7t-">Algorithm</span><span
class="pplb7t-">&#x00A0;2:</span>&#x00A0; Matrix-Vector product in DIA format
@ -698,7 +698,7 @@ class="cmtt-10">psb_T_dia_sparse_mat</span></span></span>:
</pre>
<!--l. 486--><p class="nopar" > </div></div>
<h5 class="likesubsubsectionHead"><a
id="x19-155000"></a>Hacked DIA</h5>
id="x20-156000"></a>Hacked DIA</h5>
<!--l. 495--><p class="noindent" >Storage by DIAgonals is an attractive option for matrices whose coefficients are
located on a small set of diagonals, since they do away with storing explicitly the
indices and therefore reduce significantly memory traffic. However, having a few
@ -749,7 +749,7 @@ class="pplri7t-">hackOffsets[k]</span>.
<a
id="x19-155001r9"></a>
id="x20-156001r9"></a>
@ -759,7 +759,7 @@ width="248" height="248" >
<br /> <div class="caption"
><span class="id">Figure&#x00A0;9: </span><span
class="content">Hacked DIA compression of matrix in Figure&#x00A0;<a
href="#x19-150002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-155001r9 -->
href="#x20-151002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x20-156001r9 -->
@ -804,7 +804,7 @@ class="cmtt-10">psb_T_hdia_sparse_mat</span></span></span>:
<h4 class="subsectionHead"><span class="titlemark">12.4 </span> <a
id="x19-15600012.4"></a>CUDA-class extensions</h4>
id="x20-15700012.4"></a>CUDA-class extensions</h4>
<!--l. 4--><p class="noindent" >For computing with CUDA we define a dual memorization strategy in which each
variable on the CPU (&#8220;host&#8221;) side has a GPU (&#8220;device&#8221;) side. When a GPU-type
variable is initialized, the data contained is (usually) the same on both sides. Each

@ -16,12 +16,12 @@ href="userhtmlse12.html#tailuserhtmlse12.html" >prev-tail</a>] [<a
href="userhtmlse10.html#tailuserhtmlse13.html">tail</a>] [<a
href="userhtml.html# " >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">13 </span> <a
id="x20-15700013"></a>CUDA Environment Routines</h3>
id="x21-15800013"></a>CUDA Environment Routines</h3>
<!--l. 91--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-158000"></a>psb_cuda_init &#8212; Initializes PSBLAS-CUDA environment</h4>
id="x21-159000"></a>psb_cuda_init &#8212; Initializes PSBLAS-CUDA environment</h4>
<a
id="Q1-20-196"></a>
id="Q1-21-198"></a>
<div class="center"
>
<!--l. 99--><p class="noindent" >
@ -64,13 +64,13 @@ class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x20-158002x1">
class="enumerate" id="x21-159002x1">
<!--l. 125--><p class="noindent" >A call to this routine must precede any other PSBLAS-CUDA call.</li></ol>
<!--l. 129--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-159000"></a>psb_cuda_exit &#8212; Exit from PSBLAS-CUDA environment</h4>
id="x21-160000"></a>psb_cuda_exit &#8212; Exit from PSBLAS-CUDA environment</h4>
<a
id="Q1-20-198"></a>
id="Q1-21-200"></a>
<div class="center"
>
<!--l. 137--><p class="noindent" >
@ -106,9 +106,9 @@ class="pplb7t-">in</span>.<br
class="newline" />Specified as: an integer variable.</dd></dl>
<!--l. 161--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-160000"></a>psb_cuda_DeviceSync &#8212; Synchronize CUDA device</h4>
id="x21-161000"></a>psb_cuda_DeviceSync &#8212; Synchronize CUDA device</h4>
<a
id="Q1-20-200"></a>
id="Q1-21-202"></a>
@ -123,9 +123,9 @@ call&#x00A0;psb_cuda_DeviceSync()
CUDA-side code, have completed.
<!--l. 182--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-161000"></a>psb_cuda_getDeviceCount </h4>
id="x21-162000"></a>psb_cuda_getDeviceCount </h4>
<a
id="Q1-20-202"></a>
id="Q1-21-204"></a>
<div class="center"
>
<!--l. 190--><p class="noindent" >
@ -136,14 +136,14 @@ ngpus&#x00A0;=&#x00A0;&#x00A0;psb_cuda_getDeviceCount()
<!--l. 199--><p class="noindent" >Get number of devices available on current computing node.
<!--l. 201--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-162000"></a>psb_cuda_getDevice </h4>
id="x21-163000"></a>psb_cuda_getDevice </h4>
<a
id="Q1-20-204"></a>
id="Q1-21-206"></a>
<div class="center"
>
<!--l. 209--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-116">
ngpus&#x00A0;=&#x00A0;&#x00A0;psb_cuda_getDevice()
dev&#x00A0;=&#x00A0;&#x00A0;psb_cuda_getDevice()
</pre>
<!--l. 213--><p class="nopar" > </div></div>
<!--l. 218--><p class="noindent" >Get device in use by current process.
@ -152,9 +152,9 @@ ngpus&#x00A0;=&#x00A0;&#x00A0;psb_cuda_getDevice()
<!--l. 220--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-163000"></a>psb_cuda_setDevice </h4>
id="x21-164000"></a>psb_cuda_setDevice </h4>
<a
id="Q1-20-206"></a>
id="Q1-21-208"></a>
<div class="center"
>
<!--l. 228--><p class="noindent" >
@ -165,9 +165,9 @@ info&#x00A0;=&#x00A0;psb_cuda_setDevice(dev)
<!--l. 237--><p class="noindent" >Set device to be used by current process.
<!--l. 239--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-164000"></a>psb_cuda_DeviceHasUVA </h4>
id="x21-165000"></a>psb_cuda_DeviceHasUVA </h4>
<a
id="Q1-20-208"></a>
id="Q1-21-210"></a>
<div class="center"
>
<!--l. 247--><p class="noindent" >
@ -178,9 +178,9 @@ hasUva&#x00A0;=&#x00A0;psb_cuda_DeviceHasUVA()
<!--l. 256--><p class="noindent" >Returns true if device currently in use supports UVA (Unified Virtual Addressing).
<!--l. 259--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-165000"></a>psb_cuda_WarpSize </h4>
id="x21-166000"></a>psb_cuda_WarpSize </h4>
<a
id="Q1-20-210"></a>
id="Q1-21-212"></a>
<div class="center"
>
<!--l. 267--><p class="noindent" >
@ -194,9 +194,9 @@ nw&#x00A0;=&#x00A0;psb_cuda_WarpSize()
<!--l. 279--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-166000"></a>psb_cuda_MultiProcessors </h4>
id="x21-167000"></a>psb_cuda_MultiProcessors </h4>
<a
id="Q1-20-212"></a>
id="Q1-21-214"></a>
<div class="center"
>
<!--l. 287--><p class="noindent" >
@ -207,9 +207,9 @@ nmp&#x00A0;=&#x00A0;psb_cuda_MultiProcessors()
<!--l. 296--><p class="noindent" >Returns the number of multiprocessors in the CUDA device.
<!--l. 298--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-167000"></a>psb_cuda_MaxThreadsPerMP </h4>
id="x21-168000"></a>psb_cuda_MaxThreadsPerMP </h4>
<a
id="Q1-20-214"></a>
id="Q1-21-216"></a>
<div class="center"
>
<!--l. 306--><p class="noindent" >
@ -220,9 +220,9 @@ nt&#x00A0;=&#x00A0;psb_cuda_MaxThreadsPerMP()
<!--l. 315--><p class="noindent" >Returns the maximum number of threads per multiprocessor.
<!--l. 318--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-168000"></a>psb_cuda_MaxRegistersPerBlock </h4>
id="x21-169000"></a>psb_cuda_MaxRegistersPerBlock </h4>
<a
id="Q1-20-216"></a>
id="Q1-21-218"></a>
<div class="center"
>
<!--l. 326--><p class="noindent" >
@ -236,9 +236,9 @@ nr&#x00A0;=&#x00A0;psb_cuda_MaxRegistersPerBlock()
<!--l. 338--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-169000"></a>psb_cuda_MemoryClockRate </h4>
id="x21-170000"></a>psb_cuda_MemoryClockRate </h4>
<a
id="Q1-20-218"></a>
id="Q1-21-220"></a>
<div class="center"
>
<!--l. 346--><p class="noindent" >
@ -249,9 +249,9 @@ cl&#x00A0;=&#x00A0;psb_cuda_MemoryClockRate()
<!--l. 355--><p class="noindent" >Returns the memory clock rate in kHz, as an integer.
<!--l. 357--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-170000"></a>psb_cuda_MemoryBusWidth </h4>
id="x21-171000"></a>psb_cuda_MemoryBusWidth </h4>
<a
id="Q1-20-220"></a>
id="Q1-21-222"></a>
<div class="center"
>
<!--l. 365--><p class="noindent" >
@ -262,9 +262,9 @@ nb&#x00A0;=&#x00A0;psb_cuda_MemoryBusWidth()
<!--l. 374--><p class="noindent" >Returns the memory bus width in bits.
<!--l. 376--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-171000"></a>psb_cuda_MemoryPeakBandwidth </h4>
id="x21-172000"></a>psb_cuda_MemoryPeakBandwidth </h4>
<a
id="Q1-20-222"></a>
id="Q1-21-224"></a>
<div class="center"
>
<!--l. 384--><p class="noindent" >
@ -282,10 +282,6 @@ bw&#x00A0;=&#x00A0;psb_cuda_MemoryPeakBandwidth()
<!--l. 134--><p class="indent" >

@ -10,15 +10,15 @@
<link rel="stylesheet" type="text/css" href="userhtml.css">
</head><body
>
<!--l. 72--><div class="crosslinks"><p class="noindent">[<a
<!--l. 130--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlse6.html" >next</a>] [<a
href="userhtmlse1.html" >prev</a>] [<a
href="userhtmlse1.html#tailuserhtmlse1.html" >prev-tail</a>] [<a
href="#tailuserhtmlse2.html">tail</a>] [<a
href="userhtml.html#userhtmlse2.html" >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">2 </span> <a
id="x4-30002"></a>General overview</h3>
<!--l. 74--><p class="noindent" >The PSBLAS library is designed to handle the implementation of iterative solvers for
id="x5-40002"></a>General overview</h3>
<!--l. 132--><p class="noindent" >The PSBLAS library is designed to handle the implementation of iterative solvers for
sparse linear systems on distributed memory parallel computers. The system
coefficient matrix <span
class="zplmr7m-">A </span>must be square; it may be real or complex, nonsymmetric, and
@ -27,43 +27,43 @@ based on the serial sparse BLAS, so that any extension made to the data structur
the serial kernels is available to the parallel version. The overall design and
parallelization strategy have been influenced by the structure of the ScaLAPACK
parallel library. The layered structure of the PSBLAS library is shown in figure&#x00A0;<a
href="#x4-3001r1">1<!--tex4ht:ref: fig:psblas --></a>;
href="#x5-4001r1">1<!--tex4ht:ref: fig:psblas --></a>;
lower layers of the library indicate an encapsulation relationship with upper layers.
The ongoing discussion focuses on the Fortran&#x00A0;2003 layer immediately
The ongoing discussion focuses on the Fortran&#x00A0;2008 layer immediately
below the application layer. The serial parts of the computation on each
process are executed through calls to the serial sparse BLAS subroutines. In a
similar way, the inter-process message exchanges are encapsulated in an
application layer that has been strongly inspired by the Basic Linear Algebra
Communication Subroutines (BLACS) library&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#XBLACS">7</a>]</span>. Usually there is no need to deal
href="userhtmlli3.html#XBLACS">7</a>]</span>. Usually there is no need to deal
directly with MPI; however, in some cases, MPI routines are used directly
to improve efficiency. For further details on our communication layer see
Sec.&#x00A0;<a
href="userhtmlse7.html#x12-1050007">7<!--tex4ht:ref: sec:parenv --></a>.
<!--l. 101--><p class="indent" > <hr class="figure"><div class="figure"
href="userhtmlse7.html#x13-1060007">7<!--tex4ht:ref: sec:parenv --></a>.
<!--l. 159--><p class="indent" > <hr class="figure"><div class="figure"
>
<a
id="x4-3001r1"></a>
id="x5-4001r1"></a>
<div class="center"
>
<!--l. 102--><p class="noindent" >
<!--l. 104--><p class="noindent" ><img
<!--l. 160--><p class="noindent" >
<!--l. 162--><p class="noindent" ><img
src="psblas.png" alt="PIC"
width="46" height="46" ></div>
<br /> <div class="caption"
><span class="id">Figure&#x00A0;1: </span><span
class="content">PSBLAS library components hierarchy.</span></div><!--tex4ht:label?: x4-3001r1 -->
class="content">PSBLAS library components hierarchy.</span></div><!--tex4ht:label?: x5-4001r1 -->
<!--l. 110--><p class="indent" > </div><hr class="endfigure">
<!--l. 113--><p class="indent" > The types of linear system matrices that we address typically arise in
<!--l. 168--><p class="indent" > </div><hr class="endfigure">
<!--l. 171--><p class="indent" > The types of linear system matrices that we address typically arise in
the numerical solution of PDEs; in such a context, it is necessary to pay
special attention to the structure of the problem from which the application
originates. The nonzero pattern of a matrix arising from the discretization of a
@ -71,7 +71,7 @@ PDE is influenced by various factors, such as the shape of the domain, the
discretization strategy, and the equation/unknown ordering. The matrix itself can be
interpreted as the adjacency matrix of the graph associated with the discretization
mesh.
<!--l. 124--><p class="indent" > The distribution of the coefficient matrix for the linear system is based on the
<!--l. 182--><p class="indent" > The distribution of the coefficient matrix for the linear system is based on the
&#8220;owner computes&#8221; rule: the variable associated with each mesh point is assigned to a
process that will own the corresponding row in the coefficient matrix and will
carry out all related computations. This allocation strategy is equivalent to a
@ -85,23 +85,23 @@ class="cmtt-10">BLOCK</span></span></span>, as well as completely
arbitrary assignments of equation indices to processes. In particular it is
consistent with the usage of graph partitioning tools commonly available in
the literature, e.g. METIS&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#XMETIS">14</a>]</span>. Dense vectors conform to sparse matrices,
href="userhtmlli3.html#XMETIS">14</a>]</span>. Dense vectors conform to sparse matrices,
that is, the entries of a vector follow the same distribution as the matrix
rows.
<!--l. 146--><p class="indent" > We assume that the sparse matrix is built in parallel, where each process generates
<!--l. 204--><p class="indent" > We assume that the sparse matrix is built in parallel, where each process generates
its own portion. We never require that the entire matrix be available on a single
node. However, it is possible to hold the entire matrix in one process and distribute it
explicitly<span class="footnote-mark"><a
href="userhtml5.html#fn1x0"><sup class="textsuperscript">1</sup></a></span><a
id="x4-3002f1"></a> ,
href="userhtml6.html#fn1x0"><sup class="textsuperscript">1</sup></a></span><a
id="x5-4002f1"></a> ,
even though the resulting memory bottleneck would make this option unattractive
in most cases.
<h4 class="subsectionHead"><span class="titlemark">2.1 </span> <a
id="x4-40002.1"></a>Basic Nomenclature</h4>
<!--l. 158--><p class="noindent" >Our computational model implies that the data allocation on the parallel distributed
id="x5-50002.1"></a>Basic Nomenclature</h4>
<!--l. 216--><p class="noindent" >Our computational model implies that the data allocation on the parallel distributed
memory machine is guided by the structure of the physical model, and specifically
by the discretization mesh of the PDE.
<!--l. 163--><p class="indent" > Each point of the discretization mesh will have (at least) one associated
<!--l. 221--><p class="indent" > Each point of the discretization mesh will have (at least) one associated
equation/variable, and therefore one index. We say that point <span
class="zplmr7m-">i </span><span
class="pplri7t-">depends </span>on point <span
@ -117,11 +117,11 @@ class="pplri7t-">sub-domains </span>assigned
to the parallel processes, we classify the points of a given sub-domain as
follows.
<dl class="description"><dt class="description">
<!--l. 172--><p class="noindent" >
<!--l. 230--><p class="noindent" >
<span
class="pplb7t-">Internal.</span> </dt><dd
class="description">
<!--l. 172--><p class="noindent" >An internal point of a given domain <span
<!--l. 230--><p class="noindent" >An internal point of a given domain <span
class="pplri7t-">depends </span>only on points of the same
domain. If all points of a domain are assigned to one process, then
a computational step (e.g., a matrix-vector product) of the equations
@ -131,42 +131,42 @@ class="pplri7t-">depends </span>only on points of the same
associated with the internal points requires no data items from other
domains and no communications.
</dd><dt class="description">
<!--l. 181--><p class="noindent" >
<!--l. 239--><p class="noindent" >
<span
class="pplb7t-">Boundary.</span> </dt><dd
class="description">
<!--l. 181--><p class="noindent" >A point of a given domain is a boundary point if it <span
<!--l. 239--><p class="noindent" >A point of a given domain is a boundary point if it <span
class="pplri7t-">depends </span>on points
belonging to other domains.
</dd><dt class="description">
<!--l. 185--><p class="noindent" >
<!--l. 243--><p class="noindent" >
<span
class="pplb7t-">Halo.</span> </dt><dd
class="description">
<!--l. 185--><p class="noindent" >A halo point for a given domain is a point belonging to another domain
<!--l. 243--><p class="noindent" >A halo point for a given domain is a point belonging to another domain
such that there is a boundary point which <span
class="pplri7t-">depends </span>on it. Whenever performing
a computational step, such as a matrix-vector product, the values associated
with halo points are requested from other domains. A boundary point of
a given domain is usually a halo point for some other domain<span class="footnote-mark"><a
href="userhtml6.html#fn2x0"><sup class="textsuperscript">2</sup></a></span><a
id="x4-4001f2"></a> ;
href="userhtml7.html#fn2x0"><sup class="textsuperscript">2</sup></a></span><a
id="x5-5001f2"></a> ;
therefore the cardinality of the boundary points set denotes the amount
of data sent to other domains.
</dd><dt class="description">
<!--l. 198--><p class="noindent" >
<!--l. 256--><p class="noindent" >
<span
class="pplb7t-">Overlap.</span> </dt><dd
class="description">
<!--l. 198--><p class="noindent" >An overlap point is a boundary point assigned to multiple domains. Any
<!--l. 256--><p class="noindent" >An overlap point is a boundary point assigned to multiple domains. Any
operation that involves an overlap point has to be replicated for each
assignment.</dd></dl>
<!--l. 202--><p class="noindent" >Overlap points do not usually exist in the basic data distributions; however they are a
<!--l. 260--><p class="noindent" >Overlap points do not usually exist in the basic data distributions; however they are a
feature of Domain Decomposition Schwarz preconditioners which are the subject of
related research work&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#X2007c">4</a>,&#x00A0;<a
href="userhtmlli2.html#X2007d">3</a>]</span>.
<!--l. 207--><p class="indent" > We denote the sets of internal, boundary and halo points for a given subdomain
href="userhtmlli3.html#X2007c">4</a>,&#x00A0;<a
href="userhtmlli3.html#X2007d">3</a>]</span>.
<!--l. 265--><p class="indent" > We denote the sets of internal, boundary and halo points for a given subdomain
by <span
class="zplmr7y-"><img
src="zplmr7y-49.png" alt="I" class="x-x-49" /></span>, <span
@ -203,169 +203,169 @@ class="zplmr7y-">|<img
src="zplmr7y-48.png" alt="H" class="x-x-48" /></span><sub><span
class="zplmr7m-x-x-76">i</span></sub><span
class="zplmr7y-">|</span>.
<!--l. 217--><p class="indent" > <hr class="figure"><div class="figure"
<!--l. 275--><p class="indent" > <hr class="figure"><div class="figure"
>
<a
id="x4-4003r2"></a>
id="x5-5003r2"></a>
<div class="center"
>
<!--l. 218--><p class="noindent" >
<!--l. 221--><p class="noindent" ><img
<!--l. 276--><p class="noindent" >
<!--l. 279--><p class="noindent" ><img
src="points.png" alt="PIC"
width="46" height="46" ></div>
<br /> <div class="caption"
><span class="id">Figure&#x00A0;2: </span><span
class="content">Point classification.</span></div><!--tex4ht:label?: x4-4003r2 -->
class="content">Point classification.</span></div><!--tex4ht:label?: x5-5003r2 -->
<!--l. 227--><p class="indent" > </div><hr class="endfigure">
<!--l. 229--><p class="indent" > This classification of mesh points guides the naming scheme that we adopted in
<!--l. 285--><p class="indent" > </div><hr class="endfigure">
<!--l. 287--><p class="indent" > This classification of mesh points guides the naming scheme that we adopted in
the library internals and in the data structures. We explicitly note that &#8220;Halo&#8221; points
are also often called &#8220;ghost&#8221; points in the literature.
<h4 class="subsectionHead"><span class="titlemark">2.2 </span> <a
id="x4-50002.2"></a>Library contents</h4>
<!--l. 238--><p class="noindent" >The PSBLAS library consists of various classes of subroutines:
id="x5-60002.2"></a>Library contents</h4>
<!--l. 296--><p class="noindent" >The PSBLAS library consists of various classes of subroutines:
<dl class="description"><dt class="description">
<!--l. 240--><p class="noindent" >
<!--l. 298--><p class="noindent" >
<span
class="pplb7t-">Computational routines</span> </dt><dd
class="description">
<!--l. 240--><p class="noindent" >comprising:
<!--l. 298--><p class="noindent" >comprising:
<ul class="itemize1">
<li class="itemize">
<!--l. 242--><p class="noindent" >Sparse matrix by dense matrix product;
<!--l. 300--><p class="noindent" >Sparse matrix by dense matrix product;
</li>
<li class="itemize">
<!--l. 243--><p class="noindent" >Sparse triangular systems solution for block diagonal matrices;
<!--l. 301--><p class="noindent" >Sparse triangular systems solution for block diagonal matrices;
</li>
<li class="itemize">
<!--l. 245--><p class="noindent" >Vector and matrix norms;
<!--l. 303--><p class="noindent" >Vector and matrix norms;
</li>
<li class="itemize">
<!--l. 246--><p class="noindent" >Dense matrix sums;
<!--l. 304--><p class="noindent" >Dense matrix sums;
</li>
<li class="itemize">
<!--l. 247--><p class="noindent" >Dot products.</li></ul>
<!--l. 305--><p class="noindent" >Dot products.</li></ul>
</dd><dt class="description">
<!--l. 249--><p class="noindent" >
<!--l. 307--><p class="noindent" >
<span
class="pplb7t-">Communication routines</span> </dt><dd
class="description">
<!--l. 249--><p class="noindent" >handling halo and overlap communications;
<!--l. 307--><p class="noindent" >handling halo and overlap communications;
</dd><dt class="description">
<!--l. 251--><p class="noindent" >
<!--l. 309--><p class="noindent" >
<span
class="pplb7t-">Data management and auxiliary routines</span> </dt><dd
class="description">
<!--l. 251--><p class="noindent" >including:
<!--l. 309--><p class="noindent" >including:
<ul class="itemize1">
<li class="itemize">
<!--l. 253--><p class="noindent" >Parallel environment management
<!--l. 311--><p class="noindent" >Parallel environment management
</li>
<li class="itemize">
<!--l. 254--><p class="noindent" >Communication descriptors allocation;
<!--l. 312--><p class="noindent" >Communication descriptors allocation;
</li>
<li class="itemize">
<!--l. 255--><p class="noindent" >Dense and sparse matrix allocation;
<!--l. 313--><p class="noindent" >Dense and sparse matrix allocation;
</li>
<li class="itemize">
<!--l. 256--><p class="noindent" >Dense and sparse matrix build and update;
<!--l. 314--><p class="noindent" >Dense and sparse matrix build and update;
</li>
<li class="itemize">
<!--l. 257--><p class="noindent" >Sparse matrix and data distribution preprocessing.</li></ul>
<!--l. 315--><p class="noindent" >Sparse matrix and data distribution preprocessing.</li></ul>
</dd><dt class="description">
<!--l. 259--><p class="noindent" >
<!--l. 317--><p class="noindent" >
<span
class="pplb7t-">Preconditioner routines</span> </dt><dd
class="description">
<!--l. 259--><p class="noindent" >
<!--l. 317--><p class="noindent" >
</dd><dt class="description">
<!--l. 260--><p class="noindent" >
<!--l. 318--><p class="noindent" >
<span
class="pplb7t-">Iterative methods</span> </dt><dd
class="description">
<!--l. 260--><p class="noindent" >a subset of Krylov subspace iterative methods</dd></dl>
<!--l. 263--><p class="noindent" >The following naming scheme has been adopted for all the symbols internally defined
<!--l. 318--><p class="noindent" >a subset of classical and Krylov subspace iterative methods</dd></dl>
<!--l. 321--><p class="noindent" >The following naming scheme has been adopted for all the symbols internally defined
in the PSBLAS software package:
<ul class="itemize1">
<li class="itemize">
<!--l. 266--><p class="noindent" >all symbols (i.e. subroutine names, data types...) are prefixed by <span class="obeylines-h"><span class="verb"><span
<!--l. 324--><p class="noindent" >all symbols (i.e. subroutine names, data types...) are prefixed by <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_</span></span></span>
</li>
<li class="itemize">
<!--l. 268--><p class="noindent" >all data type names are suffixed by <span class="obeylines-h"><span class="verb"><span
<!--l. 326--><p class="noindent" >all data type names are suffixed by <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">_type</span></span></span>
</li>
<li class="itemize">
<!--l. 269--><p class="noindent" >all constants are suffixed by <span class="obeylines-h"><span class="verb"><span
<!--l. 327--><p class="noindent" >all constants are suffixed by <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">_</span></span></span>
</li>
<li class="itemize">
<!--l. 270--><p class="noindent" >all top-level subroutine names follow the rule <span class="obeylines-h"><span class="verb"><span
<!--l. 328--><p class="noindent" >all top-level subroutine names follow the rule <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_xxname</span></span></span> where <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">xx</span></span></span> can be
either:
<ul class="itemize2">
<li class="itemize">
<!--l. 273--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span
<!--l. 331--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span
class="cmtt-10">ge</span></span></span>: the routine is related to dense data,
</li>
<li class="itemize">
<!--l. 274--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span
<!--l. 332--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span
class="cmtt-10">sp</span></span></span>: the routine is related to sparse data,
</li>
<li class="itemize">
<!--l. 275--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span
<!--l. 333--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span
class="cmtt-10">cd</span></span></span>: the routine is related to communication descriptors (see&#x00A0;<a
href="userhtmlse3.html#x8-90003">3<!--tex4ht:ref: sec:datastruct --></a>).</li></ul>
href="userhtmlse3.html#x9-100003">3<!--tex4ht:ref: sec:datastruct --></a>).</li></ul>
<!--l. 278--><p class="noindent" >For example the <span class="obeylines-h"><span class="verb"><span
<!--l. 336--><p class="noindent" >For example the <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geins</span></span></span>, <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spins</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdins</span></span></span> perform the same
action (see&#x00A0;<a
href="userhtmlse6.html#x11-770006">6<!--tex4ht:ref: sec:toolsrout --></a>) on dense matrices, sparse matrices and communication
href="userhtmlse6.html#x12-780006">6<!--tex4ht:ref: sec:toolsrout --></a>) on dense matrices, sparse matrices and communication
descriptors respectively. Interface overloading allows the usage of the same
subroutine names for both real and complex data.</li></ul>
<!--l. 285--><p class="noindent" >In the description of the subroutines, arguments or argument entries are classified
<!--l. 343--><p class="noindent" >In the description of the subroutines, arguments or argument entries are classified
as:
<dl class="description"><dt class="description">
<!--l. 288--><p class="noindent" >
<!--l. 346--><p class="noindent" >
<span
class="pplb7t-">global</span> </dt><dd
class="description">
<!--l. 288--><p class="noindent" >For input arguments, the value must be the same on all processes
<!--l. 346--><p class="noindent" >For input arguments, the value must be the same on all processes
participating in the subroutine call; for output arguments the value is
guaranteed to be the same.
</dd><dt class="description">
<!--l. 291--><p class="noindent" >
<!--l. 349--><p class="noindent" >
<span
class="pplb7t-">local</span> </dt><dd
class="description">
<!--l. 291--><p class="noindent" >Each process has its own value(s) independently.</dd></dl>
<!--l. 293--><p class="noindent" >To finish our general description, we define a version string with the constant
<!--l. 349--><p class="noindent" >Each process has its own value(s) independently.</dd></dl>
<!--l. 351--><p class="noindent" >To finish our general description, we define a version string with the constant
<div class="math-display" >
<img
src="userhtml0x.png" alt="psb_version_string_
" class="math-display" ></div>
<!--l. 295--><p class="nopar" > whose current value is <span class="obeylines-h"><span class="verb"><span
<!--l. 353--><p class="nopar" > whose current value is <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">3.9.0</span></span></span>
<!--l. 298--><p class="noindent" >
<!--l. 356--><p class="noindent" >
<h4 class="subsectionHead"><span class="titlemark">2.3 </span> <a
id="x4-60002.3"></a>Application structure</h4>
<!--l. 301--><p class="noindent" >The main underlying principle of the PSBLAS library is that the library objects are
id="x5-70002.3"></a>Application structure</h4>
<!--l. 359--><p class="noindent" >The main underlying principle of the PSBLAS library is that the library objects are
created and exist with reference to a discretized space to which there corresponds
an index space and a matrix sparsity pattern. As an example, consider a
cell-centered finite-volume discretization of the Navier-Stokes equations on a
@ -375,13 +375,13 @@ class="zplmr7m-">n </span>is isomorphic to the set of cell centers,
whereas the pattern of the associated linear system matrix is isomorphic to the
adjacency graph imposed on the discretization mesh by the discretization
stencil.
<!--l. 311--><p class="indent" > Thus the first order of business is to establish an index space, and this is done
<!--l. 369--><p class="indent" > Thus the first order of business is to establish an index space, and this is done
with a call to <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdall</span></span></span> in which we specify the size of the index space <span
class="zplmr7m-">n </span>and the
allocation of the elements of the index space to the various processes making up the
MPI (virtual) parallel machine.
<!--l. 317--><p class="indent" > The index space is partitioned among processes, and this creates a mapping from
<!--l. 375--><p class="indent" > The index space is partitioned among processes, and this creates a mapping from
the &#8220;global&#8221; numbering 1<span
class="zplmr7m-">&#x2026;</span><span
class="zplmr7m-">n </span>to a numbering &#8220;local&#8221; to each process; each process <span
@ -400,7 +400,7 @@ numbering.
<!--l. 327--><p class="indent" > For a given index space 1<span
<!--l. 385--><p class="indent" > For a given index space 1<span
class="zplmr7m-">&#x2026;</span><span
class="zplmr7m-">n </span>there are many possible associated topologies, i.e.
many different discretization stencils; thus the description of the index space is not
@ -430,99 +430,99 @@ class="zplmr7m-">A</span>, and thus they have to be fetched from (neighbouring)
processes. The descriptor of the index space is built exactly for the purpose
of properly sequencing the communication steps required to achieve this
objective.
<!--l. 343--><p class="indent" > A simple application structure will walk through the index space allocation,
<!--l. 401--><p class="indent" > A simple application structure will walk through the index space allocation,
matrix/vector creation and linear system solution as follows:
<ol class="enumerate1" >
<li
class="enumerate" id="x4-6002x1">
<!--l. 347--><p class="noindent" >Initialize parallel environment with <span class="obeylines-h"><span class="verb"><span
class="enumerate" id="x5-7002x1">
<!--l. 405--><p class="noindent" >Initialize parallel environment with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_init</span></span></span>;
</li>
<li
class="enumerate" id="x4-6004x2">
<!--l. 348--><p class="noindent" >Initialize index space with <span class="obeylines-h"><span class="verb"><span
class="enumerate" id="x5-7004x2">
<!--l. 406--><p class="noindent" >Initialize index space with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdall</span></span></span>;
</li>
<li
class="enumerate" id="x4-6006x3">
<!--l. 349--><p class="noindent" >Allocate sparse matrix and dense vectors with <span class="obeylines-h"><span class="verb"><span
class="enumerate" id="x5-7006x3">
<!--l. 407--><p class="noindent" >Allocate sparse matrix and dense vectors with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spall</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geall</span></span></span>;
</li>
<li
class="enumerate" id="x4-6008x4">
<!--l. 351--><p class="noindent" >Loop over all local rows, generate matrix and vector entries, and insert
class="enumerate" id="x5-7008x4">
<!--l. 409--><p class="noindent" >Loop over all local rows, generate matrix and vector entries, and insert
them with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spins</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geins</span></span></span>
</li>
<li
class="enumerate" id="x4-6010x5">
<!--l. 353--><p class="noindent" >Assemble the various entities:
class="enumerate" id="x5-7010x5">
<!--l. 411--><p class="noindent" >Assemble the various entities:
<ol class="enumerate2" >
<li
class="enumerate" id="x4-6012x1">
<!--l. 355--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span
class="enumerate" id="x5-7012x1">
<!--l. 413--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdasb</span></span></span>,
</li>
<li
class="enumerate" id="x4-6014x2">
<!--l. 356--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span
class="enumerate" id="x5-7014x2">
<!--l. 414--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spasb</span></span></span>,
</li>
<li
class="enumerate" id="x4-6016x3">
<!--l. 357--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span
class="enumerate" id="x5-7016x3">
<!--l. 415--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geasb</span></span></span>;</li></ol>
</li>
<li
class="enumerate" id="x4-6018x6">
<!--l. 359--><p class="noindent" >Choose the preconditioner to be used with <span class="obeylines-h"><span class="verb"><span
class="enumerate" id="x5-7018x6">
<!--l. 417--><p class="noindent" >Choose the preconditioner to be used with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">prec%init</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">prec%set</span></span></span>, and build it with
<span class="obeylines-h"><span class="verb"><span
class="cmtt-10">prec%build</span></span></span><span class="footnote-mark"><a
href="userhtml7.html#fn3x0"><sup class="textsuperscript">3</sup></a></span><a
id="x4-6019f3"></a> ;
href="userhtml8.html#fn3x0"><sup class="textsuperscript">3</sup></a></span><a
id="x5-7019f3"></a> ;
</li>
<li
class="enumerate" id="x4-6022x7">
<!--l. 364--><p class="noindent" >Call one of the iterative drivers with the method of choice, e.g. <span class="obeylines-h"><span class="verb"><span
class="enumerate" id="x5-7022x7">
<!--l. 422--><p class="noindent" >Call one of the iterative drivers with the method of choice, e.g. <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_krylov</span></span></span>
with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">bicgstab</span></span></span>.</li></ol>
<!--l. 367--><p class="noindent" >This is the structure of the sample programs in the directory <span class="obeylines-h"><span class="verb"><span
<!--l. 425--><p class="noindent" >This is the structure of the sample programs in the directory <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">test/pargen/</span></span></span>.
<!--l. 370--><p class="indent" > For a simulation in which the same discretization mesh is used over multiple
<!--l. 428--><p class="indent" > For a simulation in which the same discretization mesh is used over multiple
time steps, the following structure may be more appropriate:
<ol class="enumerate1" >
<li
class="enumerate" id="x4-6024x1">
<!--l. 373--><p class="noindent" >Initialize parallel environment with <span class="obeylines-h"><span class="verb"><span
class="enumerate" id="x5-7024x1">
<!--l. 431--><p class="noindent" >Initialize parallel environment with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_init</span></span></span>
</li>
<li
class="enumerate" id="x4-6026x2">
<!--l. 374--><p class="noindent" >Initialize index space with <span class="obeylines-h"><span class="verb"><span
class="enumerate" id="x5-7026x2">
<!--l. 432--><p class="noindent" >Initialize index space with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdall</span></span></span>
</li>
<li
class="enumerate" id="x4-6028x3">
<!--l. 375--><p class="noindent" >Loop over the topology of the discretization mesh and build the
class="enumerate" id="x5-7028x3">
<!--l. 433--><p class="noindent" >Loop over the topology of the discretization mesh and build the
descriptor with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdins</span></span></span>;
</li>
<li
class="enumerate" id="x4-6030x4">
<!--l. 377--><p class="noindent" >Assemble the descriptor with <span class="obeylines-h"><span class="verb"><span
class="enumerate" id="x5-7030x4">
<!--l. 435--><p class="noindent" >Assemble the descriptor with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdasb</span></span></span>;
</li>
<li
class="enumerate" id="x4-6032x5">
<!--l. 378--><p class="noindent" >Allocate the sparse matrices and dense vectors with <span class="obeylines-h"><span class="verb"><span
class="enumerate" id="x5-7032x5">
<!--l. 436--><p class="noindent" >Allocate the sparse matrices and dense vectors with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spall</span></span></span> and
<span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geall</span></span></span>;
@ -531,57 +531,57 @@ class="cmtt-10">psb_geall</span></span></span>;
</li>
<li
class="enumerate" id="x4-6034x6">
<!--l. 380--><p class="noindent" >Loop over the time steps:
class="enumerate" id="x5-7034x6">
<!--l. 438--><p class="noindent" >Loop over the time steps:
<ol class="enumerate2" >
<li
class="enumerate" id="x4-6036x1">
<!--l. 382--><p class="noindent" >If after the first time step, reinitialize the sparse matrix with <span class="obeylines-h"><span class="verb"><span
class="enumerate" id="x5-7036x1">
<!--l. 440--><p class="noindent" >If after the first time step, reinitialize the sparse matrix with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_sprn</span></span></span>;
also zero out the dense vectors;
</li>
<li
class="enumerate" id="x4-6038x2">
<!--l. 385--><p class="noindent" >Loop over the mesh, generate the coefficients and insert/update
class="enumerate" id="x5-7038x2">
<!--l. 443--><p class="noindent" >Loop over the mesh, generate the coefficients and insert/update
them with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spins</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geins</span></span></span>;
</li>
<li
class="enumerate" id="x4-6040x3">
<!--l. 387--><p class="noindent" >Assemble with <span class="obeylines-h"><span class="verb"><span
class="enumerate" id="x5-7040x3">
<!--l. 445--><p class="noindent" >Assemble with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spasb</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geasb</span></span></span>;
</li>
<li
class="enumerate" id="x4-6042x4">
<!--l. 388--><p class="noindent" >
class="enumerate" id="x5-7042x4">
<!--l. 446--><p class="noindent" >
</li>
<li
class="enumerate" id="x4-6044x5">
<!--l. 388--><p class="noindent" >Choose the preconditioner to be used with <span class="obeylines-h"><span class="verb"><span
class="enumerate" id="x5-7044x5">
<!--l. 446--><p class="noindent" >Choose the preconditioner to be used with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">prec%init</span></span></span> and
<span class="obeylines-h"><span class="verb"><span
class="cmtt-10">prec%set</span></span></span>, and build it with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">prec%build</span></span></span>;
</li>
<li
class="enumerate" id="x4-6046x6">
<!--l. 391--><p class="noindent" >Call one of the iterative drivers with the method of choice, e.g.
class="enumerate" id="x5-7046x6">
<!--l. 449--><p class="noindent" >Call one of the iterative drivers with the method of choice, e.g.
<span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_krylov</span></span></span> with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">bicgstab</span></span></span>.</li></ol>
</li></ol>
<!--l. 395--><p class="noindent" >The insertion routines will be called as many times as needed; they only need to be
<!--l. 453--><p class="noindent" >The insertion routines will be called as many times as needed; they only need to be
called on the data that is actually allocated to the current process, i.e. each process
generates its own data.
<!--l. 400--><p class="indent" > In principle there is no specific order in the calls to <span class="obeylines-h"><span class="verb"><span
<!--l. 458--><p class="indent" > In principle there is no specific order in the calls to <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spins</span></span></span>, nor is there a
requirement to build a matrix row in its entirety before calling the routine; this
allows the application programmer to walk through the discretization mesh element
by element, generating the main part of a given matrix row but also contributions to
the rows corresponding to neighbouring elements.
<!--l. 407--><p class="indent" > From a functional point of view it is even possible to execute one call for each
<!--l. 465--><p class="indent" > From a functional point of view it is even possible to execute one call for each
nonzero coefficient; however this would have a substantial computational
overhead. It is therefore advisable to pack a certain amount of data into each
call to the insertion routine, say touching on a few tens of rows; the best
@ -595,23 +595,23 @@ process and pass it in a single call to <span class="obeylines-h"><span class="v
class="cmtt-10">psb_spins</span></span></span>; this, however, would entail a
doubling of memory occupation, and thus would be almost always far from
optimal.
<!--l. 420--><p class="noindent" >
<!--l. 478--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">2.3.1 </span> <a
id="x4-70002.3.1"></a>User-defined index mappings</h5>
<!--l. 422--><p class="noindent" >PSBLAS supports user-defined global to local index mappings, subject to the
id="x5-80002.3.1"></a>User-defined index mappings</h5>
<!--l. 480--><p class="noindent" >PSBLAS supports user-defined global to local index mappings, subject to the
constraints outlined in sec.&#x00A0;<a
href="#x4-60002.3">2.3<!--tex4ht:ref: sec:appstruct --></a>:
href="#x5-70002.3">2.3<!--tex4ht:ref: sec:appstruct --></a>:
<ol class="enumerate1" >
<li
class="enumerate" id="x4-7002x1">
<!--l. 425--><p class="noindent" >The set of indices owned locally must be mapped to the set 1<span
class="enumerate" id="x5-8002x1">
<!--l. 483--><p class="noindent" >The set of indices owned locally must be mapped to the set 1<span
class="zplmr7m-">&#x2026;</span><span
class="zplmr7m-">n</span><sub>row<sub><span
class="zplmr7m-x-x-60">i</span></sub></sub>;
</li>
<li
class="enumerate" id="x4-7004x2">
<!--l. 427--><p class="noindent" >The set of halo points must be mapped to the set <span
class="enumerate" id="x5-8004x2">
<!--l. 485--><p class="noindent" >The set of halo points must be mapped to the set <span
class="zplmr7m-">n</span><sub>row<sub><span
class="zplmr7m-x-x-60">i</span></sub></sub> <span
class="zplmr7t-">+ </span>1<span
@ -619,14 +619,14 @@ class="zplmr7m-">&#x2026;</span><span
class="zplmr7m-">n</span><sub>col<sub>
<span
class="zplmr7m-x-x-60">i</span></sub></sub>;</li></ol>
<!--l. 430--><p class="noindent" >but otherwise the mapping is arbitrary. The user application is responsible for ensuring
<!--l. 488--><p class="noindent" >but otherwise the mapping is arbitrary. The user application is responsible for ensuring
consistency of this mapping; some errors may be caught by the library, but
this is not guaranteed. The application structure to support this usage is as
follows:
<ol class="enumerate1" >
<li
class="enumerate" id="x4-7006x1">
<!--l. 436--><p class="noindent" >Initialize index
class="enumerate" id="x5-8006x1">
<!--l. 494--><p class="noindent" >Initialize index
space with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdall(ictx,desc,info,vl=vl,lidx=lidx)</span></span></span> passing the
vectors <span class="obeylines-h"><span class="verb"><span
@ -635,24 +635,24 @@ class="cmtt-10">vl(:)</span></span></span> containing the set of global indices
class="cmtt-10">lidx(:)</span></span></span> containing the corresponding local indices;
</li>
<li
class="enumerate" id="x4-7008x2">
<!--l. 441--><p class="noindent" >Add the halo points <span class="obeylines-h"><span class="verb"><span
class="enumerate" id="x5-8008x2">
<!--l. 499--><p class="noindent" >Add the halo points <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">ja(:)</span></span></span> and their associated local indices <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">lidx(:)</span></span></span>
with one or more calls to <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdins(nz,ja,desc,info,lidx=lidx)</span></span></span>;
</li>
<li
class="enumerate" id="x4-7010x3">
<!--l. 444--><p class="noindent" >Assemble the descriptor with <span class="obeylines-h"><span class="verb"><span
class="enumerate" id="x5-8010x3">
<!--l. 502--><p class="noindent" >Assemble the descriptor with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdasb</span></span></span>;
</li>
<li
class="enumerate" id="x4-7012x4">
class="enumerate" id="x5-8012x4">
<!--l. 445--><p class="noindent" >Build the sparse matrices and vectors, optionally making use in
<!--l. 503--><p class="noindent" >Build the sparse matrices and vectors, optionally making use in
<span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spins</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geins</span></span></span> of the <span class="obeylines-h"><span class="verb"><span
@ -661,41 +661,41 @@ class="cmtt-10">local</span></span></span> argument specifying that the
class="cmtt-10">ia</span></span></span>, <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">ja</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">irw</span></span></span>, respectively, are already local indices.</li></ol>
<!--l. 452--><p class="noindent" >
<!--l. 510--><p class="noindent" >
<h4 class="subsectionHead"><span class="titlemark">2.4 </span> <a
id="x4-80002.4"></a>Programming model</h4>
<!--l. 454--><p class="noindent" >The PSBLAS library is based on the Single Program Multiple Data (SPMD)
id="x5-90002.4"></a>Programming model</h4>
<!--l. 512--><p class="noindent" >The PSBLAS library is based on the Single Program Multiple Data (SPMD)
programming model: each process participating in the computation performs the
same actions on a chunk of data. Parallelism is thus data-driven.
<!--l. 459--><p class="indent" > Because of this structure, many subroutines coordinate their action across the
<!--l. 517--><p class="indent" > Because of this structure, many subroutines coordinate their action across the
various processes, thus providing an implicit synchronization point, and therefore
<span
class="pplri7t-">must </span>be called simultaneously by all processes participating in the computation. This
is certainly true for the data allocation and assembly routines, for all the
computational routines and for some of the tools routines.
<!--l. 467--><p class="indent" > However there are many cases where no synchronization, and indeed no
<!--l. 525--><p class="indent" > However there are many cases where no synchronization, and indeed no
communication among processes, is implied; for instance, all the routines in sec.&#x00A0;<a
href="userhtmlse3.html#x8-90003">3<!--tex4ht:ref: sec:datastruct --></a>
href="userhtmlse3.html#x9-100003">3<!--tex4ht:ref: sec:datastruct --></a>
are only acting on the local data structures, and thus may be called independently.
The most important case is that of the coefficient insertion routines: since the number
of coefficients in the sparse and dense matrices varies among the processors, and
since the user is free to choose an arbitrary order in building the matrix entries,
these routines cannot imply a synchronization.
<!--l. 477--><p class="indent" > Throughout this user&#8217;s guide each subroutine will be clearly indicated
<!--l. 535--><p class="indent" > Throughout this user&#8217;s guide each subroutine will be clearly indicated
as:
<dl class="description"><dt class="description">
<!--l. 480--><p class="noindent" >
<!--l. 538--><p class="noindent" >
<span
class="pplb7t-">Synchronous:</span> </dt><dd
class="description">
<!--l. 480--><p class="noindent" >must be called simultaneously by all the processes in the relevant
<!--l. 538--><p class="noindent" >must be called simultaneously by all the processes in the relevant
communication context;
</dd><dt class="description">
<!--l. 482--><p class="noindent" >
<!--l. 540--><p class="noindent" >
<span
class="pplb7t-">Asynchronous:</span> </dt><dd
class="description">
<!--l. 482--><p class="noindent" >may be called in a totally independent manner.</dd></dl>
<!--l. 540--><p class="noindent" >may be called in a totally independent manner.</dd></dl>

@ -17,7 +17,7 @@ href="userhtmlse2.html#tailuserhtmlse2.html" >prev-tail</a>] [<a
href="#tailuserhtmlse3.html">tail</a>] [<a
href="userhtml.html#userhtmlse6.html" >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">3 </span> <a
id="x8-90003"></a>Data Structures and Classes</h3>
id="x9-100003"></a>Data Structures and Classes</h3>
<!--l. 5--><p class="noindent" >In this chapter we illustrate the data structures used for the definition of routine
interfaces. They include data structures for sparse matrices, communication
descriptors and preconditioners.
@ -93,7 +93,7 @@ not needed by the end-user, and therefore are described in the developer&#8217;s
documentation.
<!--l. 48--><p class="noindent" >
<h4 class="subsectionHead"><span class="titlemark">3.1 </span> <a
id="x8-100003.1"></a>Descriptor data structure</h4>
id="x9-110003.1"></a>Descriptor data structure</h4>
<!--l. 50--><p class="noindent" >All the general matrix information and elements to be exchanged among processes
are stored within a data structure of the type <a
id="descdata"></a><span
@ -105,7 +105,7 @@ and other operations that are necessary for implementing the various algorithms
interest to us.
<!--l. 57--><p class="indent" > The data structure itself <code class="lstinline"><span style="color:#000000">psb_desc_type</span></code> can be treated as an opaque object
handled via the tools routines of Sec.&#x00A0;<a
href="userhtmlse6.html#x11-770006">6<!--tex4ht:ref: sec:toolsrout --></a> or the query routines detailed below;
href="userhtmlse6.html#x12-780006">6<!--tex4ht:ref: sec:toolsrout --></a> or the query routines detailed below;
nevertheless we include here a description for the curious reader.
<!--l. 63--><p class="indent" > First we describe the <code class="lstinline"><span style="color:#000000">psb_indx_map</span></code> type. This is a data structure that keeps track
of a certain number of basic issues such as:
@ -155,7 +155,7 @@ communication among processes, and thus is a synchronous method. The
choice of a specific dynamic type for the index map is made at the time the
descriptor is initially allocated, according to the mode of initialization (see
also&#x00A0;<a
href="userhtmlse6.html#x11-770006">6<!--tex4ht:ref: sec:toolsrout --></a>).
href="userhtmlse6.html#x12-780006">6<!--tex4ht:ref: sec:toolsrout --></a>).
<!--l. 98--><p class="indent" > The descriptor contents are as follows:
<dl class="description"><dt class="description">
<!--l. 100--><p class="noindent" >
@ -179,26 +179,26 @@ class="description">
<ol class="enumerate1" >
<li
class="enumerate" id="x8-10002x1">
class="enumerate" id="x9-11002x1">
<!--l. 106--><p class="noindent" >Process identifier;
</li>
<li
class="enumerate" id="x8-10004x2">
class="enumerate" id="x9-11004x2">
<!--l. 107--><p class="noindent" >Number of points to be received;
</li>
<li
class="enumerate" id="x8-10006x3">
class="enumerate" id="x9-11006x3">
<!--l. 108--><p class="noindent" >Indices of points to be received;
</li>
<li
class="enumerate" id="x8-10008x4">
class="enumerate" id="x9-11008x4">
<!--l. 109--><p class="noindent" >Number of points to be sent;
</li>
<li
class="enumerate" id="x8-10010x5">
class="enumerate" id="x9-11010x5">
<!--l. 110--><p class="noindent" >Indices of points to be sent;</li></ol>
<!--l. 114--><p class="noindent" >Specified as: a vector of integer type, see&#x00A0;<a
href="#x8-460003.3">3.3<!--tex4ht:ref: sec:vecttype --></a>.
href="#x9-470003.3">3.3<!--tex4ht:ref: sec:vecttype --></a>.
</dd><dt class="description">
<!--l. 115--><p class="noindent" >
<span
@ -208,7 +208,7 @@ class="description">
<!--l. 115--><p class="noindent" >A list of element indices to be exchanged to implement the mapping between a
base descriptor and a descriptor with overlap. <br
class="newline" />Specified as: a vector of integer type, see&#x00A0;<a
href="#x8-460003.3">3.3<!--tex4ht:ref: sec:vecttype --></a>.
href="#x9-470003.3">3.3<!--tex4ht:ref: sec:vecttype --></a>.
</dd><dt class="description">
<!--l. 119--><p class="noindent" >
<span
@ -219,29 +219,29 @@ class="description">
the previous vector:
<ol class="enumerate1" >
<li
class="enumerate" id="x8-10012x1">
class="enumerate" id="x9-11012x1">
<!--l. 122--><p class="noindent" >Process identifier;
</li>
<li
class="enumerate" id="x8-10014x2">
class="enumerate" id="x9-11014x2">
<!--l. 123--><p class="noindent" >Number of points to be received;
</li>
<li
class="enumerate" id="x8-10016x3">
class="enumerate" id="x9-11016x3">
<!--l. 124--><p class="noindent" >Indices of points to be received;
</li>
<li
class="enumerate" id="x8-10018x4">
class="enumerate" id="x9-11018x4">
<!--l. 125--><p class="noindent" >Number of points to be sent;
</li>
<li
class="enumerate" id="x8-10020x5">
class="enumerate" id="x9-11020x5">
<!--l. 126--><p class="noindent" >Indices of points to be sent;</li></ol>
<!--l. 130--><p class="noindent" >Specified as: a vector of integer type, see&#x00A0;<a
href="#x8-460003.3">3.3<!--tex4ht:ref: sec:vecttype --></a>.
href="#x9-470003.3">3.3<!--tex4ht:ref: sec:vecttype --></a>.
</dd><dt class="description">
<!--l. 131--><p class="noindent" >
<span
@ -252,7 +252,7 @@ class="description">
<!--l. 131--><p class="noindent" >A list to retrieve the value of each overlap element from the respective master
process.<br
class="newline" />Specified as: a vector of integer type, see&#x00A0;<a
href="#x8-460003.3">3.3<!--tex4ht:ref: sec:vecttype --></a>.
href="#x9-470003.3">3.3<!--tex4ht:ref: sec:vecttype --></a>.
</dd><dt class="description">
<!--l. 134--><p class="noindent" >
<span
@ -262,15 +262,15 @@ class="description">
<!--l. 134--><p class="noindent" >For all overlap points belonging to the current process:
<ol class="enumerate1" >
<li
class="enumerate" id="x8-10022x1">
class="enumerate" id="x9-11022x1">
<!--l. 137--><p class="noindent" >Overlap point index;
</li>
<li
class="enumerate" id="x8-10024x2">
class="enumerate" id="x9-11024x2">
<!--l. 138--><p class="noindent" >Number of processes sharing that overlap point;
</li>
<li
class="enumerate" id="x8-10026x3">
class="enumerate" id="x9-11026x3">
<!--l. 139--><p class="noindent" >Index of a &#8220;master&#8221; process;</li></ol>
<!--l. 141--><p class="noindent" >Specified as: an allocatable integer array of rank two.
</dd><dt class="description">
@ -283,7 +283,7 @@ class="description">
processes.</dd></dl>
<!--l. 145--><p class="noindent" >The Fortran&#x00A0;2003 declaration for <code class="lstinline"><span style="color:#000000">psb_desc_type</span></code> structures is as follows:
<!--l. 147--><p class="indent" > <a
id="x8-10027r1"></a><hr class="float"><div class="float"
id="x9-11027r1"></a><hr class="float"><div class="float"
>
@ -306,7 +306,7 @@ end&#x00A0;type&#x00A0;psb_desc_type
<br /> <div class="caption"
><span class="id">Listing 1: </span><span
class="content">The PSBLAS defined data type that contains the communication
descriptor.</span></div><!--tex4ht:label?: x8-10027r1 -->
descriptor.</span></div><!--tex4ht:label?: x9-11027r1 -->
@ -330,10 +330,10 @@ class="description">
sparse matrix, such as matrix-vector products, are only possible in this
state.</dd></dl>
<h5 class="subsubsectionHead"><span class="titlemark">3.1.1 </span> <a
id="x8-110003.1.1"></a>Descriptor Methods</h5>
id="x9-120003.1.1"></a>Descriptor Methods</h5>
<!--l. 199--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.1.2 </span> <a
id="x8-120003.1.2"></a>get_local_rows &#8212; Get number of local rows</h5>
id="x9-130003.1.2"></a>get_local_rows &#8212; Get number of local rows</h5>
@ -377,7 +377,7 @@ class="pplb7t-">Function value</span> </dt><dd
class="description">
<!--l. 217--><p class="noindent" >The number of local rows, i.e. the number of rows owned by the current
process; as explained in&#x00A0;<a
href="userhtmlse1.html#x3-20001">1<!--tex4ht:ref: sec:intro --></a>, it is equal to <span
href="userhtmlse1.html#x4-30001">1<!--tex4ht:ref: sec:intro --></a>, it is equal to <span
class="zplmr7y-">|<img
src="zplmr7y-49.png" alt="I" class="x-x-49" /></span><sub><span
class="zplmr7m-x-x-76">i</span></sub><span
@ -390,7 +390,7 @@ class="zplmr7y-">|</span>. The returned value is
specific to the calling process.</dd></dl>
<!--l. 224--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.1.3 </span> <a
id="x8-130003.1.3"></a>get_local_cols &#8212; Get number of local cols</h5>
id="x9-140003.1.3"></a>get_local_cols &#8212; Get number of local cols</h5>
@ -434,7 +434,7 @@ class="pplb7t-">Function value</span> </dt><dd
class="description">
<!--l. 242--><p class="noindent" >The number of local cols, i.e. the number of indices used by the current
process, including both local and halo indices; as explained in&#x00A0;<a
href="userhtmlse1.html#x3-20001">1<!--tex4ht:ref: sec:intro --></a>, it is
href="userhtmlse1.html#x4-30001">1<!--tex4ht:ref: sec:intro --></a>, it is
equal to <span
class="zplmr7y-">|<img
src="zplmr7y-49.png" alt="I" class="x-x-49" /></span><sub><span
@ -453,7 +453,7 @@ class="zplmr7y-">|</span>. The returned value is specific to the calling
process.</dd></dl>
<!--l. 250--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.1.4 </span> <a
id="x8-140003.1.4"></a>get_global_rows &#8212; Get number of global rows</h5>
id="x9-150003.1.4"></a>get_global_rows &#8212; Get number of global rows</h5>
@ -498,7 +498,7 @@ class="description">
<!--l. 268--><p class="noindent" >The number of global rows, i.e. the size of the global index space.</dd></dl>
<!--l. 272--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.1.5 </span> <a
id="x8-150003.1.5"></a>get_global_cols &#8212; Get number of global cols</h5>
id="x9-160003.1.5"></a>get_global_cols &#8212; Get number of global cols</h5>
@ -544,7 +544,7 @@ class="description">
rows.</dd></dl>
<!--l. 295--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.1.6 </span> <a
id="x8-160003.1.6"></a>get_global_indices &#8212; Get vector of global indices</h5>
id="x9-170003.1.6"></a>get_global_indices &#8212; Get vector of global indices</h5>
@ -603,7 +603,7 @@ class="description">
<code class="lstinline"><span style="color:#000000">psb_lpk_</span></code> and rank 1.</dd></dl>
<!--l. 323--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.1.7 </span> <a
id="x8-170003.1.7"></a>get_context &#8212; Get communication context</h5>
id="x9-180003.1.7"></a>get_context &#8212; Get communication context</h5>
@ -648,7 +648,7 @@ class="description">
<!--l. 341--><p class="noindent" >The communication context.</dd></dl>
<!--l. 344--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.1.8 </span> <a
id="x8-180003.1.8"></a>Clone &#8212; clone current object</h5>
id="x9-190003.1.8"></a>Clone &#8212; clone current object</h5>
@ -699,7 +699,7 @@ class="description">
<!--l. 363--><p class="noindent" >Return code.</dd></dl>
<!--l. 367--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.1.9 </span> <a
id="x8-190003.1.9"></a>CNV &#8212; convert internal storage format</h5>
id="x9-200003.1.9"></a>CNV &#8212; convert internal storage format</h5>
@ -749,7 +749,7 @@ class="cmtt-10">_type</span>.</dd></dl>
and other accelerators.
<!--l. 391--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.1.10 </span> <a
id="x8-200003.1.10"></a>psb_cd_get_hash_threshold &#8212; Get threshold for index mapping
id="x9-210003.1.10"></a>psb_cd_get_hash_threshold &#8212; Get threshold for index mapping
switch</h5>
@ -780,7 +780,7 @@ class="description">
</dd></dl>
<!--l. 407--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.1.11 </span> <a
id="x8-210003.1.11"></a>psb_cd_set_hash_threshold &#8212; Set threshold for index mapping
id="x9-220003.1.11"></a>psb_cd_set_hash_threshold &#8212; Set threshold for index mapping
switch</h5>
@ -825,7 +825,7 @@ been initialized. Moreover the threshold must have the same value on all
processes.
<!--l. 435--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.1.12 </span> <a
id="x8-220003.1.12"></a>get_p_adjcncy &#8212; Get process adjacency list</h5>
id="x9-230003.1.12"></a>get_p_adjcncy &#8212; Get process adjacency list</h5>
@ -856,7 +856,7 @@ class="description">
</dd></dl>
<!--l. 451--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.1.13 </span> <a
id="x8-230003.1.13"></a>set_p_adjcncy &#8212; Set process adjacency list</h5>
id="x9-240003.1.13"></a>set_p_adjcncy &#8212; Set process adjacency list</h5>
@ -896,7 +896,7 @@ neighbours of the current process. The availability of this information may spee
the execution of the assembly call <code class="lstinline"><span style="color:#000000">psb_cdasb</span></code>.
<!--l. 474--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.1.14 </span> <a
id="x8-240003.1.14"></a>fnd_owner &#8212; Find the owner process of a set of indices</h5>
id="x9-250003.1.14"></a>fnd_owner &#8212; Find the owner process of a set of indices</h5>
@ -959,7 +959,7 @@ processes.
<!--l. 504--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.1.15 </span> <a
id="x8-250003.1.15"></a>Named Constants</h5>
id="x9-260003.1.15"></a>Named Constants</h5>
<!--l. 506--><p class="noindent" >
<dl class="description"><dt class="description">
<!--l. 507--><p class="noindent" >
@ -1051,7 +1051,7 @@ class="description">
</dd></dl>
<!--l. 539--><p class="noindent" >
<h4 class="subsectionHead"><span class="titlemark">3.2 </span> <a
id="x8-260003.2"></a>Sparse Matrix class</h4>
id="x9-270003.2"></a>Sparse Matrix class</h4>
<!--l. 541--><p class="noindent" >The <a
id="spdata"></a><span
class="cmtt-10">psb</span><span
@ -1059,9 +1059,9 @@ class="cmtt-10">_Tspmat</span><span
class="cmtt-10">_type </span>class contains all information about the local portion of the
sparse matrix and its storage mode. Its design is based on the STATE design
pattern&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#XDesignPatterns">13</a>]</span> as detailed in&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#XSparse03">11</a>]</span>; the type declaration is shown in figure&#x00A0;<a
href="#x8-26001r2">2<!--tex4ht:ref: fig:spmattype --></a> where <code class="lstinline"><span style="color:#000000">T</span></code> is
href="userhtmlli3.html#XDesignPatterns">13</a>]</span> as detailed in&#x00A0;<span class="cite">[<a
href="userhtmlli3.html#XSparse03">11</a>]</span>; the type declaration is shown in listing&#x00A0;<a
href="#x9-27001r2">2<!--tex4ht:ref: fig:spmattype --></a> where <code class="lstinline"><span style="color:#000000">T</span></code> is
a placeholder for the data type and precision variants
<dl class="description"><dt class="description">
<!--l. 549--><p class="noindent" >
@ -1104,7 +1104,7 @@ class="cmtt-10">_mat</span>; its specific layout can be chosen dynamically among
predefined types, or an entirely new storage layout can be implemented and passed
to the library at runtime via the <code class="lstinline"><span style="color:#000000">psb_spasb</span></code> routine.
<!--l. 561--><p class="indent" > <a
id="x8-26001r2"></a><hr class="float"><div class="float"
id="x9-27001r2"></a><hr class="float"><div class="float"
>
@ -1120,7 +1120,7 @@ to the library at runtime via the <code class="lstinline"><span style="color:#00
<!--l. 576--><p class="nopar" > </div></div>
<br /><div class="caption"
><span class="id">Listing 2: </span><span
class="content"> The PSBLAS defined data type that contains a sparse matrix.</span></div><!--tex4ht:label?: x8-26001r2 -->
class="content"> The PSBLAS defined data type that contains a sparse matrix.</span></div><!--tex4ht:label?: x9-27001r2 -->
@ -1185,10 +1185,10 @@ class="description">
<!--l. 609--><p class="noindent" >The only storage variant supporting the build state is COO; all other variants are
obtained by conversion to/from it.
<h5 class="subsubsectionHead"><span class="titlemark">3.2.1 </span> <a
id="x8-270003.2.1"></a>Sparse Matrix Methods</h5>
id="x9-280003.2.1"></a>Sparse Matrix Methods</h5>
<!--l. 614--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.2.2 </span> <a
id="x8-280003.2.2"></a>get_nrows &#8212; Get number of rows in a sparse matrix</h5>
id="x9-290003.2.2"></a>get_nrows &#8212; Get number of rows in a sparse matrix</h5>
@ -1233,7 +1233,7 @@ class="description">
<!--l. 633--><p class="noindent" >The number of rows of sparse matrix <code class="lstinline"><span style="color:#000000">a</span></code>.</dd></dl>
<!--l. 638--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.2.3 </span> <a
id="x8-290003.2.3"></a>get_ncols &#8212; Get number of columns in a sparse matrix</h5>
id="x9-300003.2.3"></a>get_ncols &#8212; Get number of columns in a sparse matrix</h5>
@ -1278,7 +1278,7 @@ class="description">
<!--l. 657--><p class="noindent" >The number of columns of sparse matrix <code class="lstinline"><span style="color:#000000">a</span></code>.</dd></dl>
<!--l. 662--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.2.4 </span> <a
id="x8-300003.2.4"></a>get_nnzeros &#8212; Get number of nonzero elements in a sparse matrix</h5>
id="x9-310003.2.4"></a>get_nnzeros &#8212; Get number of nonzero elements in a sparse matrix</h5>
@ -1325,13 +1325,13 @@ class="description">
class="pplb7t-">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x8-30002x1">
class="enumerate" id="x9-31002x1">
<!--l. 686--><p class="noindent" >The function value is specific to the storage format of matrix <code class="lstinline"><span style="color:#000000">a</span></code>; some
storage formats employ padding, thus the returned value for the same
matrix may be different for different storage choices.</li></ol>
<!--l. 692--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.2.5 </span> <a
id="x8-310003.2.5"></a>get_size &#8212; Get maximum number of nonzero elements in a sparse
id="x9-320003.2.5"></a>get_size &#8212; Get maximum number of nonzero elements in a sparse
matrix</h5>
@ -1378,7 +1378,7 @@ class="description">
matrix <code class="lstinline"><span style="color:#000000">a</span></code> using its current memory allocation.</dd></dl>
<!--l. 716--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.2.6 </span> <a
id="x8-320003.2.6"></a>sizeof &#8212; Get memory occupation in bytes of a sparse matrix</h5>
id="x9-330003.2.6"></a>sizeof &#8212; Get memory occupation in bytes of a sparse matrix</h5>
@ -1423,7 +1423,7 @@ class="description">
<!--l. 735--><p class="noindent" >The memory occupation in bytes.</dd></dl>
<!--l. 739--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.2.7 </span> <a
id="x8-330003.2.7"></a>get_fmt &#8212; Short description of the dynamic type</h5>
id="x9-340003.2.7"></a>get_fmt &#8212; Short description of the dynamic type</h5>
<!--l. 743--><p class="noindent" ><code class="lstinline"><span style="color:#000000">write</span><span style="color:#000000">(*,*)</span><span style="color:#000000"> </span><span style="color:#000000">a</span><span style="color:#000000">%</span><span style="color:#000000">get_fmt</span><span style="color:#000000">()</span></code>
<!--l. 746--><p class="indent" >
<dl class="description"><dt class="description">
@ -1466,7 +1466,7 @@ class="description">
values include <code class="lstinline"><span style="color:#000000">NULL</span></code>, <code class="lstinline"><span style="color:#000000">COO</span></code>, <code class="lstinline"><span style="color:#000000">CSR</span></code> and <code class="lstinline"><span style="color:#000000">CSC</span></code>.</dd></dl>
<!--l. 763--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.2.8 </span> <a
id="x8-340003.2.8"></a>is_bld, is_upd, is_asb &#8212; Status check</h5>
id="x9-350003.2.8"></a>is_bld, is_upd, is_asb &#8212; Status check</h5>
<!--l. 767--><p class="noindent" ><code class="lstinline"><span style="color:#000000">if</span><span style="color:#000000"> </span><span style="color:#000000">(</span><span style="color:#000000">a</span><span style="color:#000000">%</span><span style="color:#000000">is_bld</span><span style="color:#000000">())</span><span style="color:#000000"> </span><span style="color:#000000">then</span></code><br
class="newline" /><code class="lstinline"><span style="color:#000000">if</span><span style="color:#000000"> </span><span style="color:#000000">(</span><span style="color:#000000">a</span><span style="color:#000000">%</span><span style="color:#000000">is_upd</span><span style="color:#000000">())</span><span style="color:#000000"> </span><span style="color:#000000">then</span></code><br
class="newline" /><code class="lstinline"><span style="color:#000000">if</span><span style="color:#000000"> </span><span style="color:#000000">(</span><span style="color:#000000">a</span><span style="color:#000000">%</span><span style="color:#000000">is_asb</span><span style="color:#000000">())</span><span style="color:#000000"> </span><span style="color:#000000">then</span></code>
@ -1511,7 +1511,7 @@ class="description">
Assembled state, respectively.</dd></dl>
<!--l. 788--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.2.9 </span> <a
id="x8-350003.2.9"></a>is_lower, is_upper, is_triangle, is_unit &#8212; Format check</h5>
id="x9-360003.2.9"></a>is_lower, is_upper, is_triangle, is_unit &#8212; Format check</h5>
@ -1561,7 +1561,7 @@ class="description">
unit (i.e. assumed) diagonal.</dd></dl>
<!--l. 826--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.2.10 </span> <a
id="x8-360003.2.10"></a>cscnv &#8212; Convert to a different storage format</h5>
id="x9-370003.2.10"></a>cscnv &#8212; Convert to a different storage format</h5>
@ -1641,7 +1641,7 @@ class="description">
and other accelerators.
<!--l. 866--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.2.11 </span> <a
id="x8-370003.2.11"></a>csclip &#8212; Reduce to a submatrix</h5>
id="x9-380003.2.11"></a>csclip &#8212; Reduce to a submatrix</h5>
@ -1712,7 +1712,7 @@ class="description">
<!--l. 899--><p class="noindent" >Return code.</dd></dl>
<!--l. 902--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.2.12 </span> <a
id="x8-380003.2.12"></a>clean_zeros &#8212; Eliminate zero coefficients</h5>
id="x9-390003.2.12"></a>clean_zeros &#8212; Eliminate zero coefficients</h5>
<!--l. 904--><p class="noindent" ><code class="lstinline"><span style="color:#000000">call</span><span style="color:#000000"> </span><span style="color:#000000">a</span><span style="color:#000000">%</span><span style="color:#000000">clean_zeros</span><span style="color:#000000">(</span><span style="color:#000000">info</span><span style="color:#000000">)</span></code>
<!--l. 906--><p class="indent" > Eliminates zero coefficients explicitly stored in the input matrix.
<!--l. 908--><p class="indent" >
@ -1765,17 +1765,17 @@ class="description">
class="pplb7t-">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x8-38002x1">
class="enumerate" id="x9-39002x1">
<!--l. 923--><p class="noindent" >Depending on the internal storage format, there may still be some
amount of zero padding in the output.
</li>
<li
class="enumerate" id="x8-38004x2">
class="enumerate" id="x9-39004x2">
<!--l. 925--><p class="noindent" >Any explicit zeros on the main diagonal are always kept in the data
structure.</li></ol>
<!--l. 929--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.2.13 </span> <a
id="x8-390003.2.13"></a>get_diag &#8212; Get main diagonal</h5>
id="x9-400003.2.13"></a>get_diag &#8212; Get main diagonal</h5>
<!--l. 931--><p class="noindent" ><code class="lstinline"><span style="color:#000000">call</span><span style="color:#000000"> </span><span style="color:#000000">a</span><span style="color:#000000">%</span><span style="color:#000000">get_diag</span><span style="color:#000000">(</span><span style="color:#000000">d</span><span style="color:#000000">,</span><span style="color:#000000">info</span><span style="color:#000000">)</span></code>
<!--l. 933--><p class="indent" > Returns a copy of the main diagonal.
<dl class="description"><dt class="description">
@ -1825,7 +1825,7 @@ class="description">
<!--l. 945--><p class="noindent" >Return code.</dd></dl>
<!--l. 949--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.2.14 </span> <a
id="x8-400003.2.14"></a>clip_diag &#8212; Cut out main diagonal</h5>
id="x9-410003.2.14"></a>clip_diag &#8212; Cut out main diagonal</h5>
<!--l. 951--><p class="noindent" ><code class="lstinline"><span style="color:#000000">call</span><span style="color:#000000"> </span><span style="color:#000000">a</span><span style="color:#000000">%</span><span style="color:#000000">clip_diag</span><span style="color:#000000">(</span><span style="color:#000000">b</span><span style="color:#000000">,</span><span style="color:#000000">info</span><span style="color:#000000">)</span></code>
<!--l. 953--><p class="indent" > Returns a copy of <code class="lstinline"><span style="color:#000000">a</span></code> without the main diagonal.
<dl class="description"><dt class="description">
@ -1875,7 +1875,7 @@ class="description">
<!--l. 965--><p class="noindent" >Return code.</dd></dl>
<!--l. 969--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.2.15 </span> <a
id="x8-410003.2.15"></a>tril &#8212; Return the lower triangle</h5>
id="x9-420003.2.15"></a>tril &#8212; Return the lower triangle</h5>
@ -1961,7 +1961,7 @@ class="description">
<!--l. 1008--><p class="noindent" >Return code.</dd></dl>
<!--l. 1011--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.2.16 </span> <a
id="x8-420003.2.16"></a>triu &#8212; Return the upper triangle</h5>
id="x9-430003.2.16"></a>triu &#8212; Return the upper triangle</h5>
@ -2047,7 +2047,7 @@ class="description">
<!--l. 1049--><p class="noindent" >Return code.</dd></dl>
<!--l. 1055--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.2.17 </span> <a
id="x8-430003.2.17"></a>psb_set_mat_default &#8212; Set default storage format</h5>
id="x9-440003.2.17"></a>psb_set_mat_default &#8212; Set default storage format</h5>
<!--l. 1059--><p class="noindent" ><code class="lstinline"><span style="color:#000000">call</span><span style="color:#000000"> </span><span style="color:#000000">psb_set_mat_default</span><span style="color:#000000">(</span><span style="color:#000000">a</span><span style="color:#000000">)</span></code>
<!--l. 1061--><p class="indent" >
<dl class="description"><dt class="description">
@ -2075,7 +2075,7 @@ class="newline" />Type: required.</dd></dl>
<!--l. 1073--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.2.18 </span> <a
id="x8-440003.2.18"></a>clone &#8212; Clone current object</h5>
id="x9-450003.2.18"></a>clone &#8212; Clone current object</h5>
<!--l. 1076--><p class="noindent" ><code class="lstinline"><span style="color:#000000">call</span><span style="color:#000000"> </span><span style="color:#000000">a</span><span style="color:#000000">%</span><span style="color:#000000">clone</span><span style="color:#000000">(</span><span style="color:#000000">b</span><span style="color:#000000">,</span><span style="color:#000000">info</span><span style="color:#000000">)</span></code>
<!--l. 1078--><p class="indent" >
<dl class="description"><dt class="description">
@ -2120,7 +2120,7 @@ class="description">
<!--l. 1091--><p class="noindent" >Return code.</dd></dl>
<!--l. 1095--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.2.19 </span> <a
id="x8-450003.2.19"></a>Named Constants</h5>
id="x9-460003.2.19"></a>Named Constants</h5>
<!--l. 1097--><p class="noindent" >
<dl class="description"><dt class="description">
<!--l. 1102--><p class="noindent" >
@ -2182,7 +2182,7 @@ class="description">
description).</dd></dl>
<!--l. 1113--><p class="noindent" >
<h4 class="subsectionHead"><span class="titlemark">3.3 </span> <a
id="x8-460003.3"></a>Dense Vector Data Structure</h4>
id="x9-470003.3"></a>Dense Vector Data Structure</h4>
<!--l. 1115--><p class="noindent" >The <a
id="vdata"></a><span
class="cmtt-10">psb</span><span
@ -2198,11 +2198,11 @@ class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span>. The user will
not, in general, access the vector components directly, but rather via the routines of
sec.&#x00A0;<a
href="userhtmlse6.html#x11-770006">6<!--tex4ht:ref: sec:toolsrout --></a>. Among other simple things, we define here an extraction method that
href="userhtmlse6.html#x12-780006">6<!--tex4ht:ref: sec:toolsrout --></a>. Among other simple things, we define here an extraction method that
can be used to get a full copy of the part of the vector stored on the local
process.
<!--l. 1125--><p class="indent" >   The type declaration is shown in listing&#x00A0;<a
href="#x8-46001r3">3<!--tex4ht:ref: fig:vectype --></a> where <code class="lstinline"><span style="color:#000000">T</span></code> is a placeholder for the data
href="#x9-47001r3">3<!--tex4ht:ref: fig:vectype --></a> where <code class="lstinline"><span style="color:#000000">T</span></code> is a placeholder for the data
type and precision variants
<dl class="description"><dt class="description">
<!--l. 1129--><p class="noindent" >
@ -2243,7 +2243,7 @@ to data storage made available elsewhere outside the direct control of the
compiler/application, e.g. data stored in a graphics accelerator&#8217;s private
memory.
<!--l. 1141--><p class="indent" > <a
id="x8-46001r3"></a><hr class="float"><div class="float"
id="x9-47001r3"></a><hr class="float"><div class="float"
>
@ -2264,16 +2264,16 @@ memory.
<!--l. 1166--><p class="nopar" > </div></div>
<br /><div class="caption"
><span class="id">Listing 3: </span><span
class="content"> The PSBLAS defined data type that contains a dense vector.</span></div><!--tex4ht:label?: x8-46001r3 -->
class="content"> The PSBLAS defined data type that contains a dense vector.</span></div><!--tex4ht:label?: x9-47001r3 -->
</div><hr class="endfloat" />
<h5 class="subsubsectionHead"><span class="titlemark">3.3.1 </span> <a
id="x8-470003.3.1"></a>Vector Methods</h5>
id="x9-480003.3.1"></a>Vector Methods</h5>
<!--l. 1181--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.3.2 </span> <a
id="x8-480003.3.2"></a>get_nrows &#8212; Get number of rows in a dense vector</h5>
id="x9-490003.3.2"></a>get_nrows &#8212; Get number of rows in a dense vector</h5>
<!--l. 1184--><p class="noindent" ><code class="lstinline"><span style="color:#000000">nr</span><span style="color:#000000"> </span><span style="color:#000000">=</span><span style="color:#000000"> </span><span style="color:#000000">v</span><span style="color:#000000">%</span><span style="color:#000000">get_nrows</span><span style="color:#000000">()</span></code>
<!--l. 1186--><p class="indent" >
<dl class="description"><dt class="description">
@ -2312,7 +2312,7 @@ class="description">
<!--l. 1198--><p class="noindent" >The number of rows of dense vector <code class="lstinline"><span style="color:#000000">v</span></code>.</dd></dl>
<!--l. 1203--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.3.3 </span> <a
id="x8-490003.3.3"></a>sizeof &#8212; Get memory occupation in bytes of a dense vector</h5>
id="x9-500003.3.3"></a>sizeof &#8212; Get memory occupation in bytes of a dense vector</h5>
<!--l. 1206--><p class="noindent" ><code class="lstinline"><span style="color:#000000">memory_size</span><span style="color:#000000"> </span><span style="color:#000000">=</span><span style="color:#000000"> </span><span style="color:#000000">v</span><span style="color:#000000">%</span><span style="color:#000000">sizeof</span><span style="color:#000000">()</span></code>
<!--l. 1208--><p class="indent" >
@ -2354,7 +2354,7 @@ class="description">
<!--l. 1220--><p class="noindent" >The memory occupation in bytes.</dd></dl>
<!--l. 1224--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.3.4 </span> <a
id="x8-500003.3.4"></a>set &#8212; Set contents of the vector</h5>
id="x9-510003.3.4"></a>set &#8212; Set contents of the vector</h5>
@ -2399,7 +2399,7 @@ class="pplb7t-">required </span><br
class="newline" />Intent: <span
class="pplb7t-">in</span>.<br
class="newline" />Specified as: a number of the data type indicated in Table&#x00A0;<a
href="userhtmlse4.html#x9-56001r1">1<!--tex4ht:ref: tab:f90axpby --></a>.
href="userhtmlse4.html#x10-57001r1">1<!--tex4ht:ref: tab:f90axpby --></a>.
</dd><dt class="description">
<!--l. 1253--><p class="noindent" >
<span
@ -2429,7 +2429,7 @@ class="pplb7t-">required </span><br
class="newline" />Intent: <span
class="pplb7t-">in</span>.<br
class="newline" />Specified as: a number of the data type indicated in Table&#x00A0;<a
href="userhtmlse4.html#x9-56001r1">1<!--tex4ht:ref: tab:f90axpby --></a>.</dd></dl>
href="userhtmlse4.html#x10-57001r1">1<!--tex4ht:ref: tab:f90axpby --></a>.</dd></dl>
<!--l. 1260--><p class="noindent" >Note that a call to <code class="lstinline"><span style="color:#000000">v</span><span style="color:#000000">%</span><span style="color:#000000">zero</span><span style="color:#000000">()</span></code> is provided as a shorthand, but is equivalent to a call
to <code class="lstinline"><span style="color:#000000">v</span><span style="color:#000000">%</span><span style="color:#000000">set</span><span style="color:#000000">(</span><span style="color:#000000">zero</span><span style="color:#000000">)</span></code> with the <code class="lstinline"><span style="color:#000000">zero</span></code> constant having the appropriate type and
kind.
@ -2451,7 +2451,7 @@ class="pplb7t-">local</span><br
class="newline" /></dd></dl>
<!--l. 1270--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.3.5 </span> <a
id="x8-510003.3.5"></a>get_vect &#8212; Get a copy of the vector contents</h5>
id="x9-520003.3.5"></a>get_vect &#8212; Get a copy of the vector contents</h5>
@ -2518,7 +2518,7 @@ class="zplmr7m-">n </span>is
<!--l. 1300--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">3.3.6 </span> <a
id="x8-520003.3.6"></a>clone &#8212; Clone current object</h5>
id="x9-530003.3.6"></a>clone &#8212; Clone current object</h5>
@ -2572,7 +2572,7 @@ class="description">
<!--l. 1323--><p class="noindent" >
<h4 class="subsectionHead"><span class="titlemark">3.4 </span> <a
id="x8-530003.4"></a>Preconditioner data structure</h4>
id="x9-540003.4"></a>Preconditioner data structure</h4>
<!--l. 1325--><p class="noindent" >Our base library offers support for simple, well-known preconditioners like Diagonal
Scaling or Block Jacobi with incomplete factorization ILU(0).
<!--l. 1329--><p class="indent" > A preconditioner is held in the <a
@ -2581,7 +2581,7 @@ class="cmtt-10">psb</span><span
class="cmtt-10">_Tprec</span><span
class="cmtt-10">_type </span>data structure reported in
listing&#x00A0;<a
href="#x8-53001r4">4<!--tex4ht:ref: fig:prectype --></a>. The <code class="lstinline"><span style="color:#000000">psb_Tprec_type</span></code> data type may contain a simple preconditioning
href="#x9-54001r4">4<!--tex4ht:ref: fig:prectype --></a>. The <code class="lstinline"><span style="color:#000000">psb_Tprec_type</span></code> data type may contain a simple preconditioning
matrix with the associated communication descriptor. The internal preconditioner is
allocated appropriately with the dynamic type corresponding to the desired
preconditioner.
@ -2589,7 +2589,7 @@ preconditioner.
<!--l. 1348--><p class="indent" > <a
id="x8-53001r4"></a><hr class="float"><div class="float"
id="x9-54001r4"></a><hr class="float"><div class="float"
>
@ -2607,15 +2607,15 @@ preconditioner.
<!--l. 1366--><p class="nopar" > </div></div>
<br /> <div class="caption"
><span class="id">Listing 4: </span><span
class="content">The PSBLAS defined data type that contains a preconditioner.</span></div><!--tex4ht:label?: x8-53001r4 -->
class="content">The PSBLAS defined data type that contains a preconditioner.</span></div><!--tex4ht:label?: x9-54001r4 -->
</div><hr class="endfloat" />
<h4 class="subsectionHead"><span class="titlemark">3.5 </span> <a
id="x8-540003.5"></a>Heap data structure</h4>
id="x9-550003.5"></a>Heap data structure</h4>
<!--l. 1402--><p class="noindent" >Among the tools routines of sec.&#x00A0;<a
href="userhtmlse6.html#x11-770006">6<!--tex4ht:ref: sec:toolsrout --></a>, we have a number of sorting utilities; the heap
href="userhtmlse6.html#x12-780006">6<!--tex4ht:ref: sec:toolsrout --></a>, we have a number of sorting utilities; the heap
sort is implemented in terms of heaps having the following signatures:
<dl class="description"><dt class="description">
<!--l. 1406--><p class="noindent" >

@ -17,12 +17,12 @@ href="userhtmlse3.html#tailuserhtmlse3.html" >prev-tail</a>] [<a
href="#tailuserhtmlse4.html">tail</a>] [<a
href="userhtml.html#userhtmlse7.html" >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">4 </span> <a
id="x9-550004"></a>Computational routines</h3>
id="x10-560004"></a>Computational routines</h3>
<h4 class="subsectionHead"><span class="titlemark">4.1 </span> <a
id="x9-560004.1"></a>psb_geaxpby &#8212; General Dense Matrix Sum</h4>
id="x10-570004.1"></a>psb_geaxpby &#8212; General Dense Matrix Sum</h4>
<!--l. 10--><p class="noindent" >This subroutine is an interface to the computational kernel for dense matrix
sum:
<div class="math-display" >
@ -36,7 +36,7 @@ src="userhtml1x.png" alt="y &#x2190; &#x03B1; x+ &#x03B2;y
<!--l. 23--><p class="indent" > <a
id="x9-56001r1"></a><hr class="float"><div class="float"
id="x10-57001r1"></a><hr class="float"><div class="float"
>
@ -81,7 +81,7 @@ class="hline"><td><hr></td><td><hr></td></tr><tr
class="td11"> </td></tr></table> </div></div>
<br /><div class="caption"
><span class="id">Table&#x00A0;1: </span><span
class="content">Data types</span></div><!--tex4ht:label?: x9-56001r1 -->
class="content">Data types</span></div><!--tex4ht:label?: x10-57001r1 -->
@ -113,7 +113,7 @@ class="pplb7t-">required </span><br
class="newline" />Intent: <span
class="pplb7t-">in</span>.<br
class="newline" />Specified as: a number of the data type indicated in Table&#x00A0;<a
href="#x9-56001r1">1<!--tex4ht:ref: tab:f90axpby --></a>.
href="#x10-57001r1">1<!--tex4ht:ref: tab:f90axpby --></a>.
</dd><dt class="description">
<!--l. 45--><p class="noindent" >
<span
@ -134,7 +134,7 @@ class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>&#x00A0;containing numbers of type specified in Table&#x00A0;<a
href="#x9-56001r1">1<!--tex4ht:ref: tab:f90axpby --></a>. The
href="#x10-57001r1">1<!--tex4ht:ref: tab:f90axpby --></a>. The
rank of <span
class="zplmr7m-">x </span>must be the same as that of <span
class="zplmr7m-">y</span>.
@ -152,7 +152,7 @@ class="pplb7t-">required </span><br
class="newline" />Intent: <span
class="pplb7t-">in</span>.<br
class="newline" />Specified as: a number of the data type indicated in Table&#x00A0;<a
href="#x9-56001r1">1<!--tex4ht:ref: tab:f90axpby --></a>.
href="#x10-57001r1">1<!--tex4ht:ref: tab:f90axpby --></a>.
</dd><dt class="description">
<!--l. 58--><p class="noindent" >
<span
@ -173,7 +173,7 @@ class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>&#x00A0;containing numbers of the type indicated in Table&#x00A0;<a
href="#x9-56001r1">1<!--tex4ht:ref: tab:f90axpby --></a>.
href="#x10-57001r1">1<!--tex4ht:ref: tab:f90axpby --></a>.
The rank of <span
class="zplmr7m-">y </span>must be the same as that of <span
class="zplmr7m-">x</span>.
@ -222,7 +222,7 @@ class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>&#x00A0;containing numbers of the type indicated in Table&#x00A0;<a
href="#x9-56001r1">1<!--tex4ht:ref: tab:f90axpby --></a>.
href="#x10-57001r1">1<!--tex4ht:ref: tab:f90axpby --></a>.
</dd><dt class="description">
<!--l. 98--><p class="noindent" >
<span
@ -240,7 +240,7 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
<h4 class="subsectionHead"><span class="titlemark">4.2 </span> <a
id="x9-570004.2"></a>psb_gedot &#8212; Dot Product</h4>
id="x10-580004.2"></a>psb_gedot &#8212; Dot Product</h4>
<!--l. 113--><p class="noindent" >This function computes the dot product between two vectors <span
class="zplmr7m-">x </span>and <span
class="zplmr7m-">y</span>.<br
@ -264,7 +264,7 @@ src="userhtml3x.png" alt="dot &#x2190; xHy
<!--l. 128--><p class="indent" > <a
id="x9-57001r2"></a><hr class="float"><div class="float"
id="x10-58001r2"></a><hr class="float"><div class="float"
>
@ -308,7 +308,7 @@ class="hline"><td><hr></td><td><hr></td></tr><tr
class="td11"> </td></tr></table> </div></div>
<br /><div class="caption"
><span class="id">Table&#x00A0;2: </span><span
class="content">Data types</span></div><!--tex4ht:label?: x9-57001r2 -->
class="content">Data types</span></div><!--tex4ht:label?: x10-58001r2 -->
@ -346,7 +346,7 @@ class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>&#x00A0;containing numbers of type specified in Table&#x00A0;<a
href="#x9-57001r2">2<!--tex4ht:ref: tab:f90dot --></a>. The
href="#x10-58001r2">2<!--tex4ht:ref: tab:f90dot --></a>. The
rank of <span
class="zplmr7m-">x </span>must be the same as that of <span
class="zplmr7m-">y</span>.
@ -370,7 +370,7 @@ class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>&#x00A0;containing numbers of type specified in Table&#x00A0;<a
href="#x9-57001r2">2<!--tex4ht:ref: tab:f90dot --></a>. The
href="#x10-58001r2">2<!--tex4ht:ref: tab:f90dot --></a>. The
rank of <span
class="zplmr7m-">y </span>must be the same as that of <span
class="zplmr7m-">x</span>.
@ -428,7 +428,7 @@ class="newline" />Scope: <span
class="pplb7t-">global </span>unless the optional variable <code class="lstinline"><span style="color:#000000">global</span><span style="color:#000000">=.</span><span style="color:#000000">false</span><span style="color:#000000">.</span></code> has been
specified<br
class="newline" />Specified as: a number of the data type indicated in Table&#x00A0;<a
href="#x9-57001r2">2<!--tex4ht:ref: tab:f90dot --></a>.
href="#x10-58001r2">2<!--tex4ht:ref: tab:f90dot --></a>.
</dd><dt class="description">
<!--l. 196--><p class="noindent" >
<span
@ -446,14 +446,14 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x9-57003x1">
class="enumerate" id="x10-58003x1">
<!--l. 205--><p class="noindent" >The computation of a global result requires a global communication, which
entails a significant overhead. It may be necessary and/or advisable
to compute multiple dot products at the same time; in this case, it is
possible to improve the runtime efficiency by using the following scheme:
<!--l. 218-->
<pre class="lstlisting" id="listing-103"><span class="label"><a
id="x9-57004r1"></a></span><span
id="x10-58004r1"></a></span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span style="color:#000000"><span
@ -475,7 +475,7 @@ class="cmtt-10">=.</span></span><span style="color:#000000"><span
class="cmtt-10">false</span></span><span style="color:#000000"><span
class="cmtt-10">.)</span></span>
<span class="label"><a
id="x9-57005r2"></a></span><span
id="x10-58005r2"></a></span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span style="color:#000000"><span
@ -497,7 +497,7 @@ class="cmtt-10">=.</span></span><span style="color:#000000"><span
class="cmtt-10">false</span></span><span style="color:#000000"><span
class="cmtt-10">.)</span></span>
<span class="label"><a
id="x9-57006r3"></a></span><span
id="x10-58006r3"></a></span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span style="color:#000000"><span
@ -519,7 +519,7 @@ class="cmtt-10">=.</span></span><span style="color:#000000"><span
class="cmtt-10">false</span></span><span style="color:#000000"><span
class="cmtt-10">.)</span></span>
<span class="label"><a
id="x9-57007r4"></a></span><span
id="x10-58007r4"></a></span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span style="color:#000000"><span
@ -537,7 +537,7 @@ class="cmtt-10">(1:3))</span></span></pre>
<h4 class="subsectionHead"><span class="titlemark">4.3 </span> <a
id="x9-580004.3"></a>psb_gedots &#8212; Generalized Dot Product</h4>
id="x10-590004.3"></a>psb_gedots &#8212; Generalized Dot Product</h4>
<!--l. 237--><p class="noindent" >This subroutine computes a series of dot products among the columns of two dense
matrices <span
class="zplmr7m-">x </span>and <span
@ -559,7 +559,7 @@ one array.
<!--l. 247--><p class="indent" > <a
id="x9-58001r3"></a><hr class="float"><div class="float"
id="x10-59001r3"></a><hr class="float"><div class="float"
>
@ -603,7 +603,7 @@ class="hline"><td><hr></td><td><hr></td></tr><tr
class="td11"> </td></tr></table> </div></div>
<br /><div class="caption"
><span class="id">Table&#x00A0;3: </span><span
class="content">Data types</span></div><!--tex4ht:label?: x9-58001r3 -->
class="content">Data types</span></div><!--tex4ht:label?: x10-59001r3 -->
@ -641,7 +641,7 @@ class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>&#x00A0;containing numbers of type specified in Table&#x00A0;<a
href="#x9-58001r3">3<!--tex4ht:ref: tab:f90mdot --></a>. The
href="#x10-59001r3">3<!--tex4ht:ref: tab:f90mdot --></a>. The
rank of <span
class="zplmr7m-">x </span>must be the same as that of <span
class="zplmr7m-">y</span>.
@ -665,7 +665,7 @@ class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>&#x00A0;containing numbers of type specified in Table&#x00A0;<a
href="#x9-58001r3">3<!--tex4ht:ref: tab:f90mdot --></a>. The
href="#x10-59001r3">3<!--tex4ht:ref: tab:f90mdot --></a>. The
rank of <span
class="zplmr7m-">y </span>must be the same as that of <span
class="zplmr7m-">x</span>.
@ -710,7 +710,7 @@ class="newline" />Intent: <span
class="pplb7t-">out</span>.<br
class="newline" />Specified as: a number or a rank-one array of the data type indicated in
Table&#x00A0;<a
href="#x9-57001r2">2<!--tex4ht:ref: tab:f90dot --></a>.
href="#x10-58001r2">2<!--tex4ht:ref: tab:f90dot --></a>.
</dd><dt class="description">
<!--l. 293--><p class="noindent" >
<span
@ -728,7 +728,7 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
<h4 class="subsectionHead"><span class="titlemark">4.4 </span> <a
id="x9-590004.4"></a>psb_normi &#8212; Infinity-Norm of Vector</h4>
id="x10-600004.4"></a>psb_normi &#8212; Infinity-Norm of Vector</h4>
<!--l. 309--><p class="noindent" >This function computes the infinity-norm of a vector <span
class="zplmr7m-">x</span>.<br
class="newline" />If <span
@ -752,7 +752,7 @@ class="newline" /><code class="lstinline"><span style="color:#000000">psb_normi<
<!--l. 325--><p class="indent" > <a
id="x9-59001r4"></a><hr class="float"><div class="float"
id="x10-60001r4"></a><hr class="float"><div class="float"
>
@ -801,7 +801,7 @@ class="hline"><td><hr></td><td><hr></td><td><hr></td></tr><tr
class="td11"> </td></tr></table> </div></div>
<br /><div class="caption"
><span class="id">Table&#x00A0;4: </span><span
class="content">Data types</span></div><!--tex4ht:label?: x9-59001r4 -->
class="content">Data types</span></div><!--tex4ht:label?: x10-60001r4 -->
@ -839,7 +839,7 @@ class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>&#x00A0;containing numbers of type specified in Table&#x00A0;<a
href="#x9-59001r4">4<!--tex4ht:ref: tab:f90amax --></a>.
href="#x10-60001r4">4<!--tex4ht:ref: tab:f90amax --></a>.
</dd><dt class="description">
<!--l. 355--><p class="noindent" >
<span
@ -910,13 +910,13 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x9-59003x1">
class="enumerate" id="x10-60003x1">
<!--l. 387--><p class="noindent" >The computation of a global result requires a global communication, which
entails a significant overhead. It may be necessary and/or advisable to
compute multiple norms at the same time; in this case, it is possible to improve
the runtime efficiency by using the following scheme: <!--l. 400-->
<pre class="lstlisting" id="listing-109"><span class="label"><a
id="x9-59004r1"></a></span><span
id="x10-60004r1"></a></span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span style="color:#000000"><span
@ -936,7 +936,7 @@ class="cmtt-10">=.</span></span><span style="color:#000000"><span
class="cmtt-10">false</span></span><span style="color:#000000"><span
class="cmtt-10">.)</span></span>
<span class="label"><a
id="x9-59005r2"></a></span><span
id="x10-60005r2"></a></span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span style="color:#000000"><span
@ -956,7 +956,7 @@ class="cmtt-10">=.</span></span><span style="color:#000000"><span
class="cmtt-10">false</span></span><span style="color:#000000"><span
class="cmtt-10">.)</span></span>
<span class="label"><a
id="x9-59006r3"></a></span><span
id="x10-60006r3"></a></span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span style="color:#000000"><span
@ -976,7 +976,7 @@ class="cmtt-10">=.</span></span><span style="color:#000000"><span
class="cmtt-10">false</span></span><span style="color:#000000"><span
class="cmtt-10">.)</span></span>
<span class="label"><a
id="x9-59007r4"></a></span><span
id="x10-60007r4"></a></span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span style="color:#000000"><span
@ -994,7 +994,7 @@ class="cmtt-10">(1:3))</span></span></pre>
<h4 class="subsectionHead"><span class="titlemark">4.5 </span> <a
id="x9-600004.5"></a>psb_geamaxs &#8212; Generalized Infinity Norm</h4>
id="x10-610004.5"></a>psb_geamaxs &#8212; Generalized Infinity Norm</h4>
<!--l. 419--><p class="noindent" >This subroutine computes a series of infinity norms on the columns of a dense matrix
<span
class="zplmr7m-">x</span>:
@ -1010,7 +1010,7 @@ src="userhtml7x.png" alt="res(i) &#x2190; m ax|x(k,i)|
<!--l. 425--><p class="indent" > <a
id="x9-60001r5"></a><hr class="float"><div class="float"
id="x10-61001r5"></a><hr class="float"><div class="float"
>
@ -1059,7 +1059,7 @@ class="hline"><td><hr></td><td><hr></td><td><hr></td></tr><tr
class="td11"> </td></tr></table> </div></div>
<br /><div class="caption"
><span class="id">Table&#x00A0;5: </span><span
class="content">Data types</span></div><!--tex4ht:label?: x9-60001r5 -->
class="content">Data types</span></div><!--tex4ht:label?: x10-61001r5 -->
@ -1097,7 +1097,7 @@ class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>&#x00A0;containing numbers of type specified in Table&#x00A0;<a
href="#x9-60001r5">5<!--tex4ht:ref: tab:f90mamax --></a>.
href="#x10-61001r5">5<!--tex4ht:ref: tab:f90mamax --></a>.
</dd><dt class="description">
<!--l. 452--><p class="noindent" >
<span
@ -1154,7 +1154,7 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
<h4 class="subsectionHead"><span class="titlemark">4.6 </span> <a
id="x9-610004.6"></a>psb_norm1 &#8212; 1-Norm of Vector</h4>
id="x10-620004.6"></a>psb_norm1 &#8212; 1-Norm of Vector</h4>
<!--l. 478--><p class="noindent" >This function computes the 1-norm of a vector <span
class="zplmr7m-">x</span>.<br
class="newline" />If <span
@ -1176,7 +1176,7 @@ src="userhtml9x.png" alt="asum &#x2190; &#x2225;re(x)&#x2225;1+ &#x2225;im(x)&#
<!--l. 489--><p class="indent" > <a
id="x9-61001r6"></a><hr class="float"><div class="float"
id="x10-62001r6"></a><hr class="float"><div class="float"
>
@ -1225,7 +1225,7 @@ class="hline"><td><hr></td><td><hr></td><td><hr></td></tr><tr
class="td11"> </td></tr></table> </div></div>
<br /><div class="caption"
><span class="id">Table&#x00A0;6: </span><span
class="content">Data types</span></div><!--tex4ht:label?: x9-61001r6 -->
class="content">Data types</span></div><!--tex4ht:label?: x10-62001r6 -->
@ -1263,7 +1263,7 @@ class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>&#x00A0;containing numbers of type specified in Table&#x00A0;<a
href="#x9-61001r6">6<!--tex4ht:ref: tab:f90asum --></a>.
href="#x10-62001r6">6<!--tex4ht:ref: tab:f90asum --></a>.
</dd><dt class="description">
<!--l. 518--><p class="noindent" >
<span
@ -1334,13 +1334,13 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x9-61003x1">
class="enumerate" id="x10-62003x1">
<!--l. 545--><p class="noindent" >The computation of a global result requires a global communication, which
entails a significant overhead. It may be necessary and/or advisable to
compute multiple norms at the same time; in this case, it is possible to improve
the runtime efficiency by using the following scheme: <!--l. 558-->
<pre class="lstlisting" id="listing-115"><span class="label"><a
id="x9-61004r1"></a></span><span
id="x10-62004r1"></a></span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span style="color:#000000"><span
@ -1360,7 +1360,7 @@ class="cmtt-10">=.</span></span><span style="color:#000000"><span
class="cmtt-10">false</span></span><span style="color:#000000"><span
class="cmtt-10">.)</span></span>
<span class="label"><a
id="x9-61005r2"></a></span><span
id="x10-62005r2"></a></span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span style="color:#000000"><span
@ -1380,7 +1380,7 @@ class="cmtt-10">=.</span></span><span style="color:#000000"><span
class="cmtt-10">false</span></span><span style="color:#000000"><span
class="cmtt-10">.)</span></span>
<span class="label"><a
id="x9-61006r3"></a></span><span
id="x10-62006r3"></a></span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span style="color:#000000"><span
@ -1400,7 +1400,7 @@ class="cmtt-10">=.</span></span><span style="color:#000000"><span
class="cmtt-10">false</span></span><span style="color:#000000"><span
class="cmtt-10">.)</span></span>
<span class="label"><a
id="x9-61007r4"></a></span><span
id="x10-62007r4"></a></span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span style="color:#000000"><span
@ -1418,7 +1418,7 @@ class="cmtt-10">(1:3))</span></span></pre>
<h4 class="subsectionHead"><span class="titlemark">4.7 </span> <a
id="x9-620004.7"></a>psb_geasums &#8212; Generalized 1-Norm of Vector</h4>
id="x10-630004.7"></a>psb_geasums &#8212; Generalized 1-Norm of Vector</h4>
<!--l. 572--><p class="noindent" >This subroutine computes a series of 1-norms on the columns of a dense matrix
<span
class="zplmr7m-">x</span>:
@ -1449,7 +1449,7 @@ src="userhtml12x.png" alt="res(i) &#x2190; &#x2225;re(x)&#x2225; + &#x2225;im(x)
<!--l. 585--><p class="indent" > <a
id="x9-62001r7"></a><hr class="float"><div class="float"
id="x10-63001r7"></a><hr class="float"><div class="float"
>
@ -1498,7 +1498,7 @@ class="hline"><td><hr></td><td><hr></td><td><hr></td></tr><tr
class="td11"> </td></tr></table> </div></div>
<br /><div class="caption"
><span class="id">Table&#x00A0;7: </span><span
class="content">Data types</span></div><!--tex4ht:label?: x9-62001r7 -->
class="content">Data types</span></div><!--tex4ht:label?: x10-63001r7 -->
@ -1536,7 +1536,7 @@ class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>&#x00A0;containing numbers of type specified in Table&#x00A0;<a
href="#x9-62001r7">7<!--tex4ht:ref: tab:f90asums --></a>.
href="#x10-63001r7">7<!--tex4ht:ref: tab:f90asums --></a>.
</dd><dt class="description">
<!--l. 614--><p class="noindent" >
<span
@ -1594,7 +1594,7 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
<h4 class="subsectionHead"><span class="titlemark">4.8 </span> <a
id="x9-630004.8"></a>psb_norm2 &#8212; 2-Norm of Vector</h4>
id="x10-640004.8"></a>psb_norm2 &#8212; 2-Norm of Vector</h4>
<!--l. 643--><p class="noindent" >This function computes the 2-norm of a vector <span
class="zplmr7m-">x</span>.<br
class="newline" />If <span
@ -1617,7 +1617,7 @@ nrm 2 &#x2190; xHx
<!--l. 654--><p class="indent" > <a
id="x9-63001r8"></a><hr class="float"><div class="float"
id="x10-64001r8"></a><hr class="float"><div class="float"
>
@ -1666,7 +1666,7 @@ class="hline"><td><hr></td><td><hr></td><td><hr></td></tr><tr
class="td11"> </td></tr></table> </div></div>
<br /><div class="caption"
><span class="id">Table&#x00A0;8: </span><span
class="content">Data types</span></div><!--tex4ht:label?: x9-63001r8 -->
class="content">Data types</span></div><!--tex4ht:label?: x10-64001r8 -->
@ -1707,7 +1707,7 @@ class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>&#x00A0;containing numbers of type specified in Table&#x00A0;<a
href="#x9-63001r8">8<!--tex4ht:ref: tab:f90nrm2 --></a>.
href="#x10-64001r8">8<!--tex4ht:ref: tab:f90nrm2 --></a>.
</dd><dt class="description">
<!--l. 687--><p class="noindent" >
<span
@ -1780,13 +1780,13 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x9-63003x1">
class="enumerate" id="x10-64003x1">
<!--l. 720--><p class="noindent" >The computation of a global result requires a global communication, which
entails a significant overhead. It may be necessary and/or advisable to
compute multiple norms at the same time; in this case, it is possible to improve
the runtime efficiency by using the following scheme: <!--l. 725-->
<pre class="lstlisting" id="listing-121"><span class="label"><a
id="x9-63004r1"></a></span><span
id="x10-64004r1"></a></span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span style="color:#000000"><span
@ -1806,7 +1806,7 @@ class="cmtt-10">=.</span></span><span style="color:#000000"><span
class="cmtt-10">false</span></span><span style="color:#000000"><span
class="cmtt-10">.)</span></span>
<span class="label"><a
id="x9-63005r2"></a></span><span
id="x10-64005r2"></a></span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span style="color:#000000"><span
@ -1826,7 +1826,7 @@ class="cmtt-10">=.</span></span><span style="color:#000000"><span
class="cmtt-10">false</span></span><span style="color:#000000"><span
class="cmtt-10">.)</span></span>
<span class="label"><a
id="x9-63006r3"></a></span><span
id="x10-64006r3"></a></span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span style="color:#000000"><span
@ -1846,7 +1846,7 @@ class="cmtt-10">=.</span></span><span style="color:#000000"><span
class="cmtt-10">false</span></span><span style="color:#000000"><span
class="cmtt-10">.)</span></span>
<span class="label"><a
id="x9-63007r4"></a></span><span
id="x10-64007r4"></a></span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span style="color:#000000"><span
@ -1864,7 +1864,7 @@ class="cmtt-10">(1:3))</span></span></pre>
<h4 class="subsectionHead"><span class="titlemark">4.9 </span> <a
id="x9-640004.9"></a>psb_genrm2s &#8212; Generalized 2-Norm of Vector</h4>
id="x10-650004.9"></a>psb_genrm2s &#8212; Generalized 2-Norm of Vector</h4>
<!--l. 739--><p class="noindent" >This subroutine computes a series of 2-norms on the columns of a dense matrix
<span
class="zplmr7m-">x</span>:
@ -1879,7 +1879,7 @@ src="userhtml15x.png" alt="res(i) &#x2190; &#x2225;x(:,i)&#x2225;2
<!--l. 746--><p class="indent" > <a
id="x9-64001r9"></a><hr class="float"><div class="float"
id="x10-65001r9"></a><hr class="float"><div class="float"
>
@ -1928,7 +1928,7 @@ class="hline"><td><hr></td><td><hr></td><td><hr></td></tr><tr
class="td11"> </td></tr></table> </div></div>
<br /><div class="caption"
><span class="id">Table&#x00A0;9: </span><span
class="content">Data types</span></div><!--tex4ht:label?: x9-64001r9 -->
class="content">Data types</span></div><!--tex4ht:label?: x10-65001r9 -->
@ -1966,7 +1966,7 @@ class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>&#x00A0;containing numbers of type specified in Table&#x00A0;<a
href="#x9-64001r9">9<!--tex4ht:ref: tab:f90nrm2s --></a>.
href="#x10-65001r9">9<!--tex4ht:ref: tab:f90nrm2s --></a>.
</dd><dt class="description">
<!--l. 775--><p class="noindent" >
<span
@ -2023,7 +2023,7 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
<h4 class="subsectionHead"><span class="titlemark">4.10 </span> <a
id="x9-650004.10"></a>psb_norm1 &#8212; 1-Norm of Sparse Matrix</h4>
id="x10-660004.10"></a>psb_norm1 &#8212; 1-Norm of Sparse Matrix</h4>
<!--l. 804--><p class="noindent" >This function computes the 1-norm of a matrix <span
class="zplmr7m-">A</span>:<br
class="newline" />
@ -2044,7 +2044,7 @@ class="zplmr7m-">A</span></dd></dl>
<!--l. 812--><p class="indent" > <a
id="x9-65001r10"></a><hr class="float"><div class="float"
id="x10-66001r10"></a><hr class="float"><div class="float"
>
@ -2086,7 +2086,7 @@ class="hline"><td><hr></td><td><hr></td></tr><tr
class="td11"> </td></tr></table> </div></div>
<br /><div class="caption"
><span class="id">Table&#x00A0;10: </span><span
class="content">Data types</span></div><!--tex4ht:label?: x9-65001r10 -->
class="content">Data types</span></div><!--tex4ht:label?: x10-66001r10 -->
@ -2184,7 +2184,7 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
<h4 class="subsectionHead"><span class="titlemark">4.11 </span> <a
id="x9-660004.11"></a>psb_normi &#8212; Infinity Norm of Sparse Matrix</h4>
id="x10-670004.11"></a>psb_normi &#8212; Infinity Norm of Sparse Matrix</h4>
<!--l. 868--><p class="noindent" >This function computes the infinity-norm of a matrix <span
class="zplmr7m-">A</span>:<br
class="newline" />
@ -2205,7 +2205,7 @@ class="zplmr7m-">A</span></dd></dl>
<!--l. 876--><p class="indent" > <a
id="x9-66001r11"></a><hr class="float"><div class="float"
id="x10-67001r11"></a><hr class="float"><div class="float"
>
@ -2247,7 +2247,7 @@ class="hline"><td><hr></td><td><hr></td></tr><tr
class="td11"> </td></tr></table> </div></div>
<br /><div class="caption"
><span class="id">Table&#x00A0;11: </span><span
class="content">Data types</span></div><!--tex4ht:label?: x9-66001r11 -->
class="content">Data types</span></div><!--tex4ht:label?: x10-67001r11 -->
@ -2345,7 +2345,7 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
<h4 class="subsectionHead"><span class="titlemark">4.12 </span> <a
id="x9-670004.12"></a>psb_spmm &#8212; Sparse Matrix by Dense Matrix Product</h4>
id="x10-680004.12"></a>psb_spmm &#8212; Sparse Matrix by Dense Matrix Product</h4>
<!--l. 933--><p class="noindent" >This subroutine computes the Sparse Matrix by Dense Matrix Product:
<table
class="equation"><tr><td>
@ -2353,7 +2353,7 @@ class="equation"><tr><td>
<img
src="userhtml18x.png" alt="y &#x2190; &#x03B1;Ax + &#x03B2;y
" class="math-display" ><a
id="x9-67001r1"></a></div>
id="x10-68001r1"></a></div>
</td><td class="equation-label">(1)</td></tr></table>
<!--l. 938--><p class="nopar" >
<table
@ -2363,7 +2363,7 @@ class="equation"><tr><td>
src="userhtml19x.png" alt=" T
y &#x2190; &#x03B1;A x+ &#x03B2;y
" class="math-display" ><a
id="x9-67002r2"></a></div>
id="x10-68002r2"></a></div>
</td><td class="equation-label">(2)</td></tr></table>
<!--l. 942--><p class="nopar" >
<table
@ -2372,7 +2372,7 @@ class="equation"><tr><td>
<img
src="userhtml20x.png" alt="y &#x2190; &#x03B1;AHx + &#x03B2;y
" class="math-display" ><a
id="x9-67003r3"></a></div>
id="x10-68003r3"></a></div>
</td><td class="equation-label">(3)</td></tr></table>
<!--l. 946--><p class="nopar" >
<!--l. 948--><p class="indent" > where:
@ -2411,7 +2411,7 @@ class="zplmr7m-">A</span></dd></dl>
<!--l. 955--><p class="indent" > <a
id="x9-67004r12"></a><hr class="float"><div class="float"
id="x10-68004r12"></a><hr class="float"><div class="float"
>
@ -2457,7 +2457,7 @@ class="hline"><td><hr></td><td><hr></td></tr><tr
class="td11"> </td></tr></table> </div></div>
<br /><div class="caption"
><span class="id">Table&#x00A0;12: </span><span
class="content">Data types</span></div><!--tex4ht:label?: x9-67004r12 -->
class="content">Data types</span></div><!--tex4ht:label?: x10-68004r12 -->
@ -2491,7 +2491,7 @@ class="pplb7t-">required</span><br
class="newline" />Intent: <span
class="pplb7t-">in</span>.<br
class="newline" />Specified as: a number of the data type indicated in Table&#x00A0;<a
href="#x9-67004r12">12<!--tex4ht:ref: tab:f90spmm --></a>.
href="#x10-68004r12">12<!--tex4ht:ref: tab:f90spmm --></a>.
</dd><dt class="description">
<!--l. 983--><p class="noindent" >
<span
@ -2530,7 +2530,7 @@ class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>&#x00A0;containing numbers of type specified in Table&#x00A0;<a
href="#x9-67004r12">12<!--tex4ht:ref: tab:f90spmm --></a>. The
href="#x10-68004r12">12<!--tex4ht:ref: tab:f90spmm --></a>. The
rank of <span
class="zplmr7m-">x </span>must be the same as that of <span
class="zplmr7m-">y</span>.
@ -2551,7 +2551,7 @@ class="pplb7t-">required </span><br
class="newline" />Intent: <span
class="pplb7t-">in</span>.<br
class="newline" />Specified as: a number of the data type indicated in Table&#x00A0;<a
href="#x9-67004r12">12<!--tex4ht:ref: tab:f90spmm --></a>.
href="#x10-68004r12">12<!--tex4ht:ref: tab:f90spmm --></a>.
</dd><dt class="description">
<!--l. 1004--><p class="noindent" >
<span
@ -2572,7 +2572,7 @@ class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>&#x00A0;containing numbers of type specified in Table&#x00A0;<a
href="#x9-67004r12">12<!--tex4ht:ref: tab:f90spmm --></a>. The
href="#x10-68004r12">12<!--tex4ht:ref: tab:f90spmm --></a>. The
rank of <span
class="zplmr7m-">y </span>must be the same as that of <span
class="zplmr7m-">x</span>.
@ -2606,21 +2606,21 @@ class="description">
class="pplb7t-">trans = N</span> </dt><dd
class="description">
<!--l. 1021--><p class="noindent" >the operation is specified by equation <a
href="#x9-67001r1">1<!--tex4ht:ref: eq:f90spmm_no_tra --></a>
href="#x10-68001r1">1<!--tex4ht:ref: eq:f90spmm_no_tra --></a>
</dd><dt class="description">
<!--l. 1022--><p class="noindent" >
<span
class="pplb7t-">trans = T</span> </dt><dd
class="description">
<!--l. 1022--><p class="noindent" >the operation is specified by equation <a
href="#x9-67002r2">2<!--tex4ht:ref: eq:f90spmm_tra --></a>
href="#x10-68002r2">2<!--tex4ht:ref: eq:f90spmm_tra --></a>
</dd><dt class="description">
<!--l. 1024--><p class="noindent" >
<span
class="pplb7t-">trans = C</span> </dt><dd
class="description">
<!--l. 1024--><p class="noindent" >the operation is specified by equation <a
href="#x9-67003r3">3<!--tex4ht:ref: eq:f90spmm_con --></a></dd></dl>
href="#x10-68003r3">3<!--tex4ht:ref: eq:f90spmm_con --></a></dd></dl>
<!--l. 1027--><p class="noindent" >Scope: <span
class="pplb7t-">global </span><br
class="newline" />Type: <span
@ -2672,7 +2672,7 @@ class="newline" />Intent: <span
class="pplb7t-">inout</span>.<br
class="newline" />Specified as: an array of rank one or two containing numbers of type specified
in Table&#x00A0;<a
href="#x9-67004r12">12<!--tex4ht:ref: tab:f90spmm --></a>.
href="#x10-68004r12">12<!--tex4ht:ref: tab:f90spmm --></a>.
</dd><dt class="description">
<!--l. 1065--><p class="noindent" >
<span
@ -2690,7 +2690,7 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
<h4 class="subsectionHead"><span class="titlemark">4.13 </span> <a
id="x9-680004.13"></a>psb_spsm &#8212; Triangular System Solve</h4>
id="x10-690004.13"></a>psb_spsm &#8212; Triangular System Solve</h4>
<!--l. 1081--><p class="noindent" >This subroutine computes the Triangular System Solve:
<div class="eqnarray">
<div class="math-display" >
@ -2754,7 +2754,7 @@ class="newline" />
<!--l. 1107--><p class="indent" > <a
id="x9-68002r13"></a><hr class="float"><div class="float"
id="x10-69002r13"></a><hr class="float"><div class="float"
>
@ -2801,7 +2801,7 @@ class="hline"><td><hr></td><td><hr></td></tr><tr
class="td11"> </td></tr></table> </div></div>
<br /><div class="caption"
><span class="id">Table&#x00A0;13: </span><span
class="content">Data types</span></div><!--tex4ht:label?: x9-68002r13 -->
class="content">Data types</span></div><!--tex4ht:label?: x10-69002r13 -->
@ -2833,7 +2833,7 @@ class="pplb7t-">required</span><br
class="newline" />Intent: <span
class="pplb7t-">in</span>.<br
class="newline" />Specified as: a number of the data type indicated in Table&#x00A0;<a
href="#x9-68002r13">13<!--tex4ht:ref: tab:f90spsm --></a>.
href="#x10-69002r13">13<!--tex4ht:ref: tab:f90spsm --></a>.
</dd><dt class="description">
<!--l. 1134--><p class="noindent" >
<span
@ -2849,7 +2849,7 @@ class="newline" />Intent: <span
class="pplb7t-">in</span>.<br
class="newline" />Specified as: an object type specified in <span
class="pplr8c-">§</span>&#x00A0;<a
href="userhtmlse3.html#x8-90003">3<!--tex4ht:ref: sec:datastruct --></a>.
href="userhtmlse3.html#x9-100003">3<!--tex4ht:ref: sec:datastruct --></a>.
</dd><dt class="description">
<!--l. 1141--><p class="noindent" >
<span
@ -2870,7 +2870,7 @@ class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>&#x00A0;containing numbers of type specified in Table&#x00A0;<a
href="#x9-68002r13">13<!--tex4ht:ref: tab:f90spsm --></a>. The
href="#x10-69002r13">13<!--tex4ht:ref: tab:f90spsm --></a>. The
rank of <span
class="zplmr7m-">x </span>must be the same as that of <span
class="zplmr7m-">y</span>.
@ -2888,7 +2888,7 @@ class="pplb7t-">required </span><br
class="newline" />Intent: <span
class="pplb7t-">in</span>.<br
class="newline" />Specified as: a number of the data type indicated in Table&#x00A0;<a
href="#x9-68002r13">13<!--tex4ht:ref: tab:f90spsm --></a>.
href="#x10-69002r13">13<!--tex4ht:ref: tab:f90spsm --></a>.
@ -2912,7 +2912,7 @@ class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>&#x00A0;containing numbers of type specified in Table&#x00A0;<a
href="#x9-68002r13">13<!--tex4ht:ref: tab:f90spsm --></a>. The
href="#x10-69002r13">13<!--tex4ht:ref: tab:f90spsm --></a>. The
rank of <span
class="zplmr7m-">y </span>must be the same as that of <span
class="zplmr7m-">x</span>.
@ -3067,7 +3067,7 @@ class="zplmr7m-">noscaling</span><span
class="zplmr7t-">)</span><br
class="newline" />Specified as: a rank one array containing numbers of the type indicated in
Table&#x00A0;<a
href="#x9-68002r13">13<!--tex4ht:ref: tab:f90spsm --></a>.
href="#x10-69002r13">13<!--tex4ht:ref: tab:f90spsm --></a>.
</dd><dt class="description">
<!--l. 1213--><p class="noindent" >
<span
@ -3104,7 +3104,7 @@ class="newline" />Intent: <span
class="pplb7t-">inout</span>.<br
class="newline" />Specified as: an array of rank one or two containing numbers of type specified
in Table&#x00A0;<a
href="#x9-68002r13">13<!--tex4ht:ref: tab:f90spsm --></a>.
href="#x10-69002r13">13<!--tex4ht:ref: tab:f90spsm --></a>.
</dd><dt class="description">
<!--l. 1231--><p class="noindent" >
<span
@ -3122,7 +3122,7 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
<h4 class="subsectionHead"><span class="titlemark">4.14 </span> <a
id="x9-690004.14"></a>psb_gemlt &#8212; Entrywise Product</h4>
id="x10-700004.14"></a>psb_gemlt &#8212; Entrywise Product</h4>
<!--l. 1247--><p class="noindent" >This function computes the entrywise product between two vectors <span
class="zplmr7m-">x </span>and
<span
@ -3138,7 +3138,7 @@ src="userhtml22x.png" alt="dot &#x2190; x(i)y(i).
<!--l. 1254--><p class="indent" > <a
id="x9-69001r14"></a><hr class="float"><div class="float"
id="x10-70001r14"></a><hr class="float"><div class="float"
>
@ -3182,7 +3182,7 @@ class="hline"><td><hr></td><td><hr></td></tr><tr
class="td11"> </td></tr></table> </div></div>
<br /><div class="caption"
><span class="id">Table&#x00A0;14: </span><span
class="content">Data types</span></div><!--tex4ht:label?: x9-69001r14 -->
class="content">Data types</span></div><!--tex4ht:label?: x10-70001r14 -->
@ -3220,7 +3220,7 @@ class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>&#x00A0;containing numbers of
type specified in Table&#x00A0;<a
href="#x9-57001r2">2<!--tex4ht:ref: tab:f90dot --></a>.
href="#x10-58001r2">2<!--tex4ht:ref: tab:f90dot --></a>.
</dd><dt class="description">
<!--l. 1283--><p class="noindent" >
<span
@ -3241,7 +3241,7 @@ class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>&#x00A0;containing numbers of
type specified in Table&#x00A0;<a
href="#x9-57001r2">2<!--tex4ht:ref: tab:f90dot --></a>.
href="#x10-58001r2">2<!--tex4ht:ref: tab:f90dot --></a>.
</dd><dt class="description">
<!--l. 1293--><p class="noindent" >
<span
@ -3289,7 +3289,7 @@ class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>&#x00A0;containing numbers of
the type indicated in Table&#x00A0;<a
href="#x9-69001r14">14<!--tex4ht:ref: tab:f90mlt --></a>.
href="#x10-70001r14">14<!--tex4ht:ref: tab:f90mlt --></a>.
</dd><dt class="description">
<!--l. 1305--><p class="noindent" >
<span
@ -3307,7 +3307,7 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
<h4 class="subsectionHead"><span class="titlemark">4.15 </span> <a
id="x9-700004.15"></a>psb_gediv &#8212; Entrywise Division</h4>
id="x10-710004.15"></a>psb_gediv &#8212; Entrywise Division</h4>
<!--l. 1314--><p class="noindent" >This function computes the entrywise division between two vectors <span
class="zplmr7m-">x </span>and
<span
@ -3323,7 +3323,7 @@ src="userhtml23x.png" alt="/ &#x2190; x(i)/y(i).
<!--l. 1321--><p class="indent" > <a
id="x9-70001r15"></a><hr class="float"><div class="float"
id="x10-71001r15"></a><hr class="float"><div class="float"
>
@ -3367,7 +3367,7 @@ class="hline"><td><hr></td><td><hr></td></tr><tr
class="td11"> </td></tr></table> </div></div>
<br /><div class="caption"
><span class="id">Table&#x00A0;15: </span><span
class="content">Data types</span></div><!--tex4ht:label?: x9-70001r15 -->
class="content">Data types</span></div><!--tex4ht:label?: x10-71001r15 -->
@ -3405,7 +3405,7 @@ class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>&#x00A0;containing numbers of
type specified in Table&#x00A0;<a
href="#x9-57001r2">2<!--tex4ht:ref: tab:f90dot --></a>.
href="#x10-58001r2">2<!--tex4ht:ref: tab:f90dot --></a>.
</dd><dt class="description">
<!--l. 1350--><p class="noindent" >
<span
@ -3426,7 +3426,7 @@ class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>&#x00A0;containing numbers of
type specified in Table&#x00A0;<a
href="#x9-57001r2">2<!--tex4ht:ref: tab:f90dot --></a>.
href="#x10-58001r2">2<!--tex4ht:ref: tab:f90dot --></a>.
</dd><dt class="description">
<!--l. 1360--><p class="noindent" >
<span
@ -3491,7 +3491,7 @@ class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>&#x00A0;containing numbers of
the type indicated in Table&#x00A0;<a
href="#x9-69001r14">14<!--tex4ht:ref: tab:f90mlt --></a>.
href="#x10-70001r14">14<!--tex4ht:ref: tab:f90mlt --></a>.
</dd><dt class="description">
<!--l. 1377--><p class="noindent" >
<span
@ -3509,7 +3509,7 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
<h4 class="subsectionHead"><span class="titlemark">4.16 </span> <a
id="x9-710004.16"></a>psb_geinv &#8212; Entrywise Inversion</h4>
id="x10-720004.16"></a>psb_geinv &#8212; Entrywise Inversion</h4>
<!--l. 1386--><p class="noindent" >This function computes the entrywise inverse of a vector <span
class="zplmr7m-">x </span>and puts it into
<span
@ -3525,7 +3525,7 @@ src="userhtml24x.png" alt="/ &#x2190; 1/x(i).
<!--l. 1393--><p class="indent" > <a
id="x9-71001r16"></a><hr class="float"><div class="float"
id="x10-72001r16"></a><hr class="float"><div class="float"
>
@ -3569,7 +3569,7 @@ class="hline"><td><hr></td><td><hr></td></tr><tr
class="td11"> </td></tr></table> </div></div>
<br /><div class="caption"
><span class="id">Table&#x00A0;16: </span><span
class="content">Data types</span></div><!--tex4ht:label?: x9-71001r16 -->
class="content">Data types</span></div><!--tex4ht:label?: x10-72001r16 -->
@ -3607,7 +3607,7 @@ class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>&#x00A0;containing numbers of
type specified in Table&#x00A0;<a
href="#x9-57001r2">2<!--tex4ht:ref: tab:f90dot --></a>.
href="#x10-58001r2">2<!--tex4ht:ref: tab:f90dot --></a>.
</dd><dt class="description">
<!--l. 1422--><p class="noindent" >
<span
@ -3672,7 +3672,7 @@ class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>&#x00A0;containing numbers of
the type indicated in Table&#x00A0;<a
href="#x9-71001r16">16<!--tex4ht:ref: tab:f90inv --></a>.
href="#x10-72001r16">16<!--tex4ht:ref: tab:f90inv --></a>.
</dd><dt class="description">
<!--l. 1439--><p class="noindent" >
<span

@ -17,16 +17,16 @@ href="userhtmlse4.html#tailuserhtmlse4.html" >prev-tail</a>] [<a
href="#tailuserhtmlse5.html">tail</a>] [<a
href="userhtml.html#userhtmlse8.html" >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">5 </span> <a
id="x10-720005"></a>Communication routines</h3>
id="x11-730005"></a>Communication routines</h3>
<!--l. 7--><p class="noindent" >The routines in this chapter implement various global communication operators on
vectors associated with a discretization mesh. For auxiliary communication routines
not tied to a discretization space see&#x00A0;<a
href="userhtmlse6.html#x11-770006">6<!--tex4ht:ref: sec:toolsrout --></a>.
href="userhtmlse6.html#x12-780006">6<!--tex4ht:ref: sec:toolsrout --></a>.
<h4 class="subsectionHead"><span class="titlemark">5.1 </span> <a
id="x10-730005.1"></a>psb_halo &#8212; Halo Data Communication</h4>
id="x11-740005.1"></a>psb_halo &#8212; Halo Data Communication</h4>
<!--l. 14--><p class="noindent" >These subroutines gather the values of the halo elements:
<div class="par-math-display" >
<img
@ -44,7 +44,7 @@ class="description">
<!--l. 23--><p class="indent" > <a
id="x10-73001r17"></a><hr class="float"><div class="float"
id="x11-74001r17"></a><hr class="float"><div class="float"
>
@ -91,7 +91,7 @@ class="hline"><td><hr></td><td><hr></td></tr><tr
class="td11"> </td></tr></table> </div></div>
<br /><div class="caption"
><span class="id">Table&#x00A0;17: </span><span
class="content">Data types</span></div><!--tex4ht:label?: x10-73001r17 -->
class="content">Data types</span></div><!--tex4ht:label?: x11-74001r17 -->
@ -131,7 +131,7 @@ class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>&#x00A0;containing numbers of type specified in Table&#x00A0;<a
href="#x10-73001r17">17<!--tex4ht:ref: tab:f90halo --></a>.
href="#x11-74001r17">17<!--tex4ht:ref: tab:f90halo --></a>.
</dd><dt class="description">
<!--l. 53--><p class="noindent" >
<span
@ -202,7 +202,7 @@ class="newline" />Intent: <span
class="pplb7t-">inout</span>.<br
class="newline" />Returned as: a rank one or two array containing numbers of type
specified in Table&#x00A0;<a
href="#x10-73001r17">17<!--tex4ht:ref: tab:f90halo --></a>.
href="#x11-74001r17">17<!--tex4ht:ref: tab:f90halo --></a>.
</dd><dt class="description">
<!--l. 79--><p class="noindent" >
<span
@ -219,7 +219,7 @@ class="pplb7t-">out</span>.<br
class="newline" />An integer value that contains an error code.</dd></dl>
<!--l. 85--><p class="noindent" ><hr class="figure"><div class="figure"
><a
id="x10-73002r3"></a>
id="x11-74002r3"></a>
<div class="center"
>
<!--l. 86--><p class="noindent" >
@ -228,11 +228,11 @@ src="try8x8.png" alt="PIC"
width="32" height="32" ></div>
<br /> <div class="caption"
><span class="id">Figure&#x00A0;3: </span><span
class="content">Sample discretization mesh.</span></div><!--tex4ht:label?: x10-73002r3 -->
class="content">Sample discretization mesh.</span></div><!--tex4ht:label?: x11-74002r3 -->
<!--l. 94--><p class="indent" > </div><hr class="endfigure">
<!--l. 96--><p class="noindent" ><span
class="pplb7t-x-x-120">Usage Example </span>Consider the discretization mesh depicted in fig.&#x00A0;<a
href="#x10-73002r3">3<!--tex4ht:ref: fig:try8x8 --></a>, partitioned
href="#x11-74002r3">3<!--tex4ht:ref: fig:try8x8 --></a>, partitioned
among two processes as shown by the dashed line; the data distribution is such that
each process will own 32 entries in the index space, with a halo made of 8 entries
placed at local indices 33 through 40. If process 0 assigns an initial value of 1
@ -842,7 +842,7 @@ class="td11"> </td></tr></table>
<h4 class="subsectionHead"><span class="titlemark">5.2 </span> <a
id="x10-740005.2"></a>psb_ovrl &#8212; Overlap Update</h4>
id="x11-750005.2"></a>psb_ovrl &#8212; Overlap Update</h4>
<!--l. 164--><p class="noindent" >These subroutines apply an overlap operator to the input vector:
<div class="par-math-display" >
<img
@ -871,7 +871,7 @@ class="zplmr7m-x-x-76">T</span></sup>.</dd></dl>
<!--l. 174--><p class="indent" > <a
id="x10-74001r18"></a><hr class="float"><div class="float"
id="x11-75001r18"></a><hr class="float"><div class="float"
>
@ -913,7 +913,7 @@ class="hline"><td><hr></td><td><hr></td></tr><tr
class="td11"> </td></tr></table> </div></div>
<br /><div class="caption"
><span class="id">Table&#x00A0;18: </span><span
class="content">Data types</span></div><!--tex4ht:label?: x10-74001r18 -->
class="content">Data types</span></div><!--tex4ht:label?: x11-75001r18 -->
@ -953,7 +953,7 @@ class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>&#x00A0;containing numbers of type specified in Table&#x00A0;<a
href="#x10-74001r18">18<!--tex4ht:ref: tab:f90ovrl --></a>.
href="#x11-75001r18">18<!--tex4ht:ref: tab:f90ovrl --></a>.
</dd><dt class="description">
<!--l. 203--><p class="noindent" >
<span
@ -1060,7 +1060,7 @@ class="newline" />Intent: <span
class="pplb7t-">inout</span>.<br
class="newline" />Specified as: an array of rank one or two containing numbers of type specified
in Table&#x00A0;<a
href="#x10-74001r18">18<!--tex4ht:ref: tab:f90ovrl --></a>.
href="#x11-75001r18">18<!--tex4ht:ref: tab:f90ovrl --></a>.
</dd><dt class="description">
<!--l. 234--><p class="noindent" >
<span
@ -1078,7 +1078,7 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x10-74003x1">
class="enumerate" id="x11-75003x1">
@ -1086,7 +1086,7 @@ class="pplb7t-x-x-120">Notes</span>
descriptor, no operations are performed;
</li>
<li
class="enumerate" id="x10-74005x2">
class="enumerate" id="x11-75005x2">
<!--l. 246--><p class="noindent" >The operator <span
class="zplmr7m-">P</span><sup><span
class="zplmr7m-x-x-76">T</span></sup> performs the reduction sum of overlap elements; it is a
@ -1096,7 +1096,7 @@ class="zplmr7m-x-x-76">T</span></sup> that replicates overlap elements, accounti
for the physical replication of data;
</li>
<li
class="enumerate" id="x10-74007x3">
class="enumerate" id="x11-75007x3">
<!--l. 250--><p class="noindent" >The operator <span
class="zplmr7m-">P</span><sub><span
class="zplmr7m-x-x-76">a</span></sub> performs a scaling on the overlap elements by the
@ -1109,7 +1109,7 @@ class="zplmr7m-x-x-76">a</span></sub> performs a scaling on the overlap el
<a
id="x10-74008r4"></a>
id="x11-75008r4"></a>
@ -1121,17 +1121,17 @@ src="try8x8_ov.png" alt="PIC"
width="46" height="46" ></div>
<br /> <div class="caption"
><span class="id">Figure&#x00A0;4: </span><span
class="content">Sample discretization mesh.</span></div><!--tex4ht:label?: x10-74008r4 -->
class="content">Sample discretization mesh.</span></div><!--tex4ht:label?: x11-75008r4 -->
<!--l. 270--><p class="indent" > </div><hr class="endfigure">
<!--l. 271--><p class="noindent" ><span
class="pplb7t-x-x-120">Example of use </span>Consider the discretization mesh depicted in fig.&#x00A0;<a
href="#x10-74008r4">4<!--tex4ht:ref: fig:try8x8_ov --></a>, partitioned
href="#x11-75008r4">4<!--tex4ht:ref: fig:try8x8_ov --></a>, partitioned
among two processes as shown by the dashed lines, with an overlap of 1 extra
layer with respect to the partition of fig.&#x00A0;<a
href="#x10-73002r3">3<!--tex4ht:ref: fig:try8x8 --></a>; the data distribution is such that
href="#x11-74002r3">3<!--tex4ht:ref: fig:try8x8 --></a>; the data distribution is such that
each process will own 40 entries in the index space, with an overlap of 16
entries placed at local indices 25 through 40; the halo will run from local
index 41 through local index 48. If process 0 assigns an initial value of 1 to
@ -1853,7 +1853,7 @@ class="td11"> </td></tr></table>
<h4 class="subsectionHead"><span class="titlemark">5.3 </span> <a
id="x10-750005.3"></a>psb_gather &#8212; Gather Global Dense Matrix</h4>
id="x11-760005.3"></a>psb_gather &#8212; Gather Global Dense Matrix</h4>
<!--l. 353--><p class="noindent" >These subroutines collect the portions of a global dense matrix distributed over all
processes into one single array stored on one process.
<div class="par-math-display" >
@ -1896,7 +1896,7 @@ class="description">
<!--l. 366--><p class="indent" > <a
id="x10-75001r19"></a><hr class="float"><div class="float"
id="x11-76001r19"></a><hr class="float"><div class="float"
>
@ -1944,7 +1944,7 @@ class="hline"><td><hr></td><td><hr></td></tr><tr
class="td11"> </td></tr></table> </div></div>
<br /><div class="caption"
><span class="id">Table&#x00A0;19: </span><span
class="content">Data types</span></div><!--tex4ht:label?: x10-75001r19 -->
class="content">Data types</span></div><!--tex4ht:label?: x11-76001r19 -->
@ -1986,7 +1986,7 @@ class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>&#x00A0;indicated in Table&#x00A0;<a
href="#x10-75001r19">19<!--tex4ht:ref: tab:gather --></a>.
href="#x11-76001r19">19<!--tex4ht:ref: tab:gather --></a>.
</dd><dt class="description">
<!--l. 397--><p class="noindent" >
<span
@ -2069,7 +2069,7 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
<h4 class="subsectionHead"><span class="titlemark">5.4 </span> <a
id="x10-760005.4"></a>psb_scatter &#8212; Scatter Global Dense Matrix</h4>
id="x11-770005.4"></a>psb_scatter &#8212; Scatter Global Dense Matrix</h4>
<!--l. 453--><p class="noindent" >These subroutines scatter the portions of a global dense matrix owned by a process to
all the processes in the process grid.
<div class="par-math-display" >
@ -2112,7 +2112,7 @@ class="description">
<!--l. 465--><p class="indent" > <a
id="x10-76001r20"></a><hr class="float"><div class="float"
id="x11-77001r20"></a><hr class="float"><div class="float"
>
@ -2160,7 +2160,7 @@ class="hline"><td><hr></td><td><hr></td></tr><tr
class="td11"> </td></tr></table> </div></div>
<br /><div class="caption"
><span class="id">Table&#x00A0;20: </span><span
class="content">Data types</span></div><!--tex4ht:label?: x10-76001r20 -->
class="content">Data types</span></div><!--tex4ht:label?: x11-77001r20 -->
@ -2291,7 +2291,7 @@ class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>&#x00A0;containing numbers of the type indicated in Table&#x00A0;<a
href="#x10-76001r20">20<!--tex4ht:ref: tab:scatter --></a>.
href="#x11-77001r20">20<!--tex4ht:ref: tab:scatter --></a>.
</dd><dt class="description">
<!--l. 542--><p class="noindent" >
<span

@ -17,10 +17,10 @@ href="userhtmlse5.html#tailuserhtmlse5.html" >prev-tail</a>] [<a
href="userhtmlse3.html#tailuserhtmlse6.html">tail</a>] [<a
href="userhtml.html#userhtmlse9.html" >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">6 </span> <a
id="x11-770006"></a>Data management routines</h3>
id="x12-780006"></a>Data management routines</h3>
<!--l. 8--><p class="noindent" >
<h4 class="subsectionHead"><span class="titlemark">6.1 </span> <a
id="x11-780006.1"></a>psb_cdall &#8212; Allocates a communication descriptor</h4>
id="x12-790006.1"></a>psb_cdall &#8212; Allocates a communication descriptor</h4>
@ -261,7 +261,7 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x11-78002x1">
class="enumerate" id="x12-79002x1">
@ -427,7 +427,7 @@ class="cmtt-10">local</span></span></span> in calls to <span class="obeylines-h"
class="cmtt-10">psb_spins</span></span></span> and
<span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geins</span></span></span>; see also sec.&#x00A0;<a
href="userhtmlse2.html#x4-70002.3.1">2.3.1<!--tex4ht:ref: sec:usermaps --></a>.
href="userhtmlse2.html#x5-80002.3.1">2.3.1<!--tex4ht:ref: sec:usermaps --></a>.
</dd><dt class="description">
<!--l. 165--><p class="noindent" >
<span
@ -452,18 +452,18 @@ class="description">
multilevel preconditioners.</dd></dl>
</li>
<li
class="enumerate" id="x11-78004x2">
class="enumerate" id="x12-79004x2">
<!--l. 173--><p class="noindent" >On exit from this routine the descriptor is in the build state.
</li>
<li
class="enumerate" id="x11-78006x3">
class="enumerate" id="x12-79006x3">
<!--l. 175--><p class="noindent" >Calling the routine with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">vg</span></span></span> or <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">parts</span></span></span> implies that every process will scan the
entire index space to figure out the local indices.
</li>
<li
class="enumerate" id="x11-78008x4">
class="enumerate" id="x12-79008x4">
<!--l. 178--><p class="noindent" >Overlapped indices are possible with both <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">parts</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">vl</span></span></span> invocations.
@ -472,7 +472,7 @@ class="cmtt-10">vl</span></span></span> invocations.
</li>
<li
class="enumerate" id="x11-78010x5">
class="enumerate" id="x12-79010x5">
<!--l. 180--><p class="noindent" >When the subroutine is invoked with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">vl</span></span></span> in conjunction with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">globalcheck=.true.</span></span></span>,
@ -480,7 +480,7 @@ class="cmtt-10">globalcheck=.true.</span></span></span>,
indices.
</li>
<li
class="enumerate" id="x11-78012x6">
class="enumerate" id="x12-79012x6">
<!--l. 183--><p class="noindent" >When the subroutine is invoked with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">vl</span></span></span> in conjunction with
<span class="obeylines-h"><span class="verb"><span
@ -491,7 +491,7 @@ class="cmtt-10">vl</span></span></span> have
unpredictable.
</li>
<li
class="enumerate" id="x11-78014x7">
class="enumerate" id="x12-79014x7">
<!--l. 189--><p class="noindent" >Orphan and overlap indices are impossible by construction when the
subroutine is invoked with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">nl</span></span></span> (alone), or <span class="obeylines-h"><span class="verb"><span
@ -500,7 +500,7 @@ class="cmtt-10">vg</span></span></span>.</li></ol>
<h4 class="subsectionHead"><span class="titlemark">6.2 </span> <a
id="x11-790006.2"></a>psb_cdins &#8212; Communication descriptor insert routine</h4>
id="x12-800006.2"></a>psb_cdins &#8212; Communication descriptor insert routine</h4>
@ -696,25 +696,25 @@ class="zplmr7m-">nz</span>.
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x11-79002x1">
class="enumerate" id="x12-80002x1">
<!--l. 279--><p class="noindent" >This routine may only be called if the descriptor is in the build state;
</li>
<li
class="enumerate" id="x11-79004x2">
class="enumerate" id="x12-80004x2">
<!--l. 281--><p class="noindent" >This routine automatically ignores edges that do not involve the current
process, i.e. edges for which neither the starting nor the end vertex belong
to the current process.
</li>
<li
class="enumerate" id="x11-79006x3">
class="enumerate" id="x12-80006x3">
<!--l. 284--><p class="noindent" >The second form of this routine will be useful when dealing with
user-specified index mappings; see also&#x00A0;<a
href="userhtmlse2.html#x4-70002.3.1">2.3.1<!--tex4ht:ref: sec:usermaps --></a>.</li></ol>
href="userhtmlse2.html#x5-80002.3.1">2.3.1<!--tex4ht:ref: sec:usermaps --></a>.</li></ol>
<h4 class="subsectionHead"><span class="titlemark">6.3 </span> <a
id="x11-800006.3"></a>psb_cdasb &#8212; Communication descriptor assembly routine</h4>
id="x12-810006.3"></a>psb_cdasb &#8212; Communication descriptor assembly routine</h4>
@ -818,7 +818,7 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x11-80002x1">
class="enumerate" id="x12-81002x1">
<!--l. 331--><p class="noindent" >On exit from this routine the descriptor is in the assembled state.</li></ol>
<!--l. 334--><p class="noindent" >This call will set up all the necessary information for the halo data exchanges. In doing
so, the library will need to identify the set of processes owning the halo indices
@ -835,7 +835,7 @@ class="cmtt-10">psb_cdasb</span></span></span>.
<h4 class="subsectionHead"><span class="titlemark">6.4 </span> <a
id="x11-810006.4"></a>psb_cdcpy &#8212; Copies a communication descriptor</h4>
id="x12-820006.4"></a>psb_cdcpy &#8212; Copies a communication descriptor</h4>
@ -920,7 +920,7 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
<h4 class="subsectionHead"><span class="titlemark">6.5 </span> <a
id="x11-820006.5"></a>psb_cdfree &#8212; Frees a communication descriptor</h4>
id="x12-830006.5"></a>psb_cdfree &#8212; Frees a communication descriptor</h4>
@ -983,7 +983,7 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
<h4 class="subsectionHead"><span class="titlemark">6.6 </span> <a
id="x11-830006.6"></a>psb_cdbldext &#8212; Build an extended communication descriptor</h4>
id="x12-840006.6"></a>psb_cdbldext &#8212; Build an extended communication descriptor</h4>
@ -1116,7 +1116,7 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x11-83002x1">
class="enumerate" id="x12-84002x1">
<!--l. 465--><p class="noindent" >Specifying <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_ovt_xhal_</span></span></span> for the <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">extype</span></span></span> argument the user will obtain
@ -1125,7 +1125,7 @@ class="cmtt-10">extype</span></span></span> argument the user will obtain
mapping is identical to that of the base descriptor;
</li>
<li
class="enumerate" id="x11-83004x2">
class="enumerate" id="x12-84004x2">
@ -1139,7 +1139,7 @@ class="cmtt-10">extype</span></span></span> argument the user will obtain
<h4 class="subsectionHead"><span class="titlemark">6.7 </span> <a
id="x11-840006.7"></a>psb_spall &#8212; Allocates a sparse matrix</h4>
id="x12-850006.7"></a>psb_spall &#8212; Allocates a sparse matrix</h4>
@ -1272,18 +1272,18 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x11-84002x1">
class="enumerate" id="x12-85002x1">
<!--l. 597--><p class="noindent" >On exit from this routine the sparse matrix is in the build state.
</li>
<li
class="enumerate" id="x11-84004x2">
class="enumerate" id="x12-85004x2">
<!--l. 599--><p class="noindent" >The descriptor may be in either the build or assembled state.
</li>
<li
class="enumerate" id="x11-84006x3">
class="enumerate" id="x12-85006x3">
<!--l. 600--><p class="noindent" >Providing a good estimate for the number of nonzeroes <span
class="zplmr7m-">nnz </span>in the
assembled matrix may substantially improve performance in the matrix
@ -1291,7 +1291,7 @@ class="zplmr7m-">nnz </span>in the
multiple) data reallocations;
</li>
<li
class="enumerate" id="x11-84008x4">
class="enumerate" id="x12-85008x4">
<!--l. 604--><p class="noindent" >Using <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_matbld_remote_</span></span></span> is likely to cause a runtime overhead at
assembly time;</li></ol>
@ -1299,7 +1299,7 @@ class="cmtt-10">psb_matbld_remote_</span></span></span> is likely to cause a
<h4 class="subsectionHead"><span class="titlemark">6.8 </span> <a
id="x11-850006.8"></a>psb_spins &#8212; Insert a set of coefficients into a sparse matrix</h4>
id="x12-860006.8"></a>psb_spins &#8212; Insert a set of coefficients into a sparse matrix</h4>
@ -1523,17 +1523,17 @@ class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x11-85002x1">
class="enumerate" id="x12-86002x1">
<!--l. 702--><p class="noindent" >On entry to this routine the descriptor may be in either the build or
assembled state.
</li>
<li
class="enumerate" id="x11-85004x2">
class="enumerate" id="x12-86004x2">
<!--l. 704--><p class="noindent" >On entry to this routine the sparse matrix may be in either the build or
update state.
</li>
<li
class="enumerate" id="x11-85006x3">
class="enumerate" id="x12-86006x3">
<!--l. 706--><p class="noindent" >If the descriptor is in the build state, then the sparse matrix must also be
in the build state; the action of the routine is to (implicitly) call <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdins</span></span></span>
@ -1542,11 +1542,11 @@ class="cmtt-10">psb_cdins</span></span></span>
appropriate processing;
</li>
<li
class="enumerate" id="x11-85008x4">
class="enumerate" id="x12-86008x4">
<!--l. 712--><p class="noindent" >The input data can be passed in either COO or CSR formats;
</li>
<li
class="enumerate" id="x11-85010x5">
class="enumerate" id="x12-86010x5">
<!--l. 713--><p class="noindent" >In COO format the coefficients to be inserted are represented by the
ordered triples <span
class="zplmr7m-">ia</span><span
@ -1568,7 +1568,7 @@ class="zplmr7m-">nz</span>; these triples are
arbitrary;
</li>
<li
class="enumerate" id="x11-85012x6">
class="enumerate" id="x12-86012x6">
<!--l. 716--><p class="noindent" >In CSR format the coefficients to be inserted for each input row <span
class="zplmr7m-">i </span><span
class="zplmr7t-">= </span>1, <span
@ -1610,7 +1610,7 @@ class="zplmr7y-">- </span>1 should be one of the local indices, but are
arbitrary;
</li>
<li
class="enumerate" id="x11-85014x7">
class="enumerate" id="x12-86014x7">
<!--l. 721--><p class="noindent" >There is no requirement that a given row must be passed in its entirety
to a single call to this routine: the buildup of a row may be split into as
many calls as desired (even in the CSR format);
@ -1619,12 +1619,12 @@ class="zplmr7y-">- </span>1 should be one of the local indices, but are
</li>
<li
class="enumerate" id="x11-85016x8">
class="enumerate" id="x12-86016x8">
<!--l. 724--><p class="noindent" >Coefficients from different rows may also be mixed up freely in a single
call, according to the application needs;
</li>
<li
class="enumerate" id="x11-85018x9">
class="enumerate" id="x12-86018x9">
<!--l. 726--><p class="noindent" >Coefficients from matrix rows not owned by the calling process are
treated according to the value of <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">bldmode</span></span></span> specified at allocation time; if
@ -1634,20 +1634,20 @@ class="cmtt-10">psb_matbld_remote_</span></span></span> the library will keep tr
of them, otherwise they are silently ignored;
</li>
<li
class="enumerate" id="x11-85020x10">
class="enumerate" id="x12-86020x10">
<!--l. 731--><p class="noindent" >If the descriptor is in the assembled state, then any entries in the sparse
matrix that would generate additional communication requirements are
ignored;
</li>
<li
class="enumerate" id="x11-85022x11">
class="enumerate" id="x12-86022x11">
<!--l. 734--><p class="noindent" >If the matrix is in the update state, any entries in positions that were not
present in the original matrix are ignored.</li></ol>
<h4 class="subsectionHead"><span class="titlemark">6.9 </span> <a
id="x11-860006.9"></a>psb_spasb &#8212; Sparse matrix assembly routine</h4>
id="x12-870006.9"></a>psb_spasb &#8212; Sparse matrix assembly routine</h4>
@ -1798,7 +1798,7 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x11-86002x1">
class="enumerate" id="x12-87002x1">
<!--l. 795--><p class="noindent" >On entry to this routine the descriptor must be in the assembled state, i.e.
<span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdasb</span></span></span> must already have been called.
@ -1807,17 +1807,17 @@ class="cmtt-10">psb_cdasb</span></span></span> must already have been called.
</li>
<li
class="enumerate" id="x11-86004x2">
class="enumerate" id="x12-87004x2">
<!--l. 797--><p class="noindent" >The sparse matrix may be in either the build or update state;
</li>
<li
class="enumerate" id="x11-86006x3">
class="enumerate" id="x12-87006x3">
<!--l. 798--><p class="noindent" >Duplicate entries are detected and handled in both build and update
state, with the exception of the error action that is only taken in the build
state, i.e. on the first assembly;
</li>
<li
class="enumerate" id="x11-86008x4">
class="enumerate" id="x12-87008x4">
<!--l. 801--><p class="noindent" >If the update choice is <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_upd_perm_</span></span></span>, then subsequent calls to
<span class="obeylines-h"><span class="verb"><span
@ -1826,16 +1826,16 @@ class="cmtt-10">psb_spins</span></span></span> to update the matrix must be arra
at the first assembly;
</li>
<li
class="enumerate" id="x11-86010x5">
class="enumerate" id="x12-87010x5">
<!--l. 805--><p class="noindent" >The output storage format need not be the same on all processes;
</li>
<li
class="enumerate" id="x11-86012x6">
class="enumerate" id="x12-87012x6">
<!--l. 807--><p class="noindent" >On exit from this routine the matrix is in the assembled state, and thus is
suitable for the computational routines;
</li>
<li
class="enumerate" id="x11-86014x7">
class="enumerate" id="x12-87014x7">
<!--l. 809--><p class="noindent" >If the <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">bldmode=psb_matbld_remote_</span></span></span> value was specified at allocation
time, contributions defined on the current process but belonging to a
@ -1848,7 +1848,7 @@ class="cmtt-10">dupl=psb_dupl_add_</span></span></span>; it is necessary
<h4 class="subsectionHead"><span class="titlemark">6.10 </span> <a
id="x11-870006.10"></a>psb_spfree &#8212; Frees a sparse matrix</h4>
id="x12-880006.10"></a>psb_spfree &#8212; Frees a sparse matrix</h4>
@ -1931,7 +1931,7 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
<h4 class="subsectionHead"><span class="titlemark">6.11 </span> <a
id="x11-880006.11"></a>psb_sprn &#8212; Reinit sparse matrix structure for psblas routines.</h4>
id="x12-890006.11"></a>psb_sprn &#8212; Reinit sparse matrix structure for psblas routines.</h4>
@ -2027,13 +2027,13 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x11-88002x1">
class="enumerate" id="x12-89002x1">
<!--l. 930--><p class="noindent" >On exit from this routine the sparse matrix is in the update state.</li></ol>
<h4 class="subsectionHead"><span class="titlemark">6.12 </span> <a
id="x11-890006.12"></a>psb_geall &#8212; Allocates a dense matrix</h4>
id="x12-900006.12"></a>psb_geall &#8212; Allocates a dense matrix</h4>
@ -2190,7 +2190,7 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x11-89002x1">
class="enumerate" id="x12-90002x1">
<!--l. 1040--><p class="noindent" >Using <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_matbld_remote_</span></span></span> is likely to cause a runtime overhead at
assembly time;</li></ol>
@ -2198,7 +2198,7 @@ class="cmtt-10">psb_matbld_remote_</span></span></span> is likely to cause a
<h4 class="subsectionHead"><span class="titlemark">6.13 </span> <a
id="x11-900006.13"></a>psb_geins &#8212; Dense matrix insertion routine</h4>
id="x12-910006.13"></a>psb_geins &#8212; Dense matrix insertion routine</h4>
@ -2349,18 +2349,18 @@ class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x11-90002x1">
class="enumerate" id="x12-91002x1">
<!--l. 1105--><p class="noindent" >Dense vectors/matrices do not have an associated state;
</li>
<li
class="enumerate" id="x11-90004x2">
class="enumerate" id="x12-91004x2">
<!--l. 1106--><p class="noindent" >Duplicate entries are either overwritten or added; there is no provision
for raising an error condition.</li></ol>
<h4 class="subsectionHead"><span class="titlemark">6.14 </span> <a
id="x11-910006.14"></a>psb_geasb &#8212; Assembles a dense matrix</h4>
id="x12-920006.14"></a>psb_geasb &#8212; Assembles a dense matrix</h4>
@ -2473,13 +2473,13 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x11-91002x1">
class="enumerate" id="x12-92002x1">
<!--l. 1155--><p class="noindent" >On entry to this routine the descriptor must be in the assembled state, i.e.
<span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdasb</span></span></span> must already have been called.
</li>
<li
class="enumerate" id="x11-91004x2">
class="enumerate" id="x12-92004x2">
<!--l. 1157--><p class="noindent" >If the <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">bldmode=psb_matbld_remote_</span></span></span> value was specified at allocation
time, contributions defined on the current process but belonging to a
@ -2490,7 +2490,7 @@ class="cmtt-10">dupl=psb_dupl_add_</span></span></span>.</li></ol>
<h4 class="subsectionHead"><span class="titlemark">6.15 </span> <a
id="x11-920006.15"></a>psb_gefree &#8212; Frees a dense matrix</h4>
id="x12-930006.15"></a>psb_gefree &#8212; Frees a dense matrix</h4>
@ -2577,7 +2577,7 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
<h4 class="subsectionHead"><span class="titlemark">6.16 </span> <a
id="x11-930006.16"></a>psb_gelp &#8212; Applies a left permutation to a dense matrix</h4>
id="x12-940006.16"></a>psb_gelp &#8212; Applies a left permutation to a dense matrix</h4>
@ -2673,7 +2673,7 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
<h4 class="subsectionHead"><span class="titlemark">6.17 </span> <a
id="x11-940006.17"></a>psb_glob_to_loc &#8212; Global to local indices conversion</h4>
id="x12-950006.17"></a>psb_glob_to_loc &#8212; Global to local indices conversion</h4>
@ -2815,7 +2815,7 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x11-94002x1">
class="enumerate" id="x12-95002x1">
@ -2823,7 +2823,7 @@ class="pplb7t-x-x-120">Notes</span>
set to a negative number;
</li>
<li
class="enumerate" id="x11-94004x2">
class="enumerate" id="x12-95004x2">
<!--l. 1311--><p class="noindent" >The default <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">I</span></span></span>gnore means that the negative output is the only action
taken on an out-of-range input.</li></ol>
@ -2831,7 +2831,7 @@ class="cmtt-10">I</span></span></span>gnore means that the negative output
<h4 class="subsectionHead"><span class="titlemark">6.18 </span> <a
id="x11-950006.18"></a>psb_loc_to_glob &#8212; Local to global indices conversion</h4>
id="x12-960006.18"></a>psb_loc_to_glob &#8212; Local to global indices conversion</h4>
@ -2959,7 +2959,7 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
<h4 class="subsectionHead"><span class="titlemark">6.19 </span> <a
id="x11-960006.19"></a>psb_is_owned &#8212; </h4>
id="x12-970006.19"></a>psb_is_owned &#8212; </h4>
@ -3040,7 +3040,7 @@ class="newline" /></dd></dl>
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x11-96002x1">
class="enumerate" id="x12-97002x1">
<!--l. 1407--><p class="noindent" >This routine returns a <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">.true.</span></span></span> value for an index that is strictly owned by
the current process, excluding the halo indices</li></ol>
@ -3048,7 +3048,7 @@ class="cmtt-10">.true.</span></span></span> value for an index that is strictly
<h4 class="subsectionHead"><span class="titlemark">6.20 </span> <a
id="x11-970006.20"></a>psb_owned_index &#8212; </h4>
id="x12-980006.20"></a>psb_owned_index &#8212; </h4>
@ -3158,7 +3158,7 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x11-97002x1">
class="enumerate" id="x12-98002x1">
<!--l. 1459--><p class="noindent" >This routine returns a <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">.true.</span></span></span> value for those indices that are strictly
owned by the current process, excluding the halo indices</li></ol>
@ -3166,7 +3166,7 @@ class="cmtt-10">.true.</span></span></span> value for those indices that ar
<h4 class="subsectionHead"><span class="titlemark">6.21 </span> <a
id="x11-980006.21"></a>psb_is_local &#8212; </h4>
id="x12-990006.21"></a>psb_is_local &#8212; </h4>
@ -3247,7 +3247,7 @@ class="newline" /></dd></dl>
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x11-98002x1">
class="enumerate" id="x12-99002x1">
<!--l. 1499--><p class="noindent" >This routine returns a <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">.true.</span></span></span> value for an index that is local to the current
process, including the halo indices</li></ol>
@ -3255,7 +3255,7 @@ class="cmtt-10">.true.</span></span></span> value for an index that is local to
<h4 class="subsectionHead"><span class="titlemark">6.22 </span> <a
id="x11-990006.22"></a>psb_local_index &#8212; </h4>
id="x12-1000006.22"></a>psb_local_index &#8212; </h4>
@ -3365,7 +3365,7 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x11-99002x1">
class="enumerate" id="x12-100002x1">
<!--l. 1550--><p class="noindent" >This routine returns a <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">.true.</span></span></span> value for those indices that are local to the
current process, including the halo indices.</li></ol>
@ -3373,7 +3373,7 @@ class="cmtt-10">.true.</span></span></span> value for those indices that are loc
<h4 class="subsectionHead"><span class="titlemark">6.23 </span> <a
id="x11-1000006.23"></a>psb_get_boundary &#8212; Extract list of boundary elements</h4>
id="x12-1010006.23"></a>psb_get_boundary &#8212; Extract list of boundary elements</h4>
@ -3453,13 +3453,13 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x11-100002x1">
class="enumerate" id="x12-101002x1">
<!--l. 1596--><p class="noindent" >If there are no boundary elements (i.e., if the local part of the connectivity
graph is self-contained) the output vector is set to the &#8220;not allocated&#8221;
state.
</li>
<li
class="enumerate" id="x11-100004x2">
class="enumerate" id="x12-101004x2">
<!--l. 1599--><p class="noindent" >Otherwise the size of <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">bndel</span></span></span> will be exactly equal to the number of
boundary elements.</li></ol>
@ -3467,7 +3467,7 @@ class="cmtt-10">bndel</span></span></span> will be exactly equal to the nu
<h4 class="subsectionHead"><span class="titlemark">6.24 </span> <a
id="x11-1010006.24"></a>psb_get_overlap &#8212; Extract list of overlap elements</h4>
id="x12-1020006.24"></a>psb_get_overlap &#8212; Extract list of overlap elements</h4>
@ -3547,12 +3547,12 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x11-101002x1">
class="enumerate" id="x12-102002x1">
<!--l. 1639--><p class="noindent" >If there are no overlap elements the output vector is set to the &#8220;not
allocated&#8221; state.
</li>
<li
class="enumerate" id="x11-101004x2">
class="enumerate" id="x12-102004x2">
<!--l. 1641--><p class="noindent" >Otherwise the size of <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">ovrel</span></span></span> will be exactly equal to the number of overlap
elements.</li></ol>
@ -3560,7 +3560,7 @@ class="cmtt-10">ovrel</span></span></span> will be exactly equal to the number o
<h4 class="subsectionHead"><span class="titlemark">6.25 </span> <a
id="x11-1020006.25"></a>psb_sp_getrow &#8212; Extract row(s) from a sparse matrix</h4>
id="x12-1030006.25"></a>psb_sp_getrow &#8212; Extract row(s) from a sparse matrix</h4>
@ -3742,7 +3742,7 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x11-102002x1">
class="enumerate" id="x12-103002x1">
<!--l. 1727--><p class="noindent" >The output <span
class="zplmr7m-">nz </span>is always the size of the output generated by the current
call; thus, if <span class="obeylines-h"><span class="verb"><span
@ -3755,12 +3755,12 @@ class="cmtt-10">nzin+1:nzin+nz</span></span></span> of the
array arguments;
</li>
<li
class="enumerate" id="x11-102004x2">
class="enumerate" id="x12-103004x2">
<!--l. 1731--><p class="noindent" >When <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">append=.true.</span></span></span> the output arrays are reallocated as necessary;
</li>
<li
class="enumerate" id="x11-102006x3">
class="enumerate" id="x12-103006x3">
<!--l. 1733--><p class="noindent" >The row and column indices are returned in the local numbering
scheme; if the global numbering is desired, the user may employ the
<span class="obeylines-h"><span class="verb"><span
@ -3769,7 +3769,7 @@ class="cmtt-10">psb_loc_to_glob</span></span></span> routine on the output.</li>
<h4 class="subsectionHead"><span class="titlemark">6.26 </span> <a
id="x11-1030006.26"></a>psb_sizeof &#8212; Memory occupation</h4>
id="x12-1040006.26"></a>psb_sizeof &#8212; Memory occupation</h4>
<!--l. 1744--><p class="noindent" >This function computes the memory occupation of a PSBLAS object.
@ -3869,7 +3869,7 @@ class="cmtt-10">integer(psb_long_int_k_)</span></span></span> number.</dd></dl>
<h4 class="subsectionHead"><span class="titlemark">6.27 </span> <a
id="x11-1040006.27"></a>Sorting utilities &#8212; </h4>
id="x12-1050006.27"></a>Sorting utilities &#8212; </h4>
<!--l. 1783--><p class="noindent" ><span
class="pplb7t-x-x-120">psb</span><span
class="pplb7t-x-x-120">_msort &#8212; Sorting by the Merge-sort algorithm</span>
@ -4005,12 +4005,12 @@ class="zplmr7m-">x</span>.</dd></dl>
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x11-104002x1">
class="enumerate" id="x12-105002x1">
<!--l. 1839--><p class="noindent" >For integer or real data the sorting can be performed in the up/down
direction, on the natural or absolute values;
</li>
<li
class="enumerate" id="x11-104004x2">
class="enumerate" id="x12-105004x2">
<!--l. 1841--><p class="noindent" >For complex data the sorting can be done in a lexicographic order (i.e.:
sort on the real part with ties broken according to the imaginary part) or
on the absolute values;
@ -4019,7 +4019,7 @@ class="pplb7t-x-x-120">Notes</span>
</li>
<li
class="enumerate" id="x11-104006x3">
class="enumerate" id="x12-105006x3">
<!--l. 1844--><p class="noindent" >The routines return the items in the chosen ordering; the output
difference is the handling of ties (i.e. items with an equal value) in the
original input. With the merge-sort algorithm ties are preserved in the
@ -4027,7 +4027,7 @@ class="pplb7t-x-x-120">Notes</span>
guaranteed for quicksort or heapsort;
</li>
<li
class="enumerate" id="x11-104008x4">
class="enumerate" id="x12-105008x4">
<!--l. 1850--><p class="noindent" >If <span
class="zplmr7m-">flag </span><span
class="zplmr7t-">= </span><span
@ -4062,7 +4062,7 @@ class="zplmr7t-">) </span>occupied
in the original data sequence;
</li>
<li
class="enumerate" id="x11-104010x5">
class="enumerate" id="x12-105010x5">
<!--l. 1855--><p class="noindent" >If <span
class="zplmr7m-">flag </span><span
class="zplmr7t-">= </span><span
@ -4075,7 +4075,7 @@ class="zplmr7m-">ix</span><span
class="zplmr7t-">(:) </span>have already been initialized by the user;
</li>
<li
class="enumerate" id="x11-104012x6">
class="enumerate" id="x12-105012x6">
<!--l. 1857--><p class="noindent" >The three sorting algorithms have a similar <span
class="zplmr7m-">O</span><span
class="zplmr7t-">(</span><span
@ -4086,7 +4086,7 @@ class="zplmr7t-">) </span>expected running time;
However note that:
<ol class="enumerate2" >
<li
class="enumerate" id="x11-104014x1">
class="enumerate" id="x12-105014x1">
<!--l. 1861--><p class="noindent" >The worst case running time
for quicksort is <span
class="zplmr7m-">O</span><span
@ -4098,7 +4098,7 @@ class="zplmr7t-">)</span>; the algorithm implemented here follows the
apply;
</li>
<li
class="enumerate" id="x11-104016x2">
class="enumerate" id="x12-105016x2">
<!--l. 1864--><p class="noindent" >The worst case running time for merge-sort and heap-sort is
<span
class="zplmr7m-">O</span><span
@ -4108,7 +4108,7 @@ class="zplmr7m-">n</span><span
class="zplmr7t-">) </span>as the average case;
</li>
<li
class="enumerate" id="x11-104018x3">
class="enumerate" id="x12-105018x3">
<!--l. 1866--><p class="noindent" >The merge-sort algorithm is implemented to take advantage of
subsequences that may be already in the desired ordering prior to
the subroutine call; this situation is relatively common when dealing

@ -17,12 +17,12 @@ href="userhtmlse6.html#tailuserhtmlse6.html" >prev-tail</a>] [<a
href="userhtmlse4.html#tailuserhtmlse7.html">tail</a>] [<a
href="userhtml.html#userhtmlse10.html" >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">7 </span> <a
id="x12-1050007"></a>Parallel environment routines</h3>
id="x13-1060007"></a>Parallel environment routines</h3>
<h4 class="subsectionHead"><span class="titlemark">7.1 </span> <a
id="x12-1060007.1"></a>psb_init &#8212; Initializes PSBLAS parallel environment</h4>
id="x13-1070007.1"></a>psb_init &#8212; Initializes PSBLAS parallel environment</h4>
@ -138,11 +138,11 @@ class="newline" />Specified as: an integer variable.</dd></dl>
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x12-106002x1">
class="enumerate" id="x13-107002x1">
<!--l. 64--><p class="noindent" >A call to this routine must precede any other PSBLAS call.
</li>
<li
class="enumerate" id="x12-106004x2">
class="enumerate" id="x13-107004x2">
<!--l. 65--><p class="noindent" >It is an error to specify a value for <span
class="zplmr7m-">np </span>greater than the number of processes
available in the underlying base parallel environment.</li></ol>
@ -150,7 +150,7 @@ class="zplmr7m-">np </span>greater than the number of processes
<h4 class="subsectionHead"><span class="titlemark">7.2 </span> <a
id="x12-1070007.2"></a>psb_info &#8212; Return information about PSBLAS parallel environment</h4>
id="x13-1080007.2"></a>psb_info &#8212; Return information about PSBLAS parallel environment</h4>
@ -231,7 +231,7 @@ class="newline" />Specified as: an integer variable. &#x00A0;</dd></dl>
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x12-107002x1">
class="enumerate" id="x13-108002x1">
<!--l. 108--><p class="noindent" >For processes in the virtual parallel machine the identifier will satisfy
0 <span
class="zplmr7y-">&#x2264; </span><span
@ -241,7 +241,7 @@ class="zplmr7m-">np</span><span
class="zplmr7y-">- </span>1;
</li>
<li
class="enumerate" id="x12-107004x2">
class="enumerate" id="x13-108004x2">
<!--l. 110--><p class="noindent" >If the user has requested on <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_init</span></span></span> a number of processes less than
the total available in the parallel execution environment, the remaining
@ -256,7 +256,7 @@ class="cmtt-10">psb_exit</span></span></span>.</li></ol>
<h4 class="subsectionHead"><span class="titlemark">7.3 </span> <a
id="x12-1080007.3"></a>psb_exit &#8212; Exit from PSBLAS parallel environment</h4>
id="x13-1090007.3"></a>psb_exit &#8212; Exit from PSBLAS parallel environment</h4>
@ -309,7 +309,7 @@ class="newline" />Specified as: a logical variable, default value: true.</dd></d
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x12-108002x1">
class="enumerate" id="x13-109002x1">
<!--l. 146--><p class="noindent" >This routine may be called even if a previous call to <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_info</span></span></span> has
returned with <span
@ -323,14 +323,14 @@ class="cmtt-10">ctxt</span></span></span> in this situation.
</li>
<li
class="enumerate" id="x12-108004x2">
class="enumerate" id="x13-109004x2">
<!--l. 150--><p class="noindent" >A call to this routine with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">close=.true.</span></span></span> implies a call to <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">MPI_Finalize</span></span></span>,
after which no parallel routine may be called.
</li>
<li
class="enumerate" id="x12-108006x3">
class="enumerate" id="x13-109006x3">
<!--l. 152--><p class="noindent" >If the user wishes to use multiple communication contexts in the
same program, or to enter and exit multiple times into the parallel
environment, this routine may be called to selectively close the
@ -343,7 +343,7 @@ class="cmtt-10">close=.true.</span></span></span> to shutdown in a clean wa
<h4 class="subsectionHead"><span class="titlemark">7.4 </span> <a
id="x12-1090007.4"></a>psb_get_mpi_comm &#8212; Get the MPI communicator</h4>
id="x13-1100007.4"></a>psb_get_mpi_comm &#8212; Get the MPI communicator</h4>
@ -407,7 +407,7 @@ deprecated.
<h4 class="subsectionHead"><span class="titlemark">7.5 </span> <a
id="x12-1100007.5"></a>psb_get_mpi_rank &#8212; Get the MPI rank</h4>
id="x13-1110007.5"></a>psb_get_mpi_rank &#8212; Get the MPI rank</h4>
@ -491,7 +491,7 @@ class="cmtt-10">psb_get_rank</span></span></span> is still available but is depr
<h4 class="subsectionHead"><span class="titlemark">7.6 </span> <a
id="x12-1110007.6"></a>psb_wtime &#8212; Wall clock timing</h4>
id="x13-1120007.6"></a>psb_wtime &#8212; Wall clock timing</h4>
@ -525,7 +525,7 @@ class="cmtt-10">real(psb_dpk_)</span></span></span> variable.</dd></dl>
<h4 class="subsectionHead"><span class="titlemark">7.7 </span> <a
id="x12-1120007.7"></a>psb_barrier &#8212; Synchronization point parallel environment</h4>
id="x13-1130007.7"></a>psb_barrier &#8212; Synchronization point parallel environment</h4>
@ -564,7 +564,7 @@ class="newline" />Specified as: an integer variable.</dd></dl>
<h4 class="subsectionHead"><span class="titlemark">7.8 </span> <a
id="x12-1130007.8"></a>psb_abort &#8212; Abort a computation</h4>
id="x13-1140007.8"></a>psb_abort &#8212; Abort a computation</h4>
@ -602,7 +602,7 @@ class="newline" />Specified as: an integer variable.</dd></dl>
<h4 class="subsectionHead"><span class="titlemark">7.9 </span> <a
id="x12-1140007.9"></a>psb_bcast &#8212; Broadcast data</h4>
id="x13-1150007.9"></a>psb_bcast &#8212; Broadcast data</h4>
@ -709,8 +709,7 @@ class="pplb7t-">optional</span>.<br
class="newline" />Intent: <span
class="pplb7t-">inout</span>.<br
class="newline" />If <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">mode</span></span></span> does not specify synchronous completion, then this variable must
be present.</dd></dl>
class="cmtt-10">mode</span></span></span> specifies non-blocking action, then this variable must be present.</dd></dl>
<!--l. 335--><p class="indent" >
<dl class="description"><dt class="description">
<!--l. 336--><p class="noindent" >
@ -746,19 +745,21 @@ class="pplb7t-">optional</span>.<br
class="newline" />Intent: <span
class="pplb7t-">inout</span>.<br
class="newline" />If <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">mode</span></span></span> does not specify synchronous completion, then this variable must
be present.</dd></dl>
class="cmtt-10">mode</span></span></span> specifies non-blocking action, then this variable must be present.</dd></dl>
<!--l. 352--><p class="noindent" ><span
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x12-114002x1">
class="enumerate" id="x13-115002x1">
<!--l. 354--><p class="noindent" >The <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">dat</span></span></span> argument is both input and output, and its value may be changed
even on processes different from the final result destination.
</li>
<li
class="enumerate" id="x12-114004x2">
class="enumerate" id="x13-115004x2">
<!--l. 357--><p class="noindent" >The <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">mode</span></span></span> argument can be built with the bitwise <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">IOR()</span></span></span> operator; in the
@ -774,7 +775,7 @@ class="cmtt-10">request</span></span></span> argument needs not be specified:
<!--l. 371--><p class="nopar" > </div></div>
</li>
<li
class="enumerate" id="x12-114006x3">
class="enumerate" id="x13-115006x3">
<!--l. 376--><p class="noindent" >When splitting the operation in two calls, the <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">dat</span></span></span> argument <span
class="pplri7t-">must not </span>be
@ -793,7 +794,7 @@ class="pplri7t-">must not </span>be
<h4 class="subsectionHead"><span class="titlemark">7.10 </span> <a
id="x12-1150007.10"></a>psb_sum &#8212; Global sum</h4>
id="x13-1160007.10"></a>psb_sum &#8212; Global sum</h4>
@ -901,8 +902,7 @@ class="pplb7t-">optional</span>.<br
class="newline" />Intent: <span
class="pplb7t-">inout</span>.<br
class="newline" />If <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">mode</span></span></span> does not specify synchronous completion, then this variable must
be present.</dd></dl>
class="cmtt-10">mode</span></span></span> specifies non-blocking action, then this variable must be present.</dd></dl>
<!--l. 449--><p class="indent" >
<dl class="description"><dt class="description">
<!--l. 450--><p class="noindent" >
@ -938,19 +938,21 @@ class="pplb7t-">optional</span>.<br
class="newline" />Intent: <span
class="pplb7t-">inout</span>.<br
class="newline" />If <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">mode</span></span></span> does not specify synchronous completion, then this variable must
be present.</dd></dl>
class="cmtt-10">mode</span></span></span> specifies non-blocking action, then this variable must be present.</dd></dl>
<!--l. 466--><p class="noindent" ><span
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x12-115002x1">
class="enumerate" id="x13-116002x1">
<!--l. 468--><p class="noindent" >The <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">dat</span></span></span> argument is both input and output, and its value may be changed
even on processes different from the final result destination.
</li>
<li
class="enumerate" id="x12-115004x2">
class="enumerate" id="x13-116004x2">
<!--l. 471--><p class="noindent" >The <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">mode</span></span></span> argument can be built with the bitwise <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">IOR()</span></span></span> operator; in the
@ -966,7 +968,7 @@ class="cmtt-10">request</span></span></span> argument needs not be specified:
<!--l. 485--><p class="nopar" > </div></div>
</li>
<li
class="enumerate" id="x12-115006x3">
class="enumerate" id="x13-116006x3">
<!--l. 490--><p class="noindent" >When splitting the operation in two calls, the <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">dat</span></span></span> argument <span
class="pplri7t-">must not </span>be
@ -985,7 +987,7 @@ class="pplri7t-">must not </span>be
<h4 class="subsectionHead"><span class="titlemark">7.11 </span> <a
id="x12-1160007.11"></a>psb_max &#8212; Global maximum</h4>
id="x13-1170007.11"></a>psb_max &#8212; Global maximum</h4>
@ -1094,8 +1096,7 @@ class="pplb7t-">optional</span>.<br
class="newline" />Intent: <span
class="pplb7t-">inout</span>.<br
class="newline" />If <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">mode</span></span></span> does not specify synchronous completion, then this variable must
be present.</dd></dl>
class="cmtt-10">mode</span></span></span> specifies non-blocking action, then this variable must be present.</dd></dl>
<!--l. 563--><p class="indent" >
<dl class="description"><dt class="description">
<!--l. 564--><p class="noindent" >
@ -1130,8 +1131,7 @@ class="pplb7t-">optional</span>.<br
class="newline" />Intent: <span
class="pplb7t-">inout</span>.<br
class="newline" />If <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">mode</span></span></span> does not specify synchronous completion, then this variable must
be present.</dd></dl>
class="cmtt-10">mode</span></span></span> specifies non-blocking action, then this variable must be present.</dd></dl>
@ -1139,13 +1139,13 @@ class="cmtt-10">mode</span></span></span> does not specify synchronous completio
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x12-116002x1">
class="enumerate" id="x13-117002x1">
<!--l. 583--><p class="noindent" >The <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">dat</span></span></span> argument is both input and output, and its value may be changed
even on processes different from the final result destination.
</li>
<li
class="enumerate" id="x12-116004x2">
class="enumerate" id="x13-117004x2">
<!--l. 586--><p class="noindent" >The <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">mode</span></span></span> argument can be built with the bitwise <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">IOR()</span></span></span> operator; in the
@ -1161,7 +1161,7 @@ class="cmtt-10">request</span></span></span> argument needs not be specified:
<!--l. 600--><p class="nopar" > </div></div>
</li>
<li
class="enumerate" id="x12-116006x3">
class="enumerate" id="x13-117006x3">
<!--l. 605--><p class="noindent" >When splitting the operation in two calls, the <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">dat</span></span></span> argument <span
class="pplri7t-">must not </span>be
@ -1180,7 +1180,7 @@ class="pplri7t-">must not </span>be
<h4 class="subsectionHead"><span class="titlemark">7.12 </span> <a
id="x12-1170007.12"></a>psb_min &#8212; Global minimum</h4>
id="x13-1180007.12"></a>psb_min &#8212; Global minimum</h4>
@ -1288,8 +1288,7 @@ class="pplb7t-">optional</span>.<br
class="newline" />Intent: <span
class="pplb7t-">inout</span>.<br
class="newline" />If <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">mode</span></span></span> does not specify synchronous completion, then this variable must
be present.</dd></dl>
class="cmtt-10">mode</span></span></span> specifies non-blocking action, then this variable must be present.</dd></dl>
<!--l. 676--><p class="indent" >
<dl class="description"><dt class="description">
<!--l. 677--><p class="noindent" >
@ -1325,19 +1324,21 @@ class="pplb7t-">optional</span>.<br
class="newline" />Intent: <span
class="pplb7t-">inout</span>.<br
class="newline" />If <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">mode</span></span></span> does not specify synchronous completion, then this variable must
be present.</dd></dl>
class="cmtt-10">mode</span></span></span> specifies non-blocking action, then this variable must be present.</dd></dl>
<!--l. 694--><p class="noindent" ><span
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x12-117002x1">
class="enumerate" id="x13-118002x1">
<!--l. 696--><p class="noindent" >The <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">dat</span></span></span> argument is both input and output, and its value may be changed
even on processes different from the final result destination.
</li>
<li
class="enumerate" id="x12-117004x2">
class="enumerate" id="x13-118004x2">
<!--l. 699--><p class="noindent" >The <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">mode</span></span></span> argument can be built with the bitwise <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">IOR()</span></span></span> operator; in the
@ -1353,7 +1354,7 @@ class="cmtt-10">request</span></span></span> argument needs not be specified:
<!--l. 713--><p class="nopar" > </div></div>
</li>
<li
class="enumerate" id="x12-117006x3">
class="enumerate" id="x13-118006x3">
<!--l. 718--><p class="noindent" >When splitting the operation in two calls, the <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">dat</span></span></span> argument <span
class="pplri7t-">must not </span>be
@ -1372,7 +1373,7 @@ class="pplri7t-">must not </span>be
<h4 class="subsectionHead"><span class="titlemark">7.13 </span> <a
id="x12-1180007.13"></a>psb_amx &#8212; Global maximum absolute value</h4>
id="x13-1190007.13"></a>psb_amx &#8212; Global maximum absolute value</h4>
@ -1481,8 +1482,7 @@ class="pplb7t-">optional</span>.<br
class="newline" />Intent: <span
class="pplb7t-">inout</span>.<br
class="newline" />If <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">mode</span></span></span> does not specify synchronous completion, then this variable must
be present.</dd></dl>
class="cmtt-10">mode</span></span></span> specifies non-blocking action, then this variable must be present.</dd></dl>
<!--l. 789--><p class="indent" >
<dl class="description"><dt class="description">
<!--l. 790--><p class="noindent" >
@ -1518,19 +1518,21 @@ class="pplb7t-">optional</span>.<br
class="newline" />Intent: <span
class="pplb7t-">inout</span>.<br
class="newline" />If <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">mode</span></span></span> does not specify synchronous completion, then this variable must
be present.</dd></dl>
class="cmtt-10">mode</span></span></span> specifies non-blocking action, then this variable must be present.</dd></dl>
<!--l. 807--><p class="noindent" ><span
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x12-118002x1">
class="enumerate" id="x13-119002x1">
<!--l. 809--><p class="noindent" >The <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">dat</span></span></span> argument is both input and output, and its value may be changed
even on processes different from the final result destination.
</li>
<li
class="enumerate" id="x12-118004x2">
class="enumerate" id="x13-119004x2">
<!--l. 812--><p class="noindent" >The <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">mode</span></span></span> argument can be built with the bitwise <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">IOR()</span></span></span> operator; in the
@ -1546,7 +1548,7 @@ class="cmtt-10">request</span></span></span> argument needs not be specified:
<!--l. 826--><p class="nopar" > </div></div>
</li>
<li
class="enumerate" id="x12-118006x3">
class="enumerate" id="x13-119006x3">
<!--l. 831--><p class="noindent" >When splitting the operation in two calls, the <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">dat</span></span></span> argument <span
class="pplri7t-">must not </span>be
@ -1565,7 +1567,7 @@ class="pplri7t-">must not </span>be
<h4 class="subsectionHead"><span class="titlemark">7.14 </span> <a
id="x12-1190007.14"></a>psb_amn &#8212; Global minimum absolute value</h4>
id="x13-1200007.14"></a>psb_amn &#8212; Global minimum absolute value</h4>
@ -1674,8 +1676,7 @@ class="pplb7t-">optional</span>.<br
class="newline" />Intent: <span
class="pplb7t-">inout</span>.<br
class="newline" />If <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">mode</span></span></span> does not specify synchronous completion, then this variable must
be present.</dd></dl>
class="cmtt-10">mode</span></span></span> specifies non-blocking action, then this variable must be present.</dd></dl>
<!--l. 902--><p class="indent" >
<dl class="description"><dt class="description">
<!--l. 903--><p class="noindent" >
@ -1711,19 +1712,21 @@ class="pplb7t-">optional</span>.<br
class="newline" />Intent: <span
class="pplb7t-">inout</span>.<br
class="newline" />If <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">mode</span></span></span> does not specify synchronous completion, then this variable must
be present.</dd></dl>
class="cmtt-10">mode</span></span></span> specifies non-blocking action, then this variable must be present.</dd></dl>
<!--l. 920--><p class="noindent" ><span
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x12-119002x1">
class="enumerate" id="x13-120002x1">
<!--l. 922--><p class="noindent" >The <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">dat</span></span></span> argument is both input and output, and its value may be changed
even on processes different from the final result destination.
</li>
<li
class="enumerate" id="x12-119004x2">
class="enumerate" id="x13-120004x2">
<!--l. 925--><p class="noindent" >The <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">mode</span></span></span> argument can be built with the bitwise <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">IOR()</span></span></span> operator; in the
@ -1739,7 +1742,7 @@ class="cmtt-10">request</span></span></span> argument needs not be specified:
<!--l. 939--><p class="nopar" > </div></div>
</li>
<li
class="enumerate" id="x12-119006x3">
class="enumerate" id="x13-120006x3">
<!--l. 944--><p class="noindent" >When splitting the operation in two calls, the <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">dat</span></span></span> argument <span
class="pplri7t-">must not </span>be
@ -1758,7 +1761,7 @@ class="pplri7t-">must not </span>be
<h4 class="subsectionHead"><span class="titlemark">7.15 </span> <a
id="x12-1200007.15"></a>psb_nrm2 &#8212; Global 2-norm reduction</h4>
id="x13-1210007.15"></a>psb_nrm2 &#8212; Global 2-norm reduction</h4>
@ -1866,8 +1869,7 @@ class="pplb7t-">optional</span>.<br
class="newline" />Intent: <span
class="pplb7t-">inout</span>.<br
class="newline" />If <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">mode</span></span></span> does not specify synchronous completion, then this variable must
be present.</dd></dl>
class="cmtt-10">mode</span></span></span> specifies non-blocking action, then this variable must be present.</dd></dl>
<!--l. 1015--><p class="indent" >
<dl class="description"><dt class="description">
<!--l. 1016--><p class="noindent" >
@ -1902,8 +1904,7 @@ class="pplb7t-">optional</span>.<br
class="newline" />Intent: <span
class="pplb7t-">inout</span>.<br
class="newline" />If <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">mode</span></span></span> does not specify synchronous completion, then this variable must
be present.</dd></dl>
class="cmtt-10">mode</span></span></span> specifies non-blocking action, then this variable must be present.</dd></dl>
@ -1911,12 +1912,12 @@ class="cmtt-10">mode</span></span></span> does not specify synchronous completio
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x12-120002x1">
class="enumerate" id="x13-121002x1">
<!--l. 1035--><p class="noindent" >This reduction is appropriate to compute the results of multiple (local)
NRM2 operations at the same time.
</li>
<li
class="enumerate" id="x12-120004x2">
class="enumerate" id="x13-121004x2">
<!--l. 1037--><p class="noindent" >Denoting by <span
class="zplmr7m-">dat</span><sub><span
class="zplmr7m-x-x-76">i</span></sub> the value of the variable <span
@ -1933,13 +1934,13 @@ res = &#x2211; dat2i,
<!--l. 1039--><p class="nopar" > with care taken to avoid unnecessary overflow.
</li>
<li
class="enumerate" id="x12-120006x3">
class="enumerate" id="x13-121006x3">
<!--l. 1041--><p class="noindent" >The <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">dat</span></span></span> argument is both input and output, and its value may be changed
even on processes different from the final result destination.
</li>
<li
class="enumerate" id="x12-120008x4">
class="enumerate" id="x13-121008x4">
<!--l. 1044--><p class="noindent" >The <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">mode</span></span></span> argument can be built with the bitwise <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">IOR()</span></span></span> operator; in the
@ -1955,7 +1956,7 @@ class="cmtt-10">request</span></span></span> argument needs not be specified:
<!--l. 1058--><p class="nopar" > </div></div>
</li>
<li
class="enumerate" id="x12-120010x5">
class="enumerate" id="x13-121010x5">
<!--l. 1063--><p class="noindent" >When splitting the operation in two calls, the <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">dat</span></span></span> argument <span
class="pplri7t-">must not </span>be
@ -1977,7 +1978,7 @@ class="pplri7t-">must not </span>be
<h4 class="subsectionHead"><span class="titlemark">7.16 </span> <a
id="x12-1210007.16"></a>psb_snd &#8212; Send data</h4>
id="x13-1220007.16"></a>psb_snd &#8212; Send data</h4>
@ -2091,7 +2092,7 @@ class="description">
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x12-121002x1">
class="enumerate" id="x13-122002x1">
<!--l. 1134--><p class="noindent" >This subroutine implies a synchronization, but only between the calling
process and the destination process <span
class="zplmr7m-">dst</span>.</li></ol>
@ -2099,7 +2100,7 @@ class="zplmr7m-">dst</span>.</li></ol>
<h4 class="subsectionHead"><span class="titlemark">7.17 </span> <a
id="x12-1220007.17"></a>psb_rcv &#8212; Receive data</h4>
id="x13-1230007.17"></a>psb_rcv &#8212; Receive data</h4>
@ -2213,7 +2214,7 @@ class="zplmr7m-">m </span>is not specified, size
class="pplb7t-x-x-120">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x12-122002x1">
class="enumerate" id="x13-123002x1">
<!--l. 1185--><p class="noindent" >This subroutine implies a synchronization, but only between the calling
process and the source process <span
class="zplmr7m-">src</span>.</li></ol>

@ -17,7 +17,7 @@ href="userhtmlse7.html#tailuserhtmlse7.html" >prev-tail</a>] [<a
href="userhtmlse5.html#tailuserhtmlse8.html">tail</a>] [<a
href="userhtml.html#userhtmlse11.html" >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">8 </span> <a
id="x13-1230008"></a>Error handling</h3>
id="x14-1240008"></a>Error handling</h3>
<!--l. 5--><p class="noindent" >The PSBLAS library error handling policy has been completely rewritten in version
2.0. The idea behind the design of this new error handling strategy is to keep error
messages on a stack allowing the user to trace back up to the point where the first
@ -36,7 +36,7 @@ zero, an error condition is raised. This process continues on all the levels of
nested calls until the level where the user decides to abort the program
execution.
<!--l. 23--><p class="indent" > Figure&#x00A0;<a
href="#x13-123025r5">5<!--tex4ht:ref: fig:routerr --></a> shows the layout of a generic <span class="obeylines-h"><span class="verb"><span
href="#x14-124025r5">5<!--tex4ht:ref: fig:routerr --></a> shows the layout of a generic <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_foo</span></span></span> routine with respect to the
PSBLAS-2.0 error handling policy. It is possible to see how, whenever an error
condition is detected, the <span class="obeylines-h"><span class="verb"><span
@ -58,7 +58,7 @@ explicitly.
<!--l. 40--><p class="indent" > <a
id="x13-123025r5"></a><hr class="float"><div class="float"
id="x14-124025r5"></a><hr class="float"><div class="float"
>
@ -68,7 +68,7 @@ explicitly.
<!--l. 101--><p class="noindent" >
<div class="fbox"><div class="minipage"><!--l. 72-->
<pre class="lstlisting" id="listing-154"><span class="label"><a
id="x13-123001r1"></a></span><span style="color:#000000"><span
id="x14-124001r1"></a></span><span style="color:#000000"><span
class="cmtt-9">subroutine</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">psb_foo</span></span><span style="color:#000000"><span
class="cmtt-9">(</span></span><span style="color:#000000"><span
@ -78,13 +78,13 @@ class="cmtt-9">,</span></span><span style="color:#000000"> </span><span style="c
class="cmtt-9">info</span></span><span style="color:#000000"><span
class="cmtt-9">)</span></span>
<span class="label"><a
id="x13-123002r2"></a></span><span
id="x14-124002r2"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">...</span></span>
<span class="label"><a
id="x13-123003r3"></a></span><span
id="x14-124003r3"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
@ -95,7 +95,7 @@ class="cmtt-9">detected</span></span><span style="color:#000000"><span
class="cmtt-9">)</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">then</span></span>
<span class="label"><a
id="x13-123004r4"></a></span><span
id="x14-124004r4"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
@ -105,7 +105,7 @@ class="cmtt-9">info</span></span><span style="color:#000000"><span
class="cmtt-9">=</span></span><span style="color:#000000"><span
class="cmtt-9">errcode1</span></span>
<span class="label"><a
id="x13-123005r5"></a></span><span
id="x14-124005r5"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
@ -121,7 +121,7 @@ class="cmtt-9">,</span></span><span style="color:#000000"> </span><span style="c
class="cmtt-9">errcode1</span></span><span style="color:#000000"><span
class="cmtt-9">)</span></span>
<span class="label"><a
id="x13-123006r6"></a></span><span
id="x14-124006r6"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
@ -130,20 +130,20 @@ class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">goto</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">9999</span></span>
<span class="label"><a
id="x13-123007r7"></a></span><span
id="x14-124007r7"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">end</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">if</span></span>
<span class="label"><a
id="x13-123008r8"></a></span><span
id="x14-124008r8"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">...</span></span>
<span class="label"><a
id="x13-123009r9"></a></span><span
id="x14-124009r9"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
@ -156,7 +156,7 @@ class="cmtt-9">,</span></span><span style="color:#000000"> </span><span style="c
class="cmtt-9">info</span></span><span style="color:#000000"><span
class="cmtt-9">)</span></span>
<span class="label"><a
id="x13-123010r10"></a></span><span
id="x14-124010r10"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
@ -170,7 +170,7 @@ class="cmtt-9">zero</span></span><span style="color:#000000"><span
class="cmtt-9">)</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">then</span></span>
<span class="label"><a
id="x13-123011r11"></a></span><span
id="x14-124011r11"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
@ -180,7 +180,7 @@ class="cmtt-9">info</span></span><span style="color:#000000"><span
class="cmtt-9">=</span></span><span style="color:#000000"><span
class="cmtt-9">errcode2</span></span>
<span class="label"><a
id="x13-123012r12"></a></span><span
id="x14-124012r12"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
@ -196,7 +196,7 @@ class="cmtt-9">,</span></span><span style="color:#000000"> </span><span style="c
class="cmtt-9">errcode2</span></span><span style="color:#000000"><span
class="cmtt-9">)</span></span>
<span class="label"><a
id="x13-123013r13"></a></span><span
id="x14-124013r13"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
@ -205,24 +205,24 @@ class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">goto</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">9999</span></span>
<span class="label"><a
id="x13-123014r14"></a></span><span
id="x14-124014r14"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">end</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">if</span></span>
<span class="label"><a
id="x13-123015r15"></a></span><span
id="x14-124015r15"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">...</span></span>
<span class="label"><a
id="x13-123016r16"></a></span><span style="color:#000000"><span
id="x14-124016r16"></a></span><span style="color:#000000"><span
class="cmtt-9">9999</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">continue</span></span>
<span class="label"><a
id="x13-123017r17"></a></span><span
id="x14-124017r17"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
@ -236,7 +236,7 @@ class="cmtt-9">act_abort</span></span><span style="color:#000000"><span
class="cmtt-9">)</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">then</span></span>
<span class="label"><a
id="x13-123018r18"></a></span><span
id="x14-124018r18"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
@ -247,36 +247,36 @@ class="cmtt-9">(</span></span><span style="color:#000000"><span
class="cmtt-9">icontxt</span></span><span style="color:#000000"><span
class="cmtt-9">)</span></span>
<span class="label"><a
id="x13-123019r19"></a></span><span
id="x14-124019r19"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">return</span></span>
<span class="label"><a
id="x13-123020r20"></a></span><span
id="x14-124020r20"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">else</span></span>
<span class="label"><a
id="x13-123021r21"></a></span><span
id="x14-124021r21"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">return</span></span>
<span class="label"><a
id="x13-123022r22"></a></span><span
id="x14-124022r22"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">end</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">if</span></span>
<span class="label"><a
id="x13-123023r23"></a></span>
id="x14-124023r23"></a></span>
<span class="label"><a
id="x13-123024r24"></a></span><span style="color:#000000"><span
id="x14-124024r24"></a></span><span style="color:#000000"><span
class="cmtt-9">end</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">subroutine</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">psb_foo</span></span></pre></div></div>
@ -286,13 +286,13 @@ class="cmtt-9">psb_foo</span></span></pre></div></div>
class="content">The layout of a generic <span
class="cmtt-10">psb</span><span
class="cmtt-10">_foo </span>routine with respect to PSBLAS-2.0
error handling policy.</span></div><!--tex4ht:label?: x13-123025r5 -->
error handling policy.</span></div><!--tex4ht:label?: x14-124025r5 -->
</div><hr class="endfloat" />
<!--l. 112--><p class="indent" > Figure&#x00A0;<a
href="#x13-123026r6">6<!--tex4ht:ref: fig:errormsg --></a> reports a sample error message generated by the PSBLAS-2.0
href="#x14-124026r6">6<!--tex4ht:ref: fig:errormsg --></a> reports a sample error message generated by the PSBLAS-2.0
library. This error has been generated by the fact that the user has chosen the
invalid &#8220;FOO&#8221; storage format to represent the sparse matrix. From this
error message it is possible to see that the error has been detected inside
@ -304,7 +304,7 @@ process).
<!--l. 120--><p class="indent" > <a
id="x13-123026r6"></a><hr class="float"><div class="float"
id="x14-124026r6"></a><hr class="float"><div class="float"
>
@ -333,7 +333,7 @@ Aborting...
<br /> <div class="caption"
><span class="id">Listing 6: </span><span
class="content">A sample PSBLAS-3.0 error message. Process 0 detected an error
condition inside the psb_cest subroutine</span></div><!--tex4ht:label?: x13-123026r6 -->
condition inside the psb_cest subroutine</span></div><!--tex4ht:label?: x14-124026r6 -->
@ -342,10 +342,10 @@ condition inside the psb_cest subroutine</span></div><!--tex4ht:label?: x13-1230
<h4 class="subsectionHead"><span class="titlemark">8.1 </span> <a
id="x13-1240008.1"></a>psb_errpush &#8212; Pushes an error code onto the error stack</h4>
id="x14-1250008.1"></a>psb_errpush &#8212; Pushes an error code onto the error stack</h4>
<!--l. 174-->
<pre class="lstlisting" id="listing-155"><span class="label"><a
id="x13-124001r1"></a></span><span style="color:#000000"><span
id="x14-125001r1"></a></span><span style="color:#000000"><span
class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">psb_errpush</span></span><span style="color:#000000"><span
class="cmtt-10">(</span></span><span style="color:#000000"><span
@ -433,10 +433,10 @@ class="newline" /></dd></dl>
<h4 class="subsectionHead"><span class="titlemark">8.2 </span> <a
id="x13-1250008.2"></a>psb_error &#8212; Prints the error stack content and aborts execution</h4>
id="x14-1260008.2"></a>psb_error &#8212; Prints the error stack content and aborts execution</h4>
<!--l. 204-->
<pre class="lstlisting" id="listing-156"><span class="label"><a
id="x13-125001r1"></a></span><span style="color:#000000"><span
id="x14-126001r1"></a></span><span style="color:#000000"><span
class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">psb_error</span></span><span style="color:#000000"><span
class="cmtt-10">(</span></span><span style="color:#000000"><span
@ -473,10 +473,10 @@ class="newline" />Specified as: an integer.</dd></dl>
<h4 class="subsectionHead"><span class="titlemark">8.3 </span> <a
id="x13-1260008.3"></a>psb_set_errverbosity &#8212; Sets the verbosity of error messages</h4>
id="x14-1270008.3"></a>psb_set_errverbosity &#8212; Sets the verbosity of error messages</h4>
<!--l. 224-->
<pre class="lstlisting" id="listing-157"><span class="label"><a
id="x13-126001r1"></a></span><span style="color:#000000"><span
id="x14-127001r1"></a></span><span style="color:#000000"><span
class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">psb_set_errverbosity</span></span><span style="color:#000000"><span
class="cmtt-10">(</span></span><span style="color:#000000"><span
@ -513,11 +513,11 @@ class="newline" />Specified as: an integer.</dd></dl>
<h4 class="subsectionHead"><span class="titlemark">8.4 </span> <a
id="x13-1270008.4"></a>psb_set_erraction &#8212; Set the type of action to be taken upon error
id="x14-1280008.4"></a>psb_set_erraction &#8212; Set the type of action to be taken upon error
condition</h4>
<!--l. 241-->
<pre class="lstlisting" id="listing-158"><span class="label"><a
id="x13-127001r1"></a></span><span style="color:#000000"><span
id="x14-128001r1"></a></span><span style="color:#000000"><span
class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">psb_set_erraction</span></span><span style="color:#000000"><span
class="cmtt-10">(</span></span><span style="color:#000000"><span

@ -17,7 +17,7 @@ href="userhtmlse8.html#tailuserhtmlse8.html" >prev-tail</a>] [<a
href="userhtmlse6.html#tailuserhtmlse9.html">tail</a>] [<a
href="userhtml.html#userhtmlse12.html" >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">9 </span> <a
id="x14-1280009"></a>Utilities</h3>
id="x15-1290009"></a>Utilities</h3>
<!--l. 4--><p class="noindent" >We have some utilities available for input and output of sparse matrices; the
interfaces to these routines are available in the module <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_util_mod</span></span></span>.
@ -25,11 +25,11 @@ class="cmtt-10">psb_util_mod</span></span></span>.
<h4 class="subsectionHead"><span class="titlemark">9.1 </span> <a
id="x14-1290009.1"></a> hb_read &#8212; Read a sparse matrix from a file in the Harwell&#8211;Boeing
id="x15-1300009.1"></a> hb_read &#8212; Read a sparse matrix from a file in the Harwell&#8211;Boeing
format</h4>
<!--l. 16-->
<pre class="lstlisting" id="listing-159"><span class="label"><a
id="x14-129001r1"></a></span><span style="color:#000000"><span
id="x15-130001r1"></a></span><span style="color:#000000"><span
class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">hb_read</span></span><span style="color:#000000"><span
class="cmtt-10">(</span></span><span style="color:#000000"><span
@ -139,11 +139,11 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
<h4 class="subsectionHead"><span class="titlemark">9.2 </span> <a
id="x14-1300009.2"></a>hb_write &#8212; Write a sparse matrix to a file in the Harwell&#8211;Boeing
id="x15-1310009.2"></a>hb_write &#8212; Write a sparse matrix to a file in the Harwell&#8211;Boeing
format</h4>
<!--l. 59-->
<pre class="lstlisting" id="listing-160"><span class="label"><a
id="x14-130001r1"></a></span><span style="color:#000000"><span
id="x15-131001r1"></a></span><span style="color:#000000"><span
class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">hb_write</span></span><span style="color:#000000"><span
class="cmtt-10">(</span></span><span style="color:#000000"><span
@ -265,11 +265,11 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
<h4 class="subsectionHead"><span class="titlemark">9.3 </span> <a
id="x14-1310009.3"></a>mm_mat_read &#8212; Read a sparse matrix from a file in the MatrixMarket
id="x15-1320009.3"></a>mm_mat_read &#8212; Read a sparse matrix from a file in the MatrixMarket
format</h4>
<!--l. 111-->
<pre class="lstlisting" id="listing-161"><span class="label"><a
id="x14-131001r1"></a></span><span style="color:#000000"><span
id="x15-132001r1"></a></span><span style="color:#000000"><span
class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">mm_mat_read</span></span><span style="color:#000000"><span
class="cmtt-10">(</span></span><span style="color:#000000"><span
@ -354,11 +354,11 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
<h4 class="subsectionHead"><span class="titlemark">9.4 </span> <a
id="x14-1320009.4"></a>mm_array_read &#8212; Read a dense array from a file in the MatrixMarket
id="x15-1330009.4"></a>mm_array_read &#8212; Read a dense array from a file in the MatrixMarket
format</h4>
<!--l. 142-->
<pre class="lstlisting" id="listing-162"><span class="label"><a
id="x14-132001r1"></a></span><span style="color:#000000"><span
id="x15-133001r1"></a></span><span style="color:#000000"><span
class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">mm_array_read</span></span><span style="color:#000000"><span
class="cmtt-10">(</span></span><span style="color:#000000"><span
@ -449,11 +449,11 @@ class="newline" />An integer value; 0 means no error has been detected.</dd></dl
<h4 class="subsectionHead"><span class="titlemark">9.5 </span> <a
id="x14-1330009.5"></a>mm_mat_write &#8212; Write a sparse matrix to a file in the MatrixMarket
id="x15-1340009.5"></a>mm_mat_write &#8212; Write a sparse matrix to a file in the MatrixMarket
format</h4>
<!--l. 179-->
<pre class="lstlisting" id="listing-163"><span class="label"><a
id="x14-133001r1"></a></span><span style="color:#000000"><span
id="x15-134001r1"></a></span><span style="color:#000000"><span
class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">mm_mat_write</span></span><span style="color:#000000"><span
class="cmtt-10">(</span></span><span style="color:#000000"><span
@ -582,11 +582,11 @@ psb_i_t&#x00A0;psb_c_&#x003C;s,d,c,z&#x003E;global_mat_write(ah,cdh);
<h4 class="subsectionHead"><span class="titlemark">9.6 </span> <a
id="x14-1340009.6"></a>mm_array_write &#8212; Write a dense array from a file in the MatrixMarket
id="x15-1350009.6"></a>mm_array_write &#8212; Write a dense array to a file in the MatrixMarket
format</h4>
<!--l. 261-->
<pre class="lstlisting" id="listing-165"><span class="label"><a
id="x14-134001r1"></a></span><span style="color:#000000"><span
id="x15-135001r1"></a></span><span style="color:#000000"><span
class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">mm_array_write</span></span><span style="color:#000000"><span
class="cmtt-10">(</span></span><span style="color:#000000"><span

File diff suppressed because one or more lines are too long

@ -151,9 +151,9 @@ Lawson, C., Hanson, R., Kincaid, D. and Krogh, F.,
of partial differential equations.}
{ACM Trans. Math. Softw.} vol.~{23}, 32--49.
\bibitem{metcalf}
{Metcalf, M., Reid, J. and Cohen, M.}
{\em Fortran 95/2003 explained.}
{Oxford University Press}, 2004.
{Metcalf, M., Reid, J., Cohen, M., Bader, R.}
{\em Modern Fortran explained.}
{Oxford University Press}, 2024.
%
\bibitem{MRC:11}
{Metcalf, M., Reid, J. and Cohen, M.}

@ -203,13 +203,13 @@ Get number of devices available on current computing node.
\ifpdf
\begin{minted}[breaklines=true]{fortran}
ngpus = psb_cuda_getDevice()
dev = psb_cuda_getDevice()
\end{minted}
\else
\begin{center}
\begin{minipage}[tl]{0.9\textwidth}
\begin{verbatim}
ngpus = psb_cuda_getDevice()
dev = psb_cuda_getDevice()
\end{verbatim}
\end{minipage}
\end{center}

@ -1,3 +1,60 @@
\section*{Preface}
\addcontentsline{toc}{section}{Preface}
This manual describes the main features of PSBLAS, a library for
parallel sparse computations that has been developed over a number of
years.
Our work has been mainly devoted to providing a foundational toolkit
on which many algorithms can be implemented; the toolkit has proven
its effectiveness and flexibility in many ways.
The PSBLAS component deals mostly with the computational kernels and
environment handling; it supports computations on normal CPUs,
including the usage of OpenMP for parallelizing across multiple
cores.
This foundational package provides linear solvers and some very
simple preconditioners; the companion package AMG4PSBLAS explores how
to use the base toolkit to build much more sophisticated
preconditioners which can be plugged seamlessly into the base solvers.
The software architecture allows us to offer support for many
alternatives in the implementation, including usage of
heterogeneous platforms, and computations performed on GPUs through
CUDA.
There is support for GPU computations through OpenACC, but it is at
this time a highly experimental version; we plan to also look at using
accelerators through OpenMP as support from compilers improves.
The project is led by Salvatore Filippone; a number of people have contributed to this package over the
years. Contributors, in roughly reverse chronological order:
\begin{obeylines}
Theophane Loloum
Fabio Durastante
Dimitri Walther
Andrea Di Iorio
Stefano Petrilli
Soren Rasmussen
Zaak Beekman
Ambra Abdullahi Hassan
Pasqua D'Ambra
Alfredo Buttari
Daniela di Serafino
Michele Martone
Michele Colajanni
Fabio Cerioni
Stefano Maiolatesi
Dario Pascucci
\end{obeylines}
\begin{flushright}
Salvatore Filippone\\
Alfredo Buttari\\
Fabio Durastante
\end{flushright}
\clearpage
\section{Introduction}\label{sec:intro}
The PSBLAS library, developed with the aim to facilitate the
@ -12,19 +69,20 @@ addresses a distributed memory execution model operating with message
passing.
The PSBLAS library version 3 is implemented in
the Fortran~2003~\cite{metcalf} programming language, with reuse and/or
the Fortran~2008~\cite{metcalf} programming language, with reuse and/or
adaptation of existing Fortran~77 and Fortran~95 software, plus a
handful of C routines.
The use of Fortran~2003 offers a number of advantages over Fortran~95,
The use of Fortran~2008 offers a number of advantages over Fortran~95,
mostly in the handling of requirements for evolution and adaptation of
the library to new computing architectures and integration of
new algorithms.
For a detailed discussion of our design see~\cite{Sparse03}; other
works discussing advanced programming in Fortran~2003
works discussing advanced programming in Fortran~2008
include~\cite{DesPat:11,RouXiaXu:11}; sufficient support for
Fortran~2003 is now available from many compilers, including the GNU
Fortran compiler from the Free Software Foundation (as of version 4.8).
Fortran~2008 is now available from many compilers, including recent
versions of the GNU Fortran compiler from the Free Software
Foundation, and the FLANG compiler from the LLVM project.
Previous approaches have been based on mixing Fortran~95, with its
@ -83,7 +141,7 @@ influenced by the structure of the ScaLAPACK parallel
library. The layered structure of the PSBLAS library
is shown in figure~\ref{fig:psblas}; lower layers of the library
indicate an encapsulation relationship with upper layers. The ongoing
discussion focuses on the Fortran~2003 layer immediately below the
discussion focuses on the Fortran~2008 layer immediately below the
application layer.
The serial parts of the computation on each process are executed through
calls to the serial sparse BLAS subroutines.
@ -257,7 +315,7 @@ systems solution for block diagonal matrices;
\item Sparse matrix and data distribution preprocessing.
\end{itemize}
\item[Preconditioner routines]
\item[Iterative methods] a subset of Krylov subspace iterative
\item[Iterative methods] a subset of classical and Krylov subspace iterative
methods
\end{description}
The following naming scheme has been adopted for all the symbols

@ -327,7 +327,7 @@ Default: both fields are selected (i.e. require synchronous completion).\\
Scope: {\bf local}.\\
Type: {\bf optional}.\\
Intent: {\bf inout}.\\
If \verb|mode| does not specify synchronous completion, then this
If \verb|mode| specifies non-blocking action, then this
variable must be present.
\end{description}
@ -345,7 +345,7 @@ Type, kind, rank and size must agree on all processes.
Scope: {\bf local}.\\
Type: {\bf optional}.\\
Intent: {\bf inout}.\\
If \verb|mode| does not specify synchronous completion, then this
If \verb|mode| specifies non-blocking action, then this
variable must be present.
\end{description}
@ -442,7 +442,7 @@ Default: both fields are selected (i.e. require synchronous completion).\\
Scope: {\bf local}.\\
Type: {\bf optional}.\\
Intent: {\bf inout}.\\
If \verb|mode| does not specify synchronous completion, then this
If \verb|mode| specifies non-blocking action, then this
variable must be present.
\end{description}
@ -459,7 +459,7 @@ Type, kind, rank and size must agree on all processes.
Scope: {\bf local}.\\
Type: {\bf optional}.\\
Intent: {\bf inout}.\\
If \verb|mode| does not specify synchronous completion, then this
If \verb|mode| specifies non-blocking action, then this
variable must be present.
\end{description}
@ -555,7 +555,7 @@ Default: both fields are selected (i.e. require synchronous completion).\\
Scope: {\bf local}.\\
Type: {\bf optional}.\\
Intent: {\bf inout}.\\
If \verb|mode| does not specify synchronous completion, then this
If \verb|mode| specifies non-blocking action, then this
variable must be present.
\end{description}
@ -573,7 +573,7 @@ Type, kind, rank and size must agree on all processes.
Scope: {\bf local}.\\
Type: {\bf optional}.\\
Intent: {\bf inout}.\\
If \verb|mode| does not specify synchronous completion, then this
If \verb|mode| specifies a non-blocking action, then this
variable must be present.
\end{description}
@ -668,7 +668,7 @@ Default: both fields are selected (i.e. require synchronous completion).\\
Scope: {\bf local}.\\
Type: {\bf optional}.\\
Intent: {\bf inout}.\\
If \verb|mode| does not specify synchronous completion, then this
If \verb|mode| specifies a non-blocking action, then this
variable must be present.
\end{description}
@ -686,7 +686,7 @@ Type, kind, rank and size must agree on all processes.
Scope: {\bf local}.\\
Type: {\bf optional}.\\
Intent: {\bf inout}.\\
If \verb|mode| does not specify synchronous completion, then this
If \verb|mode| specifies a non-blocking action, then this
variable must be present.
\end{description}
@ -781,7 +781,7 @@ Default: both fields are selected (i.e. require synchronous completion).\\
Scope: {\bf local}.\\
Type: {\bf optional}.\\
Intent: {\bf inout}.\\
If \verb|mode| does not specify synchronous completion, then this
If \verb|mode| specifies a non-blocking action, then this
variable must be present.
\end{description}
@ -799,7 +799,7 @@ Type, kind, rank and size must agree on all processes.
Scope: {\bf local}.\\
Type: {\bf optional}.\\
Intent: {\bf inout}.\\
If \verb|mode| does not specify synchronous completion, then this
If \verb|mode| specifies a non-blocking action, then this
variable must be present.
\end{description}
@ -894,7 +894,7 @@ Default: both fields are selected (i.e. require synchronous completion).\\
Scope: {\bf local}.\\
Type: {\bf optional}.\\
Intent: {\bf inout}.\\
If \verb|mode| does not specify synchronous completion, then this
If \verb|mode| specifies a non-blocking action, then this
variable must be present.
\end{description}
@ -912,7 +912,7 @@ Type, kind, rank and size must agree on all processes.
Scope: {\bf local}.\\
Type: {\bf optional}.\\
Intent: {\bf inout}.\\
If \verb|mode| does not specify synchronous completion, then this
If \verb|mode| specifies a non-blocking action, then this
variable must be present.
\end{description}
@ -1007,7 +1007,7 @@ Default: both fields are selected (i.e. require synchronous completion).\\
Scope: {\bf local}.\\
Type: {\bf optional}.\\
Intent: {\bf inout}.\\
If \verb|mode| does not specify synchronous completion, then this
If \verb|mode| specifies a non-blocking action, then this
variable must be present.
\end{description}
@ -1025,7 +1025,7 @@ Kind, rank and size must agree on all processes.
Scope: {\bf local}.\\
Type: {\bf optional}.\\
Intent: {\bf inout}.\\
If \verb|mode| does not specify synchronous completion, then this
If \verb|mode| specifies a non-blocking action, then this
variable must be present.
\end{description}

@ -314,7 +314,8 @@ An integer value; 0 means no error has been detected.
\end{description}
{\par\noindent\large\bfseries Notes}
This method is almost always called by the iterative methods of
Sec.~\ref{sec:methods}, and practically never directly by the user.
Sec.~\ref{sec:methods}; it is extremely unlikely to be needed directly
by the application developer.
\clearpage\subsection{descr --- Prints a description of current

@ -134,8 +134,9 @@
\flushright
{\bfseries
by Salvatore Filippone\\
and Alfredo Buttari}\\
Aug 1st, 2024
Alfredo Buttari \\
Fabio Durastante}\\
Jun 1st, 2025
\end{minipage}}
}
%\addtolength{\textwidth}{\centeroffset}

@ -99,13 +99,14 @@
{\LARGE\bfseries PSBLAS\\[.8ex] User's and Reference
Guide}\\[\baselineskip]
\emph{\large A reference guide for the Parallel Sparse BLAS library}\\[3ex]
{\bfseries Salvatore Filippone\\
Alfredo Buttari } \\
{\bfseries by Salvatore Filippone\\
Alfredo Buttari \\
Fabio Durastante } \\
%\\[10ex]
%\today
Software version: 3.9.0\\
%\today
Aug 1st, 2024
Jun 1st, 2025
\cleardoublepage
\begingroup
\renewcommand*{\thepage}{toc}

Loading…
Cancel
Save