Merge branch 'development' into cmake

cmake
sfilippone 1 year ago
commit a9685267a4

@ -26,6 +26,7 @@ MPFC=@MPIFC@
MPCC=@MPICC@ MPCC=@MPICC@
FLINK=@FLINK@ FLINK=@FLINK@
CLINK=@CLINK@
LIBS=@LIBS@ LIBS=@LIBS@
FLIBS=@FLIBS@ FLIBS=@FLIBS@

@ -20,7 +20,8 @@ EXEDIR=./runs
all: pdegen3dc all: pdegen3dc
pdegen3dc: pdegen3dc.o pdegen3dc: pdegen3dc.o
$(MPFC) pdegen3dc.o -o pdegen3dc $(PSBC_LIBS) $(PSB_LIBS) $(PSBLDLIBS) -lm -lgfortran $(FLINK) pdegen3dc.o -o pdegen3dc $(PSBC_LIBS) $(PSB_LIBS)\
$(PSBLDLIBS) -lm
/bin/mv pdegen3dc $(EXEDIR) /bin/mv pdegen3dc $(EXEDIR)

2344
configure vendored

File diff suppressed because it is too large Load Diff

@ -576,6 +576,7 @@ FDEFINES="$psblas_cv_define_prepend-DPSB_LPK${pac_cv_lpk_size} $FDEFINES";
dnl CDEFINES="-DPSB_IPK${pac_cv_ipk_size} -DPSB_LPK${pac_cv_lpk_size} $CDEFINES" dnl CDEFINES="-DPSB_IPK${pac_cv_ipk_size} -DPSB_LPK${pac_cv_lpk_size} $CDEFINES"
FLINK="$MPIFC" FLINK="$MPIFC"
CLINK="$MPICC"
PAC_ARG_OPENMP() PAC_ARG_OPENMP()
if test x"$pac_cv_openmp" == x"yes" ; then if test x"$pac_cv_openmp" == x"yes" ; then
FDEFINES="$psblas_cv_define_prepend-DPSB_OPENMP $FDEFINES"; FDEFINES="$psblas_cv_define_prepend-DPSB_OPENMP $FDEFINES";
@ -585,6 +586,7 @@ if test x"$pac_cv_openmp" == x"yes" ; then
CCOPT="$CCOPT $pac_cv_openmp_ccopt"; CCOPT="$CCOPT $pac_cv_openmp_ccopt";
CXXOPT="$CXXOPT $pac_cv_openmp_cxxopt"; CXXOPT="$CXXOPT $pac_cv_openmp_cxxopt";
FLINK="$FLINK $pac_cv_openmp_fcopt"; FLINK="$FLINK $pac_cv_openmp_fcopt";
CLINK="$CLINK $pac_cv_openmp_fcopt";
fi fi
# #
# Tests for support of various Fortran features; some of them are critical, # Tests for support of various Fortran features; some of them are critical,
@ -1004,6 +1006,7 @@ AC_SUBST(FIFLAG)
AC_SUBST(FMFLAG) AC_SUBST(FMFLAG)
AC_SUBST(MODEXT) AC_SUBST(MODEXT)
AC_SUBST(FLINK) AC_SUBST(FLINK)
AC_SUBST(CLINK)
AC_SUBST(LIBS) AC_SUBST(LIBS)
AC_SUBST(FLIBS) AC_SUBST(FLIBS)
AC_SUBST(AR) AC_SUBST(AR)

@ -23,7 +23,7 @@ class="pplb7t-">Alfredo Buttari </span><br
class="newline" /><span class="newline" /><span
class="pplb7t-">Fabio Durastante </span><br class="pplb7t-">Fabio Durastante </span><br
class="newline" />Software version: 3.9.0<br class="newline" />Software version: 3.9.0<br
class="newline" />Jun 1st, 2025 class="newline" />June 9th, 2025

@ -23,7 +23,7 @@ class="pplb7t-">Alfredo Buttari </span><br
class="newline" /><span class="newline" /><span
class="pplb7t-">Fabio Durastante </span><br class="pplb7t-">Fabio Durastante </span><br
class="newline" />Software version: 3.9.0<br class="newline" />Software version: 3.9.0<br
class="newline" />Jun 1st, 2025 class="newline" />June 9th, 2025

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.2 KiB

After

Width:  |  Height:  |  Size: 2.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.2 KiB

After

Width:  |  Height:  |  Size: 2.2 KiB

@ -11,7 +11,7 @@
</head><body </head><body
> >
<div class="footnote-text"> <div class="footnote-text">
<!--l. 209--><p class="indent" > <span class="footnote-mark"><a <!--l. 208--><p class="indent" > <span class="footnote-mark"><a
id="fn1x0"><a id="fn1x0"><a
id="x6-4003x2"></a> <sup class="textsuperscript">1</sup></a></span><span id="x6-4003x2"></a> <sup class="textsuperscript">1</sup></a></span><span
class="pplr7t-x-x-80">In our prototype implementation we provide sample scatter/gather routines.</span></div> class="pplr7t-x-x-80">In our prototype implementation we provide sample scatter/gather routines.</span></div>

@ -11,7 +11,7 @@
</head><body </head><body
> >
<div class="footnote-text"> <div class="footnote-text">
<!--l. 253--><p class="noindent" ><span class="footnote-mark"><a <!--l. 252--><p class="noindent" ><span class="footnote-mark"><a
id="fn2x0"><a id="fn2x0"><a
id="x7-5002x2.1"></a> <sup class="textsuperscript">2</sup></a></span><span id="x7-5002x2.1"></a> <sup class="textsuperscript">2</sup></a></span><span
class="pplr7t-x-x-80">This is the normal situation when the pattern of the sparse matrix is symmetric, which is equivalent to</span> class="pplr7t-x-x-80">This is the normal situation when the pattern of the sparse matrix is symmetric, which is equivalent to</span>

@ -11,7 +11,7 @@
</head><body </head><body
> >
<div class="footnote-text"> <div class="footnote-text">
<!--l. 421--><p class="noindent" ><span class="footnote-mark"><a <!--l. 420--><p class="noindent" ><span class="footnote-mark"><a
id="fn3x0"><a id="fn3x0"><a
id="x8-7020x3"></a> <sup class="textsuperscript">3</sup></a></span><span id="x8-7020x3"></a> <sup class="textsuperscript">3</sup></a></span><span
class="pplr7t-x-x-80">The subroutine style </span><span class="pplr7t-x-x-80">The subroutine style </span><span

@ -41,8 +41,8 @@ improves.
<!--l. 28--><p class="indent" > The project is led by Salvatore Filippone; a number of people have been <!--l. 28--><p class="indent" > The project is led by Salvatore Filippone; a number of people have been
contributing to this package over the years; contributors in roughly reverse contributing to this package over the years; contributors in roughly reverse
chronological order: <span class="obeylines-h"> chronological order: <span class="obeylines-h">
<br />Luca Pepè Sciarria
<br />Theophane Loloum <br />Theophane Loloum
<br />Fabio Durastante
<br />Dimitri Walther <br />Dimitri Walther
<br />Andrea Di Iorio <br />Andrea Di Iorio
<br />Stefano Petrilli <br />Stefano Petrilli
@ -50,7 +50,6 @@ chronological order: <span class="obeylines-h">
<br />Zaak Beekman <br />Zaak Beekman
<br />Ambra Abdullahi Hassan <br />Ambra Abdullahi Hassan
<br />Pasqua D&#8217;Ambra <br />Pasqua D&#8217;Ambra
<br />Alfredo Buttari
<br />Daniela di Serafino <br />Daniela di Serafino
<br />Michele Martone <br />Michele Martone
<br />Michele Colajanni <br />Michele Colajanni
@ -59,11 +58,11 @@ chronological order: <span class="obeylines-h">
<br />Dario Pascucci</span> <br />Dario Pascucci</span>
<div class="flushright" <div class="flushright"
> >
<!--l. 48--><p class="noindent" >
Salvatore Filippone<br />
<!--l. 49--><p class="noindent" >
Salvatore Filippone<br />
Alfredo Buttari<br /> Alfredo Buttari<br />
Fabio Durastante</div> Fabio Durastante</div>
@ -72,12 +71,12 @@ Fabio Durastante</div>
<!--l. 58--><div class="crosslinks"><p class="noindent">[<a <!--l. 57--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlse1.html" >next</a>] [<a href="userhtmlse1.html" >next</a>] [<a
href="userhtmlli1.html" >prev</a>] [<a href="userhtmlli1.html" >prev</a>] [<a
href="userhtmlli1.html#tailuserhtmlli1.html" >prev-tail</a>] [<a href="userhtmlli1.html#tailuserhtmlli1.html" >prev-tail</a>] [<a
href="userhtmlli2.html" >front</a>] [<a href="userhtmlli2.html" >front</a>] [<a
href="userhtml.html#userhtmlli2.html" >up</a>] </p></div> href="userhtml.html#userhtmlli2.html" >up</a>] </p></div>
<!--l. 58--><p class="indent" > <a <!--l. 57--><p class="indent" > <a
id="tailuserhtmlli2.html"></a> id="tailuserhtmlli2.html"></a>
</body></html> </body></html>

@ -10,7 +10,7 @@
<link rel="stylesheet" type="text/css" href="userhtml.css"> <link rel="stylesheet" type="text/css" href="userhtml.css">
</head><body </head><body
> >
<!--l. 58--><div class="crosslinks"><p class="noindent">[<a <!--l. 57--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlse2.html" >next</a>] [<a href="userhtmlse2.html" >next</a>] [<a
href="userhtmlli2.html" >prev</a>] [<a href="userhtmlli2.html" >prev</a>] [<a
href="userhtmlli2.html#tailuserhtmlli2.html" >prev-tail</a>] [<a href="userhtmlli2.html#tailuserhtmlli2.html" >prev-tail</a>] [<a
@ -18,7 +18,7 @@ href="#tailuserhtmlse1.html">tail</a>] [<a
href="userhtml.html#userhtmlse1.html" >up</a>] </p></div> href="userhtml.html#userhtmlse1.html" >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">1 </span> <a <h3 class="sectionHead"><span class="titlemark">1 </span> <a
id="x4-30001"></a>Introduction</h3> id="x4-30001"></a>Introduction</h3>
<!--l. 60--><p class="noindent" >The PSBLAS library, developed with the aim to facilitate the parallelization of <!--l. 59--><p class="noindent" >The PSBLAS library, developed with the aim to facilitate the parallelization of
computationally intensive scientific applications, is designed to address parallel computationally intensive scientific applications, is designed to address parallel
implementation of iterative solvers for sparse linear systems through the implementation of iterative solvers for sparse linear systems through the
distributed memory paradigm. It includes routines for multiplying sparse distributed memory paradigm. It includes routines for multiplying sparse
@ -27,11 +27,11 @@ diagonal entries, preprocessing sparse matrices, and contains additional
routines for dense matrix operations. The current implementation of PSBLAS routines for dense matrix operations. The current implementation of PSBLAS
addresses a distributed memory execution model operating with message addresses a distributed memory execution model operating with message
passing. passing.
<!--l. 71--><p class="indent" > The PSBLAS library version 3 is implemented in the Fortran&#x00A0;2008&#x00A0;<span class="cite">[<a <!--l. 70--><p class="indent" > The PSBLAS library version 3 is implemented in the Fortran&#x00A0;2008&#x00A0;<span class="cite">[<a
href="userhtmlli3.html#Xmetcalf">17</a>]</span> href="userhtmlli3.html#Xmetcalf">17</a>]</span>
programming language, with reuse and/or adaptation of existing Fortran&#x00A0;77 and programming language, with reuse and/or adaptation of existing Fortran&#x00A0;77 and
Fortran&#x00A0;95 software, plus a handful of C routines. Fortran&#x00A0;95 software, plus a handful of C routines.
<!--l. 76--><p class="indent" > The use of Fortran&#x00A0;2008 offers a number of advantages over Fortran&#x00A0;95, mostly <!--l. 75--><p class="indent" > The use of Fortran&#x00A0;2008 offers a number of advantages over Fortran&#x00A0;95, mostly
in the handling of requirements for evolution and adaptation of the library to new in the handling of requirements for evolution and adaptation of the library to new
computing architectures and integration of new algorithms. For a detailed computing architectures and integration of new algorithms. For a detailed
discussion of our design see&#x00A0;<span class="cite">[<a discussion of our design see&#x00A0;<span class="cite">[<a
@ -42,7 +42,7 @@ href="userhtmlli3.html#XRouXiaXu:11">19</a>]</span>; sufficient support for Fort
from many compilers, including recent versions of the GNU Fortran compiler from from many compilers, including recent versions of the GNU Fortran compiler from
the Free Software Foundation, and the FLANG compiler from the LLVM the Free Software Foundation, and the FLANG compiler from the LLVM
project. project.
<!--l. 88--><p class="indent" > Previous approaches have been based on mixing Fortran&#x00A0;95, with its support for <!--l. 87--><p class="indent" > Previous approaches have been based on mixing Fortran&#x00A0;95, with its support for
object-based design, with other languages; these have been advocated by a number object-based design, with other languages; these have been advocated by a number
of authors, e.g.&#x00A0;<span class="cite">[<a of authors, e.g.&#x00A0;<span class="cite">[<a
href="userhtmlli3.html#Xmachiels">16</a>]</span>. Moreover, the Fortran&#x00A0;95 facilities for dynamic memory href="userhtmlli3.html#Xmachiels">16</a>]</span>. Moreover, the Fortran&#x00A0;95 facilities for dynamic memory
@ -50,7 +50,7 @@ management and interface overloading greatly enhance the usability of the PSBLAS
subroutines. In this way, the library can take care of runtime memory requirements subroutines. In this way, the library can take care of runtime memory requirements
that are quite difficult or even impossible to predict at implementation or that are quite difficult or even impossible to predict at implementation or
compilation time. compilation time.
<!--l. 98--><p class="indent" > The presentation of the PSBLAS library follows the general structure of the <!--l. 97--><p class="indent" > The presentation of the PSBLAS library follows the general structure of the
proposal for serial Sparse BLAS&#x00A0;<span class="cite">[<a proposal for serial Sparse BLAS&#x00A0;<span class="cite">[<a
href="userhtmlli3.html#Xsblas97">8</a>,&#x00A0;<a href="userhtmlli3.html#Xsblas97">8</a>,&#x00A0;<a
href="userhtmlli3.html#Xsblas02">9</a>]</span>, which in its turn is based on the proposal for href="userhtmlli3.html#Xsblas02">9</a>]</span>, which in its turn is based on the proposal for
@ -58,7 +58,7 @@ BLAS on dense matrices&#x00A0;<span class="cite">[<a
href="userhtmlli3.html#XBLAS1">15</a>,&#x00A0;<a href="userhtmlli3.html#XBLAS1">15</a>,&#x00A0;<a
href="userhtmlli3.html#XBLAS2">5</a>,&#x00A0;<a href="userhtmlli3.html#XBLAS2">5</a>,&#x00A0;<a
href="userhtmlli3.html#XBLAS3">6</a>]</span>. href="userhtmlli3.html#XBLAS3">6</a>]</span>.
<!--l. 103--><p class="indent" > The applicability of sparse iterative solvers to many different areas causes <!--l. 102--><p class="indent" > The applicability of sparse iterative solvers to many different areas causes
some terminology problems because the same concept may be denoted some terminology problems because the same concept may be denoted
through different names depending on the application area. The PSBLAS through different names depending on the application area. The PSBLAS
features presented in this document will be discussed referring to a finite features presented in this document will be discussed referring to a finite
@ -67,7 +67,7 @@ the scope of the library is wider than that: for example, it can be applied
to finite element discretizations of PDEs, and even to different classes of to finite element discretizations of PDEs, and even to different classes of
problems such as nonlinear optimization, for example in optimal control problems such as nonlinear optimization, for example in optimal control
problems. problems.
<!--l. 113--><p class="indent" > The design of a solver for sparse linear systems is driven by many conflicting <!--l. 112--><p class="indent" > The design of a solver for sparse linear systems is driven by many conflicting
objectives, such as limiting occupation of storage resources, exploiting regularities in objectives, such as limiting occupation of storage resources, exploiting regularities in
the input data, exploiting hardware characteristics of the parallel platform. To the input data, exploiting hardware characteristics of the parallel platform. To
@ -88,12 +88,12 @@ applications.
<!--l. 130--><div class="crosslinks"><p class="noindent">[<a <!--l. 129--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlse2.html" >next</a>] [<a href="userhtmlse2.html" >next</a>] [<a
href="userhtmlli2.html" >prev</a>] [<a href="userhtmlli2.html" >prev</a>] [<a
href="userhtmlli2.html#tailuserhtmlli2.html" >prev-tail</a>] [<a href="userhtmlli2.html#tailuserhtmlli2.html" >prev-tail</a>] [<a
href="userhtmlse1.html" >front</a>] [<a href="userhtmlse1.html" >front</a>] [<a
href="userhtml.html#userhtmlse1.html" >up</a>] </p></div> href="userhtml.html#userhtmlse1.html" >up</a>] </p></div>
<!--l. 130--><p class="indent" > <a <!--l. 129--><p class="indent" > <a
id="tailuserhtmlse1.html"></a> id="tailuserhtmlse1.html"></a>
</body></html> </body></html>

@ -10,7 +10,7 @@
<link rel="stylesheet" type="text/css" href="userhtml.css"> <link rel="stylesheet" type="text/css" href="userhtml.css">
</head><body </head><body
> >
<!--l. 130--><div class="crosslinks"><p class="noindent">[<a <!--l. 129--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlse6.html" >next</a>] [<a href="userhtmlse6.html" >next</a>] [<a
href="userhtmlse1.html" >prev</a>] [<a href="userhtmlse1.html" >prev</a>] [<a
href="userhtmlse1.html#tailuserhtmlse1.html" >prev-tail</a>] [<a href="userhtmlse1.html#tailuserhtmlse1.html" >prev-tail</a>] [<a
@ -18,7 +18,7 @@ href="#tailuserhtmlse2.html">tail</a>] [<a
href="userhtml.html#userhtmlse2.html" >up</a>] </p></div> href="userhtml.html#userhtmlse2.html" >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">2 </span> <a <h3 class="sectionHead"><span class="titlemark">2 </span> <a
id="x5-40002"></a>General overview</h3> id="x5-40002"></a>General overview</h3>
<!--l. 132--><p class="noindent" >The PSBLAS library is designed to handle the implementation of iterative solvers for <!--l. 131--><p class="noindent" >The PSBLAS library is designed to handle the implementation of iterative solvers for
sparse linear systems on distributed memory parallel computers. The system sparse linear systems on distributed memory parallel computers. The system
coefficient matrix <span coefficient matrix <span
class="zplmr7m-">A </span>must be square; it may be real or complex, nonsymmetric, and class="zplmr7m-">A </span>must be square; it may be real or complex, nonsymmetric, and
@ -40,7 +40,7 @@ directly with MPI; however, in some cases, MPI routines are used directly
to improve efficiency. For further details on our communication layer see to improve efficiency. For further details on our communication layer see
Sec.&#x00A0;<a Sec.&#x00A0;<a
href="userhtmlse7.html#x13-1060007">7<!--tex4ht:ref: sec:parenv --></a>. href="userhtmlse7.html#x13-1060007">7<!--tex4ht:ref: sec:parenv --></a>.
<!--l. 159--><p class="indent" > <hr class="figure"><div class="figure" <!--l. 158--><p class="indent" > <hr class="figure"><div class="figure"
> >
@ -52,8 +52,8 @@ href="userhtmlse7.html#x13-1060007">7<!--tex4ht:ref: sec:parenv --></a>.
<div class="center" <div class="center"
> >
<!--l. 160--><p class="noindent" > <!--l. 159--><p class="noindent" >
<!--l. 162--><p class="noindent" ><img <!--l. 161--><p class="noindent" ><img
src="psblas.png" alt="PIC" src="psblas.png" alt="PIC"
width="46" height="46" ></div> width="46" height="46" ></div>
<br /> <div class="caption" <br /> <div class="caption"
@ -62,8 +62,8 @@ class="content">PSBLAS library components hierarchy.</span></div><!--tex4ht:labe
<!--l. 168--><p class="indent" > </div><hr class="endfigure"> <!--l. 167--><p class="indent" > </div><hr class="endfigure">
<!--l. 171--><p class="indent" > The type of linear system matrices that we address typically arises in <!--l. 170--><p class="indent" > The type of linear system matrices that we address typically arises in
the numerical solution of PDEs; in such a context, it is necessary to pay the numerical solution of PDEs; in such a context, it is necessary to pay
special attention to the structure of the problem from which the application special attention to the structure of the problem from which the application
originates. The nonzero pattern of a matrix arising from the discretization of a originates. The nonzero pattern of a matrix arising from the discretization of a
@ -71,7 +71,7 @@ PDE is influenced by various factors, such as the shape of the domain, the
discretization strategy, and the equation/unknown ordering. The matrix itself can be discretization strategy, and the equation/unknown ordering. The matrix itself can be
interpreted as the adjacency matrix of the graph associated with the discretization interpreted as the adjacency matrix of the graph associated with the discretization
mesh. mesh.
<!--l. 182--><p class="indent" > The distribution of the coefficient matrix for the linear system is based on the <!--l. 181--><p class="indent" > The distribution of the coefficient matrix for the linear system is based on the
&#8220;owner computes&#8221; rule: the variable associated to each mesh point is assigned to a &#8220;owner computes&#8221; rule: the variable associated to each mesh point is assigned to a
process that will own the corresponding row in the coefficient matrix and will process that will own the corresponding row in the coefficient matrix and will
carry out all related computations. This allocation strategy is equivalent to a carry out all related computations. This allocation strategy is equivalent to a
@ -88,7 +88,7 @@ the literature, e.g. METIS&#x00A0;<span class="cite">[<a
href="userhtmlli3.html#XMETIS">14</a>]</span>. Dense vectors conform to sparse matrices, href="userhtmlli3.html#XMETIS">14</a>]</span>. Dense vectors conform to sparse matrices,
that is, the entries of a vector follow the same distribution of the matrix that is, the entries of a vector follow the same distribution of the matrix
rows. rows.
<!--l. 204--><p class="indent" > We assume that the sparse matrix is built in parallel, where each process generates <!--l. 203--><p class="indent" > We assume that the sparse matrix is built in parallel, where each process generates
its own portion. We never require that the entire matrix be available on a single its own portion. We never require that the entire matrix be available on a single
node. However, it is possible to hold the entire matrix in one process and distribute it node. However, it is possible to hold the entire matrix in one process and distribute it
explicitly<span class="footnote-mark"><a explicitly<span class="footnote-mark"><a
@ -98,10 +98,10 @@ even though the resulting memory bottleneck would make this option unattractive
in most cases. in most cases.
<h4 class="subsectionHead"><span class="titlemark">2.1 </span> <a <h4 class="subsectionHead"><span class="titlemark">2.1 </span> <a
id="x5-50002.1"></a>Basic Nomenclature</h4> id="x5-50002.1"></a>Basic Nomenclature</h4>
<!--l. 216--><p class="noindent" >Our computational model implies that the data allocation on the parallel distributed <!--l. 215--><p class="noindent" >Our computational model implies that the data allocation on the parallel distributed
memory machine is guided by the structure of the physical model, and specifically memory machine is guided by the structure of the physical model, and specifically
by the discretization mesh of the PDE. by the discretization mesh of the PDE.
<!--l. 221--><p class="indent" > Each point of the discretization mesh will have (at least) one associated <!--l. 220--><p class="indent" > Each point of the discretization mesh will have (at least) one associated
equation/variable, and therefore one index. We say that point <span equation/variable, and therefore one index. We say that point <span
class="zplmr7m-">i </span><span class="zplmr7m-">i </span><span
class="pplri7t-">depends </span>on point <span class="pplri7t-">depends </span>on point <span
@ -117,11 +117,11 @@ class="pplri7t-">sub-domains </span>assigned
to the parallel processes, we classify the points of a given sub-domain as to the parallel processes, we classify the points of a given sub-domain as
follows. follows.
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 230--><p class="noindent" > <!--l. 229--><p class="noindent" >
<span <span
class="pplb7t-">Internal.</span> </dt><dd class="pplb7t-">Internal.</span> </dt><dd
class="description"> class="description">
<!--l. 230--><p class="noindent" >An internal point of a given domain <span <!--l. 229--><p class="noindent" >An internal point of a given domain <span
class="pplri7t-">depends </span>only on points of the same class="pplri7t-">depends </span>only on points of the same
domain. If all points of a domain are assigned to one process, then domain. If all points of a domain are assigned to one process, then
a computational step (e.g., a matrix-vector product) of the equations a computational step (e.g., a matrix-vector product) of the equations
@ -131,19 +131,19 @@ class="pplri7t-">depends </span>only on points of the same
associated with the internal points requires no data items from other associated with the internal points requires no data items from other
domains and no communications. domains and no communications.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 239--><p class="noindent" > <!--l. 238--><p class="noindent" >
<span <span
class="pplb7t-">Boundary.</span> </dt><dd class="pplb7t-">Boundary.</span> </dt><dd
class="description"> class="description">
<!--l. 239--><p class="noindent" >A point of a given domain is a boundary point if it <span <!--l. 238--><p class="noindent" >A point of a given domain is a boundary point if it <span
class="pplri7t-">depends </span>on points class="pplri7t-">depends </span>on points
belonging to other domains. belonging to other domains.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 243--><p class="noindent" > <!--l. 242--><p class="noindent" >
<span <span
class="pplb7t-">Halo.</span> </dt><dd class="pplb7t-">Halo.</span> </dt><dd
class="description"> class="description">
<!--l. 243--><p class="noindent" >A halo point for a given domain is a point belonging to another domain <!--l. 242--><p class="noindent" >A halo point for a given domain is a point belonging to another domain
such that there is a boundary point which <span such that there is a boundary point which <span
class="pplri7t-">depends </span>on it. Whenever performing class="pplri7t-">depends </span>on it. Whenever performing
a computational step, such as a matrix-vector product, the values associated a computational step, such as a matrix-vector product, the values associated
@ -151,22 +151,22 @@ class="pplri7t-">depends </span>on it. Whenever performing
a given domain is usually a halo point for some other domain<span class="footnote-mark"><a a given domain is usually a halo point for some other domain<span class="footnote-mark"><a
href="userhtml7.html#fn2x0"><sup class="textsuperscript">2</sup></a></span><a href="userhtml7.html#fn2x0"><sup class="textsuperscript">2</sup></a></span><a
id="x5-5001f2"></a> ; id="x5-5001f2"></a> ;
therefore the cardinality of the boundary points set denotes the amount therefore the cardinality of the boundary points set determines the amount
of data sent to other domains. of data sent to other domains.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 256--><p class="noindent" > <!--l. 255--><p class="noindent" >
<span <span
class="pplb7t-">Overlap.</span> </dt><dd class="pplb7t-">Overlap.</span> </dt><dd
class="description"> class="description">
<!--l. 256--><p class="noindent" >An overlap point is a boundary point assigned to multiple domains. Any <!--l. 255--><p class="noindent" >An overlap point is a boundary point assigned to multiple domains. Any
operation that involves an overlap point has to be replicated for each operation that involves an overlap point has to be replicated for each
assignment.</dd></dl> assignment.</dd></dl>
<!--l. 260--><p class="noindent" >Overlap points do not usually exist in the basic data distributions; however they are a <!--l. 259--><p class="noindent" >Overlap points do not usually exist in the basic data distributions; however they are a
feature of Domain Decomposition Schwarz preconditioners which are the subject of feature of Domain Decomposition Schwarz preconditioners which are the subject of
related research work&#x00A0;<span class="cite">[<a related research work&#x00A0;<span class="cite">[<a
href="userhtmlli3.html#X2007c">4</a>,&#x00A0;<a href="userhtmlli3.html#X2007c">4</a>,&#x00A0;<a
href="userhtmlli3.html#X2007d">3</a>]</span>. href="userhtmlli3.html#X2007d">3</a>]</span>.
<!--l. 265--><p class="indent" > We denote the sets of internal, boundary and halo points for a given subdomain <!--l. 264--><p class="indent" > We denote the sets of internal, boundary and halo points for a given subdomain
by <span by <span
class="zplmr7y-"><img class="zplmr7y-"><img
src="zplmr7y-49.png" alt="I" class="x-x-49" /></span>, <span src="zplmr7y-49.png" alt="I" class="x-x-49" /></span>, <span
@ -203,7 +203,7 @@ class="zplmr7y-">|<img
src="zplmr7y-48.png" alt="H" class="x-x-48" /></span><sub><span src="zplmr7y-48.png" alt="H" class="x-x-48" /></span><sub><span
class="zplmr7m-x-x-76">i</span></sub><span class="zplmr7m-x-x-76">i</span></sub><span
class="zplmr7y-">|</span>. class="zplmr7y-">|</span>.
<!--l. 275--><p class="indent" > <hr class="figure"><div class="figure" <!--l. 274--><p class="indent" > <hr class="figure"><div class="figure"
> >
@ -215,8 +215,8 @@ class="zplmr7y-">|</span>.
<div class="center" <div class="center"
> >
<!--l. 276--><p class="noindent" > <!--l. 275--><p class="noindent" >
<!--l. 279--><p class="noindent" ><img <!--l. 278--><p class="noindent" ><img
src="points.png" alt="PIC" src="points.png" alt="PIC"
width="46" height="46" ></div> width="46" height="46" ></div>
<br /> <div class="caption" <br /> <div class="caption"
@ -225,113 +225,113 @@ class="content">Point classification.</span></div><!--tex4ht:label?: x5-5003r2 --
<!--l. 285--><p class="indent" > </div><hr class="endfigure"> <!--l. 284--><p class="indent" > </div><hr class="endfigure">
<!--l. 287--><p class="indent" > This classification of mesh points guides the naming scheme that we adopted in <!--l. 286--><p class="indent" > This classification of mesh points guides the naming scheme that we adopted in
the library internals and in the data structures. We explicitly note that &#8220;Halo&#8221; points the library internals and in the data structures. We explicitly note that &#8220;Halo&#8221; points
are also often called &#8220;ghost&#8221; points in the literature. are also often called &#8220;ghost&#8221; points in the literature.
<h4 class="subsectionHead"><span class="titlemark">2.2 </span> <a <h4 class="subsectionHead"><span class="titlemark">2.2 </span> <a
id="x5-60002.2"></a>Library contents</h4> id="x5-60002.2"></a>Library contents</h4>
<!--l. 296--><p class="noindent" >The PSBLAS library consists of various classes of subroutines: <!--l. 295--><p class="noindent" >The PSBLAS library consists of various classes of subroutines:
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 298--><p class="noindent" > <!--l. 297--><p class="noindent" >
<span <span
class="pplb7t-">Computational routines</span> </dt><dd class="pplb7t-">Computational routines</span> </dt><dd
class="description"> class="description">
<!--l. 298--><p class="noindent" >comprising: <!--l. 297--><p class="noindent" >comprising:
<ul class="itemize1"> <ul class="itemize1">
<li class="itemize"> <li class="itemize">
<!--l. 300--><p class="noindent" >Sparse matrix by dense matrix product; <!--l. 299--><p class="noindent" >Sparse matrix by dense matrix product;
</li> </li>
<li class="itemize"> <li class="itemize">
<!--l. 301--><p class="noindent" >Sparse triangular systems solution for block diagonal matrices; <!--l. 300--><p class="noindent" >Sparse triangular systems solution for block diagonal matrices;
</li> </li>
<li class="itemize"> <li class="itemize">
<!--l. 303--><p class="noindent" >Vector and matrix norms; <!--l. 302--><p class="noindent" >Vector and matrix norms;
</li> </li>
<li class="itemize"> <li class="itemize">
<!--l. 304--><p class="noindent" >Dense matrix sums; <!--l. 303--><p class="noindent" >Dense matrix sums;
</li> </li>
<li class="itemize"> <li class="itemize">
<!--l. 305--><p class="noindent" >Dot products.</li></ul> <!--l. 304--><p class="noindent" >Dot products.</li></ul>
</dd><dt class="description"> </dd><dt class="description">
<!--l. 307--><p class="noindent" > <!--l. 306--><p class="noindent" >
<span <span
class="pplb7t-">Communication routines</span> </dt><dd class="pplb7t-">Communication routines</span> </dt><dd
class="description"> class="description">
<!--l. 307--><p class="noindent" >handling halo and overlap communications; <!--l. 306--><p class="noindent" >handling halo and overlap communications;
</dd><dt class="description"> </dd><dt class="description">
<!--l. 309--><p class="noindent" > <!--l. 308--><p class="noindent" >
<span <span
class="pplb7t-">Data management and auxiliary routines</span> </dt><dd class="pplb7t-">Data management and auxiliary routines</span> </dt><dd
class="description"> class="description">
<!--l. 309--><p class="noindent" >including: <!--l. 308--><p class="noindent" >including:
<ul class="itemize1"> <ul class="itemize1">
<li class="itemize"> <li class="itemize">
<!--l. 311--><p class="noindent" >Parallel environment management; <!--l. 310--><p class="noindent" >Parallel environment management;
</li> </li>
<li class="itemize"> <li class="itemize">
<!--l. 312--><p class="noindent" >Communication descriptors allocation; <!--l. 311--><p class="noindent" >Communication descriptors allocation;
</li> </li>
<li class="itemize"> <li class="itemize">
<!--l. 313--><p class="noindent" >Dense and sparse matrix allocation; <!--l. 312--><p class="noindent" >Dense and sparse matrix allocation;
</li> </li>
<li class="itemize"> <li class="itemize">
<!--l. 314--><p class="noindent" >Dense and sparse matrix build and update; <!--l. 313--><p class="noindent" >Dense and sparse matrix build and update;
</li> </li>
<li class="itemize"> <li class="itemize">
<!--l. 315--><p class="noindent" >Sparse matrix and data distribution preprocessing.</li></ul> <!--l. 314--><p class="noindent" >Sparse matrix and data distribution preprocessing.</li></ul>
</dd><dt class="description"> </dd><dt class="description">
<!--l. 317--><p class="noindent" > <!--l. 316--><p class="noindent" >
<span <span
class="pplb7t-">Preconditioner routines</span> </dt><dd class="pplb7t-">Preconditioner routines</span> </dt><dd
class="description"> class="description">
<!--l. 317--><p class="noindent" > <!--l. 316--><p class="noindent" >
</dd><dt class="description"> </dd><dt class="description">
<!--l. 318--><p class="noindent" > <!--l. 317--><p class="noindent" >
<span <span
class="pplb7t-">Iterative methods</span> </dt><dd class="pplb7t-">Iterative methods</span> </dt><dd
class="description"> class="description">
<!--l. 318--><p class="noindent" >a subset of classical and Krylov subspace iterative methods</dd></dl> <!--l. 317--><p class="noindent" >a subset of classical and Krylov subspace iterative methods</dd></dl>
<!--l. 321--><p class="noindent" >The following naming scheme has been adopted for all the symbols internally defined <!--l. 320--><p class="noindent" >The following naming scheme has been adopted for all the symbols internally defined
in the PSBLAS software package: in the PSBLAS software package:
<ul class="itemize1"> <ul class="itemize1">
<li class="itemize"> <li class="itemize">
<!--l. 324--><p class="noindent" >all symbols (i.e. subroutine names, data types...) are prefixed by <span class="obeylines-h"><span class="verb"><span <!--l. 323--><p class="noindent" >all symbols (i.e. subroutine names, data types...) are prefixed by <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_</span></span></span> class="cmtt-10">psb_</span></span></span>
</li> </li>
<li class="itemize"> <li class="itemize">
<!--l. 326--><p class="noindent" >all data type names are suffixed by <span class="obeylines-h"><span class="verb"><span <!--l. 325--><p class="noindent" >all data type names are suffixed by <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">_type</span></span></span> class="cmtt-10">_type</span></span></span>
</li> </li>
<li class="itemize"> <li class="itemize">
<!--l. 327--><p class="noindent" >all constants are suffixed by <span class="obeylines-h"><span class="verb"><span <!--l. 326--><p class="noindent" >all constants are suffixed by <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">_</span></span></span> class="cmtt-10">_</span></span></span>
</li> </li>
<li class="itemize"> <li class="itemize">
<!--l. 328--><p class="noindent" >all top-level subroutine names follow the rule <span class="obeylines-h"><span class="verb"><span <!--l. 327--><p class="noindent" >all top-level subroutine names follow the rule <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_xxname</span></span></span> where <span class="obeylines-h"><span class="verb"><span class="cmtt-10">psb_xxname</span></span></span> where <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">xx</span></span></span> can be class="cmtt-10">xx</span></span></span> can be
either: either:
<ul class="itemize2"> <ul class="itemize2">
<li class="itemize"> <li class="itemize">
<!--l. 331--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span <!--l. 330--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span
class="cmtt-10">ge</span></span></span>: the routine is related to dense data, class="cmtt-10">ge</span></span></span>: the routine is related to dense data,
</li> </li>
<li class="itemize"> <li class="itemize">
<!--l. 332--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span <!--l. 331--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span
class="cmtt-10">sp</span></span></span>: the routine is related to sparse data, class="cmtt-10">sp</span></span></span>: the routine is related to sparse data,
</li> </li>
<li class="itemize"> <li class="itemize">
<!--l. 333--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span <!--l. 332--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span
class="cmtt-10">cd</span></span></span>: the routine is related to communication descriptors (see&#x00A0;<a class="cmtt-10">cd</span></span></span>: the routine is related to communication descriptors (see&#x00A0;<a
href="userhtmlse3.html#x9-100003">3<!--tex4ht:ref: sec:datastruct --></a>).</li></ul> href="userhtmlse3.html#x9-100003">3<!--tex4ht:ref: sec:datastruct --></a>).</li></ul>
<!--l. 336--><p class="noindent" >For example the <span class="obeylines-h"><span class="verb"><span <!--l. 335--><p class="noindent" >For example the <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geins</span></span></span>, <span class="obeylines-h"><span class="verb"><span class="cmtt-10">psb_geins</span></span></span>, <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spins</span></span></span> and <span class="obeylines-h"><span class="verb"><span class="cmtt-10">psb_spins</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdins</span></span></span> perform the same class="cmtt-10">psb_cdins</span></span></span> perform the same
@ -339,33 +339,33 @@ class="cmtt-10">psb_cdins</span></span></span> perform the same
href="userhtmlse6.html#x12-780006">6<!--tex4ht:ref: sec:toolsrout --></a>) on dense matrices, sparse matrices and communication href="userhtmlse6.html#x12-780006">6<!--tex4ht:ref: sec:toolsrout --></a>) on dense matrices, sparse matrices and communication
descriptors respectively. Interface overloading allows the usage of the same descriptors respectively. Interface overloading allows the usage of the same
subroutine names for both real and complex data.</li></ul> subroutine names for both real and complex data.</li></ul>
<!--l. 343--><p class="noindent" >In the description of the subroutines, arguments or argument entries are classified <!--l. 342--><p class="noindent" >In the description of the subroutines, arguments or argument entries are classified
as: as:
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 346--><p class="noindent" > <!--l. 345--><p class="noindent" >
<span <span
class="pplb7t-">global</span> </dt><dd class="pplb7t-">global</span> </dt><dd
class="description"> class="description">
<!--l. 346--><p class="noindent" >For input arguments, the value must be the same on all processes <!--l. 345--><p class="noindent" >For input arguments, the value must be the same on all processes
participating in the subroutine call; for output arguments the value is participating in the subroutine call; for output arguments the value is
guaranteed to be the same. guaranteed to be the same.
</dd><dt class="description"> </dd><dt class="description">
<!--l. 349--><p class="noindent" > <!--l. 348--><p class="noindent" >
<span <span
class="pplb7t-">local</span> </dt><dd class="pplb7t-">local</span> </dt><dd
class="description"> class="description">
<!--l. 349--><p class="noindent" >Each process has its own value(s) independently.</dd></dl> <!--l. 348--><p class="noindent" >Each process has its own value(s) independently.</dd></dl>
<!--l. 351--><p class="noindent" >To finish our general description, we define a version string with the constant <!--l. 350--><p class="noindent" >To finish our general description, we define a version string with the constant
<div class="math-display" > <div class="math-display" >
<img <img
src="userhtml0x.png" alt="psb_version_string_ src="userhtml0x.png" alt="psb_version_string_
" class="math-display" ></div> " class="math-display" ></div>
<!--l. 353--><p class="nopar" > whose current value is <span class="obeylines-h"><span class="verb"><span <!--l. 352--><p class="nopar" > whose current value is <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">3.9.0</span></span></span> class="cmtt-10">3.9.0</span></span></span>
<!--l. 356--><p class="noindent" > <!--l. 355--><p class="noindent" >
<h4 class="subsectionHead"><span class="titlemark">2.3 </span> <a <h4 class="subsectionHead"><span class="titlemark">2.3 </span> <a
id="x5-70002.3"></a>Application structure</h4> id="x5-70002.3"></a>Application structure</h4>
<!--l. 359--><p class="noindent" >The main underlying principle of the PSBLAS library is that the library objects are <!--l. 358--><p class="noindent" >The main underlying principle of the PSBLAS library is that the library objects are
created and exist with reference to a discretized space to which there corresponds created and exist with reference to a discretized space to which there corresponds
an index space and a matrix sparsity pattern. As an example, consider a an index space and a matrix sparsity pattern. As an example, consider a
cell-centered finite-volume discretization of the Navier-Stokes equations on a cell-centered finite-volume discretization of the Navier-Stokes equations on a
@ -375,13 +375,13 @@ class="zplmr7m-">n </span>is isomorphic to the set of cell centers,
whereas the pattern of the associated linear system matrix is isomorphic to the whereas the pattern of the associated linear system matrix is isomorphic to the
adjacency graph imposed on the discretization mesh by the discretization adjacency graph imposed on the discretization mesh by the discretization
stencil. stencil.
<!--l. 369--><p class="indent" > Thus the first order of business is to establish an index space, and this is done <!--l. 368--><p class="indent" > Thus the first order of business is to establish an index space, and this is done
with a call to <span class="obeylines-h"><span class="verb"><span with a call to <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdall</span></span></span> in which we specify the size of the index space <span class="cmtt-10">psb_cdall</span></span></span> in which we specify the size of the index space <span
class="zplmr7m-">n </span>and the class="zplmr7m-">n </span>and the
allocation of the elements of the index space to the various processes making up the allocation of the elements of the index space to the various processes making up the
MPI (virtual) parallel machine. MPI (virtual) parallel machine.
<!--l. 375--><p class="indent" > The index space is partitioned among processes, and this creates a mapping from <!--l. 374--><p class="indent" > The index space is partitioned among processes, and this creates a mapping from
the &#8220;global&#8221; numbering 1<span the &#8220;global&#8221; numbering 1<span
class="zplmr7m-">&#x2026;</span><span class="zplmr7m-">&#x2026;</span><span
class="zplmr7m-">n </span>to a numbering &#8220;local&#8221; to each process; each process <span class="zplmr7m-">n </span>to a numbering &#8220;local&#8221; to each process; each process <span
@ -400,7 +400,7 @@ numbering.
<!--l. 385--><p class="indent" > For a given index space 1<span <!--l. 384--><p class="indent" > For a given index space 1<span
class="zplmr7m-">&#x2026;</span><span class="zplmr7m-">&#x2026;</span><span
class="zplmr7m-">n </span>there are many possible associated topologies, i.e. class="zplmr7m-">n </span>there are many possible associated topologies, i.e.
many different discretization stencils; thus the description of the index space is not many different discretization stencils; thus the description of the index space is not
@ -430,44 +430,44 @@ class="zplmr7m-">A</span>, and thus they have to be fetched from (neighbouring)
processes. The descriptor of the index space is built exactly for the purpose processes. The descriptor of the index space is built exactly for the purpose
of properly sequencing the communication steps required to achieve this of properly sequencing the communication steps required to achieve this
objective. objective.
<!--l. 401--><p class="indent" > A simple application structure will walk through the index space allocation, <!--l. 400--><p class="indent" > A simple application structure will walk through the index space allocation,
matrix/vector creation and linear system solution as follows: matrix/vector creation and linear system solution as follows:
<ol class="enumerate1" > <ol class="enumerate1" >
<li <li
class="enumerate" id="x5-7002x1"> class="enumerate" id="x5-7002x1">
<!--l. 405--><p class="noindent" >Initialize parallel environment with <span class="obeylines-h"><span class="verb"><span <!--l. 404--><p class="noindent" >Initialize parallel environment with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_init</span></span></span>; class="cmtt-10">psb_init</span></span></span>;
</li> </li>
<li <li
class="enumerate" id="x5-7004x2"> class="enumerate" id="x5-7004x2">
<!--l. 406--><p class="noindent" >Initialize index space with <span class="obeylines-h"><span class="verb"><span <!--l. 405--><p class="noindent" >Initialize index space with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdall</span></span></span>; class="cmtt-10">psb_cdall</span></span></span>;
</li> </li>
<li <li
class="enumerate" id="x5-7006x3"> class="enumerate" id="x5-7006x3">
<!--l. 407--><p class="noindent" >Allocate sparse matrix and dense vectors with <span class="obeylines-h"><span class="verb"><span <!--l. 406--><p class="noindent" >Allocate sparse matrix and dense vectors with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spall</span></span></span> and <span class="obeylines-h"><span class="verb"><span class="cmtt-10">psb_spall</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geall</span></span></span>; class="cmtt-10">psb_geall</span></span></span>;
</li> </li>
<li <li
class="enumerate" id="x5-7008x4"> class="enumerate" id="x5-7008x4">
<!--l. 409--><p class="noindent" >Loop over all local rows, generate matrix and vector entries, and insert <!--l. 408--><p class="noindent" >Loop over all local rows, generate matrix and vector entries, and insert
them with <span class="obeylines-h"><span class="verb"><span them with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spins</span></span></span> and <span class="obeylines-h"><span class="verb"><span class="cmtt-10">psb_spins</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geins</span></span></span> class="cmtt-10">psb_geins</span></span></span>
</li> </li>
<li <li
class="enumerate" id="x5-7010x5"> class="enumerate" id="x5-7010x5">
<!--l. 411--><p class="noindent" >Assemble the various entities: <!--l. 410--><p class="noindent" >Assemble the various entities:
<ol class="enumerate2" > <ol class="enumerate2" >
<li <li
class="enumerate" id="x5-7012x1"> class="enumerate" id="x5-7012x1">
<!--l. 413--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span <!--l. 412--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdasb</span></span></span>, class="cmtt-10">psb_cdasb</span></span></span>,
</li> </li>
<li <li
class="enumerate" id="x5-7014x2"> class="enumerate" id="x5-7014x2">
<!--l. 414--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span <!--l. 413--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spasb</span></span></span>, class="cmtt-10">psb_spasb</span></span></span>,
@ -475,12 +475,12 @@ class="cmtt-10">psb_spasb</span></span></span>,
</li> </li>
<li <li
class="enumerate" id="x5-7016x3"> class="enumerate" id="x5-7016x3">
<!--l. 415--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span <!--l. 414--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geasb</span></span></span>;</li></ol> class="cmtt-10">psb_geasb</span></span></span>;</li></ol>
</li> </li>
<li <li
class="enumerate" id="x5-7018x6"> class="enumerate" id="x5-7018x6">
<!--l. 417--><p class="noindent" >Choose the preconditioner to be used with <span class="obeylines-h"><span class="verb"><span <!--l. 416--><p class="noindent" >Choose the preconditioner to be used with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">prec%init</span></span></span> and <span class="obeylines-h"><span class="verb"><span class="cmtt-10">prec%init</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">prec%set</span></span></span>, and build it with class="cmtt-10">prec%set</span></span></span>, and build it with
<span class="obeylines-h"><span class="verb"><span <span class="obeylines-h"><span class="verb"><span
@ -490,39 +490,39 @@ href="userhtml8.html#fn3x0"><sup class="textsuperscript">3</sup></a></span><a
</li> </li>
<li <li
class="enumerate" id="x5-7022x7"> class="enumerate" id="x5-7022x7">
<!--l. 422--><p class="noindent" >Call one of the iterative drivers with the method of choice, e.g. <span class="obeylines-h"><span class="verb"><span <!--l. 421--><p class="noindent" >Call one of the iterative drivers with the method of choice, e.g. <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_krylov</span></span></span> class="cmtt-10">psb_krylov</span></span></span>
with <span class="obeylines-h"><span class="verb"><span with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">bicgstab</span></span></span>.</li></ol> class="cmtt-10">bicgstab</span></span></span>.</li></ol>
<!--l. 425--><p class="noindent" >This is the structure of the sample programs in the directory <span class="obeylines-h"><span class="verb"><span <!--l. 424--><p class="noindent" >This is the structure of the sample programs in the directory <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">test/pargen/</span></span></span>. class="cmtt-10">test/pargen/</span></span></span>.
<!--l. 428--><p class="indent" > For a simulation in which the same discretization mesh is used over multiple <!--l. 427--><p class="indent" > For a simulation in which the same discretization mesh is used over multiple
time steps, the following structure may be more appropriate: time steps, the following structure may be more appropriate:
<ol class="enumerate1" > <ol class="enumerate1" >
<li <li
class="enumerate" id="x5-7024x1"> class="enumerate" id="x5-7024x1">
<!--l. 431--><p class="noindent" >Initialize parallel environment with <span class="obeylines-h"><span class="verb"><span <!--l. 430--><p class="noindent" >Initialize parallel environment with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_init</span></span></span> class="cmtt-10">psb_init</span></span></span>
</li> </li>
<li <li
class="enumerate" id="x5-7026x2"> class="enumerate" id="x5-7026x2">
<!--l. 432--><p class="noindent" >Initialize index space with <span class="obeylines-h"><span class="verb"><span <!--l. 431--><p class="noindent" >Initialize index space with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdall</span></span></span> class="cmtt-10">psb_cdall</span></span></span>
</li> </li>
<li <li
class="enumerate" id="x5-7028x3"> class="enumerate" id="x5-7028x3">
<!--l. 433--><p class="noindent" >Loop over the topology of the discretization mesh and build the <!--l. 432--><p class="noindent" >Loop over the topology of the discretization mesh and build the
descriptor with <span class="obeylines-h"><span class="verb"><span descriptor with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdins</span></span></span>; class="cmtt-10">psb_cdins</span></span></span>;
</li> </li>
<li <li
class="enumerate" id="x5-7030x4"> class="enumerate" id="x5-7030x4">
<!--l. 435--><p class="noindent" >Assemble the descriptor with <span class="obeylines-h"><span class="verb"><span <!--l. 434--><p class="noindent" >Assemble the descriptor with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdasb</span></span></span>; class="cmtt-10">psb_cdasb</span></span></span>;
</li> </li>
<li <li
class="enumerate" id="x5-7032x5"> class="enumerate" id="x5-7032x5">
<!--l. 436--><p class="noindent" >Allocate the sparse matrices and dense vectors with <span class="obeylines-h"><span class="verb"><span <!--l. 435--><p class="noindent" >Allocate the sparse matrices and dense vectors with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spall</span></span></span> and class="cmtt-10">psb_spall</span></span></span> and
<span class="obeylines-h"><span class="verb"><span <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geall</span></span></span>; class="cmtt-10">psb_geall</span></span></span>;
@ -532,34 +532,34 @@ class="cmtt-10">psb_geall</span></span></span>;
</li> </li>
<li <li
class="enumerate" id="x5-7034x6"> class="enumerate" id="x5-7034x6">
<!--l. 438--><p class="noindent" >Loop over the time steps: <!--l. 437--><p class="noindent" >Loop over the time steps:
<ol class="enumerate2" > <ol class="enumerate2" >
<li <li
class="enumerate" id="x5-7036x1"> class="enumerate" id="x5-7036x1">
<!--l. 440--><p class="noindent" >If after the first time step, reinitialize the sparse matrix with <span class="obeylines-h"><span class="verb"><span <!--l. 439--><p class="noindent" >If after the first time step, reinitialize the sparse matrix with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_sprn</span></span></span>; class="cmtt-10">psb_sprn</span></span></span>;
also zero out the dense vectors; also zero out the dense vectors;
</li> </li>
<li <li
class="enumerate" id="x5-7038x2"> class="enumerate" id="x5-7038x2">
<!--l. 443--><p class="noindent" >Loop over the mesh, generate the coefficients and insert/update <!--l. 442--><p class="noindent" >Loop over the mesh, generate the coefficients and insert/update
them with <span class="obeylines-h"><span class="verb"><span them with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spins</span></span></span> and <span class="obeylines-h"><span class="verb"><span class="cmtt-10">psb_spins</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geins</span></span></span>; class="cmtt-10">psb_geins</span></span></span>;
</li> </li>
<li <li
class="enumerate" id="x5-7040x3"> class="enumerate" id="x5-7040x3">
<!--l. 445--><p class="noindent" >Assemble with <span class="obeylines-h"><span class="verb"><span <!--l. 444--><p class="noindent" >Assemble with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spasb</span></span></span> and <span class="obeylines-h"><span class="verb"><span class="cmtt-10">psb_spasb</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geasb</span></span></span>; class="cmtt-10">psb_geasb</span></span></span>;
</li> </li>
<li <li
class="enumerate" id="x5-7042x4"> class="enumerate" id="x5-7042x4">
<!--l. 446--><p class="noindent" > <!--l. 445--><p class="noindent" >
</li> </li>
<li <li
class="enumerate" id="x5-7044x5"> class="enumerate" id="x5-7044x5">
<!--l. 446--><p class="noindent" >Choose the preconditioner to be used with <span class="obeylines-h"><span class="verb"><span <!--l. 445--><p class="noindent" >Choose the preconditioner to be used with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">prec%init</span></span></span> and class="cmtt-10">prec%init</span></span></span> and
<span class="obeylines-h"><span class="verb"><span <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">prec%set</span></span></span>, and build it with <span class="obeylines-h"><span class="verb"><span class="cmtt-10">prec%set</span></span></span>, and build it with <span class="obeylines-h"><span class="verb"><span
@ -567,21 +567,21 @@ class="cmtt-10">prec%build</span></span></span>;
</li> </li>
<li <li
class="enumerate" id="x5-7046x6"> class="enumerate" id="x5-7046x6">
<!--l. 449--><p class="noindent" >Call one of the iterative drivers with the method of choice, e.g. <!--l. 448--><p class="noindent" >Call one of the iterative drivers with the method of choice, e.g.
<span class="obeylines-h"><span class="verb"><span <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_krylov</span></span></span> with <span class="obeylines-h"><span class="verb"><span class="cmtt-10">psb_krylov</span></span></span> with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">bicgstab</span></span></span>.</li></ol> class="cmtt-10">bicgstab</span></span></span>.</li></ol>
</li></ol> </li></ol>
<!--l. 453--><p class="noindent" >The insertion routines will be called as many times as needed; they only need to be <!--l. 452--><p class="noindent" >The insertion routines will be called as many times as needed; they only need to be
called on the data that is actually allocated to the current process, i.e. each process called on the data that is actually allocated to the current process, i.e. each process
generates its own data. generates its own data.
<!--l. 458--><p class="indent" > In principle there is no specific order in the calls to <span class="obeylines-h"><span class="verb"><span <!--l. 457--><p class="indent" > In principle there is no specific order in the calls to <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spins</span></span></span>, nor is there a class="cmtt-10">psb_spins</span></span></span>, nor is there a
requirement to build a matrix row in its entirety before calling the routine; this requirement to build a matrix row in its entirety before calling the routine; this
allows the application programmer to walk through the discretization mesh element allows the application programmer to walk through the discretization mesh element
by element, generating the main part of a given matrix row but also contributions to by element, generating the main part of a given matrix row but also contributions to
the rows corresponding to neighbouring elements. the rows corresponding to neighbouring elements.
<!--l. 465--><p class="indent" > From a functional point of view it is even possible to execute one call for each <!--l. 464--><p class="indent" > From a functional point of view it is even possible to execute one call for each
nonzero coefficient; however this would have a substantial computational nonzero coefficient; however this would have a substantial computational
overhead. It is therefore advisable to pack a certain amount of data into each overhead. It is therefore advisable to pack a certain amount of data into each
call to the insertion routine, say touching on a few tens of rows; the best call to the insertion routine, say touching on a few tens of rows; the best
@ -595,23 +595,23 @@ process and pass it in a single call to <span class="obeylines-h"><span class="v
class="cmtt-10">psb_spins</span></span></span>; this, however, would entail a class="cmtt-10">psb_spins</span></span></span>; this, however, would entail a
doubling of memory occupation, and thus would be almost always far from doubling of memory occupation, and thus would be almost always far from
optimal. optimal.
<!--l. 478--><p class="noindent" > <!--l. 477--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">2.3.1 </span> <a <h5 class="subsubsectionHead"><span class="titlemark">2.3.1 </span> <a
id="x5-80002.3.1"></a>User-defined index mappings</h5> id="x5-80002.3.1"></a>User-defined index mappings</h5>
<!--l. 480--><p class="noindent" >PSBLAS supports user-defined global to local index mappings, subject to the <!--l. 479--><p class="noindent" >PSBLAS supports user-defined global to local index mappings, subject to the
constraints outlined in sec.&#x00A0;<a constraints outlined in sec.&#x00A0;<a
href="#x5-70002.3">2.3<!--tex4ht:ref: sec:appstruct --></a>: href="#x5-70002.3">2.3<!--tex4ht:ref: sec:appstruct --></a>:
<ol class="enumerate1" > <ol class="enumerate1" >
<li <li
class="enumerate" id="x5-8002x1"> class="enumerate" id="x5-8002x1">
<!--l. 483--><p class="noindent" >The set of indices owned locally must be mapped to the set 1<span <!--l. 482--><p class="noindent" >The set of indices owned locally must be mapped to the set 1<span
class="zplmr7m-">&#x2026;</span><span class="zplmr7m-">&#x2026;</span><span
class="zplmr7m-">n</span><sub>row<sub><span class="zplmr7m-">n</span><sub>row<sub><span
class="zplmr7m-x-x-60">i</span></sub></sub>; class="zplmr7m-x-x-60">i</span></sub></sub>;
</li> </li>
<li <li
class="enumerate" id="x5-8004x2"> class="enumerate" id="x5-8004x2">
<!--l. 485--><p class="noindent" >The set of halo points must be mapped to the set <span <!--l. 484--><p class="noindent" >The set of halo points must be mapped to the set <span
class="zplmr7m-">n</span><sub>row<sub><span class="zplmr7m-">n</span><sub>row<sub><span
class="zplmr7m-x-x-60">i</span></sub></sub> <span class="zplmr7m-x-x-60">i</span></sub></sub> <span
class="zplmr7t-">+ </span>1<span class="zplmr7t-">+ </span>1<span
@ -619,14 +619,14 @@ class="zplmr7m-">&#x2026;</span><span
class="zplmr7m-">n</span><sub>col<sub> class="zplmr7m-">n</span><sub>col<sub>
<span <span
class="zplmr7m-x-x-60">i</span></sub></sub>;</li></ol> class="zplmr7m-x-x-60">i</span></sub></sub>;</li></ol>
<!--l. 488--><p class="noindent" >but otherwise the mapping is arbitrary. The user application is responsible for ensuring <!--l. 487--><p class="noindent" >but otherwise the mapping is arbitrary. The user application is responsible for ensuring
consistency of this mapping; some errors may be caught by the library, but consistency of this mapping; some errors may be caught by the library, but
this is not guaranteed. The application structure to support this usage is as this is not guaranteed. The application structure to support this usage is as
follows: follows:
<ol class="enumerate1" > <ol class="enumerate1" >
<li <li
class="enumerate" id="x5-8006x1"> class="enumerate" id="x5-8006x1">
<!--l. 494--><p class="noindent" >Initialize index <!--l. 493--><p class="noindent" >Initialize index
space with <span class="obeylines-h"><span class="verb"><span space with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdall(ictx,desc,info,vl=vl,lidx=lidx)</span></span></span> passing the class="cmtt-10">psb_cdall(ictx,desc,info,vl=vl,lidx=lidx)</span></span></span> passing the
vectors <span class="obeylines-h"><span class="verb"><span vectors <span class="obeylines-h"><span class="verb"><span
@ -636,7 +636,7 @@ class="cmtt-10">lidx(:)</span></span></span> containing the corresponding local
</li> </li>
<li <li
class="enumerate" id="x5-8008x2"> class="enumerate" id="x5-8008x2">
<!--l. 499--><p class="noindent" >Add the halo points <span class="obeylines-h"><span class="verb"><span <!--l. 498--><p class="noindent" >Add the halo points <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">ja(:)</span></span></span> and their associated local indices <span class="obeylines-h"><span class="verb"><span class="cmtt-10">ja(:)</span></span></span> and their associated local indices <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">lidx(:)</span></span></span> class="cmtt-10">lidx(:)</span></span></span>
with one or more calls to <span class="obeylines-h"><span class="verb"><span with one or more calls to <span class="obeylines-h"><span class="verb"><span
@ -644,7 +644,7 @@ class="cmtt-10">psb_cdins(nz,ja,desc,info,lidx=lidx)</span></span></span>;
</li> </li>
<li <li
class="enumerate" id="x5-8010x3"> class="enumerate" id="x5-8010x3">
<!--l. 502--><p class="noindent" >Assemble the descriptor with <span class="obeylines-h"><span class="verb"><span <!--l. 501--><p class="noindent" >Assemble the descriptor with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdasb</span></span></span>; class="cmtt-10">psb_cdasb</span></span></span>;
</li> </li>
<li <li
@ -652,7 +652,7 @@ class="cmtt-10">psb_cdasb</span></span></span>;
<!--l. 503--><p class="noindent" >Build the sparse matrices and vectors, optionally making use in <!--l. 502--><p class="noindent" >Build the sparse matrices and vectors, optionally making use in
<span class="obeylines-h"><span class="verb"><span <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spins</span></span></span> and <span class="obeylines-h"><span class="verb"><span class="cmtt-10">psb_spins</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geins</span></span></span> of the <span class="obeylines-h"><span class="verb"><span class="cmtt-10">psb_geins</span></span></span> of the <span class="obeylines-h"><span class="verb"><span
@ -661,19 +661,19 @@ class="cmtt-10">local</span></span></span> argument specifying that the
class="cmtt-10">ia</span></span></span>, <span class="obeylines-h"><span class="verb"><span class="cmtt-10">ia</span></span></span>, <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">ja</span></span></span> and <span class="obeylines-h"><span class="verb"><span class="cmtt-10">ja</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">irw</span></span></span>, respectively, are already local indices.</li></ol> class="cmtt-10">irw</span></span></span>, respectively, are already local indices.</li></ol>
<!--l. 510--><p class="noindent" > <!--l. 509--><p class="noindent" >
<h4 class="subsectionHead"><span class="titlemark">2.4 </span> <a <h4 class="subsectionHead"><span class="titlemark">2.4 </span> <a
id="x5-90002.4"></a>Programming model</h4> id="x5-90002.4"></a>Programming model</h4>
<!--l. 512--><p class="noindent" >The PSBLAS library is based on the Single Program Multiple Data (SPMD) <!--l. 511--><p class="noindent" >The PSBLAS library is based on the Single Program Multiple Data (SPMD)
programming model: each process participating in the computation performs the programming model: each process participating in the computation performs the
same actions on a chunk of data. Parallelism is thus data-driven. same actions on a chunk of data. Parallelism is thus data-driven.
<!--l. 517--><p class="indent" > Because of this structure, many subroutines coordinate their action across the <!--l. 516--><p class="indent" > Because of this structure, many subroutines coordinate their action across the
various processes, thus providing an implicit synchronization point, and therefore various processes, thus providing an implicit synchronization point, and therefore
<span <span
class="pplri7t-">must </span>be called simultaneously by all processes participating in the computation. This class="pplri7t-">must </span>be called simultaneously by all processes participating in the computation. This
is certainly true for the data allocation and assembly routines, for all the is certainly true for the data allocation and assembly routines, for all the
computational routines and for some of the tools routines. computational routines and for some of the tools routines.
<!--l. 525--><p class="indent" > However there are many cases where no synchronization, and indeed no <!--l. 524--><p class="indent" > However there are many cases where no synchronization, and indeed no
communication among processes, is implied; for instance, all the routines in sec.&#x00A0;<a communication among processes, is implied; for instance, all the routines in sec.&#x00A0;<a
href="userhtmlse3.html#x9-100003">3<!--tex4ht:ref: sec:datastruct --></a> href="userhtmlse3.html#x9-100003">3<!--tex4ht:ref: sec:datastruct --></a>
are only acting on the local data structures, and thus may be called independently. are only acting on the local data structures, and thus may be called independently.
@ -681,21 +681,21 @@ The most important case is that of the coefficient insertion routines: since the
of coefficients in the sparse and dense matrices varies among the processors, and of coefficients in the sparse and dense matrices varies among the processors, and
since the user is free to choose an arbitrary order in building the matrix entries, since the user is free to choose an arbitrary order in building the matrix entries,
these routines cannot imply a synchronization. these routines cannot imply a synchronization.
<!--l. 535--><p class="indent" > Throughout this user&#8217;s guide each subroutine will be clearly indicated <!--l. 534--><p class="indent" > Throughout this user&#8217;s guide each subroutine will be clearly indicated
as: as:
<dl class="description"><dt class="description"> <dl class="description"><dt class="description">
<!--l. 538--><p class="noindent" > <!--l. 537--><p class="noindent" >
<span <span
class="pplb7t-">Synchronous:</span> </dt><dd class="pplb7t-">Synchronous:</span> </dt><dd
class="description"> class="description">
<!--l. 538--><p class="noindent" >must be called simultaneously by all the processes in the relevant <!--l. 537--><p class="noindent" >must be called simultaneously by all the processes in the relevant
communication context; communication context;
</dd><dt class="description"> </dd><dt class="description">
<!--l. 540--><p class="noindent" > <!--l. 539--><p class="noindent" >
<span <span
class="pplb7t-">Asynchronous:</span> </dt><dd class="pplb7t-">Asynchronous:</span> </dt><dd
class="description"> class="description">
<!--l. 540--><p class="noindent" >may be called in a totally independent manner.</dd></dl> <!--l. 539--><p class="noindent" >may be called in a totally independent manner.</dd></dl>

@ -21,7 +21,7 @@ href="userhtml.html#userhtmlse8.html" >up</a>] </p></div>
<!--l. 7--><p class="noindent" >The routines in this chapter implement various global communication operators on <!--l. 7--><p class="noindent" >The routines in this chapter implement various global communication operators on
vectors associated with a discretization mesh. For auxiliary communication routines vectors associated with a discretization mesh. For auxiliary communication routines
not tied to a discretization space see&#x00A0;<a not tied to a discretization space see&#x00A0;<a
href="userhtmlse6.html#x12-780006">6<!--tex4ht:ref: sec:toolsrout --></a>. href="userhtmlse7.html#x13-1060007">7<!--tex4ht:ref: sec:parenv --></a>.

File diff suppressed because one or more lines are too long

@ -6,7 +6,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
The routines in this chapter implement various global communication operators The routines in this chapter implement various global communication operators
on vectors associated with a discretization mesh. For auxiliary communication on vectors associated with a discretization mesh. For auxiliary communication
routines not tied to a discretization space see~\ref{sec:toolsrout}. routines not tied to a discretization space see~\ref{sec:parenv}.
\clearpage\subsection{psb\_halo --- Halo Data Communication} \clearpage\subsection{psb\_halo --- Halo Data Communication}

@ -28,8 +28,8 @@ accelerators through OpenMP as support from compilers improves.
The project is led by Salvatore Filippone; a number of people have been contributing to this package over the The project is led by Salvatore Filippone; a number of people have been contributing to this package over the
years; contributors in roughly reverse chronological order: years; contributors in roughly reverse chronological order:
\begin{obeylines} \begin{obeylines}
Luca Pepè Sciarria
Theophane Loloum Theophane Loloum
Fabio Durastante
Dimitri Walther Dimitri Walther
Andrea Di Iorio Andrea Di Iorio
Stefano Petrilli Stefano Petrilli
@ -37,7 +37,6 @@ years; contributors in roughly reverse chronological order:
Zaak Beekman Zaak Beekman
Ambra Abdullahi Hassan Ambra Abdullahi Hassan
Pasqua D'Ambra Pasqua D'Ambra
Alfredo Buttari
Daniela di Serafino Daniela di Serafino
Michele Martone Michele Martone
Michele Colajanni Michele Colajanni
@ -251,7 +250,7 @@ domain is usually a halo point for some other domain\footnote{This is
two variables is reciprocal. If the matrix pattern is non-symmetric two variables is reciprocal. If the matrix pattern is non-symmetric
we may have one-way interactions, and these could cause a situation we may have one-way interactions, and these could cause a situation
in which a boundary point is not a halo point for its neighbour.}; therefore in which a boundary point is not a halo point for its neighbour.}; therefore
the cardinality of the boundary points set denotes the amount of data the cardinality of the boundary points set determines the amount of data
sent to other domains. sent to other domains.
\item[Overlap.] An overlap point is a boundary point assigned to \item[Overlap.] An overlap point is a boundary point assigned to
multiple domains. Any operation that involves an overlap point multiple domains. Any operation that involves an overlap point

@ -136,7 +136,7 @@
by Salvatore Filippone\\ by Salvatore Filippone\\
Alfredo Buttari \\ Alfredo Buttari \\
Fabio Durastante}\\ Fabio Durastante}\\
Jun 1st, 2025 June 9th, 2025
\end{minipage}} \end{minipage}}
} }
%\addtolength{\textwidth}{\centeroffset} %\addtolength{\textwidth}{\centeroffset}

@ -106,7 +106,7 @@ Fabio Durastante } \\
%\today %\today
Software version: 3.9.0\\ Software version: 3.9.0\\
%\today %\today
Jun 1st, 2025 June 9th, 2025
\cleardoublepage \cleardoublepage
\begingroup \begingroup
\renewcommand*{\thepage}{toc} \renewcommand*{\thepage}{toc}

Loading…
Cancel
Save