diff --git a/docs/amg4psblas_1.0-guide.pdf b/docs/amg4psblas_1.0-guide.pdf index bf67aac6..33c53b30 100644 Binary files a/docs/amg4psblas_1.0-guide.pdf and b/docs/amg4psblas_1.0-guide.pdf differ diff --git a/docs/html/userhtml.css b/docs/html/userhtml.css index 2e1d7cd1..fe7f3289 100644 --- a/docs/html/userhtml.css +++ b/docs/html/userhtml.css @@ -13,32 +13,34 @@ .cmbx-12{font-size:109%; font-weight: bold;} .cmbx-12{ font-weight: bold;} .cmbx-12{ font-weight: bold;} -.cmtt-12{font-size:109%;font-family: monospace;} -.cmtt-12{font-family: monospace;} -.cmtt-12{font-family: monospace;} +.cmtt-12{font-size:109%;font-family: monospace,monospace;} +.cmtt-12{font-family: monospace,monospace;} +.cmtt-12{font-family: monospace,monospace;} .cmcsc-10x-x-120{font-size:109%;} .cmr-8{font-size:72%;} .cmmi-12{font-size:109%;font-style: italic;} .cmmi-8{font-size:72%;font-style: italic;} .cmsy-8{font-size:72%;} +.tctt-1200{font-size:109%;font-family: monospace,monospace;} .cmmi-10x-x-109{font-style: italic;} .cmsy-10x-x-109{} -.cmtt-10x-x-109{font-family: monospace;} -.cmtt-10x-x-109{font-family: monospace;} -.cmtt-10x-x-109{font-family: monospace;} +.cmtt-10x-x-109{font-family: monospace,monospace;} +.cmtt-10x-x-109{font-family: monospace,monospace;} +.cmtt-10x-x-109{font-family: monospace,monospace;} .cmcsc-10x-x-109{} -.cmtt-10{font-size:90%;font-family: monospace;} -.cmtt-10{font-family: monospace;} -.cmtt-10{font-family: monospace;} +.cmtt-10{font-size:90%;font-family: monospace,monospace;} +.cmtt-10{font-family: monospace,monospace;} +.cmtt-10{font-family: monospace,monospace;} .cmbx-10x-x-109{ font-weight: bold;} .cmbx-10x-x-109{ font-weight: bold;} .cmbx-10x-x-109{ font-weight: bold;} .cmcsc-10{font-size:90%;} .small-caps{font-variant: small-caps; } -p.noindent { text-indent: 0em } -td p.noindent { text-indent: 0em; margin-top:0em; } -p.nopar { text-indent: 0em; } -p.indent{ text-indent: 1.5em } +p{margin-top:0;margin-bottom:0} +p.indent{text-indent:0;} +p + 
p{margin-top:1em;} +p + div, p + pre {margin-top:1em;} +div + p, pre + p {margin-top:1em;} @media print {div.crosslinks {visibility:hidden;}} a img { border-top: 0; border-left: 0; border-right: 0; } center { margin-top:1em; margin-bottom:1em; } @@ -61,7 +63,7 @@ div.obeylines-v p { margin-top:0; margin-bottom:0; } td.displaylines {text-align:center; white-space:nowrap;} .centerline {text-align:center;} .rightline {text-align:right;} -div.verbatim {font-family: monospace; white-space: nowrap; text-align:left; clear:both; } +div.verbatim {font-family: monospace,monospace; white-space: nowrap; text-align:left; clear:both; } .fbox {padding-left:3.0pt; padding-right:3.0pt; text-indent:0pt; border:solid black 0.4pt; } div.fbox {display:table} div.center div.fbox {text-align:center; clear:both; padding-left:3.0pt; padding-right:3.0pt; text-indent:0pt; border:solid black 0.4pt; } @@ -94,18 +96,16 @@ td.td01{ padding-left:0pt; padding-right:5pt; } td.td10{ padding-left:5pt; padding-right:0pt; } td.td11{ padding-left:5pt; padding-right:5pt; } table[rules] {border-left:solid black 0.4pt; border-right:solid black 0.4pt; } -.hline hr, .cline hr{ height : 1px; margin:0px; } +.hline hr, .cline hr{ height : 0px; margin:0px; } +.hline td, .cline td{ padding: 0; } +.hline hr, .cline hr{border:none;border-top:1px solid black;} .tabbing-right {text-align:right;} -span.TEX {letter-spacing: -0.125em; } -span.TEX span.E{ position:relative;top:0.5ex;left:-0.0417em;} -a span.TEX span.E {text-decoration: none; } -span.LATEX span.A{ position:relative; top:-0.5ex; left:-0.4em; font-size:85%;} -span.LATEX span.TEX{ position:relative; left: -0.4em; } div.float, div.figure {margin-left: auto; margin-right: auto;} div.float img {text-align:center;} div.figure img {text-align:center;} -.marginpar {width:20%; float:right; text-align:left; margin-left:auto; margin-top:0.5em; font-size:85%; text-decoration:underline;} -.marginpar p{margin-top:0.4em; margin-bottom:0.4em;} 
+.marginpar,.reversemarginpar {width:20%; float:right; text-align:left; margin-left:auto; margin-top:0.5em; font-size:85%; text-decoration:underline;} +.marginpar p,.reversemarginpar p{margin-top:0.4em; margin-bottom:0.4em;} +.reversemarginpar{float:left;} table.equation {width:100%;} .equation td{text-align:center; } td.equation { margin-top:1em; margin-bottom:1em; } @@ -148,10 +148,11 @@ div.abstract {width:100%;} .Ovalbox-thick { padding-left:3pt; padding-right:3pt; border:solid thick; } .shadowbox { padding-left:3pt; padding-right:3pt; border:solid thin; border-right:solid thick; border-bottom:solid thick; } .doublebox { padding-left:3pt; padding-right:3pt; border-style:double; border:solid thick; } +.rotatebox{display: inline-block;} .figure img.graphics {margin-left:10%;} .lstlisting .label{margin-right:0.5em; } -div.lstlisting{font-family: monospace; white-space: nowrap; margin-top:0.5em; margin-bottom:0.5em; } -div.lstinputlisting{ font-family: monospace; white-space: nowrap; } +div.lstlisting{font-family: monospace,monospace; white-space: nowrap; margin-top:0.5em; margin-bottom:0.5em; } +div.lstinputlisting{ font-family: monospace,monospace; white-space: nowrap; } .lstinputlisting .label{margin-right:0.5em;} #TBL-1 colgroup{border-left: 1px solid black;border-right:1px solid black;} #TBL-1{border-collapse:collapse;} @@ -185,9 +186,9 @@ div.lstinputlisting{ font-family: monospace; white-space: nowrap; } #TBL-5{border-collapse:collapse;} #TBL-5 colgroup{border-left: 1px solid black;border-right:1px solid black;} #TBL-5{border-collapse:collapse;} -td#TBL-5-10-5{border-left:solid black 0.4pt;border-right:solid black 0.4pt;} -td#TBL-5-11-5{border-left:solid black 0.4pt;border-right:solid black 0.4pt;} -td#TBL-5-12-5{border-left:solid black 0.4pt;border-right:solid black 0.4pt;} +td#TBL-5-10-1{border-left:solid black 0.4pt;border-right:solid black 0.4pt;} +td#TBL-5-11-1{border-left:solid black 0.4pt;border-right:solid black 0.4pt;} 
+td#TBL-5-12-1{border-left:solid black 0.4pt;border-right:solid black 0.4pt;} #TBL-6 colgroup{border-left: 1px solid black;border-right:1px solid black;} #TBL-6{border-collapse:collapse;} #TBL-6 colgroup{border-left: 1px solid black;border-right:1px solid black;} @@ -200,8 +201,8 @@ td#TBL-5-12-5{border-left:solid black 0.4pt;border-right:solid black 0.4pt;} #TBL-6{border-collapse:collapse;} #TBL-6 colgroup{border-left: 1px solid black;border-right:1px solid black;} #TBL-6{border-collapse:collapse;} -td#TBL-6-5-5{border-left:solid black 0.4pt;border-right:solid black 0.4pt;} -td#TBL-6-6-5{border-left:solid black 0.4pt;border-right:solid black 0.4pt;} +td#TBL-6-5-1{border-left:solid black 0.4pt;border-right:solid black 0.4pt;} +td#TBL-6-6-1{border-left:solid black 0.4pt;border-right:solid black 0.4pt;} #TBL-7 colgroup{border-left: 1px solid black;border-right:1px solid black;} #TBL-7{border-collapse:collapse;} #TBL-7 colgroup{border-left: 1px solid black;border-right:1px solid black;} @@ -214,11 +215,11 @@ td#TBL-6-6-5{border-left:solid black 0.4pt;border-right:solid black 0.4pt;} #TBL-7{border-collapse:collapse;} #TBL-7 colgroup{border-left: 1px solid black;border-right:1px solid black;} #TBL-7{border-collapse:collapse;} -td#TBL-7-5-5{border-left:solid black 0.4pt;border-right:solid black 0.4pt;} -td#TBL-7-6-5{border-left:solid black 0.4pt;border-right:solid black 0.4pt;} -td#TBL-7-7-5{border-left:solid black 0.4pt;border-right:solid black 0.4pt;} -td#TBL-7-12-5{border-left:solid black 0.4pt;border-right:solid black 0.4pt;} -td#TBL-7-13-5{border-left:solid black 0.4pt;border-right:solid black 0.4pt;} +td#TBL-7-5-1{border-left:solid black 0.4pt;border-right:solid black 0.4pt;} +td#TBL-7-6-1{border-left:solid black 0.4pt;border-right:solid black 0.4pt;} +td#TBL-7-7-1{border-left:solid black 0.4pt;border-right:solid black 0.4pt;} +td#TBL-7-12-1{border-left:solid black 0.4pt;border-right:solid black 0.4pt;} +td#TBL-7-13-1{border-left:solid black 
0.4pt;border-right:solid black 0.4pt;} #TBL-8 colgroup{border-left: 1px solid black;border-right:1px solid black;} #TBL-8{border-collapse:collapse;} #TBL-8 colgroup{border-left: 1px solid black;border-right:1px solid black;} diff --git a/docs/html/userhtml0x.png b/docs/html/userhtml0x.png index 96c5cb9b..f004430a 100644 Binary files a/docs/html/userhtml0x.png and b/docs/html/userhtml0x.png differ diff --git a/docs/html/userhtml1x.png b/docs/html/userhtml1x.png index 4c10c200..20f909a7 100644 Binary files a/docs/html/userhtml1x.png and b/docs/html/userhtml1x.png differ diff --git a/docs/html/userhtmlli1.html b/docs/html/userhtmlli1.html index aa1aacda..c8ce8f48 100644 --- a/docs/html/userhtmlli1.html +++ b/docs/html/userhtmlli1.html @@ -78,7 +78,7 @@ class="cmr-12">of a software development project started in 2007, named MLD2P4, implemented a multilevel version of some domain decomposition preconditioners of additive-Schwarz type and was based on a parallel decoupled version of the well known +class="cmr-12">additive-Schwarz type, and was based on a parallel decoupled version of the well known @@ -93,7 +93,7 @@ class="cmr-12">new algorithms and functionalities for the setup and application preconditioners with the final aims of improving efficiency and scalability when tens of thousands cores are used and of boosting reliability in dealing with general +class="cmr-12">thousands cores are used, and of boosting reliability in dealing with general symmetric positive definite linear systems. Due to the significant number  Pothen, Distributed-memory parallel algorithms for matching and coloring, in PCO11 New Trends in Parallel Computing and Optimization, +class="cmr-12">, in PCO’11 New Trends in Parallel Computing and Optimization, IEEE International Symposium on Parallel and Distributed Processing [11]   A. Buttari, P. D’Ambra, D. di Serafino, S. Filippone, 2LEV-D2P4: a + id="XDV2013">P. D’Ambra and P. S. 
Vassilevski, Adaptive AMG with coarsening based Package of High-Performance Preconditioners for Scientific and Engineering +class="cmti-12">on compatible weighted matching, Computing and Visualization in Science, Applications, Appl. Algebra Engrg. Comm. Comput., 18(3), 2007, 223–239. +class="cmr-12">16, (2013) 59–76.

[12]   P. D’Ambra, S. Filippone and P. S. Vassilevski, BootCMatch: a software + package for bootstrap AMG based on graph weighted matching, ACM + Transactions on Mathematical Software, 44, (2018) 39:1–39:25. + + + +

+

+ [13]   P., 2020, arXiv:2006.16147v3. - - -

[13][14]   )

[14][15]   SIAM Journal on Matrix Analysis and Applications, 20 (3), 1999, 7

[15][16]   Software, 16 (1) 1990, 1–17.

[16][17]   Transactions on Mathematical Software, 14 (1) 1988, 1–17.

[17][18]   .

[18][19]   . ACM Transactions on on Mathematical Software, 38 (4), 2012, art. 23. + + +

[19][20]   Library for Parallel Linear Algebra Computation on Sparse Matric class="cmr-12">, ACM Transactions on Mathematical Software, 26 (4), 2000, 527–550. - - -

[20][21]   2016, 23:501-518

[21][22]   , MIT Press, 1998.

[22][23]   Mathematical Software, 5 (3), 1979, 308–323.

[23][24]   ACM Transactions on Mathematical Software, 29 (2), 2003, 110̵

[24][25]   Numerical Linear Algebra with Applications, 15 (5), 2008, 473R

[25][26]   , 2nd edition, SIAM, 2003.

+ + +

[26][27]   , Cambridge University Press, 1996.

- - -

[27][28]   Press, 1998.

[28][29]    Oosterlee, Multigrid, Academic Press, 2001.

[29][30]   editor, Proceedings of SuperComputing 2000, Dallas, 2000.

[30][31]   Algebraic Multigrid by Smoothed class="cmti-12">Aggregation for Second and Fourth Order Elliptic Problems, Computing, 56 (3) 1996, 179–196. -

-

- [31]   P. D’Ambra and P. S. Vassilevski, Adaptive AMG with coarsening based - on compatible weighted matching, Computing and Visualization in Science, - 16, (2013) 59–76. -

-

- [32]   P. D’Ambra, S. Filippone and P. S. Vassilevski, BootCMatch: a software - package for bootstrap AMG based on graph weighted matching, ACM - Transactions on Mathematical Software, 44, (2018) 39:1–39:25.

+class="cmr-12">(3) 1996, 179–196.

diff --git a/docs/html/userhtmlse1.html b/docs/html/userhtmlse1.html index 5f8f1c19..5fed24a9 100644 --- a/docs/html/userhtmlse1.html +++ b/docs/html/userhtmlse1.html @@ -88,7 +88,7 @@ class="cmr-12">4, 2829]), to be used in the iterative solution of linear systems, 4, 2425]; they can be combined with Jacobi hybrid forward/backward +class="cmr-12">; they can be combined with Jacobi, hybrid forward/backwardGauss-Seidel, block-Jacobi, and additive Schwarz smoothers. The Jacobi, block-Jacobi +class="cmr-12">Gauss-Seidel, block-Jacobi and additive Schwarz smoothers with various versions ofand Gauss-Seidel smoothers are also available in the local incomplete factorizations and approximate inverses on the blocks. The +Jacobi, block-Jacobi and Gauss-Seidel smoothers are also available in the 1version. -

1 +version [13]. +

An algebraic approach is used to generate a hierarchy of coarse-level matrices and operators, without explicitly using any information on the geometry of the original @@ -150,19 +159,22 @@ class="cmr-12">,  3031], and already included in the previous versions of the package [116, 10]; + + +

  • a coupled, parallel implementation of the Coarsening based on Compatible @@ -171,33 +183,32 @@ class="cmr-12">Weighted Matching introduced in [3111, 3212] and described in detail in [1213];
  • - - - -

    Either exact or approximate solvers can be used on the coarsest-level system. We provide interfaces to various sparse LU factorizations from external packages, native incomplete +class="cmr-12">interfaces to various parallel and sequential sparse LU factorizations from external +packages, sequential native incomplete LU and approximate inverse factorizations, LU and approximate inverse factorizations, weighted Jacobi, hybrid Gauss-Seidel, +class="cmr-12">parallel weighted Jacobi, hybrid Gauss-Seidel, block-Jacobi solvers and calls to block-Jacobi solvers and a recursive call to preconditioned Krylov methods; all +class="cmr-12">preconditioned Krylov methods; all smoothers can be also exploited as one-level smoothers can be also exploited as one-level preconditioners. -

    preconditioners. +

    AMG4PSBLAS is written in Fortran 2003, following an object-oriented design Single and double precision implementations of AMG4PSBLAS are ava class="cmr-12">for both the real and the complex case, which can be used through a single interface. -

    AMG4PSBLAS has been designed to implement scalable and easy-to-use multilevel preconditioners in the context of the PSBLAS (Parallel Sparse BLAS) @@ -221,11 +232,11 @@ class="cmr-12">computational framework [1920, 1819]. PSBLAS provides basic linear algebra operators In the most recent version of PSBLAS (release 3.7), a plug-in for included; it includes CUDA versions of main vector operations and of sparse matrix-vector multiplication, so that Krylov methods coupled with AMG4PBLAS +class="cmr-12">matrix-vector multiplication, so that Krylov methods coupled with AMG4PSBLAS preconditioners relying on Jacobi and block-Jacobi smoothers with sparse approximate inverses on the blocks can be efficiently executed on cluster of GPUs. -

    AMG4PSBLAS has a layered and modular software architecture where three main layers can be identified. The lower layer consists of the PSBLAS kernels, the middle @@ -274,6 +285,9 @@ class="cmr-12">layers can be identified. The lower layer consists of the PSBLAS class="cmr-12">one implements the construction and application phases of the preconditioners, and the upper one provides a uniform interface to all the preconditioners. This architecture + + + allows for different levels of use of the package: few black-box routines at the upper layer allow all users to easily build and apply any preconditione class="cmr-12">AMG4PSBLAS; facilities are also available allowing expert users to extend the set of smoothers and solvers for building new versions of the preconditioners (see - - - Section 6). -

    This guide is organized as follows. General information on the distribution of the source code is reported in Sectionand multilevel (i.e., AMG) preconditioners with the Krylov solver class="cmr-12">PSBLAS [1718].

    type

    e

    string

    g

    der




    No preconditioner No preconditioner

    NONE

    Considered to use the PSBLAS Krylov solvers with no preconditioner.




    Diagonal Diagonal

    DIAGL1-JACOBI

    Diagonal preconditioner. For any zero diagonal entry of the matrix to be preconditioned, the corresponding entry @@ -308,7 +308,7 @@ of the preconditioner is set to 1.




    Gauss-Seidel Gauss-Seidel

    GSL1-GS

    Hybrid Gauss-Seidel (forward), that is, global block Jacobi with Gauss-Seidel as local solver.




    Symmetrized Gauss-SeidelSymmetrized Gauss-Seidel

    FBGSL1-FBGS

    Symmetrized hybrid Gauss-Seidel, that is, forward Gauss-Seidel followed by backward Gauss-Seidel.




    Block Jacobi Block Jacobi

    BJACL1-BJAC

    Block-Jacobi with ILU(0) on the local blocks.




    Additive Schwarz Additive Schwarz

    AS

    Additive Schwarz (AS), with overlap 1 and ILU(0) on the local blocks.




    Multilevel Multilevel

    ML

    V-cycle with one hybrid forward Gauss-Seidel (GS) sweep as pre-smoother and one hybrid backward diff --git a/docs/html/userhtmlse6.html b/docs/html/userhtmlse6.html index cf4b33c9..b4614460 100644 --- a/docs/html/userhtmlse6.html +++ b/docs/html/userhtmlse6.html @@ -170,33 +170,33 @@ cellspacing="0" cellpadding="0" >

    smoother

    smoother

    class(amg_x_base_smoother_type)

    The user-defined new smoother to be employed in the preconditioner.

    solver

    solver

    class(amg_x_base_solver_type)

    The user-defined new solver to be employed in the preconditioner.

    PSBLAS error handling routines; for further details see the PSBLA [1718]. diff --git a/docs/html/userhtmlse9.html b/docs/html/userhtmlse9.html index 220606fb..21676acb 100644 --- a/docs/html/userhtmlse9.html +++ b/docs/html/userhtmlse9.html @@ -91,7 +91,7 @@ class="cmr-12">Trolling, insulting or derogatory comments, and personal or polit class="cmr-12">Public or private harassment

  • Publishing others private information, such as a physical or email address, +class="cmr-12">Publishing others’ private information, such as a physical or email address, without their explicit permission
  • @@ -234,7 +234,7 @@ class="cmr-12">_of_conduct .html. Community Impact Guidelines were inspired by Mozillas code of conduct +class="cmr-12">. Community Impact Guidelines were inspired by Mozilla’s code of conduct enforcement ladder. For answers to common questions about this code of conduct, see BLAS
    [1516, 1617, 2223] Many vendors provide optimized versions of BLAS; if no MPI
    [2122, 2728] A version of MPI is available on most high-performance computing PSBLAS
    [1718, 1920] Parallel Sparse BLAS (PSBLAS) is available from

    a a

    type(in).

    The sparse matrix structure containing the local part of the matrix , single/double precision version of AMG4PSBLAS class="cmr-12">under use. See the PSBLAS User’s Guide for details [1718].

    desc_adesc_a

    type(in).

    The communication descriptor of a. See the PSBLAS User’s Guide class="cmr-12">for details [1718].

    info info

    integer,out).

    Error code. If no error, 0 is returned. See Section 7 for details.

    amold amold

    class( optional.

    The desired dynamic type for internal matrix components; this allows e.g. running on GPUs; it needs not be the same class="cmr-12">processes. See the PSBLAS User’s Guide for details [1718].

    vmold vmold

    class( optional.

    The desired dynamic type for internal vector components; this allows e.g. running on GPUs.

    imold imold

    class( optional.

    The desired dynamic type for internal integer vector components; this allows e.g. running on GPUs. diff --git a/docs/html/userhtmlsu11.html b/docs/html/userhtmlsu11.html index 3dc932dd..6576be86 100644 --- a/docs/html/userhtmlsu11.html +++ b/docs/html/userhtmlsu11.html @@ -90,9 +90,9 @@ cellspacing="0" cellpadding="0" >

    a a

    type(in).

    The sparse matrix structure containing the local part of the matrix , single/double precision version of AMG4PSBLAS class="cmr-12">under use. See the PSBLAS User’s Guide for details [1718].

    desc_adesc_a

    type(in).

    The communication descriptor of a. See the PSBLAS User’s Guide class="cmr-12">for details [1718].

    info info

    integer,out).

    Error code. If no error, 0 is returned. See Section 7 for details.

    amold amold

    class( optional.

    The desired dynamic type for internal matrix components; this allows e.g. running on GPUs; it needs not be the same class="cmr-12">processes. See the PSBLAS User’s Guide for details [1718].

    vmold vmold

    class( optional.

    The desired dynamic type for internal vector components; this allows e.g. running on GPUs.

    imold imold

    class( optional.

    The desired dynamic type for internal integer vector components; this allows e.g. running on GPUs.

    The method can be used to build multilevel preconditioners too. diff --git a/docs/html/userhtmlsu12.html b/docs/html/userhtmlsu12.html index 6643e608..2689d6c3 100644 --- a/docs/html/userhtmlsu12.html +++ b/docs/html/userhtmlsu12.html @@ -96,9 +96,9 @@ cellspacing="0" cellpadding="0" >

    x x

    type(kind_parameter), dimension(:), intent(in)—.

    The local part of the vector x, single/double precision version of AMG4PSBLAS under use.

    y y

    type(kind_parameter), dimension(:), intent(out)—.

    The local part of the vector y, single/double precision version of AMG4PSBLAS under use.

    desc_adesc_a

    type(in).

    The communication descriptor associated to the matrix to be preconditioned.

    info info

    integer,out).

    Error code. If no error, 0 is returned. See Section 7 for details.

    trans trans

    character((in).

    If trans -1).

    work work

    type(kind_parameter), dimension(:), optional, target—.

    Workspace. Its size should be at least 4, single/double precision version of AMG4PSBLAS under use. diff --git a/docs/html/userhtmlsu13.html b/docs/html/userhtmlsu13.html index b73c6bc6..2a82b611 100644 --- a/docs/html/userhtmlsu13.html +++ b/docs/html/userhtmlsu13.html @@ -57,9 +57,9 @@ cellspacing="0" cellpadding="0" >

    info info

    integer,out).

    Error code. If no error, 0 is returned. See Section for details. diff --git a/docs/html/userhtmlsu14.html b/docs/html/userhtmlsu14.html index 87be8c82..054451ae 100644 --- a/docs/html/userhtmlsu14.html +++ b/docs/html/userhtmlsu14.html @@ -75,9 +75,9 @@ cellspacing="0" cellpadding="0" >

    info info

    integer,out).

    Error code. If no error, 0 is returned. See Section 7 for details.

    iout iout

    integer, optional.

    The id of the file where the preconditioner description will be printed; the default is the standard output.

    root root

    integer, optional.

    The id of the process where the preconditioner description will be printed; the default is psb_root_.

    verbosity verbosity

    integer, optional.

    The verbosity level of the description. Default value is 0. For a distributed multilevel preconditioner the size of the coarsematrices on every process. diff --git a/docs/html/userhtmlsu15.html b/docs/html/userhtmlsu15.html index 24cf551a..0a318d17 100644 --- a/docs/html/userhtmlsu15.html +++ b/docs/html/userhtmlsu15.html @@ -77,9 +77,9 @@ cellspacing="0" cellpadding="0" >

    info info

    integer,out).

    Error code. If no error, 0 is returned. See Section 7 for details.

    amold amold

    class( optional.

    The desired dynamic type for internal matrix components; this allows e.g. running on GPUs; it needs not be the same class="cmr-12">processes. See the PSBLAS User’s Guide for details [1718].

    pout pout

    type(out).

    The copy of the preconditioner data structure. Note that x , single/double precision version of AMG4PSBLAS under use.

    info info

    integer,out).

    Error code. If no error, 0 is returned. See Section 7 for details.

    global global

    logical, optional.

    Whether the global or local preconditioner memory occupatio is false.. Return memory footprint in bytes. @@ -333,9 +333,9 @@ cellspacing="0" cellpadding="0" >

    info info

    integer,out).

    Error code. If no error, 0 is returned. See Section 7 for details.

    vmold vmold

    class( optional.

    The desired dynamic type for internal vector components; this allows e.g. running on GPUs.

    info info

    integer,out).

    Error code. If no error, 0 is returned. See Section 7 for details. diff --git a/docs/html/userhtmlsu2.html b/docs/html/userhtmlsu2.html index 2278f896..9e4552ba 100644 --- a/docs/html/userhtmlsu2.html +++ b/docs/html/userhtmlsu2.html @@ -44,7 +44,7 @@ class="cmbx-12">UMFPACK

    [1314] A sparse LU factorization package included in the SuiteSparse SuperLU
    [1415] A sparse LU factorization package available from _Dist
    [2324] A sparse LU factorization package available from the same  ‘--prefix=$HOME$HOME’. 
    program.

    The part of the code dealing with reading and assembling the sparse matrix and the right-hand side vector and the deallocation of the relevant data structured, performed +class="cmr-12">right-hand side vector and the deallocation of the relevant data structures, performed through the PSBLAS routines for sparse matrix and vector management, is not the use of the PSBLAS routines, see the PSBLAS User’s Guide class="cmr-12"> [1718].

    2 shows how to set a V-cycle preconditioner which applies 1 block-Jacobi sweep as pre- and +class="cmr-12">how to set a V-cycle preconditioner which applies 1 block-Jacobi sweep as pre- post-smoother, and solves the coarsest-level system with 8 block-Jacobi sweeps. Note +class="cmr-12">and post-smoother, and solves the coarsest-level system with 8 block-Jacobi that the ILU(0) factorization (plus triangular solve) is used as local solver for the +class="cmr-12">sweeps. Note that the ILU(0) factorization (plus triangular solve) is used as block-Jacobi sweeps, since this is the default associated with block-Jacobi +class="cmr-12">local solver for the block-Jacobi sweeps, since this is the default associated and set bywith block-Jacobi and set by P%init. Furthermore, specifying block-Jacobi as coarsest-level +class="cmr-12">. Furthermore, specifying block-Jacobi as solver implies that the coarsest-level matrix is distributed among the processes. +class="cmr-12">coarsest-level solver implies that the coarsest-level matrix is distributed among Figurethe processes. Figure 3 shows how to set a W-cycle preconditioner using the Coarsening +class="cmr-12">shows how to set a W-cycle preconditioner using the based on Compatible Weighted Matching. It applies 2 hybrid Gauss-Seidel +class="cmr-12">Coarsening based on Compatible Weighted Matching, aggregates of size at sweeps as pre- and post-smoother, and solves the coarsest-level system with the +class="cmr-12">most 8 and smoothed prolongators. It applies 2 hybrid Gauss-Seidel sweeps as multifrontal LU factorization implemented in MUMPS. It is specified that the +class="cmr-12">pre- and post-smoother, and solves the coarsest-level system with the parallel coarsest-level matrix is distributed, since MUMPS can be used on both replicated and +class="cmr-12">flexible Conjugate Gradient method (KRM) coupled with the block-Jacobi distributed matrices, and by default it is used on replicated ones. 
The code +class="cmr-12">preconditioner having ILU(0) on the blocks. Default parameters are used for stopping fragments shown in Figurescriterion of the coarsest solver. Note that, also in this case, specifying KRM as +coarsest-level solver implies that the coarsest-level matrix is distributed among the +processes. +

    The code fragments shown in Figures 2 and 3 are included in the example program file -amg_dexample_ml.f90 too. DA CORREGGERE NEL CODICE ESEMPIO +class="cmr-12">are included in the example program 3 -

    file amg_dexample_ml.f90 too. +

    Finally, Figure nonsymmetric. The corresponding example program is available in t amg_dexample_1lev.f90. -

    For all the previous preconditioners, example programs where the sparse matrix and the right-hand side are generated by discretizing a PDE with Dirichlet @@ -276,7 +280,7 @@ class="cmr-12">. -


    @@ -284,7 +288,7 @@ class="cmr-12">.
    -

    +

    ... ...  
    ! build a V-cycle preconditioner with 1 block-Jacobi sweep (with @@ -298,7 +302,7 @@ class="cmr-12">.  
      call P%smoothers_build(A,desc_A,info)  
    ... ...
    -

    +


    Listing 2: setup of a multilevel preconditioner based on the default decoupled coarsening
    @@ -309,7 +313,7 @@ class="content">setup of a multilevel preconditioner based on the default decoup -


    @@ -317,7 +321,7 @@ class="content">setup of a multilevel preconditioner based on the default decoup
    -

    +

    ... ...  
    ! build a W-cycle preconditioner with 2 hybrid Gauss-Seidel sweeps @@ -325,20 +329,21 @@ class="content">setup of a multilevel preconditioner based on the default decoup  
    ! matrix, and MUMPS as coarsest-level solver  
      call P%init(’ML’,info)  
      call P%set(’PAR_AGGR_ALG’,’COUPLED’,info) + 
    call P%set(’AGGR_TYPE’,’MATCHBOXP’,info) + 
    call P%set(’AGGR_SIZE’,8,info)  
      call P%set(’ML_CYCLE’,’WCYCLE’,info)  
      call P%set(’SMOOTHER_TYPE’,’FBGS’,info)  
      call P%set(’SMOOTHER_SWEEPS’,2,info) - 
      call P%set(’COARSE_SOLVE’,’MUMPS’,info) - 
      call P%set(’COARSE_MAT’,’DIST’,info) + 
      call P%set(’COARSE_SOLVE’,’KRM’,info)  
      call P%hierarchy_build(A,desc_A,info)  
      call P%smoothers_build(A,desc_A,info)  
    ... ...
    -

    +


    Listing 3: setup of a multilevel preconditioner based on the coupled coarsening based -on weighted matching
    +class="content">setup of a multilevel preconditioner based on the coupled coarsening using +weighted matching @@ -346,7 +351,7 @@ on weighted matching -


    @@ -354,7 +359,7 @@ on weighted matching
    -

    +

    ... ...  
    ! set RAS with overlap 2 and ILU(0) on the local blocks @@ -365,7 +370,7 @@ on weighted matching
     
    ! solve Ax=b with preconditioned BiCGSTAB  
      call psb_krylov(’BICGSTAB’,A,P,b,x,tol,desc_A,info)
    -

    +


    Listing 4: setup of a one-level Schwarz preconditioner.
    diff --git a/docs/html/userhtmlsu7.html b/docs/html/userhtmlsu7.html index 828ca2bd..55791dc3 100644 --- a/docs/html/userhtmlsu7.html +++ b/docs/html/userhtmlsu7.html @@ -54,9 +54,9 @@ cellspacing="0" cellpadding="0" >

    contxt contxt

    type(in).

    The communication context.

    ptype ptype

    character(in) .

    The type of preconditioner. Its values are specified in Table 1.

    Note that strings are case insensitive.

    info info

    integer,out).

    Error code. If no error, 0 is returned. See Section 7 for details. diff --git a/docs/html/userhtmlsu8.html b/docs/html/userhtmlsu8.html index 7b85b5fb..d3f36cc0 100644 --- a/docs/html/userhtmlsu8.html +++ b/docs/html/userhtmlsu8.html @@ -79,9 +79,9 @@ cellspacing="0" cellpadding="0" >

    what what

    character(len=*).

    The parameter to be set. It can be specified through its name; the 8.

    val   

    integer or in).

    The value of the parameter to be set. The list of allowed values and len=*), it is also treated as case insensitive.

    info info

    integer,out).

    Error code. If no error, 0 is returned. See Section 7 for details.

    ilev ilev

    integer,in).

    For the multilevel preconditioner, the level at which the 8).

    ilmax ilmax

    integer,in).

    For the multilevel preconditioner, when both ilev The levels are numbered in increasing order starting from the fin one, i.e., level 1 is the finest level.

    pos pos

    character(in).

    Whether the other arguments apply only to the pre-smoother not concern the smoothers, class="cmtt-12">pos is ignored.

    idx idx

    integer,in).

    An auxiliary input argument that can be passed to the underlying






    what a type e

    val val

    dfault t

    cnts






    ML_CYCLE

    character(len=*) =*)

    VCYCLE

    ADD

    VCYCLE

    Multilevel cycle: V-cycle, W-cycle, K-cycle, and additive composition.






    CYCLE_SWEEPS

    integer integer

    Any integer

    number 1

    1 ≥ 1

    1

    Number of multilevel cycles.







    Table 2:




    what a type e

    val val

    dfault t

    cnts






    MIN_COARSE_SIZE_PER_PROCESS

    integer integer

    Any number

    > 0

    200 > 0

    200

    Coarse size threshold per process. The aggregation stops if the global number of variables of the computed coarsest matrix @@ -723,17 +723,17 @@ multiplied by the number of processes (see Note).






    MIN_COARSE_SIZE

    integer integer

    Any number

    > 0

    -1 > 0

    -1

    Coarse size threshold. The aggregation stops if the global number of variables of the computed coarsest matrix is lower @@ -746,17 +746,17 @@ class="cmtt-10x-x-109">MIN_COARSE_SIZE_PER_PROCESS.






    MIN_CR_RATIO

    real real

    Any number

    > 1

    1.5 > 1

    1.5

    Minimum coarsening ratio. The aggregation stops if the ratio between the global matrix dimensions at @@ -764,23 +764,23 @@ two consecutive levels is lower than or equal to this threshold (see Note).






    MAX_LEVS

    integer integer

    Any integer

    number > 1

    20 > 1

    20

    Maximum number of levels. The aggregation stops if the number of levels reaches this value (see Note).






    PAR_AGGR_ALG

    character(len=*) =*)

    ’DEC’, ’SYMDEC’, ’COUPLED’ ’COUPLED’

    ’DEC’ ’DEC’

    Parallel aggregation algorithm.

    the SYMDEC option applies decoupled @@ -808,7 +808,7 @@ class="cmmi-10x-x-109">AT .






    AGGR_TYPE

    character(len=*) =*)

    SOC1

    SOC1, , MATCHBOXP

    SOC1

    Type of aggregation algorithm: currently, for the decoupled aggregation we implement two measures of strength of connection, the one by Vaněk, Mandel and Brezina [30], +href="userhtmlli5.html#XVANEK_MANDEL_BREZINA">31], and the one by Gratton et al [20]. The +href="userhtmlli5.html#XGrHeJi:16">21]. The coupled aggregation is based on a parallel version of the half-approximate matching implemented in the MatchBox-P software @@ -849,13 +849,13 @@ package [8].






    AGGR_SIZE

    integer integer

    Any integer

    power of 2, with @@ -863,8 +863,8 @@ with class="cmtt-10x-x-109">aggr_size -2

    4

    4

    Maximum size of aggregates when the coupled aggregation based on matching is applied. For aggressive coarsening @@ -876,7 +876,7 @@ and ’MATCHBOXP’






    AGGR_PROL

    character(len=*) =*)

    SMOOTHED, UNSMOOTHED

    SMOOTHED

    Prolongator used by the aggregation algorithm: smoothed or unsmoothed (i.e., tentative prolongator).






    Note. The aggregation algorithm stops when at least one of the following criteria is met: the coarse size threshold,
    -
    the minimum coarsening ratio, or the maximum number of levels is reached.
    -
    Therefore, the actual number of levels may be smaller than the specified maximum number of levels.
    -
    the minimum coarsening ratio, or the maximum number of levels is reached.
    +
    Therefore, the actual number of levels may be smaller than the specified maximum number of levels.
    +




    @@ -951,7 +951,7 @@ id="TBL-6-3">




    what a type e

    val val

    dfault t

    cnts






    AGGR_ORD

    character(len=*) =*)

    ’NATURAL’

    ’DEGREE’ ’DEGREE’

    ’NATURAL’ ’NATURAL’

    Initial ordering of indices for the decoupled aggregation algorithm: either natural ordering or sorted by @@ -1009,7 +1009,7 @@ descending degrees of the nodes in the matrix graph.






    AGGR_THRESH

    real(kind_parameter) )

    Any real

    number  [0,1]

    0.01 ,1]

    0.01

    The threshold θ in the strength of connection algorithm. See also the note at the bottom of this table.






    AGGR_FILTER

    character(len=*) =*)

    ’FILTER’

    ’NOFILTER’ ’NOFILTER’

    ’NOFILTER’ ’NOFILTER’

    Matrix used in computing the smoothed prolongator: filtered or unfiltered.






    Note. Different thresholds at different levels, such as those used in [30, Section 5.1], can be easily set by invoking the rou-
    -
    tine 31, Section 5.1], can be easily set by invoking the rou-
    +
    tine set with the parameter ilev.
    -





    Table 4:




    what a type e

    val val

    dfault t

    cnts






    COARSE_MAT

    character(len=*) =*)

    DIST

    REPL

    REPL

    Coarsest matrix layout: distributed among the processes or replicated on each of them.






    COARSE_SOLVE

    character(len=*) =*)

    MUMPS

    KRM

    See Note.’

    See Note.

    Solver used at the coarsest level: sequential LU from MUMPS, UMFPACK, or SuperLU (plus triangular solve); distributed LU from @@ -1208,7 +1208,7 @@ MUMPS or SuperLU_Dist (plus triangular solve); point-Jacobi, hybrid Gauss-Seidel or block-Jacobi and related 1-versions; Krylov Method (Flexible +class="cmr-8">1-versions; Krylov Method (flexible Conjugate Gradient) coupled with the block-Jacobi preconditioner with ILU(0) on the blocks. Note that






    COARSE_SUBSOLVE

    character(len=*) =*)

    ILU

    AINV

    See Note.

    Solver for the diagonal blocks of the coarse matrix, -in case the block Jacobi solver is chosen as -coarsest-level solver: ILU(p), ILU(p,t), MILU(

    See Note.

    Solver for the diagonal blocks of the coarsest +matrix, in case the block Jacobi solver is chosen +as coarsest-level solver: ILU(p), ILU(p,t), MILU(p), LU from MUMPS, SuperLU or UMFPACK (plus triangular solve), Approximate Inverses @@ -1303,29 +1303,29 @@ and SuperLU_Dist are available only in double precision.






    Note. Defaults for COARSE_SOLVE and COARSE_SUBSOLVE are chosen in the following order:
    -
    single precision version –
    single precision version – MUMPS if installed, then SLU if installed, ILU otherwise;
    -
    double precision version –
    double precision version – UMF if installed, then MUMPS if installed, then SLU if installed, ILU otherwise.
    -




    what a type e

    val val

    dfault t

    cts






    COARSE_SWEEPS

    integer integer

    Any integer

    number > -0

    10

    10

    Number of sweeps when JACOBI, GS or BJAC is chosen as coarsest-level solver.






    COARSE_FILLIN

    integer integer

    Any integer

    number -0

    0

    0

    Fill-in level p of the ILU factorizations and first fill-in for the approximate inverses.






    COARSE_ILUTHRS

    real(kind_parameter) )

    Any real

    number -0

    0

    0

    Drop tolerance t in the ILU(p,t) factorization and first drop-tolerance for the approximate inverses.






    Note. Further options for coarse solvers are contained in Table 6.
    -
    For a first use it is suggested to use the default options obtained by simply selecting the solver type.
    -
    For a first use it is suggested to use the default options obtained by simply selecting the solver type.
    +





    Table 5: Parameters defining the coarse-space correction at the coarsest level -(continued).
    +>Table 5: Parameters defining the solver at the coarsest level (continued).
    @@ -1466,7 +1465,7 @@ id="TBL-8-3">




    what a type e

    val val

    dfault t

    cnts






    BJAC_STOP

    character(len=*) =*)

    FALSE

    TRUE

    FALSE

    Select whether to use a stopping criterion for the Block-Jacobi method used as a coarse solver.






    BJAC_TRACE

    character(len=*) =*)

    FALSE

    TRUE

    FALSE

    Select whether to print a trace for the calculated residual for the Block-Jacobi method used as a coarse solver.






    BJAC_ITRACE

    integer integer

    Any integer > -0

    -1

    -1

    Number of iterations after which a trace is to be printed.






    BJAC_RESCHECK

    integer integer

    Any integer > -0

    -1

    -1

    Number of iterations after which a residual is to be calculated.






    BJAC_STOPTOL

    real(kind_parameter) )

    Any real < 1

    0 < 1

    0

    Tolerance for the stopping criterion on the residual.












    Table 6: Additional parameters defining the coarse-space correction at the coarsest -level.
    +class="content">Additional parameters defining the solver at the coarsest level.
    @@ -1638,7 +1636,7 @@ id="TBL-9-3">




    what a type e

    val val

    dfault t

    cnts






    SMOOTHER_TYPE

    character(len=*) =*)

    JACOBIL1-FBGS

    FBGS

    Type of smoother used in the multilevel -point-Jacobi and Additive Schwarz. class="cmr-10">It is ignored by one-level preconditioners.






    SUB_SOLVE

    character(len=*) =*)

    JACOBI

    AINV

    GS and and Additive Schwarz one-level preconditioners preconditioners

    The local solver to be used with the . See class="cmr-10">Note for details on hybrid Gauss-Seidel.






    SMOOTHER_SWEEPS

    integer integer

    Any integer

    number 00

    1 1

    Number of sweeps of the smoother or , class="cmr-10">respectively.






    SUB_OVR

    integer integer

    Any integer

    number 00

    1 1

    Number of overlap layers, for Additive Schwarz only.







    Table 7:




    what a type e

    val val

    dfault t

    cnts






    SUB_RESTR

    character(len=*) =*)

    HALO

    NONE

    HALO

    Type of restriction operator, for Additive classical Additive Schwarz smoother and class="cmr-10">its RAS variant.






    SUB_PROL

    character(len=*) =*)

    SUM

    NONE

    NONE

    Type of prolongation operator, for Additive for its RAS variant.






    SUB_FILLIN

    integer integer

    Any integer

    number 0 0

    0 0

    Fill-in level p of the incomplete LU class="cmr-10">factorizations.






    SUB_ILUTHRS

    real(kind_parameter) )

    Any real number 0 0

    0 0

    Drop tolerance t ) class="cmr-10">factorization.






    MUMPS_LOC_GLOB

    character(len=*) =*)

    LOCAL_SOLVER

    GLOBAL_SOLVER

    GLOBAL_SOLVER

    Whether MUMPS should be used as a only on the part of the matrix local to each class="cmr-10">process.






    MUMPS_IPAR_ENTRY

    integer integer

    Any integer number number

    0 0

    Set an entry in the MUMPS integer control optional class="cmr-10">argument.






    MUMPS_RPAR_ENTRY

    real real

    Any real number Any real number

    0 0

    Set an entry in the MUMPS real control optional class="cmr-10">argument.







    Table 8:

    a a

    type(in).

    The sparse matrix structure containing the local part of the matrix , single/double precision version of AMG4PSBLAS class="cmr-12">under use. See the PSBLAS User’s Guide for details [1718].

    desc_adesc_a

    type(in).

    The communication descriptor of a. See the PSBLAS User’s Guide class="cmr-12">for details [1718].

    info info

    integer,out).

    Error code. If no error, 0 is returned. See Section 7 for details.

    diff --git a/docs/src/abstract.tex b/docs/src/abstract.tex index bfe73427..74e85b80 100644 --- a/docs/src/abstract.tex +++ b/docs/src/abstract.tex @@ -6,11 +6,11 @@ \textsc{AMG4PSBLAS (Algebraic MultiGrid Preconditioners Package based on PSBLAS}) is a package of parallel algebraic multilevel preconditioners included in the PSCToolkit (Parallel Sparse Computation Toolkit) software framework. It is a progress of a software development project started in 2007, named MLD2P4, which originally implemented a -multilevel version of some domain decomposition preconditioners of additive-Schwarz type and was based on a parallel decoupled version of the well known smoothed +multilevel version of some domain decomposition preconditioners of additive-Schwarz type, and was based on a parallel decoupled version of the well known smoothed aggregation method to generate the multilevel hierarchy of coarser matrices. In the last years, within the context of the EU-H2020 EoCoE project (Energy Oriented Center of Excellence), the package was extended for including new algorithms and functionalities for the setup and application of new AMG preconditioners with the final aims of improving efficiency and scalability when tens of thousands of cores are -used and of boosting reliability in dealing with general symmetric positive definite linear systems. +used, and of boosting reliability in dealing with general symmetric positive definite linear systems. Due to the significant number of changes and the increase in scope, we decided to rename the package as AMG4PSBLAS. 
AMG4PSBLAS has been designed to provide scalable and easy-to-use preconditioners diff --git a/docs/src/bibliography.tex b/docs/src/bibliography.tex index f2bf8fba..e82aa550 100644 --- a/docs/src/bibliography.tex +++ b/docs/src/bibliography.tex @@ -43,6 +43,7 @@ A.~Buttari, P.~D'Ambra, D.~di~Serafino, S.~Filippone, for scientific and engineering applications}, Applicable Algebra in Engineering, Communications and Computing, 18 (3) 2007, 223--239. +% %Published online: 13 February 2007, {\tt http://dx.doi.org/10.1007/s00200-007-0035-z} % \bibitem{CAI_SARKIS} @@ -76,11 +77,15 @@ P.~D'Ambra, D.~di~Serafino, S.~Filippone, Algebraic Domain Decomposition Preconditioners in Fortran 95}, ACM Trans. Math. Softw., 37(3), 2010, art. 30. % -\bibitem{BDDF2007} -A.~Buttari, P.~D'Ambra, D.~di Serafino, S.~Filippone, -\emph{2{LEV}-{D}2{P}4: a Package of High-Performance Preconditioners -for Scientific and Engineering Applications}, Appl. Algebra Engrg. Comm. Comput., -18(3), 2007, 223--239. +\bibitem{DV2013} +P.~D'Ambra and P.\,S.~Vassilevski, +{\em Adaptive AMG with coarsening based on compatible weighted matching}, +Computing and Visualization in Science, 16, (2013) 59--76. +% +\bibitem{DFV2018} +P.~D'Ambra, S.~Filippone and P.\,S.~Vassilevski, +{\em BootCMatch: a software package for bootstrap AMG based on graph weighted matching}, +ACM Transactions on Mathematical Software, 44, (2018) 39:1--39:25. % \bibitem{DDF2020} P.~D'Ambra, F~Durastante, S.~Filippone, @@ -139,7 +144,6 @@ ACM Transactions on Mathematical Software, 26 (4), 2000, 527--550. S. Gratton, P. Henon, P. Jiranek and X. Vasseur, {\em Reducing complexity of algebraic multigrid by aggregation}, Numerical Lin. 
Algebra with Applications, 2016, 23:501-518 - % \bibitem{MPI2} W.~Gropp, S.~Huss-Lederman, A.~Lumsdaine, E.~Lusk, B.~Nitzberg, W.~Saphir, M.~Snir, @@ -192,14 +196,4 @@ P.~Van\v{e}k, J.~Mandel, M.~Brezina, {\em Algebraic Multigrid by Smoothed Aggregation for Second and Fourth Order Elliptic Problems}, Computing, 56 (3) 1996, 179--196. % -\bibitem{DV2013} -P.~D'Ambra and P.\,S.~Vassilevski, -{\em Adaptive AMG with coarsening based on compatible weighted matching}, -Computing and Visualization in Science, 16, (2013) 59--76. -% -\bibitem{DFV2018} -P.~D'Ambra, S.~Filippone and P.\,S.~Vassilevski, -{\em BootCMatch: a software package for bootstrap AMG based on graph weighted matching}, -ACM Transactions on Mathematical Software, 44, (2018) 39:1--39:25. -% \end{thebibliography} diff --git a/docs/src/gettingstarted.tex b/docs/src/gettingstarted.tex index 66dcf7fc..c7fc2d14 100644 --- a/docs/src/gettingstarted.tex +++ b/docs/src/gettingstarted.tex @@ -125,7 +125,7 @@ must be used by the example program. The part of the code dealing with reading and assembling the sparse matrix and the right-hand side vector and the deallocation of the -relevant data structured, performed +relevant data structures, performed through the PSBLAS routines for sparse matrix and vector management, is not reported here for the sake of conciseness. The complete code can be found in the example program file \verb|amg_dexample_ml.f90|, @@ -272,18 +272,24 @@ with block-Jacobi and set by~\fortinline|P%init|. Furthermore, specifying block-Jacobi as coarsest-level solver implies that the coarsest-level matrix is distributed among the processes. -Figure~\ref{fig:ex3} shows how to set a W-cycle preconditioner using the Coarsening based on Compatible Weighted Matching. It applies +Figure~\ref{fig:ex3} shows how to set a W-cycle preconditioner using the Coarsening based on Compatible Weighted Matching, aggregates of size at most $8$ and smoothed prolongators. 
It applies 2 hybrid Gauss-Seidel sweeps as pre- and post-smoother, -and solves the coarsest-level system with the multifrontal LU factorization -implemented in MUMPS. It is specified that the coarsest-level -matrix is distributed, since MUMPS can be used on both -replicated and distributed matrices, and by default -it is used on replicated ones. +and solves the coarsest-level system with the parallel flexible Conjugate Gradient method (KRM) coupled with the block-Jacobi preconditioner having ILU(0) on the blocks. Default parameters are used for stopping criterion of the coarsest solver. +Note that, also in this case, specifying KRM as coarsest-level +solver implies that the coarsest-level matrix is distributed +among the processes. +%It is specified that the coarsest-level + +%matrix is distributed, since MUMPS can be used on both + +%replicated and distributed matrices, and by default + +%it is used on replicated ones. %Note the use of the parameter \fortinline|pos| %to specify a property only for the pre-smoother or the post-smoother %(see Section~\ref{sec:precset} for more details). The code fragments shown in Figures~\ref{fig:ex2} and \ref{fig:ex3} are -included in the example program file \verb|amg_dexample_ml.f90| too. \textbf{DA CORREGGERE NEL CODICE ESEMPIO 3} +included in the example program file \verb|amg_dexample_ml.f90| too. Finally, Figure~\ref{fig:ex4} shows the setup of a one-level additive Schwarz preconditioner, i.e., RAS with overlap 2. @@ -334,16 +340,19 @@ call P%smoothers_build(A,desc_A,info) \begin{listing}[h!] \ifpdf \begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran} -! build a W-cycle preconditioner with 2 hybrid Gauss-Seidel sweeps -! as pre- and post-smoother, a distributed coarsest -! 
matrix, and MUMPS as coarsest-level solver +!build a W-cycle using the coupled coarsening based on weighted matching, +!aggregates of size at most 8 and smoothed prolongators, +!2 hybrid Gauss-Seidel sweeps as pre- and post-smoother, +!and parallel flexible Conjugate Gradient coupled with the block-Jacobi +!preconditioner having ILU(0) on the blocks as coarsest solver. call P%init('ML',info) call P%set('PAR_AGGR_ALG','COUPLED',info) +call P%set('AGGR_TYPE','MATCHBOXP',info) +call P%set('AGGR_SIZE',8,info) call P%set('ML_CYCLE','WCYCLE',info) call P%set('SMOOTHER_TYPE','FBGS',info) call P%set('SMOOTHER_SWEEPS',2,info) -call P%set('COARSE_SOLVE','MUMPS',info) -call P%set('COARSE_MAT','DIST',info) +call P%set('COARSE_SOLVE','KRM',info) call P%hierarchy_build(A,desc_A,info) call P%smoothers_build(A,desc_A,info) \end{minted} @@ -358,11 +367,12 @@ call P%smoothers_build(A,desc_A,info) ! matrix, and MUMPS as coarsest-level solver call P%init('ML',info) call P%set('PAR_AGGR_ALG','COUPLED',info) +call P%set('AGGR_TYPE','MATCHBOXP',info) +call P%set('AGGR_SIZE',8,info) call P%set('ML_CYCLE','WCYCLE',info) call P%set('SMOOTHER_TYPE','FBGS',info) call P%set('SMOOTHER_SWEEPS',2,info) - call P%set('COARSE_SOLVE','MUMPS',info) - call P%set('COARSE_MAT','DIST',info) + call P%set('COARSE_SOLVE','KRM',info) call P%hierarchy_build(A,desc_A,info) call P%smoothers_build(A,desc_A,info) ... ... @@ -371,23 +381,18 @@ call P%smoothers_build(A,desc_A,info) \end{minipage} \end{center} \fi\vspace{-2em}% -\caption{setup of a multilevel preconditioner based on the coupled coarsening based on weighted matching\label{fig:ex3}} +\caption{setup of a multilevel preconditioner based on the coupled coarsening using weighted matching\label{fig:ex3}} \end{listing}\vspace*{-2em} \begin{listing}[h!] \ifpdf \begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran} -! build a W-cycle preconditioner with 2 hybrid Gauss-Seidel sweeps -! as pre- and post-smoother, a distributed coarsest -! 
matrix, and MUMPS as coarsest-level solver -call P%init('ML',info) -call P%set('PAR_AGGR_ALG','COUPLED',info) -call P%set('ML_CYCLE','WCYCLE',info) -call P%set('SMOOTHER_TYPE','FBGS',info) -call P%set('SMOOTHER_SWEEPS',2,info) -call P%set('COARSE_SOLVE','MUMPS',info) -call P%set('COARSE_MAT','DIST',info) -call P%hierarchy_build(A,desc_A,info) -call P%smoothers_build(A,desc_A,info) +! build a one-level RAS with overlap 2 and ILU(0) on the local blocks. +call P%init('AS',info) +call P%set('SUB_OVR',2,info) +call P%build(A,desc_A,info) +... ... +! solve Ax=b with preconditioned BiCGSTAB + call psb_krylov('BICGSTAB',A,P,b,x,tol,desc_A,info) \end{minted} \else \begin{center} diff --git a/docs/src/overview.tex b/docs/src/overview.tex index 7602ef22..ef31a157 100644 --- a/docs/src/overview.tex +++ b/docs/src/overview.tex @@ -20,11 +20,12 @@ The preconditioners implemented in AMG4PSBLAS are obtained by combining 3 different types of AMG cycles with smoothers and coarsest-level solvers. Available multigrid cycles include the V-, W-, and a version of a Krylov-type cycle (K-cycle)~\cite{Briggs2000,Notay2008}; they can be -combined with Jacobi hybrid +combined with Jacobi, hybrid %\footnote{see Note 2 in Table~\ref{tab:p_coarse}, p.~28.} -forward/backward Gauss-Seidel, block-Jacobi, and additive Schwarz -smoothers. The Jacobi, block-Jacobi and -Gauss-Seidel smoothers are also available in the $\ell_1$ version. +forward/backward Gauss-Seidel, block-Jacobi and additive Schwarz +smoothers with various versions of local incomplete factorizations and approximate inverses +on the blocks. The Jacobi, block-Jacobi and +Gauss-Seidel smoothers are also available in the $\ell_1$ version~\cite{DDF2020}. 
An algebraic approach is used to generate a hierarchy of coarse-level matrices and operators, without explicitly using any information on the @@ -34,16 +35,16 @@ two different coarsening strategies, based on aggregation, are available: \item a decoupled version of the smoothed aggregation procedure proposed in~\cite{BREZINA_VANEK,VANEK_MANDEL_BREZINA}, and already included in the previous versions of the - package~\cite{BDDF2007,MLD2P4_TOMS}; + package~\cite{aaecc_07,MLD2P4_TOMS}; \item a coupled, parallel implementation of the Coarsening based on Compatible Weighted Matching introduced in~\cite{DV2013,DFV2018} and described in detail in~\cite{DDF2020}; \end{itemize} Either exact or approximate solvers can be used on the coarsest-level -system. We provide interfaces to various sparse LU factorizations from external -packages, native incomplete LU and approximate inverse factorizations, -weighted Jacobi, hybrid Gauss-Seidel, block-Jacobi solvers and -a recursive call to preconditioned Krylov methods; all +system. We provide interfaces to various parallel and sequential sparse LU factorizations from external +packages, sequential native incomplete LU and approximate inverse factorizations, +parallel weighted Jacobi, hybrid Gauss-Seidel, block-Jacobi solvers and +calls to preconditioned Krylov methods; all smoothers can be also exploited as one-level preconditioners. AMG4PSBLAS is written in Fortran~2003, following an @@ -76,7 +77,7 @@ therefore, AMG4PSBLAS can be run on any parallel machine where PSBLAS implementations are available. 
In the most recent version of PSBLAS (release 3.7), a plug-in for GPU is included; it includes CUDA versions of main vector operations and of sparse matrix-vector -multiplication, so that Krylov methods coupled with AMG4PBLAS +multiplication, so that Krylov methods coupled with AMG4PSBLAS preconditioners relying on Jacobi and block-Jacobi smoothers with sparse approximate inverses on the blocks can be efficiently executed on cluster of GPUs. diff --git a/docs/src/userguide.pdf b/docs/src/userguide.pdf new file mode 120000 index 00000000..7b032aa3 --- /dev/null +++ b/docs/src/userguide.pdf @@ -0,0 +1 @@ +tmp/userguide.pdf \ No newline at end of file diff --git a/docs/src/userinterface.tex b/docs/src/userinterface.tex index 77361bce..fb9e0051 100644 --- a/docs/src/userinterface.tex +++ b/docs/src/userinterface.tex @@ -297,10 +297,10 @@ of $A+A^T$.\\ \hline \textsc{comments} \\ \hline \fi \fortinline|'AGGR_TYPE'| & \fortinline|character(len=*)| \hspace*{-3mm} -& \fortinline|'SOC1'| & -\fortinline|'SOC1'|, + &\fortinline|'SOC1'|, \fortinline|'SOC2'|, \fortinline|'MATCHBOXP'| +& \fortinline|'SOC1'| & Type of aggregation algorithm: currently, for the decoupled aggregation we implement two measures of strength of connection, the one by Van\v{e}k, Mandel @@ -403,7 +403,7 @@ the parameter \texttt{ilev}.} \\ (plus triangular solve); point-Jacobi, hybrid Gauss-Seidel or block-Jacobi and related $\ell_1$-versions; - Krylov Method (Flexible Conjugate Gradient) coupled with + Krylov Method (flexible Conjugate Gradient) coupled with the block-Jacobi preconditioner with ILU(0) on the blocks. Note that \texttt{UMF} and \texttt{SLU} require the coarsest @@ -431,7 +431,7 @@ the parameter \texttt{ilev}.} \\ \fortinline|'MUMPS'| \par \fortinline|'SLU'| \par \fortinline|'UMF'| \par \fortinline|'INVT'| \par \fortinline|'INVK'| \par \fortinline|'AINV'| & See~Note. 
- & Solver for the diagonal blocks of the coarse matrix, + & Solver for the diagonal blocks of the coarsest matrix, in case the block Jacobi solver is chosen as coarsest-level solver: ILU($p$), ILU($p,t$), MILU($p$), LU from MUMPS, SuperLU or UMFPACK @@ -486,7 +486,7 @@ the parameter \texttt{ilev}.} \\ \hline \end{tabular} \end{center} -\caption{Parameters defining the coarse-space correction at the coarsest +\caption{Parameters defining the solver at the coarsest level (continued).\label{tab:p_coarse_1}} \esideways @@ -505,7 +505,7 @@ level (continued).\label{tab:p_coarse_1}} \hline \end{tabular} \end{center} -\caption{Additional parameters defining the coarse-space correction at the coarsest +\caption{Additional parameters defining the solver at the coarsest level.\label{tab:p_coarse_2}} \esideways diff --git a/examples/fileread/amg_cexample_1lev.f90 b/examples/fileread/amg_cexample_1lev.f90 index 9341d723..23d3654d 100644 --- a/examples/fileread/amg_cexample_1lev.f90 +++ b/examples/fileread/amg_cexample_1lev.f90 @@ -2,7 +2,7 @@ ! ! AMG4PSBLAS version 1.0 ! Algebraic Multigrid Package -! based on PSBLAS (Parallel Sparse BLAS version 3.5) +! based on PSBLAS (Parallel Sparse BLAS version 3.7) ! ! (C) Copyright 2020 ! @@ -38,8 +38,8 @@ ! File: amg_cexample_1lev.f90 ! ! This sample program solves a linear system by using BiCGStab preconditioned by -! RAS with overlap 2 and ILU(0) on the local blocks, as explained in Section 5.1 -! of the MLD2P4 User's and Reference Guide. +! RAS with overlap 2 and ILU(0) on the local blocks, as explained in Section 4.1 +! of the AMG4PSBLAS User's and Reference Guide. ! ! The matrix and the rhs are read from files (if an rhs is not available, the ! unit rhs is set). @@ -108,7 +108,7 @@ program amg_cexample_1lev ! Hello world ! 
if (iam == psb_root_) then - write(*,*) 'Welcome to MLD2P4 version: ',amg_version_string_ + write(*,*) 'Welcome to AMG4PSBLAS version: ',amg_version_string_ write(*,*) 'This is the ',trim(name),' sample program' end if diff --git a/examples/fileread/amg_cexample_ml.f90 b/examples/fileread/amg_cexample_ml.f90 index d31807a6..1f1d646d 100644 --- a/examples/fileread/amg_cexample_ml.f90 +++ b/examples/fileread/amg_cexample_ml.f90 @@ -2,7 +2,7 @@ ! ! AMG4PSBLAS version 1.0 ! Algebraic Multigrid Package -! based on PSBLAS (Parallel Sparse BLAS version 3.5) +! based on PSBLAS (Parallel Sparse BLAS version 3.7) ! ! (C) Copyright 2020 ! @@ -38,25 +38,27 @@ ! File: amg_cexample_ml.f90 ! ! This sample program solves a linear system by using CG coupled with -! one of the following multi-level preconditioner, as explained in Section 6.1 -! of the MLD2P4 User's and Reference Guide: +! one of the following multi-level preconditioner, as explained in Section 4.1 +! of the AMG4PSBLAS User's and Reference Guide: ! ! - choice = 1, the default multi-level preconditioner solver, i.e., -! V-cycle with basic smoothed aggregation, 1 hybrid forward/backward +! V-cycle with decoupled smoothed aggregation, 1 hybrid forward/backward ! GS sweep as pre/post-smoother and UMFPACK as coarsest-level -! solver (Sec. 5.1, Fig. 2) +! solver (Sec. 4.1, Listing 1) ! ! - choice = 2, a V-cycle preconditioner with 1 block-Jacobi sweep ! (with ILU(0) on the blocks) as pre- and post-smoother, and 8 block-Jacobi -! sweeps (with ILU(0) on the blocks) as coarsest-level solver (Sec. 5.1, Fig. 3) +! sweeps (with ILU(0) on the blocks) as coarsest-level solver (Sec. 4.1, Listing 2) ! -! - choice = 3, a W-cycle preconditioner with 2 hybrid forward/backward -! GS sweeps as pre/post-smoother, a distributed coarsest -! matrix, and MUMPS as coarsest-level solver (Sec. 5.1, Fig. 4) +! - choice = 3, W-cycle preconditioner based on the coupled aggregation relying on matching, +! 
with maximum size of aggregates equal to 8 and smoothed prolongators, +! 2 hybrid forward/backward GS sweeps as pre/post-smoother, a distributed coarsest +! matrix, and preconditioned Flexible Conjugate Gradient as coarsest-level solver (Sec. 4.1, Listing 3) ! ! The matrix and the rhs are read from files (if an rhs is not available, the ! unit rhs is set). ! +! program amg_cexample_ml use psb_base_mod use amg_prec_mod @@ -123,7 +125,7 @@ program amg_cexample_ml ! Hello world ! if (iam == psb_root_) then - write(*,*) 'Welcome to MLD2P4 version: ',amg_version_string_ + write(*,*) 'Welcome to AMG4PSBLAS version: ',amg_version_string_ write(*,*) 'This is the ',trim(name),' sample program' end if @@ -230,14 +232,18 @@ program amg_cexample_ml case(3) - ! initialize a W-cycle preconditioner with 2 hybrid forward/backward - ! GS sweeps as pre/post-smoother, a distributed coarsest - ! matrix, and MUMPS as coarsest-level solver + ! initialize a W-cycle preconditioner based on the coupled aggregation relying on matching, + ! with maximum size of aggregates equal to 8 and smoothed prolongators, + ! 2 hybrid forward/backward GS sweeps as pre/post-smoother, a distributed coarsest + ! matrix, and preconditioned Flexible Conjugate Gradient as coarsest-level solver call P%init(ctxt,'ML',info) + call P%set('PAR_AGGR_ALG','COUPLED',info) + call P%set('AGGR_TYPE','MATCHBOXP',info) + call P%set('AGGR_SIZE',8,info) call P%set('ML_CYCLE','WCYCLE',info) call P%set('SMOOTHER_SWEEPS',2,info) - call P%set('COARSE_SOLVE','MUMPS',info) + call P%set('COARSE_SOLVE','KRM',info) call P%set('COARSE_MAT','DIST',info) kmethod = 'CG' end select diff --git a/examples/fileread/amg_dexample_1lev.f90 b/examples/fileread/amg_dexample_1lev.f90 index edb4d9bf..6687e759 100644 --- a/examples/fileread/amg_dexample_1lev.f90 +++ b/examples/fileread/amg_dexample_1lev.f90 @@ -2,7 +2,7 @@ ! ! AMG4PSBLAS version 1.0 ! Algebraic Multigrid Package -! based on PSBLAS (Parallel Sparse BLAS version 3.5) +! 
based on PSBLAS (Parallel Sparse BLAS version 3.7) ! ! (C) Copyright 2020 ! @@ -38,8 +38,8 @@ ! File: amg_dexample_1lev.f90 ! ! This sample program solves a linear system by using BiCGStab preconditioned by -! RAS with overlap 2 and ILU(0) on the local blocks, as explained in Section 5.1 -! of the MLD2P4 User's and Reference Guide. +! RAS with overlap 2 and ILU(0) on the local blocks, as explained in Section 4.1 +! of the AMG4PSBLAS User's and Reference Guide. ! ! The matrix and the rhs are read from files (if an rhs is not available, the ! unit rhs is set). @@ -108,7 +108,7 @@ program amg_dexample_1lev ! Hello world ! if (iam == psb_root_) then - write(*,*) 'Welcome to MLD2P4 version: ',amg_version_string_ + write(*,*) 'Welcome to AMG4PSBLAS version: ',amg_version_string_ write(*,*) 'This is the ',trim(name),' sample program' end if diff --git a/examples/fileread/amg_dexample_ml.f90 b/examples/fileread/amg_dexample_ml.f90 index 9380d991..386f7aea 100644 --- a/examples/fileread/amg_dexample_ml.f90 +++ b/examples/fileread/amg_dexample_ml.f90 @@ -2,7 +2,7 @@ ! ! AMG4PSBLAS version 1.0 ! Algebraic Multigrid Package -! based on PSBLAS (Parallel Sparse BLAS version 3.5) +! based on PSBLAS (Parallel Sparse BLAS version 3.7) ! ! (C) Copyright 2020 ! @@ -38,21 +38,22 @@ ! File: amg_dexample_ml.f90 ! ! This sample program solves a linear system by using CG coupled with -! one of the following multi-level preconditioner, as explained in Section 6.1 -! of the MLD2P4 User's and Reference Guide: +! one of the following multi-level preconditioner, as explained in Section 4.1 +! of the AMG4PSBLAS User's and Reference Guide: ! ! - choice = 1, the default multi-level preconditioner solver, i.e., -! V-cycle with basic smoothed aggregation, 1 hybrid forward/backward +! V-cycle with decoupled smoothed aggregation, 1 hybrid forward/backward ! GS sweep as pre/post-smoother and UMFPACK as coarsest-level -! solver (Sec. 5.1, Fig. 2) +! solver (Sec. 4.1, Listing 1) ! ! 
- choice = 2, a V-cycle preconditioner with 1 block-Jacobi sweep ! (with ILU(0) on the blocks) as pre- and post-smoother, and 8 block-Jacobi -! sweeps (with ILU(0) on the blocks) as coarsest-level solver (Sec. 5.1, Fig. 3) +! sweeps (with ILU(0) on the blocks) as coarsest-level solver (Sec. 4.1, Listing 2) ! -! - choice = 3, a W-cycle preconditioner with 2 hybrid forward/backward -! GS sweeps as pre/post-smoother, a distributed coarsest -! matrix, and MUMPS as coarsest-level solver (Sec. 5.1, Fig. 4) +! - choice = 3, W-cycle preconditioner based on the coupled aggregation relying on matching, +! with maximum size of aggregates equal to 8 and smoothed prolongators, +! 2 hybrid forward/backward GS sweeps as pre/post-smoother, a distributed coarsest +! matrix, and preconditioned Flexible Conjugate Gradient as coarsest-level solver (Sec. 4.1, Listing 3) ! ! The matrix and the rhs are read from files (if an rhs is not available, the ! unit rhs is set). @@ -123,7 +124,7 @@ program amg_dexample_ml ! Hello world ! if (iam == psb_root_) then - write(*,*) 'Welcome to MLD2P4 version: ',amg_version_string_ + write(*,*) 'Welcome to AMG4PSBLAS version: ',amg_version_string_ write(*,*) 'This is the ',trim(name),' sample program' end if @@ -209,7 +210,7 @@ program amg_dexample_ml case(1) ! initialize the default multi-level preconditioner, i.e. V-cycle - ! with basic smoothed aggregation, 1 hybrid forward/backward + ! with decoupled smoothed aggregation, 1 hybrid forward/backward ! GS sweep as pre/post-smoother and UMFPACK as coarsest-level ! solver @@ -230,14 +231,18 @@ program amg_dexample_ml case(3) - ! initialize a W-cycle preconditioner with 2 hybrid forward/backward - ! GS sweeps as pre/post-smoother, a distributed coarsest - ! matrix, and MUMPS as coarsest-level solver + ! initialize a W-cycle preconditioner based on the coupled aggregation relying on matching, + ! with maximum size of aggregates equal to 8 and smoothed prolongators, + ! 
2 hybrid forward/backward GS sweeps as pre/post-smoother, a distributed coarsest + ! matrix, and preconditioned Flexible Conjugate Gradient as coarsest-level solver call P%init(ctxt,'ML',info) + call P%set('PAR_AGGR_ALG','COUPLED',info) + call P%set('AGGR_TYPE','MATCHBOXP',info) + call P%set('AGGR_SIZE',8,info) call P%set('ML_CYCLE','WCYCLE',info) call P%set('SMOOTHER_SWEEPS',2,info) - call P%set('COARSE_SOLVE','MUMPS',info) + call P%set('COARSE_SOLVE','KRM',info) call P%set('COARSE_MAT','DIST',info) kmethod = 'CG' end select diff --git a/examples/fileread/amg_sexample_1lev.f90 b/examples/fileread/amg_sexample_1lev.f90 index c52b2935..90cff0f8 100644 --- a/examples/fileread/amg_sexample_1lev.f90 +++ b/examples/fileread/amg_sexample_1lev.f90 @@ -2,7 +2,7 @@ ! ! AMG4PSBLAS version 1.0 ! Algebraic Multigrid Package -! based on PSBLAS (Parallel Sparse BLAS version 3.5) +! based on PSBLAS (Parallel Sparse BLAS version 3.7) ! ! (C) Copyright 2020 ! @@ -38,8 +38,8 @@ ! File: amg_sexample_1lev.f90 ! ! This sample program solves a linear system by using BiCGStab preconditioned by -! RAS with overlap 2 and ILU(0) on the local blocks, as explained in Section 5.1 -! of the MLD2P4 User's and Reference Guide. +! RAS with overlap 2 and ILU(0) on the local blocks, as explained in Section 4.1 +! of the AMG4PSBLAS User's and Reference Guide. ! ! The matrix and the rhs are read from files (if an rhs is not available, the ! unit rhs is set). @@ -108,7 +108,7 @@ program amg_sexample_1lev ! Hello world ! if (iam == psb_root_) then - write(*,*) 'Welcome to MLD2P4 version: ',amg_version_string_ + write(*,*) 'Welcome to AMG4PSBLAS version: ',amg_version_string_ write(*,*) 'This is the ',trim(name),' sample program' end if diff --git a/examples/fileread/amg_sexample_ml.f90 b/examples/fileread/amg_sexample_ml.f90 index f3156631..0ebf35f4 100644 --- a/examples/fileread/amg_sexample_ml.f90 +++ b/examples/fileread/amg_sexample_ml.f90 @@ -2,7 +2,7 @@ ! ! AMG4PSBLAS version 1.0 ! 
Algebraic Multigrid Package -! based on PSBLAS (Parallel Sparse BLAS version 3.5) +! based on PSBLAS (Parallel Sparse BLAS version 3.7) ! ! (C) Copyright 2020 ! @@ -38,8 +38,8 @@ ! File: amg_sexample_ml.f90 ! ! This sample program solves a linear system by using CG coupled with -! one of the following multi-level preconditioner, as explained in Section 6.1 -! of the MLD2P4 User's and Reference Guide: +! one of the following multi-level preconditioner, as explained in Section 4.1 +! of the AMG4PSBLAS User's and Reference Guide: ! ! - choice = 1, the default multi-level preconditioner solver, i.e., ! V-cycle with basic smoothed aggregation, 1 hybrid forward/backward @@ -123,7 +123,7 @@ program amg_sexample_ml ! Hello world ! if (iam == psb_root_) then - write(*,*) 'Welcome to MLD2P4 version: ',amg_version_string_ + write(*,*) 'Welcome to AMG4PSBLAS version: ',amg_version_string_ write(*,*) 'This is the ',trim(name),' sample program' end if diff --git a/examples/fileread/amg_zexample_1lev.f90 b/examples/fileread/amg_zexample_1lev.f90 index 5d265ce7..b88dd706 100644 --- a/examples/fileread/amg_zexample_1lev.f90 +++ b/examples/fileread/amg_zexample_1lev.f90 @@ -2,7 +2,7 @@ ! ! AMG4PSBLAS version 1.0 ! Algebraic Multigrid Package -! based on PSBLAS (Parallel Sparse BLAS version 3.5) +! based on PSBLAS (Parallel Sparse BLAS version 3.7) ! ! (C) Copyright 2020 ! @@ -38,8 +38,8 @@ ! File: amg_zexample_1lev.f90 ! ! This sample program solves a linear system by using BiCGStab preconditioned by -! RAS with overlap 2 and ILU(0) on the local blocks, as explained in Section 5.1 -! of the MLD2P4 User's and Reference Guide. +! RAS with overlap 2 and ILU(0) on the local blocks, as explained in Section 4.1 +! of the AMG4PSBLAS User's and Reference Guide. ! ! The matrix and the rhs are read from files (if an rhs is not available, the ! unit rhs is set). @@ -108,7 +108,7 @@ program amg_zexample_1lev ! Hello world ! 
if (iam == psb_root_) then - write(*,*) 'Welcome to MLD2P4 version: ',amg_version_string_ + write(*,*) 'Welcome to AMG4PSBLAS version: ',amg_version_string_ write(*,*) 'This is the ',trim(name),' sample program' end if diff --git a/examples/fileread/amg_zexample_ml.f90 b/examples/fileread/amg_zexample_ml.f90 index b69c334a..d1b1c79b 100644 --- a/examples/fileread/amg_zexample_ml.f90 +++ b/examples/fileread/amg_zexample_ml.f90 @@ -2,7 +2,7 @@ ! ! AMG4PSBLAS version 1.0 ! Algebraic Multigrid Package -! based on PSBLAS (Parallel Sparse BLAS version 3.5) +! based on PSBLAS (Parallel Sparse BLAS version 3.7) ! ! (C) Copyright 2020 ! @@ -38,25 +38,27 @@ ! File: amg_zexample_ml.f90 ! ! This sample program solves a linear system by using CG coupled with -! one of the following multi-level preconditioner, as explained in Section 6.1 -! of the MLD2P4 User's and Reference Guide: +! one of the following multi-level preconditioner, as explained in Section 4.1 +! of the AMG4PSBLAS User's and Reference Guide: ! ! - choice = 1, the default multi-level preconditioner solver, i.e., -! V-cycle with basic smoothed aggregation, 1 hybrid forward/backward +! V-cycle with decoupled smoothed aggregation, 1 hybrid forward/backward ! GS sweep as pre/post-smoother and UMFPACK as coarsest-level -! solver (Sec. 5.1, Fig. 2) +! solver (Sec. 4.1, Listing 1) ! ! - choice = 2, a V-cycle preconditioner with 1 block-Jacobi sweep ! (with ILU(0) on the blocks) as pre- and post-smoother, and 8 block-Jacobi -! sweeps (with ILU(0) on the blocks) as coarsest-level solver (Sec. 5.1, Fig. 3) +! sweeps (with ILU(0) on the blocks) as coarsest-level solver (Sec. 4.1, Listing 2) ! -! - choice = 3, a W-cycle preconditioner with 2 hybrid forward/backward -! GS sweeps as pre/post-smoother, a distributed coarsest -! matrix, and MUMPS as coarsest-level solver (Sec. 5.1, Fig. 4) +! - choice = 3, W-cycle preconditioner based on the coupled aggregation relying on matching, +! 
with maximum size of aggregates equal to 8 and smoothed prolongators, +! 2 hybrid forward/backward GS sweeps as pre/post-smoother, a distributed coarsest +! matrix, and preconditioned Flexible Conjugate Gradient as coarsest-level solver (Sec. 4.1, Listing 3) ! ! The matrix and the rhs are read from files (if an rhs is not available, the ! unit rhs is set). ! +! program amg_zexample_ml use psb_base_mod use amg_prec_mod @@ -123,7 +125,7 @@ program amg_zexample_ml ! Hello world ! if (iam == psb_root_) then - write(*,*) 'Welcome to MLD2P4 version: ',amg_version_string_ + write(*,*) 'Welcome to AMG4PSBLAS version: ',amg_version_string_ write(*,*) 'This is the ',trim(name),' sample program' end if @@ -230,14 +232,18 @@ program amg_zexample_ml case(3) - ! initialize a W-cycle preconditioner with 2 hybrid forward/backward - ! GS sweeps as pre/post-smoother, a distributed coarsest - ! matrix, and MUMPS as coarsest-level solver + ! initialize a W-cycle preconditioner based on the coupled aggregation relying on matching, + ! with maximum size of aggregates equal to 8 and smoothed prolongators, + ! 2 hybrid forward/backward GS sweeps as pre/post-smoother, a distributed coarsest + ! matrix, and preconditioned Flexible Conjugate Gradient as coarsest-level solver call P%init(ctxt,'ML',info) + call P%set('PAR_AGGR_ALG','COUPLED',info) + call P%set('AGGR_TYPE','MATCHBOXP',info) + call P%set('AGGR_SIZE',8,info) call P%set('ML_CYCLE','WCYCLE',info) call P%set('SMOOTHER_SWEEPS',2,info) - call P%set('COARSE_SOLVE','MUMPS',info) + call P%set('COARSE_SOLVE','KRM',info) call P%set('COARSE_MAT','DIST',info) kmethod = 'CG' end select diff --git a/examples/fileread/data_input.f90 b/examples/fileread/data_input.f90 index b31a5f9a..4e91498b 100644 --- a/examples/fileread/data_input.f90 +++ b/examples/fileread/data_input.f90 @@ -1,14 +1,13 @@ ! -! -! MLD2P4 version 2.2 -! MultiLevel Domain Decomposition Parallel Preconditioners Package -! based on PSBLAS (Parallel Sparse BLAS version 3.5) +! 
AMG4PSBLAS version 1.0 +! Algebraic Multigrid Package +! based on PSBLAS (Parallel Sparse BLAS version 3.7) ! -! (C) Copyright 2008-2018 +! (C) Copyright 2020 ! ! Salvatore Filippone ! Pasqua D'Ambra -! Daniela di Serafino +! Fabio Durastante ! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions @@ -18,14 +17,14 @@ ! 2. Redistributions in binary form must reproduce the above copyright ! notice, this list of conditions, and the following disclaimer in the ! documentation and/or other materials provided with the distribution. -! 3. The name of the MLD2P4 group or the names of its contributors may +! 3. The name of the AMG4PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. ! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MLD2P4 GROUP OR ITS CONTRIBUTORS +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS ! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR ! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF ! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS @@ -34,6 +33,7 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! +! ! module data_input diff --git a/examples/pdegen/amg_dexample_1lev.f90 b/examples/pdegen/amg_dexample_1lev.f90 index e7554ae6..1733f9f1 100644 --- a/examples/pdegen/amg_dexample_1lev.f90 +++ b/examples/pdegen/amg_dexample_1lev.f90 @@ -1,9 +1,7 @@ - -! ! ! AMG4PSBLAS version 1.0 ! Algebraic Multigrid Package -! based on PSBLAS (Parallel Sparse BLAS version 3.5) +! 
based on PSBLAS (Parallel Sparse BLAS version 3.7) ! ! (C) Copyright 2020 ! @@ -40,8 +38,8 @@ ! ! This sample program solves a linear system obtained by discretizing a ! PDE with Dirichlet BCs. The solver is BiCGStab preconditioned by -! RAS with overlap 2 and ILU(0) on the local blocks, as explained in Section 5.1 -! of the MLD2P4 User's and Reference Guide. +! RAS with overlap 2 and ILU(0) on the local blocks, as explained in Section 4.1 +! of the AMG4PSBLAS User's and Reference Guide. ! ! ! The PDE is a general second order equation in 3d @@ -116,7 +114,7 @@ program amg_dexample_1lev ! Hello world ! if (iam == psb_root_) then - write(*,*) 'Welcome to MLD2P4 version: ',amg_version_string_ + write(*,*) 'Welcome to AMG4PSBLAS version: ',amg_version_string_ write(*,*) 'This is the ',trim(name),' sample program' end if diff --git a/examples/pdegen/amg_dexample_ml.f90 b/examples/pdegen/amg_dexample_ml.f90 index 5d00b1bc..1d649df4 100644 --- a/examples/pdegen/amg_dexample_ml.f90 +++ b/examples/pdegen/amg_dexample_ml.f90 @@ -1,8 +1,7 @@ ! -! ! AMG4PSBLAS version 1.0 ! Algebraic Multigrid Package -! based on PSBLAS (Parallel Sparse BLAS version 3.5) +! based on PSBLAS (Parallel Sparse BLAS version 3.7) ! ! (C) Copyright 2020 ! @@ -38,22 +37,27 @@ ! File: amg_dexample_ml.f90 ! ! This sample program solves a linear system obtained by discretizing a -! PDE with Dirichlet BCs. The solver is CG, coupled with one of the -! following multi-level preconditioner, as explained in Section 5.1 of -! the MLD2P4 User's and Reference Guide: +! PDE with Dirichlet BCs. The solver is CG, coupled with one of the +! following multi-level preconditioner, as explained in Section 4.1 of +! the AMG4PSBLAS User's and Reference Guide: ! ! - choice = 1, the default multi-level preconditioner solver, i.e., -! V-cycle with basic smoothed aggregation, 1 hybrid forward/backward +! V-cycle with decoupled smoothed aggregation, 1 hybrid forward/backward ! 
GS sweep as pre/post-smoother and UMFPACK as coarsest-level -! solver (Sec. 5.1, Fig. 2) +! solver (Sec. 4.1, Listing 1) ! ! - choice = 2, a V-cycle preconditioner with 1 block-Jacobi sweep ! (with ILU(0) on the blocks) as pre- and post-smoother, and 8 block-Jacobi -! sweeps (with ILU(0) on the blocks) as coarsest-level solver (Sec. 5.1, Fig. 3) +! sweeps (with ILU(0) on the blocks) as coarsest-level solver (Sec. 4.1, Listing 2) +! +! - choice = 3, W-cycle preconditioner based on the coupled aggregation relying on matching, +! with maximum size of aggregates equal to 8 and smoothed prolongators, +! 2 hybrid forward/backward GS sweeps as pre/post-smoother, a distributed coarsest +! matrix, and preconditioned Flexible Conjugate Gradient as coarsest-level solver (Sec. 4.1, Listing 3) +! +! The matrix and the rhs are not read from files: they are generated by +! discretizing the PDE described below. ! -! - choice = 3, a W-cycle preconditioner with 2 hybrid forward/backward -! GS sweeps as pre/post-smoother, a distributed coarsest matrix, -! and MUMPS as coarsest-level solver (Sec. 5.1, Fig. 4) ! ! The PDE is a general second order equation in 3d ! @@ -136,7 +140,7 @@ program amg_dexample_ml ! Hello world ! if (iam == psb_root_) then - write(*,*) 'Welcome to MLD2P4 version: ',amg_version_string_ + write(*,*) 'Welcome to AMG4PSBLAS version: ',amg_version_string_ write(*,*) 'This is the ',trim(name),' sample program' end if @@ -187,17 +191,21 @@ program amg_dexample_ml case(3) - ! initialize a W-cycle preconditioner with 2 hybrid forward/backward - ! GS sweeps as pre/post-smoother, a distributed coarsest - ! matrix, and MUMPS as coarsest-level solver + ! initialize a W-cycle preconditioner based on the coupled aggregation relying on matching, + ! with maximum size of aggregates equal to 8 and smoothed prolongators, + ! 2 hybrid forward/backward GS sweeps as pre/post-smoother, a distributed coarsest + !
matrix, and preconditioned Flexible Conjugate Gradient as coarsest-level solver call P%init(ctxt,'ML',info) + call P%set('PAR_AGGR_ALG','COUPLED',info) + call P%set('AGGR_TYPE','MATCHBOXP',info) + call P%set('AGGR_SIZE',8,info) call P%set('ML_CYCLE','WCYCLE',info) call P%set('SMOOTHER_SWEEPS',2,info) - call P%set('COARSE_SOLVE','MUMPS',info) + call P%set('COARSE_SOLVE','KRM',info) call P%set('COARSE_MAT','DIST',info) kmethod = 'CG' - + end select end select call psb_barrier(ctxt) diff --git a/examples/pdegen/amg_dpde_mod.f90 b/examples/pdegen/amg_dpde_mod.f90 index 85d3f48f..437b71a2 100644 --- a/examples/pdegen/amg_dpde_mod.f90 +++ b/examples/pdegen/amg_dpde_mod.f90 @@ -1,8 +1,7 @@ ! -! ! AMG4PSBLAS version 1.0 ! Algebraic Multigrid Package -! based on PSBLAS (Parallel Sparse BLAS version 3.5) +! based on PSBLAS (Parallel Sparse BLAS version 3.7) ! ! (C) Copyright 2020 ! diff --git a/examples/pdegen/amg_sexample_1lev.f90 b/examples/pdegen/amg_sexample_1lev.f90 index 7debe6b2..e2fe9ca7 100644 --- a/examples/pdegen/amg_sexample_1lev.f90 +++ b/examples/pdegen/amg_sexample_1lev.f90 @@ -3,7 +3,7 @@ ! ! AMG4PSBLAS version 1.0 ! Algebraic Multigrid Package -! based on PSBLAS (Parallel Sparse BLAS version 3.5) +! based on PSBLAS (Parallel Sparse BLAS version 3.7) ! ! (C) Copyright 2020 ! @@ -40,8 +40,8 @@ ! ! This sample program solves a linear system obtained by discretizing a ! PDE with Dirichlet BCs. The solver is BiCGStab preconditioned by -! RAS with overlap 2 and ILU(0) on the local blocks, as explained in Section 5.1 -! of the MLD2P4 User's and Reference Guide. +! RAS with overlap 2 and ILU(0) on the local blocks, as explained in Section 4.1 +! of the AMG4PSBLAS User's and Reference Guide. ! ! ! The PDE is a general second order equation in 3d @@ -116,7 +116,7 @@ program amg_sexample_1lev ! Hello world ! 
if (iam == psb_root_) then - write(*,*) 'Welcome to MLD2P4 version: ',amg_version_string_ + write(*,*) 'Welcome to AMG4PSBLAS version: ',amg_version_string_ write(*,*) 'This is the ',trim(name),' sample program' end if diff --git a/examples/pdegen/amg_sexample_ml.f90 b/examples/pdegen/amg_sexample_ml.f90 index 2c34bf56..4b58d5ba 100644 --- a/examples/pdegen/amg_sexample_ml.f90 +++ b/examples/pdegen/amg_sexample_ml.f90 @@ -2,7 +2,7 @@ ! ! AMG4PSBLAS version 1.0 ! Algebraic Multigrid Package -! based on PSBLAS (Parallel Sparse BLAS version 3.5) +! based on PSBLAS (Parallel Sparse BLAS version 3.7) ! ! (C) Copyright 2020 ! @@ -39,21 +39,26 @@ ! ! This sample program solves a linear system obtained by discretizing a ! PDE with Dirichlet BCs. The solver is CG, coupled with one of the -! following multi-level preconditioner, as explained in Section 5.1 of -! the MLD2P4 User's and Reference Guide: +! following multi-level preconditioner, as explained in Section 4.1 of +! the AMG4PSBLAS User's and Reference Guide: ! ! - choice = 1, the default multi-level preconditioner solver, i.e., -! V-cycle with basic smoothed aggregation, 1 hybrid forward/backward +! V-cycle with decoupled smoothed aggregation, 1 hybrid forward/backward ! GS sweep as pre/post-smoother and UMFPACK as coarsest-level -! solver (Sec. 5.1, Fig. 2) +! solver (Sec. 4.1, Listing 1) ! ! - choice = 2, a V-cycle preconditioner with 1 block-Jacobi sweep ! (with ILU(0) on the blocks) as pre- and post-smoother, and 8 block-Jacobi -! sweeps (with ILU(0) on the blocks) as coarsest-level solver (Sec. 5.1, Fig. 3) +! sweeps (with ILU(0) on the blocks) as coarsest-level solver (Sec. 4.1, Listing 2) +! +! - choice = 3, W-cycle preconditioner based on the coupled aggregation relying on matching, +! with maximum size of aggregates equal to 8 and smoothed prolongators, +! 2 hybrid forward/backward GS sweeps as pre/post-smoother, a distributed coarsest +! 
matrix, and preconditioned Flexible Conjugate Gradient as coarsest-level solver (Sec. 4.1, Listing 3) +! +! The matrix and the rhs are not read from files: they are generated by +! discretizing the PDE described below. ! -! - choice = 3, a W-cycle preconditioner with 2 hybrid forward/backward -! GS sweeps as pre/post-smoother, a distributed coarsest matrix, -! and MUMPS as coarsest-level solver (Sec. 5.1, Fig. 4) ! ! The PDE is a general second order equation in 3d ! @@ -136,7 +141,7 @@ program amg_sexample_ml ! Hello world ! if (iam == psb_root_) then - write(*,*) 'Welcome to MLD2P4 version: ',amg_version_string_ + write(*,*) 'Welcome to AMG4PSBLAS version: ',amg_version_string_ write(*,*) 'This is the ',trim(name),' sample program' end if @@ -187,19 +192,21 @@ program amg_sexample_ml case(3) - ! initialize a W-cycle preconditioner with 2 hybrid forward/backward - ! GS sweeps as pre/post-smoother, a distributed coarsest - ! matrix, and MUMPS as coarsest-level solver + ! initialize a W-cycle preconditioner based on the coupled aggregation relying on matching, + ! with maximum size of aggregates equal to 8 and smoothed prolongators, + ! 2 hybrid forward/backward GS sweeps as pre/post-smoother, a distributed coarsest + ! matrix, and preconditioned Flexible Conjugate Gradient as coarsest-level solver call P%init(ctxt,'ML',info) + call P%set('PAR_AGGR_ALG','COUPLED',info) + call P%set('AGGR_TYPE','MATCHBOXP',info) + call P%set('AGGR_SIZE',8,info) call P%set('ML_CYCLE','WCYCLE',info) call P%set('SMOOTHER_SWEEPS',2,info) - call P%set('COARSE_SOLVE','MUMPS',info) + call P%set('COARSE_SOLVE','KRM',info) call P%set('COARSE_MAT','DIST',info) kmethod = 'CG' - end select - call psb_barrier(ctxt) t1 = psb_wtime() diff --git a/examples/pdegen/amg_spde_mod.f90 b/examples/pdegen/amg_spde_mod.f90 index 21079dd5..9f7e0557 100644 --- a/examples/pdegen/amg_spde_mod.f90 +++ b/examples/pdegen/amg_spde_mod.f90 @@ -2,7 +2,7 @@ ! ! AMG4PSBLAS version 1.0 ! Algebraic Multigrid Package -!
based on PSBLAS (Parallel Sparse BLAS version 3.5) +! based on PSBLAS (Parallel Sparse BLAS version 3.7) ! ! (C) Copyright 2020 ! diff --git a/examples/pdegen/data_input.f90 b/examples/pdegen/data_input.f90 index b31a5f9a..fe95b557 100644 --- a/examples/pdegen/data_input.f90 +++ b/examples/pdegen/data_input.f90 @@ -1,14 +1,13 @@ ! -! -! MLD2P4 version 2.2 -! MultiLevel Domain Decomposition Parallel Preconditioners Package -! based on PSBLAS (Parallel Sparse BLAS version 3.5) +! AMG4PSBLAS version 1.0 +! Algebraic Multigrid Package +! based on PSBLAS (Parallel Sparse BLAS version 3.7) ! -! (C) Copyright 2008-2018 +! (C) Copyright 2020 ! ! Salvatore Filippone ! Pasqua D'Ambra -! Daniela di Serafino +! Fabio Durastante ! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions @@ -18,14 +17,14 @@ ! 2. Redistributions in binary form must reproduce the above copyright ! notice, this list of conditions, and the following disclaimer in the ! documentation and/or other materials provided with the distribution. -! 3. The name of the MLD2P4 group or the names of its contributors may +! 3. The name of the AMG4PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. ! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MLD2P4 GROUP OR ITS CONTRIBUTORS +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS ! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR ! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF ! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS @@ -33,8 +32,6 @@ ! 
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! module data_input interface read_data @@ -183,4 +180,3 @@ contains trim_string = adjustl(string(idx:)) end function trim_string end module data_input -