From ef5a0f3d996f500c8b026cdb3686d8cdc38f61ac Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Thu, 24 Jul 2008 09:32:33 +0000 Subject: [PATCH] mld2p4: html/img1.png html/img32.png html/img52.png html/img58.png html/img71.png html/img83.png html/node11.html html/node12.html html/node3.html pdf pdf/Makefile pdf/abstract.tex pdf/background.tex pdf/bibliography.tex pdf/building.tex pdf/distribution.tex pdf/errors.tex pdf/figures pdf/gettingstarted.tex pdf/intro.tex pdf/license.tex pdf/overview.tex pdf/precs.tex pdf/title.tex pdf/tmp pdf/userguide.tex pdf/userhtml.tex pdf/userinterface.tex src src/Makefile src/abstract.tex src/background.tex src/bibliography.tex src/building.tex src/distribution.tex src/errors.tex src/figures src/gettingstarted.tex src/intro.tex src/license.tex src/overview.tex src/precs.tex src/title.tex src/tmp src/userguide.tex src/userhtml.tex src/userinterface.tex Docs: created SRC subdir. --- docs/html/img1.png | Bin 376 -> 368 bytes docs/html/img32.png | Bin 233 -> 241 bytes docs/html/img52.png | Bin 763 -> 804 bytes docs/html/img58.png | Bin 1177 -> 1355 bytes docs/html/img71.png | Bin 1299 -> 1387 bytes docs/html/img83.png | Bin 569 -> 564 bytes docs/html/node11.html | 6 +- docs/html/node12.html | 4 +- docs/html/node3.html | 2 +- docs/src/Makefile | 386 +++++++++++++++++++++++++++++++ docs/src/abstract.tex | 27 +++ docs/src/background.tex | 348 ++++++++++++++++++++++++++++ docs/src/bibliography.tex | 207 +++++++++++++++++ docs/src/building.tex | 242 ++++++++++++++++++++ docs/src/distribution.tex | 19 ++ docs/src/errors.tex | 20 ++ docs/src/gettingstarted.tex | 308 +++++++++++++++++++++++++ docs/src/intro.tex | 34 +++ docs/src/license.tex | 44 ++++ docs/src/overview.tex | 90 ++++++++ docs/src/precs.tex | 280 +++++++++++++++++++++++ docs/src/title.tex | 72 ++++++ docs/src/userguide.tex | 173 ++++++++++++++ docs/src/userhtml.tex | 149 ++++++++++++ docs/src/userinterface.tex | 443 ++++++++++++++++++++++++++++++++++++ 25 files changed, 2848 
insertions(+), 6 deletions(-) create mode 100644 docs/src/Makefile create mode 100644 docs/src/abstract.tex create mode 100644 docs/src/background.tex create mode 100644 docs/src/bibliography.tex create mode 100644 docs/src/building.tex create mode 100644 docs/src/distribution.tex create mode 100644 docs/src/errors.tex create mode 100644 docs/src/gettingstarted.tex create mode 100644 docs/src/intro.tex create mode 100644 docs/src/license.tex create mode 100644 docs/src/overview.tex create mode 100644 docs/src/precs.tex create mode 100644 docs/src/title.tex create mode 100644 docs/src/userguide.tex create mode 100644 docs/src/userhtml.tex create mode 100644 docs/src/userinterface.tex diff --git a/docs/html/img1.png b/docs/html/img1.png index be2b1fe905c791d598ade04560d5f42fc5259b0a..0423c177022d6bced713f8ddddf7b0f4a5b46097 100644 GIT binary patch delta 303 zcmV+~0nq;V0`LMMiBL{Q4GJ0x0000DNk~Le0000v0000U1Oos70TY;;u8|>3f9^>{ zK~yM_V_<*-eg=jO3|$Tk9Sp<)?hOe*l8;E$AixHtIY0y#1H*+C4Gav-t|XX%4nCl< zHn6cAVCd-RU|K!Lra0u~22GXO+1zjkYa3L zVDNX~Dfz-&fGoR-@fd@Z0$#r~;FA=`tCfKP0MeK+RSlb~2><{907*qo1w^hwV1hjQ Bc31!a delta 311 zcmV-70m%OF0{8+UiBL{Q4GJ0x0000DNk~Le0000w0000U1Oos70r;;Vt&t&2fA&d4 zK~yM_V_<*-76yg|4EYln?lGW&j0AKMFyQzABI~irVF!#35SoAE1Tg*K&jTRYK!OSA z0HGIbG&2XQ9F+fY1K1`810eDL{|Bf7WRO6Cz2t%qaMl43!G_=`fCz&R9Ay{4T;?et z66o#*h%^^7kl+CFJ_IoQ|NozXf1ej5&Ger?4kFL|fE8gD1T;|F@dF0rFol@U!&&(b zn9>z6Mgo5U1KYdg+bFUx-Wq^uRtBC43=>!{@GyWA$$xMPj6i|3W5Aj|SP&SYTyf!8m5_#_z{@#obuGr-TCmrII^fq{Y7)59eQNQ(opJPR`e17l}Eb;d+Rk@}XFmWqmsoSdA5 zgoKcg5DyOz8ygz~0|ONm6$uFmpmII)QWhY^SQ6wH%;50sMjDV)zopr0NS}sr~m)} delta 223 zcmey!_>xhvGr-TCmrII^fq{Y7)59eQNQ(inJPR`e17q4Gfv|~+BJ~v&6*)OM2?+@y zAt4?f9yT^M1_lNyDk>5Z5{VwJ0CL4IJwHE(ex>UGo#BK@5bW|HyIB-D}7Pyk-p((!-b<#4GG2Z3uh%Wc)#So U?fNTP7icYmr>mdKI;Vst0OI*c761SM diff --git a/docs/html/img52.png b/docs/html/img52.png index bf879aacc0b658748bda21d7609c647b212b27ce..a71e354237dec1b45269fca7f77e9268938db765 100644 GIT binary patch delta 707 
zcmV;!0zCcu1*8U$ZGWOkL_t(YiS1N9h!a5+{`M!y+$CWbLD9-p5Mq_c;ZK((pqvO^ zECd^I8$k=@(8j`pF@|Hr3m34l5VX89l|P7>N-Qjbfmwz?c1L|mx4j}$G^nR~J z1L2ehrl#L&YIJc7?G};C!sN2Uo2Zcv&jn>e zZ~M>bjAD2%hhc%)5!XA|9Wt}|fl0^%SJOgpAe*YFan@h1=J40<_EyU)8{OVc3V&uX6w8`8 zEB`5Uk%VF?Dfa-xsSU*$+`$W&CB2GA$(%McFFetrI7!rw$ejN5XY2TecxOI!(yx+3 pZ;APll;*O~m+vk;80oM@`~u~bpb(?kQkDP!002ovPDHLkV1jq-U}69O delta 666 zcmV;L0%iTA2Kxn&ZGU-5L_t(YiS1P}Z__{&{w}d&6DK%`fg*-@0mK%NI@feVLaNyi zOM}!OU0gHZkGt zYRP$eqJyQD_kYje45hOSb?V5~p)1&({9sq`fIppMW!DBq#0NJc5NZljwnAPVS(svd zaKw+j?9FmzYZu^Cn!Yf`V}*$`3Bn7h85=D)NGnT}QsdT8`RS2}HImZ0`<-_ia5Hro z$CaIgBFLC-5;30_Dv*gOr;3?;6Lgr9qBLMMY|z~G=zogFywEgT_>aE_PX9EIgp0=b2?Ypr(L z;~PxeDt|^#XN1+mhvbMa?Dfh0d}opwxxv&W_Y%zr?}5SPA#53KYovv=LwrF!2Kflw zn8{Pgh5bs-Gz;2F4{eW+7crjM2+8e~t-WBA!x@e5;Ts zeHL#p_S0s*!{Soa)05}(L-yGoKhkJE;rH+hi4!8q7%RPclK=n!07*qoM6N<$f(F@3 A$^ZZW diff --git a/docs/html/img58.png b/docs/html/img58.png index 0e817929d585ff18de72e0c9bc194d975b78ed47..f220846e6f3a0b538ba50a77a924fa8e5a3fd902 100644 GIT binary patch delta 1297 zcmV+s1@8Kp3Cjv0iBL{Q4GJ0x0000DNk~Le0003!0000X1Oos70WQ%|Y>^>Mf5}Ni zK~z|U?O0D}990wq%WM$BsV-ihpGS~t> zh2BPB#zy}gwfjwZJ!V|`8=dn^tTtdGO31Mkq*F#+4N7TEi~gKlJNrLUVXMgVV27JS zHb8N@h>}86EN~0pT*l6xP3gTO9~DlSS%v!$@l^t9N#WY`M42+escKHqfAHEI<%$pm z@f-8tu4lQrup*F0zAghb+&vqEKHBBu2hr31PvXAws82<9Q8`B|`wgGMKp(*K7)jx| zG540@NNZHS2Vr$=&Cn>1m=K(Hg>^6JqT-x69jKX{lO|-!i)v6*7Ey=XLv}QvOT}$G>Q~~lz5uoV9e5eU-<{y z^pAJ<5wTagi`ssfGY$6<9&^LC79Nl@i!N&*D3WbykTDnDVBptdwQsc-xre(FwE6?_ z#1bpiJZa|y&Qbo}e`9Q;DCI>pU9+_!YE1BbkgJBU))h0wqQ`71GamLSxw2u(HVH9X zW`mj^CD%o>W@9#`*G`&}uF{R{MB})~3z=hckM?MfZbxslcCBi@vO|S4YyD=~tGVLs zUQs!ey|q5NokGV}A=|BX!P!-L=2nF+j+ zq;1VKk_569I?1&ia`cDxvTqBLw~GsW<>jY&V^ zk6-m+|G=OBMGaA1S`49y1+pUM`4K9M6gIfAei-U=nU_!J_*c!ugNwt6w{Ncl|3+Gv>i~DX<%zwnxPt z;(ItPRVrU}#PRpNg)QaBabg@ltMy0Y>_m&A@5?Z71rt6V43nONb*F>XupnP6;*&&l zO)O!I+K38_4+(atS4Tnitj(&5{hR8sSgpsGSwV-s%hfSLUt5QR5x=7v*&HjuxkBCN zko?cLXScm#h^I5sdO!aN+v&0No4lWZ^xyt&%v@ji(f&MqF8L~z&*-5o(OKk*g)*jp 
z<@x69>b!YDRAw{c+*3EQRTX|^WpS+|NT1}^vdH{ub?4GQo;e*@L$(PB00000NkvXX Hu0mjfwlsLF delta 1118 zcmV-k1fl!O3YiHZiBL{Q4GJ0x0000DNk~Le0003z0000X1Oos70l^bGQjsA{e-}wa zK~z|U?N~jG8$}fU*86*2`^E(#hlDsI1f+>%MWl!*Vp4$=l)Zu`Ldqgh(8Y8VNZE)2 zr#Q|P^)_jsh>H|R6t;-Cf+hxpglOUlQSRmLP~eD$H#58T?t0hB-W?*5?n$0`^X9$p zy_xZQ<1tWyBGs|DwYtDX;{3S?e;>I~6~sdNP<5IIc%r>5JdZHOK*eyIT5JE}k^KO( zB~_bp>BAl}2RxGeR(jH;C>w*ever@)f;Z{&=AncA52-$9lXW-haEEiTQ>XJ~>Zlzt z^&Bt`#n9+ypstIC3ly&S1K{mIB5yHqh<9nzIyC%ykIt0IdC4=vcOIG&f8^99YPxtM zP?)Vt^B5%$*sbGP`O`&*%bo%&2De*|_Zil6+8H@n1qf##g8Dg&EV*`)jshu!lb7ob z<#GnZ->`O#7TyvD1}19_=j=PK3C8KG<1jCBVYFhrC|O2MEyf z>3|i7YK@JMy3sJf`#2*fD=QNDt3X^)u;&?HKu(}#9YuM0CldKK2eBE3D5q`^D28(` zx~ZxYDY>W-DG*U9n!{DnG-$MH;Jed}=F7ex+gWC7gw+|m<|i8wf6E;z9jXlJ0}09N zY>%d?;s}R&!vQn2_EgXk=F>7XTl z;3UN0)nYb8v0PC*VW^63sG2u{^U7pA)>;|6$wtxECi1vmHLU9QYQ0G4RfajC6M6>e z+rz(j+|jZ6Duz44e}gQ4VHGBNkuNhWhqL9|;41HMSok1J`sLa}-J{t|uVOTX5I87)SV z!1ca?7W`f%su|gCfVEXWCym3qB|;oNR^Mkm;H%O|bnjD~e{&`kg5sRVm-;z-tb3dJ z`Dorewbq&t{=|J0j6u`N^-TS29?{_n{ zJ^jH4+CFBs3*r5M$LQ;Ud{MJZBy0D6xsjNBj%J9j>0SN81UH4?CX8eSeblDfHgE2e|q>$o*$EDx$r`AxQpfb+Lt8ViIY5u`I8-N!v_#S4xx@^oia}4}LDR>9$t_ypELt-j5|TrWLed{b3t0S2FgBZU6uP07*qoM6N<$f-|inyZ`_I diff --git a/docs/html/img71.png b/docs/html/img71.png index 1fd3d052c4f6c60a1555134c04e223416718e204..b7d510e1613cb98f20f29e8d6aefc61f425e9ccb 100644 GIT binary patch delta 1330 zcmV-215(B!f9Xj? zK~z|U?O0ui8&woOlljSHCfONq(IUlWL6-P~21F2inFlM|KA3r4lm@D>Xu&+z;=^=> zn(ekUD898YwTNO7ujtm|`jbb&mpmx;G1Zs$Ep%I~MTqCzJCn>zl1VZw?Ft^q+@EvK zcfNb>-%J3eiPi7-2aTcyCKg@Ze_;6Y0OX%w7KKsaLj1iRC&xYK6|{Q2;02&9U11JhE<6h0+R7ybl_tV zmAQ%Sium!SJ8IDv0@DmtHmsLv0a&4-jVTf$auG4)8FbJX3}6f2LM6mDe=#5Kk2d5x zh)jr{4A2pX9wI7;rC%sg+R*%pYZUwf>dq5le)3=

b)ZjCya1k;SQ(qElx9NE(f# zYlEgcY0E6WN5Yf`g+|V9Z27YgkS`dA9!i}a&8A1}%>vNWLT?knHMB330>DF9{M<1x zs0nqN&7vhVmWXG;F`WbIf6&x_2moC=LO9@#<_H2kcmMheRrJU)kaK0|xnNqle9idS z$vda?7M(2!=pf)ycn%Hs^=J+m@oYvGnY~qYhuV+8Qp*h50`43T}kE6xiRf4<8&zixKUQ>B0>)B^IOH>1MJ_wROlZLPH?hHT>*En|f{T?4-~ zOviDNL$GPMbX9-$HNYm82986+n0E;K#6#%6c3f@0CIFwlN}2KquuC{vEWb(dl=mVn zvC~X`lvb)uNX~LS<~!3mOiJKHN8j=UT_(V}b(x&9oYc!+XkH2`#)vBJ zlOXteP(D*0Al(9GgkOx{BbT=uSm<3)?!xf%BbDG=@JdkgXCg>7{-ts|BoK1a;lh~j zdf~ROiQgCSf0Qk{@;!KcJGdLq1fcJ{Klt+GEDnDl8d?X=iJHMxQuEYcVEZ=Si27J= zlHkVCjcSx&k-ap+PaHBknvc(R(5Kwa+h?N}E`yq_d&3r&C%)n1Z%B8+AQYdg&OkAo z5}ir6Vm?RQ2*D?>gZ6McZ-Ws6p9Hu`bvs5-<#(d;e?=rR4z2;`79&Os{(=K`@F&Z_ ztg?lV8W`BNe6fX@vIDOIHx}slZ2K<4q zrDJw+Y?DZ1`K)jVKFPDQ z^S{3N=eY)^A`C+=o(uFdEm5lue}!%rsG!#K1K+{rFYBfn?pz)%KW67PICGaGFof1!pVmXv3m&swI-F_f^+8NM;qosO`{-H1!7vde{p?R*9$ zjw9PQvFA?W-;G)Fk)N(I4PgRPwzRDvNWN~*As=DHk_wT`^ns6L?Im?gCee+yjHJVo zSQz$bhoaTU1{64Zo%9%-ghP4cbZVm+@LbY_m@`miMEO1ma%QqAuU7b{yIU zW4Ddxd=z|o`q3Qce}|4QQ71TZqV)u-PL1RuIDO2$7 zZ8zL5o|@5a=SSlJlh5s0uV-^4I5|ht2wLHHu*2Un?OB2>f84afzt9fdk*-sLiofWK zZQdKpfo1K=ky*W`TlQZ7p2^yd$lwL}%{M$RCJ{6*4nFygj8D>#Ak#4ix&s2O^4j3z zVTgP{#d0VFg|BO>tqrr`8ckX6DPPvF#}Wij+=_G`R^PtR=UGeX~ie1 zB`m0{-wkT(h3*`eZ^Zd-W_>~!pJ6b9xLx+#LdT`d{w z6X3l1U`r|9!1095?}M61&Exd@T)adCjn`-lui>F9e`$tXe9hHG7M|3zy;4P~6V=ra zmmWF$;yEy|gCnG{Qf|z-D*C1d+Zme1Ff@1FUuL4oR{n zXa0qp7T;ESGmz#8SWj|mm2b{$lHf6M{!VNjhS*_ZM~{mxj5l)M^>~iO=`xL8Jt1Jv z`c96si~Joap0v2(nv%CdN`Dic&=&%I=b# q`Y}0M^qD=yWSE~!Zr0N|qwxs&6!V0000?A~t3y~?WfAz_58c363Q(6>+s|Q??Xwd(`A+F>2zf{PG`Uq)u zp&dbgs*|fK0wa;R(OyRhx)btuq@EF5VR;Xn)4|W^g{!WEQaorB*ciqCE!b0Sr5L9X z&S^#7gFiT*uSz>Yj^<0d+FMYfg}|n#L&A-WI7V$v4dw=3{|3keS?Y%Lf1ShkE2!%g zDW-9u)Ay9Tzq7bjd79e^yUQia|6>fCMai6zh{dN4UN@_zqriDt~pyuS-v8Q=waE$^l5l9 zX{*AcYRU(OKZ~L)X9-wfPqFkxlZG}u1{8lv>bBv|FnVzd<=m^h#LI8GrgdwbSRt~H vu=dNwuJ-7hyI7BB{ysWC3!g0~b4KGG0*7%(>h3ml00000NkvXXu0mjf-&x}- diff --git a/docs/html/node11.html b/docs/html/node11.html index 1a083452..005983e5 100644 --- a/docs/html/node11.html +++ 
b/docs/html/node11.html @@ -276,7 +276,7 @@ three steps: ALT="$i=1,\ldots,m$">;

  • prolongation and sum of the $w_i$'s, i.e. \begin{displaymath}
 M_{2LH-POST}^{-1} = M_{1L}^{-1} + \left( I - M_{1L}^{-1}A \right) M_{C}^{-1}.
diff --git a/docs/html/node12.html b/docs/html/node12.html
index a7ca5236..42cf3ddc 100644
--- a/docs/html/node12.html
+++ b/docs/html/node12.html
@@ -142,7 +142,7 @@ N_r = \left\{s \in W: |a_{rs}| > \theta \sqrt{|a_{rr}a_{ss}|} \right\}
  -->
 
 <IMG
- WIDTH= \begin{displaymath}
 S = I - \omega D^{-1} A ,
diff --git a/docs/html/node3.html b/docs/html/node3.html
index 65043f34..2c2e40ac 100644
--- a/docs/html/node3.html
+++ b/docs/html/node3.html
@@ -75,7 +75,7 @@ Ax=b,
  -->
 <TABLE WIDTH= \begin{displaymath}
 Ax=b,
diff --git a/docs/src/Makefile b/docs/src/Makefile
new file mode 100644
index 00000000..93b19db5
--- /dev/null
+++ b/docs/src/Makefile
@@ -0,0 +1,386 @@
+## $Id: Makefile 1524 2007-01-17 17:06:06Z sfilippo $
+##---------------------------------------------------------------------------
+## LaTeX Makefile
+## Copyright (C) 1996-2001  Michael Forman	Michael.Forman@Colorado.EDU
+## 
+## This program is free software; you can redistribute it and/or
+## modify it under the terms of the GNU General Public License
+## as published by the Free Software Foundation; either version 2
+## of the License, or (at your option) any later version.
+## 
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; if not, write to the Free Software
+## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+##
+## This copyright applies to this Makefile, and all perl scripts.  
+## The GPL does NOT apply to the actual content of the paper or thesis.  
+##---------------------------------------------------------------------------
+## 01.Dec,1996	forman	 Initial Makefile 
+## 01.Jun,1997	forman	 Added support for print, preview, and bibtex
+## 01.Jan,1998	stones	 tmp and lib directories to reduce clutter
+## 05.Feb,1998	forman	 Added the following functions:
+##			 .PHONY to prevent errors
+##			 generic TEXFILE definition with following patsubst's
+##			 vpath definitions
+##			 gzip, gunzip, tar, ci
+##			 search for \bibliography in tex file
+##			 conditional ifeq omits bibtex if unnecessary
+##			 documentation added
+## 30.Nov,1998	forman	 Added TOPFILE and SECFILE definitions to support
+##        		 texfiles with &1) | $(FILTER) +endef + +define latex-filter + @echo + @echo "----- latex -------------------------------------------------------" + @echo -n "Starting: "; date + @echo + cd tmp; ($(LATEX) $(HTMLFILE) 2>&1) | $(FILTER) +endef + +define ltx2html-filter + @echo + @echo "----- latex -------------------------------------------------------" + @echo -n "Starting: "; date + @echo + cd tmp; ($(LTX2HTML) $(HTMLFLAGS) -dir ../$(HTMLDIR) $(HTMLFILE) 2>&1) | $(FILTER) +endef + +#---------------------------------------------------------------------------- + +define bibtex + @echo + @echo "----- bibtex ---------------------------------------------------------" + @echo -n "Starting: "; date + @echo + cd tmp; $(BIBTEX) $(BASEFILE) +endef + +#---------------------------------------------------------------------------- + +define glosstex + @echo + @echo "----- glosstex -------------------------------------------------------" + @echo -n "Starting: "; date + @echo + cd tmp; $(GLOSSTEX) $(BASEFILE) $(GLOFILES) + cd tmp; $(MAKEIDX) $(GXS) -o $(GLX) -s glosstex.ist +endef + +#---------------------------------------------------------------------------- + +define makeindex + @echo + @echo "----- makeindex ------------------------------------------------------" + @echo -n "Starting: "; date + @echo + cd tmp; mv $(IDX) $(IDX)-; $(CLEANIDX) < $(IDX)- > $(IDX) + cd tmp; $(MAKEIDX) $(IDX) +endef + +#---------------------------------------------------------------------------- + +define finish + @ln -sf tmp/$@ . + @echo + @echo "----- finish ---------------------------------------------------------" + @echo -n "Start: "$(START); echo + @echo -n "Finish: "; date + @echo -n "Output: "; ls -l -o tmp/$@ + @echo -n "Target: " +endef + +define ltx2html-finish + @ln -sf tmp/$@ . 
+ @echo + @echo "----- finish ---------------------------------------------------------" + @echo -n "Start: "$(START); echo + @echo -n "Finish: "; date + @echo -n "Output: "; ls -l -o tmp/$@ + @echo -n "Target: " +endef + + diff --git a/docs/src/abstract.tex b/docs/src/abstract.tex new file mode 100644 index 00000000..45856568 --- /dev/null +++ b/docs/src/abstract.tex @@ -0,0 +1,27 @@ +\section*{Abstract} +\addcontentsline{toc}{section}{Abstract} +\textsc{MLD2P4 (Multi-Level Domain Decomposition Parallel Preconditioners Package based on +PSBLAS}) is a package of parallel algebraic multi-level preconditioners. +It implements various versions of one-level additive and of multi-level additive +and hybrid Schwarz algorithms. In the multi-level case, a purely algebraic approach +is applied to generate coarse-level corrections, so that no geometric background is needed +concerning the matrix to be preconditioned. The matrix is required to be square, real +or complex, with a symmetric sparsity pattern. + +MLD2P4 has been designed to provide scalable and easy-to-use preconditioners in the +context of the PSBLAS (Parallel Sparse Basic Linear Algebra Subprograms) +computational framework and can be used in conjunction with the Krylov solvers +available in this framework. MLD2P4 enables the user to easily specify different aspects +of a generic algebraic multilevel Schwarz preconditioner, thus allowing the user to search +for the ``best'' preconditioner for the problem at hand. + +The package has been designed employing object-oriented techniques, +using Fortran 95, with interfaces to additional third party libraries +such as UMFPACK, SuperLU and SuperLU\_Dist, that +can be exploited in building multi-level preconditioners. The parallel +implementation is based on a Single Program Multiple Data (SPMD) +paradigm for distributed-memory architectures; the inter-process data +communication is based on MPI and is managed mainly through PSBLAS. 
+ +This guide provides a brief description of the functionalities and +the user interface of MLD2P4. diff --git a/docs/src/background.tex b/docs/src/background.tex new file mode 100644 index 00000000..52e7674b --- /dev/null +++ b/docs/src/background.tex @@ -0,0 +1,348 @@ +\section{Multi-level Domain Decomposition Background\label{sec:background}} +\markboth{\textsc{MLD2P4 User's and Reference Guide}} + {\textsc{\ref{sec:background} Multi-level Domain Decomposition Background}} + +\emph{Domain Decomposition} (DD) preconditioners, coupled with Krylov iterative +solvers, are widely used in the parallel solution of large and sparse linear systems. +These preconditioners are based on the divide and conquer technique: the matrix +to be preconditioned is divided into submatrices, a ``local'' linear system +involving each submatrix is (approximately) solved, and the local solutions are used +to build a preconditioner for the whole original matrix. This process +often corresponds to dividing a physical domain associated to the original matrix +into subdomains, e.g. in a PDE discretization, to (approximately) solving the +subproblems corresponding to the subdomains and to building an approximate +solution of the original problem from the local solutions +\cite{Cai_Widlund_92,dd1_94,dd2_96}. + +\emph{Additive Schwarz} preconditioners are DD preconditioners using overlapping +submatrices, i.e.\ with some common rows, to couple the local information +related to the submatrices (see, e.g., \cite{dd2_96}). +The main motivation for choosing Additive Schwarz preconditioners is their +intrinsic parallelism. A drawback of these +preconditioners is that the number of iterations of the preconditioned solvers +generally grows with the number of submatrices. This may be a serious limitation +on parallel computers, since the number of submatrices usually matches the number +of available processors. 
Optimal convergence rates, i.e.\ iteration numbers +independent of the number of submatrices, can be obtained by correcting the +preconditioner through a suitable approximation of the original linear system +in a coarse space, which globally couples the information related to the single +submatrices. + +\emph{Two-level Schwarz} preconditioners are obtained +by combining basic (one-level) Sch\-warz preconditioners with a coarse-level +correction. In this context, the one-level preconditioner is often +called `smoother'. Different two-level preconditioners are obtained by varying the +choice of the smoother and of the coarse-level correction, and the +way they are combined \cite{dd2_96}. The same reasoning can be applied starting +from the coarse-level system, i.e.\ a coarse-space correction can be built +from this system, thus obtaining \emph{multi-level} preconditioners. + +It is worth noting that optimal preconditioners do not necessarily correspond +to minimum execution times. Indeed, to obtain effective multi-level preconditioners +a tradeoff between optimality of convergence and the cost of building and applying +the coarse-space corrections must be achieved. The choice of the number of levels, +i.e.\ of the coarse-space corrections, also affects the effectiveness of the +preconditioners. One more goal is to get convergence rates as insensitive +as possible to variations in the matrix coefficients. + +Two main approaches can be used to build coarse-space corrections. The geometric approach +applies coarsening strategies based on the knowledge of some physical grid associated +to the matrix and requires the user to define grid transfer operators from the fine +to the coarse levels and vice versa. This may prove difficult for complex geometries; +furthermore, suitable one-level preconditioners may be required to get efficient +interplay between fine and coarse levels, e.g.\ when matrices with highly varying coefficients +are considered. 
The algebraic approach builds coarse-space corrections using only matrix +information. It performs a fully automatic coarsening and enforces the interplay between +the fine and coarse levels by suitably choosing the coarse space and the coarse-to-fine +interpolation \cite{StubenGMD69_99}. + +MLD2P4 uses a pure algebraic approach for building the sequence of coarse matrices +starting from the original matrix. The algebraic approach is based on the \emph{smoothed +aggregation} algorithm \cite{BREZINA_VANEK,VANEK_MANDEL_BREZINA}. A decoupled version +of this algorithm is implemented, where the smoothed aggregation is applied locally +to each submatrix \cite{TUMINARO_TONG}. In the next two subsections we provide +a brief description of the multi-level Schwarz preconditioners and of the smoothed +aggregation technique as implemented in MLD2P4. For further details the user +is referred to \cite{para_04,aaecc_07,apnum_07,dd2_96}. + + +\subsection{Multi-level Schwarz Preconditioners\label{sec:multilevel}} + +The Multilevel preconditioners implemented in MLD2P4 are obtained by combining +AS preconditioners with coarse-space corrections; therefore +we first provide a sketch of the AS preconditioners. + +Given the linear system \Ref{system1}, +where $A=(a_{ij}) \in \Re^{n \times n}$ is a +nonsingular sparse matrix with a symmetric nonzero pattern, +let $G=(W,E)$ be the adjacency graph of $A$, where $W=\{1, 2, \ldots, n\}$ +and $E=\{(i,j) : a_{ij} \neq 0\}$ are the vertex set and the edge set of $G$, +respectively. Two vertices are called adjacent if there is an edge connecting +them. For any integer $\delta > 0$, a $\delta$-overlap +partition of $W$ can be defined recursively as follows. 
+Given a 0-overlap (or non-overlapping) partition of $W$, +i.e.\ a set of $m$ disjoint nonempty sets $W_i^0 \subset W$ such that +$\cup_{i=1}^m W_i^0 = W$, a $\delta$-overlap +partition of $W$ is obtained by considering the sets +$W_i^\delta \supset W_i^{\delta-1}$ obtained by including the vertices that +are adjacent to any vertex in $W_i^{\delta-1}$. + +Let $n_i^\delta$ be the size of $W_i^\delta$ and $R_i^{\delta} \in +\Re^{n_i^\delta \times n}$ the restriction operator that maps +a vector $v \in \Re^n$ onto the vector $v_i^{\delta} \in \Re^{n_i^\delta}$ +containing the components of $v$ corresponding to the vertices in +$W_i^\delta$. The transpose of $R_i^{\delta}$ is a +prolongation operator from $\Re^{n_i^\delta}$ to $\Re^n$. +The matrix $A_i^\delta=R_i^\delta A (R_i^\delta)^T \in +\Re^{n_i^\delta \times n_i^\delta}$ can be considered +as a restriction of $A$ corresponding to the set $W_i^{\delta}$. + +The \emph{classical one-level AS} preconditioner is defined by +\[ +M_{AS}^{-1}= \sum_{i=1}^m (R_i^{\delta})^T +(A_i^\delta)^{-1} R_i^{\delta}, +\] +where $A_i^\delta$ is assumed to be nonsingular. Its application +to a vector $v \in \Re^n$ within a Krylov solver requires the following +three steps: +\begin{enumerate} + \item restriction of $v$ as $v_i = R_i^{\delta} v$, $i=1,\ldots,m$; + \item solution of the linear systems $A_i^\delta w_i = v_i$, + $i=1,\ldots,m$; + \item prolongation and sum of the $w_i$'s, i.e. $w = \sum_{i=1}^m (R_i^{\delta})^T w_i$. +\end{enumerate} +Note that the linear systems at step 2 are usually solved approximately, +e.g.\ using incomplete LU factorizations such as ILU($p$), MILU($p$) and +ILU($p,t$) \cite[Chapter 10]{Saad_book}. + +A variant of the classical AS preconditioner that outperforms it +in terms of convergence rate and of computation and communication +time on parallel distributed-memory computers is the so-called \emph{Restricted AS +(RAS)} preconditioner~\cite{CAI_SARKIS,EFSTATHIOU}. 
It +is obtained by zeroing the components of $w_i$ corresponding to the +overlapping vertices when applying the prolongation. Therefore, +RAS differs from classical AS by the prolongation operators, +which are substituted by $(\tilde{R}_i^0)^T \in \Re^{n \times n_i^\delta}$, +where $\tilde{R}_i^0$ is obtained by zeroing the rows of $R_i^\delta$ +corresponding to the vertices in $W_i^\delta \backslash W_i^0$: +\[ +M_{RAS}^{-1}= \sum_{i=1}^m (\tilde{R}_i^0)^T +(A_i^\delta)^{-1} R_i^{\delta}. +\] +Analogously, the AS variant called \emph{AS with Harmonic extension (ASH)} +is defined by +\[ M_{ASH}^{-1}= \sum_{i=1}^m (R_i^{\delta})^T +(A_i^\delta)^{-1} \tilde{R}_i^0. +\] +We note that for $\delta=0$ the three variants of the AS preconditioner are +all equal to the block-Jacobi preconditioner. + +As already observed, the convergence rate of the one-level Schwarz +preconditioned iterative solvers deteriorates as the number $m$ of partitions +of $W$ increases \cite{dd1_94,dd2_96}. To reduce the dependency +of the number of iterations on the degree of parallelism we may +introduce a global coupling among the overlapping partitions by defining +a coarse-space approximation $A_C$ of the matrix $A$. +In a pure algebraic setting, $A_C$ is usually built with +a Galerkin approach. Given a set $W_C$ of \emph{coarse vertices}, +with size $n_C$, and a suitable restriction operator +$R_C \in \Re^{n_C \times n}$, $A_C$ is defined as +\[ +A_C=R_C A R_C^T +\] +and the coarse-level correction matrix to be combined with a generic +one-level AS preconditioner $M_{1L}$ is obtained as +\[ +M_{C}^{-1}= R_C^T A_C^{-1} R_C, +\] +where $A_C$ is assumed to be nonsingular. The application of $M_{C}^{-1}$ +to a vector $v$ corresponds to a restriction, a solution and +a prolongation step; the solution step, involving the matrix $A_C$, +may be carried out also approximately. + +The combination of $M_{C}$ and $M_{1L}$ may be +performed in either an additive or a multiplicative framework. 
+In the former case, the \emph{two-level additive} Schwarz preconditioner +is obtained: +\[ +M_{2LA}^{-1} = M_{C}^{-1} + M_{1L}^{-1}. +\] +Applying $M_{2LA}^{-1}$ to a vector $v$ within a Krylov solver +corresponds to applying $M_{C}^{-1}$ +and $M_{1L}^{-1}$ to $v$ independently and then summing up +the results. + +In the multiplicative case, the combination can be +performed by first applying the smoother $M_{1L}^{-1}$ and then +the coarse-level correction operator $M_{C}^{-1}$: +\[ +\begin{array}{l} +w = M_{1L}^{-1} v, \\ +z = w + M_{C}^{-1} (v-Aw); +\end{array} +\] +this corresponds to the following \emph{two-level hybrid pre-smoothed} +Schwarz preconditioner: +\[ +M_{2LH-PRE}^{-1} = M_{C}^{-1} + \left( I - M_{C}^{-1}A \right) M_{1L}^{-1}. +\] +On the other hand, by applying the smoother after the coarse-level correction, +i.e.\ by computing +\[ +\begin{array}{l} +w = M_{C}^{-1} v , \\ +z = w + M_{1L}^{-1} (v-Aw) , +\end{array} +\] +the \emph{two-level hybrid post-smoothed} +Schwarz preconditioner is obtained: +\[ +M_{2LH-POST}^{-1} = M_{1L}^{-1} + \left( I - M_{1L}^{-1}A \right) M_{C}^{-1}. +\] +One more variant of two-level hybrid preconditioner is obtained by applying +the smoother before and after the coarse-level correction. In this case, the +preconditioner is symmetric if $A$, $M_{1L}$ and $M_{C}$ are symmetric. + +As previously noted, on parallel computers the number of submatrices usually matches +the number of available processors. When the size of the system to be preconditioned +is very large, the use of many processors, i.e.\ of many small submatrices, often +leads to a large coarse-level system, whose solution may be computationally expensive. +On the other hand, the use of few processors often leads to local submatrices that +are too expensive to be processed on single processors, because of memory and/or +computing requirements. 
Therefore, it seems natural to use a recursive approach, +in which the coarse-level correction is re-applied starting from the current +coarse-level system. The corresponding preconditioners, called \emph{multi-level} +preconditioners, can significantly reduce the computational cost of preconditioning +with respect to the two-level case (see \cite[Chapter 3]{dd2_96}). +Additive and hybrid multilevel preconditioners +are obtained as direct extensions of the two-level counterparts. +For a detailed description of them, the reader is +referred to \cite[Chapter 3]{dd2_96}. +The algorithm for the application of a multi-level hybrid +post-smoothed preconditioner $M$ to a vector $v$, i.e.\ for the +computation of $w=M^{-1}v$, is reported, for +example, in Figure~\ref{fig:mlhpost_alg}. Here the number of levels +is denoted by $nlev$ and the levels are numbered in increasing order starting +from the finest one, i.e.\ the finest level is level 1; the coarse matrix +and the corresponding basic preconditioner at each level $l$ are denoted by $A_l$ and +$M_l$, respectively, with $A_1=A$. +% +\begin{figure}[t] +\begin{center} +\framebox{ +\begin{minipage}{.85\textwidth} {\small +\begin{tabbing} +\quad \=\quad \=\quad \=\quad \\[-1mm] +% +%! assign the finest matrix\\ +%$A_1 \leftarrow A$;\\[1mm] +%! define the number of levels $nlev$ \\[1mm] +%! define $nlev-1$ prolongators\\ +%$R_l^T, l=2, \ldots, nlev$;\\[1mm] +%! define $nlev-1$ coarser matrices\\ +%$A_l \leftarrow R_lA_{l-1}R_l^T, \; l=2, \ldots, nlev$;\\[1mm] +%! define the $nlev-1$ basic Schwarz preconditioners\\ +%$M_l$, basic preconditioner for $A_l \; l=1, \ldots, nlev-1$;\\[1mm] +%$! assign a vector $v$\\ +% +$v_1 = v$; \\[2mm] +\textbf{for $l=2, nlev$ do}\\[1mm] +\> ! transfer $v_{l-1}$ to the next coarser level\\ +\> $v_l = R_lv_{l-1}$ \\[1mm] +\textbf{endfor} \\[2mm] +! apply the coarsest-level correction\\[1mm] +$y_{nlev} = A_{nlev}^{-1} v_{nlev}$\\[2mm] +\textbf{for $l=nlev -1 , 1, -1$ do}\\[1mm] +\> ! 
transfer $y_{l+1}$ to the next finer level\\ +\> $y_l = R_{l+1}^T y_{l+1}$;\\[1mm] +\> ! compute the residual at the current level\\ +\> $r_l = v_l-A_l y_l$;\\[1mm] +\> ! apply the basic Schwarz preconditioner to the residual\\ +\> $r_l = M_l^{-1} r_l$\\[1mm] +\> ! update $y_l$\\ +\> $y_l = y_l+r_l$\\ +\textbf{endfor} \\[1mm] +$w = y_1$; +\end{tabbing} +} +\end{minipage} +} +\caption{Application of the multi-level hybrid post-smoothed preconditioner.\label{fig:mlhpost_alg}} +\end{center} +\end{figure} +% + + +\subsection{Smoothed Aggregation\label{sec:aggregation}} + +In order to define the restriction operator $R_C$, which is used to compute +the coarse-level matrix $A_C$, MLD2P4 uses the \emph{smoothed aggregation} +algorithm described in \cite{BREZINA_VANEK,VANEK_MANDEL_BREZINA}. +The basic idea of this algorithm is to build a coarse set of vertices +$W_C$ by suitably grouping the vertices of $W$ into disjoint subsets +(aggregates), and to define the coarse-to-fine space transfer operator $R_C^T$ by +applying a suitable smoother to a simple piecewise constant +prolongation operator, to improve the quality of the coarse-space correction. + +Three main steps can be identified in the smoothed aggregation procedure: +\begin{enumerate} + \item coarsening of the vertex set $W$, to obtain $W_C$; + \item construction of the prolongator $R_C^T$; + \item application of $R_C$ and $R_C^T$ to build $A_C$. +\end{enumerate} +%\textbf{NOTA: Controllare cosa fa trilinos dopo il primo passo.} + +To perform the coarsening step, we have implemented the aggregation algorithm sketched +in \cite{apnum_07}. According to \cite{VANEK_MANDEL_BREZINA}, a modification of +this algorithm has actually been considered, +in which each aggregate $N_r$ is made of vertices of $W$ that are \emph{strongly coupled} +to a certain root vertex $r \in W$, i.e.\ +\[ N_r = \left\{s \in W: |a_{rs}| > \theta \sqrt{|a_{rr}a_{ss}|} \right\} + \cup \left\{ r \right\} , +\] +for a given $\theta \in [0,1]$. 
+Since this algorithm has a sequential nature, a \emph{decoupled} version of +it has been chosen, where each processor $i$ independently applies the algorithm to +the set of vertices $W_i^0$ assigned to it in the initial data distribution. This +version is embarrassingly parallel, since it does not require any data communication. +On the other hand, it may produce non-uniform aggregates near boundary vertices, +i.e.\ near vertices adjacent to vertices in other processors, and is strongly +dependent on the number of processors and on the initial partitioning of the matrix $A$. +Nevertheless, this algorithm has been chosen for the implementation in MLD2P4, +since it has been shown to produce good results in practice +\cite{aaecc_07,apnum_07,TUMINARO_TONG}. + +The prolongator $P_C=R_C^T$ is built starting from a \emph{tentative prolongator} +$P \in \Re^{n \times n_C}$, defined as +\begin{equation} +P=(p_{ij}), \quad p_{ij}= +\left\{ \begin{array}{ll} +1 & \quad \mbox{if} \; i \in V^j_C \\ +0 & \quad \mbox{otherwise} +\end{array} \right. . +\label{eq:tent_prol} +\end{equation} +$P_C$ is obtained by +applying to $P$ a smoother $S \in \Re^{n \times n}$: +\begin{equation} +P_C = S P, +\label{eq:smoothed_prol} +\end{equation} +in order to remove oscillatory components from the range of the prolongator +and hence to improve the convergence properties of the multi-level +Schwarz method \cite{BREZINA_VANEK,StubenGMD69_99}. +A simple choice for $S$ is the damped Jacobi smoother: +\begin{equation} +S = I - \omega D^{-1} A , +\label{eq:jac_smoother} +\end{equation} +where the value of $\omega$ can be chosen +using some estimate of the spectral radius of $D^{-1}A$ \cite{BREZINA_VANEK}. 
+% +%\textbf{NOTA: filtering di $A$ nello smoothing, da implementare?} +% + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: "userguide" +%%% End: diff --git a/docs/src/bibliography.tex b/docs/src/bibliography.tex new file mode 100644 index 00000000..b68ae5e9 --- /dev/null +++ b/docs/src/bibliography.tex @@ -0,0 +1,207 @@ +%\section{Bibliography\label{sec:bib}} +\begin{thebibliography}{99} +\addcontentsline{toc}{section}{\refname} +\markboth{\textsc{MLD2P4 User's and Reference Guide}} + {\textsc{References}} + +%\let\refname\relax + +% +%\bibitem{PARA04FOREST} +%G.~Bella, S.~Filippone, A.~De Maio, A., Testa, M.: +%A Simulation Model for Forest Fires. +%In: Dongarra, J., Madsen, K., Wasniewski, J. (eds.): +%Proceedings of PARA~04 Workshop on State of the Art +%in Scientific Computing. Lecture Notes in Computer Science, 3732. Berlin: +%Springer, 2005 +% +\bibitem{BREZINA_VANEK} +M.~Brezina, P.~Van{\v e}k, +{\em A Black-Box Iterative Solver Based on a Two-Level Schwarz Method}, +Computing, 63, 1999, 233--263. +% +\bibitem{para_04} +A.~Buttari, P.~D'Ambra, D.~di Serafino, S.~Filippone, +{\em Extending PSBLAS to Build Parallel Schwarz Preconditioners}, +in J.~Dongarra, K.~Madsen, J.~Wasniewski, editors, +Proceedings of PARA~04 Workshop on State of the Art +in Scientific Computing, Lecture Notes in Computer Science, +Springer, 2005, 593--602. +% +\bibitem{aaecc_07} A.~Buttari, P.~D'Ambra, D.~di~Serafino, S.~Filippone, +{\em 2LEV-D2P4: a package of high-performance preconditioners +for scientific and engineering applications}, +Applicable Algebra in Engineering, Communications and Computing, +18, 3, 2007, 223--239. +%Published online: 13 February 2007, {\tt http://dx.doi.org/10.1007/s00200-007-0035-z} +% +\bibitem{apnum_07} P.~D'Ambra, S.~Filippone, D.~di~Serafino, +{\em On the Development of PSBLAS-based Parallel Two-level Schwarz Preconditioners}, +Applied Numerical Mathematics, Elsevier Science, +57, 11--12, 2007, 1181--1196.
+%published online 3 February 2007, {\tt +% http://dx.doi.org/10.1016/j.apnum.2007.01.006} + +%% \bibitem{DOUGLAS} +%% R.E.~Bank and C.C.~Douglas, +%% {\em SMMP: Sparse Matrix Multiplication Package}, +%% Advances in Computational Mathematics, 1993, 1, 127-137. +%% (See also {\tt http://www.mgnet.org/~douglas/ccd-codes.html}) +% +% +%% \bibitem{CAI_SAAD} +%% X.~C.~Cai and Y.~Saad, +%% {\em Overlapping Domain Decomposition Algorithms for General Sparse Matrices}, +%% Numerical Linear Algebra with Applications, 3(3), pp.~221--237, 1996. +% +\bibitem{CAI_SARKIS} +X.~C.~Cai, M.~Sarkis, +{\em A Restricted Additive Schwarz Preconditioner for General Sparse Linear Systems}, +SIAM Journal on Scientific Computing, 21, 2, 1999, 792--797. +% +\bibitem{Cai_Widlund_92} +X.~C.~Cai, O.~B.~Widlund, +{\em Domain Decomposition Algorithms for Indefinite Elliptic Problems}, +SIAM Journal on Scientific and Statistical Computing, 13, 1, 1992, 243--258. +% +\bibitem{dd1_94} +T.~Chan and T.~Mathew, +{\em Domain Decomposition Algorithms}, +in A.~Iserles, editor, Acta Numerica 1994, 61--143. +Cambridge University Press. +% +\bibitem{UMFPACK} +T.A.~Davis, +{\em Algorithm 832: UMFPACK - an Unsymmetric-pattern Multifrontal +Method with a Column Pre-ordering Strategy}, +ACM Transactions on Mathematical Software, 30, 2004, 196--199. +(See also {\tt http://www.cise.ufl.edu/\textasciitilde{}davis/}) +% +\bibitem{SUPERLU} +J.W.~Demmel, S.C.~Eisenstat, J.R.~Gilbert, X.S.~Li and J.W.H.~Liu, +{\em A supernodal approach to sparse partial pivoting}, +SIAM Journal on Matrix Analysis and Applications, 20, 3, 1999, 720--755. +% +\bibitem{blas3} +J.~J.~Dongarra, J.~Du Croz, I.~S.~Duff, S.~Hammarling, +\emph{A set of Level 3 Basic Linear Algebra Subprograms}, +ACM Transactions on Mathematical Software, 16, 1990, 1--17. +% +\bibitem{blas2} +J.~J.~Dongarra, J.~Du Croz, S.~Hammarling, R.~J.~Hanson, +\emph{An extended set of FORTRAN Basic Linear Algebra Subprograms}, +ACM Transactions on Mathematical Software, 14, 1988, 1--17.
+% +\bibitem{BLACS} +J.~J.~Dongarra and R.~C.~Whaley, +{\em A User's Guide to the BLACS v.~1.1}, +LAPACK Working Note 94, Tech.\ Rep.\ UT-CS-95-281, University of +Tennessee, March 1995 (updated May 1997). +% +%\bibitem{sblas_97} +%I.~Duff, M.~Marrone, G.~Radicati and C.~Vittoli, +%{\em Level 3 Basic Linear Algebra Subprograms for Sparse Matrices: +%a User Level Interface}, +%ACM Transactions on Mathematical Software, 23(3), pp.~379--401, 1997. +% +%\bibitem{sblas_02} +%I.~Duff, M.~Heroux and R.~Pozo, +%{\em An Overview of the Sparse Basic Linear +%Algebra Subprograms: the New Standard from the BLAS Technical Forum}, +%ACM Transactions on Mathematical Software, 28(2), pp.~239--267, 2002. +% +\bibitem{EFSTATHIOU} +E.~Efstathiou, M.~J.~Gander, +{\em Why Restricted Additive Schwarz Converges Faster than Additive Schwarz}, +BIT Numerical Mathematics, 43, 2003, 945--959. +% +\bibitem{PSBLASGUIDE} +S.~Filippone, A.~Buttari, +{\em PSBLAS-2.3 User's Guide. A Reference Guide for the Parallel Sparse BLAS Library}, 2008, +available from \texttt{http://www.ce.uniroma2.it/psblas/}. +% +\bibitem{psblas_00} +S.~Filippone, M.~Colajanni, +{\em PSBLAS: A Library for Parallel Linear Algebra +Computation on Sparse Matrices}, +ACM Transactions on Mathematical Software, 26, 4, 2000, 527--550. +% +\bibitem{MPI2} +W.~Gropp, S.~Huss-Lederman, A.~Lumsdaine, E.~Lusk, B.~Nitzberg, W.~Saphir, M.~Snir, +{\em MPI: The Complete Reference. Volume 2 - The MPI-2 Extensions}, +MIT Press, 1998. +% +\bibitem{blas1} +C.~L.~Lawson, R.~J.~Hanson, D.~Kincaid, F.~T.~Krogh, +\emph{Basic Linear Algebra Subprograms for FORTRAN usage}, +ACM Transactions on Mathematical Software, 5, 1979, 308--323. +% +\bibitem{SUPERLUDIST} +X.~S.~Li, J.~W.~Demmel, {\em SuperLU\_DIST: A Scalable Distributed-memory +Sparse Direct Solver for Unsymmetric Linear Systems}, +ACM Transactions on Mathematical Software, 29, 2, 2003, 110--140.
+% +%\bibitem{KIVA3PSBLAS} +%S.~Filippone, P.~D'Ambra, M.~Colajanni, +%{\em Using a Parallel Library of Sparse Linear Algebra in a Fluid Dynamics +%Applications Code on Linux Clusters}, +%in G.~Joubert, A.~Murli, F.~Peters, M.~Vanneschi, editors, +%Parallel Computing - Advances \& Current Issues, +%pp.~441--448, Imperial College Press, 2002. +% +%\bibitem{METIS} +%Karypis, G. and Kumar, V., +%{\em {METIS}: Unstructured Graph Partitioning and Sparse Matrix +% Ordering System}. +%Minneapolis, MN 55455: University of Minnesota, Department of +% Computer Science, 1995. +%Internet Address: {\verb|http://www.cs.umn.edu/~karypis|}. +%\bibitem{BLAS1} +%Lawson, C., Hanson, R., Kincaid, D. and Krogh, F., +% Basic {L}inear {A}lgebra {S}ubprograms for {F}ortran usage, +%{ACM Trans. Math. Softw.} vol.~{5}, 38--329, 1979. +% +%\bibitem{machiels} +%{Machiels, L. and Deville, M.} +%{\em Fortran 90: An entry to object-oriented programming for the solution +% of partial differential equations.} +%{ACM Trans. Math. Softw.} vol.~{23}, 32--49. +%\bibitem{metcalf} +%{Metcalf, M., Reid, J. and Cohen, M.} +%{\em Fortran 95/2003 explained.} +%{Oxford University Press}, 2004. +% +\bibitem{Saad_book} +Y.~Saad, +\emph{Iterative methods for sparse linear systems}, 2nd edition, +SIAM, 2003 + +\bibitem{dd2_96} +B.~Smith, P.~Bjorstad, W.~Gropp, +{\em Domain Decomposition: Parallel Multilevel Methods for Elliptic +Partial Differential Equations}, +Cambridge University Press, 1996. +% +\bibitem{MPI1} +M.~Snir, S.~Otto, S.~Huss-Lederman, D.~Walker, J.~Dongarra, +{\em MPI: The Complete Reference. Volume 1 - The MPI Core}, second edition, +MIT Press, 1998. +%% +\bibitem{StubenGMD69_99} +K.~St\"{u}ben, +{\em Algebraic Multigrid (AMG): an Introduction with Applications}, +in A.~Sch\"{u}ller, U.~Trottenberg, C.~Oosterlee, editors, Multigrid, +Academic Press, 2000. 
+% +\bibitem{TUMINARO_TONG} +R.~S.~Tuminaro, C.~Tong, +{\em Parallel Smoothed Aggregation Multigrid: Aggregation Strategies on Massively Parallel Machines}, +in J.~Donnelley, editor, Proceedings of SuperComputing 2000, Dallas, 2000. +% +\bibitem{VANEK_MANDEL_BREZINA} +P.~Van{\v e}k, J.~Mandel and M.~Brezina, +{\em Algebraic Multigrid by Smoothed Aggregation for Second and Fourth Order Elliptic Problems}, +Computing, 56, 1996, 179--196. +% + +\end{thebibliography} diff --git a/docs/src/building.tex b/docs/src/building.tex new file mode 100644 index 00000000..7c1d699e --- /dev/null +++ b/docs/src/building.tex @@ -0,0 +1,242 @@ +\section{Configuring and Building MLD2P4\label{sec:building}} +\markboth{\textsc{MLD2P4 User's and Reference Guide}} + {\textsc{\ref{sec:building} Configuring and Building MLD2P4}} +To build MLD2P4 it is necessary to set up a Makefile with appropriate +values for your system; this is done by means of the \verb|configure| +script. The distribution also includes the autoconf and automake +sources employed to generate the script, but usually these are not needed +to build the software. + +MLD2P4 is implemented almost entirely in Fortran~95, with some +interfaces to external libraries in C; the Fortran compiler +must support the Fortran~95 standard plus the extension TR15581, which +enhances the usability of \verb|ALLOCATABLE| variables. Most modern +Fortran compilers support this language level. In particular, this is +supported by the GNU Fortran compiler as of version 4.2.0; however, we +recommend using the latest available release (4.3.1 at the time of +this writing). +The software defines data types and interfaces for +real and complex data, in both single and double precision.
+ +\subsection{Prerequisites} + +The following base libraries are needed: +\begin{description} +\item[BLAS] \cite{blas3,blas2,blas1} Many vendors provide optimized versions + of the Basic Linear Algebra Subprograms; if no vendor version is + available for a given platform, the ATLAS software + (\verb!http://math-atlas.sourceforge.net/!) + may be employed. The reference BLAS from Netlib + (\verb|http://www.netlib.org/blas|) are meant to define the standard + behaviour of the BLAS interface, so they are not optimized for any + particular platform, and should only be used as a last + resort. Note that BLAS computations form a relatively small part of + the MLD2P4/PSBLAS computations; they are however critical when using + preconditioners based on the UMFPACK or SuperLU third party + libraries. +\item[MPI] \cite{MPI2,MPI1} A version of MPI is available on most + high-performance computing systems; only version 1.1 is required. +\item[BLACS] \cite{BLACS} The Basic Linear Algebra Communication Subprograms + are available in source form from \verb|http://www.netlib.org/blacs|; + some vendors include them in their parallel computing + support libraries. + \item[PSBLAS] \cite{PSBLASGUIDE,psblas_00} Parallel Sparse BLAS is + available from \\ \verb|http://www.ce.uniroma2.it/psblas|; version 2.3 + (or later) is required. Indeed, all the prerequisites + listed so far are also prerequisites of PSBLAS. + To build the MLD2P4 library it is necessary to get access to + the source PSBLAS directory employed to build the version under use; after + the MLD2P4 build process completes, only the compiled form of the + PSBLAS library is necessary to build user applications. +\end{description} + +Please note that the four previous libraries must have Fortran +interfaces compatible with MLD2P4; +usually this means that they should all be built with the same +compiler as MLD2P4.
+ +\subsection{Optional third party libraries} + +We provide interfaces to the following third-party software libraries; +note that these are optional, but if you enable them some defaults +for multilevel preconditioners may change to reflect their presence. + +\begin{description} +\item[UMFPACK] \cite{UMFPACK} + A sparse direct factorization package available from \\ + \verb|http://www.cise.ufl.edu/research/sparse/umfpack/|; + provides serial factorization and triangular system solution for double + precision real and complex data. We have tested + versions 4.4 and 5.1. +\item[SuperLU] \cite{SUPERLU} + A sparse direct factorization package available from \\ + \verb|http://crd.lbl.gov/~xiaoye/SuperLU/|; provides serial + factorization and triangular system solution for single and double precision, + real and complex data. We have tested versions 3.0 and 3.1. +\item[SuperLU\_Dist] \cite{SUPERLUDIST} + A sparse direct factorization package available + from the same site as SuperLU; provides parallel factorization and + triangular system solution for double precision real and complex data. + We have tested version 2.1. +\end{description} + +\subsection{Configuration options} + +To build MLD2P4 the first step is to use the \verb|configure| script +in the main directory to generate the necessary makefile(s). + +As a minimal example consider the following: +\begin{verbatim} +./configure --with-psblas=/home/user/PSBLAS/psblas-2.3 +\end{verbatim} +which assumes that the various MPI compilers and support libraries are +available in the standard directories on the system, and specifies +only the PSBLAS build directory (note that the latter directory must +be specified with an {\em absolute} path). +The full set of options may be looked at by issuing the command +\verb|./configure --help|, which produces: +\begin{verbatim} +`configure' configures MLD2P4 1.0 to adapt to many kinds of systems. + +Usage: ./configure [OPTION]... [VAR=VALUE]... 
+ +To assign environment variables (e.g., CC, CFLAGS...), specify them as +VAR=VALUE. See below for descriptions of some of the useful variables. + +Defaults for the options are specified in brackets. + +Configuration: + -h, --help display this help and exit + --help=short display options specific to this package + --help=recursive display the short help of all the included packages + -V, --version display version information and exit + -q, --quiet, --silent do not print `checking...' messages + --cache-file=FILE cache test results in FILE [disabled] + -C, --config-cache alias for `--cache-file=config.cache' + -n, --no-create do not create output files + --srcdir=DIR find the sources in DIR [configure dir or `..'] + +Installation directories: + --prefix=PREFIX install architecture-independent files in PREFIX + [/usr/local] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [PREFIX] + +By default, `make install' will install all the files in +`/usr/local/bin', `/usr/local/lib' etc. You can specify +an installation prefix other than `/usr/local' using `--prefix', +for instance `--prefix=$HOME'. + +For better control, use the options below. 
+ +Fine tuning of the installation directories: + --bindir=DIR user executables [EPREFIX/bin] + --sbindir=DIR system admin executables [EPREFIX/sbin] + --libexecdir=DIR program executables [EPREFIX/libexec] + --sysconfdir=DIR read-only single-machine data [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] + --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --libdir=DIR object code libraries [EPREFIX/lib] + --includedir=DIR C header files [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc [/usr/include] + --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] + --datadir=DIR read-only architecture-independent data [DATAROOTDIR] + --infodir=DIR info documentation [DATAROOTDIR/info] + --localedir=DIR locale-dependent data [DATAROOTDIR/locale] + --mandir=DIR man documentation [DATAROOTDIR/man] + --docdir=DIR documentation root [DATAROOTDIR/doc/mld2p4] + --htmldir=DIR html documentation [DOCDIR] + --dvidir=DIR dvi documentation [DOCDIR] + --pdfdir=DIR pdf documentation [DOCDIR] + --psdir=DIR ps documentation [DOCDIR] + +Optional Packages: + --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] + --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) + --with-psblas The source directory for PSBLAS, for example, + --with-psblas=/opt/packages/psblas-2.3 + --with-libs List additional link flags here. For example, + --with-libs=-lspecial_system_lib or + --with-libs=-L/path/to/libs + --with-clibs additional CLIBS flags to be added: will prepend + to CLIBS + --with-flibs additional FLIBS flags to be added: will prepend + to FLIBS + --with-library-path additional LIBRARYPATH flags to be added: will + prepend to LIBRARYPATH + --with-include-path additional INCLUDEPATH flags to be added: will + prepend to INCLUDEPATH + --with-module-path additional MODULE_PATH flags to be added: will + prepend to MODULE_PATH + --with-umfpack=LIBNAME Specify the library name for UMFPACK library. 
+ Default: "-lumfpack -lamd" + --with-umfpackdir=DIR Specify the directory for UMFPACK library and + includes. + --with-superlu=LIBNAME Specify the library name for SUPERLU library. + Default: "-lslu" + --with-superludir=DIR Specify the directory for SUPERLU library and + includes. + --with-superludist=LIBNAME + Specify the libname for SUPERLUDIST library. + Requires you also specify SuperLU. Default: "-lslud" + --with-superludistdir=DIR + Specify the directory for SUPERLUDIST library and + includes. + +Some influential environment variables: + FC Fortran compiler command + FCFLAGS Fortran compiler flags + LDFLAGS linker flags, e.g. -L if you have libraries in a + nonstandard directory + LIBS libraries to pass to the linker, e.g. -l + CC C compiler command + CFLAGS C compiler flags + CPPFLAGS C/C++/Objective C preprocessor flags, e.g. -I if + you have headers in a nonstandard directory + CPP C preprocessor + MPICC MPI C compiler command + +Use these variables to override the choices made by `configure' or to help +it to find libraries and programs with nonstandard names/locations. + +Report bugs to . +\end{verbatim} +Thus, a sample build with libraries in installation +directories specific to the GNU 4.3 compiler suite might be as +follows, specifying only the UMFPACK external package: +\begin{verbatim} + ./configure --with-psblas=/home/user/psblas-2.3/ \ + --with-libs="-L/usr/local/BLAS/gnu43 -L/usr/local/BLACS/gnu43" \ + --with-blacs=-lmpiblacs --with-umfpackdir=/usr/local/UMFPACK/gnu43 +\end{verbatim} +Once the configure script has completed execution, it will have +generated the file \verb|Make.inc| which will then be used by all +Makefiles in the directory tree.
+ +To build the library the user will now enter +\begin{verbatim} +make +\end{verbatim} +followed (optionally) by +\begin{verbatim} +make install +\end{verbatim} + +\subsection{Example and test programs\label{sec:ex_and_test}} +The package contains the \verb|examples| and \verb|tests| directories; +both of them are further divided into \verb|fileread| and +\verb|pdegen| subdirectories. Their purpose is as follows: +\begin{description} +\item[\tt examples] contains a set of simple example programs with a + predefined choice of preconditioners, selectable via integer + values. These are intended to let the user get acquainted with the + multilevel preconditioners. +\item[\tt tests] contains a set of more sophisticated examples that + will allow the user, via the input files in the \verb|runs| + subdirectories, to experiment with the full range of preconditioners + implemented in the library. +\end{description} +The \verb|fileread| directories contain sample programs that read +sparse matrices from files, according to the Matrix Market or the +Harwell-Boeing storage format; the \verb|pdegen| programs instead generate +matrices in full parallel mode from the discretization of a sample PDE. diff --git a/docs/src/distribution.tex b/docs/src/distribution.tex new file mode 100644 index 00000000..6541de15 --- /dev/null +++ b/docs/src/distribution.tex @@ -0,0 +1,19 @@ +\section{Code Distribution\label{sec:distribution}} +\markboth{\textsc{MLD2P4 User's and Reference Guide}} + {\textsc{\ref{sec:distribution} Code Distribution}} + +\noindent +MLD2P4 is available from the web site +\begin{quotation} +\texttt{http://www.mld2p4.it} +\end{quotation} +where contact points for further information can also be found. +To report bugs or ask general usage questions, please send an email to +\texttt{bugreport@mld2p4.it}.
+ + +The software is available under a modified BSD license, as specified +in Appendix~\ref{sec:license}; please note that some of the optional +third party libraries may be licensed under a different and more +stringent license, most notably the GPL, and this should be taken into +account when treating derived works. diff --git a/docs/src/errors.tex b/docs/src/errors.tex new file mode 100644 index 00000000..375a5f69 --- /dev/null +++ b/docs/src/errors.tex @@ -0,0 +1,20 @@ +\section{Error Handling\label{sec:errors}} +\markboth{\textsc{MLD2P4 User's and Reference Guide}} + {\textsc{\ref{sec:errors} Error handling}} + +The error handling in MLD2P4 is based on the PSBLAS (version 2) error +handling. Error conditions are signaled via an integer argument +\verb|info|; whenever an error condition is detected, an error trace +stack is built by the library up to the top-level, user-callable +routine. This routine will then decide, according to the user +preferences, whether the error should be handled by terminating the +program or by returning the error condition to the user code, which +will then take action, and whether +an error message should be printed. These options may be set by using +the PSBLAS error handling routines; for further details see the PSBLAS +User's Guide \cite{PSBLASGUIDE}. + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: "userguide" +%%% End: diff --git a/docs/src/gettingstarted.tex b/docs/src/gettingstarted.tex new file mode 100644 index 00000000..df94329c --- /dev/null +++ b/docs/src/gettingstarted.tex @@ -0,0 +1,308 @@ +\section{Getting Started\label{sec:started}} +\markboth{\textsc{MLD2P4 User's and Reference Guide}} + {\textsc{\ref{sec:started} Getting Started}} + +We describe the basics for building and applying MLD2P4 one-level and multi-level +Schwarz preconditioners with the Krylov solvers included in PSBLAS \cite{PSBLASGUIDE}. 
+The following steps are required: +\begin{enumerate} +\item \emph{Declare the preconditioner data structure}. It is a derived data type, + \verb|mld_|\-\emph{x}\verb|prec_| \verb|type|, where \emph{x} may be \verb|s|, \verb|d|, \verb|c| + or \verb|z|, according to the basic data type of the sparse matrix + (\verb|s| = real single precision; \verb|d| = real double precision; + \verb|c| = complex single precision; \verb|z| = complex double precision). + This data structure is accessed by the user only through the MLD2P4 routines, + following an object-oriented approach. +\item \emph{Allocate and initialize the preconditioner data structure, according to + a preconditioner type chosen by the user}. This is performed by the routine + \verb|mld_precinit|, which also sets defaults for each preconditioner + type selected by the user. The defaults associated to each preconditioner + type are given in Table~\ref{tab:precinit}, where the strings used by + \verb|mld_precinit| to identify the preconditioner types are also given. + Note that these strings are valid also if uppercase letters are substituted by + corresponding lowercase ones. +\item \emph{Modify the selected preconditioner type, by properly setting + preconditioner parameters.} This is performed by the routine \verb|mld_precset|. + This routine must be called only if the user wants to modify the default values + of the parameters associated to the selected preconditioner type, to obtain a variant + of the preconditioner. Examples of use of \verb|mld_precset| are given in + Section~\ref{sec:examples}; a complete list of all the + preconditioner parameters and their allowed and default values is provided in + Section~\ref{sec:userinterface}, Tables~\ref{tab:p_type}-\ref{tab:p_coarse}. +\item \emph{Build the preconditioner for a given matrix.} This is performed by + the routine \verb|mld_precbld|. 
+\item \emph{Apply the preconditioner at each iteration of a Krylov solver.} + This is performed by the routine \verb|mld_precaply|. When using the PSBLAS Krylov solvers, + this step is completely transparent to the user, since \verb|mld_precaply| is called + by the PSBLAS routine implementing the Krylov solver (\verb|psb_krylov|). +\item \emph{Free the preconditioner data structure}. This is performed by + the routine \verb|mld_| \verb|precfree|. This step is complementary to step 1 and should + be performed when the preconditioner is no longer used. +\end{enumerate} +A detailed description of the above routines is given in Section~\ref{sec:userinterface}. +Examples showing the basic use of MLD2P4 are reported in Section~\ref{sec:examples}. + +Note that the Fortran 95 module \verb|mld_prec_mod|, containing the definition of the +preconditioner data type and the interfaces to the routines of MLD2P4, +must be used in any program calling such routines. +The modules \verb|psb_base_mod|, for the sparse matrix and communication descriptor +data types, and \verb|psb_krylov_mod|, for interfacing with the +Krylov solvers, must also be used (see Section~\ref{sec:examples}). + +\ \\ +\textbf{Remark 1.} The coarsest-level solver used by the default two-level +preconditioner has been chosen by taking into account that, on parallel +machines, it often leads to the smallest execution time when applied to +linear systems coming from finite-difference discretizations of basic +elliptic PDE problems, considered as standard tests for multi-level Schwarz +preconditioners \cite{aaecc_07,apnum_07}. However, this solver does +not necessarily correspond to the smallest number of iterations of the +preconditioned Krylov method, which is usually obtained by applying +a direct solver to the coarsest-level system, e.g.\ based on the LU +factorization (see Section~\ref{sec:userinterface} +for the coarsest-level solvers available in MLD2P4).
+ +\ \\ +\textbf{Remark 2.} The include path for MLD2P4 must override +those for PSBLAS, e.g.\ the latter must come first in the sequence +passed to the compiler, as the MLD2P4 version of the Krylov solver +interfaces must override that of PSBLAS. This will change in the future +when the support for the \verb|class| statement becomes widespread in Fortran +compilers. + + +\begin{table}[th] +\begin{center} +%{\small +\begin{tabular}{|l|l|p{7.8cm}|} +\hline +\textsc{type} & \textsc{string} & \textsc{default preconditioner} \\ \hline +No preconditioner &\verb|'NOPREC'|& Considered only to use the PSBLAS + Krylov solvers with no preconditioner. \\ \hline +Diagonal & \verb|'DIAG'| & --- \\ \hline +Block Jacobi & \verb|'BJAC'| & Block Jacobi with ILU(0) on the local blocks.\\ \hline +Additive Schwarz & \verb|'AS'| & Restricted Additive Schwarz (RAS), + with overlap 1 and ILU(0) on the local blocks. \\ \hline +Multilevel &\verb|'ML'| & Multi-level hybrid preconditioner (additive on the + same level and multiplicative through the levels), + with post-smoothing only. + Number of levels: 2. + Post-smoother: RAS with overlap 1 and ILU(0) + on the local blocks. + Aggregation: decoupled smoothed aggregation with + threshold $\theta = 0$. + Coarsest matrix: distributed among the processors. + Coarsest-level solver: + 4 sweeps of the block-Jacobi solver, + with LU (or ILU) factorization of the blocks + (UMFPACK for the double precision versions and + SuperLU for the single precision ones, if the packages + have been installed; ILU(0), otherwise). \\ +\hline +\end{tabular} +%} +\end{center} + +\caption{Preconditioner types, corresponding strings and default choices. +\label{tab:precinit}} +\end{table} + +\subsection{Examples\label{sec:examples}} + +The code reported in Figure~\ref{fig:ex_default} shows how to set and apply the default +multi-level preconditioner available in the real double precision version +of MLD2P4 (see Table~\ref{tab:precinit}). 
This preconditioner is chosen +by simply specifying \verb|'ML'| as second argument of \verb|mld_precinit| +(a call to \verb|mld_precset| is not needed) and is applied with the BiCGSTAB +solver provided by PSBLAS. As previously observed, the modules \verb|psb_base_mod|, +\verb|mld_prec_mod| and \verb|psb_krylov_mod| must be used by the example program. + +The part of the code concerning the +reading and assembling of the sparse matrix and the right-hand side vector, performed +through the PSBLAS routines for sparse matrix and vector management, is not reported +here for brevity; the statements concerning the deallocation of the PSBLAS +data structure are neglected too. +The complete code can be found in the example program file \verb|mld_dexample_ml.f90|, +in the directory \verb|examples/fileread| of the MLD2P4 tree (see +Section~\ref{sec:ex_and_test}). +For details on the use of the PSBLAS routines, see the PSBLAS User's +Guide \cite{PSBLASGUIDE}. + +The setup and application of the default multi-level +preconditioners for the real single precision and the complex, single and double +precision, versions are obtained with straightforward modifications of the previous +example (see Section~\ref{sec:userinterface} for details). If these versions are installed, +the corresponding Fortran 95 codes are available in \verb|examples/fileread/|. + +\begin{figure}[tbp] +\begin{center} +\begin{minipage}{.90\textwidth} +{\small +\begin{verbatim} + use psb_base_mod + use mld_prec_mod + use psb_krylov_mod +... ... +! +! sparse matrix + type(psb_dspmat_type) :: A +! sparse matrix descriptor + type(psb_desc_type) :: desc_A +! preconditioner + type(mld_dprec_type) :: P +! right-hand side and solution vectors + real(kind(1.d0)) :: b(:), x(:) +... ... +! +! initialize the parallel environment + call psb_init(ictxt) + call psb_info(ictxt,iam,np) +... ... +! +! read and assemble the matrix A and the right-hand side b +! using PSBLAS routines for sparse matrix / vector management +... 
... +! +! initialize the default multi-level preconditioner, i.e. hybrid +! Schwarz, using RAS (with overlap 1 and ILU(0) on the blocks) +! as post-smoother and 4 block-Jacobi sweeps (with UMFPACK LU +! on the blocks) as distributed coarse-level solver + call mld_precinit(P,'ML',info) +! +! build the preconditioner + call mld_precbld(A,desc_A,P,info) +! +! set the solver parameters and the initial guess + ... ... +! +! solve Ax=b with preconditioned BiCGSTAB + call psb_krylov('BICGSTAB',A,P,b,x,tol,desc_A,info) + ... ... +! +! deallocate the preconditioner + call mld_precfree(P,info) +! +! deallocate other data structures + ... ... +! +! exit the parallel environment + call psb_exit(ictxt) + stop +\end{verbatim} +} +\end{minipage} +\caption{Setup and application of the default multi-level Schwarz preconditioner. +\label{fig:ex_default}} +\end{center} +\end{figure} + +Different versions of multi-level preconditioners can be obtained by changing +the default values of the preconditioner parameters. The code reported in +Figure~\ref{fig:ex_3lh} shows how to set a three-level hybrid Schwarz +preconditioner, which uses block Jacobi with ILU(0) on the +local blocks as post-smoother, has a coarsest matrix replicated on the processors, +and solves the coarsest-level system with the LU factorization from UMFPACK~\cite{UMFPACK}. +The number of levels is specified by using \verb|mld_precinit|; the other +preconditioner parameters are set by calling \verb|mld_precset|. Note that +the type of multilevel framework (i.e.\ multiplicative among the levels +with post-smoothing only) is not specified since it is the default +set by \verb|mld_precinit|. + +Figure~\ref{fig:ex_3la} shows how to +set a three-level additive Schwarz preconditioner, +which uses RAS, with overlap 1 and ILU(0) on the blocks, +as pre- and post-smoother, and applies five block-Jacobi sweeps, with +the UMFPACK LU factorization on the blocks, as distributed coarsest-level +solver. 
Again, \verb|mld_precset| is used only to set +non-default values of the parameters (see Tables~\ref{tab:p_type}--\ref{tab:p_coarse}). +In both cases, the construction and the application of the preconditioner +are carried out as for the default multi-level preconditioner. +The code fragments shown in Figures~\ref{fig:ex_3lh}--\ref{fig:ex_3la} are +included in the example program file \verb|mld_dexample_ml.f90| too. + +Finally, Figure~\ref{fig:ex_1l} shows the setup of a one-level +additive Schwarz preconditioner, i.e.\ RAS with overlap 2. The corresponding +example program is available in \verb|mld_dexample_| \verb|1lev.f90|. + +For all the previous preconditioners, example programs where the sparse matrix and +the right-hand side are generated by discretizing a PDE with Dirichlet +boundary conditions are also available in the directory \verb|examples/pdegen|. + +\ \\ +\textbf{Remark 3.} Any PSBLAS-based program using the basic preconditioners +implemented in PSBLAS 2.0, i.e.\ the diagonal and block-Jacobi ones, +can use the diagonal and block-Jacobi preconditioners +implemented in MLD2P4 without any change in the code. +The PSBLAS-based program must only be recompiled +and linked to the MLD2P4 library. +\\ + + +\begin{figure}[tbh] +\begin{center} +\begin{minipage}{.90\textwidth} +{\small +\begin{verbatim} +... ... +! set a three-level hybrid Schwarz preconditioner, which uses +! block Jacobi (with ILU(0) on the blocks) as post-smoother, +! a coarsest matrix replicated on the processors, and the +! LU factorization from UMFPACK as coarse-level solver + call mld_precinit(P,'ML',info,nlev=3) + call mld_precset(P,mld_smoother_type_,'BJAC',info) + call mld_precset(P,mld_coarse_mat_,'REPL',info) + call mld_precset(P,mld_coarse_solve_,'UMF',info) +... ...
+\end{verbatim} +} +\end{minipage} + +\caption{Setup of a hybrid three-level Schwarz preconditioner.\label{fig:ex_3lh}} +\end{center} +\end{figure} + +\begin{figure}[tbh] +\begin{center} +\begin{minipage}{.90\textwidth} +{\small +\begin{verbatim} +... ... +! set a three-level additive Schwarz preconditioner, which uses +! RAS (with overlap 1 and ILU(0) on the blocks) as pre- and +! post-smoother, and 5 block-Jacobi sweeps (with UMFPACK LU +! on the blocks) as distributed coarsest-level solver + call mld_precinit(P,'ML',info,nlev=3) + call mld_precset(P,mld_ml_type_,'ADD',info) + call mld_precset(P,mld_smoother_pos_,'TWOSIDE',info) + call mld_precset(P,mld_coarse_sweeps_,5,info) +... ... +\end{verbatim} +} +\end{minipage} + +\caption{Setup of an additive three-level Schwarz preconditioner.\label{fig:ex_3la}} +\end{center} +\end{figure} + +\begin{figure}[tbh] +\begin{center} +\begin{minipage}{.90\textwidth} +{\small +\begin{verbatim} +... ... +! set RAS with overlap 2 and ILU(0) on the local blocks + call mld_precinit(P,'AS',info) + call mld_precset(P,mld_sub_ovr_,2,info) +... ... +\end{verbatim} +} +\end{minipage} +\caption{Setup of a one-level Schwarz preconditioner.\label{fig:ex_1l}} +\end{center} +\end{figure} + + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: "userguide" +%%% End: diff --git a/docs/src/intro.tex b/docs/src/intro.tex new file mode 100644 index 00000000..864ab263 --- /dev/null +++ b/docs/src/intro.tex @@ -0,0 +1,34 @@ +\section{Introduction}\label{sec:intro} +\markboth{\underline{MLD2P4 User's and Reference Guide}} + {\underline{\ref{sec:intro} Introduction}} + +The MLD2P4 library provides .... + + +\subsection{Programming model} + +The MLD2P4 library is based on the Single Program Multiple Data +(SPMD) programming model: each process participating in the +computation performs the same actions on a chunk of data. Parallelism +is thus data-driven. 
+ +Because of this structure, many subroutines coordinate their action +across the various processes, thus providing an implicit +synchronization point, and therefore \emph{must} be +called simultaneously by all processes participating in the +computation. +However there are many cases where no synchronization, and indeed no +communication among processes, is implied. + +Throughout this user's guide each subroutine will be clearly indicated +as: +\begin{description} +\item[Synchronous:] must be called simultaneously by all the + processes in the relevant communication context; +\item[Asynchronous:] may be called in a totally independent manner. +\end{description} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: "userguide" +%%% End: diff --git a/docs/src/license.tex b/docs/src/license.tex new file mode 100644 index 00000000..005ffa08 --- /dev/null +++ b/docs/src/license.tex @@ -0,0 +1,44 @@ +\section{License\label{sec:license}} +\markboth{\textsc{MLD2P4 User's and Reference Guide}} + {\textsc{\ref{sec:license} License}} + +The MLD2P4 is freely distributable under the following copyright +terms: {\small +\begin{verbatim} + MLD2P4 version 1.0 +MultiLevel Domain Decomposition Parallel Preconditioners Package + based on PSBLAS (Parallel Sparse BLAS version 2.3) + +(C) Copyright 2008 + + Salvatore Filippone University of Rome Tor Vergata + Alfredo Buttari University of Rome Tor Vergata + Pasqua D'Ambra ICAR-CNR, Naples + Daniela di Serafino Second University of Naples + + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. 
The name of the MLD2P4 group or the names of its contributors may + not be used to endorse or promote products derived from this + software without specific written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MLD2P4 GROUP OR ITS CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +\end{verbatim} +} \ No newline at end of file diff --git a/docs/src/overview.tex b/docs/src/overview.tex new file mode 100644 index 00000000..d4537981 --- /dev/null +++ b/docs/src/overview.tex @@ -0,0 +1,90 @@ +\section{General Overview\label{sec:overview}} +\markboth{\textsc{MLD2P4 User's and Reference Guide}} + {\textsc{\ref{sec:overview} General Overview}} + +The \textsc{Multi-Level Domain Decomposition Parallel Preconditioners Package based on +PSBLAS (MLD2P4}) provides \emph{multi-level Schwarz preconditioners}~\cite{dd2_96}, +to be used in the iterative solutions of sparse linear systems: +\begin{equation} +Ax=b, +\label{system1} +\end{equation} +where $A$ is a square, real or complex, sparse matrix with a symmetric sparsity pattern. +% +%\textbf{NOTA: Caso non simmetrico, aggregazione con $(A+A^T)$ fatta! 
+%Dovremmo implementare uno smoothed prolongator +%adeguato e fare qualcosa di consistente anche con 1-lev Schwarz.} +% +These preconditioners have the following general features: +\begin{itemize} +\item both \emph{additive and hybrid multilevel} variants are implemented, +i.e.\ variants that are additive among the levels and inside each level, and variants +that are multiplicative among the levels and additive inside each level; +the basic Additive Schwarz (AS) preconditioners are obtained by considering only one level; +\item a \emph{purely algebraic} approach is used to +generate a sequence of coarse-level corrections to a basic AS preconditioner, without +explicitly using any information on the geometry of the original problem (e.g.\ the +discretization of a PDE). The \emph{smoothed aggregation} technique is applied +as algebraic coarsening strategy~\cite{BREZINA_VANEK,VANEK_MANDEL_BREZINA}. +\end{itemize} + +The package is written in \emph{Fortran~95}, following an +\emph{object-oriented approach} through the exploitation of features +such as abstract data type creation, functional +overloading and dynamic memory management. +% , while providing a smooth +% path towards the integration in legacy application codes. +The parallel implementation is based +on a Single Program Multiple Data (SPMD) paradigm for distributed-memory architectures. +Single and double precision implementations of MLD2P4 are available for both the +real and the complex case, that can be used through a single interface. + + +MLD2P4 has been designed to implement scalable and easy-to-use multilevel preconditioners +in the context of the \emph{PSBLAS (Parallel Sparse BLAS) +computational framework}~\cite{psblas_00}. 
+PSBLAS is a library originally developed to address the parallel implementation of +iterative solvers for sparse linear systems, by providing basic linear algebra +operators and data management facilities for distributed sparse matrices; it +also includes parallel Krylov solvers, built on top of the basic PSBLAS kernels. +The preconditioners available in MLD2P4 can be used with these Krylov solvers. +The choice of PSBLAS has been mainly motivated by the need for +a portable and efficient software infrastructure implementing ``de facto'' standard +parallel sparse linear algebra kernels, to pursue goals such as performance, +portability, modularity and extensibility in the development of the preconditioner +package. On the other hand, the implementation of MLD2P4 has led to some +revisions and extensions of the PSBLAS kernels, leading to the +recent PSBLAS 2.0 version~\cite{PSBLASGUIDE}. The inter-process communication required +by MLD2P4 is encapsulated into the PSBLAS routines, except in a few cases where +MPI~\cite{MPI1} is explicitly called. Therefore, MLD2P4 can be run on any parallel +machine where PSBLAS and MPI implementations are available. + +MLD2P4 has a layered and modular software architecture where three main layers can be identified. +The lower layer consists of the PSBLAS kernels, the middle one implements +the construction and application phases of the preconditioners, and the upper one +provides a uniform and easy-to-use interface to all the preconditioners. +This architecture allows for different levels of use of the package: +a few black-box routines at the upper layer allow non-expert users to easily +build any preconditioner available in MLD2P4 and to apply it within a PSBLAS Krylov solver. +On the other hand, the routines of the middle and lower layers can be used and extended +by expert users to build new versions of multi-level Schwarz preconditioners. 
+We provide here a description of the upper-layer routines, but not of the +medium-layer ones. + +This guide is organized as follows. General information on the distribution of the source code +is reported in Section~\ref{sec:distribution}, while details on the configuration +and installation of the package are given in Section~\ref{sec:building}. A description of +multi-level Schwarz preconditioners based on smoothed aggregation is provided +in Section~\ref{sec:background}, to help the users in choosing among the different preconditioners +implemented in MLD2P4. The basics for building and applying the preconditioners +with the Krylov solvers implemented in PSBLAS are reported in Section~\ref{sec:started}, where the +Fortran 95 codes of a few sample programs are also shown. A reference guide for +the upper-layer routines of MLD2P4, that are the user interface, is provided +in Section~\ref{sec:userinterface}. The error handling mechanism used by the package is briefly described +in Section~\ref{sec:errors}. The copyright terms concerning the distribution and modification +of MLD2P4 are reported in Appendix~\ref{sec:license}. + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: "userguide" +%%% End: diff --git a/docs/src/precs.tex b/docs/src/precs.tex new file mode 100644 index 00000000..510606cf --- /dev/null +++ b/docs/src/precs.tex @@ -0,0 +1,280 @@ +\section{Preconditioner routines} +\label{sec:precs} +\markboth{\underline{MLD2P4 User's and Reference Guide}} + {\underline{\ref{sec:precs} Preconditioners}} + +% \section{Preconditioners} +\label{sec:psprecs} +The MLD2P4 library contains the implementation of many preconditioning +techniques. The preconditioners may be applied as normal ``base'' +preconditioners; alternatively multiple ``base'' preconditioners may +be combined in a multilevel framework. 
+ +The base (one-level) preconditioners include: +\begin{itemize} +\item Diagonal Scaling +\item Block Jacobi +\item Additive Schwarz, Restricted Additive Schwarz and + Additive Schwarz with Harmonic extensions; +\end{itemize} +The Jacobi and Additive Schwarz preconditioners can make use of the +following solvers: +\begin{itemize} +\item Level-$p$ Incomplete LU factorization ($ILU(p)$); +\item Threshold Incomplete LU factorization ($ILU(\tau,p)$); +\item Complete LU factorization by means of the following optional + external packages: +\begin{itemize} +\item UMFPACK; +\item SuperLU; +\item SuperLU\_Dist. +\end{itemize} +\end{itemize} + +The supporting data type and subroutine interfaces are defined in the +module \verb|mld_prec_mod|; the module also overrides the variables +and type definitions of \verb|psb_prec_mod| so as to function as a +drop-in replacement for the PSBLAS methods. Thus if the user does not +wish to employ the additional MLD2P4 capabilities, it is possible to +migrate an existing PSBLAS program without any source code +modifications; only a recompilation is needed. + +%% We also provide a companion package of multi-level Additive +%% Schwarz preconditioners called MLD2P4; this is actually a family of +%% preconditioners since there is the possibility to choose between +%% many variants, and is currently in an experimental state. Its +%% documentation is planned to appear after stabilization of the +%% package, which will characterize release 2.1 of our library. + + + + +\subroutine{mld\_precinit}{Initialize a preconditioner} + +\syntax{call mld\_precinit}{prec, ptype, info} +\syntax*{call mld\_precinit}{prec, ptype, info, nlev} + +\begin{description} +\item[Type:] Asynchronous. +\item[\bf On Entry] +\item[ptype] the type of preconditioner. +Scope: {\bf global} \\ +Type: {\bf required}\\ +Intent: {\bf in}.\\ +Specified as: a character string, see usage notes. +\item[nlev] Number of levels in a multilevel preconditioner. 
+Scope: {\bf global} \\ +Type: {\bf optional}\\ +Specified as: an integer value, see usage notes. +%% \item[rs] +%% Scope: {\bf global} \\ +%% Type: {\bf optional}\\ +%% Specified as: a long precision real number. +\item[\bf On Exit] + +\item[prec] +Scope: {\bf local} \\ +Type: {\bf required}\\ +Intent: {\bf inout}.\\ +Specified as: a preconditioner data structure \precdata. +\item[info] +Scope: {\bf global} \\ +Type: {\bf required}\\ +Intent: {\bf out}.\\ +Error code: if no error, 0 is returned. +\end{description} +\subsection*{Usage Notes} +%% The PSBLAS 2.0 contains a number of preconditioners, ranging from a +%% simple diagonal scaling to 2-level domain decomposition. These +%% preconditioners may use the SuperLU or the UMFPACK software, if +%% installed; see~\cite{SUPERLU,UMFPACK}. +Legal inputs to this subroutine are interpreted depending on the +$ptype$ string as follows\footnote{The string is case-insensitive}: +\begin{description} +\item[NONE] No preconditioning, i.e. the preconditioner is just a copy + operator. +\item[DIAG] Diagonal scaling; each entry of the input vector is + multiplied by the reciprocal of the sum of the absolute values of + the coefficients in the corresponding row of matrix $A$; +\item[BJAC] Precondition by a factorization of the + block-diagonal of matrix $A$, where block boundaries are determined + by the data allocation boundaries for each process; requires no + communication. +\item[AS] Additive Schwarz; default is to apply the Restricted + Additive Schwarz variant, with an $ILU(0)$ factorization +\item[ML] Multilevel preconditioner. +\end{description} + + + +\subroutine{mld\_precset}{Set preconditioner features} + +\syntax{call mld\_precset}{prec, what, val, info, ilev} + + +\begin{description} +\item[Type:] Asynchronous. 
+\item[\bf On Entry] +\item[prec] the preconditioner.\\ +Scope: {\bf local} \\ +Type: {\bf required}\\ +Intent: {\bf inout}.\\ +Specified as: an already initialized preconditioner data structure \precdata\\ +\item[what] The feature to be set. \\ +Scope: {\bf local} \\ +Type: {\bf required}\\ +Intent: {\bf in}.\\ +Specified as: an integer constant. Symbolic names are available in +the library module, see usage notes for legal values. +\item[val] The value to set the chosen feature to. \\ +Scope: {\bf local} \\ +Type: {\bf required}\\ +Intent: {\bf in}.\\ +Specified as: an integer, double precision or character variable. +Symbolic names for some choices are available in the library module, +see usage notes for legal values. +\item[ilev] The level of a multilevel preconditioner to which the + feature choice should apply.\\ +Scope: {\bf global} \\ +Type: {\bf optional}\\ +Specified as: an integer value, see usage notes. +\end{description} + +\begin{description} +\item[\bf On Return] +\item[prec] the preconditioner.\\ +Scope: {\bf local} \\ +Type: {\bf required}\\ +Intent: {\bf inout}.\\ +Specified as: a preconditioner data structure \precdata\\ +\item[info] Error code.\\ +Scope: {\bf local} \\ +Type: {\bf required} \\ +Intent: {\bf out}.\\ +An integer value; 0 means no error has been detected. +\end{description} + +\subsection*{Usage Notes} +Legal inputs to this subroutine are interpreted depending on the value +of the \verb|what| input, as follows: +\begin{description} +\item[mld\_coarse\_mat\_] +\end{description} + + +\subroutine{mld\_precbld}{Builds a preconditioner} + +\syntax{call mld\_precbld}{a, desc\_a, prec, info} + +\begin{description} +\item[Type:] Synchronous. +\item[\bf On Entry] +\item[a] the system sparse matrix. +Scope: {\bf local} \\ +Type: {\bf required}\\ +Intent: {\bf in}, target.\\ +Specified as: a sparse matrix data structure \spdata. 
+\item[prec] the preconditioner.\\ +Scope: {\bf local} \\ +Type: {\bf required}\\ +Intent: {\bf inout}.\\ +Specified as: an already initialized preconditioner data structure \precdata\\ +\item[desc\_a] the problem communication descriptor. +Scope: {\bf local} \\ +Type: {\bf required}\\ +Intent: {\bf in}, target.\\ +Specified as: a communication descriptor data structure \descdata. +%% \item[upd] +%% Scope: {\bf global} \\ +%% Type: {\bf optional}\\ +%% Intent: {\bf in}.\\ +%% Specified as: a character. +\end{description} + +\begin{description} +\item[\bf On Return] +\item[prec] the preconditioner.\\ +Scope: {\bf local} \\ +Type: {\bf required}\\ +Intent: {\bf inout}.\\ +Specified as: a preconditioner data structure \precdata\\ +\item[info] Error code.\\ +Scope: {\bf local} \\ +Type: {\bf required} \\ +Intent: {\bf out}.\\ +An integer value; 0 means no error has been detected. +\end{description} + + + +\subroutine{mld\_precaply}{Preconditioner application routine} + +\syntax{call mld\_precaply}{prec,x,y,desc\_a,info,trans,work} +\syntax*{call mld\_precaply}{prec,x,desc\_a,info,trans} + +\begin{description} +\item[Type:] Synchronous. +\item[\bf On Entry] +\item[prec] the preconditioner. +Scope: {\bf local} \\ +Type: {\bf required}\\ +Intent: {\bf in}.\\ +Specified as: a preconditioner data structure \precdata. +\item[x] the source vector. +Scope: {\bf local} \\ +Type: {\bf required}\\ +Intent: {\bf inout}.\\ +Specified as: a double precision array. +\item[desc\_a] the problem communication descriptor. +Scope: {\bf local} \\ +Type: {\bf required}\\ +Intent: {\bf in}.\\ +Specified as: a communication data structure \descdata. +\item[trans] +Scope: {\bf } \\ +Type: {\bf optional}\\ +Intent: {\bf in}.\\ +Specified as: a character. +\item[work] an optional work space. +Scope: {\bf local} \\ +Type: {\bf optional}\\ +Intent: {\bf inout}.\\ +Specified as: a double precision array. +\end{description} + +\begin{description} +\item[\bf On Return] +\item[y] the destination vector. 
+Scope: {\bf local} \\ +Type: {\bf required}\\ +Intent: {\bf inout}.\\ +Specified as: a double precision array. +\item[info] Error code.\\ +Scope: {\bf local} \\ +Type: {\bf required} \\ +Intent: {\bf out}.\\ +An integer value; 0 means no error has been detected. +\end{description} + + + +\subroutine{mld\_prec\_descr}{Prints a description of current preconditioner} + +\syntax{call mld\_prec\_descr}{prec} + +\begin{description} +\item[Type:] Asynchronous. +\item[\bf On Entry] +\item[prec] the preconditioner. +Scope: {\bf local} \\ +Type: {\bf required}\\ +Intent: {\bf in}.\\ +Specified as: a preconditioner data structure \precdata. +\end{description} + + + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: "userguide" +%%% End: diff --git a/docs/src/title.tex b/docs/src/title.tex new file mode 100644 index 00000000..831759b8 --- /dev/null +++ b/docs/src/title.tex @@ -0,0 +1,72 @@ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Contents: The title page +% $Id: title.tex 1999 2007-10-29 15:25:27Z sfilippo $ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\ifcase\pdfoutput % We're not running pdftex +{\Large\bfseries MLD2P4\\[.8ex] User's and Reference Guide}\\ +\emph{\large A guide for the Multi-Level Domain Decomposition \\[.6ex] +Parallel Preconditioners Package +based on PSBLAS} +{\bfseries Pasqua D'Ambra}\\ + ICAR-CNR, Naples, Italy\\[3ex] +{\bfseries Daniela di Serafino}\\ + Second University of Naples, Italy\\[3ex] +{\bfseries Salvatore Filippone} \\ + University of Rome ``Tor Vergata'', Italy +%\\[10ex] +%\today +Software version: 1.0\\ +%\today +July 24, 2008 +\or +\pdfbookmark{MLD2P4 User's and Reference Guide}{title} +\newlength{\centeroffset} +%\setlength{\centeroffset}{-0.5\oddsidemargin} +%\addtolength{\centeroffset}{0.5\evensidemargin} +%\addtolength{\textwidth}{-\centeroffset} +\thispagestyle{empty} +\vspace*{\stretch{1}} +\noindent\hspace*{\centeroffset}\makebox[0pt][l]{\begin{minipage}{\textwidth} 
+\flushright +{\Huge\bfseries MLD2P4\\[.8ex] User's and Reference Guide +} +\noindent\rule[-1ex]{\textwidth}{5pt}\\[2.5ex] +\hfill\emph{\Large A guide for the Multi-Level Domain Decomposition \\[.6ex] +Parallel Preconditioners Package +based on PSBLAS} +\end{minipage}} + +\vspace{\stretch{1}} +\noindent\hspace*{\centeroffset}\makebox[0pt][l]{\begin{minipage}{\textwidth} +\flushright +{\large\bfseries Pasqua D'Ambra}\\ +\large ICAR-CNR, Naples, Italy\\[3ex] +{\large\bfseries Daniela di Serafino}\\ +\large Second University of Naples, Italy\\[3ex] +{\large\bfseries Salvatore Filippone} \\ +\large University of Rome ``Tor Vergata'', Italy +%\\[10ex] +%\today +\end{minipage}} + +\vspace{\stretch{1}} +\noindent\hspace*{\centeroffset}\makebox[0pt][l]{\begin{minipage}{\textwidth} +\flushright +\large Software version: 1.0\\ +%\today +\large July 24, 2008 +\end{minipage}} +%\addtolength{\textwidth}{\centeroffset} +\vspace{\stretch{2}} +\fi + +\endinput + +% + +% Local Variables: +% TeX-master: "userguide" +% mode: latex +% mode: flyspell +% End: diff --git a/docs/src/userguide.tex b/docs/src/userguide.tex new file mode 100644 index 00000000..b040f2ae --- /dev/null +++ b/docs/src/userguide.tex @@ -0,0 +1,173 @@ +\documentclass[a4paper,twoside,11pt]{article} +\usepackage{pstricks} +\usepackage{fancybox} +\usepackage{amsfonts} +\usepackage{ifpdf} +% \usepackage{minitoc} +% \setcounter{minitocdepth}{2} +\usepackage[bookmarks=true, + bookmarksnumbered=true, + bookmarksopen=false, + plainpages=false, + pdfpagelabels, + colorlinks, + citecolor=red, + linkcolor=blue]{hyperref} +\usepackage{ifthen} +\usepackage{graphicx} +\newtheorem{theorem}{Theorem} +\newtheorem{corollary}{Corollary} +\usepackage{rotating} +%\newboolean{mtc} +%\setboolean{mtc}{true} + +\pdfoutput=1 +\relax +\pdfcompresslevel=0 %-- 0 = none, 9 = best +\pdfinfo{ %-- Info dictionary of PDF output /Author (PD, DdS, SF) + /Title (MultiLevel Domain Decomposition Parallel Preconditioners Package + based on PSBLAS, V. 
1.0) + /Subject (MultiLevel Domain Decomposition Parallel Preconditioners Package) + /Keywords (Parallel Numerical Software, Algebraic Multilevel Preconditioners, Sparse Iterative Solvers, PSBLAS, MPI) + /Creator (pdfLaTeX) + /Producer ($Id: userguide.tex 2008-04-08 Pasqua D'Ambra, Daniela di Serafino, + Salvatore Filippone$) +} +\pdfcatalog{ %-- Catalog dictionary of PDF output. +% /URI (http://ce.uniroma2.it/psblas) +} + +\setlength\textwidth{1.15\textwidth} +\setlength\oddsidemargin{0.3in} +\setlength\evensidemargin{0.2in} +% \newlength{\centeroffset} +% \setlength{\centeroffset}{0.5\oddsidemargin} +% \addtolength{\centeroffset}{0.5\evensidemargin} +% \addtolength{\textwidth}{-\centeroffset} +\pagestyle{myheadings} + +\newcounter{subroutine}[subsection] +\newcounter{example}[subroutine] +\makeatletter +\def\subroutine{\@ifstar{\@subroutine}{\clearpage\@subroutine}}% +\def\@subroutine#1#2{% +\stepcounter{subroutine}% + \section*{\flushleft #1---#2 \endflushleft}% + \addcontentsline{toc}{subsection}{#1}% + \markright{#1}}% +\newcommand{\subsubroutine}[2]{% +\stepcounter{subroutine}% + \subsection*{\flushleft #1---#2 \endflushleft}% + \addcontentsline{toc}{subsubsection}{#1}% + \markright{#1}}% +\newcommand{\examplename}{Example} +\newcommand{\syntaxname}{Syntax} +\def\syntax{\@ifstar{\@ssyntax}{\@syntax}}% +\def\@syntax{\nobreak\section*{\syntaxname}% + \@ssyntax}% +\def\@ssyntax#1#2{% + \nobreak + \setbox\@tempboxa\hbox{#1\ {\em $($#2$)$}}% + \ifdim \wd\@tempboxa >\hsize + \setbox\@tempboxa\hbox{\em $($#2$)$} + \ifdim\wd\@tempboxa >\hsize + \begin{flushright}#1\ \em$($#2$)$\end{flushright}% + \else + \hbox to\hsize{#1\hfil}% + \hbox to\hsize{\hfil\box\@tempboxa}% + \fi + \else + \hbox to\hsize{\hfil\box\@tempboxa\hfil}% + \fi\par\vskip\baselineskip} +\makeatother +\newcommand{\example}{\stepcounter{example}% +\section*{\examplename~\theexample}} +\def\bsideways{\sidewaystable} +\def\esideways{\endsidewaystable} + +\newcommand{\precdata}{\hyperlink{precdata}{{\tt 
mld\_prec\_type}}} +\newcommand{\descdata}{\hyperlink{descdata}{{\tt psb\_desc\_type}}} +\newcommand{\spdata}{\hyperlink{spdata}{{\tt psb\_spmat\_type}}} +\newcommand{\Ref}[1]{\mbox{(\ref{#1})}} + +\begin{document} +\pdfbookmark{MLD2P4 User's and Reference Guide}{title} +\newlength{\centeroffset} +%\setlength{\centeroffset}{-0.5\oddsidemargin} +%\addtolength{\centeroffset}{0.5\evensidemargin} +%\addtolength{\textwidth}{-\centeroffset} +\thispagestyle{empty} +\vspace*{\stretch{1}} +\noindent\hspace*{\centeroffset}\makebox[0pt][l]{\begin{minipage}{\textwidth} +\flushright +{\Huge\bfseries MLD2P4\\[.8ex] User's and Reference Guide +} +\noindent\rule[-1ex]{\textwidth}{5pt}\\[2.5ex] +\hfill\emph{\Large A guide for the Multi-Level Domain Decomposition \\[.6ex] +Parallel Preconditioners Package +based on PSBLAS} +\end{minipage}} + +\vspace{\stretch{1}} +\noindent\hspace*{\centeroffset}\makebox[0pt][l]{\begin{minipage}{\textwidth} +\flushright +{\large\bfseries Pasqua D'Ambra}\\ +\large ICAR-CNR, Naples, Italy\\[3ex] +{\large\bfseries Daniela di Serafino}\\ +\large Second University of Naples, Italy\\[3ex] +{\large\bfseries Salvatore Filippone} \\ +\large University of Rome ``Tor Vergata'', Italy +%\\[10ex] +%\today +\end{minipage}} + +\vspace{\stretch{1}} +\noindent\hspace*{\centeroffset}\makebox[0pt][l]{\begin{minipage}{\textwidth} +\flushright +\large Software version: 1.0\\ +%\today +\large July 24, 2008 +\end{minipage}} +%\addtolength{\textwidth}{\centeroffset} +\vspace{\stretch{2}} +\clearpage +\ \\ +\thispagestyle{empty} +\clearpage + +\pagenumbering{roman} % Roman numbering +\setcounter{page}{1} % Abstract start on page i + +\include{abstract} +\cleardoublepage + +\begingroup + \renewcommand*{\thepage}{toc} + %\pagenumbering{roman} % Roman numbering + %\setcounter{page}{1} % Abstract start on page ii + \tableofcontents +\endgroup +\cleardoublepage + +\pagenumbering{arabic} % Arabic numbering +\setcounter{page}{1} % Chapters start on page 1 + +\include{overview} 
+\include{distribution} +\include{building} +\include{background} +\include{gettingstarted} +\include{userinterface} +\include{errors} +\clearpage +\appendix +\include{license} +\cleardoublepage +\include{bibliography} + + +\end{document} +%%% Local Variables: +%%% mode: latex +%%% TeX-master: 'userguide' +%%% End: diff --git a/docs/src/userhtml.tex b/docs/src/userhtml.tex new file mode 100644 index 00000000..dc1b4784 --- /dev/null +++ b/docs/src/userhtml.tex @@ -0,0 +1,149 @@ +\documentclass[a4paper,twoside,11pt]{article} +\usepackage{pstricks} +\usepackage{fancybox} +\usepackage{amsfonts} +\usepackage{ifpdf} +% \usepackage{minitoc} +% \setcounter{minitocdepth}{2} +\usepackage[bookmarks=true, + bookmarksnumbered=true, + bookmarksopen=false, + plainpages=false, + pdfpagelabels, + colorlinks, + linkcolor=blue]{hyperref} +\usepackage{ifthen} +\usepackage{graphicx} +\newtheorem{theorem}{Theorem} +\newtheorem{corollary}{Corollary} +\usepackage{rotating} +%\newboolean{mtc} +%\setboolean{mtc}{true} + +\pdfoutput=0 +% \relax +% \pdfcompresslevel=0 %-- 0 = none, 9 = best +% \pdfinfo{ %-- Info dictionary of PDF output /Author (PD, DdS, SF) +% /Title (MultiLevel Domain Decomposition Parallel Preconditioners Package +% based on PSBLAS, V. 1.0) +% /Subject (MultiLevel Domain Decomposition Parallel Preconditioners Package) +% /Keywords (Parallel Numerical Software, Algebraic Multilevel Preconditioners, Sparse Iterative Solvers, PSBLAS, MPI) +% /Creator (pdfLaTeX) +% /Producer ($Id: userguide.tex 2008-04-08 Pasqua D'Ambra, Daniela di Serafino, +% Salvatore Filippone$) +% } +% \pdfcatalog{ %-- Catalog dictionary of PDF output. 
+% % /URI (http://ce.uniroma2.it/psblas) +% } + +\setlength\textwidth{1.15\textwidth} +% \setlength\evensidemargin{.7in} +% \newlength{\centeroffset} +% \setlength{\centeroffset}{0.5\oddsidemargin} +% \addtolength{\centeroffset}{0.5\evensidemargin} +% \addtolength{\textwidth}{-\centeroffset} +\pagestyle{myheadings} + +\newcounter{subroutine}[subsection] +\newcounter{example}[subroutine] +\makeatletter +\def\subroutine{\@ifstar{\@subroutine}{\clearpage\@subroutine}}% +\def\@subroutine#1#2{% +\stepcounter{subroutine}% + \section*{\flushleft #1---#2 \endflushleft}% + \addcontentsline{toc}{subsection}{#1}% + \markright{#1}}% +\newcommand{\subsubroutine}[2]{% +\stepcounter{subroutine}% + \subsection*{\flushleft #1---#2 \endflushleft}% + \addcontentsline{toc}{subsubsection}{#1}% + \markright{#1}}% +\newcommand{\examplename}{Example} +\newcommand{\syntaxname}{Syntax} +\def\syntax{\@ifstar{\@ssyntax}{\@syntax}}% +\def\@syntax{\nobreak\section*{\syntaxname}% + \@ssyntax}% +\def\@ssyntax#1#2{% + \nobreak + \setbox\@tempboxa\hbox{#1\ {\em $($#2$)$}}% + \ifdim \wd\@tempboxa >\hsize + \setbox\@tempboxa\hbox{\em $($#2$)$} + \ifdim\wd\@tempboxa >\hsize + \begin{flushright}#1\ \em$($#2$)$\end{flushright}% + \else + \hbox to\hsize{#1\hfil}% + \hbox to\hsize{\hfil\box\@tempboxa}% + \fi + \else + \hbox to\hsize{\hfil\box\@tempboxa\hfil}% + \fi\par\vskip\baselineskip} +\makeatother +\newcommand{\example}{\stepcounter{example}% +\section*{\examplename~\theexample}} +\def\bsideways{\begin{table}} +\def\esideways{\end{table}} + +\newcommand{\precdata}{\hyperlink{precdata}{{\tt mld\_prec\_type}}} +\newcommand{\descdata}{\hyperlink{descdata}{{\tt psb\_desc\_type}}} +\newcommand{\spdata}{\hyperlink{spdata}{{\tt psb\_spmat\_type}}} +\newcommand{\Ref}[1]{\mbox{(\ref{#1})}} + +\begin{document} +{\Large\bfseries MLD2P4\\[.8ex] User's and Reference Guide}\\[\baselineskip] +\emph{\large A guide for the Multi-Level Domain Decomposition +Parallel Preconditioners Package +based on PSBLAS}\\[3ex] 
+{\bfseries Pasqua D'Ambra}\\ + ICAR-CNR, Naples, Italy\\ +{\bfseries Daniela di Serafino}\\ + Second University of Naples, Italy\\ +{\bfseries Salvatore Filippone} \\ + University of Rome ``Tor Vergata'', Italy\\[2ex] +%\\[10ex] +%\today +Software version: 1.0\\ +%\today +July 24, 2008 +\clearpage +\ \\ +\thispagestyle{empty} +\clearpage + +\pagenumbering{roman} % Roman numbering +\setcounter{page}{1} % Abstract start on page i + +\include{abstract} +\cleardoublepage + +\begingroup + \renewcommand*{\thepage}{toc} + %\pagenumbering{roman} % Roman numbering + %\setcounter{page}{1} % Abstract start on page ii + \tableofcontents +\endgroup +\cleardoublepage + +\pagenumbering{arabic} % Arabic numbering +\setcounter{page}{1} % Chapters start on page 1 + +\include{overview} +\include{distribution} +\include{building} +\include{background} +\include{gettingstarted} +\include{userinterface} +%\include{advanced} +\include{errors} +%\include{listofroutines} +\cleardoublepage +\appendix +\include{license} +\cleardoublepage +\include{bibliography} + + +\end{document} +%%% Local Variables: +%%% mode: latex +%%% TeX-master: 'userguide' +%%% End: diff --git a/docs/src/userinterface.tex b/docs/src/userinterface.tex new file mode 100644 index 00000000..e3d5cf34 --- /dev/null +++ b/docs/src/userinterface.tex @@ -0,0 +1,443 @@ +\section{User Interface\label{sec:userinterface}} +\markboth{\textsc{MLD2P4 User's and Reference Guide}} + {\textsc{\ref{sec:userinterface} User Interface}} + + +The basic user interface of MLD2P4 consists of six routines. The four routines \verb|mld_| \verb|precinit|, +\verb|mld_precset|, \verb|mld_precbld| and \verb|mld_precaply| encapsulate all the functionalities +for the setup and the application of any one-level and multi-level +preconditioner implemented in the package. +The routine \verb|mld_precfree| deallocates the preconditioner data structure, while +\verb|mld_precdescr| prints a description of the preconditioner setup by the user. 
+ +For each routine, the same user interface is overloaded with +respect to the real/complex case and the single/double precision; +arguments with appropriate data types must be passed to the routine, +i.e. +\begin{itemize} +\item the sparse matrix data structure, containing the matrix to be + preconditioned, must be of type \verb|psb_|\emph{x}\verb|spmat_type| + with \emph{x} = \verb|s| for real single precision, \emph{x} = \verb|d| + for real double precision, \emph{x} = \verb|c| for complex single precision, + \emph{x} = \verb|z| for complex double precision; +\item the preconditioner data structure must be of type + \verb|mld_|\emph{x}\verb|prec_type|, with \emph{x} = + \verb|s|, \verb|d|, \verb|c|, \verb|z|, according to the sparse + matrix data structure; +\item the arrays containing the vectors $v$ and $w$ involved in + the preconditioner application $w=M^{-1}v$ must be of type + \emph{type}\verb|(|\emph{kind\_parameter}\verb|)|, with \emph{type} = + \verb|real|, \verb|complex| and \emph{kind\_parameter} = \verb|kind(1.e0)|, + \verb|kind(1.d0)|, according to the sparse matrix and preconditioner + data structure; note that the PSBLAS module \verb|psb_base_mod| + provides the constants \verb|psb_spk_| + = \verb|kind(1.e0)| and \verb|psb_dpk_| = \verb|kind(1.d0)|; +\item real parameters defining the preconditioner must be declared + according to the precision of the sparse matrix and preconditioner + data structures (see Section~\ref{sec:precset}). +\end{itemize} +A description of each routine is given in the remainder of this section. + +\clearpage + +\subsection{Subroutine mld\_precinit\label{sec:precinit}} + +\begin{center} +\verb|mld_precinit(p,ptype,info)| \\ +\verb|mld_precinit(p,ptype,info,nlev)| \\ +\end{center} + +\noindent +This routine allocates and initializes the preconditioner data structure, +according to the preconditioner type chosen by the user. 
+ +{\vskip2\baselineskip\noindent\large\bfseries Arguments} + +\begin{tabular}{p{1.2cm}p{12cm}} +\verb|p| & \verb|type(mld_|\emph{x}\verb|prec_type), intent(inout)|.\\ + & The preconditioner data structure. Note that \emph{x} + must be chosen according to the real/complex, single/double + precision version of MLD2P4 under use.\\ +\verb|ptype| & \verb|character(len=*), intent(in)|.\\ + & The type of preconditioner. Its values are specified + in Table~\ref{tab:precinit}.\\ + & Note that the strings are case insensitive.\\ +\verb|info| & \verb|integer, intent(out)|.\\ + & Error code. If no error, 0 is returned. See Section~\ref{sec:errors} for details.\\ +\verb|nlev| & \verb|integer, optional, intent(in)|.\\ + & The number of levels of the multilevel preconditioner. + If \verb|nlev| is not present and \verb|ptype|=\verb|'ML'|, \verb|'ml'|, + then \verb|nlev|=2 is assumed. Otherwise, \verb|nlev| is ignored.\\ +\end{tabular} + +\clearpage + +\subsection{Subroutine mld\_precset\label{sec:precset}} + +\begin{center} +\verb|mld_precset(p,what,val,info)|\\ +\end{center} + +\noindent +This routine sets the parameters defining the preconditioner. More +precisely, the parameter identified by \verb|what| is assigned the value +contained in \verb|val|. + +{\vskip2\baselineskip\noindent\large\bfseries Arguments} + +\begin{tabular}{p{1.2cm}p{12cm}} +\verb|p| & \verb|type(mld_|\emph{x}\verb|prec_type), intent(inout)|.\\ + & The preconditioner data structure. Note that \emph{x} must + be chosen according to the real/complex, single/double precision + version of MLD2P4 under use.\\ +\verb|what| & \verb|integer, intent(in)|. \\ + & The number identifying the parameter to be set. 
+ A mnemonic constant has been associated to each of these + numbers, as reported in Tables~\ref{tab:p_type}-\ref{tab:p_coarse}.\\ +\verb|val | & \verb|integer| \emph{or} \verb|character(len=*)| \emph{or} + \verb|real(psb_spk_)| \emph{or} \verb|real(psb_dpk_)|, + \verb|intent(in)|.\\ + & The value of the parameter to be set. The list of allowed + values and the corresponding data types is given in + Tables~\ref{tab:p_type}-\ref{tab:p_coarse}. + When the value is of type \verb|character(len=*)|, + it is also treated as case insensitive.\\ +\verb|info| & \verb|integer, intent(out)|.\\ + & Error code. If no error, 0 is returned. See Section~\ref{sec:errors} + for details.\\ +% +%\verb|ilev| & \verb|integer, optional, intent(in)|.\\ +% & For the multilevel preconditioner, the level at which the +% preconditioner parameter has to be set. +% The levels are numbered in increasing +% order starting from the finest one, i.e.\ level 1 is the finest level. +% If \verb|ilev| is not present, the parameter identified by \verb|what| +% is set at all the appropriate levels (see Table~\ref{tab:params}). +\end{tabular} + +\ \\ +A variety of (one-level and multi-level) preconditioners can be obtained +by a suitable setting of the preconditioner parameters. These parameters +can be logically divided into four groups, i.e.\ parameters defining +\begin{enumerate} + \item the type of multi-level preconditioner; + \item the one-level preconditioner used as smoother; + \item the aggregation algorithm; + \item the coarse-space correction at the coarsest level. +\end{enumerate} +A list of the parameters that can be set, along with their allowed and +default values, is given in Tables~\ref{tab:p_type}-\ref{tab:p_coarse}. +For a detailed description of the meaning of the parameters, please +refer to Section~\ref{sec:background}. +% +%Note that the routine allows to set different features of the +%preconditioner at each level through the use of \verb|ilev|. 
+%This should be done by users with experience in the field of +%multi-level preconditioners. Non-expert users are recommended +%to call \verb| mld_precset| without specifying \verb|ilev|. + +\bsideways +\begin{center} +\begin{tabular}{|l|l|p{2cm}|l|p{7cm}|} +\hline +\verb|what| & \textsc{data type} & \verb|val| & \textsc{default} & +\textsc{comments} \\ \hline +%\multicolumn{5}{|c|}{\emph{type of the multi-level preconditioner}}\\ \hline +\verb|mld_ml_type_| & \verb|character(len=*)| + & \texttt{'ADD'} \ \ \ \texttt{'MULT'} + & \texttt{'MULT'} + & Basic multi-level framework: additive or multiplicative + among the levels (always additive inside a level). \\ \hline +\verb|mld_smoother_type_|& \verb|character(len=*)| + & \texttt{'DIAG'} \ \ \ \texttt{'BJAC'} \ \ \ \texttt{'AS'} + & \texttt{'AS'} + & Basic one-level preconditioner (i.e.\ smoother): diagonal, + block Jacobi, AS. \\ \hline +\verb|mld_smoother_pos_| & \verb|character(len=*)| + & \texttt{'PRE'} \ \ \ \texttt{'POST'} \ \ \ \texttt{'TWOSIDE'} + & \texttt{'POST'} + & ``Position'' of the smoother: pre-smoother, post-smoother, + pre- and post-smoother. \\ +\hline +\end{tabular} +\end{center} +\caption{Parameters defining the type of multi-level preconditioner. +\label{tab:p_type}} +\esideways + +\bsideways +\begin{center} +\begin{tabular}{|l|l|p{3.2cm}|l|p{7cm}|} +\hline +\verb|what| & \textsc{data type} & \verb|val| & \textsc{default} & +\textsc{comments} \\ \hline +%\multicolumn{5}{|c|}{\emph{basic one-level preconditioner (smoother)}} \\ \hline +\verb|mld_sub_ovr_| & \verb|integer| + & any~int.~num.~$\ge 0$ + & 1 + & Number of overlap layers. \\ \hline +\verb|mld_sub_restr_| & \verb|character(len=*)| + & \texttt{'HALO'} \hspace{2.5cm} \texttt{'NONE'} + & \texttt{'HALO'} + & Type of restriction operator: + \texttt{'HALO'} for taking into account the overlap, \texttt{'NONE'} + for neglecting it. 
\\ \hline +\verb|mld_sub_prol_| & \verb|character(len=*)| + & \texttt{'SUM'} \hspace{2.5cm} \texttt{'NONE'} + & \texttt{'NONE'} + & Type of prolongation operator: + \texttt{'SUM'} for adding the contributions from the overlap, \texttt{'NONE'} + for neglecting them. \\ \hline +\verb|mld_sub_solve_| & \verb|character(len=*)| + & \texttt{'ILU'} \hspace{2.5cm} \texttt{'MILU'} \hspace{2.5cm} \texttt{'ILUT'} + \hspace{2.5cm} \texttt{'UMF'} \hspace{2.5cm} \texttt{'SLU'} + & \texttt{'UMF'} + & Local solver: ILU($p$), MILU($p$), ILU($p,t$), LU from UMFPACK, LU from SuperLU + (plus triangular solve). \\ \hline +\verb|mld_sub_fillin_| & \verb|integer| + & Any~int.~num.~$\ge 0$ + & 0 + & Fill-in level $p$ of the incomplete LU factorizations. \\ \hline +\verb|mld_sub_iluthrs_| & \verb|real(|\emph{kind\_parameter}\verb|)| + & Any~real~num.~$\ge 0$ + & 0 + & Drop tolerance $t$ in the ILU($p,t$) factorization. \\ \hline +\verb|mld_sub_ren_| & \verb|character(len=*)| + & \texttt{'RENUM\_NONE'} \texttt{'RENUM\_GLOBAL'} %, \texttt{'RENUM_GPS'} + & \texttt{'RENUM\_NONE'} + & Row and column reordering of the local submatrices: no reordering, + reordering according to the global numbering of the rows and columns of + the whole matrix. \\ +\hline +\end{tabular} +\end{center} +\caption{Parameters defining the one-level preconditioner used as smoother. +\label{tab:p_smoother}} +\esideways + +\bsideways +\begin{center} +\begin{tabular}{|l|l|p{2.3cm}|p{2.6cm}|p{7cm}|} +\hline +\verb|what| & \textsc{data type} & \verb|val| & \textsc{default} & +\textsc{comments} \\ \hline +%\multicolumn{5}{|c|}{\emph{aggregation algorithm}} \\ \hline +\verb|mld_aggr_alg_| & \verb|character(len=*)| + & \texttt{'DEC'} + & \texttt{'DEC'} + & Aggregation algorithm. Currently, only the decoupled aggregation is available. 
\\ \hline +\verb|mld_aggr_kind_| & \verb|character(len=*)| + & \texttt{'SMOOTH'} \hspace{2.5cm} \texttt{'RAW'} + & \texttt{'SMOOTH'} + & Type of aggregation: smoothed, raw (i.e.\ using the tentative prolongator). \\ \hline +\verb|mld_aggr_thresh_| & \verb|real(|\emph{kind\_parameter}\verb|)| + & Any~real~num. $\in [0, 1]$ + & 0 + & Threshold $\theta$ in the aggregation algorithm. \\ \hline +\verb|mld_aggr_eig_| & \verb|character(len=*)| + & \texttt{'A\_NORMI'} + & \texttt{'A\_NORMI'} + & Estimate of the eigenvalue of $D^{-1}A$ with largest modulus, + to build the damping parameter $\omega$ in the smoothed aggregation. + Currently, only the infinity norm of + the matrix is available. \\ \hline +\verb|mld_aggr_damp_| & \verb|real(|\emph{kind\_parameter}\verb|)| + & Any~real~num. + & $4/(3\|D^{-1}A\|_\infty)$ + & Damping parameter $\omega$ in the smoothed aggregation algorithm. + If the user specifies a negative value, then $\omega$ + is set to its default value; + otherwise, $\omega$ is set to the value provided by the + user. In the latter case no estimate of the eigenvalue of + $D^{-1}A$ with largest modulus is computed.\\ +\hline +\end{tabular} +\end{center} +\caption{Parameters defining the aggregation algorithm. +\label{tab:p_aggregation}} +\esideways + +\bsideways +\begin{center} +\begin{tabular}{|l|l|p{3.2cm}|l|p{7cm}|} +\hline +\verb|what| & \textsc{data type} & \verb|val| & \textsc{default} & +\textsc{comments} \\ \hline +%\multicolumn{5}{|c|}{\emph{coarse-space correction at the coarsest level}}\\ \hline +\verb|mld_coarse_mat_| & \verb|character(len=*)| + & \texttt{'DISTR'} \hspace{2.5cm} \texttt{'REPL'} + & \texttt{'DISTR'} + & Coarsest matrix: distributed among the processors or + replicated on each of them. 
\\ \hline +\verb|mld_coarse_solve_| & \verb|character(len=*)| + & \texttt{'BJAC'} \hspace{2.5cm} \texttt{'UMF'} \hspace{2.5cm} + \texttt{'SLU'} \hspace{2.5cm} \texttt{'SLUDIST'} + & \texttt{'BJAC'} + & Solver used at the coarsest level: block Jacobi, sequential + LU from UMFPACK, sequential LU from SuperLU, + distributed LU from SuperLU\_Dist. + \texttt{'BJAC'} and \texttt{'SLUDIST'} require the coarsest + matrix to be distributed, while \texttt{'UMF'} and + \texttt{'SLU'} require it to be replicated. \\ \hline +\verb|mld_coarse_subsolve_| & \verb|character(len=*)| + & \texttt{'ILU'} \hspace{2.5cm} \texttt{'MILU'} + \hspace{2.5cm} \texttt{'ILUT'} + \hspace{2.5cm} \texttt{'UMF'} \hspace{2.5cm} \texttt{'SLU'} + & \texttt{'UMF'} + & Solver for the diagonal blocks of the coarse matrix, + in case the block Jacobi solver + is chosen as coarsest-level solver: ILU($p$), MILU($p$), + ILU($p,t$), LU from UMFPACK, + LU from SuperLU, plus triangular solve. \\ \hline +\verb|mld_coarse_sweeps_|& \verb|integer| + & Any~int.~num.~$> 0$ + & 4 + & Number of block-Jacobi sweeps when \texttt{'BJAC'} is used as + coarsest-level solver. \\ \hline +\verb|mld_coarse_fillin_| & \verb|integer| + & Any~int.~num.~$\ge 0$ + & 0 + & Fill-in level $p$ of the incomplete LU factorizations. \\ \hline +\verb|mld_coarse_iluthrs_| & \verb|real(|\emph{kind\_parameter}\verb|)| + & Any~real~num.~$\ge 0$ + & 0 + & Drop tolerance $t$ in the ILU($p,t$) factorization. \\ +\hline +\end{tabular} +\end{center} +\caption{Parameters defining the coarse-space correction at the coarsest +level.\label{tab:p_coarse}} +\esideways + + +\clearpage + +\subsection{Subroutine mld\_precbld\label{sec:precbld}} + +\begin{center} +\verb|mld_precbld(a,desc_a,p,info)|\\ +\end{center} + +\noindent +This routine builds the preconditioner according to the requirements made by +the user through the routines \verb|mld_precinit| and \verb|mld_precset|. 
+ +{\vskip2\baselineskip\noindent\large\bfseries Arguments} + +\begin{tabular}{p{1.2cm}p{12cm}} +\verb|a| & \verb|type(psb_|\emph{x}\verb|spmat_type), intent(in)|. \\ + & The sparse matrix structure containing the local part of the + matrix to be preconditioned. Note that \emph{x} must be chosen according + to the real/complex, +single/double precision version of MLD2P4 under use. + See the PSBLAS User's Guide for details \cite{PSBLASGUIDE}.\\ +\verb|desc_a| & \verb|type(psb_desc_type), intent(in)|. \\ + & The communication descriptor of \verb|a|. See the PSBLAS User's Guide for + details \cite{PSBLASGUIDE}.\\ +\verb|p| & \verb|type(mld_|\emph{x}\verb|prec_type), intent(inout)|.\\ + & The preconditioner data structure. Note that \emph{x} must be chosen according + to the real/complex, single/double precision version of MLD2P4 under use.\\ +\verb|info| & \verb|integer, intent(out)|.\\ + & Error code. If no error, 0 is returned. See Section~\ref{sec:errors} for details.\\ +\end{tabular} + +\clearpage +\subsection{Subroutine mld\_precaply\label{sec:precaply}} + +\begin{center} +\verb|mld_precaply(p,x,y,desc_a,info)|\\ +\verb|mld_precaply(p,x,y,desc_a,info,trans,work)|\\ +\end{center} + +\noindent +This routine computes $y = op(M^{-1})\, x$, where $M$ is a previously built +preconditioner, stored in \verb|p|, and $op$ +denotes the preconditioner itself, its transpose or its conjugate transpose, according to +the value of \verb|trans|. +Note that, when MLD2P4 is used with a Krylov solver from PSBLAS, +\verb|mld_precaply| is called within the PSBLAS routine \verb|psb_krylov| +and hence it is completely transparent to the user. + +{\vskip2\baselineskip\noindent\large\bfseries Arguments} + +\begin{tabular}{p{1.2cm}p{12cm}} +\verb|p| & \verb|type(mld_|\emph{x}\verb|prec_type), intent(inout)|.\\ + & The preconditioner data structure, containing the local part of $M$. 
+ Note that \emph{x} must be chosen according + to the real/complex, single/double precision version of MLD2P4 under use.\\ +\verb|x| & \emph{type}\verb|(|\emph{kind\_parameter}\verb|), dimension(:), intent(in)|.\\ + & The local part of the vector $x$. Note that \emph{type} and + \emph{kind\_parameter} must be chosen according + to the real/complex, single/double precision version of MLD2P4 under use.\\ +\verb|y| & \emph{type}\verb|(|\emph{kind\_parameter}\verb|), dimension(:), intent(out)|.\\ + & The local part of the vector $y$. Note that \emph{type} and + \emph{kind\_parameter} must be chosen according + to the real/complex, single/double precision version of MLD2P4 under use.\\ +\verb|desc_a| & \verb|type(psb_desc_type), intent(in)|. \\ + & The communication descriptor associated with the matrix to be + preconditioned.\\ +\verb|info| & \verb|integer, intent(out)|.\\ + & Error code. If no error, 0 is returned. See Section~\ref{sec:errors} for details.\\ +\verb|trans| & \verb|character(len=1), optional, intent(in)|.\\ + & If \verb|trans| = \verb|'N','n'| then $op(M^{-1}) = M^{-1}$; + if \verb|trans| = \verb|'T','t'| then $op(M^{-1}) = M^{-T}$ + (transpose of $M^{-1}$); if \verb|trans| = \verb|'C','c'| then $op(M^{-1}) = M^{-C}$ + (conjugate transpose of $M^{-1}$).\\ +\verb|work| & \emph{type}\verb|(|\emph{kind\_parameter}\verb|), dimension(:), optional, target|.\\ + & Workspace. Its size should be at + least \verb|4 * psb_cd_get_local_| \verb|cols(desc_a)| (see the PSBLAS User's Guide). + Note that \emph{type} and \emph{kind\_parameter} must be chosen according + to the real/complex, single/double precision version of MLD2P4 under use.\\ +\end{tabular} + +\clearpage + +\subsection{Subroutine mld\_precfree\label{sec:precfree}} + +\begin{center} +\verb|mld_precfree(p,info)|\\ +\end{center} + +\noindent +This routine deallocates the preconditioner data structure. 
+ +{\vskip2\baselineskip\noindent\large\bfseries Arguments} + +\begin{tabular}{p{1.2cm}p{10.5cm}} +\verb|p| & \verb|type(mld_|\emph{x}\verb|prec_type), intent(inout)|.\\ + & The preconditioner data structure. Note that \emph{x} must be chosen according + to the real/complex, single/double precision version of MLD2P4 under use.\\ +\verb|info| & \verb|integer, intent(out)|.\\ + & Error code. If no error, 0 is returned. See Section~\ref{sec:errors} for details.\\ +\end{tabular} + +\clearpage + +\subsection{Subroutine mld\_precdescr\label{sec:precdescr}} + +\begin{center} +\verb|mld_precdescr(p,info)|\\ +\verb|mld_precdescr(p,info,iout)|\\ +\end{center} + +\noindent +This routine prints a description of the preconditioner to the standard output or +to a file. It must be called after \verb|mld_precbld| has been called. + +{\vskip2\baselineskip\noindent\large\bfseries Arguments} + +\begin{tabular}{p{1.2cm}p{12cm}} +\verb|p| & \verb|type(mld_|\emph{x}\verb|prec_type), intent(in)|.\\ + & The preconditioner data structure. Note that \emph{x} must be chosen according + to the real/complex, single/double precision version of MLD2P4 under use.\\ +\verb|info| & \verb|integer, intent(out)|.\\ + & Error code. If no error, 0 is returned. See Section~\ref{sec:errors} for details.\\ +\verb|iout| & \verb|integer, intent(in), optional|.\\ + & The id of the file where the preconditioner description + will be printed; the default is the standard output.\\ +\end{tabular} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: "userguide" +%%% End: