diff --git a/docs/html/userhtmlse10.html b/docs/html/userhtmlse10.html index 913cece4..a5e74971 100644 --- a/docs/html/userhtmlse10.html +++ b/docs/html/userhtmlse10.html @@ -1115,7 +1115,7 @@ href="userhtmlse13.html#x20-15700013">13, memory allocation and deallocation usually have a much larger overhead, significantly affecting performance. To alleviate this problem we define this method that preallocates internal storage; it is intended to be invoked prior to the iterative -solver method, so that the necessary internal scratch storage is available throughout +solver method, so that the necessary internal work storage is available throughout the iterative method application.

When using GPUs or other specialized devices, the vmold argument is also necessary to ensure the internal work vectors are of the appropriate dynamic type to diff --git a/docs/psblas-3.9.pdf b/docs/psblas-3.9.pdf index d0f07150..3010f28d 100644 --- a/docs/psblas-3.9.pdf +++ b/docs/psblas-3.9.pdf @@ -27828,7 +27828,7 @@ endstream endobj 2047 0 obj << -/Length 6126 +/Length 6123 >> stream 0 g 0 G @@ -27942,7 +27942,7 @@ BT 0 0 1 rg 0 0 1 RG [-495(13)]TJ 0 g 0 G - [(,)-248(memory)-248(allocation)-248(and)-248(deallocation)-248(usually)-248(have)-247(a)-248(much)]TJ 1.005 0 0 1 150.705 296.63 Tm [(lar)18(ger)-247(over)18(head,)-248(signi\002cantly)-247(af)18(fecting)-247(performance.)-308(T)92(o)-247(alleviate)-247(this)-247(pr)17(o)1(blem)]TJ 1.02 0 0 1 150.286 284.674 Tm [(we)-301(de\002ne)-300(this)-301(method)-300(that)-301(pr)18(eallocates)-301(internal)-300(storage;)-329(it)-300(is)-301(intended)-300(to)-301(be)]TJ 1.02 0 0 1 150.705 272.719 Tm [(invoked)-348(prior)-349(to)-348(the)-348(iterative)-349(solver)-348(method,)-374(so)-349(that)-348(the)-348(necessary)-349(internal)]TJ 1 0 0 1 150.705 260.764 Tm [(scratch)-250(storage)-250(is)-250(available)-250(thr)18(oughout)-250(the)-250(iterative)-250(method)-250(application.)]TJ 1.002 0 0 1 165.649 248.809 Tm [(When)-250(using)-251(GPUs)-250(or)-251(other)-250(specialized)-250(devices,)-251(the)]TJ/F131 9.9626 Tf 1 0 0 1 393.064 248.809 Tm [(vmold)]TJ/F84 9.9626 Tf 1.002 0 0 1 421.715 248.809 Tm [(ar)18(gument)-250(is)-251(also)]TJ 1.019 0 0 1 150.705 236.854 Tm [(necessary)-246(to)-246(ensur)17(e)-246(the)-246(internal)-246(work)-246(vectors)-247(ar)18(e)-246(of)-246(the)-246(appr)17(opriate)-246(dynamic)]TJ 1 0 0 1 150.705 224.899 Tm [(type)-250(to)-250(exploit)-250(the)-250(accelerator)-250(har)18(dwar)18(e.)]TJ + [(,)-248(memory)-248(allocation)-248(and)-248(deallocation)-248(usually)-248(have)-247(a)-248(much)]TJ 1.005 0 0 1 150.705 296.63 Tm [(lar)18(ger)-247(over)18(head,)-248(signi\002cantly)-247(af)18(fecting)-247(performance.)-308(T)92(o)-247(alleviate)-247(this)-247(pr)17(o)1(blem)]TJ 1.02 0 0 1 150.286 284.674 Tm [(we)-301(de\002ne)-300(this)-301(method)-300(that)-301(pr)18(eallocates)-301(internal)-300(storage;)-329(it)-300(is)-301(intended)-300(to)-301(be)]TJ 0.98 0 0 1 150.705 272.719 Tm [(invoked)-244(prior)-245(to)-244(the)-245(iterative)-244(solver)-245(method,)-246(so)-245(that)-244(the)-245(necessary)-244(internal)-245(work)]TJ 1 0 0 1 150.705 260.764 Tm [(storage)-250(is)-250(available)-250(thr)18(oughout)-250(the)-250(iterative)-250(method)-250(application.)]TJ 1.002 0 0 1 165.649 248.809 Tm [(When)-250(using)-251(GPUs)-250(or)-251(other)-250(specialized)-250(devices,)-251(the)]TJ/F131 9.9626 Tf 1 0 0 1 393.064 248.809 Tm [(vmold)]TJ/F84 9.9626 Tf 1.002 0 0 1 421.715 248.809 Tm [(ar)18(gument)-250(is)-251(also)]TJ 1.019 0 0 1 150.705 236.854 Tm [(necessary)-246(to)-246(ensur)17(e)-246(the)-246(internal)-246(work)-246(vectors)-247(ar)18(e)-246(of)-246(the)-246(appr)17(opriate)-246(dynamic)]TJ 1 0 0 1 150.705 224.899 Tm [(type)-250(to)-250(exploit)-250(the)-250(accelerator)-250(har)18(dwar)18(e.)]TJ 0 g 0 G 164.383 -134.461 Td [(160)]TJ 0 g 0 G @@ -38242,8 +38242,8 @@ endobj 2461 0 obj << /Title (Parallel Sparse BLAS V. 3.9.0) /Subject (Parallel Sparse Basic Linear Algebra Subroutines) /Keywords (Computer Science Linear Algebra Fluid Dynamics Parallel Linux MPI PSBLAS Iterative Solvers Preconditioners) /Creator (pdfLaTeX) /Producer ($Id$) /Author()/Title()/Subject()/Creator(LaTeX with hyperref)/Keywords() -/CreationDate (D:20250224095556+01'00') -/ModDate (D:20250224095556+01'00') +/CreationDate (D:20250224110129+01'00') +/ModDate (D:20250224110129+01'00') /Trapped /False /PTEX.Fullbanner (This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023/Fedora 40) kpathsea version 6.3.5) >> @@ -38724,7 +38724,7 @@ endobj /W [1 3 1] /Root 2460 0 R /Info 2461 0 R -/ID [<6D01D2DBC8F2DF5E38E518C024236DD8> <6D01D2DBC8F2DF5E38E518C024236DD8>] +/ID [ ] /Length 12315 >> stream @@ -38745,16 +38745,16 @@ stream  cC# !"%&*$')0+G,-./641278?5 B9:;<=>H@(fABCDEFGDJKLOI=EMYPXQRSTUVWX[\_Zfg]abJ`rWcJJJJJJJ J |J J EJJJJ ȖJJJJJJJJJJ JJJJJ"J#J&J!J$J*J' J(J,J/J+J-FJ4J01J1J2J3J6J9J55&J7J;J?J:LJ<J>JAJEJ@Y8JBJDJGJKJFmVJHJJJMJQJLzfJNJPGJSJXJRxJTJVJWJZJ_JYqJ[J]J^JaJ`Jb!-     H !! A'" B#%&-( Y)+,4. lV/12385 ~16I<9 :@= >DA BHE FLI JRM NOPQ)JVS zT\W XYZ[`]!^#a!bc##"X##!S*## #!kg## # -# K## !w###!######!## #!\#####$#!!x#"#,#%!#&#'#((#)#*#+L#1#-"K#.#0#6#2"$#3#5#8#9#<#7":#:#?#@#A#="U#>#G#B"~#C#E#F#M#H"#I#K#LM#S#N"|#O#Q#R#Y#T"#U#W#X#]#Z"D#[#_#^"#`#b#c$j"f " -  N"g###-Z!"&$#J%*'#M[(,-.2+#S/1O>4567<:3#g8?;#xK={VABCDG@#EJH#ILMNOPSK#АQUXT#VP\Y#bZ^_b]#m`c$?%i  - $Il $V$XQ "$y!$&#$D%()*20'$f+-./3461$597$8;=>?@AE:$*B<R  -GH%AI%paSOF% -IJMNUcYP%.TVWXQR%P{{ -Z%RMb{{{{{{{{{ [\%zU]^_%{]`%|S%}]%7'v%{{{{{ %{{{{ { {{{&.{&O&E&{"{$&T!{ {&{&3{!{#{%{{{{{&_*{&`&&a6&z&{.{1{'&{/{0S{({)&>{*{+{,&{-&&&{4{2&{3{8{5&{6{?{9' -{:{={J{@'!{A{Q{K'7 {L{a{R'A{S{T{U{^{`T {b'^{c *u '''w'g(t( (H(k (<!(T")#)m$)s#%):&*(*-*3v*9*@*H*O.*Um*[*_>*c*i*oUVWXYZ[\**v < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c D+ D D D D D D D D D  D - D  D  D  D D D D D D D D D D D D D D D D D D D  D! D" D# D$ D% D& D' D( D) D* D+ D, D- D. D/ D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 D: D; D< D= D> D? D@ DA DB DC DD DE DF DG DH DI DJ DK DL DM DN DO DP DQ DR DS DT DU DV DW DX+Y+^i +# K## !w###!######!## #!\#####$#!!x#"#,#%!#&#'#((#)#*#+L#1#-"K#.#0#6#2"$#3#5#8#9#<#7":#:#?#@#A#="U#>#G#B"~#C#E#F#M#H"#I#K#LM#S#N"|#O#Q#R#Y#T"#U#W#X#]#Z"D#[#_#^"#`#b#c$g"f " +  N"g###-Z!"&$#J%*'#M[(,-.2+#S/1O>4567<:3#g8?;#xK={VABCDG@#EJH#ILMNOPSK#АQUXT#VP\Y#bZ^_b]#m`c$?%f  + $Ii $V$XQ "$y!$&#$A%()*20'$c+-./3461$597$8;=>?@AE:$'B<R  +GH%AI%paSOF% +FJMNUcYP%.TVWXQR%P{{ +Z%RJb{{{{{{{{{ [\%zR]^_%{Z`%|P%}Z%4'v%{{{{{ %ֽ{{{{ { {{{&+{&L&B&{"{$&T{ {&{&3{!{#{%{{{{{&_'{&`#&a3&z&{.{1{'&{/{0S{({)&;{*{+{,&{-&&&{4{2&{3{8{5&{6{?{9' +{:{={J{@'!{A{Q{K'7{L{a{R'A{S{T{U{^{`T {b'^{c *u '''t'd(q( (H(h (9!(Q")#)j$)s %)7&*(*- *3s*9*@*H*O+*Uj*[*_;*c*i*oUVWXYZ[\**s < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c D+ D D D D D D D D D  D + D  D  D  D D D D D D D D D D D D D D D D D D D  D! D" D# D$ D% D& D' D( D) D* D+ D, D- D. D/ D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 D: D; D< D= D> D? D@ DA DB DC DD DE DF DG DH DI DJ DK DL DM DN DO DP DQ DR DS DT DU DV DW DX+V+^f endstream endobj startxref -2842217 +2842214 %%EOF diff --git a/docs/src/precs.tex b/docs/src/precs.tex index 832e4fe6..c872aa03 100644 --- a/docs/src/precs.tex +++ b/docs/src/precs.tex @@ -467,7 +467,7 @@ e.g. the CUDA enabled data strutures of Sec.~\ref{sec:cudastruct} and much larger overhead, significantly affecting performance. To alleviate this problem we define this method that preallocates internal storage; it is intended to be invoked prior to the iterative -solver method, so that the necessary internal scratch storage is +solver method, so that the necessary internal work storage is available throughout the iterative method application. When using GPUs or other specialized devices, the \fortinline|vmold|