Merge branch 'repackage' into development

7 months ago · de27c8f616
parent 497cd31018 08a69985c8
commit de27c8f616
98 changed files with 45784 additions and 18499 deletions
--- a/README.md
+++ b/README.md
@ -25,7 +25,7 @@ Harwell-Boeing and MatrixMarket file formats.

 DOCUMENTATION
 -------------
-See docs/psblas-3.8.pdf; an HTML version of the same document is
+See docs/psblas-3.9.pdf; an HTML version of the same document is
 available in docs/html. Please consult the sample programs, especially
 test/pargen/psb_[sd]_pde[23]d.f90

--- a/config/pac.m4
+++ b/config/pac.m4
@ -2018,6 +2018,75 @@ CPPFLAGS="$SAVE_CPPFLAGS";
 ])dnl 


+dnl @synopsis PAC_ARG_WITH_LIBRSB
+dnl
+dnl Test for --with-librsb="pathname".
+dnl
+dnl Defines the path to LIBRSB build dir.
+dnl
+dnl note: Renamed after PAC_ARG_WITH_LIBS as in the Trilinos package.
+dnl
+dnl Example use:
+dnl
+dnl PAC_ARG_WITH_LIBRSB
+dnl
+dnl tests for --with-librsb; when rsb.h is found in that directory, sets
+dnl LIBRSB_DIR, LIBRSB_INCDIR, LIBRSB_INCLUDES, LIBRSB_LIBS, LIBRSB_DEFINES
+dnl and LRSB, and prepends -DHAVE_RSB to FDEFINES and CDEFINES.
+dnl LIBS and CPPFLAGS are restored before the macro returns.
+dnl
+dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
+dnl
+
+AC_DEFUN([PAC_ARG_WITH_LIBRSB],
+	 [SAVE_LIBS="$LIBS"
+	  SAVE_CPPFLAGS="$CPPFLAGS"
+
+	  # pac_cv_librsb_dir stays empty when --with-librsb is not given.
+	  AC_ARG_WITH(librsb,
+	  AC_HELP_STRING([--with-librsb], [The directory for LIBRSB, for example,
+ 	  --with-librsb=/opt/packages/librsb]),
+	  [pac_cv_librsb_dir=$withval],
+	  [pac_cv_librsb_dir=''])
+
+	  # Defaults used when no directory is given or rsb.h is not found.
+	  pac_rsb_header_ok=no
+	  RSB_INCLUDES=""
+	  if test "x$pac_cv_librsb_dir" != "x"; then
+	  LIBS="-L$pac_cv_librsb_dir $LIBS"
+	  RSB_INCLUDES="-I$pac_cv_librsb_dir"
+	  RSB_LIBDIR="-L$pac_cv_librsb_dir"
+	  # Only probe for the header when a directory was actually supplied;
+	  # otherwise the check would look for /rsb.h.
+	  AC_CHECK_HEADER([$pac_cv_librsb_dir/rsb.h],
+			  [pac_rsb_header_ok=yes],
+			  [pac_rsb_header_ok=no; RSB_INCLUDES=""])
+	  fi
+
+	  # POSIX test(1) only defines "=" for string equality; "==" is a
+	  # bashism that fails under shells such as dash.
+	  if test "x$pac_rsb_header_ok" = "xyes" ; then
+	  RSB_LIBS="-lrsb $RSB_LIBDIR"
+	  RSBLIBNAME="librsb.a";
+	  LIBRSB_DIR="$pac_cv_librsb_dir";
+	  LIBRSB_INCDIR="$LIBRSB_DIR";
+	  LIBRSB_INCLUDES="-I$LIBRSB_INCDIR";
+	  LIBRSB_LIBS="-lrsb -L$LIBRSB_DIR";
+	  LIBRSB_DEFINES="-DHAVE_RSB";
+	  LRSB=-lpsb_rsb
+	  # NOTE(review): psblas_cv_define_prepend follows the define here instead
+	  # of being glued in front of it; confirm against other FDEFINES settings.
+	  FDEFINES="$LIBRSB_DEFINES $psblas_cv_define_prepend $FDEFINES";
+	  CDEFINES="$LIBRSB_DEFINES $CDEFINES";
+	  fi
+
+# Undo the temporary search-path changes made above.
+LIBS="$SAVE_LIBS"
+CPPFLAGS="$SAVE_CPPFLAGS"
+])
+dnl
+
+
 dnl @synopsis PAC_CHECK_SPGPU
 dnl
 dnl Will try to find the spgpu library and headers.
--- a/configure.ac
+++ b/configure.ac
@ -204,7 +204,7 @@ PAC_ARG_WITH_FLAGS(module-path,MODULE_PATH)

 # we just gave the user the chance to append values to these variables

-###############################################################################
+

 dnl  Library oriented Autotools facilities (we don't care about this for now)

@ -845,6 +845,30 @@ if test "x$pac_cv_ipk_size" != "x4"; then
 fi


+###############################################################################
+PAC_ARG_WITH_LIBRSB
+# Record the directory given via --with-librsb (may be empty).
+LIBRSB_DIR="$pac_cv_librsb_dir";
+AC_MSG_CHECKING([for LIBRSB install dir])
+# Absolute and relative pathnames are both accepted: each branch is a no-op.
+case $LIBRSB_DIR in 
+     /*) ;; 
+     *) ;;
+esac
+dnl Disabled checks: absolute-pathname and existing-directory errors.
+# RSBTARGETLIB and RSBTARGETOBJ are only set when librsb.a exists there.
+pac_cv_status_file="$LIBRSB_DIR/librsb.a"
+if test -f "$pac_cv_status_file" ; then 
+  AC_MSG_RESULT([$LIBRSB_DIR])
+  RSBTARGETLIB=rsbd;
+  RSBTARGETOBJ=rsbobj;	
+else
+  # Not fatal: a missing librsb.a only reports "no" and configure continues.
+  AC_MSG_RESULT([no])
+fi
+
+
+


 ###############################################################################
@ -945,6 +969,14 @@ AC_SUBST(CUDEFINES)
 AC_SUBST(CUDAD)
 AC_SUBST(CUDALD)
 AC_SUBST(LCUDA)
+AC_SUBST(LIBRSB_LIBS)
+AC_SUBST(LIBRSB_INCLUDES)
+AC_SUBST(LIBRSB_INCDIR)
+AC_SUBST(LIBRSB_DIR)
+AC_SUBST(LIBRSB_DEFINES)
+AC_SUBST(LRSB)
+
+
 ###############################################################################
 # the following files will be created by Automake

--- a/docs/html/cmsy10-42.png
+++ b/docs/html/cmsy10-42.png
--- a/docs/html/cmsy10-48.png
+++ b/docs/html/cmsy10-48.png
--- a/docs/html/cmsy10-49.png
+++ b/docs/html/cmsy10-49.png
--- a/docs/html/dia.png
+++ b/docs/html/dia.png
--- a/docs/html/ell.png
+++ b/docs/html/ell.png
--- a/docs/html/hdia.png
+++ b/docs/html/hdia.png
--- a/docs/html/hll.png
+++ b/docs/html/hll.png
--- a/docs/html/index.html
+++ b/docs/html/index.html
@ -10,7 +10,7 @@
 <link rel="stylesheet" type="text/css" href="userhtml.css"> 
 </head><body 
 >
-<!--l. 90--><p class="noindent" ><span 
+<!--l. 91--><p class="noindent" ><span 
 class="cmbx-12x-x-144">PSBLAS</span><br 
 class="newline" /> <span 
 class="cmbx-12x-x-144">User&#8217;s and Reference Guide</span><br 
@ -20,8 +20,8 @@ class="newline" /> <span
 class="cmbx-10">Salvatore Filippone</span><br 
 class="newline" /><span 
 class="cmbx-10">Alfredo Buttari  </span><br 
-class="newline" />Software version: 3.8.0<br 
-class="newline" />May 1st, 2022
+class="newline" />Software version: 3.9.0<br 
+class="newline" />Aug 1st, 2024
                                                                  

                                                                  
@ -29,219 +29,39 @@ class="newline" />May 1st, 2022

                                                                  
   <div class="tableofcontents">
-   <span class="likesectionToc" ><a 
+   &#x00A0;<span class="likesectionToc" ><a 
 href="userhtmlli1.html#x2-1000" id="QQ2-2-1">Contents</a></span>
-<br />   <span class="sectionToc" >1 <a 
+<br />   &#x00A0;<span class="sectionToc" >1 <a 
 href="userhtmlse1.html#x3-20001" id="QQ2-3-2">Introduction</a></span>
-<br />   <span class="sectionToc" >2 <a 
+<br />   &#x00A0;<span class="sectionToc" >2 <a 
 href="userhtmlse2.html#x4-30002" id="QQ2-4-3">General overview</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >2.1 <a 
-href="userhtmlsu1.html#x6-40002.1" id="QQ2-6-5">Basic Nomenclature</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >2.2 <a 
-href="userhtmlsu2.html#x8-50002.2" id="QQ2-8-7">Library contents</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >2.3 <a 
-href="userhtmlsu3.html#x9-60002.3" id="QQ2-9-8">Application structure</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >2.4 <a 
-href="userhtmlsu4.html#x11-80002.4" id="QQ2-11-10">Programming model</a></span>
-<br />   <span class="sectionToc" >3 <a 
-href="userhtmlse3.html#x12-90003" id="QQ2-12-11">Data Structures and Classes</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >3.1 <a 
-href="userhtmlsu5.html#x13-100003.1" id="QQ2-13-12">Descriptor data structure</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >3.2 <a 
-href="userhtmlsu6.html#x14-260003.2" id="QQ2-14-29">Sparse Matrix class</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >3.3 <a 
-href="userhtmlsu7.html#x15-460003.3" id="QQ2-15-50">Dense Vector Data Structure</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >3.4 <a 
-href="userhtmlsu8.html#x16-530003.4" id="QQ2-16-58">Preconditioner data structure</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >3.5 <a 
-href="userhtmlsu9.html#x17-540003.5" id="QQ2-17-60">Heap data structure</a></span>
-<br />   <span class="sectionToc" >4 <a 
-href="userhtmlse4.html#x18-550004" id="QQ2-18-61">Computational routines</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.1 <a 
-href="userhtmlsu10.html#x19-560004.1" id="QQ2-19-62">psb_geaxpby &#8212; General Dense Matrix Sum</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.2 <a 
-href="userhtmlsu11.html#x20-570004.2" id="QQ2-20-64">psb_gedot &#8212; Dot Product</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.3 <a 
-href="userhtmlsu12.html#x21-580004.3" id="QQ2-21-66">psb_gedots &#8212; Generalized Dot Product</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.4 <a 
-href="userhtmlsu13.html#x22-590004.4" id="QQ2-22-68">psb_normi &#8212; Infinity-Norm of Vector</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.5 <a 
-href="userhtmlsu14.html#x23-600004.5" id="QQ2-23-70">psb_geamaxs &#8212; Generalized Infinity Norm</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.6 <a 
-href="userhtmlsu15.html#x24-610004.6" id="QQ2-24-72">psb_norm1 &#8212; 1-Norm of Vector</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.7 <a 
-href="userhtmlsu16.html#x25-620004.7" id="QQ2-25-74">psb_geasums &#8212; Generalized 1-Norm of Vector</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.8 <a 
-href="userhtmlsu17.html#x26-630004.8" id="QQ2-26-76">psb_norm2 &#8212; 2-Norm of Vector</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.9 <a 
-href="userhtmlsu18.html#x27-640004.9" id="QQ2-27-78">psb_genrm2s &#8212; Generalized 2-Norm of Vector</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.10 <a 
-href="userhtmlsu19.html#x28-650004.10" id="QQ2-28-80">psb_norm1 &#8212; 1-Norm of Sparse Matrix</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.11 <a 
-href="userhtmlsu20.html#x29-660004.11" id="QQ2-29-82">psb_normi &#8212; Infinity Norm of Sparse Matrix</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.12 <a 
-href="userhtmlsu21.html#x30-670004.12" id="QQ2-30-84">psb_spmm &#8212; Sparse Matrix by Dense Matrix Product</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.13 <a 
-href="userhtmlsu22.html#x31-680004.13" id="QQ2-31-86">psb_spsm &#8212; Triangular System Solve</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.14 <a 
-href="userhtmlsu23.html#x32-690004.14" id="QQ2-32-88">psb_gemlt &#8212; Entrywise Product</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.15 <a 
-href="userhtmlsu24.html#x33-700004.15" id="QQ2-33-90">psb_gediv &#8212; Entrywise Division</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.16 <a 
-href="userhtmlsu25.html#x34-710004.16" id="QQ2-34-92">psb_geinv &#8212; Entrywise Inversion</a></span>
-<br />   <span class="sectionToc" >5 <a 
-href="userhtmlse5.html#x35-720005" id="QQ2-35-94">Communication routines</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >5.1 <a 
-href="userhtmlsu26.html#x36-730005.1" id="QQ2-36-95">psb_halo &#8212; Halo Data Communication</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >5.2 <a 
-href="userhtmlsu27.html#x37-740005.2" id="QQ2-37-98">psb_ovrl &#8212; Overlap Update</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >5.3 <a 
-href="userhtmlsu28.html#x38-750005.3" id="QQ2-38-101">psb_gather &#8212; Gather Global Dense Matrix</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >5.4 <a 
-href="userhtmlsu29.html#x39-760005.4" id="QQ2-39-103">psb_scatter &#8212; Scatter Global Dense Matrix</a></span>
-<br />   <span class="sectionToc" >6 <a 
-href="userhtmlse6.html#x40-770006" id="QQ2-40-105">Data management routines</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.1 <a 
-href="userhtmlsu30.html#x41-780006.1" id="QQ2-41-106">psb_cdall &#8212; Allocates a communication descriptor</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.2 <a 
-href="userhtmlsu31.html#x42-790006.2" id="QQ2-42-107">psb_cdins &#8212; Communication descriptor insert routine</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.3 <a 
-href="userhtmlsu32.html#x43-800006.3" id="QQ2-43-108">psb_cdasb &#8212; Communication descriptor assembly routine</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.4 <a 
-href="userhtmlsu33.html#x44-810006.4" id="QQ2-44-109">psb_cdcpy &#8212; Copies a communication descriptor</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.5 <a 
-href="userhtmlsu34.html#x45-820006.5" id="QQ2-45-110">psb_cdfree &#8212; Frees a communication descriptor</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.6 <a 
-href="userhtmlsu35.html#x46-830006.6" id="QQ2-46-111">psb_cdbldext &#8212; Build an extended communication descriptor</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.7 <a 
-href="userhtmlsu36.html#x47-840006.7" id="QQ2-47-112">psb_spall &#8212; Allocates a sparse matrix</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.8 <a 
-href="userhtmlsu37.html#x48-850006.8" id="QQ2-48-113">psb_spins &#8212; Insert a set of coefficients into a sparse matrix</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.9 <a 
-href="userhtmlsu38.html#x49-860006.9" id="QQ2-49-114">psb_spasb &#8212; Sparse matrix assembly routine</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.10 <a 
-href="userhtmlsu39.html#x50-870006.10" id="QQ2-50-115">psb_spfree &#8212; Frees a sparse matrix</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.11 <a 
-href="userhtmlsu40.html#x51-880006.11" id="QQ2-51-116">psb_sprn &#8212; Reinit sparse matrix structure for psblas routines.</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.12 <a 
-href="userhtmlsu41.html#x52-890006.12" id="QQ2-52-117">psb_geall &#8212; Allocates a dense matrix</a></span>
-                                                                  
+<br />   &#x00A0;<span class="sectionToc" >3 <a 
+href="userhtmlse3.html#x8-90003" id="QQ2-8-11">Data Structures and Classes</a></span>
+<br />   &#x00A0;<span class="sectionToc" >4 <a 
+href="userhtmlse4.html#x9-550004" id="QQ2-9-61">Computational routines</a></span>
+<br />   &#x00A0;<span class="sectionToc" >5 <a 
+href="userhtmlse5.html#x10-720005" id="QQ2-10-94">Communication routines</a></span>
+<br />   &#x00A0;<span class="sectionToc" >6 <a 
+href="userhtmlse6.html#x11-770006" id="QQ2-11-105">Data management routines</a></span>
+<br />   &#x00A0;<span class="sectionToc" >7 <a 
+href="userhtmlse7.html#x12-1050007" id="QQ2-12-133">Parallel environment routines</a></span>
+<br />   &#x00A0;<span class="sectionToc" >8 <a 
+href="userhtmlse8.html#x13-1230008" id="QQ2-13-151">Error handling</a></span>
+<br />   &#x00A0;<span class="sectionToc" >9 <a 
+href="userhtmlse9.html#x14-1280009" id="QQ2-14-158">Utilities</a></span>
+<br />   &#x00A0;<span class="sectionToc" >10 <a 
+href="userhtmlse10.html#x15-13500010" id="QQ2-15-165">Preconditioner routines</a></span>
+<br />   &#x00A0;<span class="sectionToc" >11 <a 
+href="userhtmlse11.html#x17-14200011" id="QQ2-17-172">Iterative Methods</a></span>
+<br />   &#x00A0;<span class="sectionToc" >12 <a 
+href="userhtmlse12.html#x19-14400012" id="QQ2-19-174">Extensions</a></span>
+<br />   &#x00A0;<span class="sectionToc" >13 <a 
+href="userhtmlse13.html#x20-15300013" id="QQ2-20-189">CUDA Environment Routines</a></span>
+<br />   &#x00A0;<span class="likesectionToc" ><a 
+href="userhtmlli2.html#x21-168000" id="QQ2-21-218">References</a></span>
+   </div>

-                                                                  
-<br />   &#x00A0;<span class="subsectionToc" >6.13 <a 
-href="userhtmlsu42.html#x53-900006.13" id="QQ2-53-118">psb_geins &#8212; Dense matrix insertion routine</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.14 <a 
-href="userhtmlsu43.html#x54-910006.14" id="QQ2-54-119">psb_geasb &#8212; Assembly a dense matrix</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.15 <a 
-href="userhtmlsu44.html#x55-920006.15" id="QQ2-55-120">psb_gefree &#8212; Frees a dense matrix</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.16 <a 
-href="userhtmlsu45.html#x56-930006.16" id="QQ2-56-121">psb_gelp &#8212; Applies a left permutation to a dense matrix</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.17 <a 
-href="userhtmlsu46.html#x57-940006.17" id="QQ2-57-122">psb_glob_to_loc &#8212; Global to local indices convertion</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.18 <a 
-href="userhtmlsu47.html#x58-950006.18" id="QQ2-58-123">psb_loc_to_glob &#8212; Local to global indices conversion</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.19 <a 
-href="userhtmlsu48.html#x59-960006.19" id="QQ2-59-124">psb_is_owned &#8212; </a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.20 <a 
-href="userhtmlsu49.html#x60-970006.20" id="QQ2-60-125">psb_owned_index &#8212; </a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.21 <a 
-href="userhtmlsu50.html#x61-980006.21" id="QQ2-61-126">psb_is_local &#8212; </a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.22 <a 
-href="userhtmlsu51.html#x62-990006.22" id="QQ2-62-127">psb_local_index &#8212; </a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.23 <a 
-href="userhtmlsu52.html#x63-1000006.23" id="QQ2-63-128">psb_get_boundary &#8212; Extract list of boundary elements</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.24 <a 
-href="userhtmlsu53.html#x64-1010006.24" id="QQ2-64-129">psb_get_overlap &#8212; Extract list of overlap elements</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.25 <a 
-href="userhtmlsu54.html#x65-1020006.25" id="QQ2-65-130">psb_sp_getrow &#8212; Extract row(s) from a sparse matrix</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.26 <a 
-href="userhtmlsu55.html#x66-1030006.26" id="QQ2-66-131">psb_sizeof &#8212; Memory occupation</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.27 <a 
-href="userhtmlsu56.html#x67-1040006.27" id="QQ2-67-132">Sorting utilities &#8212; </a></span>
-<br />   <span class="sectionToc" >7 <a 
-href="userhtmlse7.html#x68-1050007" id="QQ2-68-133">Parallel environment routines</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.1 <a 
-href="userhtmlsu57.html#x69-1060007.1" id="QQ2-69-134">psb_init &#8212; Initializes PSBLAS parallel environment</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.2 <a 
-href="userhtmlsu58.html#x70-1070007.2" id="QQ2-70-135">psb_info &#8212; Return information about PSBLAS parallel environment</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.3 <a 
-href="userhtmlsu59.html#x71-1080007.3" id="QQ2-71-136">psb_exit &#8212; Exit from PSBLAS parallel environment</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.4 <a 
-href="userhtmlsu60.html#x72-1090007.4" id="QQ2-72-137">psb_get_mpi_comm &#8212; Get the MPI communicator</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.5 <a 
-href="userhtmlsu61.html#x73-1100007.5" id="QQ2-73-138">psb_get_mpi_rank &#8212; Get the MPI rank</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.6 <a 
-href="userhtmlsu62.html#x74-1110007.6" id="QQ2-74-139">psb_wtime &#8212; Wall clock timing</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.7 <a 
-href="userhtmlsu63.html#x75-1120007.7" id="QQ2-75-140">psb_barrier &#8212; Sinchronization point parallel environment</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.8 <a 
-href="userhtmlsu64.html#x76-1130007.8" id="QQ2-76-141">psb_abort &#8212; Abort a computation</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.9 <a 
-href="userhtmlsu65.html#x77-1140007.9" id="QQ2-77-142">psb_bcast &#8212; Broadcast data</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.10 <a 
-href="userhtmlsu66.html#x78-1150007.10" id="QQ2-78-143">psb_sum &#8212; Global sum</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.11 <a 
-href="userhtmlsu67.html#x79-1160007.11" id="QQ2-79-144">psb_max &#8212; Global maximum</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.12 <a 
-href="userhtmlsu68.html#x80-1170007.12" id="QQ2-80-145">psb_min &#8212; Global minimum</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.13 <a 
-href="userhtmlsu69.html#x81-1180007.13" id="QQ2-81-146">psb_amx &#8212; Global maximum absolute value</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.14 <a 
-href="userhtmlsu70.html#x82-1190007.14" id="QQ2-82-147">psb_amn &#8212; Global minimum absolute value</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.15 <a 
-href="userhtmlsu71.html#x83-1200007.15" id="QQ2-83-148">psb_nrm2 &#8212; Global 2-norm reduction</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.16 <a 
-href="userhtmlsu72.html#x84-1210007.16" id="QQ2-84-149">psb_snd &#8212; Send data</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.17 <a 
-href="userhtmlsu73.html#x85-1220007.17" id="QQ2-85-150">psb_rcv &#8212; Receive data</a></span>
-<br />   <span class="sectionToc" >8 <a 
-href="userhtmlse8.html#x86-1230008" id="QQ2-86-151">Error handling</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >8.1 <a 
-href="userhtmlsu74.html#x87-1240008.1" id="QQ2-87-154">psb_errpush &#8212; Pushes an error code onto the error stack</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >8.2 <a 
-href="userhtmlsu75.html#x88-1250008.2" id="QQ2-88-155">psb_error &#8212; Prints the error stack content and aborts execution</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >8.3 <a 
-href="userhtmlsu76.html#x89-1260008.3" id="QQ2-89-156">psb_set_errverbosity &#8212; Sets the verbosity of error messages</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >8.4 <a 
-href="userhtmlsu77.html#x90-1270008.4" id="QQ2-90-157">psb_set_erraction &#8212; Set the type of action to be taken upon error condition</a></span>
-<br />   <span class="sectionToc" >9 <a 
-href="userhtmlse9.html#x91-1280009" id="QQ2-91-158">Utilities</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >9.1 <a 
-href="userhtmlsu78.html#x92-1290009.1" id="QQ2-92-159"> hb_read &#8212; Read a sparse matrix from a file in the Harwell&#8211;Boeing format</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >9.2 <a 
-href="userhtmlsu79.html#x93-1300009.2" id="QQ2-93-160">hb_write &#8212; Write a sparse matrix to a file in the Harwell&#8211;Boeing format</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >9.3 <a 
-href="userhtmlsu80.html#x94-1310009.3" id="QQ2-94-161">mm_mat_read &#8212; Read a sparse matrix from a file in the MatrixMarket format</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >9.4 <a 
-href="userhtmlsu81.html#x95-1320009.4" id="QQ2-95-162">mm_array_read &#8212; Read a dense array from a file in the MatrixMarket format</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >9.5 <a 
-href="userhtmlsu82.html#x96-1330009.5" id="QQ2-96-163">mm_mat_write &#8212; Write a sparse matrix to a file in the MatrixMarket format</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >9.6 <a 
-href="userhtmlsu83.html#x97-1340009.6" id="QQ2-97-164">mm_array_write &#8212; Write a dense array from a file in the MatrixMarket format</a></span>
-<br />   <span class="sectionToc" >10 <a 
-href="userhtmlse10.html#x98-13500010" id="QQ2-98-165">Preconditioner routines</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >10.1 <a 
-href="userhtmlsu84.html#x99-13600010.1" id="QQ2-99-166">init &#8212; Initialize a preconditioner</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >10.2 <a 
-href="userhtmlsu85.html#x101-13700010.2" id="QQ2-101-167">build &#8212; Builds a preconditioner</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >10.3 <a 
-href="userhtmlsu86.html#x102-13800010.3" id="QQ2-102-168">apply &#8212; Preconditioner application routine</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >10.4 <a 
-href="userhtmlsu87.html#x103-13900010.4" id="QQ2-103-169">descr &#8212; Prints a description of current preconditioner</a></span>
-                                                                  

-                                                                  
-<br />   &#x00A0;<span class="subsectionToc" >10.5 <a 
-href="userhtmlsu88.html#x104-14000010.5" id="QQ2-104-170">clone &#8212; clone current preconditioner</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >10.6 <a 
-href="userhtmlsu89.html#x105-14100010.6" id="QQ2-105-171">free &#8212; Free a preconditioner</a></span>
-<br />   <span class="sectionToc" >11 <a 
-href="userhtmlse11.html#x106-14200011" id="QQ2-106-172">Iterative Methods</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >11.1 <a 
-href="userhtmlsu90.html#x107-14300011.1" id="QQ2-107-173">psb_krylov &#8212; Krylov Methods Driver Routine</a></span>
-<br />   <span class="likesectionToc" ><a 
-href="userhtmlli2.html#x109-14400011.1" id="QQ2-109-174">References</a></span>
-   </div>



--- a/docs/html/mat.png
+++ b/docs/html/mat.png
--- a/docs/html/psblaslibraryext.png
+++ b/docs/html/psblaslibraryext.png
--- a/docs/html/userhtml.css
+++ b/docs/html/userhtml.css
@ -33,21 +33,26 @@ p.indent{text-indent:0;}
 p + p{margin-top:1em;}
 p + div, p + pre {margin-top:1em;}
 div + p, pre + p {margin-top:1em;}
+a { overflow-wrap: break-word; word-wrap: break-word; word-break: break-word; hyphens: auto; }
@media print {div.crosslinks {visibility:hidden;}}
+table.tabular{border-collapse: collapse; border-spacing: 0;}
 a img { border-top: 0; border-left: 0; border-right: 0; }
 center { margin-top:1em; margin-bottom:1em; }
 td center { margin-top:0em; margin-bottom:0em; }
 .Canvas { position:relative; }
 img.math{vertical-align:middle;}
+div.par-math-display, div.math-display{text-align:center;}
 li p.indent { text-indent: 0em }
 li p:first-child{ margin-top:0em; }
 li p:last-child, li div:last-child { margin-bottom:0.5em; }
+li p:first-child{ margin-bottom:0; }
 li p~ul:last-child, li p~ol:last-child{ margin-bottom:0.5em; }
 .enumerate1 {list-style-type:decimal;}
 .enumerate2 {list-style-type:lower-alpha;}
 .enumerate3 {list-style-type:lower-roman;}
 .enumerate4 {list-style-type:upper-alpha;}
 div.newtheorem { margin-bottom: 2em; margin-top: 2em;}
+div.newtheorem .head{font-weight: bold;}
 .obeylines-h,.obeylines-v {white-space: nowrap; }
 div.obeylines-v p { margin-top:0; margin-bottom:0; }
 .overline{ text-decoration:overline; }
@ -91,6 +96,9 @@ table[rules] {border-left:solid black 0.4pt; border-right:solid black 0.4pt; }
 .hline hr, .cline hr{ height : 0px; margin:0px; }
 .hline td, .cline td{ padding: 0; }
 .hline hr, .cline hr{border:none;border-top:1px solid black;}
+.hline {border-top: 1px solid black;}
+.hline + .vspace:last-child{display:none;}
+.hline:first-child{border-bottom:1px solid black;border-top:none;}
 .tabbing-right {text-align:right;}
 div.float, div.figure {margin-left: auto; margin-right: auto;}
 div.float img {text-align:center;}
@ -115,15 +123,16 @@ table.pmatrix {width:100%;}
 span.bar-css {text-decoration:overline;}
 img.cdots{vertical-align:middle;}
 .partToc a, .partToc, .likepartToc a, .likepartToc {line-height: 200%; font-weight:bold; font-size:110%;}
+.chapterToc a, .chapterToc, .likechapterToc a, .likechapterToc, .appendixToc a, .appendixToc {line-height: 200%; font-weight:bold;}
 .index-item, .index-subitem, .index-subsubitem {display:block}
 div.caption {text-indent:-2em; margin-left:3em; margin-right:1em; text-align:left;}
 div.caption span.id{font-weight: bold; white-space: nowrap; }
 h1.partHead{text-align: center}
 p.bibitem { text-indent: -2em; margin-left: 2em; margin-top:0.6em; margin-bottom:0.6em; }
 p.bibitem-p { text-indent: 0em; margin-left: 2em; margin-top:0.6em; margin-bottom:0.6em; }
+.subsubsectionHead, .likesubsubsectionHead { font-size: 1em; }
 .paragraphHead, .likeparagraphHead { margin-top:2em; font-weight: bold;}
 .subparagraphHead, .likesubparagraphHead { font-weight: bold;}
-.quote {margin-bottom:0.25em; margin-top:0.25em; margin-left:1em; margin-right:1em; text-align:justify;}
 .verse{white-space:nowrap; margin-left:2em}
 div.maketitle {text-align:center;}
 h2.titleHead{text-align:center;}
@ -131,19 +140,23 @@ div.maketitle{ margin-bottom: 2em; }
 div.author, div.date {text-align:center;}
 div.thanks{text-align:left; margin-left:10%; font-size:85%; font-style:italic; }
 div.author{white-space: nowrap;}
-.quotation {margin-bottom:0.25em; margin-top:0.25em; margin-left:1em; }
-.abstract p {margin-left:5%; margin-right:5%;}
+div.abstract p {margin-left:5%; margin-right:5%;}
 div.abstract {width:100%;}
+.abstracttitle{text-align:center;margin-bottom:1em;}
 .subsectionToc, .likesubsectionToc {margin-left:2em;}
 .subsubsectionToc, .likesubsubsectionToc {margin-left:4em;}
+.paragraphToc, .likeparagraphToc {margin-left:6em;}
+.subparagraphToc, .likesubparagraphToc {margin-left:8em;}
 .ovalbox { padding-left:3pt; padding-right:3pt; border:solid thin; }
 .Ovalbox-thick { padding-left:3pt; padding-right:3pt; border:solid thick; }
 .shadowbox { padding-left:3pt; padding-right:3pt; border:solid thin; border-right:solid thick; border-bottom:solid thick; }
 .doublebox { padding-left:3pt; padding-right:3pt; border-style:double; border:solid thick; }
 .rotatebox{display: inline-block;}
+code.lstinline{font-family:monospace,monospace;}
+pre.listings{font-family: monospace,monospace; white-space: pre-wrap; margin-top:0.5em; margin-bottom:0.5em; }
 .lstlisting .label{margin-right:0.5em; }
-div.lstlisting{font-family: monospace,monospace; white-space: nowrap; margin-top:0.5em; margin-bottom:0.5em; }
-div.lstinputlisting{ font-family: monospace,monospace; white-space: nowrap; }
+pre.lstlisting{font-family: monospace,monospace; white-space: pre-wrap; margin-top:0.5em; margin-bottom:0.5em; }
+pre.lstinputlisting{ font-family: monospace,monospace; white-space: pre-wrap; }
 .lstinputlisting .label{margin-right:0.5em;}
 /* end css.sty */

--- a/docs/html/userhtml.html
+++ b/docs/html/userhtml.html
@ -10,7 +10,7 @@
 <link rel="stylesheet" type="text/css" href="userhtml.css"> 
 </head><body 
 >
-<!--l. 90--><p class="noindent" ><span 
+<!--l. 91--><p class="noindent" ><span 
 class="cmbx-12x-x-144">PSBLAS</span><br 
 class="newline" /> <span 
 class="cmbx-12x-x-144">User&#8217;s and Reference Guide</span><br 
@ -20,8 +20,8 @@ class="newline" /> <span
 class="cmbx-10">Salvatore Filippone</span><br 
 class="newline" /><span 
 class="cmbx-10">Alfredo Buttari  </span><br 
-class="newline" />Software version: 3.8.0<br 
-class="newline" />May 1st, 2022
+class="newline" />Software version: 3.9.0<br 
+class="newline" />Aug 1st, 2024
                                                                  

                                                                  
@ -29,219 +29,39 @@ class="newline" />May 1st, 2022

                                                                  
   <div class="tableofcontents">
-   <span class="likesectionToc" ><a 
+   &#x00A0;<span class="likesectionToc" ><a 
 href="userhtmlli1.html#x2-1000" id="QQ2-2-1">Contents</a></span>
-<br />   <span class="sectionToc" >1 <a 
+<br />   &#x00A0;<span class="sectionToc" >1 <a 
 href="userhtmlse1.html#x3-20001" id="QQ2-3-2">Introduction</a></span>
-<br />   <span class="sectionToc" >2 <a 
+<br />   &#x00A0;<span class="sectionToc" >2 <a 
 href="userhtmlse2.html#x4-30002" id="QQ2-4-3">General overview</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >2.1 <a 
-href="userhtmlsu1.html#x6-40002.1" id="QQ2-6-5">Basic Nomenclature</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >2.2 <a 
-href="userhtmlsu2.html#x8-50002.2" id="QQ2-8-7">Library contents</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >2.3 <a 
-href="userhtmlsu3.html#x9-60002.3" id="QQ2-9-8">Application structure</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >2.4 <a 
-href="userhtmlsu4.html#x11-80002.4" id="QQ2-11-10">Programming model</a></span>
-<br />   <span class="sectionToc" >3 <a 
-href="userhtmlse3.html#x12-90003" id="QQ2-12-11">Data Structures and Classes</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >3.1 <a 
-href="userhtmlsu5.html#x13-100003.1" id="QQ2-13-12">Descriptor data structure</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >3.2 <a 
-href="userhtmlsu6.html#x14-260003.2" id="QQ2-14-29">Sparse Matrix class</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >3.3 <a 
-href="userhtmlsu7.html#x15-460003.3" id="QQ2-15-50">Dense Vector Data Structure</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >3.4 <a 
-href="userhtmlsu8.html#x16-530003.4" id="QQ2-16-58">Preconditioner data structure</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >3.5 <a 
-href="userhtmlsu9.html#x17-540003.5" id="QQ2-17-60">Heap data structure</a></span>
-<br />   <span class="sectionToc" >4 <a 
-href="userhtmlse4.html#x18-550004" id="QQ2-18-61">Computational routines</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.1 <a 
-href="userhtmlsu10.html#x19-560004.1" id="QQ2-19-62">psb_geaxpby &#8212; General Dense Matrix Sum</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.2 <a 
-href="userhtmlsu11.html#x20-570004.2" id="QQ2-20-64">psb_gedot &#8212; Dot Product</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.3 <a 
-href="userhtmlsu12.html#x21-580004.3" id="QQ2-21-66">psb_gedots &#8212; Generalized Dot Product</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.4 <a 
-href="userhtmlsu13.html#x22-590004.4" id="QQ2-22-68">psb_normi &#8212; Infinity-Norm of Vector</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.5 <a 
-href="userhtmlsu14.html#x23-600004.5" id="QQ2-23-70">psb_geamaxs &#8212; Generalized Infinity Norm</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.6 <a 
-href="userhtmlsu15.html#x24-610004.6" id="QQ2-24-72">psb_norm1 &#8212; 1-Norm of Vector</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.7 <a 
-href="userhtmlsu16.html#x25-620004.7" id="QQ2-25-74">psb_geasums &#8212; Generalized 1-Norm of Vector</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.8 <a 
-href="userhtmlsu17.html#x26-630004.8" id="QQ2-26-76">psb_norm2 &#8212; 2-Norm of Vector</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.9 <a 
-href="userhtmlsu18.html#x27-640004.9" id="QQ2-27-78">psb_genrm2s &#8212; Generalized 2-Norm of Vector</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.10 <a 
-href="userhtmlsu19.html#x28-650004.10" id="QQ2-28-80">psb_norm1 &#8212; 1-Norm of Sparse Matrix</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.11 <a 
-href="userhtmlsu20.html#x29-660004.11" id="QQ2-29-82">psb_normi &#8212; Infinity Norm of Sparse Matrix</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.12 <a 
-href="userhtmlsu21.html#x30-670004.12" id="QQ2-30-84">psb_spmm &#8212; Sparse Matrix by Dense Matrix Product</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.13 <a 
-href="userhtmlsu22.html#x31-680004.13" id="QQ2-31-86">psb_spsm &#8212; Triangular System Solve</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.14 <a 
-href="userhtmlsu23.html#x32-690004.14" id="QQ2-32-88">psb_gemlt &#8212; Entrywise Product</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.15 <a 
-href="userhtmlsu24.html#x33-700004.15" id="QQ2-33-90">psb_gediv &#8212; Entrywise Division</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.16 <a 
-href="userhtmlsu25.html#x34-710004.16" id="QQ2-34-92">psb_geinv &#8212; Entrywise Inversion</a></span>
-<br />   <span class="sectionToc" >5 <a 
-href="userhtmlse5.html#x35-720005" id="QQ2-35-94">Communication routines</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >5.1 <a 
-href="userhtmlsu26.html#x36-730005.1" id="QQ2-36-95">psb_halo &#8212; Halo Data Communication</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >5.2 <a 
-href="userhtmlsu27.html#x37-740005.2" id="QQ2-37-98">psb_ovrl &#8212; Overlap Update</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >5.3 <a 
-href="userhtmlsu28.html#x38-750005.3" id="QQ2-38-101">psb_gather &#8212; Gather Global Dense Matrix</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >5.4 <a 
-href="userhtmlsu29.html#x39-760005.4" id="QQ2-39-103">psb_scatter &#8212; Scatter Global Dense Matrix</a></span>
-<br />   <span class="sectionToc" >6 <a 
-href="userhtmlse6.html#x40-770006" id="QQ2-40-105">Data management routines</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.1 <a 
-href="userhtmlsu30.html#x41-780006.1" id="QQ2-41-106">psb_cdall &#8212; Allocates a communication descriptor</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.2 <a 
-href="userhtmlsu31.html#x42-790006.2" id="QQ2-42-107">psb_cdins &#8212; Communication descriptor insert routine</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.3 <a 
-href="userhtmlsu32.html#x43-800006.3" id="QQ2-43-108">psb_cdasb &#8212; Communication descriptor assembly routine</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.4 <a 
-href="userhtmlsu33.html#x44-810006.4" id="QQ2-44-109">psb_cdcpy &#8212; Copies a communication descriptor</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.5 <a 
-href="userhtmlsu34.html#x45-820006.5" id="QQ2-45-110">psb_cdfree &#8212; Frees a communication descriptor</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.6 <a 
-href="userhtmlsu35.html#x46-830006.6" id="QQ2-46-111">psb_cdbldext &#8212; Build an extended communication descriptor</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.7 <a 
-href="userhtmlsu36.html#x47-840006.7" id="QQ2-47-112">psb_spall &#8212; Allocates a sparse matrix</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.8 <a 
-href="userhtmlsu37.html#x48-850006.8" id="QQ2-48-113">psb_spins &#8212; Insert a set of coefficients into a sparse matrix</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.9 <a 
-href="userhtmlsu38.html#x49-860006.9" id="QQ2-49-114">psb_spasb &#8212; Sparse matrix assembly routine</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.10 <a 
-href="userhtmlsu39.html#x50-870006.10" id="QQ2-50-115">psb_spfree &#8212; Frees a sparse matrix</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.11 <a 
-href="userhtmlsu40.html#x51-880006.11" id="QQ2-51-116">psb_sprn &#8212; Reinit sparse matrix structure for psblas routines.</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.12 <a 
-href="userhtmlsu41.html#x52-890006.12" id="QQ2-52-117">psb_geall &#8212; Allocates a dense matrix</a></span>
-                                                                  
+<br />   &#x00A0;<span class="sectionToc" >3 <a 
+href="userhtmlse3.html#x8-90003" id="QQ2-8-11">Data Structures and Classes</a></span>
+<br />   &#x00A0;<span class="sectionToc" >4 <a 
+href="userhtmlse4.html#x9-550004" id="QQ2-9-61">Computational routines</a></span>
+<br />   &#x00A0;<span class="sectionToc" >5 <a 
+href="userhtmlse5.html#x10-720005" id="QQ2-10-94">Communication routines</a></span>
+<br />   &#x00A0;<span class="sectionToc" >6 <a 
+href="userhtmlse6.html#x11-770006" id="QQ2-11-105">Data management routines</a></span>
+<br />   &#x00A0;<span class="sectionToc" >7 <a 
+href="userhtmlse7.html#x12-1050007" id="QQ2-12-133">Parallel environment routines</a></span>
+<br />   &#x00A0;<span class="sectionToc" >8 <a 
+href="userhtmlse8.html#x13-1230008" id="QQ2-13-151">Error handling</a></span>
+<br />   &#x00A0;<span class="sectionToc" >9 <a 
+href="userhtmlse9.html#x14-1280009" id="QQ2-14-158">Utilities</a></span>
+<br />   &#x00A0;<span class="sectionToc" >10 <a 
+href="userhtmlse10.html#x15-13500010" id="QQ2-15-165">Preconditioner routines</a></span>
+<br />   &#x00A0;<span class="sectionToc" >11 <a 
+href="userhtmlse11.html#x17-14200011" id="QQ2-17-172">Iterative Methods</a></span>
+<br />   &#x00A0;<span class="sectionToc" >12 <a 
+href="userhtmlse12.html#x19-14400012" id="QQ2-19-174">Extensions</a></span>
+<br />   &#x00A0;<span class="sectionToc" >13 <a 
+href="userhtmlse13.html#x20-15300013" id="QQ2-20-189">CUDA Environment Routines</a></span>
+<br />   &#x00A0;<span class="likesectionToc" ><a 
+href="userhtmlli2.html#x21-168000" id="QQ2-21-218">References</a></span>
+   </div>

-                                                                  
-<br />   &#x00A0;<span class="subsectionToc" >6.13 <a 
-href="userhtmlsu42.html#x53-900006.13" id="QQ2-53-118">psb_geins &#8212; Dense matrix insertion routine</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.14 <a 
-href="userhtmlsu43.html#x54-910006.14" id="QQ2-54-119">psb_geasb &#8212; Assembles a dense matrix</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.15 <a 
-href="userhtmlsu44.html#x55-920006.15" id="QQ2-55-120">psb_gefree &#8212; Frees a dense matrix</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.16 <a 
-href="userhtmlsu45.html#x56-930006.16" id="QQ2-56-121">psb_gelp &#8212; Applies a left permutation to a dense matrix</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.17 <a 
-href="userhtmlsu46.html#x57-940006.17" id="QQ2-57-122">psb_glob_to_loc &#8212; Global to local indices conversion</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.18 <a 
-href="userhtmlsu47.html#x58-950006.18" id="QQ2-58-123">psb_loc_to_glob &#8212; Local to global indices conversion</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.19 <a 
-href="userhtmlsu48.html#x59-960006.19" id="QQ2-59-124">psb_is_owned &#8212; </a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.20 <a 
-href="userhtmlsu49.html#x60-970006.20" id="QQ2-60-125">psb_owned_index &#8212; </a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.21 <a 
-href="userhtmlsu50.html#x61-980006.21" id="QQ2-61-126">psb_is_local &#8212; </a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.22 <a 
-href="userhtmlsu51.html#x62-990006.22" id="QQ2-62-127">psb_local_index &#8212; </a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.23 <a 
-href="userhtmlsu52.html#x63-1000006.23" id="QQ2-63-128">psb_get_boundary &#8212; Extract list of boundary elements</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.24 <a 
-href="userhtmlsu53.html#x64-1010006.24" id="QQ2-64-129">psb_get_overlap &#8212; Extract list of overlap elements</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.25 <a 
-href="userhtmlsu54.html#x65-1020006.25" id="QQ2-65-130">psb_sp_getrow &#8212; Extract row(s) from a sparse matrix</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.26 <a 
-href="userhtmlsu55.html#x66-1030006.26" id="QQ2-66-131">psb_sizeof &#8212; Memory occupation</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.27 <a 
-href="userhtmlsu56.html#x67-1040006.27" id="QQ2-67-132">Sorting utilities &#8212; </a></span>
-<br />   <span class="sectionToc" >7 <a 
-href="userhtmlse7.html#x68-1050007" id="QQ2-68-133">Parallel environment routines</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.1 <a 
-href="userhtmlsu57.html#x69-1060007.1" id="QQ2-69-134">psb_init &#8212; Initializes PSBLAS parallel environment</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.2 <a 
-href="userhtmlsu58.html#x70-1070007.2" id="QQ2-70-135">psb_info &#8212; Return information about PSBLAS parallel environment</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.3 <a 
-href="userhtmlsu59.html#x71-1080007.3" id="QQ2-71-136">psb_exit &#8212; Exit from PSBLAS parallel environment</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.4 <a 
-href="userhtmlsu60.html#x72-1090007.4" id="QQ2-72-137">psb_get_mpi_comm &#8212; Get the MPI communicator</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.5 <a 
-href="userhtmlsu61.html#x73-1100007.5" id="QQ2-73-138">psb_get_mpi_rank &#8212; Get the MPI rank</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.6 <a 
-href="userhtmlsu62.html#x74-1110007.6" id="QQ2-74-139">psb_wtime &#8212; Wall clock timing</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.7 <a 
-href="userhtmlsu63.html#x75-1120007.7" id="QQ2-75-140">psb_barrier &#8212; Synchronization point parallel environment</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.8 <a 
-href="userhtmlsu64.html#x76-1130007.8" id="QQ2-76-141">psb_abort &#8212; Abort a computation</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.9 <a 
-href="userhtmlsu65.html#x77-1140007.9" id="QQ2-77-142">psb_bcast &#8212; Broadcast data</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.10 <a 
-href="userhtmlsu66.html#x78-1150007.10" id="QQ2-78-143">psb_sum &#8212; Global sum</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.11 <a 
-href="userhtmlsu67.html#x79-1160007.11" id="QQ2-79-144">psb_max &#8212; Global maximum</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.12 <a 
-href="userhtmlsu68.html#x80-1170007.12" id="QQ2-80-145">psb_min &#8212; Global minimum</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.13 <a 
-href="userhtmlsu69.html#x81-1180007.13" id="QQ2-81-146">psb_amx &#8212; Global maximum absolute value</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.14 <a 
-href="userhtmlsu70.html#x82-1190007.14" id="QQ2-82-147">psb_amn &#8212; Global minimum absolute value</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.15 <a 
-href="userhtmlsu71.html#x83-1200007.15" id="QQ2-83-148">psb_nrm2 &#8212; Global 2-norm reduction</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.16 <a 
-href="userhtmlsu72.html#x84-1210007.16" id="QQ2-84-149">psb_snd &#8212; Send data</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.17 <a 
-href="userhtmlsu73.html#x85-1220007.17" id="QQ2-85-150">psb_rcv &#8212; Receive data</a></span>
-<br />   <span class="sectionToc" >8 <a 
-href="userhtmlse8.html#x86-1230008" id="QQ2-86-151">Error handling</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >8.1 <a 
-href="userhtmlsu74.html#x87-1240008.1" id="QQ2-87-154">psb_errpush &#8212; Pushes an error code onto the error stack</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >8.2 <a 
-href="userhtmlsu75.html#x88-1250008.2" id="QQ2-88-155">psb_error &#8212; Prints the error stack content and aborts execution</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >8.3 <a 
-href="userhtmlsu76.html#x89-1260008.3" id="QQ2-89-156">psb_set_errverbosity &#8212; Sets the verbosity of error messages</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >8.4 <a 
-href="userhtmlsu77.html#x90-1270008.4" id="QQ2-90-157">psb_set_erraction &#8212; Set the type of action to be taken upon error condition</a></span>
-<br />   <span class="sectionToc" >9 <a 
-href="userhtmlse9.html#x91-1280009" id="QQ2-91-158">Utilities</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >9.1 <a 
-href="userhtmlsu78.html#x92-1290009.1" id="QQ2-92-159"> hb_read &#8212; Read a sparse matrix from a file in the Harwell&#8211;Boeing format</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >9.2 <a 
-href="userhtmlsu79.html#x93-1300009.2" id="QQ2-93-160">hb_write &#8212; Write a sparse matrix to a file in the Harwell&#8211;Boeing format</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >9.3 <a 
-href="userhtmlsu80.html#x94-1310009.3" id="QQ2-94-161">mm_mat_read &#8212; Read a sparse matrix from a file in the MatrixMarket format</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >9.4 <a 
-href="userhtmlsu81.html#x95-1320009.4" id="QQ2-95-162">mm_array_read &#8212; Read a dense array from a file in the MatrixMarket format</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >9.5 <a 
-href="userhtmlsu82.html#x96-1330009.5" id="QQ2-96-163">mm_mat_write &#8212; Write a sparse matrix to a file in the MatrixMarket format</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >9.6 <a 
-href="userhtmlsu83.html#x97-1340009.6" id="QQ2-97-164">mm_array_write &#8212; Write a dense array to a file in the MatrixMarket format</a></span>
-<br />   <span class="sectionToc" >10 <a 
-href="userhtmlse10.html#x98-13500010" id="QQ2-98-165">Preconditioner routines</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >10.1 <a 
-href="userhtmlsu84.html#x99-13600010.1" id="QQ2-99-166">init &#8212; Initialize a preconditioner</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >10.2 <a 
-href="userhtmlsu85.html#x101-13700010.2" id="QQ2-101-167">build &#8212; Builds a preconditioner</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >10.3 <a 
-href="userhtmlsu86.html#x102-13800010.3" id="QQ2-102-168">apply &#8212; Preconditioner application routine</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >10.4 <a 
-href="userhtmlsu87.html#x103-13900010.4" id="QQ2-103-169">descr &#8212; Prints a description of current preconditioner</a></span>
-                                                                  

-                                                                  
-<br />   &#x00A0;<span class="subsectionToc" >10.5 <a 
-href="userhtmlsu88.html#x104-14000010.5" id="QQ2-104-170">clone &#8212; clone current preconditioner</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >10.6 <a 
-href="userhtmlsu89.html#x105-14100010.6" id="QQ2-105-171">free &#8212; Free a preconditioner</a></span>
-<br />   <span class="sectionToc" >11 <a 
-href="userhtmlse11.html#x106-14200011" id="QQ2-106-172">Iterative Methods</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >11.1 <a 
-href="userhtmlsu90.html#x107-14300011.1" id="QQ2-107-173">psb_krylov &#8212; Krylov Methods Driver Routine</a></span>
-<br />   <span class="likesectionToc" ><a 
-href="userhtmlli2.html#x109-14400011.1" id="QQ2-109-174">References</a></span>
-   </div>



--- a/docs/html/userhtml0x.png
+++ b/docs/html/userhtml0x.png
--- a/docs/html/userhtml10x.png
+++ b/docs/html/userhtml10x.png
--- a/docs/html/userhtml11x.png
+++ b/docs/html/userhtml11x.png
--- a/docs/html/userhtml12x.png
+++ b/docs/html/userhtml12x.png
--- a/docs/html/userhtml13x.png
+++ b/docs/html/userhtml13x.png
--- a/docs/html/userhtml14x.png
+++ b/docs/html/userhtml14x.png
--- a/docs/html/userhtml15x.png
+++ b/docs/html/userhtml15x.png
--- a/docs/html/userhtml16.html
+++ b/docs/html/userhtml16.html
@ -0,0 +1,19 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"  
+  "http://www.w3.org/TR/html4/loose.dtd">  
+<html > 
+<head><title></title> 
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 
+<meta name="generator" content="TeX4ht (https://tug.org/tex4ht/)"> 
+<meta name="originator" content="TeX4ht (https://tug.org/tex4ht/)"> 
+<!-- html,3 --> 
+<meta name="src" content="userhtml.tex"> 
+<link rel="stylesheet" type="text/css" href="userhtml.css"> 
+</head><body 
+>
+      <div class="footnote-text">
+  <!--l. 72--><p class="indent" >     <span class="footnote-mark"><a 
+ id="fn4x0"><a 
+ id="x16-136002x10.1"></a>   <sup class="textsuperscript">4</sup></a></span><span 
+class="cmr-8">The string is case-insensitive</span></div>
+      
+</body></html> 
--- a/docs/html/userhtml16x.png
+++ b/docs/html/userhtml16x.png
--- a/docs/html/userhtml17x.png
+++ b/docs/html/userhtml17x.png
--- a/docs/html/userhtml18.html
+++ b/docs/html/userhtml18.html
@ -0,0 +1,20 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"  
+  "http://www.w3.org/TR/html4/loose.dtd">  
+<html > 
+<head><title></title> 
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 
+<meta name="generator" content="TeX4ht (https://tug.org/tex4ht/)"> 
+<meta name="originator" content="TeX4ht (https://tug.org/tex4ht/)"> 
+<!-- html,3 --> 
+<meta name="src" content="userhtml.tex"> 
+<link rel="stylesheet" type="text/css" href="userhtml.css"> 
+</head><body 
+>
+  <div class="footnote-text">
+  <!--l. 53--><p class="noindent" ><span class="footnote-mark"><a 
+ id="fn5x0"><a 
+ id="x18-143004x11.1"></a>   <sup class="textsuperscript">5</sup></a></span><span 
+class="cmr-8">Note: the implementation is for </span><span 
+class="cmmi-8">FCG</span><span 
+class="cmr-8">(1).</span></div> 
+</body></html> 
--- a/docs/html/userhtml18x.png
+++ b/docs/html/userhtml18x.png
--- a/docs/html/userhtml19x.png
+++ b/docs/html/userhtml19x.png
--- a/docs/html/userhtml1x.png
+++ b/docs/html/userhtml1x.png
--- a/docs/html/userhtml20x.png
+++ b/docs/html/userhtml20x.png
--- a/docs/html/userhtml21x.png
+++ b/docs/html/userhtml21x.png
--- a/docs/html/userhtml22x.png
+++ b/docs/html/userhtml22x.png
--- a/docs/html/userhtml23x.png
+++ b/docs/html/userhtml23x.png
--- a/docs/html/userhtml24x.png
+++ b/docs/html/userhtml24x.png
--- a/docs/html/userhtml25x.png
+++ b/docs/html/userhtml25x.png
--- a/docs/html/userhtml26x.png
+++ b/docs/html/userhtml26x.png
--- a/docs/html/userhtml27x.png
+++ b/docs/html/userhtml27x.png
--- a/docs/html/userhtml28x.png
+++ b/docs/html/userhtml28x.png
--- a/docs/html/userhtml29x.png
+++ b/docs/html/userhtml29x.png
--- a/docs/html/userhtml2x.png
+++ b/docs/html/userhtml2x.png
--- a/docs/html/userhtml30x.png
+++ b/docs/html/userhtml30x.png
--- a/docs/html/userhtml31x.png
+++ b/docs/html/userhtml31x.png
--- a/docs/html/userhtml32x.png
+++ b/docs/html/userhtml32x.png
--- a/docs/html/userhtml3x.png
+++ b/docs/html/userhtml3x.png
--- a/docs/html/userhtml4x.png
+++ b/docs/html/userhtml4x.png
--- a/docs/html/userhtml5.html
+++ b/docs/html/userhtml5.html
@ -12,7 +12,8 @@
 >
      <div class="footnote-text">
  <!--l. 151--><p class="indent" >     <span class="footnote-mark"><a 
- id="fn1x0">   <sup class="textsuperscript">1</sup></a></span><span 
+ id="fn1x0"><a 
+ id="x5-3003x2"></a>   <sup class="textsuperscript">1</sup></a></span><span 
 class="cmr-8">In our prototype implementation we provide sample scatter/gather routines.</span></div>
      
 </body></html> 
--- a/docs/html/userhtml5x.png
+++ b/docs/html/userhtml5x.png
--- a/docs/html/userhtml6.html
+++ b/docs/html/userhtml6.html
@ -0,0 +1,24 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"  
+  "http://www.w3.org/TR/html4/loose.dtd">  
+<html > 
+<head><title></title> 
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 
+<meta name="generator" content="TeX4ht (https://tug.org/tex4ht/)"> 
+<meta name="originator" content="TeX4ht (https://tug.org/tex4ht/)"> 
+<!-- html,3 --> 
+<meta name="src" content="userhtml.tex"> 
+<link rel="stylesheet" type="text/css" href="userhtml.css"> 
+</head><body 
+>
+  <div class="footnote-text">
+  <!--l. 195--><p class="noindent" ><span class="footnote-mark"><a 
+ id="fn2x0"><a 
+ id="x6-4002x2.1"></a>   <sup class="textsuperscript">2</sup></a></span><span 
+class="cmr-8">This is the normal situation when the pattern of the sparse matrix is symmetric, which is</span>
+  <span 
+class="cmr-8">equivalent to saying that the interaction between two variables is reciprocal. If the matrix pattern is</span>
+  <span 
+class="cmr-8">non-symmetric we may have one-way interactions, and these could cause a situation in which a</span>
+  <span 
+class="cmr-8">boundary point is not a halo point for its neighbour.</span></div> 
+</body></html> 
--- a/docs/html/userhtml6x.png
+++ b/docs/html/userhtml6x.png
--- a/docs/html/userhtml7.html
+++ b/docs/html/userhtml7.html
@ -11,13 +11,16 @@
 </head><body 
 >
  <div class="footnote-text">
-  <!--l. 195--><p class="noindent" ><span class="footnote-mark"><a 
- id="fn2x0">   <sup class="textsuperscript">2</sup></a></span><span 
-class="cmr-8">This is the normal situation when the pattern of the sparse matrix is symmetric, which is</span>
+  <!--l. 362--><p class="noindent" ><span class="footnote-mark"><a 
+ id="fn3x0"><a 
+ id="x7-6020x3"></a>   <sup class="textsuperscript">3</sup></a></span><span 
+class="cmr-8">The subroutine style </span><span 
+class="cmtt-8">psb</span><span 
+class="cmtt-8">_precinit </span><span 
+class="cmr-8">and </span><span 
+class="cmtt-8">psb</span><span 
+class="cmtt-8">_precbld </span><span 
+class="cmr-8">are still supported for backward</span>
  <span 
-class="cmr-8">equivalent to saying that the interaction between two variables is reciprocal. If the matrix pattern is</span>
-  <span 
-class="cmr-8">non-symmetric we may have one-way interactions, and these could cause a situation in which a</span>
-  <span 
-class="cmr-8">boundary point is not a halo point for its neighbour.</span></div> 
+class="cmr-8">compatibility</span></div> 
 </body></html> 
--- a/docs/html/userhtml7x.png
+++ b/docs/html/userhtml7x.png
--- a/docs/html/userhtml8x.png
+++ b/docs/html/userhtml8x.png
--- a/docs/html/userhtml9x.png
+++ b/docs/html/userhtml9x.png
--- a/docs/html/userhtmlli1.html
+++ b/docs/html/userhtmlli1.html
@ -10,306 +10,346 @@
 <link rel="stylesheet" type="text/css" href="userhtml.css"> 
 </head><body 
 >
-   <!--l. 105--><div class="crosslinks"><p class="noindent">[<a 
+   <!--l. 106--><div class="crosslinks"><p class="noindent">[<a 
 href="userhtmlse1.html" >next</a>] [<a 
 href="#tailuserhtmlli1.html">tail</a>] [<a 
 href="userhtml.html#userhtmlli1.html" >up</a>] </p></div>
   <h3 class="likesectionHead"><a 
 id="x2-1000"></a>Contents</h3>
   <div class="tableofcontents">
-   <span class="sectionToc" >1 <a 
+   &#x00A0;<span class="sectionToc" >1 <a 
 href="userhtmlse1.html#x3-20001">Introduction</a></span>
-<br />   <span class="sectionToc" >2 <a 
+<br />   &#x00A0;<span class="sectionToc" >2 <a 
 href="userhtmlse2.html#x4-30002">General overview</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >2.1 <a 
-href="userhtmlsu1.html#x6-40002.1">Basic Nomenclature</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >2.2 <a 
-href="userhtmlsu2.html#x8-50002.2">Library contents</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >2.3 <a 
-href="userhtmlsu3.html#x9-60002.3">Application structure</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >2.3.1 <a 
-href="userhtmlsu3.html#x9-70002.3.1" id="QQ2-9-9">User-defined index mappings</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >2.4 <a 
-href="userhtmlsu4.html#x11-80002.4">Programming model</a></span>
-<br />   <span class="sectionToc" >3 <a 
-href="userhtmlse3.html#x12-90003">Data Structures and Classes</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >3.1 <a 
-href="userhtmlsu5.html#x13-100003.1">Descriptor data structure</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.1 <a 
-href="userhtmlsu5.html#x13-110003.1.1" id="QQ2-13-14">Descriptor Methods</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.2 <a 
-href="userhtmlsu5.html#x13-120003.1.2" id="QQ2-13-15">get_local_rows &#8212; Get number of local rows</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.3 <a 
-href="userhtmlsu5.html#x13-130003.1.3" id="QQ2-13-16">get_local_cols &#8212; Get number of local cols</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.4 <a 
-href="userhtmlsu5.html#x13-140003.1.4" id="QQ2-13-17">get_global_rows &#8212; Get number of global rows</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.5 <a 
-href="userhtmlsu5.html#x13-150003.1.5" id="QQ2-13-18">get_global_cols &#8212; Get number of global cols</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.6 <a 
-href="userhtmlsu5.html#x13-160003.1.6" id="QQ2-13-19">get_global_indices &#8212; Get vector of global indices</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.7 <a 
-href="userhtmlsu5.html#x13-170003.1.7" id="QQ2-13-20">get_context &#8212; Get communication context</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.8 <a 
-href="userhtmlsu5.html#x13-180003.1.8" id="QQ2-13-21">Clone &#8212; clone current object</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.9 <a 
-href="userhtmlsu5.html#x13-190003.1.9" id="QQ2-13-22">CNV &#8212; convert internal storage format</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.10 <a 
-href="userhtmlsu5.html#x13-200003.1.10" id="QQ2-13-23">psb_cd_get_large_threshold &#8212; Get threshold for index mapping switch</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.11 <a 
-href="userhtmlsu5.html#x13-210003.1.11" id="QQ2-13-24">psb_cd_set_large_threshold &#8212; Set threshold for index mapping switch</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.12 <a 
-href="userhtmlsu5.html#x13-220003.1.12" id="QQ2-13-25">get_p_adjcncy &#8212; Get process adjacency list</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.13 <a 
-href="userhtmlsu5.html#x13-230003.1.13" id="QQ2-13-26">set_p_adjcncy &#8212; Set process adjacency list</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.14 <a 
-href="userhtmlsu5.html#x13-240003.1.14" id="QQ2-13-27">fnd_owner &#8212; Find the owner process of a set of indices</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.15 <a 
-href="userhtmlsu5.html#x13-250003.1.15" id="QQ2-13-28">Named Constants</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >3.2 <a 
-href="userhtmlsu6.html#x14-260003.2">Sparse Matrix class</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.1 <a 
-href="userhtmlsu6.html#x14-270003.2.1" id="QQ2-14-31">Sparse Matrix Methods</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.2 <a 
-href="userhtmlsu6.html#x14-280003.2.2" id="QQ2-14-32">get_nrows &#8212; Get number of rows in a sparse matrix</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.3 <a 
-href="userhtmlsu6.html#x14-290003.2.3" id="QQ2-14-33">get_ncols &#8212; Get number of columns in a sparse matrix</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.4 <a 
-href="userhtmlsu6.html#x14-300003.2.4" id="QQ2-14-34">get_nnzeros &#8212; Get number of nonzero elements in a sparse matrix</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.5 <a 
-href="userhtmlsu6.html#x14-310003.2.5" id="QQ2-14-35">get_size &#8212; Get maximum number of nonzero elements in a sparse matrix</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.6 <a 
-href="userhtmlsu6.html#x14-320003.2.6" id="QQ2-14-36">sizeof &#8212; Get memory occupation in bytes of a sparse matrix</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.7 <a 
-href="userhtmlsu6.html#x14-330003.2.7" id="QQ2-14-37">get_fmt &#8212; Short description of the dynamic type</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.8 <a 
-href="userhtmlsu6.html#x14-340003.2.8" id="QQ2-14-38">is_bld, is_upd, is_asb &#8212; Status check</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.9 <a 
-href="userhtmlsu6.html#x14-350003.2.9" id="QQ2-14-39">is_lower, is_upper, is_triangle, is_unit &#8212; Format check</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.10 <a 
-href="userhtmlsu6.html#x14-360003.2.10" id="QQ2-14-40">cscnv &#8212; Convert to a different storage format</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.11 <a 
-href="userhtmlsu6.html#x14-370003.2.11" id="QQ2-14-41">csclip &#8212; Reduce to a submatrix</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.12 <a 
-href="userhtmlsu6.html#x14-380003.2.12" id="QQ2-14-42">clean_zeros &#8212; Eliminate zero coefficients</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.13 <a 
-href="userhtmlsu6.html#x14-390003.2.13" id="QQ2-14-43">get_diag &#8212; Get main diagonal</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.14 <a 
-href="userhtmlsu6.html#x14-400003.2.14" id="QQ2-14-44">clip_diag &#8212; Cut out main diagonal</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >2.1 <a 
+href="userhtmlse2.html#x4-40002.1" id="QQ2-4-5">Basic Nomenclature</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >2.2 <a 
+href="userhtmlse2.html#x4-50002.2" id="QQ2-4-7">Library contents</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >2.3 <a 
+href="userhtmlse2.html#x4-60002.3" id="QQ2-4-8">Application structure</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >2.3.1 <a 
+href="userhtmlse2.html#x4-70002.3.1" id="QQ2-4-9">User-defined index mappings</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >2.4 <a 
+href="userhtmlse2.html#x4-80002.4" id="QQ2-4-10">Programming model</a></span>
+<br />   &#x00A0;<span class="sectionToc" >3 <a 
+href="userhtmlse3.html#x8-90003">Data Structures and Classes</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >3.1 <a 
+href="userhtmlse3.html#x8-100003.1" id="QQ2-8-12">Descriptor data structure</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.1 <a 
+href="userhtmlse3.html#x8-110003.1.1" id="QQ2-8-14">Descriptor Methods</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.2 <a 
+href="userhtmlse3.html#x8-120003.1.2" id="QQ2-8-15">get_local_rows &#8212; Get number of local rows</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.3 <a 
+href="userhtmlse3.html#x8-130003.1.3" id="QQ2-8-16">get_local_cols &#8212; Get number of local cols</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.4 <a 
+href="userhtmlse3.html#x8-140003.1.4" id="QQ2-8-17">get_global_rows &#8212; Get number of global rows</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.5 <a 
+href="userhtmlse3.html#x8-150003.1.5" id="QQ2-8-18">get_global_cols &#8212; Get number of global cols</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.6 <a 
+href="userhtmlse3.html#x8-160003.1.6" id="QQ2-8-19">get_global_indices &#8212; Get vector of global indices</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.7 <a 
+href="userhtmlse3.html#x8-170003.1.7" id="QQ2-8-20">get_context &#8212; Get communication context</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.8 <a 
+href="userhtmlse3.html#x8-180003.1.8" id="QQ2-8-21">Clone &#8212; clone current object</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.9 <a 
+href="userhtmlse3.html#x8-190003.1.9" id="QQ2-8-22">CNV &#8212; convert internal storage format</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.10 <a 
+href="userhtmlse3.html#x8-200003.1.10" id="QQ2-8-23">psb_cd_get_large_threshold &#8212; Get threshold for index mapping switch</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.11 <a 
+href="userhtmlse3.html#x8-210003.1.11" id="QQ2-8-24">psb_cd_set_large_threshold &#8212; Set threshold for index mapping switch</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.12 <a 
+href="userhtmlse3.html#x8-220003.1.12" id="QQ2-8-25">get_p_adjcncy &#8212; Get process adjacency list</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.13 <a 
+href="userhtmlse3.html#x8-230003.1.13" id="QQ2-8-26">set_p_adjcncy &#8212; Set process adjacency list</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.14 <a 
+href="userhtmlse3.html#x8-240003.1.14" id="QQ2-8-27">fnd_owner &#8212; Find the owner process of a set of indices</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.15 <a 
+href="userhtmlse3.html#x8-250003.1.15" id="QQ2-8-28">Named Constants</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >3.2 <a 
+href="userhtmlse3.html#x8-260003.2" id="QQ2-8-29">Sparse Matrix class</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.1 <a 
+href="userhtmlse3.html#x8-270003.2.1" id="QQ2-8-31">Sparse Matrix Methods</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.2 <a 
+href="userhtmlse3.html#x8-280003.2.2" id="QQ2-8-32">get_nrows &#8212; Get number of rows in a sparse matrix</a></span>
                                                                  

                                                                  
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.15 <a 
-href="userhtmlsu6.html#x14-410003.2.15" id="QQ2-14-45">tril &#8212; Return the lower triangle</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.16 <a 
-href="userhtmlsu6.html#x14-420003.2.16" id="QQ2-14-46">triu &#8212; Return the upper triangle</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.17 <a 
-href="userhtmlsu6.html#x14-430003.2.17" id="QQ2-14-47">psb_set_mat_default &#8212; Set default storage format</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.18 <a 
-href="userhtmlsu6.html#x14-440003.2.18" id="QQ2-14-48">clone &#8212; Clone current object</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.19 <a 
-href="userhtmlsu6.html#x14-450003.2.19" id="QQ2-14-49">Named Constants</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >3.3 <a 
-href="userhtmlsu7.html#x15-460003.3">Dense Vector Data Structure</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.3.1 <a 
-href="userhtmlsu7.html#x15-470003.3.1" id="QQ2-15-52">Vector Methods</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.3.2 <a 
-href="userhtmlsu7.html#x15-480003.3.2" id="QQ2-15-53">get_nrows &#8212; Get number of rows in a dense vector</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.3.3 <a 
-href="userhtmlsu7.html#x15-490003.3.3" id="QQ2-15-54">sizeof &#8212; Get memory occupation in bytes of a dense vector</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.3.4 <a 
-href="userhtmlsu7.html#x15-500003.3.4" id="QQ2-15-55">set &#8212; Set contents of the vector</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.3.5 <a 
-href="userhtmlsu7.html#x15-510003.3.5" id="QQ2-15-56">get_vect &#8212; Get a copy of the vector contents</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.3.6 <a 
-href="userhtmlsu7.html#x15-520003.3.6" id="QQ2-15-57">clone &#8212; Clone current object</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >3.4 <a 
-href="userhtmlsu8.html#x16-530003.4">Preconditioner data structure</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >3.5 <a 
-href="userhtmlsu9.html#x17-540003.5">Heap data structure</a></span>
-<br />   <span class="sectionToc" >4 <a 
-href="userhtmlse4.html#x18-550004">Computational routines</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.1 <a 
-href="userhtmlsu10.html#x19-560004.1">psb_geaxpby &#8212; General Dense Matrix Sum</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.2 <a 
-href="userhtmlsu11.html#x20-570004.2">psb_gedot &#8212; Dot Product</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.3 <a 
-href="userhtmlsu12.html#x21-580004.3">psb_gedots &#8212; Generalized Dot Product</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.4 <a 
-href="userhtmlsu13.html#x22-590004.4">psb_normi &#8212; Infinity-Norm of Vector</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.5 <a 
-href="userhtmlsu14.html#x23-600004.5">psb_geamaxs &#8212; Generalized Infinity Norm</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.6 <a 
-href="userhtmlsu15.html#x24-610004.6">psb_norm1 &#8212; 1-Norm of Vector</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.7 <a 
-href="userhtmlsu16.html#x25-620004.7">psb_geasums &#8212; Generalized 1-Norm of Vector</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.8 <a 
-href="userhtmlsu17.html#x26-630004.8">psb_norm2 &#8212; 2-Norm of Vector</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.9 <a 
-href="userhtmlsu18.html#x27-640004.9">psb_genrm2s &#8212; Generalized 2-Norm of Vector</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.10 <a 
-href="userhtmlsu19.html#x28-650004.10">psb_norm1 &#8212; 1-Norm of Sparse Matrix</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.11 <a 
-href="userhtmlsu20.html#x29-660004.11">psb_normi &#8212; Infinity Norm of Sparse Matrix</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.12 <a 
-href="userhtmlsu21.html#x30-670004.12">psb_spmm &#8212; Sparse Matrix by Dense Matrix Product</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.13 <a 
-href="userhtmlsu22.html#x31-680004.13">psb_spsm &#8212; Triangular System Solve</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.14 <a 
-href="userhtmlsu23.html#x32-690004.14">psb_gemlt &#8212; Entrywise Product</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.15 <a 
-href="userhtmlsu24.html#x33-700004.15">psb_gediv &#8212; Entrywise Division</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >4.16 <a 
-href="userhtmlsu25.html#x34-710004.16">psb_geinv &#8212; Entrywise Inversion</a></span>
-<br />   <span class="sectionToc" >5 <a 
-href="userhtmlse5.html#x35-720005">Communication routines</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >5.1 <a 
-href="userhtmlsu26.html#x36-730005.1">psb_halo &#8212; Halo Data Communication</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >5.2 <a 
-href="userhtmlsu27.html#x37-740005.2">psb_ovrl &#8212; Overlap Update</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >5.3 <a 
-href="userhtmlsu28.html#x38-750005.3">psb_gather &#8212; Gather Global Dense Matrix</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >5.4 <a 
-href="userhtmlsu29.html#x39-760005.4">psb_scatter &#8212; Scatter Global Dense Matrix</a></span>
-<br />   <span class="sectionToc" >6 <a 
-href="userhtmlse6.html#x40-770006">Data management routines</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.1 <a 
-href="userhtmlsu30.html#x41-780006.1">psb_cdall &#8212; Allocates a communication descriptor</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.2 <a 
-href="userhtmlsu31.html#x42-790006.2">psb_cdins &#8212; Communication descriptor insert routine</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.3 <a 
-href="userhtmlsu32.html#x43-800006.3">psb_cdasb &#8212; Communication descriptor assembly routine</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.4 <a 
-href="userhtmlsu33.html#x44-810006.4">psb_cdcpy &#8212; Copies a communication descriptor</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.5 <a 
-href="userhtmlsu34.html#x45-820006.5">psb_cdfree &#8212; Frees a communication descriptor</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.6 <a 
-href="userhtmlsu35.html#x46-830006.6">psb_cdbldext &#8212; Build an extended communication descriptor</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.7 <a 
-href="userhtmlsu36.html#x47-840006.7">psb_spall &#8212; Allocates a sparse matrix</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.8 <a 
-href="userhtmlsu37.html#x48-850006.8">psb_spins &#8212; Insert a set of coefficients into a sparse matrix</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.9 <a 
-href="userhtmlsu38.html#x49-860006.9">psb_spasb &#8212; Sparse matrix assembly routine</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.10 <a 
-href="userhtmlsu39.html#x50-870006.10">psb_spfree &#8212; Frees a sparse matrix</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.11 <a 
-href="userhtmlsu40.html#x51-880006.11">psb_sprn &#8212; Reinit sparse matrix structure for psblas routines.</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.12 <a 
-href="userhtmlsu41.html#x52-890006.12">psb_geall &#8212; Allocates a dense matrix</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.13 <a 
-href="userhtmlsu42.html#x53-900006.13">psb_geins &#8212; Dense matrix insertion routine</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.3 <a 
+href="userhtmlse3.html#x8-290003.2.3" id="QQ2-8-33">get_ncols &#8212; Get number of columns in a sparse matrix</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.4 <a 
+href="userhtmlse3.html#x8-300003.2.4" id="QQ2-8-34">get_nnzeros &#8212; Get number of nonzero elements in a sparse matrix</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.5 <a 
+href="userhtmlse3.html#x8-310003.2.5" id="QQ2-8-35">get_size &#8212; Get maximum number of nonzero elements in a sparse matrix</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.6 <a 
+href="userhtmlse3.html#x8-320003.2.6" id="QQ2-8-36">sizeof &#8212; Get memory occupation in bytes of a sparse matrix</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.7 <a 
+href="userhtmlse3.html#x8-330003.2.7" id="QQ2-8-37">get_fmt &#8212; Short description of the dynamic type</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.8 <a 
+href="userhtmlse3.html#x8-340003.2.8" id="QQ2-8-38">is_bld, is_upd, is_asb &#8212; Status check</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.9 <a 
+href="userhtmlse3.html#x8-350003.2.9" id="QQ2-8-39">is_lower, is_upper, is_triangle, is_unit &#8212; Format check</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.10 <a 
+href="userhtmlse3.html#x8-360003.2.10" id="QQ2-8-40">cscnv &#8212; Convert to a different storage format</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.11 <a 
+href="userhtmlse3.html#x8-370003.2.11" id="QQ2-8-41">csclip &#8212; Reduce to a submatrix</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.12 <a 
+href="userhtmlse3.html#x8-380003.2.12" id="QQ2-8-42">clean_zeros &#8212; Eliminate zero coefficients</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.13 <a 
+href="userhtmlse3.html#x8-390003.2.13" id="QQ2-8-43">get_diag &#8212; Get main diagonal</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.14 <a 
+href="userhtmlse3.html#x8-400003.2.14" id="QQ2-8-44">clip_diag &#8212; Cut out main diagonal</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.15 <a 
+href="userhtmlse3.html#x8-410003.2.15" id="QQ2-8-45">tril &#8212; Return the lower triangle</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.16 <a 
+href="userhtmlse3.html#x8-420003.2.16" id="QQ2-8-46">triu &#8212; Return the upper triangle</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.17 <a 
+href="userhtmlse3.html#x8-430003.2.17" id="QQ2-8-47">psb_set_mat_default &#8212; Set default storage format</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.18 <a 
+href="userhtmlse3.html#x8-440003.2.18" id="QQ2-8-48">clone &#8212; Clone current object</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.19 <a 
+href="userhtmlse3.html#x8-450003.2.19" id="QQ2-8-49">Named Constants</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >3.3 <a 
+href="userhtmlse3.html#x8-460003.3" id="QQ2-8-50">Dense Vector Data Structure</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.3.1 <a 
+href="userhtmlse3.html#x8-470003.3.1" id="QQ2-8-52">Vector Methods</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.3.2 <a 
+href="userhtmlse3.html#x8-480003.3.2" id="QQ2-8-53">get_nrows &#8212; Get number of rows in a dense vector</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.3.3 <a 
+href="userhtmlse3.html#x8-490003.3.3" id="QQ2-8-54">sizeof &#8212; Get memory occupation in bytes of a dense vector</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.3.4 <a 
+href="userhtmlse3.html#x8-500003.3.4" id="QQ2-8-55">set &#8212; Set contents of the vector</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.3.5 <a 
+href="userhtmlse3.html#x8-510003.3.5" id="QQ2-8-56">get_vect &#8212; Get a copy of the vector contents</a></span>
+<br />   &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.3.6 <a 
+href="userhtmlse3.html#x8-520003.3.6" id="QQ2-8-57">clone &#8212; Clone current object</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >3.4 <a 
+href="userhtmlse3.html#x8-530003.4" id="QQ2-8-58">Preconditioner data structure</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >3.5 <a 
+href="userhtmlse3.html#x8-540003.5" id="QQ2-8-60">Heap data structure</a></span>
+<br />   &#x00A0;<span class="sectionToc" >4 <a 
+href="userhtmlse4.html#x9-550004">Computational routines</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >4.1 <a 
+href="userhtmlse4.html#x9-560004.1" id="QQ2-9-62">psb_geaxpby &#8212; General Dense Matrix Sum</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >4.2 <a 
+href="userhtmlse4.html#x9-570004.2" id="QQ2-9-64">psb_gedot &#8212; Dot Product</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >4.3 <a 
+href="userhtmlse4.html#x9-580004.3" id="QQ2-9-66">psb_gedots &#8212; Generalized Dot Product</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >4.4 <a 
+href="userhtmlse4.html#x9-590004.4" id="QQ2-9-68">psb_normi &#8212; Infinity-Norm of Vector</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >4.5 <a 
+href="userhtmlse4.html#x9-600004.5" id="QQ2-9-70">psb_geamaxs &#8212; Generalized Infinity Norm</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >4.6 <a 
+href="userhtmlse4.html#x9-610004.6" id="QQ2-9-72">psb_norm1 &#8212; 1-Norm of Vector</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >4.7 <a 
+href="userhtmlse4.html#x9-620004.7" id="QQ2-9-74">psb_geasums &#8212; Generalized 1-Norm of Vector</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >4.8 <a 
+href="userhtmlse4.html#x9-630004.8" id="QQ2-9-76">psb_norm2 &#8212; 2-Norm of Vector</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >4.9 <a 
+href="userhtmlse4.html#x9-640004.9" id="QQ2-9-78">psb_genrm2s &#8212; Generalized 2-Norm of Vector</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >4.10 <a 
+href="userhtmlse4.html#x9-650004.10" id="QQ2-9-80">psb_norm1 &#8212; 1-Norm of Sparse Matrix</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >4.11 <a 
+href="userhtmlse4.html#x9-660004.11" id="QQ2-9-82">psb_normi &#8212; Infinity Norm of Sparse Matrix</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >4.12 <a 
+href="userhtmlse4.html#x9-670004.12" id="QQ2-9-84">psb_spmm &#8212; Sparse Matrix by Dense Matrix Product</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >4.13 <a 
+href="userhtmlse4.html#x9-680004.13" id="QQ2-9-86">psb_spsm &#8212; Triangular System Solve</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >4.14 <a 
+href="userhtmlse4.html#x9-690004.14" id="QQ2-9-88">psb_gemlt &#8212; Entrywise Product</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >4.15 <a 
+href="userhtmlse4.html#x9-700004.15" id="QQ2-9-90">psb_gediv &#8212; Entrywise Division</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >4.16 <a 
+href="userhtmlse4.html#x9-710004.16" id="QQ2-9-92">psb_geinv &#8212; Entrywise Inversion</a></span>
+<br />   &#x00A0;<span class="sectionToc" >5 <a 
+href="userhtmlse5.html#x10-720005">Communication routines</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >5.1 <a 
+href="userhtmlse5.html#x10-730005.1" id="QQ2-10-95">psb_halo &#8212; Halo Data Communication</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >5.2 <a 
+href="userhtmlse5.html#x10-740005.2" id="QQ2-10-98">psb_ovrl &#8212; Overlap Update</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >5.3 <a 
+href="userhtmlse5.html#x10-750005.3" id="QQ2-10-101">psb_gather &#8212; Gather Global Dense Matrix</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >5.4 <a 
+href="userhtmlse5.html#x10-760005.4" id="QQ2-10-103">psb_scatter &#8212; Scatter Global Dense Matrix</a></span>
+<br />   &#x00A0;<span class="sectionToc" >6 <a 
+href="userhtmlse6.html#x11-770006">Data management routines</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >6.1 <a 
+href="userhtmlse6.html#x11-780006.1" id="QQ2-11-106">psb_cdall &#8212; Allocates a communication descriptor</a></span>
                                                                  

                                                                  
-<br />   &#x00A0;<span class="subsectionToc" >6.14 <a 
-href="userhtmlsu43.html#x54-910006.14">psb_geasb &#8212; Assembly a dense matrix</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.15 <a 
-href="userhtmlsu44.html#x55-920006.15">psb_gefree &#8212; Frees a dense matrix</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.16 <a 
-href="userhtmlsu45.html#x56-930006.16">psb_gelp &#8212; Applies a left permutation to a dense matrix</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.17 <a 
-href="userhtmlsu46.html#x57-940006.17">psb_glob_to_loc &#8212; Global to local indices convertion</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.18 <a 
-href="userhtmlsu47.html#x58-950006.18">psb_loc_to_glob &#8212; Local to global indices conversion</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.19 <a 
-href="userhtmlsu48.html#x59-960006.19">psb_is_owned &#8212; </a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.20 <a 
-href="userhtmlsu49.html#x60-970006.20">psb_owned_index &#8212; </a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.21 <a 
-href="userhtmlsu50.html#x61-980006.21">psb_is_local &#8212; </a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.22 <a 
-href="userhtmlsu51.html#x62-990006.22">psb_local_index &#8212; </a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.23 <a 
-href="userhtmlsu52.html#x63-1000006.23">psb_get_boundary &#8212; Extract list of boundary elements</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.24 <a 
-href="userhtmlsu53.html#x64-1010006.24">psb_get_overlap &#8212; Extract list of overlap elements</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.25 <a 
-href="userhtmlsu54.html#x65-1020006.25">psb_sp_getrow &#8212; Extract row(s) from a sparse matrix</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.26 <a 
-href="userhtmlsu55.html#x66-1030006.26">psb_sizeof &#8212; Memory occupation</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >6.27 <a 
-href="userhtmlsu56.html#x67-1040006.27">Sorting utilities &#8212; </a></span>
-<br />   <span class="sectionToc" >7 <a 
-href="userhtmlse7.html#x68-1050007">Parallel environment routines</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.1 <a 
-href="userhtmlsu57.html#x69-1060007.1">psb_init &#8212; Initializes PSBLAS parallel environment</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.2 <a 
-href="userhtmlsu58.html#x70-1070007.2">psb_info &#8212; Return information about PSBLAS parallel environment</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.3 <a 
-href="userhtmlsu59.html#x71-1080007.3">psb_exit &#8212; Exit from PSBLAS parallel environment</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.4 <a 
-href="userhtmlsu60.html#x72-1090007.4">psb_get_mpi_comm &#8212; Get the MPI communicator</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.5 <a 
-href="userhtmlsu61.html#x73-1100007.5">psb_get_mpi_rank &#8212; Get the MPI rank</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.6 <a 
-href="userhtmlsu62.html#x74-1110007.6">psb_wtime &#8212; Wall clock timing</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.7 <a 
-href="userhtmlsu63.html#x75-1120007.7">psb_barrier &#8212; Sinchronization point parallel environment</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.8 <a 
-href="userhtmlsu64.html#x76-1130007.8">psb_abort &#8212; Abort a computation</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.9 <a 
-href="userhtmlsu65.html#x77-1140007.9">psb_bcast &#8212; Broadcast data</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.10 <a 
-href="userhtmlsu66.html#x78-1150007.10">psb_sum &#8212; Global sum</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.11 <a 
-href="userhtmlsu67.html#x79-1160007.11">psb_max &#8212; Global maximum</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.12 <a 
-href="userhtmlsu68.html#x80-1170007.12">psb_min &#8212; Global minimum</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.13 <a 
-href="userhtmlsu69.html#x81-1180007.13">psb_amx &#8212; Global maximum absolute value</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.14 <a 
-href="userhtmlsu70.html#x82-1190007.14">psb_amn &#8212; Global minimum absolute value</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.15 <a 
-href="userhtmlsu71.html#x83-1200007.15">psb_nrm2 &#8212; Global 2-norm reduction</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.16 <a 
-href="userhtmlsu72.html#x84-1210007.16">psb_snd &#8212; Send data</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >7.17 <a 
-href="userhtmlsu73.html#x85-1220007.17">psb_rcv &#8212; Receive data</a></span>
-<br />   <span class="sectionToc" >8 <a 
-href="userhtmlse8.html#x86-1230008">Error handling</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >8.1 <a 
-href="userhtmlsu74.html#x87-1240008.1">psb_errpush &#8212; Pushes an error code onto the error stack</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >8.2 <a 
-href="userhtmlsu75.html#x88-1250008.2">psb_error &#8212; Prints the error stack content and aborts execution</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >8.3 <a 
-href="userhtmlsu76.html#x89-1260008.3">psb_set_errverbosity &#8212; Sets the verbosity of error messages</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >8.4 <a 
-href="userhtmlsu77.html#x90-1270008.4">psb_set_erraction &#8212; Set the type of action to be taken upon error condition</a></span>
-<br />   <span class="sectionToc" >9 <a 
-href="userhtmlse9.html#x91-1280009">Utilities</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >9.1 <a 
-href="userhtmlsu78.html#x92-1290009.1"> hb_read &#8212; Read a sparse matrix from a file in the Harwell&#8211;Boeing format</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >9.2 <a 
-href="userhtmlsu79.html#x93-1300009.2">hb_write &#8212; Write a sparse matrix to a file in the Harwell&#8211;Boeing format</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >9.3 <a 
-href="userhtmlsu80.html#x94-1310009.3">mm_mat_read &#8212; Read a sparse matrix from a file in the MatrixMarket format</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >9.4 <a 
-href="userhtmlsu81.html#x95-1320009.4">mm_array_read &#8212; Read a dense array from a file in the MatrixMarket format</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >9.5 <a 
-href="userhtmlsu82.html#x96-1330009.5">mm_mat_write &#8212; Write a sparse matrix to a file in the MatrixMarket format</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >9.6 <a 
-href="userhtmlsu83.html#x97-1340009.6">mm_array_write &#8212; Write a dense array from a file in the MatrixMarket format</a></span>
-<br />   <span class="sectionToc" >10 <a 
-href="userhtmlse10.html#x98-13500010">Preconditioner routines</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >10.1 <a 
-href="userhtmlsu84.html#x99-13600010.1">init &#8212; Initialize a preconditioner</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >10.2 <a 
-href="userhtmlsu85.html#x101-13700010.2">build &#8212; Builds a preconditioner</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >10.3 <a 
-href="userhtmlsu86.html#x102-13800010.3">apply &#8212; Preconditioner application routine</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >10.4 <a 
-href="userhtmlsu87.html#x103-13900010.4">descr &#8212; Prints a description of current preconditioner</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >10.5 <a 
-href="userhtmlsu88.html#x104-14000010.5">clone &#8212; clone current preconditioner</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >6.2 <a 
+href="userhtmlse6.html#x11-790006.2" id="QQ2-11-107">psb_cdins &#8212; Communication descriptor insert routine</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >6.3 <a 
+href="userhtmlse6.html#x11-800006.3" id="QQ2-11-108">psb_cdasb &#8212; Communication descriptor assembly routine</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >6.4 <a 
+href="userhtmlse6.html#x11-810006.4" id="QQ2-11-109">psb_cdcpy &#8212; Copies a communication descriptor</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >6.5 <a 
+href="userhtmlse6.html#x11-820006.5" id="QQ2-11-110">psb_cdfree &#8212; Frees a communication descriptor</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >6.6 <a 
+href="userhtmlse6.html#x11-830006.6" id="QQ2-11-111">psb_cdbldext &#8212; Build an extended communication descriptor</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >6.7 <a 
+href="userhtmlse6.html#x11-840006.7" id="QQ2-11-112">psb_spall &#8212; Allocates a sparse matrix</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >6.8 <a 
+href="userhtmlse6.html#x11-850006.8" id="QQ2-11-113">psb_spins &#8212; Insert a set of coefficients into a sparse matrix</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >6.9 <a 
+href="userhtmlse6.html#x11-860006.9" id="QQ2-11-114">psb_spasb &#8212; Sparse matrix assembly routine</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >6.10 <a 
+href="userhtmlse6.html#x11-870006.10" id="QQ2-11-115">psb_spfree &#8212; Frees a sparse matrix</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >6.11 <a 
+href="userhtmlse6.html#x11-880006.11" id="QQ2-11-116">psb_sprn &#8212; Reinit sparse matrix structure for psblas routines.</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >6.12 <a 
+href="userhtmlse6.html#x11-890006.12" id="QQ2-11-117">psb_geall &#8212; Allocates a dense matrix</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >6.13 <a 
+href="userhtmlse6.html#x11-900006.13" id="QQ2-11-118">psb_geins &#8212; Dense matrix insertion routine</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >6.14 <a 
+href="userhtmlse6.html#x11-910006.14" id="QQ2-11-119">psb_geasb &#8212; Assemble a dense matrix</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >6.15 <a 
+href="userhtmlse6.html#x11-920006.15" id="QQ2-11-120">psb_gefree &#8212; Frees a dense matrix</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >6.16 <a 
+href="userhtmlse6.html#x11-930006.16" id="QQ2-11-121">psb_gelp &#8212; Applies a left permutation to a dense matrix</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >6.17 <a 
+href="userhtmlse6.html#x11-940006.17" id="QQ2-11-122">psb_glob_to_loc &#8212; Global to local indices conversion</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >6.18 <a 
+href="userhtmlse6.html#x11-950006.18" id="QQ2-11-123">psb_loc_to_glob &#8212; Local to global indices conversion</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >6.19 <a 
+href="userhtmlse6.html#x11-960006.19" id="QQ2-11-124">psb_is_owned &#8212; </a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >6.20 <a 
+href="userhtmlse6.html#x11-970006.20" id="QQ2-11-125">psb_owned_index &#8212; </a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >6.21 <a 
+href="userhtmlse6.html#x11-980006.21" id="QQ2-11-126">psb_is_local &#8212; </a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >6.22 <a 
+href="userhtmlse6.html#x11-990006.22" id="QQ2-11-127">psb_local_index &#8212; </a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >6.23 <a 
+href="userhtmlse6.html#x11-1000006.23" id="QQ2-11-128">psb_get_boundary &#8212; Extract list of boundary elements</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >6.24 <a 
+href="userhtmlse6.html#x11-1010006.24" id="QQ2-11-129">psb_get_overlap &#8212; Extract list of overlap elements</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >6.25 <a 
+href="userhtmlse6.html#x11-1020006.25" id="QQ2-11-130">psb_sp_getrow &#8212; Extract row(s) from a sparse matrix</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >6.26 <a 
+href="userhtmlse6.html#x11-1030006.26" id="QQ2-11-131">psb_sizeof &#8212; Memory occupation</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >6.27 <a 
+href="userhtmlse6.html#x11-1040006.27" id="QQ2-11-132">Sorting utilities &#8212; </a></span>
+<br />   &#x00A0;<span class="sectionToc" >7 <a 
+href="userhtmlse7.html#x12-1050007">Parallel environment routines</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >7.1 <a 
+href="userhtmlse7.html#x12-1060007.1" id="QQ2-12-134">psb_init &#8212; Initializes PSBLAS parallel environment</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >7.2 <a 
+href="userhtmlse7.html#x12-1070007.2" id="QQ2-12-135">psb_info &#8212; Return information about PSBLAS parallel environment</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >7.3 <a 
+href="userhtmlse7.html#x12-1080007.3" id="QQ2-12-136">psb_exit &#8212; Exit from PSBLAS parallel environment</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >7.4 <a 
+href="userhtmlse7.html#x12-1090007.4" id="QQ2-12-137">psb_get_mpi_comm &#8212; Get the MPI communicator</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >7.5 <a 
+href="userhtmlse7.html#x12-1100007.5" id="QQ2-12-138">psb_get_mpi_rank &#8212; Get the MPI rank</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >7.6 <a 
+href="userhtmlse7.html#x12-1110007.6" id="QQ2-12-139">psb_wtime &#8212; Wall clock timing</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >7.7 <a 
+href="userhtmlse7.html#x12-1120007.7" id="QQ2-12-140">psb_barrier &#8212; Synchronization point parallel environment</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >7.8 <a 
+href="userhtmlse7.html#x12-1130007.8" id="QQ2-12-141">psb_abort &#8212; Abort a computation</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >7.9 <a 
+href="userhtmlse7.html#x12-1140007.9" id="QQ2-12-142">psb_bcast &#8212; Broadcast data</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >7.10 <a 
+href="userhtmlse7.html#x12-1150007.10" id="QQ2-12-143">psb_sum &#8212; Global sum</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >7.11 <a 
+href="userhtmlse7.html#x12-1160007.11" id="QQ2-12-144">psb_max &#8212; Global maximum</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >7.12 <a 
+href="userhtmlse7.html#x12-1170007.12" id="QQ2-12-145">psb_min &#8212; Global minimum</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >7.13 <a 
+href="userhtmlse7.html#x12-1180007.13" id="QQ2-12-146">psb_amx &#8212; Global maximum absolute value</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >7.14 <a 
+href="userhtmlse7.html#x12-1190007.14" id="QQ2-12-147">psb_amn &#8212; Global minimum absolute value</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >7.15 <a 
+href="userhtmlse7.html#x12-1200007.15" id="QQ2-12-148">psb_nrm2 &#8212; Global 2-norm reduction</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >7.16 <a 
+href="userhtmlse7.html#x12-1210007.16" id="QQ2-12-149">psb_snd &#8212; Send data</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >7.17 <a 
+href="userhtmlse7.html#x12-1220007.17" id="QQ2-12-150">psb_rcv &#8212; Receive data</a></span>
+<br />   &#x00A0;<span class="sectionToc" >8 <a 
+href="userhtmlse8.html#x13-1230008">Error handling</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >8.1 <a 
+href="userhtmlse8.html#x13-1240008.1" id="QQ2-13-154">psb_errpush &#8212; Pushes an error code onto the error stack</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >8.2 <a 
+href="userhtmlse8.html#x13-1250008.2" id="QQ2-13-155">psb_error &#8212; Prints the error stack content and aborts execution</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >8.3 <a 
+href="userhtmlse8.html#x13-1260008.3" id="QQ2-13-156">psb_set_errverbosity &#8212; Sets the verbosity of error messages</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >8.4 <a 
+href="userhtmlse8.html#x13-1270008.4" id="QQ2-13-157">psb_set_erraction &#8212; Set the type of action to be taken upon error condition</a></span>
+<br />   &#x00A0;<span class="sectionToc" >9 <a 
+href="userhtmlse9.html#x14-1280009">Utilities</a></span>
                                                                  

                                                                  
-<br />   &#x00A0;<span class="subsectionToc" >10.6 <a 
-href="userhtmlsu89.html#x105-14100010.6">free &#8212; Free a preconditioner</a></span>
-<br />   <span class="sectionToc" >11 <a 
-href="userhtmlse11.html#x106-14200011">Iterative Methods</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >11.1 <a 
-href="userhtmlsu90.html#x107-14300011.1">psb_krylov &#8212; Krylov Methods Driver Routine</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >9.1 <a 
+href="userhtmlse9.html#x14-1290009.1" id="QQ2-14-159"> hb_read &#8212; Read a sparse matrix from a file in the Harwell&#8211;Boeing format</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >9.2 <a 
+href="userhtmlse9.html#x14-1300009.2" id="QQ2-14-160">hb_write &#8212; Write a sparse matrix to a file in the Harwell&#8211;Boeing format</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >9.3 <a 
+href="userhtmlse9.html#x14-1310009.3" id="QQ2-14-161">mm_mat_read &#8212; Read a sparse matrix from a file in the MatrixMarket format</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >9.4 <a 
+href="userhtmlse9.html#x14-1320009.4" id="QQ2-14-162">mm_array_read &#8212; Read a dense array from a file in the MatrixMarket format</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >9.5 <a 
+href="userhtmlse9.html#x14-1330009.5" id="QQ2-14-163">mm_mat_write &#8212; Write a sparse matrix to a file in the MatrixMarket format</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >9.6 <a 
+href="userhtmlse9.html#x14-1340009.6" id="QQ2-14-164">mm_array_write &#8212; Write a dense array to a file in the MatrixMarket format</a></span>
+<br />   &#x00A0;<span class="sectionToc" >10 <a 
+href="userhtmlse10.html#x15-13500010">Preconditioner routines</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >10.1 <a 
+href="userhtmlse10.html#x15-13600010.1" id="QQ2-15-166">init &#8212; Initialize a preconditioner</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >10.2 <a 
+href="userhtmlse10.html#x15-13700010.2" id="QQ2-15-167">build &#8212; Builds a preconditioner</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >10.3 <a 
+href="userhtmlse10.html#x15-13800010.3" id="QQ2-15-168">apply &#8212; Preconditioner application routine</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >10.4 <a 
+href="userhtmlse10.html#x15-13900010.4" id="QQ2-15-169">descr &#8212; Prints a description of current preconditioner</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >10.5 <a 
+href="userhtmlse10.html#x15-14000010.5" id="QQ2-15-170">clone &#8212; clone current preconditioner</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >10.6 <a 
+href="userhtmlse10.html#x15-14100010.6" id="QQ2-15-171">free &#8212; Free a preconditioner</a></span>
+<br />   &#x00A0;<span class="sectionToc" >11 <a 
+href="userhtmlse11.html#x17-14200011">Iterative Methods</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >11.1 <a 
+href="userhtmlse11.html#x17-14300011.1" id="QQ2-17-173">psb_krylov &#8212; Krylov Methods Driver Routine</a></span>
+<br />   &#x00A0;<span class="sectionToc" >12 <a 
+href="userhtmlse12.html#x19-14400012">Extensions</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >12.1 <a 
+href="userhtmlse12.html#x19-14500012.1" id="QQ2-19-175">Using the extensions</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >12.2 <a 
+href="userhtmlse12.html#x19-14600012.2" id="QQ2-19-176">Extensions&#8217; Data Structures</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >12.3 <a 
+href="userhtmlse12.html#x19-14700012.3" id="QQ2-19-179">CPU-class extensions</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" >12.4 <a 
+href="userhtmlse12.html#x19-15200012.4" id="QQ2-19-188">CUDA-class extensions</a></span>
+<br />   &#x00A0;<span class="sectionToc" >13 <a 
+href="userhtmlse13.html#x20-15300013">CUDA Environment Routines</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" ><a 
+href="userhtmlse13.html#Q1-20-191">psb_cuda_init</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" ><a 
+href="userhtmlse13.html#Q1-20-193">psb_cuda_exit</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" ><a 
+href="userhtmlse13.html#Q1-20-195">psb_cuda_DeviceSync</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" ><a 
+href="userhtmlse13.html#Q1-20-197">psb_cuda_getDeviceCount</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" ><a 
+href="userhtmlse13.html#Q1-20-199">psb_cuda_getDevice</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" ><a 
+href="userhtmlse13.html#Q1-20-201">psb_cuda_setDevice</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" ><a 
+href="userhtmlse13.html#Q1-20-203">psb_cuda_DeviceHasUVA</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" ><a 
+href="userhtmlse13.html#Q1-20-205">psb_cuda_WarpSize</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" ><a 
+href="userhtmlse13.html#Q1-20-207">psb_cuda_MultiProcessors</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" ><a 
+href="userhtmlse13.html#Q1-20-209">psb_cuda_MaxThreadsPerMP</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" ><a 
+href="userhtmlse13.html#Q1-20-211">psb_cuda_MaxRegisterPerBlock</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" ><a 
+href="userhtmlse13.html#Q1-20-213">psb_cuda_MemoryClockRate</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" ><a 
+href="userhtmlse13.html#Q1-20-215">psb_cuda_MemoryBusWidth</a></span>
+<br />   &#x00A0;&#x00A0;<span class="subsectionToc" ><a 
+href="userhtmlse13.html#Q1-20-217">psb_cuda_MemoryPeakBandwidth</a></span>
   </div>
                                                                  

--- a/docs/html/userhtmlli2.html
+++ b/docs/html/userhtmlli2.html
@ -11,26 +11,16 @@
 </head><body 
 >
   <!--l. 2--><div class="crosslinks"><p class="noindent">[<a 
-href="userhtmlse11.html" >prev</a>] [<a 
-href="userhtmlse11.html#tailuserhtmlse11.html" >prev-tail</a>] [<a 
-href="userhtmlsu88.html#tailuserhtmlli2.html">tail</a>] [<a 
+href="userhtmlse13.html" >prev</a>] [<a 
+href="userhtmlse13.html#tailuserhtmlse13.html" >prev-tail</a>] [<a 
+href="#tailuserhtmlli2.html">tail</a>] [<a 
 href="userhtml.html# " >up</a>] </p></div>
   <h3 class="likesectionHead"><a 
- id="x109-14400011.1"></a>References</h3>
+ id="x21-168000"></a>References</h3>
 <!--l. 2--><p class="noindent" >
    <div class="thebibliography">
    <p class="bibitem" ><span class="biblabel">
-  [1]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span>
-    <a 
- id="XDesPat:11"></a>D.&#x00A0;Barbieri, V.&#x00A0;Cardellini, S.&#x00A0;Filippone and D.&#x00A0;Rouson <span 
-class="cmti-10">Design Patterns</span>
-    <span 
-class="cmti-10">for Scientific Computations on Sparse Matrices</span>, HPSS 2011, Algorithms
-    and Programming Tools for Next-Generation High-Performance Scientific
-    Software, Bordeaux, Sep. 2011
-    </p>
-    <p class="bibitem" ><span class="biblabel">
-  [2]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
+  [1]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
 id="XPARA04FOREST"></a>G.&#x00A0;Bella, S.&#x00A0;Filippone, A.&#x00A0;De Maio and M.&#x00A0;Testa, <span 
 class="cmti-10">A Simulation Model</span>
    <span 
@ -40,45 +30,45 @@ class="cmti-10">for  Forest  Fires</span>,  in  J.&#x00A0;Dongarra,  K.&#x00A0;M
    2005.
    </p>
    <p class="bibitem" ><span class="biblabel">
-  [3]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
+  [2]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
 id="X2007d"></a>A.  Buttari,  D.  di  Serafino,  P.  D&#8217;Ambra,  S.  Filippone, 2LEV-D2P4:
    a  package  of  high-performance  preconditioners,  Applicable  Algebra  in
    Engineering, Communications and Computing, Volume 18, Number 3, May,
    2007, pp. 223-239
    </p>
    <p class="bibitem" ><span class="biblabel">
-  [4]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
+  [3]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
 id="X2007c"></a>P.  D&#8217;Ambra,  S.  Filippone,  D.  Di  Serafino   On  the  Development
    of  PSBLAS-based  Parallel  Two-level  Schwarz  Preconditioners   Applied
    Numerical  Mathematics,  Elsevier  Science,  Volume  57,  Issues  11-12,
    November-December 2007, Pages 1181-1196.
    </p>
    <p class="bibitem" ><span class="biblabel">
-  [5]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
+  [4]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
 id="XBLAS2"></a>Dongarra,  J.  J.,  DuCroz,  J.,  Hammarling,  S.  and  Hanson,  R.,  An
    Extended Set of Fortran Basic Linear Algebra Subprograms, ACM Trans.
    Math. Softw. vol.&#x00A0;14, 1&#8211;17, 1988.
    </p>
    <p class="bibitem" ><span class="biblabel">
-  [6]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
+  [5]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
 id="XBLAS3"></a>Dongarra, J., DuCroz, J., Hammarling, S. and Duff, I., A Set of level
    3 Basic Linear Algebra Subprograms, ACM Trans. Math. Softw. vol.&#x00A0;16,
    1&#8211;17, 1990.
-                                                                  
-
-                                                                  
    </p>
    <p class="bibitem" ><span class="biblabel">
-  [7]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
+  [6]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
 id="XBLACS"></a>J.&#x00A0;J.&#x00A0;Dongarra  and  R.&#x00A0;C.&#x00A0;Whaley,  <span 
 class="cmti-10">A  User&#8217;s  Guide  to  the  BLACS</span>
    <span 
 class="cmti-10">v.</span><span 
 class="cmti-10">&#x00A0;1.1</span>, Lapack Working Note 94, Tech.&#x00A0;Rep.&#x00A0;UT-CS-95-281, University of
    Tennessee, March 1995 (updated May 1997).
+                                                                  
+
+                                                                  
    </p>
    <p class="bibitem" ><span class="biblabel">
-  [8]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
+  [7]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
 id="Xsblas97"></a>I.&#x00A0;Duff, M.&#x00A0;Marrone, G.&#x00A0;Radicati and C.&#x00A0;Vittoli, <span 
 class="cmti-10">Level 3 Basic Linear</span>
    <span 
@ -86,7 +76,7 @@ class="cmti-10">Algebra Subprograms for Sparse Matrices: a User Level Interface<
    Transactions on Mathematical Software, 23(3), pp.&#x00A0;379&#8211;401, 1997.
    </p>
    <p class="bibitem" ><span class="biblabel">
-  [9]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
+  [8]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
 id="Xsblas02"></a>I.&#x00A0;Duff, M.&#x00A0;Heroux and R.&#x00A0;Pozo, <span 
 class="cmti-10">An Overview of the Sparse Basic</span>
    <span 
@ -96,7 +86,7 @@ class="cmti-10">Forum</span>, ACM Transactions on Mathematical Software, 28(2),
    2002.
    </p>
    <p class="bibitem" ><span class="biblabel">
- [10]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
+  [9]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
 id="XPSBLAS"></a>S.&#x00A0;Filippone  and  M.&#x00A0;Colajanni,  <span 
 class="cmti-10">PSBLAS:  A  Library  for  Parallel</span>
    <span 
@ -104,7 +94,7 @@ class="cmti-10">Linear Algebra Computation on Sparse Matrices</span>,  ACM Trans
    Mathematical Software, 26(4), pp.&#x00A0;527&#8211;550, 2000.
    </p>
    <p class="bibitem" ><span class="biblabel">
- [11]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
+ [10]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
 id="XSparse03"></a>S.&#x00A0;Filippone  and  A.&#x00A0;Buttari,  <span 
 class="cmti-10">Object-Oriented Techniques for Sparse</span>
    <span 
@ -112,7 +102,7 @@ class="cmti-10">Matrix Computations in Fortran 2003</span>, ACM Transactions on
    Software, 38(4), 2012.
    </p>
    <p class="bibitem" ><span class="biblabel">
- [12]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
+ [11]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
 id="XKIVA3PSBLAS"></a>S.&#x00A0;Filippone,  P.&#x00A0;D&#8217;Ambra,  M.&#x00A0;Colajanni,  <span 
 class="cmti-10">Using  a  Parallel  Library</span>
    <span 
@ -123,14 +113,14 @@ class="cmti-10">Linux Clusters</span>, in G.&#x00A0;Joubert, A.&#x00A0;Murli, F.
    College Press, 2002.
    </p>
    <p class="bibitem" ><span class="biblabel">
- [13]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
+ [12]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
 id="XDesignPatterns"></a> Gamma, E.,  Helm, R.,  Johnson, R.,  and  Vlissides, J. 1995. <span 
 class="cmti-10">Design</span>
    <span 
 class="cmti-10">Patterns: Elements of Reusable Object-Oriented Software</span>. Addison-Wesley.
    </p>
    <p class="bibitem" ><span class="biblabel">
- [14]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
+ [13]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
 id="XMETIS"></a>Karypis, G. and Kumar, V., <span 
 class="cmti-10">METIS: Unstructured Graph Partitioning</span>
    <span 
@ -138,18 +128,18 @@ class="cmti-10">and Sparse Matrix Ordering System</span>. Minneapolis, MN 55455:
    of Minnesota, Department of Computer Science, 1995. Internet Address:
    <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">http://www.cs.umn.edu/~karypis</span></span></span>.
-                                                                  
-
-                                                                  
    </p>
    <p class="bibitem" ><span class="biblabel">
- [15]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
+ [14]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
 id="XBLAS1"></a>Lawson,  C.,  Hanson,  R.,  Kincaid,  D.  and  Krogh,  F.,  Basic  Linear
    Algebra Subprograms for Fortran usage, ACM Trans. Math. Softw. vol.&#x00A0;5,
    308&#8211;323, 1979.
+                                                                  
+
+                                                                  
    </p>
    <p class="bibitem" ><span class="biblabel">
- [16]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
+ [15]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
 id="Xmachiels"></a>Machiels, L. and Deville, M. <span 
 class="cmti-10">Fortran 90: An entry to object-oriented</span>
    <span 
@ -157,12 +147,18 @@ class="cmti-10">programming for the solution of partial differential equations.
    Math. Softw. vol.&#x00A0;23, 32&#8211;49.
    </p>
    <p class="bibitem" ><span class="biblabel">
- [17]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
+ [16]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
 id="Xmetcalf"></a>Metcalf, M., Reid, J. and Cohen, M. <span 
 class="cmti-10">Fortran 95/2003 explained. </span>Oxford
    University Press, 2004.
    </p>
    <p class="bibitem" ><span class="biblabel">
+ [17]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
+ id="XMRC:11"></a>Metcalf, M., Reid, J. and Cohen, M. <span 
+class="cmti-10">Modern Fortran explained. </span>Oxford
+    University Press, 2011.
+    </p>
+    <p class="bibitem" ><span class="biblabel">
 [18]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
 id="XRouXiaXu:11"></a>Rouson,  D.W.I.,  Xia,  J.,  Xu,  X.:  Scientific  Software  Design:  The
    Object-Oriented Way. Cambridge University Press (2011)
@ -172,15 +168,42 @@ class="cmti-10">Fortran 95/2003 explained. </span>Oxford
 id="XMPI1"></a>M.&#x00A0;Snir,  S.&#x00A0;Otto,  S.&#x00A0;Huss-Lederman,  D.&#x00A0;Walker  and  J.&#x00A0;Dongarra,
    <span 
 class="cmti-10">MPI: The Complete Reference. Volume 1 - The MPI Core</span>, second edition,
-    MIT Press, 1998.</p></div>
+    MIT Press, 1998.
+    </p>
+    <p class="bibitem" ><span class="biblabel">
+ [20]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span>
+    <a 
+ id="XDesPat:11"></a>D.&#x00A0;Barbieri, V.&#x00A0;Cardellini, S.&#x00A0;Filippone and D.&#x00A0;Rouson <span 
+class="cmti-10">Design Patterns</span>
+    <span 
+class="cmti-10">for Scientific Computations on Sparse Matrices</span>, HPSS 2011, Algorithms
+    and Programming Tools for Next-Generation High-Performance Scientific
+    Software, Bordeaux, Sep. 2011
+    </p>
+    <p class="bibitem" ><span class="biblabel">
+ [21]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
+ id="XCaFiRo:2014"></a> Cardellini, V.,  Filippone, S.,  and  Rouson, D. 2014, Design patterns
+    for sparse-matrix computations on hybrid CPU/GPU platforms, <span 
+class="cmti-10">Scientific</span>
+    <span 
+class="cmti-10">Programming</span>&#x00A0;<span 
+class="cmti-10">22,</span>&#x00A0;1, 1&#8211;19.
+    </p>
+    <p class="bibitem" ><span class="biblabel">
+ [22]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a 
+ id="XOurTechRep"></a>D.&#x00A0;Barbieri, V.&#x00A0;Cardellini, A.&#x00A0;Fanfarillo, S.&#x00A0;Filippone, Three storage
+    formats  for  sparse  matrices  on  GPGPUs,  Tech.  Rep.  DICII  RR-15.6,
+    Università di Roma Tor Vergata (February 2015).
+</p>
+    </div>
                                                                  

                                                                  
-   <!--l. 128--><div class="crosslinks"><p class="noindent">[<a 
-href="userhtmlse11.html" >prev</a>] [<a 
-href="userhtmlse11.html#tailuserhtmlse11.html" >prev-tail</a>] [<a 
+   <!--l. 130--><div class="crosslinks"><p class="noindent">[<a 
+href="userhtmlse13.html" >prev</a>] [<a 
+href="userhtmlse13.html#tailuserhtmlse13.html" >prev-tail</a>] [<a 
 href="userhtmlli2.html" >front</a>] [<a 
 href="userhtml.html# " >up</a>] </p></div>
-<!--l. 128--><p class="indent" >   <a 
+<!--l. 130--><p class="indent" >   <a 
 id="tailuserhtmlli2.html"></a> 
 </body></html> 
--- a/docs/html/userhtmlse1.html
+++ b/docs/html/userhtmlse1.html
@ -27,35 +27,35 @@ preprocessing sparse matrices, and contains additional routines for dense matrix
 operations. The current implementation of PSBLAS addresses a distributed memory
 execution model operating with message passing.
 <!--l. 14--><p class="indent" >   The PSBLAS library version 3 is implemented in the Fortran&#x00A0;2003&#x00A0;<span class="cite">[<a 
-href="userhtmlli2.html#Xmetcalf">17</a>]</span>
+href="userhtmlli2.html#Xmetcalf">16</a>]</span>
 programming language, with reuse and/or adaptation of existing Fortran&#x00A0;77 and
 Fortran&#x00A0;95 software, plus a handful of C routines.
 <!--l. 19--><p class="indent" >   The use of Fortran&#x00A0;2003 offers a number of advantages over Fortran&#x00A0;95, mostly in
 the handling of requirements for evolution and adaptation of the library to new
 computing architectures and integration of new algorithms. For a detailed discussion
 of our design see&#x00A0;<span class="cite">[<a 
-href="userhtmlli2.html#XSparse03">11</a>]</span>; other works discussing advanced programming in Fortran&#x00A0;2003
+href="userhtmlli2.html#XSparse03">10</a>]</span>; other works discussing advanced programming in Fortran&#x00A0;2003
 include&#x00A0;<span class="cite">[<a 
-href="userhtmlli2.html#XDesPat:11">1</a>,&#x00A0;<a 
+href="userhtmlli2.html#XDesPat:11">20</a>,&#x00A0;<a 
 href="userhtmlli2.html#XRouXiaXu:11">18</a>]</span>; sufficient support for Fortran&#x00A0;2003 is now available from many
 compilers, including the GNU Fortran compiler from the Free Software Foundation
 (as of version 4.8).
 <!--l. 30--><p class="indent" >   Previous approaches have been based on mixing Fortran&#x00A0;95, with its support for
 object-based design, with other languages; these have been advocated by a number of
 authors, e.g.&#x00A0;<span class="cite">[<a 
-href="userhtmlli2.html#Xmachiels">16</a>]</span>. Moreover, the Fortran&#x00A0;95 facilities for dynamic memory
+href="userhtmlli2.html#Xmachiels">15</a>]</span>. Moreover, the Fortran&#x00A0;95 facilities for dynamic memory
 management and interface overloading greatly enhance the usability of the PSBLAS
 subroutines. In this way, the library can take care of runtime memory requirements
 that are quite difficult or even impossible to predict at implementation or
 compilation time.
 <!--l. 40--><p class="indent" >   The presentation of the PSBLAS library follows the general structure of the
 proposal for serial Sparse BLAS&#x00A0;<span class="cite">[<a 
-href="userhtmlli2.html#Xsblas97">8</a>,&#x00A0;<a 
-href="userhtmlli2.html#Xsblas02">9</a>]</span>, which in its turn is based on the proposal for
+href="userhtmlli2.html#Xsblas97">7</a>,&#x00A0;<a 
+href="userhtmlli2.html#Xsblas02">8</a>]</span>, which in its turn is based on the proposal for
 BLAS on dense matrices&#x00A0;<span class="cite">[<a 
-href="userhtmlli2.html#XBLAS1">15</a>,&#x00A0;<a 
-href="userhtmlli2.html#XBLAS2">5</a>,&#x00A0;<a 
-href="userhtmlli2.html#XBLAS3">6</a>]</span>.
+href="userhtmlli2.html#XBLAS1">14</a>,&#x00A0;<a 
+href="userhtmlli2.html#XBLAS2">4</a>,&#x00A0;<a 
+href="userhtmlli2.html#XBLAS3">5</a>]</span>.
 <!--l. 45--><p class="indent" >   The applicability of sparse iterative solvers to many different areas causes some
 terminology problems because the same concept may be denoted through different
 names depending on the application area. The PSBLAS features presented in this
--- a/docs/html/userhtmlse10.html
+++ b/docs/html/userhtmlse10.html
@ -13,16 +13,18 @@
   <!--l. 1--><div class="crosslinks"><p class="noindent">[<a 
 href="userhtmlse9.html" >prev</a>] [<a 
 href="userhtmlse9.html#tailuserhtmlse9.html" >prev-tail</a>] [<a 
-href="userhtmlsu81.html#tailuserhtmlse10.html">tail</a>] [<a 
-href="userhtml.html#userhtmlsu86.html" >up</a>] </p></div>
+href="userhtmlse7.html#tailuserhtmlse10.html">tail</a>] [<a 
+href="userhtml.html#userhtmlse13.html" >up</a>] </p></div>
   <h3 class="sectionHead"><span class="titlemark">10   </span> <a 
- id="x98-13500010"></a>Preconditioner routines</h3>
+ id="x15-13500010"></a>Preconditioner routines</h3>
 <!--l. 6--><p class="noindent" >The base PSBLAS library contains the implementation of two simple preconditioning
 techniques:
     <ul class="itemize1">
-     <li class="itemize">Diagonal Scaling
+     <li class="itemize">
+     <!--l. 9--><p class="noindent" >Diagonal Scaling
     </li>
-     <li class="itemize">Block Jacobi with ILU(0) factorization</li></ul>
+     <li class="itemize">
+     <!--l. 10--><p class="noindent" >Block Jacobi with ILU(0) factorization</li></ul>
 <!--l. 14--><p class="noindent" >The supporting data type and subroutine interfaces are defined in the module
 <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_prec_mod</span></span></span>. The old interfaces <span class="obeylines-h"><span class="verb"><span 
@ -32,33 +34,686 @@ supported for backward compatibility
                                                                  

                                                                  
-   <div class="subsectionTOCS">
-   &#x00A0;<span class="subsectionToc" >10.1 <a 
-href="userhtmlsu84.html#x99-13600010.1">init &#8212; Initialize a preconditioner</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >10.2 <a 
-href="userhtmlsu85.html#x101-13700010.2">build &#8212; Builds a preconditioner</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >10.3 <a 
-href="userhtmlsu86.html#x102-13800010.3">apply &#8212; Preconditioner application routine</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >10.4 <a 
-href="userhtmlsu87.html#x103-13900010.4">descr &#8212; Prints a description of current preconditioner</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >10.5 <a 
-href="userhtmlsu88.html#x104-14000010.5">clone &#8212; clone current preconditioner</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >10.6 <a 
-href="userhtmlsu89.html#x105-14100010.6">free &#8212; Free a preconditioner</a></span>
-   </div>
+   <h4 class="subsectionHead"><span class="titlemark">10.1   </span> <a 
+ id="x15-13600010.1"></a>init &#8212; Initialize a preconditioner</h4>
+                                                                  
+
+                                                                  
+   <pre class="verbatim" id="verbatim-97">
+call&#x00A0;prec%init(icontxt,ptype,&#x00A0;info)
+</pre>
+<!--l. 30--><p class="nopar" >
+<!--l. 32--><p class="indent" >
+     <dl class="description"><dt class="description">
+     <!--l. 33--><p class="noindent" >
+<span 
+class="cmbx-10">Type:</span> </dt><dd 
+class="description">
+     <!--l. 33--><p class="noindent" >Asynchronous.
+     </dd><dt class="description">
+     <!--l. 34--><p class="noindent" >
+<span 
+class="cmbx-10">On Entry</span> </dt><dd 
+class="description">
+     <!--l. 34--><p class="noindent" >
+     </dd><dt class="description">
+     <!--l. 35--><p class="noindent" >
+<span 
+class="cmbx-10">icontxt</span> </dt><dd 
+class="description">
+     <!--l. 35--><p class="noindent" >the communication context.<br 
+class="newline" />Scope: <span 
+class="cmbx-10">global</span>.<br 
+class="newline" />Type: <span 
+class="cmbx-10">required</span>.<br 
+class="newline" />Intent: <span 
+class="cmbx-10">in</span>.<br 
+class="newline" />Specified as: an integer value.
+     </dd><dt class="description">
+     <!--l. 40--><p class="noindent" >
+<span 
+class="cmbx-10">ptype</span> </dt><dd 
+class="description">
+     <!--l. 40--><p class="noindent" >the type of preconditioner. Scope: <span 
+class="cmbx-10">global </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">required</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">in</span>.<br 
+class="newline" />Specified as: a character string, see usage notes.
+     </dd><dt class="description">
+     <!--l. 53--><p class="noindent" >
+<span 
+class="cmbx-10">On Exit</span> </dt><dd 
+class="description">
+     <!--l. 53--><p class="noindent" >
+     </dd><dt class="description">
+     <!--l. 55--><p class="noindent" >
+<span 
+class="cmbx-10">prec</span> </dt><dd 
+class="description">
+     <!--l. 55--><p class="noindent" >Scope: <span 
+class="cmbx-10">local </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">required</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">inout</span>.<br 
+class="newline" />Specified as: a preconditioner data structure <a 
+href="userhtmlse3.html#precdata"><span 
+class="cmtt-10">psb</span><span 
+class="cmtt-10">_prec</span><span 
+class="cmtt-10">_type</span></a>.
+                                                                  
+
+                                                                  
+     </dd><dt class="description">
+     <!--l. 60--><p class="noindent" >
+<span 
+class="cmbx-10">info</span> </dt><dd 
+class="description">
+     <!--l. 60--><p class="noindent" >Scope: <span 
+class="cmbx-10">global </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">required</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">out</span>.<br 
+class="newline" />Error code: if no error, 0 is returned.</dd></dl>
+<!--l. 66--><p class="noindent" ><span 
+class="cmbx-12">Notes </span>Legal inputs to this subroutine are interpreted depending on the <span 
+class="cmmi-10">ptype </span>string as
+follows<span class="footnote-mark"><a 
+href="userhtml16.html#fn4x0"><sup class="textsuperscript">4</sup></a></span><a 
+ id="x15-136001f4"></a> :
+     <dl class="description"><dt class="description">
+     <!--l. 74--><p class="noindent" >
+<span 
+class="cmbx-10">NONE</span> </dt><dd 
+class="description">
+     <!--l. 74--><p class="noindent" >No preconditioning, i.e. the preconditioner is just a copy operator.
+     </dd><dt class="description">
+     <!--l. 76--><p class="noindent" >
+<span 
+class="cmbx-10">DIAG</span> </dt><dd 
+class="description">
+     <!--l. 76--><p class="noindent" >Diagonal  scaling;  each  entry  of  the  input  vector  is  multiplied  by  the
+     reciprocal  of  the  sum  of  the  absolute  values  of  the  coefficients  in  the
+     corresponding row of matrix <span 
+class="cmmi-10">A</span>;
+     </dd><dt class="description">
+     <!--l. 79--><p class="noindent" >
+<span 
+class="cmbx-10">BJAC</span> </dt><dd 
+class="description">
+     <!--l. 79--><p class="noindent" >Precondition by a factorization of the block-diagonal of matrix <span 
+class="cmmi-10">A</span>, where
+     block  boundaries  are  determined  by  the  data  allocation  boundaries
+     for  each  process;  requires  no  communication.  Only  the  incomplete
+     factorization <span 
+class="cmmi-10">ILU</span>(0) is currently implemented.</dd></dl>
+                                                                  
+
+                                                                  
+   <h4 class="subsectionHead"><span class="titlemark">10.2   </span> <a 
+ id="x15-13700010.2"></a>build &#8212; Builds a preconditioner</h4>
+                                                                  
+
+                                                                  
+   <pre class="verbatim" id="verbatim-98">
+call&#x00A0;prec%build(a,&#x00A0;desc_a,&#x00A0;info[,amold,vmold,imold])
+</pre>
+<!--l. 91--><p class="nopar" >
+<!--l. 93--><p class="indent" >
+     <dl class="description"><dt class="description">
+     <!--l. 94--><p class="noindent" >
+<span 
+class="cmbx-10">Type:</span> </dt><dd 
+class="description">
+     <!--l. 94--><p class="noindent" >Synchronous.
+     </dd><dt class="description">
+     <!--l. 95--><p class="noindent" >
+<span 
+class="cmbx-10">On Entry</span> </dt><dd 
+class="description">
+     <!--l. 95--><p class="noindent" >
+     </dd><dt class="description">
+     <!--l. 96--><p class="noindent" >
+<span 
+class="cmbx-10">a</span> </dt><dd 
+class="description">
+     <!--l. 96--><p class="noindent" >the system sparse matrix. Scope: <span 
+class="cmbx-10">local </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">required</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">in</span>, target.<br 
+class="newline" />Specified as: a sparse matrix data structure <a 
+href="userhtmlse3.html#spdata"><span 
+class="cmtt-10">psb</span><span 
+class="cmtt-10">_Tspmat</span><span 
+class="cmtt-10">_type</span></a>.
+     </dd><dt class="description">
+     <!--l. 101--><p class="noindent" >
+<span 
+class="cmbx-10">prec</span> </dt><dd 
+class="description">
+     <!--l. 101--><p class="noindent" >the preconditioner.<br 
+class="newline" />Scope: <span 
+class="cmbx-10">local </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">required</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">inout</span>.<br 
+class="newline" />Specified   as:   an   already   initialized   preconditioner   data   structure
+     <a 
+href="userhtmlse3.html#precdata"><span 
+class="cmtt-10">psb</span><span 
+class="cmtt-10">_prec</span><span 
+class="cmtt-10">_type</span></a><br 
+class="newline" />
+     </dd><dt class="description">
+     <!--l. 106--><p class="noindent" >
+<span 
+class="cmbx-10">desc</span><span 
+class="cmbx-10">_a</span> </dt><dd 
+class="description">
+     <!--l. 106--><p class="noindent" >the problem communication descriptor. Scope: <span 
+class="cmbx-10">local </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">required</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">in</span>, target.<br 
+class="newline" />Specified as: a communication descriptor data structure <a 
+href="userhtmlse3.html#descdata"><span 
+class="cmtt-10">psb</span><span 
+class="cmtt-10">_desc</span><span 
+class="cmtt-10">_type</span></a>.
+     </dd><dt class="description">
+     <!--l. 116--><p class="noindent" >
+<span 
+class="cmbx-10">amold</span> </dt><dd 
+class="description">
+                                                                  
+
+                                                                  
+     <!--l. 116--><p class="noindent" >The desired dynamic type for the internal matrix storage.<br 
+class="newline" />Scope: <span 
+class="cmbx-10">local</span>.<br 
+class="newline" />Type: <span 
+class="cmbx-10">optional</span>.<br 
+class="newline" />Intent: <span 
+class="cmbx-10">in</span>.<br 
+class="newline" />Specified as: an object of a class derived from <a 
+ id="spbasedata"></a><span 
+class="cmtt-10">psb</span><span 
+class="cmtt-10">_T</span><span 
+class="cmtt-10">_base</span><span 
+class="cmtt-10">_sparse</span><span 
+class="cmtt-10">_mat</span>.
+     </dd><dt class="description">
+     <!--l. 121--><p class="noindent" >
+<span 
+class="cmbx-10">vmold</span> </dt><dd 
+class="description">
+     <!--l. 121--><p class="noindent" >The desired dynamic type for the internal vector storage.<br 
+class="newline" />Scope: <span 
+class="cmbx-10">local</span>.<br 
+class="newline" />Type: <span 
+class="cmbx-10">optional</span>.<br 
+class="newline" />Intent: <span 
+class="cmbx-10">in</span>.<br 
+class="newline" />Specified as: an object of a class derived from <a 
+ id="vbasedata"></a><span 
+class="cmtt-10">psb</span><span 
+class="cmtt-10">_T</span><span 
+class="cmtt-10">_base</span><span 
+class="cmtt-10">_vect</span><span 
+class="cmtt-10">_type</span>.
+     </dd><dt class="description">
+     <!--l. 126--><p class="noindent" >
+<span 
+class="cmbx-10">imold</span> </dt><dd 
+class="description">
+     <!--l. 126--><p class="noindent" >The desired dynamic type for the internal integer vector storage.<br 
+class="newline" />Scope: <span 
+class="cmbx-10">local</span>.<br 
+class="newline" />Type: <span 
+class="cmbx-10">optional</span>.<br 
+class="newline" />Intent: <span 
+class="cmbx-10">in</span>.<br 
+class="newline" />Specified    as:    an    object    of    a    class    derived    from    (integer)
+     <a 
+ id="vbasedata"></a><span 
+class="cmtt-10">psb</span><span 
+class="cmtt-10">_T</span><span 
+class="cmtt-10">_base</span><span 
+class="cmtt-10">_vect</span><span 
+class="cmtt-10">_type</span>.</dd></dl>
+<!--l. 133--><p class="indent" >
+     <dl class="description"><dt class="description">
+     <!--l. 134--><p class="noindent" >
+<span 
+class="cmbx-10">On Return</span> </dt><dd 
+class="description">
+     <!--l. 134--><p class="noindent" >
+     </dd><dt class="description">
+     <!--l. 135--><p class="noindent" >
+<span 
+class="cmbx-10">prec</span> </dt><dd 
+class="description">
+     <!--l. 135--><p class="noindent" >the preconditioner.<br 
+class="newline" />Scope: <span 
+class="cmbx-10">local </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">required</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">inout</span>.<br 
+class="newline" />Specified as: a preconditioner data structure <a 
+href="userhtmlse3.html#precdata"><span 
+class="cmtt-10">psb</span><span 
+class="cmtt-10">_prec</span><span 
+class="cmtt-10">_type</span></a><br 
+class="newline" />
+     </dd><dt class="description">
+     <!--l. 140--><p class="noindent" >
+<span 
+class="cmbx-10">info</span> </dt><dd 
+class="description">
+     <!--l. 140--><p class="noindent" >Error code.<br 
+class="newline" />Scope: <span 
+class="cmbx-10">local </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">required </span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">out</span>.<br 
+class="newline" />An integer value; 0 means no error has been detected.</dd></dl>
+                                                                  
+
+                                                                  
+<!--l. 146--><p class="noindent" >The <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">amold</span></span></span>, <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">vmold</span></span></span> and <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">imold</span></span></span> arguments may be employed to interface with special
+devices, such as GPUs and other accelerators.
+                                                                  
+
+                                                                  
+   <h4 class="subsectionHead"><span class="titlemark">10.3   </span> <a 
+ id="x15-13800010.3"></a>apply &#8212; Preconditioner application routine</h4>
+                                                                  
+
+                                                                  
+   <pre class="verbatim" id="verbatim-99">
+call&#x00A0;prec%apply(x,y,desc_a,info,trans,work)
+call&#x00A0;prec%apply(x,desc_a,info,trans)
+</pre>
+<!--l. 158--><p class="nopar" >
+<!--l. 160--><p class="indent" >
+     <dl class="description"><dt class="description">
+     <!--l. 161--><p class="noindent" >
+<span 
+class="cmbx-10">Type:</span> </dt><dd 
+class="description">
+     <!--l. 161--><p class="noindent" >Synchronous.
+     </dd><dt class="description">
+     <!--l. 162--><p class="noindent" >
+<span 
+class="cmbx-10">On Entry</span> </dt><dd 
+class="description">
+     <!--l. 162--><p class="noindent" >
+     </dd><dt class="description">
+     <!--l. 163--><p class="noindent" >
+<span 
+class="cmbx-10">prec</span> </dt><dd 
+class="description">
+     <!--l. 163--><p class="noindent" >the preconditioner. Scope: <span 
+class="cmbx-10">local </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">required</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">in</span>.<br 
+class="newline" />Specified as: a preconditioner data structure <a 
+href="userhtmlse3.html#precdata"><span 
+class="cmtt-10">psb</span><span 
+class="cmtt-10">_prec</span><span 
+class="cmtt-10">_type</span></a>.
+     </dd><dt class="description">
+     <!--l. 168--><p class="noindent" >
+<span 
+class="cmbx-10">x</span> </dt><dd 
+class="description">
+     <!--l. 168--><p class="noindent" >the source vector. Scope: <span 
+class="cmbx-10">local </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">required</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">inout</span>.<br 
+class="newline" />Specified as: a rank one array or an object of type <a 
+href="userhtmlse3.html#vdata"><span 
+class="cmtt-10">psb</span><span 
+class="cmtt-10">_T</span><span 
+class="cmtt-10">_vect</span><span 
+class="cmtt-10">_type</span></a>.
+     </dd><dt class="description">
+     <!--l. 173--><p class="noindent" >
+<span 
+class="cmbx-10">desc</span><span 
+class="cmbx-10">_a</span> </dt><dd 
+class="description">
+     <!--l. 173--><p class="noindent" >the problem communication descriptor. Scope: <span 
+class="cmbx-10">local </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">required</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">in</span>.<br 
+class="newline" />Specified as: a communication descriptor data structure <a 
+href="userhtmlse3.html#descdata"><span 
+class="cmtt-10">psb</span><span 
+class="cmtt-10">_desc</span><span 
+class="cmtt-10">_type</span></a>.
+     </dd><dt class="description">
+     <!--l. 178--><p class="noindent" >
+<span 
+class="cmbx-10">trans</span> </dt><dd 
+class="description">
+     <!--l. 178--><p class="noindent" >Scope:  <br 
+class="newline" />Type: <span 
+class="cmbx-10">optional</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">in</span>.<br 
+class="newline" />Specified as: a character.
+                                                                  
+
+                                                                  
+     </dd><dt class="description">
+     <!--l. 183--><p class="noindent" >
+<span 
+class="cmbx-10">work</span> </dt><dd 
+class="description">
+     <!--l. 183--><p class="noindent" >an optional work space. Scope: <span 
+class="cmbx-10">local </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">optional</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">inout</span>.<br 
+class="newline" />Specified as: a double precision array.</dd></dl>
+<!--l. 190--><p class="indent" >
+     <dl class="description"><dt class="description">
+     <!--l. 191--><p class="noindent" >
+<span 
+class="cmbx-10">On Return</span> </dt><dd 
+class="description">
+     <!--l. 191--><p class="noindent" >
+     </dd><dt class="description">
+     <!--l. 192--><p class="noindent" >
+<span 
+class="cmbx-10">y</span> </dt><dd 
+class="description">
+     <!--l. 192--><p class="noindent" >the destination vector. Scope: <span 
+class="cmbx-10">local </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">required</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">inout</span>.<br 
+class="newline" />Specified as: a rank one array or an object of type <a 
+href="userhtmlse3.html#vdata"><span 
+class="cmtt-10">psb</span><span 
+class="cmtt-10">_T</span><span 
+class="cmtt-10">_vect</span><span 
+class="cmtt-10">_type</span></a>.
+     </dd><dt class="description">
+     <!--l. 197--><p class="noindent" >
+<span 
+class="cmbx-10">info</span> </dt><dd 
+class="description">
+     <!--l. 197--><p class="noindent" >Error code.<br 
+class="newline" />Scope: <span 
+class="cmbx-10">local </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">required </span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">out</span>.<br 
+class="newline" />An integer value; 0 means no error has been detected.</dd></dl>
+                                                                  
+
+                                                                  
+   <h4 class="subsectionHead"><span class="titlemark">10.4   </span> <a 
+ id="x15-13900010.4"></a>descr &#8212; Prints a description of current preconditioner</h4>
+                                                                  
+
+                                                                  
+   <pre class="verbatim" id="verbatim-100">
+call&#x00A0;prec%descr(info)
+call&#x00A0;prec%descr(info,iout,&#x00A0;root)
+</pre>
+<!--l. 212--><p class="nopar" >
+<!--l. 214--><p class="indent" >
+     <dl class="description"><dt class="description">
+     <!--l. 215--><p class="noindent" >
+<span 
+class="cmbx-10">Type:</span> </dt><dd 
+class="description">
+     <!--l. 215--><p class="noindent" >Asynchronous.
+     </dd><dt class="description">
+     <!--l. 216--><p class="noindent" >
+<span 
+class="cmbx-10">On Entry</span> </dt><dd 
+class="description">
+     <!--l. 216--><p class="noindent" >
+     </dd><dt class="description">
+     <!--l. 217--><p class="noindent" >
+<span 
+class="cmbx-10">prec</span> </dt><dd 
+class="description">
+     <!--l. 217--><p class="noindent" >the preconditioner. Scope: <span 
+class="cmbx-10">local </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">required</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">in</span>.<br 
+class="newline" />Specified as: a preconditioner data structure <a 
+href="userhtmlse3.html#precdata"><span 
+class="cmtt-10">psb</span><span 
+class="cmtt-10">_prec</span><span 
+class="cmtt-10">_type</span></a>.
+     </dd><dt class="description">
+     <!--l. 222--><p class="noindent" >
+<span 
+class="cmbx-10">iout</span> </dt><dd 
+class="description">
+     <!--l. 222--><p class="noindent" >output unit. Scope: <span 
+class="cmbx-10">local </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">optional</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">in</span>.<br 
+class="newline" />Specified as: an integer number. Default: default output unit.
+     </dd><dt class="description">
+     <!--l. 227--><p class="noindent" >
+<span 
+class="cmbx-10">root</span> </dt><dd 
+class="description">
+     <!--l. 227--><p class="noindent" >Process from which to print. Scope: <span 
+class="cmbx-10">local </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">optional</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">in</span>.<br 
+class="newline" />Specified  as:  an  integer  number  between  0  and  <span 
+class="cmmi-10">np </span><span 
+class="cmsy-10">- </span>1,  in  which  case
+     the specified process will print the description, or <span 
+class="cmsy-10">-</span>1, in which case all
+     processes will print. Default: 0.
+     </dd><dt class="description">
+     <!--l. 234--><p class="noindent" >
+<span 
+class="cmbx-10">On Return</span> </dt><dd 
+class="description">
+     <!--l. 234--><p class="noindent" >
+                                                                  
+
+                                                                  
+     </dd><dt class="description">
+     <!--l. 235--><p class="noindent" >
+<span 
+class="cmbx-10">info</span> </dt><dd 
+class="description">
+     <!--l. 235--><p class="noindent" >Error code.<br 
+class="newline" />Scope: <span 
+class="cmbx-10">local </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">required </span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">out</span>.<br 
+class="newline" />An integer value; 0 means no error has been detected.</dd></dl>
+                                                                  
+
+                                                                  
+   <h4 class="subsectionHead"><span class="titlemark">10.5   </span> <a 
+ id="x15-14000010.5"></a>clone &#8212; clone current preconditioner</h4>
+                                                                  

+                                                                  
+   <pre class="verbatim" id="verbatim-101">
+call&#x00A0;&#x00A0;prec%clone(precout,info)
+</pre>
+<!--l. 248--><p class="nopar" >
+<!--l. 250--><p class="indent" >
+     <dl class="description"><dt class="description">
+     <!--l. 251--><p class="noindent" >
+<span 
+class="cmbx-10">Type:</span> </dt><dd 
+class="description">
+     <!--l. 251--><p class="noindent" >Asynchronous.
+     </dd><dt class="description">
+     <!--l. 252--><p class="noindent" >
+<span 
+class="cmbx-10">On Entry</span> </dt><dd 
+class="description">
+     <!--l. 252--><p class="noindent" >
+     </dd><dt class="description">
+     <!--l. 253--><p class="noindent" >
+<span 
+class="cmbx-10">prec</span> </dt><dd 
+class="description">
+     <!--l. 253--><p class="noindent" >the preconditioner.<br 
+class="newline" />Scope: <span 
+class="cmbx-10">local</span>.<br 
+class="newline" /></dd></dl>
+<!--l. 260--><p class="indent" >
+     <dl class="description"><dt class="description">
+     <!--l. 261--><p class="noindent" >
+<span 
+class="cmbx-10">On Return</span> </dt><dd 
+class="description">
+     <!--l. 261--><p class="noindent" >
+     </dd><dt class="description">
+     <!--l. 262--><p class="noindent" >
+<span 
+class="cmbx-10">precout</span> </dt><dd 
+class="description">
+     <!--l. 262--><p class="noindent" >A copy of the input object.
+     </dd><dt class="description">
+     <!--l. 263--><p class="noindent" >
+<span 
+class="cmbx-10">info</span> </dt><dd 
+class="description">
+     <!--l. 263--><p class="noindent" >Return code.</dd></dl>
+                                                                  

+                                                                  
+   <h4 class="subsectionHead"><span class="titlemark">10.6   </span> <a 
+ id="x15-14100010.6"></a>free &#8212; Free a preconditioner</h4>
+                                                                  

+                                                                  
+   <pre class="verbatim" id="verbatim-102">
+call&#x00A0;prec%free(info)
+</pre>
+<!--l. 271--><p class="nopar" >
+<!--l. 273--><p class="indent" >
+     <dl class="description"><dt class="description">
+     <!--l. 274--><p class="noindent" >
+<span 
+class="cmbx-10">Type:</span> </dt><dd 
+class="description">
+     <!--l. 274--><p class="noindent" >Asynchronous.
+     </dd><dt class="description">
+     <!--l. 275--><p class="noindent" >
+<span 
+class="cmbx-10">On Entry</span> </dt><dd 
+class="description">
+     <!--l. 275--><p class="noindent" >
+     </dd><dt class="description">
+     <!--l. 276--><p class="noindent" >
+<span 
+class="cmbx-10">prec</span> </dt><dd 
+class="description">
+     <!--l. 276--><p class="noindent" >the preconditioner.<br 
+class="newline" />Scope: <span 
+class="cmbx-10">local</span>.<br 
+class="newline" />Type: <span 
+class="cmbx-10">required</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">inout</span>.<br 
+class="newline" />Specified as: a preconditioner data structure <a 
+href="userhtmlse3.html#precdata"><span 
+class="cmtt-10">psb</span><span 
+class="cmtt-10">_prec</span><span 
+class="cmtt-10">_type</span></a>.
+     </dd><dt class="description">
+     <!--l. 289--><p class="noindent" >
+<span 
+class="cmbx-10">On Exit</span> </dt><dd 
+class="description">
+     <!--l. 289--><p class="noindent" >
+     </dd><dt class="description">
+     <!--l. 291--><p class="noindent" >
+<span 
+class="cmbx-10">prec</span> </dt><dd 
+class="description">
+     <!--l. 291--><p class="noindent" >Scope: <span 
+class="cmbx-10">local </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">required</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">inout</span>.<br 
+class="newline" />Specified as: a preconditioner data structure <a 
+href="userhtmlse3.html#precdata"><span 
+class="cmtt-10">psb</span><span 
+class="cmtt-10">_prec</span><span 
+class="cmtt-10">_type</span></a>.
+     </dd><dt class="description">
+     <!--l. 296--><p class="noindent" >
+<span 
+class="cmbx-10">info</span> </dt><dd 
+class="description">
+     <!--l. 296--><p class="noindent" >Scope: <span 
+class="cmbx-10">global </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">required</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">out</span>.<br 
+class="newline" />Error code: if no error, 0 is returned.</dd></dl>
+<!--l. 302--><p class="noindent" ><span 
+class="cmbx-12">Notes </span>Releases all internal storage.
+                                                                  

+                                                                  
+                                                                  
+
+                                                                  
+                                                                  

                                                                  
+                                                                  

                                                                  
   <!--l. 1--><div class="crosslinks"><p class="noindent">[<a 
 href="userhtmlse9.html" >prev</a>] [<a 
 href="userhtmlse9.html#tailuserhtmlse9.html" >prev-tail</a>] [<a 
 href="userhtmlse10.html" >front</a>] [<a 
-href="userhtml.html#userhtmlsu86.html" >up</a>] </p></div>
+href="userhtml.html#userhtmlse13.html" >up</a>] </p></div>
 <!--l. 1--><p class="indent" >   <a 
- id="tailuserhtmlse10.html"></a>  
+ id="tailuserhtmlse10.html"></a>   
 </body></html> 
--- a/docs/html/userhtmlse11.html
+++ b/docs/html/userhtmlse11.html
@ -13,10 +13,10 @@
   <!--l. 1--><div class="crosslinks"><p class="noindent">[<a 
 href="userhtmlse10.html" >prev</a>] [<a 
 href="userhtmlse10.html#tailuserhtmlse10.html" >prev-tail</a>] [<a 
-href="userhtmlsu86.html#tailuserhtmlse11.html">tail</a>] [<a 
+href="userhtmlse8.html#tailuserhtmlse11.html">tail</a>] [<a 
 href="userhtml.html# " >up</a>] </p></div>
   <h3 class="sectionHead"><span class="titlemark">11   </span> <a 
- id="x106-14200011"></a>Iterative Methods</h3>
+ id="x17-14200011"></a>Iterative Methods</h3>
 <!--l. 4--><p class="noindent" >In this chapter we provide routines for preconditioners and iterative methods.
 The interfaces for Krylov subspace methods are available in the module
 <span class="obeylines-h"><span class="verb"><span 
@ -24,18 +24,452 @@ class="cmtt-10">psb_krylov_mod</span></span></span>.
                                                                  

                                                                  
-   <div class="subsectionTOCS">
-   &#x00A0;<span class="subsectionToc" >11.1 <a 
-href="userhtmlsu90.html#x107-14300011.1">psb_krylov &#8212; Krylov Methods Driver Routine</a></span>
-   </div>
+   <h4 class="subsectionHead"><span class="titlemark">11.1   </span> <a 
+ id="x17-14300011.1"></a>psb_krylov &#8212; Krylov Methods Driver Routine</h4>
+<!--l. 17--><p class="noindent" >This subroutine is a driver that provides a general interface for all the Krylov-Subspace
+family methods implemented in PSBLAS.
+<!--l. 20--><p class="indent" >   The stopping criterion can take the following values:
+     <dl class="description"><dt class="description">
+     <!--l. 22--><p class="noindent" >
+<span 
+class="cmbx-10">1</span> </dt><dd 
+class="description">
+     <!--l. 22--><p class="noindent" >normwise backward error in the infinity norm; the iteration is stopped
+     when
+<div class="math-display" >
+<img 
+src="userhtml30x.png" alt="
+err = \frac{\|r_i\|}{\|A\| \|x_i\| + \|b\|} &#x003C; eps
+" class="math-display" ></div>
+     <!--l. 24--><p class="nopar" >
+     </dd><dt class="description">
+     <!--l. 25--><p class="noindent" >
+<span 
+class="cmbx-10">2</span> </dt><dd 
+class="description">
+     <!--l. 25--><p class="noindent" >Relative residual in the 2-norm; the iteration is stopped when
+<div class="math-display" >
+<img 
+src="userhtml31x.png" alt="
+err = \frac{\|r_i\|}{\|b\|_2} &#x003C; eps
+" class="math-display" ></div>
+     <!--l. 27--><p class="nopar" >
+     </dd><dt class="description">
+     <!--l. 28--><p class="noindent" >
+<span 
+class="cmbx-10">3</span> </dt><dd 
+class="description">
+     <!--l. 28--><p class="noindent" >Relative residual reduction in the 2-norm; the iteration is stopped when
+<div class="math-display" >
+<img 
+src="userhtml32x.png" alt="
+err = \frac{\|r_i\|}{\|r_0\|_2} &#x003C; eps
+" class="math-display" ></div>
+     <!--l. 30--><p class="nopar" ></dd></dl>
+<!--l. 32--><p class="noindent" >The behaviour is controlled by the istop argument (see later). In the above formulae, <span 
+class="cmmi-10">x</span><sub><span 
+class="cmmi-7">i</span></sub>
+is the tentative solution and <span 
+class="cmmi-10">r</span><sub><span 
+class="cmmi-7">i</span></sub> = <span 
+class="cmmi-10">b </span><span 
+class="cmsy-10">- </span><span 
+class="cmmi-10">Ax</span><sub><span 
+class="cmmi-7">i</span></sub> the corresponding residual at the <span 
+class="cmmi-10">i</span>-th
+iteration.
+   <!--l. 37-->
                                                                  

                                                                  
-   <!--l. 2--><div class="crosslinks"><p class="noindent">[<a 
+   <pre class="lstlisting" id="listing-167"><span class="label"><a 
+ id="x17-143001r1"></a></span><span style="color:#000000"><span 
+class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">psb_krylov</span></span><span style="color:#000000"><span 
+class="cmtt-10">(</span></span><span style="color:#000000"><span 
+class="cmtt-10">method</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"><span 
+class="cmtt-10">a</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"><span 
+class="cmtt-10">prec</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"><span 
+class="cmtt-10">b</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"><span 
+class="cmtt-10">x</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"><span 
+class="cmtt-10">eps</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"><span 
+class="cmtt-10">desc_a</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"><span 
+class="cmtt-10">info</span></span><span style="color:#000000"><span 
+class="cmtt-10">,&amp;</span></span> 
+<span class="label"><a 
+ id="x17-143002r2"></a></span><span 
+class="cmtt-10">&#x00A0;</span><span 
+class="cmtt-10">&#x00A0;</span><span 
+class="cmtt-10">&#x00A0;</span><span 
+class="cmtt-10">&#x00A0;</span><span style="color:#000000"><span 
+class="cmtt-10">&amp;</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">itmax</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"><span 
+class="cmtt-10">iter</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"><span 
+class="cmtt-10">err</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"><span 
+class="cmtt-10">itrace</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"><span 
+class="cmtt-10">irst</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"><span 
+class="cmtt-10">istop</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"><span 
+class="cmtt-10">cond</span></span><span style="color:#000000"><span 
+class="cmtt-10">)</span></span></pre>
+   
+<!--l. 42--><p class="indent" >
+     <dl class="description"><dt class="description">
+     <!--l. 43--><p class="noindent" >
+<span 
+class="cmbx-10">Type:</span> </dt><dd 
+class="description">
+     <!--l. 43--><p class="noindent" >Synchronous.
+     </dd><dt class="description">
+     <!--l. 44--><p class="noindent" >
+<span 
+class="cmbx-10">On Entry</span> </dt><dd 
+class="description">
+     <!--l. 44--><p class="noindent" >
+     </dd><dt class="description">
+     <!--l. 45--><p class="noindent" >
+<span 
+class="cmbx-10">method</span> </dt><dd 
+class="description">
+     <!--l. 45--><p class="noindent" >a string that defines the iterative method to be used. Supported values
+     are:
+         <dl class="description"><dt class="description">
+         <!--l. 48--><p class="noindent" >
+     <span 
+class="cmbx-10">CG:</span> </dt><dd 
+class="description">
+         <!--l. 48--><p class="noindent" >the Conjugate Gradient method;
+         </dd><dt class="description">
+         <!--l. 49--><p class="noindent" >
+     <span 
+class="cmbx-10">CGS:</span> </dt><dd 
+class="description">
+         <!--l. 49--><p class="noindent" >the Conjugate Gradient Squared method;
+         </dd><dt class="description">
+         <!--l. 51--><p class="noindent" >
+     <span 
+class="cmbx-10">GCR:</span> </dt><dd 
+class="description">
+         <!--l. 51--><p class="noindent" >the Generalized Conjugate Residual method;
+         </dd><dt class="description">
+         <!--l. 52--><p class="noindent" >
+     <span 
+class="cmbx-10">FCG:</span> </dt><dd 
+class="description">
+         <!--l. 52--><p class="noindent" >the Flexible Conjugate Gradient method<span class="footnote-mark"><a 
+href="userhtml18.html#fn5x0"><sup class="textsuperscript">5</sup></a></span><a 
+ id="x17-143003f5"></a> ;
+         </dd><dt class="description">
+         <!--l. 55--><p class="noindent" >
+     <span 
+class="cmbx-10">BICG:</span> </dt><dd 
+class="description">
+         <!--l. 55--><p class="noindent" >the Bi-Conjugate Gradient method;
+         </dd><dt class="description">
+         <!--l. 56--><p class="noindent" >
+     <span 
+class="cmbx-10">BICGSTAB:</span> </dt><dd 
+class="description">
+         <!--l. 56--><p class="noindent" >the Bi-Conjugate Gradient Stabilized method;
+                                                                  
+
+                                                                  
+         </dd><dt class="description">
+         <!--l. 57--><p class="noindent" >
+     <span 
+class="cmbx-10">BICGSTABL:</span> </dt><dd 
+class="description">
+         <!--l. 57--><p class="noindent" >the Bi-Conjugate Gradient Stabilized method with restarting;
+         </dd><dt class="description">
+         <!--l. 58--><p class="noindent" >
+     <span 
+class="cmbx-10">RGMRES:</span> </dt><dd 
+class="description">
+         <!--l. 58--><p class="noindent" >the Generalized Minimal Residual method with restarting.</dd></dl>
+     </dd><dt class="description">
+     <!--l. 60--><p class="noindent" >
+<span 
+class="cmbx-10">a</span> </dt><dd 
+class="description">
+     <!--l. 60--><p class="noindent" >the local portion of global sparse matrix <span 
+class="cmmi-10">A</span>. <br 
+class="newline" />Scope: <span 
+class="cmbx-10">local </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">required</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">in</span>.<br 
+class="newline" />Specified as: a structured data of type <a 
+href="userhtmlse3.html#spdata"><span 
+class="cmtt-10">psb</span><span 
+class="cmtt-10">_Tspmat</span><span 
+class="cmtt-10">_type</span></a>.
+     </dd><dt class="description">
+     <!--l. 66--><p class="noindent" >
+<span 
+class="cmbx-10">prec</span> </dt><dd 
+class="description">
+     <!--l. 66--><p class="noindent" >The data structure containing the preconditioner.<br 
+class="newline" />Scope: <span 
+class="cmbx-10">local </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">required</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">in</span>.<br 
+class="newline" />Specified as: a structured data of type <a 
+href="userhtmlse3.html#precdata"><span 
+class="cmtt-10">psb</span><span 
+class="cmtt-10">_prec</span><span 
+class="cmtt-10">_type</span></a>.
+     </dd><dt class="description">
+     <!--l. 71--><p class="noindent" >
+<span 
+class="cmbx-10">b</span> </dt><dd 
+class="description">
+     <!--l. 71--><p class="noindent" >The RHS vector. <br 
+class="newline" />Scope: <span 
+class="cmbx-10">local </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">required</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">in</span>.<br 
+class="newline" />Specified as: a rank one array or an object of type <a 
+href="userhtmlse3.html#vdata"><span 
+class="cmtt-10">psb</span><span 
+class="cmtt-10">_T</span><span 
+class="cmtt-10">_vect</span><span 
+class="cmtt-10">_type</span></a>.
+     </dd><dt class="description">
+     <!--l. 76--><p class="noindent" >
+<span 
+class="cmbx-10">x</span> </dt><dd 
+class="description">
+     <!--l. 76--><p class="noindent" >The initial guess. <br 
+class="newline" />Scope: <span 
+class="cmbx-10">local </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">required</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">inout</span>.<br 
+class="newline" />Specified as: a rank one array or an object of type <a 
+href="userhtmlse3.html#vdata"><span 
+class="cmtt-10">psb</span><span 
+class="cmtt-10">_T</span><span 
+class="cmtt-10">_vect</span><span 
+class="cmtt-10">_type</span></a>.
+     </dd><dt class="description">
+     <!--l. 81--><p class="noindent" >
+<span 
+class="cmbx-10">eps</span> </dt><dd 
+class="description">
+                                                                  
+
+                                                                  
+     <!--l. 81--><p class="noindent" >The stopping tolerance. <br 
+class="newline" />Scope: <span 
+class="cmbx-10">global </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">required</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">in</span>.<br 
+class="newline" />Specified as: a real number.
+     </dd><dt class="description">
+     <!--l. 86--><p class="noindent" >
+<span 
+class="cmbx-10">desc</span><span 
+class="cmbx-10">_a</span> </dt><dd 
+class="description">
+     <!--l. 86--><p class="noindent" >contains data structures for communications.<br 
+class="newline" />Scope: <span 
+class="cmbx-10">local </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">required</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">in</span>.<br 
+class="newline" />Specified as: a structured data of type <a 
+href="userhtmlse3.html#descdata"><span 
+class="cmtt-10">psb</span><span 
+class="cmtt-10">_desc</span><span 
+class="cmtt-10">_type</span></a>.
+     </dd><dt class="description">
+     <!--l. 91--><p class="noindent" >
+<span 
+class="cmbx-10">itmax</span> </dt><dd 
+class="description">
+     <!--l. 91--><p class="noindent" >The maximum number of iterations to perform.<br 
+class="newline" />Scope: <span 
+class="cmbx-10">global </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">optional</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">in</span>.<br 
+class="newline" />Default: <span 
+class="cmmi-10">itmax </span>= 1000.<br 
+class="newline" />Specified as: an integer variable <span 
+class="cmmi-10">itmax </span><span 
+class="cmsy-10">&#x2265; </span>1.
+     </dd><dt class="description">
+     <!--l. 97--><p class="noindent" >
+<span 
+class="cmbx-10">itrace</span> </dt><dd 
+class="description">
+     <!--l. 97--><p class="noindent" >If <span 
+class="cmmi-10">&#x003E; </span>0 print out an informational message about convergence every <span 
+class="cmmi-10">itrace</span>
+     iterations. If = 0 print a message in case of convergence failure.<br 
+class="newline" />Scope: <span 
+class="cmbx-10">global </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">optional</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">in</span>.<br 
+class="newline" />Default: <span 
+class="cmmi-10">itrace </span>= <span 
+class="cmsy-10">-</span>1.<br 
+class="newline" />
+     </dd><dt class="description">
+     <!--l. 104--><p class="noindent" >
+<span 
+class="cmbx-10">irst</span> </dt><dd 
+class="description">
+     <!--l. 104--><p class="noindent" >An integer specifying the restart parameter.<br 
+class="newline" />Scope: <span 
+class="cmbx-10">global </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">optional</span>.<br 
+class="newline" />Intent: <span 
+class="cmbx-10">in</span>.<br 
+class="newline" />Values: <span 
+class="cmmi-10">irst &#x003E; </span>0. This is employed for the BiCGSTABL or RGMRES methods,
+     otherwise it is ignored.
+     </dd><dt class="description">
+     <!--l. 111--><p class="noindent" >
+<span 
+class="cmbx-10">istop</span> </dt><dd 
+class="description">
+                                                                  
+
+                                                                  
+     <!--l. 111--><p class="noindent" >An integer specifying the stopping criterion.<br 
+class="newline" />Scope: <span 
+class="cmbx-10">global </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">optional</span>.<br 
+class="newline" />Intent: <span 
+class="cmbx-10">in</span>.<br 
+class="newline" />Values: 1: use the normwise backward error, 2: use the scaled 2-norm
+     of the residual, 3: use the residual reduction in the 2-norm. Default:
+     2.
+     </dd><dt class="description">
+     <!--l. 117--><p class="noindent" >
+<span 
+class="cmbx-10">On Return</span> </dt><dd 
+class="description">
+     <!--l. 117--><p class="noindent" >
+     </dd><dt class="description">
+     <!--l. 118--><p class="noindent" >
+<span 
+class="cmbx-10">x</span> </dt><dd 
+class="description">
+     <!--l. 118--><p class="noindent" >The computed solution. <br 
+class="newline" />Scope: <span 
+class="cmbx-10">local </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">required</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">inout</span>.<br 
+class="newline" />Specified as: a rank one array or an object of type <a 
+href="userhtmlse3.html#vdata"><span 
+class="cmtt-10">psb</span><span 
+class="cmtt-10">_T</span><span 
+class="cmtt-10">_vect</span><span 
+class="cmtt-10">_type</span></a>.
+     </dd><dt class="description">
+     <!--l. 123--><p class="noindent" >
+<span 
+class="cmbx-10">iter</span> </dt><dd 
+class="description">
+     <!--l. 123--><p class="noindent" >The number of iterations performed.<br 
+class="newline" />Scope: <span 
+class="cmbx-10">global </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">optional</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">out</span>.<br 
+class="newline" />Returned as: an integer variable.
+     </dd><dt class="description">
+     <!--l. 128--><p class="noindent" >
+<span 
+class="cmbx-10">err</span> </dt><dd 
+class="description">
+     <!--l. 128--><p class="noindent" >The convergence estimate on exit.<br 
+class="newline" />Scope: <span 
+class="cmbx-10">global </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">optional</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">out</span>.<br 
+class="newline" />Returned as: a real number.
+     </dd><dt class="description">
+     <!--l. 133--><p class="noindent" >
+<span 
+class="cmbx-10">cond</span> </dt><dd 
+class="description">
+     <!--l. 133--><p class="noindent" >An estimate of the condition number of matrix <span 
+class="cmmi-10">A</span>; only available with the <span 
+class="cmmi-10">CG</span>
+     method on real data.<br 
+class="newline" />Scope: <span 
+class="cmbx-10">global </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">optional</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">out</span>.<br 
+class="newline" />Returned as: a real number. A correct result will be greater than or
+     equal to one; if specified for non-real data, or an error occurred, zero is
+     returned.
+     </dd><dt class="description">
+     <!--l. 141--><p class="noindent" >
+<span 
+class="cmbx-10">info</span> </dt><dd 
+class="description">
+     <!--l. 141--><p class="noindent" >Error code.<br 
+class="newline" />Scope: <span 
+class="cmbx-10">local </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">required </span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">out</span>.<br 
+class="newline" />An integer value; 0 means no error has been detected.</dd></dl>
+                                                                  
+
+                                                                  
+                                                                  
+
+                                                                  
+                                                                  
+
+                                                                  
+                                                                  
+
+                                                                  
+   <!--l. 1--><div class="crosslinks"><p class="noindent">[<a 
 href="userhtmlse10.html" >prev</a>] [<a 
 href="userhtmlse10.html#tailuserhtmlse10.html" >prev-tail</a>] [<a 
 href="userhtmlse11.html" >front</a>] [<a 
 href="userhtml.html# " >up</a>] </p></div>
-<!--l. 2--><p class="indent" >   <a 
- id="tailuserhtmlse11.html"></a>  
+<!--l. 1--><p class="indent" >   <a 
+ id="tailuserhtmlse11.html"></a>   
 </body></html> 
--- a/docs/html/userhtmlse12.html
+++ b/docs/html/userhtmlse12.html
@ -0,0 +1,921 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"  
+  "http://www.w3.org/TR/html4/loose.dtd">  
+<html > 
+<head><title>Extensions</title> 
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 
+<meta name="generator" content="TeX4ht (https://tug.org/tex4ht/)"> 
+<meta name="originator" content="TeX4ht (https://tug.org/tex4ht/)"> 
+<!-- html,3 --> 
+<meta name="src" content="userhtml.tex"> 
+<link rel="stylesheet" type="text/css" href="userhtml.css"> 
+</head><body 
+>
+   <!--l. 1--><div class="crosslinks"><p class="noindent">[<a 
+href="userhtmlse11.html" >prev</a>] [<a 
+href="userhtmlse11.html#tailuserhtmlse11.html" >prev-tail</a>] [<a 
+href="userhtmlse9.html#tailuserhtmlse12.html">tail</a>] [<a 
+href="userhtml.html# " >up</a>] </p></div>
+   <h3 class="sectionHead"><span class="titlemark">12   </span> <a 
+ id="x19-14400012"></a>Extensions</h3>
+<!--l. 3--><p class="noindent" >The EXT, CUDA and RSB subdirectories contain a set of extensions to the base
+library. The extensions provide additional storage formats beyond the ones already
+contained in the base library, as well as interfaces to:
+     <dl class="description"><dt class="description">
+     <!--l. 8--><p class="noindent" >
+<span 
+class="cmbx-10">SPGPU</span> </dt><dd 
+class="description">
+     <!--l. 8--><p class="noindent" >a                      CUDA                      library                      originally
+     published as <a 
+href="https://code.google.com/p/spgpu/" class="url" ><span 
+class="cmtt-10">https://code.google.com/p/spgpu/</span></a> and now included
+     in the <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">cuda</span></span></span> subdir, for computations on NVIDIA GPUs;
+     </dd><dt class="description">
+     <!--l. 11--><p class="noindent" >
+<span 
+class="cmbx-10">LIBRSB</span> </dt><dd 
+class="description">
+     <!--l. 11--><p class="noindent" ><a 
+href="http://sourceforge.net/projects/librsb/" class="url" ><span 
+class="cmtt-10">http://sourceforge.net/projects/librsb/</span></a>, for computations on
+     multicore parallel machines.</dd></dl>
+<!--l. 14--><p class="noindent" >The infrastructure laid out in the base library to allow for these extensions is detailed in
+the references&#x00A0;<span class="cite">[<a 
+href="userhtmlli2.html#XDesPat:11">20</a>,&#x00A0;<a 
+href="userhtmlli2.html#XCaFiRo:2014">21</a>,&#x00A0;<a 
+href="userhtmlli2.html#XSparse03">10</a>]</span>; the CUDA-specific data formats are described
+in&#x00A0;<span class="cite">[<a 
+href="userhtmlli2.html#XOurTechRep">22</a>]</span>.
+<!--l. 19--><p class="noindent" >
+   <h4 class="subsectionHead"><span class="titlemark">12.1   </span> <a 
+ id="x19-14500012.1"></a>Using the extensions</h4>
+<!--l. 21--><p class="noindent" >A sample application using the PSBLAS extensions will contain the following
+steps:
+     <ul class="itemize1">
+     <li class="itemize">
+     <!--l. 24--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">USE</span></span></span> the appropriate modules (<span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_ext_mod</span></span></span>, <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_cuda_mod</span></span></span>);
+     </li>
+     <li class="itemize">
+     <!--l. 26--><p class="noindent" >Declare     a     <span 
+class="cmti-10">mold     </span>variable     of     the     necessary     type     (e.g.
+     <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_d_ell_sparse_mat</span></span></span>, <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_d_hlg_sparse_mat</span></span></span>, <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_d_vect_cuda</span></span></span>);
+     </li>
+     <li class="itemize">
+     <!--l. 29--><p class="noindent" >Pass the mold variable to the base library interface where needed to ensure
+     the appropriate dynamic type.</li></ul>
+                                                                  
+
+                                                                  
+<!--l. 32--><p class="noindent" >Suppose you want to use the CUDA-enabled ELLPACK data structure; you would use a
+piece of code like this (and don&#8217;t forget, you need CUDA-side vectors along with the
+matrices):
+<div class="center" 
+>
+<!--l. 85--><p class="noindent" >
+                                                                  
+
+                                                                  
+<div class="minipage"><pre class="verbatim" id="verbatim-103">
+program&#x00A0;my_cuda_test
+&#x00A0;&#x00A0;use&#x00A0;psb_base_mod
+&#x00A0;&#x00A0;use&#x00A0;psb_util_mod
+&#x00A0;&#x00A0;use&#x00A0;psb_ext_mod
+&#x00A0;&#x00A0;use&#x00A0;psb_cuda_mod
+&#x00A0;&#x00A0;type(psb_dspmat_type)&#x00A0;::&#x00A0;a,&#x00A0;agpu
+&#x00A0;&#x00A0;type(psb_d_vect_type)&#x00A0;::&#x00A0;x,&#x00A0;xg,&#x00A0;bg
+
+&#x00A0;&#x00A0;real(psb_dpk_),&#x00A0;allocatable&#x00A0;::&#x00A0;xtmp(:)
+&#x00A0;&#x00A0;type(psb_d_vect_cuda)&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;::&#x00A0;vmold
+&#x00A0;&#x00A0;type(psb_d_elg_sparse_mat)&#x00A0;::&#x00A0;aelg
+&#x00A0;&#x00A0;type(psb_ctxt_type)&#x00A0;::&#x00A0;ctxt
+&#x00A0;&#x00A0;integer&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;::&#x00A0;iam,&#x00A0;np
+
+
+&#x00A0;&#x00A0;call&#x00A0;psb_init(ctxt)
+&#x00A0;&#x00A0;call&#x00A0;psb_info(ctxt,iam,np)
+&#x00A0;&#x00A0;call&#x00A0;psb_cuda_init(ctxt,&#x00A0;iam)
+
+
+&#x00A0;&#x00A0;!&#x00A0;My&#x00A0;own&#x00A0;home-grown&#x00A0;matrix&#x00A0;generator
+&#x00A0;&#x00A0;call&#x00A0;gen_matrix(ctxt,idim,desc_a,a,x,info)
+&#x00A0;&#x00A0;if&#x00A0;(info&#x00A0;/=&#x00A0;0)&#x00A0;goto&#x00A0;9999
+
+&#x00A0;&#x00A0;call&#x00A0;a%cscnv(agpu,info,mold=aelg)
+&#x00A0;&#x00A0;if&#x00A0;(info&#x00A0;/=&#x00A0;0)&#x00A0;goto&#x00A0;9999
+&#x00A0;&#x00A0;xtmp&#x00A0;=&#x00A0;x%get_vect()
+&#x00A0;&#x00A0;call&#x00A0;xg%bld(xtmp,mold=vmold)
+&#x00A0;&#x00A0;call&#x00A0;bg%bld(size(xtmp),mold=vmold)
+
+&#x00A0;&#x00A0;!&#x00A0;Do&#x00A0;sparse&#x00A0;MV
+&#x00A0;&#x00A0;call&#x00A0;psb_spmm(done,agpu,xg,dzero,bg,desc_a,info)
+
+
+9999&#x00A0;continue
+&#x00A0;&#x00A0;if&#x00A0;(info&#x00A0;==&#x00A0;0)&#x00A0;then
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;write(*,*)&#x00A0;&#8217;42&#8217;
+&#x00A0;&#x00A0;else
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;write(*,*)&#x00A0;&#8217;Something&#x00A0;went&#x00A0;wrong&#x00A0;&#8217;,info
+&#x00A0;&#x00A0;end&#x00A0;if
+
+
+&#x00A0;&#x00A0;call&#x00A0;psb_cuda_exit()
+&#x00A0;&#x00A0;call&#x00A0;psb_exit(ctxt)
+&#x00A0;&#x00A0;stop
+end&#x00A0;program&#x00A0;my_cuda_test
+</pre>
+<!--l. 134--><p class="nopar" >                                                           </div></div>
+                                                                  
+
+                                                                  
+<!--l. 139--><p class="indent" >   A full example of this strategy can be seen in the <span 
+class="cmtt-10">test/ext/kernel </span>and
+<span 
+class="cmtt-10">test/cuda/kernel </span>subdirectories, where we provide sample programs to test the
+speed of the sparse matrix-vector product with the various data structures included
+in the library.
+<!--l. 146--><p class="noindent" >
+   <h4 class="subsectionHead"><span class="titlemark">12.2   </span> <a 
+ id="x19-14600012.2"></a>Extensions&#8217; Data Structures</h4>
+<!--l. 150--><p class="noindent" >Access to the facilities provided by the EXT library is mainly achieved through
+the data types that are provided within. The data classes are derived from
+the base classes in PSBLAS, through the Fortran&#x00A0;2003 mechanism of <span 
+class="cmti-10">type</span>
+<span 
+class="cmti-10">extension</span>&#x00A0;<span class="cite">[<a 
+href="userhtmlli2.html#XMRC:11">17</a>]</span>.
+<!--l. 155--><p class="indent" >   The data classes are divided between the general purpose CPU extensions, the
+GPU interfaces and the RSB interfaces. In the description we will make use of the
+notation introduced in Table&#x00A0;<a 
+href="#x19-146001r21">21<!--tex4ht:ref: tab:notation --></a>.
+   <div class="table">
+                                                                  
+
+                                                                  
+<!--l. 160--><p class="indent" >   <a 
+ id="x19-146001r21"></a><hr class="float"><div class="float" 
+>
+                                                                  
+
+                                                                  
+ <div class="caption" 
+><span class="id">Table&#x00A0;21: </span><span  
+class="content">Notation for parameters describing a sparse matrix</span></div><!--tex4ht:label?: x19-146001r21 -->
+<div class="center" 
+>
+<!--l. 162--><p class="noindent" >
+<div class="tabular"> <table id="TBL-23" class="tabular" 
+ 
+><colgroup id="TBL-23-1g"><col 
+id="TBL-23-1"><col 
+id="TBL-23-2"></colgroup><tr 
+class="hline"><td><hr></td><td><hr></td></tr><tr  
+ style="vertical-align:baseline;" id="TBL-23-1-"><td  style="white-space:nowrap; text-align:left;" id="TBL-23-1-1"  
+class="td11"><span 
+class="cmr-8">Name     </span></td><td  style="white-space:nowrap; text-align:left;" id="TBL-23-1-2"  
+class="td11"><span 
+class="cmr-8">Description                                    </span></td>
+</tr><tr 
+class="hline"><td><hr></td><td><hr></td></tr><tr  
+ style="vertical-align:baseline;" id="TBL-23-2-"><td  style="white-space:nowrap; text-align:left;" id="TBL-23-2-1"  
+class="td11"><span 
+class="cmr-8">M           </span></td><td  style="white-space:nowrap; text-align:left;" id="TBL-23-2-2"  
+class="td11"><span 
+class="cmr-8">Number of rows in matrix                 </span></td></tr><tr  
+ style="vertical-align:baseline;" id="TBL-23-3-"><td  style="white-space:nowrap; text-align:left;" id="TBL-23-3-1"  
+class="td11"><span 
+class="cmr-8">N </span></td><td  style="white-space:nowrap; text-align:left;" id="TBL-23-3-2"  
+class="td11"><span 
+class="cmr-8">Number of columns in matrix</span></td>
+</tr><tr  
+ style="vertical-align:baseline;" id="TBL-23-4-"><td  style="white-space:nowrap; text-align:left;" id="TBL-23-4-1"  
+class="td11"><span 
+class="cmr-8">NZ         </span></td><td  style="white-space:nowrap; text-align:left;" id="TBL-23-4-2"  
+class="td11"><span 
+class="cmr-8">Number of nonzeros in matrix           </span></td></tr><tr  
+ style="vertical-align:baseline;" id="TBL-23-5-"><td  style="white-space:nowrap; text-align:left;" id="TBL-23-5-1"  
+class="td11"><span 
+class="cmr-8">AVGNZR </span></td><td  style="white-space:nowrap; text-align:left;" id="TBL-23-5-2"  
+class="td11"><span 
+class="cmr-8">Average number of nonzeros per row</span></td>
+</tr><tr  
+ style="vertical-align:baseline;" id="TBL-23-6-"><td  style="white-space:nowrap; text-align:left;" id="TBL-23-6-1"  
+class="td11"><span 
+class="cmr-8">MAXNZR</span></td><td  style="white-space:nowrap; text-align:left;" id="TBL-23-6-2"  
+class="td11"><span 
+class="cmr-8">Maximum number of nonzeros per row</span></td>
+</tr><tr  
+ style="vertical-align:baseline;" id="TBL-23-7-"><td  style="white-space:nowrap; text-align:left;" id="TBL-23-7-1"  
+class="td11"><span 
+class="cmr-8">NDIAG   </span></td><td  style="white-space:nowrap; text-align:left;" id="TBL-23-7-2"  
+class="td11"><span 
+class="cmr-8">Number of nonzero diagonals             </span></td>
+</tr><tr  
+ style="vertical-align:baseline;" id="TBL-23-8-"><td  style="white-space:nowrap; text-align:left;" id="TBL-23-8-1"  
+class="td11"><span 
+class="cmr-8">AS          </span></td><td  style="white-space:nowrap; text-align:left;" id="TBL-23-8-2"  
+class="td11"><span 
+class="cmr-8">Coefficients array                            </span></td>
+</tr><tr  
+ style="vertical-align:baseline;" id="TBL-23-9-"><td  style="white-space:nowrap; text-align:left;" id="TBL-23-9-1"  
+class="td11"><span 
+class="cmr-8">IA          </span></td><td  style="white-space:nowrap; text-align:left;" id="TBL-23-9-2"  
+class="td11"><span 
+class="cmr-8">Row indices array                           </span></td>
+</tr><tr  
+ style="vertical-align:baseline;" id="TBL-23-10-"><td  style="white-space:nowrap; text-align:left;" id="TBL-23-10-1"  
+class="td11"><span 
+class="cmr-8">JA          </span></td><td  style="white-space:nowrap; text-align:left;" id="TBL-23-10-2"  
+class="td11"><span 
+class="cmr-8">Column indices array                       </span></td>
+</tr><tr  
+ style="vertical-align:baseline;" id="TBL-23-11-"><td  style="white-space:nowrap; text-align:left;" id="TBL-23-11-1"  
+class="td11"><span 
+class="cmr-8">IRP        </span></td><td  style="white-space:nowrap; text-align:left;" id="TBL-23-11-2"  
+class="td11"><span 
+class="cmr-8">Row start pointers array                   </span></td>
+</tr><tr  
+ style="vertical-align:baseline;" id="TBL-23-12-"><td  style="white-space:nowrap; text-align:left;" id="TBL-23-12-1"  
+class="td11"><span 
+class="cmr-8">JCP        </span></td><td  style="white-space:nowrap; text-align:left;" id="TBL-23-12-2"  
+class="td11"><span 
+class="cmr-8">Column start pointers array              </span></td>
+</tr><tr  
+ style="vertical-align:baseline;" id="TBL-23-13-"><td  style="white-space:nowrap; text-align:left;" id="TBL-23-13-1"  
+class="td11"><span 
+class="cmr-8">NZR       </span></td><td  style="white-space:nowrap; text-align:left;" id="TBL-23-13-2"  
+class="td11"><span 
+class="cmr-8">Number of nonzeros per row array      </span></td>
+</tr><tr  
+ style="vertical-align:baseline;" id="TBL-23-14-"><td  style="white-space:nowrap; text-align:left;" id="TBL-23-14-1"  
+class="td11"><span 
+class="cmr-8">OFFSET </span></td><td  style="white-space:nowrap; text-align:left;" id="TBL-23-14-2"  
+class="td11"><span 
+class="cmr-8">Offset for diagonals                         </span></td>
+</tr><tr 
+class="hline"><td><hr></td><td><hr></td></tr><tr  
+ style="vertical-align:baseline;" id="TBL-23-15-"><td  style="white-space:nowrap; text-align:left;" id="TBL-23-15-1"  
+class="td11">         </td></tr></table>                                                         </div>
+</div>
+                                                                  
+
+                                                                  
+   </div><hr class="endfloat" />
+   </div>
+<!--l. 188--><p class="indent" >   <hr class="figure"><div class="figure" 
+>
+                                                                  
+
+                                                                  
+<a 
+ id="x19-146002r5"></a>
+                                                                  
+
+                                                                  
+<!--l. 192--><p class="noindent" ><img 
+src="mat.png" alt="PIC"  
+width="147" height="147" >
+<br /> <div class="caption" 
+><span class="id">Figure&#x00A0;5: </span><span  
+class="content">Example of sparse matrix</span></div><!--tex4ht:label?: x19-146002r5 -->
+                                                                  
+
+                                                                  
+<!--l. 198--><p class="indent" >   </div><hr class="endfigure">
+   <h4 class="subsectionHead"><span class="titlemark">12.3   </span> <a 
+ id="x19-14700012.3"></a>CPU-class extensions</h4>
+<!--l. 203--><p class="noindent" >
+   <h5 class="likesubsubsectionHead"><a 
+ id="x19-148000"></a>ELLPACK</h5>
+<!--l. 205--><p class="noindent" >The ELLPACK/ITPACK format (shown in Figure&#x00A0;<a 
+href="#x19-148001r6">6<!--tex4ht:ref: fig:ell --></a>) comprises two 2-dimensional
+arrays <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">AS</span></span></span> and <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">JA</span></span></span> with <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">M</span></span></span> rows and <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">MAXNZR</span></span></span> columns, where <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">MAXNZR</span></span></span> is the maximum
+number of nonzeros in any row&#x00A0;<span class="cite">[<span 
+class="cmbx-10">?</span>]</span>. Each row of the arrays <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">AS</span></span></span> and <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">JA</span></span></span> contains the
+coefficients and column indices; rows shorter than <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">MAXNZR</span></span></span> are padded with zero
+coefficients and appropriate column indices, e.g. the last valid one found in the same
+row.
+<!--l. 215--><p class="indent" >   <hr class="figure"><div class="figure" 
+>
+                                                                  
+
+                                                                  
+<a 
+ id="x19-148001r6"></a>
+                                                                  
+
+                                                                  
+<!--l. 219--><p class="noindent" ><img 
+src="ell.png" alt="PIC"  
+width="233" height="233" >
+<br /> <div class="caption" 
+><span class="id">Figure&#x00A0;6: </span><span  
+class="content">ELLPACK compression of matrix in Figure&#x00A0;<a 
+href="#x19-146002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-148001r6 -->
+                                                                  
+
+                                                                  
+<!--l. 225--><p class="indent" >   </div><hr class="endfigure">
+<a 
+ id="x19-148002r1"></a>
+                                                                  
+
+                                                                  
+<!--l. 229--><p class="indent" >   <hr class="float"><div class="float" 
+>
+                                                                  
+
+                                                                  
+   <!--l. 231-->
+   <pre class="lstlisting" id="listing-168"><span class="label"><a 
+ id="x19-148003r1"></a></span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span 
+class="cmtt-9">do</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">i</span></span><span style="color:#000000"><span 
+class="cmtt-9">=1,</span></span><span style="color:#000000"><span 
+class="cmtt-9">n</span></span> 
+<span class="label"><a 
+ id="x19-148004r2"></a></span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span 
+class="cmtt-9">t</span></span><span style="color:#000000"><span 
+class="cmtt-9">=0</span></span> 
+<span class="label"><a 
+ id="x19-148005r3"></a></span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span 
+class="cmtt-9">do</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">j</span></span><span style="color:#000000"><span 
+class="cmtt-9">=1,</span></span><span style="color:#000000"><span 
+class="cmtt-9">maxnzr</span></span> 
+<span class="label"><a 
+ id="x19-148006r4"></a></span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span 
+class="cmtt-9">t</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">=</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">t</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">+</span></span><span style="color:#000000"> </span><span 
+class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span 
+class="cmtt-9">as</span></span><span style="color:#000000"><span 
+class="cmtt-9">(</span></span><span style="color:#000000"><span 
+class="cmtt-9">i</span></span><span style="color:#000000"><span 
+class="cmtt-9">,</span></span><span style="color:#000000"><span 
+class="cmtt-9">j</span></span><span style="color:#000000"><span 
+class="cmtt-9">)*</span></span><span style="color:#000000"><span 
+class="cmtt-9">x</span></span><span style="color:#000000"><span 
+class="cmtt-9">(</span></span><span style="color:#000000"><span 
+class="cmtt-9">ja</span></span><span style="color:#000000"><span 
+class="cmtt-9">(</span></span><span style="color:#000000"><span 
+class="cmtt-9">i</span></span><span style="color:#000000"><span 
+class="cmtt-9">,</span></span><span style="color:#000000"><span 
+class="cmtt-9">j</span></span><span style="color:#000000"><span 
+class="cmtt-9">))</span></span> 
+<span class="label"><a 
+ id="x19-148007r5"></a></span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span 
+class="cmtt-9">end</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">do</span></span> 
+<span class="label"><a 
+ id="x19-148008r6"></a></span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span 
+class="cmtt-9">y</span></span><span style="color:#000000"><span 
+class="cmtt-9">(</span></span><span style="color:#000000"><span 
+class="cmtt-9">i</span></span><span style="color:#000000"><span 
+class="cmtt-9">)</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">=</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">t</span></span> 
+<span class="label"><a 
+ id="x19-148009r7"></a></span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span 
+class="cmtt-9">end</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">do</span></span></pre>
+   
+<a 
+ id="x19-148010r1"></a>
+<a 
+ id="x19-148011"></a>
+        <span 
+class="cmbx-10">Algorithm</span><span 
+class="cmbx-10">&#x00A0;1:</span>&#x00A0; Matrix-Vector product in ELL format
+                                                                  
+
+                                                                  
+   </div><hr class="endfloat" />
+<!--l. 242--><p class="indent" >   The matrix-vector product <span 
+class="cmmi-10">y </span>= <span 
+class="cmmi-10">Ax </span>can be computed with the code shown in
+Alg.&#x00A0;<a 
+href="#x19-148010r1">1<!--tex4ht:ref: alg:ell --></a>; it costs one memory write per outer iteration, plus three memory reads and
+two floating-point operations per inner iteration.
+<!--l. 247--><p class="indent" >   Unless all rows have exactly the same number of nonzeros, some of the coefficients
+in the <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">AS</span></span></span> array will be zeros; therefore this data structure will have an overhead both
+in terms of memory space and redundant operations (multiplications by zero). The
+overhead can be acceptable if:
+     <ol  class="enumerate1" >
+<li 
+  class="enumerate" id="x19-148013x1">
+     <!--l. 253--><p class="noindent" >The maximum number of nonzeros per row is not much larger than the
+     average;
+     </li>
+<li 
+  class="enumerate" id="x19-148015x2">
+     <!--l. 255--><p class="noindent" >The regularity of the data structure allows for faster code, e.g. by allowing
+     vectorization, thereby offsetting the additional storage requirements.</li></ol>
+<!--l. 259--><p class="noindent" >In the extreme case where the input matrix has one full row, the ELLPACK
+structure would require more memory than the normal 2D array storage. The
+ELLPACK storage format was very popular in the vector computing days; in
+modern CPUs it is not quite as popular, but it is the basis for many GPU
+formats.
+<!--l. 265--><p class="indent" >   The relevant data type is <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_T_ell_sparse_mat</span></span></span>:
+<div class="center" 
+>
+<!--l. 281--><p class="noindent" >
+<div class="minipage"><pre class="verbatim" id="verbatim-104">
+&#x00A0;&#x00A0;type,&#x00A0;extends(psb_d_base_sparse_mat)&#x00A0;::&#x00A0;psb_d_ell_sparse_mat
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;!
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;!&#x00A0;ITPACK/ELL&#x00A0;format,&#x00A0;extended.
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;!
+
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;integer(psb_ipk_),&#x00A0;allocatable&#x00A0;::&#x00A0;irn(:),&#x00A0;ja(:,:),&#x00A0;idiag(:)
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;real(psb_dpk_),&#x00A0;allocatable&#x00A0;::&#x00A0;val(:,:)
+
+&#x00A0;&#x00A0;contains
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;....
+&#x00A0;&#x00A0;end&#x00A0;type&#x00A0;psb_d_ell_sparse_mat
+</pre>
+<!--l. 295--><p class="nopar" >                                                           </div></div>
+   <h5 class="likesubsubsectionHead"><a 
+ id="x19-149000"></a>Hacked ELLPACK</h5>
+<!--l. 303--><p class="noindent" >The <span 
+class="cmti-10">hacked ELLPACK </span>(<span 
+class="cmbx-10">HLL</span>) format alleviates the main problem of the ELLPACK
+format, that is, the amount of memory required by padding for sparse matrices in
+which the maximum row length is larger than the average.
+                                                                  
+
+                                                                  
+<!--l. 308--><p class="indent" >   The number of elements allocated to padding is
+[(<span 
+class="cmmi-10">m</span><span 
+class="cmsy-10">*</span><span 
+class="cmmi-10">maxNR</span>) <span 
+class="cmsy-10">- </span>(<span 
+class="cmmi-10">m</span><span 
+class="cmsy-10">*</span><span 
+class="cmmi-10">avgNR</span>) = <span 
+class="cmmi-10">m</span><span 
+class="cmsy-10">* </span>(<span 
+class="cmmi-10">maxNR</span><span 
+class="cmsy-10">-</span><span 
+class="cmmi-10">avgNR</span>)] for both <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">AS</span></span></span> and <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">JA</span></span></span> arrays,
+where <span 
+class="cmmi-10">m </span>is equal to the number of rows of the matrix, <span 
+class="cmmi-10">maxNR </span>is the maximum
+number of nonzero elements in any row and <span 
+class="cmmi-10">avgNR </span>is the average number of
+nonzeros per row. Therefore a single densely populated row can seriously affect the total size
+of the allocation.
+<!--l. 317--><p class="indent" >   To limit this effect, in the HLL format we break the original matrix into equally
+sized groups of rows (called <span 
+class="cmti-10">hacks</span>), and then store these groups as independent
+matrices in ELLPACK format. The groups can be arranged selecting rows in an
+arbitrary manner; indeed, if the rows are sorted by decreasing number of nonzeros
+we obtain essentially the JAgged Diagonals format. If the rows are not in the original
+order, then an additional vector <span 
+class="cmti-10">rIdx </span>is required, storing the actual row index for
+each row in the data structure.
+<!--l. 327--><p class="indent" >   The multiple ELLPACK-like buffers are stacked together inside a single, one
+dimensional array; an additional vector <span 
+class="cmti-10">hackOffsets </span>is provided to keep track of the
+individual submatrices. All hacks have the same number of rows <span 
+class="cmti-10">hackSize</span>; hence, the
+<span 
+class="cmti-10">hackOffsets </span>vector is an array of (<span 
+class="cmmi-10">m&#x2215;hackSize</span>) + 1 elements, each one pointing to
+the first index of a submatrix inside the stacked <span 
+class="cmti-10">cM</span>/<span 
+class="cmti-10">rP </span>buffers, plus an additional
+element pointing past the end of the last block, where the next one would begin. We
+thus have the property that the elements of the <span 
+class="cmmi-10">k</span>-th <span 
+class="cmti-10">hack </span>are stored between
+<span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">hackOffsets[k]</span></span></span> and <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">hackOffsets[k+1]</span></span></span>, similarly to what happens in the CSR
+format.
+<!--l. 342--><p class="indent" >   <hr class="figure"><div class="figure" 
+>
+                                                                  
+
+                                                                  
+<a 
+ id="x19-149001r7"></a>
+                                                                  
+
+                                                                  
+<!--l. 346--><p class="noindent" ><img 
+src="hll.png" alt="PIC"  
+width="248" height="248" >
+<br /> <div class="caption" 
+><span class="id">Figure&#x00A0;7: </span><span  
+class="content">Hacked ELLPACK compression of matrix in Figure&#x00A0;<a 
+href="#x19-146002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-149001r7 -->
+                                                                  
+
+                                                                  
+<!--l. 352--><p class="indent" >   </div><hr class="endfigure">
+<!--l. 354--><p class="indent" >   With this data structure a very long row only affects one hack, and therefore the
+additional memory is limited to the hack in which the row appears.
+<!--l. 358--><p class="indent" >   The relevant data type is <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_T_hll_sparse_mat</span></span></span>:
+<div class="center" 
+>
+<!--l. 374--><p class="noindent" >
+<div class="minipage"><pre class="verbatim" id="verbatim-105">
+&#x00A0;&#x00A0;type,&#x00A0;extends(psb_d_base_sparse_mat)&#x00A0;::&#x00A0;psb_d_hll_sparse_mat
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;!
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;!&#x00A0;HLL&#x00A0;format.&#x00A0;(Hacked&#x00A0;ELL)
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;!
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;integer(psb_ipk_)&#x00A0;::&#x00A0;hksz
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;integer(psb_ipk_),&#x00A0;allocatable&#x00A0;::&#x00A0;irn(:),&#x00A0;ja(:),&#x00A0;idiag(:),&#x00A0;hkoffs(:)
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;real(psb_dpk_),&#x00A0;allocatable&#x00A0;::&#x00A0;val(:)
+
+&#x00A0;&#x00A0;contains
+&#x00A0;&#x00A0;&#x00A0;....
+&#x00A0;&#x00A0;end&#x00A0;type
+</pre>
+<!--l. 388--><p class="nopar" >                                                           </div></div>
+   <h5 class="likesubsubsectionHead"><a 
+ id="x19-150000"></a>Diagonal storage</h5>
+<!--l. 396--><p class="noindent" >The DIAgonal (DIA) format (shown in Figure&#x00A0;<a 
+href="#x19-150001r8">8<!--tex4ht:ref: fig:dia --></a>) has a 2-dimensional array <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">AS</span></span></span>
+containing in each column the coefficients along a diagonal of the matrix, and an
+integer array <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">OFFSET</span></span></span> that determines where each diagonal starts. The diagonals in <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">AS</span></span></span>
+are padded with zeros as necessary.
+<!--l. 402--><p class="indent" >   The code to compute the matrix-vector product <span 
+class="cmmi-10">y </span>= <span 
+class="cmmi-10">Ax </span>is shown in Alg.&#x00A0;<a 
+href="#x19-150003r2">2<!--tex4ht:ref: alg:dia --></a>; it
+costs one memory read per outer iteration, plus three memory reads, one memory
+write and two floating-point operations per inner iteration. The accesses to
+<span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">AS</span></span></span> and <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">x</span></span></span> are in strict sequential order, therefore no indirect addressing is
+required.
+<!--l. 409--><p class="indent" >   <hr class="figure"><div class="figure" 
+>
+                                                                  
+
+                                                                  
+<a 
+ id="x19-150001r8"></a>
+                                                                  
+
+                                                                  
+<!--l. 413--><p class="noindent" ><img 
+src="dia.png" alt="PIC"  
+width="248" height="248" >
+<br /> <div class="caption" 
+><span class="id">Figure&#x00A0;8: </span><span  
+class="content">DIA compression of matrix in Figure&#x00A0;<a 
+href="#x19-146002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-150001r8 -->
+                                                                  
+
+                                                                  
+<!--l. 419--><p class="indent" >   </div><hr class="endfigure">
+<a 
+ id="x19-150002r2"></a>
+                                                                  
+
+                                                                  
+<!--l. 423--><p class="indent" >   <hr class="float"><div class="float" 
+>
+                                                                  
+
+                                                                  
+   <div class="center" 
+>
+   <!--l. 437--><p class="noindent" >
+   <div class="minipage"><pre class="verbatim" id="verbatim-106">
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;do&#x00A0;j=1,ndiag
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;if&#x00A0;(offset(j)&#x00A0;&#x003E;&#x00A0;0)&#x00A0;then
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;ir1&#x00A0;=&#x00A0;1;&#x00A0;ir2&#x00A0;=&#x00A0;m&#x00A0;-&#x00A0;offset(j);
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;else
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;ir1&#x00A0;=&#x00A0;1&#x00A0;-&#x00A0;offset(j);&#x00A0;ir2&#x00A0;=&#x00A0;m;
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;end&#x00A0;if
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;do&#x00A0;i=ir1,ir2
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;y(i)&#x00A0;=&#x00A0;y(i)&#x00A0;+&#x00A0;alpha*as(i,j)*x(i+offset(j))
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;end&#x00A0;do
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;end&#x00A0;do
+</pre>
+<!--l. 450--><p class="nopar" >                                                           </div></div>
+   <a 
+ id="x19-150003r2"></a>
+   <a 
+ id="x19-150004"></a>
+        <span 
+class="cmbx-10">Algorithm</span><span 
+class="cmbx-10">&#x00A0;2:</span>&#x00A0; Matrix-Vector product in DIA format
+                                                                  
+
+                                                                  
+   </div><hr class="endfloat" />
+<!--l. 458--><p class="indent" >   The relevant data type is <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_T_dia_sparse_mat</span></span></span>:
+<div class="center" 
+>
+<!--l. 473--><p class="noindent" >
+<div class="minipage"><pre class="verbatim" id="verbatim-107">
+&#x00A0;&#x00A0;type,&#x00A0;extends(psb_d_base_sparse_mat)&#x00A0;::&#x00A0;psb_d_dia_sparse_mat
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;!
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;!&#x00A0;DIA&#x00A0;format,&#x00A0;extended.
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;!
+
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;integer(psb_ipk_),&#x00A0;allocatable&#x00A0;::&#x00A0;offset(:)
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;integer(psb_ipk_)&#x00A0;::&#x00A0;nzeros
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;real(psb_dpk_),&#x00A0;allocatable&#x00A0;::&#x00A0;data(:,:)
+
+&#x00A0;&#x00A0;end&#x00A0;type
+</pre>
+<!--l. 486--><p class="nopar" >                                                           </div></div>
+   <h5 class="likesubsubsectionHead"><a 
+ id="x19-151000"></a>Hacked DIA</h5>
+<!--l. 495--><p class="noindent" >Storage by DIAgonals is an attractive option for matrices whose coefficients are
+located on a small set of diagonals, since it does away with explicitly storing the
+indices and therefore significantly reduces memory traffic. However, having a few
+coefficients outside of the main set of diagonals may significantly increase the
+amount of needed padding; moreover, while the DIA code is easily vectorized,
+it does not necessarily make optimal use of the memory hierarchy. While
+processing each diagonal we are updating entries in the output vector <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">y</span></span></span>,
+which is then accessed multiple times; if the vector <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">y</span></span></span> is too large to remain
+in the cache memory, the associated cache miss penalty is paid multiple
+times.
+<!--l. 507--><p class="indent" >   The <span 
+class="cmti-10">hacked DIA </span>(<span 
+class="cmbx-10">HDIA</span>) format was designed to contain the amount of padding,
+by breaking the original matrix into equally sized groups of rows (<span 
+class="cmti-10">hacks</span>), and then
+storing these groups as independent matrices in DIA format. This approach is similar
+to that of HLL, and requires using an offset vector for each submatrix. Again,
+similarly to HLL, the various submatrices are stacked inside a linear array to
+improve memory management. The fact that the matrix is accessed in slices
+helps in reducing cache misses, especially regarding accesses to the vector
+<span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">y</span></span></span>.
+<!--l. 519--><p class="indent" >   An additional vector <span 
+class="cmti-10">hackOffsets </span>is provided to complete the matrix format; given
+that <span 
+class="cmti-10">hackSize </span>is the number of rows of each hack, the <span 
+class="cmti-10">hackOffsets </span>vector is
+an array of (<span 
+class="cmmi-10">m&#x2215;hackSize</span>) + 1 elements, pointing to the first diagonal offset of a
+submatrix inside the stacked <span 
+class="cmti-10">offsets </span>buffers, plus an additional element equal to the
+number of nonzero diagonals in the whole matrix. We thus have the property that
+the number of diagonals of the <span 
+class="cmmi-10">k</span>-th <span 
+class="cmti-10">hack </span>is given by <span 
+class="cmti-10">hackOffsets[k+1] -</span>
+<span 
+class="cmti-10">hackOffsets[k]</span>.
+<!--l. 529--><p class="indent" >   <hr class="figure"><div class="figure" 
+>
+                                                                  
+
+                                                                  
+<a 
+ id="x19-151001r9"></a>
+                                                                  
+
+                                                                  
+<!--l. 533--><p class="noindent" ><img 
+src="hdia.png" alt="PIC"  
+width="248" height="248" >
+<br /> <div class="caption" 
+><span class="id">Figure&#x00A0;9: </span><span  
+class="content">Hacked DIA compression of matrix in Figure&#x00A0;<a 
+href="#x19-146002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-151001r9 -->
+                                                                  
+
+                                                                  
+<!--l. 539--><p class="indent" >   </div><hr class="endfigure">
+<!--l. 541--><p class="indent" >   The relevant data type is <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_T_hdia_sparse_mat</span></span></span>:
+<div class="center" 
+>
+<!--l. 568--><p class="noindent" >
+<div class="minipage"><pre class="verbatim" id="verbatim-108">
+&#x00A0;&#x00A0;type&#x00A0;pm
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;real(psb_dpk_),&#x00A0;allocatable&#x00A0;&#x00A0;::&#x00A0;data(:,:)
+&#x00A0;&#x00A0;end&#x00A0;type&#x00A0;pm
+
+&#x00A0;&#x00A0;type&#x00A0;po
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;integer(psb_ipk_),&#x00A0;allocatable&#x00A0;&#x00A0;::&#x00A0;off(:)
+&#x00A0;&#x00A0;end&#x00A0;type&#x00A0;po
+
+&#x00A0;&#x00A0;type,&#x00A0;extends(psb_d_base_sparse_mat)&#x00A0;::&#x00A0;psb_d_hdia_sparse_mat
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;!
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;!&#x00A0;HDIA&#x00A0;format,&#x00A0;extended.
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;!
+
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;type(pm),&#x00A0;allocatable&#x00A0;::&#x00A0;hdia(:)
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;type(po),&#x00A0;allocatable&#x00A0;::&#x00A0;offset(:)
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;integer(psb_ipk_)&#x00A0;::&#x00A0;nblocks,&#x00A0;nzeros
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;integer(psb_ipk_)&#x00A0;::&#x00A0;hack&#x00A0;=&#x00A0;64
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;integer(psb_long_int_k_)&#x00A0;::&#x00A0;dim=0
+
+&#x00A0;&#x00A0;contains
+&#x00A0;&#x00A0;&#x00A0;....
+&#x00A0;&#x00A0;end&#x00A0;type
+</pre>
+<!--l. 593--><p class="nopar" >                                                           </div></div>
+                                                                  
+
+                                                                  
+                                                                  
+
+                                                                  
+                                                                  
+
+                                                                  
+   <h4 class="subsectionHead"><span class="titlemark">12.4   </span> <a 
+ id="x19-15200012.4"></a>CUDA-class extensions</h4>
+<!--l. 4--><p class="noindent" >For computing with CUDA we define a dual storage strategy in which each
+variable on the CPU (&#8220;host&#8221;) side has a GPU (&#8220;device&#8221;) counterpart. When a GPU-type
+variable is initialized, the data contained is (usually) the same on both sides. Each
+operator invoked on the variable may change the data so that only the host side or
+the device side is up-to-date.
+<!--l. 11--><p class="indent" >   Keeping track of the updates to data in the variables is essential: we want to
+perform most computations on the GPU, but we cannot afford the time needed to
+move data between the host memory and the device memory because the bandwidth
+of the interconnection bus would become the main bottleneck of the computation.
+Thus, each and every computational routine in the library is built according to the
+following principles:
+     <ul class="itemize1">
+     <li class="itemize">
+     <!--l. 18--><p class="noindent" >If the data type being handled is GPU-enabled, make sure that its device
+     copy is up to date, perform any arithmetic operation on the GPU, and
+     if the data has been altered as a result, mark the main-memory copy as
+     outdated.
+     </li>
+     <li class="itemize">
+     <!--l. 22--><p class="noindent" >The main-memory copy is never updated unless this is requested by the user
+     either
+         <dl class="description"><dt class="description">
+         <!--l. 25--><p class="noindent" >
+     <span 
+class="cmbx-10">explicitly</span> </dt><dd 
+class="description">
+         <!--l. 25--><p class="noindent" >by invoking a synchronization method;
+         </dd><dt class="description">
+         <!--l. 26--><p class="noindent" >
+     <span 
+class="cmbx-10">implicitly</span> </dt><dd 
+class="description">
+         <!--l. 26--><p class="noindent" >by invoking a method that involves other data items that are not
+         GPU-enabled, e.g., by assignment of a vector to a normal array.</dd></dl>
+     </li></ul>
+<!--l. 31--><p class="noindent" >In this way, data items are put on the GPU memory &#8220;on demand&#8221; and remain there as
+long as &#8220;normal&#8221; computations are carried out. As an example, the following call to a
+matrix-vector product
+<div class="center" 
+>
+<!--l. 39--><p class="noindent" >
+<div class="minipage"><pre class="verbatim" id="verbatim-109">
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;call&#x00A0;psb_spmm(alpha,a,x,beta,y,desc_a,info)
+</pre>
+<!--l. 43--><p class="nopar" >                                                           </div></div>
+                                                                  
+
+                                                                  
+<!--l. 47--><p class="noindent" >will transparently and automatically be performed on the GPU whenever all three data
+inputs <code class="lstinline"><span style="color:#000000">a</span></code>, <code class="lstinline"><span style="color:#000000">x</span></code> and <code class="lstinline"><span style="color:#000000">y</span></code> are GPU-enabled. If a program makes many such calls sequentially,
+then
+     <ul class="itemize1">
+     <li class="itemize">
+     <!--l. 52--><p class="noindent" >The first kernel invocation will find the data in main memory, and will
+     copy it to the GPU memory, thus incurring a significant overhead; the
+     result is however <span 
+class="cmti-10">not </span>copied back, and therefore:
+     </li>
+     <li class="itemize">
+     <!--l. 56--><p class="noindent" >Subsequent kernel invocations involving the same vector will find the data
+     on the GPU side so that they will run at full speed.</li></ul>
+<!--l. 60--><p class="noindent" >For all invocations after the first the only data that will have to be transferred to/from
+the main memory will be the scalars <code class="lstinline"><span style="color:#000000">alpha</span></code> and <code class="lstinline"><span style="color:#000000">beta</span></code>, and the return code
+<code class="lstinline"><span style="color:#000000">info</span></code>.
+<!--l. 64--><p class="indent" >
+     <dl class="description"><dt class="description">
+     <!--l. 65--><p class="noindent" >
+<span 
+class="cmbx-10">Vectors:</span> </dt><dd 
+class="description">
+     <!--l. 65--><p class="noindent" >The  data  type  <code class="lstinline"><span style="color:#000000">psb_T_vect_gpu</span></code> provides  a  GPU-enabled  extension  of
+     the inner type <code class="lstinline"><span style="color:#000000">psb_T_base_vect_type</span></code>, and must be used together with
+     the other inner matrix type to make full use of the GPU computational
+     capabilities;
+     </dd><dt class="description">
+     <!--l. 69--><p class="noindent" >
+<span 
+class="cmbx-10">CSR:</span> </dt><dd 
+class="description">
+     <!--l. 69--><p class="noindent" >The data type <code class="lstinline"><span style="color:#000000">psb_T_csrg_sparse_mat</span></code> provides an interface to the GPU
+     version of CSR available in the NVIDIA CuSPARSE library;
+     </dd><dt class="description">
+     <!--l. 72--><p class="noindent" >
+<span 
+class="cmbx-10">HYB:</span> </dt><dd 
+class="description">
+     <!--l. 72--><p class="noindent" >The data type <code class="lstinline"><span style="color:#000000">psb_T_hybg_sparse_mat</span></code> provides an interface to the HYB
+     GPU storage available in the NVIDIA CuSPARSE library. The internal
+     structure is opaque, hence the host side is just CSR; the HYB data format
+     is only available up to CUDA version 10.
+     </dd><dt class="description">
+     <!--l. 77--><p class="noindent" >
+<span 
+class="cmbx-10">ELL:</span> </dt><dd 
+class="description">
+     <!--l. 77--><p class="noindent" >The  data  type  <code class="lstinline"><span style="color:#000000">psb_T_elg_sparse_mat</span></code> provides  an  interface  to  the
+     ELLPACK implementation from SPGPU;
+                                                                  
+
+                                                                  
+     </dd><dt class="description">
+     <!--l. 80--><p class="noindent" >
+<span 
+class="cmbx-10">HLL:</span> </dt><dd 
+class="description">
+     <!--l. 80--><p class="noindent" >The data type <code class="lstinline"><span style="color:#000000">psb_T_hlg_sparse_mat</span></code> provides an interface to the Hacked
+     ELLPACK implementation from SPGPU;
+     </dd><dt class="description">
+     <!--l. 82--><p class="noindent" >
+<span 
+class="cmbx-10">HDIA:</span> </dt><dd 
+class="description">
+     <!--l. 82--><p class="noindent" >The  data  type  <code class="lstinline"><span style="color:#000000">psb_T_hdiag_sparse_mat</span></code> provides  an  interface  to  the
+     Hacked DIAgonals implementation from SPGPU;</dd></dl>
+                                                                  
+
+                                                                  
+   <!--l. 87--><div class="crosslinks"><p class="noindent">[<a 
+href="userhtmlse11.html" >prev</a>] [<a 
+href="userhtmlse11.html#tailuserhtmlse11.html" >prev-tail</a>] [<a 
+href="userhtmlse12.html" >front</a>] [<a 
+href="userhtml.html# " >up</a>] </p></div>
+<!--l. 87--><p class="indent" >   <a 
+ id="tailuserhtmlse12.html"></a>   
+</body></html> 
--- a/docs/html/userhtmlse13.html
+++ b/docs/html/userhtmlse13.html
@ -0,0 +1,299 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"  
+  "http://www.w3.org/TR/html4/loose.dtd">  
+<html > 
+<head><title>CUDA Environment Routines</title> 
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 
+<meta name="generator" content="TeX4ht (https://tug.org/tex4ht/)"> 
+<meta name="originator" content="TeX4ht (https://tug.org/tex4ht/)"> 
+<!-- html,3 --> 
+<meta name="src" content="userhtml.tex"> 
+<link rel="stylesheet" type="text/css" href="userhtml.css"> 
+</head><body 
+>
+   <!--l. 87--><div class="crosslinks"><p class="noindent">[<a 
+href="userhtmlse12.html" >prev</a>] [<a 
+href="userhtmlse12.html#tailuserhtmlse12.html" >prev-tail</a>] [<a 
+href="userhtmlse10.html#tailuserhtmlse13.html">tail</a>] [<a 
+href="userhtml.html# " >up</a>] </p></div>
+   <h3 class="sectionHead"><span class="titlemark">13   </span> <a 
+ id="x20-15300013"></a>CUDA Environment Routines</h3>
+<!--l. 91--><p class="noindent" >
+   <h4 class="likesubsectionHead"><a 
+ id="x20-154000"></a>psb_cuda_init &#8212; Initializes PSBLAS-CUDA environment</h4>
+<a 
+ id="Q1-20-191"></a>
+<div class="center" 
+>
+<!--l. 99--><p class="noindent" >
+<div class="minipage"><pre class="verbatim" id="verbatim-110">
+call&#x00A0;psb_cuda_init(ctxt&#x00A0;[,&#x00A0;device])
+</pre>
+<!--l. 103--><p class="nopar" >                                                           </div></div>
+<!--l. 108--><p class="noindent" >This subroutine initializes the PSBLAS-CUDA environment.
+     <dl class="description"><dt class="description">
+     <!--l. 110--><p class="noindent" >
+<span 
+class="cmbx-10">Type:</span> </dt><dd 
+class="description">
+     <!--l. 110--><p class="noindent" >Synchronous.
+     </dd><dt class="description">
+     <!--l. 111--><p class="noindent" >
+<span 
+class="cmbx-10">On Entry</span>  </dt><dd 
+class="description">
+     <!--l. 111--><p class="noindent" >
+     </dd><dt class="description">
+     <!--l. 112--><p class="noindent" >
+<span 
+class="cmbx-10">device</span> </dt><dd 
+class="description">
+     <!--l. 112--><p class="noindent" >ID of CUDA device to attach to.<br 
+class="newline" />Scope: <span 
+class="cmbx-10">local</span>.<br 
+class="newline" />Type: <span 
+class="cmbx-10">optional</span>.<br 
+class="newline" />Intent: <span 
+class="cmbx-10">in</span>.<br 
+class="newline" />Specified as: an integer value. &#x00A0;Default: use <code class="lstinline"><span style="color:#000000">mod</span><span style="color:#000000">(</span><span style="color:#000000">iam</span><span style="color:#000000">,</span><span style="color:#000000">ngpu</span><span style="color:#000000">)</span></code> where <code class="lstinline"><span style="color:#000000">iam</span></code> is
+     the calling process index and <code class="lstinline"><span style="color:#000000">ngpu</span></code> is the total number of CUDA devices
+     available on the current node.</dd></dl>
+<!--l. 123--><p class="noindent" ><span 
+class="cmbx-12">Notes</span>
+                                                                  
+
+                                                                  
+     <ol  class="enumerate1" >
+<li 
+  class="enumerate" id="x20-154002x1">
+     <!--l. 125--><p class="noindent" >A call to this routine must precede any other PSBLAS-CUDA call.</li></ol>
+<!--l. 129--><p class="noindent" >
+   <h4 class="likesubsectionHead"><a 
+ id="x20-155000"></a>psb_cuda_exit &#8212; Exit from PSBLAS-CUDA environment</h4>
+<a 
+ id="Q1-20-193"></a>
+<div class="center" 
+>
+<!--l. 137--><p class="noindent" >
+<div class="minipage"><pre class="verbatim" id="verbatim-111">
+call&#x00A0;psb_cuda_exit(ctxt)
+</pre>
+<!--l. 141--><p class="nopar" >                                                           </div></div>
+<!--l. 146--><p class="noindent" >This subroutine exits from the PSBLAS CUDA context.
+     <dl class="description"><dt class="description">
+     <!--l. 148--><p class="noindent" >
+<span 
+class="cmbx-10">Type:</span> </dt><dd 
+class="description">
+     <!--l. 148--><p class="noindent" >Synchronous.
+     </dd><dt class="description">
+     <!--l. 149--><p class="noindent" >
+<span 
+class="cmbx-10">On Entry</span>  </dt><dd 
+class="description">
+     <!--l. 149--><p class="noindent" >
+     </dd><dt class="description">
+     <!--l. 150--><p class="noindent" >
+<span 
+class="cmbx-10">ctxt</span> </dt><dd 
+class="description">
+     <!--l. 150--><p class="noindent" >the communication context identifying the virtual parallel machine.<br 
+class="newline" />Scope: <span 
+class="cmbx-10">global</span>.<br 
+class="newline" />Type: <span 
+class="cmbx-10">required</span>.<br 
+class="newline" />Intent: <span 
+class="cmbx-10">in</span>.<br 
+class="newline" />Specified as: an integer variable.</dd></dl>
+<!--l. 161--><p class="noindent" >
+   <h4 class="likesubsectionHead"><a 
+ id="x20-156000"></a>psb_cuda_DeviceSync &#8212; Synchronize CUDA device</h4>
+<a 
+ id="Q1-20-195"></a>
+                                                                  
+
+                                                                  
+<div class="center" 
+>
+<!--l. 169--><p class="noindent" >
+<div class="minipage"><pre class="verbatim" id="verbatim-112">
+call&#x00A0;psb_cuda_DeviceSync()
+</pre>
+<!--l. 173--><p class="nopar" >                                                           </div></div>
+<!--l. 178--><p class="noindent" >This subroutine ensures that all previously invoked kernels, i.e. all invocations of
+CUDA-side code, have completed.
+<!--l. 182--><p class="noindent" >
+   <h4 class="likesubsectionHead"><a 
+ id="x20-157000"></a>psb_cuda_getDeviceCount </h4>
+<a 
+ id="Q1-20-197"></a>
+<div class="center" 
+>
+<!--l. 190--><p class="noindent" >
+<div class="minipage"><pre class="verbatim" id="verbatim-113">
+ngpus&#x00A0;=&#x00A0;&#x00A0;psb_cuda_getDeviceCount()
+</pre>
+<!--l. 194--><p class="nopar" >                                                           </div></div>
+<!--l. 199--><p class="noindent" >Get number of devices available on current computing node.
+<!--l. 201--><p class="noindent" >
+   <h4 class="likesubsectionHead"><a 
+ id="x20-158000"></a>psb_cuda_getDevice </h4>
+<a 
+ id="Q1-20-199"></a>
+<div class="center" 
+>
+<!--l. 209--><p class="noindent" >
+<div class="minipage"><pre class="verbatim" id="verbatim-114">
+dev&#x00A0;=&#x00A0;&#x00A0;psb_cuda_getDevice()
+</pre>
+<!--l. 213--><p class="nopar" >                                                           </div></div>
+<!--l. 218--><p class="noindent" >Get device in use by current process.
+<!--l. 220--><p class="noindent" >
+   <h4 class="likesubsectionHead"><a 
+ id="x20-159000"></a>psb_cuda_setDevice </h4>
+<a 
+ id="Q1-20-201"></a>
+                                                                  
+
+                                                                  
+<div class="center" 
+>
+<!--l. 228--><p class="noindent" >
+<div class="minipage"><pre class="verbatim" id="verbatim-115">
+info&#x00A0;=&#x00A0;psb_cuda_setDevice(dev)
+</pre>
+<!--l. 232--><p class="nopar" >                                                           </div></div>
+<!--l. 237--><p class="noindent" >Set device to be used by current process.
+<!--l. 239--><p class="noindent" >
+   <h4 class="likesubsectionHead"><a 
+ id="x20-160000"></a>psb_cuda_DeviceHasUVA </h4>
+<a 
+ id="Q1-20-203"></a>
+<div class="center" 
+>
+<!--l. 247--><p class="noindent" >
+<div class="minipage"><pre class="verbatim" id="verbatim-116">
+hasUva&#x00A0;=&#x00A0;psb_cuda_DeviceHasUVA()
+</pre>
+<!--l. 251--><p class="nopar" >                                                           </div></div>
+<!--l. 256--><p class="noindent" >Returns true if device currently in use supports UVA (Unified Virtual Addressing).
+<!--l. 259--><p class="noindent" >
+   <h4 class="likesubsectionHead"><a 
+ id="x20-161000"></a>psb_cuda_WarpSize </h4>
+<a 
+ id="Q1-20-205"></a>
+<div class="center" 
+>
+<!--l. 267--><p class="noindent" >
+<div class="minipage"><pre class="verbatim" id="verbatim-117">
+nw&#x00A0;=&#x00A0;psb_cuda_WarpSize()
+</pre>
+<!--l. 271--><p class="nopar" >                                                           </div></div>
+<!--l. 276--><p class="noindent" >Returns the warp size.
+<!--l. 279--><p class="noindent" >
+   <h4 class="likesubsectionHead"><a 
+ id="x20-162000"></a>psb_cuda_MultiProcessors </h4>
+<a 
+ id="Q1-20-207"></a>
+                                                                  
+
+                                                                  
+<div class="center" 
+>
+<!--l. 287--><p class="noindent" >
+<div class="minipage"><pre class="verbatim" id="verbatim-118">
+nmp&#x00A0;=&#x00A0;psb_cuda_MultiProcessors()
+</pre>
+<!--l. 291--><p class="nopar" >                                                           </div></div>
+<!--l. 296--><p class="noindent" >Returns the number of multiprocessors in the CUDA device.
+<!--l. 298--><p class="noindent" >
+   <h4 class="likesubsectionHead"><a 
+ id="x20-163000"></a>psb_cuda_MaxThreadsPerMP </h4>
+<a 
+ id="Q1-20-209"></a>
+<div class="center" 
+>
+<!--l. 306--><p class="noindent" >
+<div class="minipage"><pre class="verbatim" id="verbatim-119">
+nt&#x00A0;=&#x00A0;psb_cuda_MaxThreadsPerMP()
+</pre>
+<!--l. 310--><p class="nopar" >                                                           </div></div>
+<!--l. 315--><p class="noindent" >Returns the maximum number of threads per multiprocessor.
+<!--l. 318--><p class="noindent" >
+   <h4 class="likesubsectionHead"><a 
+ id="x20-164000"></a>psb_cuda_MaxRegistersPerBlock </h4>
+<a 
+ id="Q1-20-211"></a>
+<div class="center" 
+>
+<!--l. 326--><p class="noindent" >
+<div class="minipage"><pre class="verbatim" id="verbatim-120">
+nr&#x00A0;=&#x00A0;psb_cuda_MaxRegistersPerBlock()
+</pre>
+<!--l. 330--><p class="nopar" >                                                           </div></div>
+<!--l. 335--><p class="noindent" >Returns the maximum number of registers per thread block.
+<!--l. 338--><p class="noindent" >
+   <h4 class="likesubsectionHead"><a 
+ id="x20-165000"></a>psb_cuda_MemoryClockRate </h4>
+<a 
+ id="Q1-20-213"></a>
+                                                                  
+
+                                                                  
+<div class="center" 
+>
+<!--l. 346--><p class="noindent" >
+<div class="minipage"><pre class="verbatim" id="verbatim-121">
+cl&#x00A0;=&#x00A0;psb_cuda_MemoryClockRate()
+</pre>
+<!--l. 350--><p class="nopar" >                                                           </div></div>
+<!--l. 355--><p class="noindent" >Returns the memory clock rate in KHz, as an integer.
+<!--l. 357--><p class="noindent" >
+   <h4 class="likesubsectionHead"><a 
+ id="x20-166000"></a>psb_cuda_MemoryBusWidth </h4>
+<a 
+ id="Q1-20-215"></a>
+<div class="center" 
+>
+<!--l. 365--><p class="noindent" >
+<div class="minipage"><pre class="verbatim" id="verbatim-122">
+nb&#x00A0;=&#x00A0;psb_cuda_MemoryBusWidth()
+</pre>
+<!--l. 369--><p class="nopar" >                                                           </div></div>
+<!--l. 374--><p class="noindent" >Returns the memory bus width in bits.
+<!--l. 376--><p class="noindent" >
+   <h4 class="likesubsectionHead"><a 
+ id="x20-167000"></a>psb_cuda_MemoryPeakBandwidth </h4>
+<a 
+ id="Q1-20-217"></a>
+<div class="center" 
+>
+<!--l. 384--><p class="noindent" >
+<div class="minipage"><pre class="verbatim" id="verbatim-123">
+bw&#x00A0;=&#x00A0;psb_cuda_MemoryPeakBandwidth()
+</pre>
+<!--l. 388--><p class="nopar" >                                                           </div></div>
+<!--l. 392--><p class="noindent" >Returns the peak memory bandwidth in MB/s (real double precision).
+                                                                  
+
+                                                                  
+                                                                  
+
+                                                                  
+                                                                  
+
+                                                                  
+<!--l. 126--><p class="indent" >
+                                                                  
+
+                                                                  
+                                                                  
+
+                                                                  
+   <!--l. 2--><div class="crosslinks"><p class="noindent">[<a 
+href="userhtmlse12.html" >prev</a>] [<a 
+href="userhtmlse12.html#tailuserhtmlse12.html" >prev-tail</a>] [<a 
+href="userhtmlse13.html" >front</a>] [<a 
+href="userhtml.html# " >up</a>] </p></div>
+<!--l. 2--><p class="indent" >   <a 
+ id="tailuserhtmlse13.html"></a>   
+</body></html> 
--- a/docs/html/userhtmlse2.html
+++ b/docs/html/userhtmlse2.html
@ -11,7 +11,7 @@
 </head><body 
 >
   <!--l. 72--><div class="crosslinks"><p class="noindent">[<a 
-href="userhtmlsu7.html" >next</a>] [<a 
+href="userhtmlse6.html" >next</a>] [<a 
 href="userhtmlse1.html" >prev</a>] [<a 
 href="userhtmlse1.html#tailuserhtmlse1.html" >prev-tail</a>] [<a 
 href="#tailuserhtmlse2.html">tail</a>] [<a 
@ -35,11 +35,11 @@ process are executed through calls to the serial sparse BLAS subroutines. In a
 similar way, the inter-process message exchanges are encapsulated in an
 applicaiton layer that has been strongly inspired by the Basic Linear Algebra
 Communication Subroutines (BLACS) library&#x00A0;<span class="cite">[<a 
-href="userhtmlli2.html#XBLACS">7</a>]</span>. Usually there is no need to deal
+href="userhtmlli2.html#XBLACS">6</a>]</span>. Usually there is no need to deal
 directly with MPI; however, in some cases, MPI routines are used directly
 to improve efficiency. For further details on our communication layer see
 Sec.&#x00A0;<a 
-href="userhtmlse7.html#x68-1050007">7<!--tex4ht:ref: sec:parenv --></a>.
+href="userhtmlse7.html#x12-1050007">7<!--tex4ht:ref: sec:parenv --></a>.
 <!--l. 101--><p class="indent" >   <hr class="figure"><div class="figure" 
 >
                                                                  
@ -85,7 +85,7 @@ class="cmtt-10">BLOCK</span></span></span>, as well as completely
 arbitrary assignments of equation indices to processes. In particular it is
 consistent with the usage of graph partitioning tools commonly available in
 the literature, e.g. METIS&#x00A0;<span class="cite">[<a 
-href="userhtmlli2.html#XMETIS">14</a>]</span>. Dense vectors conform to sparse matrices,
+href="userhtmlli2.html#XMETIS">13</a>]</span>. Dense vectors conform to sparse matrices,
 that is, the entries of a vector follow the same distribution of the matrix
 rows.
 <!--l. 146--><p class="indent" >   We assume that the sparse matrix is built in parallel, where each process generates
@ -96,30 +96,610 @@ href="userhtml5.html#fn1x0"><sup class="textsuperscript">1</sup></a></span><a
 id="x4-3002f1"></a> ,
 even though the resulting memory bottleneck would make this option unattractive in
 most cases.
-   <div class="subsectionTOCS">
-   &#x00A0;<span class="subsectionToc" >2.1 <a 
-href="userhtmlsu1.html#x6-40002.1">Basic Nomenclature</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >2.2 <a 
-href="userhtmlsu2.html#x8-50002.2">Library contents</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >2.3 <a 
-href="userhtmlsu3.html#x9-60002.3">Application structure</a></span>
-<br />   &#x00A0;&#x00A0;<span class="subsubsectionToc" >2.3.1 <a 
-href="userhtmlsu3.html#x9-70002.3.1">User-defined index mappings</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >2.4 <a 
-href="userhtmlsu4.html#x11-80002.4">Programming model</a></span>
-   </div>
+   <h4 class="subsectionHead"><span class="titlemark">2.1   </span> <a 
+ id="x4-40002.1"></a>Basic Nomenclature</h4>
+<!--l. 158--><p class="noindent" >Our computational model implies that the data allocation on the parallel distributed
+memory machine is guided by the structure of the physical model, and specifically by
+the discretization mesh of the PDE.
+<!--l. 163--><p class="indent" >   Each point of the discretization mesh will have (at least) one associated
+equation/variable, and therefore one index. We say that point <span 
+class="cmmi-10">i </span><span 
+class="cmti-10">depends </span>on point <span 
+class="cmmi-10">j </span>if
+the equation for a variable associated with <span 
+class="cmmi-10">i </span>contains a term in <span 
+class="cmmi-10">j</span>, or equivalently if
+<span 
+class="cmmi-10">a</span><sub><span 
+class="cmmi-7">ij</span></sub><span 
+class="cmmi-10">&#x2260;</span>0. After the partition of the discretization mesh into <span 
+class="cmti-10">sub-domains </span>assigned
+to the parallel processes, we classify the points of a given sub-domain as
+follows.
+     <dl class="description"><dt class="description">
+     <!--l. 172--><p class="noindent" >
+<span 
+class="cmbx-10">Internal.</span> </dt><dd 
+class="description">
+     <!--l. 172--><p class="noindent" >An internal point of a given domain <span 
+class="cmti-10">depends </span>only on points of the same
+     domain.  If  all  points  of  a  domain  are  assigned  to  one  process,  then
+     a  computational  step  (e.g.,  a  matrix-vector  product)  of  the  equations
+                                                                  
+
+                                                                  
+     associated  with  the  internal  points  requires  no  data  items  from  other
+     domains and no communications.
+     </dd><dt class="description">
+     <!--l. 181--><p class="noindent" >
+<span 
+class="cmbx-10">Boundary.</span> </dt><dd 
+class="description">
+     <!--l. 181--><p class="noindent" >A point of a given domain is a boundary point if it <span 
+class="cmti-10">depends </span>on points
+     belonging to other domains.
+     </dd><dt class="description">
+     <!--l. 185--><p class="noindent" >
+<span 
+class="cmbx-10">Halo.</span> </dt><dd 
+class="description">
+     <!--l. 185--><p class="noindent" >A halo point for a given domain is a point belonging to another domain
+     such that there is a boundary point which <span 
+class="cmti-10">depends </span>on it. Whenever performing
+     a computational step, such as a matrix-vector product, the values associated
+     with halo points are requested from other domains. A boundary point of a
+     given domain is usually a halo point for some other domain<span class="footnote-mark"><a 
+href="userhtml6.html#fn2x0"><sup class="textsuperscript">2</sup></a></span><a 
+ id="x4-4001f2"></a> ;
+     therefore the cardinality of the boundary points set denotes the amount
+     of data sent to other domains.
+     </dd><dt class="description">
+     <!--l. 198--><p class="noindent" >
+<span 
+class="cmbx-10">Overlap.</span> </dt><dd 
+class="description">
+     <!--l. 198--><p class="noindent" >An overlap point is a boundary point assigned to multiple domains. Any
+     operation that involves an overlap point has to be replicated for each
+     assignment.</dd></dl>
+<!--l. 202--><p class="noindent" >Overlap points do not usually exist in the basic data distributions; however they are a
+feature of Domain Decomposition Schwarz preconditioners which are the subject of
+related research work&#x00A0;<span class="cite">[<a 
+href="userhtmlli2.html#X2007c">3</a>,&#x00A0;<a 
+href="userhtmlli2.html#X2007d">2</a>]</span>.
+<!--l. 207--><p class="indent" >   We denote the sets of internal, boundary and halo points for a given subdomain
+by <span 
+class="cmsy-10"><img 
+src="cmsy10-49.png" alt="I" class="10x-x-49" /></span>, <span 
+class="cmsy-10"><img 
+src="cmsy10-42.png" alt="B" class="10x-x-42" /> </span>and <span 
+class="cmsy-10"><img 
+src="cmsy10-48.png" alt="H" class="10x-x-48" /></span>. Each subdomain is assigned to one process; each process usually owns
+one subdomain, although the user may choose to assign more than one subdomain to
+a process. If each process <span 
+class="cmmi-10">i </span>owns one subdomain, the number of rows in
+the local sparse matrix is <span 
+class="cmsy-10">|<img 
+src="cmsy10-49.png" alt="I" class="10x-x-49" /></span><sub><span 
+class="cmmi-7">i</span></sub><span 
+class="cmsy-10">| </span>+ <span 
+class="cmsy-10">|<img 
+src="cmsy10-42.png" alt="B" class="10x-x-42" /></span><sub><span 
+class="cmmi-7">i</span></sub><span 
+class="cmsy-10">|</span>, and the number of local columns (i.e.
+those for which there exists at least one non-zero entry in the local rows) is
+<span 
+class="cmsy-10">|<img 
+src="cmsy10-49.png" alt="I" class="10x-x-49" /></span><sub><span 
+class="cmmi-7">i</span></sub><span 
+class="cmsy-10">| </span>+ <span 
+class="cmsy-10">|<img 
+src="cmsy10-42.png" alt="B" class="10x-x-42" /></span><sub><span 
+class="cmmi-7">i</span></sub><span 
+class="cmsy-10">| </span>+ <span 
+class="cmsy-10">|<img 
+src="cmsy10-48.png" alt="H" class="10x-x-48" /></span><sub><span 
+class="cmmi-7">i</span></sub><span 
+class="cmsy-10">|</span>.
+<!--l. 217--><p class="indent" >   <hr class="figure"><div class="figure" 
+>
+                                                                  

+                                                                  
+<a 
+ id="x4-4003r2"></a>
+                                                                  

+                                                                  
+<div class="center" 
+>
+<!--l. 218--><p class="noindent" >
+<!--l. 221--><p class="noindent" ><img 
+src="points.png" alt="PIC"  
+width="46" height="46" ></div>
+<br /> <div class="caption" 
+><span class="id">Figure&#x00A0;2: </span><span  
+class="content">Point classification.</span></div><!--tex4ht:label?: x4-4003r2 -->
+                                                                  
+
+                                                                  
+<!--l. 227--><p class="indent" >   </div><hr class="endfigure">
+<!--l. 229--><p class="indent" >   This classification of mesh points guides the naming scheme that we adopted in
+the library internals and in the data structures. We explicitly note that &#8220;Halo&#8221; points
+are also often called &#8220;ghost&#8221; points in the literature.
+   <h4 class="subsectionHead"><span class="titlemark">2.2   </span> <a 
+ id="x4-50002.2"></a>Library contents</h4>
+<!--l. 238--><p class="noindent" >The PSBLAS library consists of various classes of subroutines:
+     <dl class="description"><dt class="description">
+     <!--l. 240--><p class="noindent" >
+<span 
+class="cmbx-10">Computational routines</span> </dt><dd 
+class="description">
+     <!--l. 240--><p class="noindent" >comprising:
+         <ul class="itemize1">
+         <li class="itemize">
+         <!--l. 242--><p class="noindent" >Sparse matrix by dense matrix product;
+         </li>
+         <li class="itemize">
+         <!--l. 243--><p class="noindent" >Sparse triangular systems solution for block diagonal matrices;
+         </li>
+         <li class="itemize">
+         <!--l. 245--><p class="noindent" >Vector and matrix norms;
+         </li>
+         <li class="itemize">
+         <!--l. 246--><p class="noindent" >Dense matrix sums;
+         </li>
+         <li class="itemize">
+         <!--l. 247--><p class="noindent" >Dot products.</li></ul>
+     </dd><dt class="description">
+     <!--l. 249--><p class="noindent" >
+<span 
+class="cmbx-10">Communication routines</span> </dt><dd 
+class="description">
+     <!--l. 249--><p class="noindent" >handling halo and overlap communications;
+     </dd><dt class="description">
+     <!--l. 251--><p class="noindent" >
+<span 
+class="cmbx-10">Data management and auxiliary routines</span> </dt><dd 
+class="description">
+     <!--l. 251--><p class="noindent" >including:
+         <ul class="itemize1">
+         <li class="itemize">
+         <!--l. 253--><p class="noindent" >Parallel environment management;
+         </li>
+         <li class="itemize">
+         <!--l. 254--><p class="noindent" >Communication descriptors allocation;
+                                                                  
+
+                                                                  
+         </li>
+         <li class="itemize">
+         <!--l. 255--><p class="noindent" >Dense and sparse matrix allocation;
+         </li>
+         <li class="itemize">
+         <!--l. 256--><p class="noindent" >Dense and sparse matrix build and update;
+         </li>
+         <li class="itemize">
+         <!--l. 257--><p class="noindent" >Sparse matrix and data distribution preprocessing.</li></ul>
+     </dd><dt class="description">
+     <!--l. 259--><p class="noindent" >
+<span 
+class="cmbx-10">Preconditioner routines</span> </dt><dd 
+class="description">
+     <!--l. 259--><p class="noindent" >
+     </dd><dt class="description">
+     <!--l. 260--><p class="noindent" >
+<span 
+class="cmbx-10">Iterative methods</span> </dt><dd 
+class="description">
+     <!--l. 260--><p class="noindent" >a subset of Krylov subspace iterative methods</dd></dl>
+<!--l. 263--><p class="noindent" >The following naming scheme has been adopted for all the symbols internally defined in
+the PSBLAS software package:
+     <ul class="itemize1">
+     <li class="itemize">
+     <!--l. 266--><p class="noindent" >all symbols (i.e. subroutine names, data types...) are prefixed by <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_</span></span></span>
+     </li>
+     <li class="itemize">
+     <!--l. 268--><p class="noindent" >all data type names are suffixed by <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">_type</span></span></span>
+     </li>
+     <li class="itemize">
+     <!--l. 269--><p class="noindent" >all constants are suffixed by <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">_</span></span></span>
+     </li>
+     <li class="itemize">
+     <!--l. 270--><p class="noindent" >all top-level subroutine names follow the rule <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_xxname</span></span></span> where <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">xx</span></span></span> can be
+     either:
+         <ul class="itemize2">
+         <li class="itemize">
+         <!--l. 273--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">ge</span></span></span>: the routine is related to dense data,
+         </li>
+         <li class="itemize">
+         <!--l. 274--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">sp</span></span></span>: the routine is related to sparse data,
+         </li>
+         <li class="itemize">
+         <!--l. 275--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">cd</span></span></span>: the routine is related to a communication descriptor (see&#x00A0;<a 
+href="userhtmlse3.html#x8-90003">3<!--tex4ht:ref: sec:datastruct --></a>).</li></ul>
+                                                                  
+
+                                                                  
+     <!--l. 278--><p class="noindent" >For example the <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_geins</span></span></span>, <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_spins</span></span></span> and <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_cdins</span></span></span> perform the same
+     action (see&#x00A0;<a 
+href="userhtmlse6.html#x11-770006">6<!--tex4ht:ref: sec:toolsrout --></a>) on dense matrices, sparse matrices and communication
+     descriptors respectively. Interface overloading allows the usage of the same
+     subroutine names for both real and complex data.</li></ul>
+<!--l. 285--><p class="noindent" >In the description of the subroutines, arguments or argument entries are classified
+as:
+     <dl class="description"><dt class="description">
+     <!--l. 288--><p class="noindent" >
+<span 
+class="cmbx-10">global</span> </dt><dd 
+class="description">
+     <!--l. 288--><p class="noindent" >For  input  arguments,  the  value  must  be  the  same  on  all  processes
+     participating in the subroutine call; for output arguments the value is
+     guaranteed to be the same.
+     </dd><dt class="description">
+     <!--l. 291--><p class="noindent" >
+<span 
+class="cmbx-10">local</span> </dt><dd 
+class="description">
+     <!--l. 291--><p class="noindent" >Each process has its own value(s) independently.</dd></dl>
+<!--l. 293--><p class="noindent" >To finish our general description, we define a version string with the constant
+   <div class="math-display" >
+<img 
+src="userhtml0x.png" alt="psb_version_string_
+" class="math-display" ></div>
+<!--l. 295--><p class="nopar" > whose current value is <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">3.8.0</span></span></span>
+<!--l. 298--><p class="noindent" >
+   <h4 class="subsectionHead"><span class="titlemark">2.3   </span> <a 
+ id="x4-60002.3"></a>Application structure</h4>
+<!--l. 301--><p class="noindent" >The main underlying principle of the PSBLAS library is that the library objects are
+created and exist with reference to a discretized space to which there corresponds
+an index space and a matrix sparsity pattern. As an example, consider a
+cell-centered finite-volume discretization of the Navier-Stokes equations on a
+simulation domain; the index space 1<span 
+class="cmmi-10">&#x2026;</span><span 
+class="cmmi-10">n </span>is isomorphic to the set of cell centers,
+whereas the pattern of the associated linear system matrix is isomorphic to the
+adjacency graph imposed on the discretization mesh by the discretization
+stencil.
+<!--l. 311--><p class="indent" >   Thus the first order of business is to establish an index space, and this is done
+with a call to <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_cdall</span></span></span> in which we specify the size of the index space <span 
+class="cmmi-10">n </span>and the
+allocation of the elements of the index space to the various processes making up the
+MPI (virtual) parallel machine.
+<!--l. 317--><p class="indent" >   The index space is partitioned among processes, and this creates a mapping from
+the &#8220;global&#8221; numbering 1<span 
+class="cmmi-10">&#x2026;</span><span 
+class="cmmi-10">n </span>to a numbering &#8220;local&#8221; to each process; each process <span 
+class="cmmi-10">i</span>
+will own a certain subset 1<span 
+class="cmmi-10">&#x2026;</span><span 
+class="cmmi-10">n</span><sub>row<sub><span 
+class="cmmi-5">i</span></sub></sub>, each element of which corresponds to a certain
+element of 1<span 
+class="cmmi-10">&#x2026;</span><span 
+class="cmmi-10">n</span>. The user does not explicitly set this mapping; when the application
+needs to indicate to which element of the index space a certain item is related,
+such as the row and column index of a matrix coefficient, it does so in the
+&#8220;global&#8221; numbering, and the library will translate into the appropriate &#8220;local&#8221;
+numbering.
+                                                                  

                                                                  
+<!--l. 327--><p class="indent" >   For a given index space 1<span 
+class="cmmi-10">&#x2026;</span><span 
+class="cmmi-10">n </span>there are many possible associated topologies, i.e.
+many different discretization stencils; thus the description of the index space is not
+completed until the user has defined a sparsity pattern, either explicitly through
+<span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_cdins</span></span></span> or implicitly through <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_spins</span></span></span>. The descriptor is finalized with a call to
+<span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_cdasb</span></span></span> and a sparse matrix with a call to <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_spasb</span></span></span>. After <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_cdasb</span></span></span> each
+process <span 
+class="cmmi-10">i </span>will have defined a set of &#8220;halo&#8221; (or &#8220;ghost&#8221;) indices <span 
+class="cmmi-10">n</span><sub>row<sub><span 
+class="cmmi-5">i</span></sub></sub> + 1<span 
+class="cmmi-10">&#x2026;</span><span 
+class="cmmi-10">n</span><sub>col<sub>
+<span 
+class="cmmi-5">i</span></sub></sub>,
+denoting elements of the index space that are <span 
+class="cmti-10">not </span>assigned to process <span 
+class="cmmi-10">i</span>; however the
+variables associated with them are needed to complete computations associated with
+the sparse matrix <span 
+class="cmmi-10">A</span>, and thus they have to be fetched from (neighbouring)
+processes. The descriptor of the index space is built exactly for the purpose
+of properly sequencing the communication steps required to achieve this
+objective.
+<!--l. 343--><p class="indent" >   A simple application structure will walk through the index space allocation,
+matrix/vector creation and linear system solution as follows:
+     <ol  class="enumerate1" >
+<li 
+  class="enumerate" id="x4-6002x1">
+     <!--l. 347--><p class="noindent" >Initialize parallel environment with <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_init</span></span></span>
+     </li>
+<li 
+  class="enumerate" id="x4-6004x2">
+     <!--l. 348--><p class="noindent" >Initialize index space with <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_cdall</span></span></span>
+     </li>
+<li 
+  class="enumerate" id="x4-6006x3">
+     <!--l. 349--><p class="noindent" >Allocate sparse matrix and dense vectors with <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_spall</span></span></span> and <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_geall</span></span></span>
+     </li>
+<li 
+  class="enumerate" id="x4-6008x4">
+     <!--l. 351--><p class="noindent" >Loop over all local rows, generate matrix and vector entries, and insert
+     them with <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_spins</span></span></span> and <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_geins</span></span></span>
+     </li>
+<li 
+  class="enumerate" id="x4-6010x5">
+     <!--l. 353--><p class="noindent" >Assemble the various entities:
+         <ol  class="enumerate2" >
+<li 
+  class="enumerate" id="x4-6012x1">
+         <!--l. 355--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_cdasb</span></span></span>
+         </li>
+<li 
+  class="enumerate" id="x4-6014x2">
+         <!--l. 356--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_spasb</span></span></span>
+                                                                  
+
+                                                                  
+         </li>
+<li 
+  class="enumerate" id="x4-6016x3">
+         <!--l. 357--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_geasb</span></span></span></li></ol>
+     </li>
+<li 
+  class="enumerate" id="x4-6018x6">
+     <!--l. 359--><p class="noindent" >Choose the preconditioner to be used with <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">prec%init</span></span></span> and build it with
+     <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">prec%build</span></span></span><span class="footnote-mark"><a 
+href="userhtml7.html#fn3x0"><sup class="textsuperscript">3</sup></a></span><a 
+ id="x4-6019f3"></a> .
+     </li>
+<li 
+  class="enumerate" id="x4-6022x7">
+     <!--l. 363--><p class="noindent" >Call the iterative driver <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_krylov</span></span></span> with the method of choice, e.g.
+     <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">bicgstab</span></span></span>.</li></ol>
+<!--l. 366--><p class="noindent" >This is the structure of the sample programs in the directory <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">test/pargen/</span></span></span>.
+<!--l. 369--><p class="indent" >   For a simulation in which the same discretization mesh is used over multiple time
+steps, the following structure may be more appropriate:
+     <ol  class="enumerate1" >
+<li 
+  class="enumerate" id="x4-6024x1">
+     <!--l. 372--><p class="noindent" >Initialize parallel environment with <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_init</span></span></span>
+     </li>
+<li 
+  class="enumerate" id="x4-6026x2">
+     <!--l. 373--><p class="noindent" >Initialize index space with <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_cdall</span></span></span>
+     </li>
+<li 
+  class="enumerate" id="x4-6028x3">
+     <!--l. 374--><p class="noindent" >Loop over the topology of the discretization mesh and build the descriptor
+     with <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_cdins</span></span></span>
+     </li>
+<li 
+  class="enumerate" id="x4-6030x4">
+     <!--l. 376--><p class="noindent" >Assemble the descriptor with <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_cdasb</span></span></span>
+     </li>
+<li 
+  class="enumerate" id="x4-6032x5">
+     <!--l. 377--><p class="noindent" >Allocate  the  sparse  matrices  and  dense  vectors  with  <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_spall</span></span></span> and
+     <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_geall</span></span></span>
+                                                                  
+
+                                                                  
+     </li>
+<li 
+  class="enumerate" id="x4-6034x6">
+     <!--l. 379--><p class="noindent" >Loop over the time steps:
+         <ol  class="enumerate2" >
+<li 
+  class="enumerate" id="x4-6036x1">
+         <!--l. 381--><p class="noindent" >If after the first time step, reinitialize the sparse matrix with <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_sprn</span></span></span>;
+         also zero out the dense vectors;
+         </li>
+<li 
+  class="enumerate" id="x4-6038x2">
+         <!--l. 384--><p class="noindent" >Loop over the mesh, generate the coefficients and insert/update them
+         with <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_spins</span></span></span> and <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_geins</span></span></span>
+         </li>
+<li 
+  class="enumerate" id="x4-6040x3">
+         <!--l. 386--><p class="noindent" >Assemble with <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_spasb</span></span></span> and <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_geasb</span></span></span>
+         </li>
+<li 
+  class="enumerate" id="x4-6042x4">
+         <!--l. 387--><p class="noindent" >Choose and build preconditioner with <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">prec%init</span></span></span> and <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">prec%build</span></span></span>
+         </li>
+<li 
+  class="enumerate" id="x4-6044x5">
+         <!--l. 389--><p class="noindent" >Call the iterative method of choice, e.g. <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_bicgstab</span></span></span></li></ol>
+     </li></ol>
+<!--l. 392--><p class="noindent" >The insertion routines will be called as many times as needed; they only need to be
+called on the data that is actually allocated to the current process, i.e. each process
+generates its own data.
+<!--l. 397--><p class="indent" >   In principle there is no specific order in the calls to <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_spins</span></span></span>, nor is there a
+requirement to build a matrix row in its entirety before calling the routine; this
+allows the application programmer to walk through the discretization mesh element
+by element, generating the main part of a given matrix row but also contributions to
+the rows corresponding to neighbouring elements.
+<!--l. 404--><p class="indent" >   From a functional point of view it is even possible to execute one call for each
+nonzero coefficient; however this would have a substantial computational
+overhead. It is therefore advisable to pack a certain amount of data into each
+call to the insertion routine, say touching on a few tens of rows; the best
+performing value would depend on both the architecture of the computer being
+used and on the problem structure. At the opposite extreme, it would be
+possible to generate the entire part of a coefficient matrix residing on a
+process and pass it in a single call to <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_spins</span></span></span>; this, however, would entail a
+doubling of memory occupation, and thus would be almost always far from
+optimal.
+                                                                  
+
+                                                                  
+<!--l. 417--><p class="noindent" >
+   <h5 class="subsubsectionHead"><span class="titlemark">2.3.1   </span> <a 
+ id="x4-70002.3.1"></a>User-defined index mappings</h5>
+<!--l. 419--><p class="noindent" >PSBLAS supports user-defined global to local index mappings, subject to the
+constraints outlined in sec.&#x00A0;<a 
+href="#x4-60002.3">2.3<!--tex4ht:ref: sec:appstruct --></a>:
+     <ol  class="enumerate1" >
+<li 
+  class="enumerate" id="x4-7002x1">
+     <!--l. 422--><p class="noindent" >The set of indices owned locally must be mapped to the set 1<span 
+class="cmmi-10">&#x2026;</span><span 
+class="cmmi-10">n</span><sub>row<sub><span 
+class="cmmi-5">i</span></sub></sub>;
+     </li>
+<li 
+  class="enumerate" id="x4-7004x2">
+     <!--l. 424--><p class="noindent" >The set of halo points must be mapped to the set <span 
+class="cmmi-10">n</span><sub>row<sub><span 
+class="cmmi-5">i</span></sub></sub> + 1<span 
+class="cmmi-10">&#x2026;</span><span 
+class="cmmi-10">n</span><sub>col<sub>
+<span 
+class="cmmi-5">i</span></sub></sub>;</li></ol>
+<!--l. 427--><p class="noindent" >but otherwise the mapping is arbitrary. The user application is responsible for ensuring
+consistency of this mapping; some errors may be caught by the library, but
+this is not guaranteed. The application structure to support this usage is as
+follows:
+     <ol  class="enumerate1" >
+<li 
+  class="enumerate" id="x4-7006x1">
+     <!--l. 433--><p class="noindent" >Initialize index
+     space with <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_cdall(ictx,desc,info,vl=vl,lidx=lidx)</span></span></span> passing the
+     vectors <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">vl(:)</span></span></span> containing the set of global indices owned by the current
+     process and <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">lidx(:)</span></span></span> containing the corresponding local indices;
+     </li>
+<li 
+  class="enumerate" id="x4-7008x2">
+     <!--l. 438--><p class="noindent" >Add the halo points <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">ja(:)</span></span></span> and their associated local indices <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">lidx(:)</span></span></span> with
+     one or more calls to <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_cdins(nz,ja,desc,info,lidx=lidx)</span></span></span>;
+     </li>
+<li 
+  class="enumerate" id="x4-7010x3">
+     <!--l. 441--><p class="noindent" >Assemble the descriptor with <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_cdasb</span></span></span>;
+     </li>
+<li 
+  class="enumerate" id="x4-7012x4">
+     <!--l. 442--><p class="noindent" >Build the sparse matrices and vectors, optionally making use in <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_spins</span></span></span>
+     and <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_geins</span></span></span> of the <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">local</span></span></span> argument specifying that the indices in <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">ia</span></span></span>,
+     <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">ja</span></span></span> and <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">irw</span></span></span>, respectively, are already local indices.</li></ol>
+                                                                  
+
+                                                                  
+<!--l. 449--><p class="noindent" >
+   <h4 class="subsectionHead"><span class="titlemark">2.4   </span> <a 
+ id="x4-80002.4"></a>Programming model</h4>
+<!--l. 451--><p class="noindent" >The PSBLAS library is based on the Single Program Multiple Data (SPMD)
+programming model: each process participating in the computation performs the
+same actions on a chunk of data. Parallelism is thus data-driven.
+<!--l. 456--><p class="indent" >   Because of this structure, many subroutines coordinate their action across the
+various processes, thus providing an implicit synchronization point, and therefore
+<span 
+class="cmti-10">must </span>be called simultaneously by all processes participating in the computation. This
+is certainly true for the data allocation and assembly routines, for all the
+computational routines and for some of the tools routines.
+<!--l. 464--><p class="indent" >   However there are many cases where no synchronization, and indeed no
+communication among processes, is implied; for instance, all the routines in sec.&#x00A0;<a 
+href="userhtmlse3.html#x8-90003">3<!--tex4ht:ref: sec:datastruct --></a>
+are only acting on the local data structures, and thus may be called independently.
+The most important case is that of the coefficient insertion routines: since the
+number of coefficients in the sparse and dense matrices varies among the processors,
+and since the user is free to choose an arbitrary order in building the matrix entries,
+these routines cannot imply a synchronization.
+<!--l. 474--><p class="indent" >   Throughout this user&#8217;s guide each subroutine will be clearly indicated
+as:
+     <dl class="description"><dt class="description">
+     <!--l. 477--><p class="noindent" >
+<span 
+class="cmbx-10">Synchronous:</span> </dt><dd 
+class="description">
+     <!--l. 477--><p class="noindent" >must  be  called  simultaneously  by  all  the  processes  in  the  relevant
+     communication context;
+     </dd><dt class="description">
+     <!--l. 479--><p class="noindent" >
+<span 
+class="cmbx-10">Asynchronous:</span> </dt><dd 
+class="description">
+     <!--l. 479--><p class="noindent" >may be called in a totally independent manner.</dd></dl>
+                                                                  
+
+                                                                  
+                                                                  
+
+                                                                  
+                                                                  
+
+                                                                  
+                                                                  

                                                                  
   <!--l. 1--><div class="crosslinks"><p class="noindent">[<a 
-href="userhtmlsu7.html" >next</a>] [<a 
+href="userhtmlse6.html" >next</a>] [<a 
 href="userhtmlse1.html" >prev</a>] [<a 
 href="userhtmlse1.html#tailuserhtmlse1.html" >prev-tail</a>] [<a 
 href="userhtmlse2.html" >front</a>] [<a 
 href="userhtml.html#userhtmlse2.html" >up</a>] </p></div>
 <!--l. 1--><p class="indent" >   <a 
- id="tailuserhtmlse2.html"></a>  
+ id="tailuserhtmlse2.html"></a>   
 </body></html> 
--- a/docs/html/userhtmlse3.html
+++ b/docs/html/userhtmlse3.html
--- a/docs/html/userhtmlse4.html
+++ b/docs/html/userhtmlse4.html
--- a/docs/html/userhtmlse5.html
+++ b/docs/html/userhtmlse5.html
--- a/docs/html/userhtmlse6.html
+++ b/docs/html/userhtmlse6.html
--- a/docs/html/userhtmlse7.html
+++ b/docs/html/userhtmlse7.html
--- a/docs/html/userhtmlse8.html
+++ b/docs/html/userhtmlse8.html
@ -11,13 +11,13 @@
 </head><body 
 >
   <!--l. 3--><div class="crosslinks"><p class="noindent">[<a 
-href="userhtmlsu80.html" >next</a>] [<a 
+href="userhtmlse12.html" >next</a>] [<a 
 href="userhtmlse7.html" >prev</a>] [<a 
 href="userhtmlse7.html#tailuserhtmlse7.html" >prev-tail</a>] [<a 
-href="userhtmlsu71.html#tailuserhtmlse8.html">tail</a>] [<a 
-href="userhtml.html#userhtmlsu76.html" >up</a>] </p></div>
+href="userhtmlse5.html#tailuserhtmlse8.html">tail</a>] [<a 
+href="userhtml.html#userhtmlse11.html" >up</a>] </p></div>
   <h3 class="sectionHead"><span class="titlemark">8   </span> <a 
- id="x86-1230008"></a>Error handling</h3>
+ id="x13-1230008"></a>Error handling</h3>
 <!--l. 5--><p class="noindent" >The PSBLAS library error handling policy has been completely rewritten in version
 2.0. The idea behind the design of this new error handling strategy is to keep error
 messages on a stack allowing the user to trace back up to the point where the first
@ -36,7 +36,7 @@ zero, an error condition is raised. This process continues on all the levels of
 nested calls until the level where the user decides to abort the program
 execution.
 <!--l. 23--><p class="indent" >   Figure&#x00A0;<a 
-href="#x86-123025r5">5<!--tex4ht:ref: fig:routerr --></a> shows the layout of a generic <span class="obeylines-h"><span class="verb"><span 
+href="#x13-123025r5">5<!--tex4ht:ref: fig:routerr --></a> shows the layout of a generic <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_foo</span></span></span> routine with respect to the
 PSBLAS-2.0 error handling policy. It is possible to see how, whenever an error
 condition is detected, the <span class="obeylines-h"><span class="verb"><span 
@ -58,7 +58,7 @@ explicitly.

                                                                  
 <!--l. 40--><p class="indent" >   <a 
- id="x86-123025r5"></a><hr class="float"><div class="float" 
+ id="x13-123025r5"></a><hr class="float"><div class="float" 
 >
                                                                  

@ -67,270 +67,241 @@ explicitly.
 >
 <!--l. 101--><p class="noindent" >
 <div class="fbox"><div class="minipage"><!--l. 72-->
-<div class="lstlisting" id="listing-5"><span class="label"><a 
- id="x86-123001r1"></a></span><span 
-class="cmtt-9">subroutine</span><span 
+<pre class="lstlisting" id="listing-154"><span class="label"><a 
+ id="x13-123001r1"></a></span><span style="color:#000000"><span 
+class="cmtt-9">subroutine</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">psb_foo</span></span><span style="color:#000000"><span 
+class="cmtt-9">(</span></span><span style="color:#000000"><span 
+class="cmtt-9">some</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">args</span></span><span style="color:#000000"><span 
+class="cmtt-9">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">info</span></span><span style="color:#000000"><span 
+class="cmtt-9">)</span></span> 
+<span class="label"><a 
+ id="x13-123002r2"></a></span><span 
 class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">psb_foo</span><span 
-class="cmtt-9">(</span><span 
-class="cmtt-9">some</span><span 
 class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">args</span><span 
-class="cmtt-9">,</span><span 
+class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span 
+class="cmtt-9">...</span></span> 
+<span class="label"><a 
+ id="x13-123003r3"></a></span><span 
 class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">info</span><span 
-class="cmtt-9">)</span><span 
-class="cmtt-9">&#x00A0;</span><br /><span class="label"><a 
- id="x86-123002r2"></a></span><span 
 class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span 
+class="cmtt-9">if</span></span><span style="color:#000000"><span 
+class="cmtt-9">(</span></span><span style="color:#000000"><span 
+class="cmtt-9">error</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">detected</span></span><span style="color:#000000"><span 
+class="cmtt-9">)</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">then</span></span> 
+<span class="label"><a 
+ id="x13-123004r4"></a></span><span 
 class="cmtt-9">&#x00A0;</span><span 
 class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">...</span><span 
-class="cmtt-9">&#x00A0;</span><br /><span class="label"><a 
- id="x86-123003r3"></a></span><span 
 class="cmtt-9">&#x00A0;</span><span 
 class="cmtt-9">&#x00A0;</span><span 
 class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">if</span><span 
-class="cmtt-9">(</span><span 
-class="cmtt-9">error</span><span 
+class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span 
+class="cmtt-9">info</span></span><span style="color:#000000"><span 
+class="cmtt-9">=</span></span><span style="color:#000000"><span 
+class="cmtt-9">errcode1</span></span> 
+<span class="label"><a 
+ id="x13-123005r5"></a></span><span 
 class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">detected</span><span 
-class="cmtt-9">)</span><span 
 class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">then</span><span 
-class="cmtt-9">&#x00A0;</span><br /><span class="label"><a 
- id="x86-123004r4"></a></span><span 
 class="cmtt-9">&#x00A0;</span><span 
 class="cmtt-9">&#x00A0;</span><span 
 class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span 
+class="cmtt-9">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">psb_errpush</span></span><span style="color:#000000"><span 
+class="cmtt-9">(</span></span><span style="color:#000000"><span 
+class="cmtt-9">&#8217;</span></span><span style="color:#000000"><span 
+class="cmtt-9">psb_foo</span></span><span style="color:#000000"><span 
+class="cmtt-9">&#8217;</span></span><span style="color:#000000"><span 
+class="cmtt-9">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">errcode1</span></span><span style="color:#000000"><span 
+class="cmtt-9">)</span></span> 
+<span class="label"><a 
+ id="x13-123006r6"></a></span><span 
 class="cmtt-9">&#x00A0;</span><span 
 class="cmtt-9">&#x00A0;</span><span 
 class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">info</span><span 
-class="cmtt-9">=</span><span 
-class="cmtt-9">errcode1</span><span 
-class="cmtt-9">&#x00A0;</span><br /><span class="label"><a 
- id="x86-123005r5"></a></span><span 
 class="cmtt-9">&#x00A0;</span><span 
 class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span 
+class="cmtt-9">goto</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">9999</span></span> 
+<span class="label"><a 
+ id="x13-123007r7"></a></span><span 
 class="cmtt-9">&#x00A0;</span><span 
 class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span 
+class="cmtt-9">end</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">if</span></span> 
+<span class="label"><a 
+ id="x13-123008r8"></a></span><span 
 class="cmtt-9">&#x00A0;</span><span 
 class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">call</span><span 
+class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span 
+class="cmtt-9">...</span></span> 
+<span class="label"><a 
+ id="x13-123009r9"></a></span><span 
 class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">psb_errpush</span><span 
-class="cmtt-9">(</span><span 
-class="cmtt-9">&#8217;</span><span 
-class="cmtt-9">psb_foo</span><span 
-class="cmtt-9">&#8217;</span><span 
-class="cmtt-9">,</span><span 
 class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">errcode1</span><span 
-class="cmtt-9">)</span><span 
-class="cmtt-9">&#x00A0;</span><br /><span class="label"><a 
- id="x86-123006r6"></a></span><span 
+class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span 
+class="cmtt-9">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">psb_bar</span></span><span style="color:#000000"><span 
+class="cmtt-9">(</span></span><span style="color:#000000"><span 
+class="cmtt-9">some</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">args</span></span><span style="color:#000000"><span 
+class="cmtt-9">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">info</span></span><span style="color:#000000"><span 
+class="cmtt-9">)</span></span> 
+<span class="label"><a 
+ id="x13-123010r10"></a></span><span 
 class="cmtt-9">&#x00A0;</span><span 
 class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span 
+class="cmtt-9">if</span></span><span style="color:#000000"><span 
+class="cmtt-9">(</span></span><span style="color:#000000"><span 
+class="cmtt-9">info</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">.</span></span><span style="color:#000000"><span 
+class="cmtt-9">ne</span></span><span style="color:#000000"><span 
+class="cmtt-9">.</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">zero</span></span><span style="color:#000000"><span 
+class="cmtt-9">)</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">then</span></span> 
+<span class="label"><a 
+ id="x13-123011r11"></a></span><span 
 class="cmtt-9">&#x00A0;</span><span 
 class="cmtt-9">&#x00A0;</span><span 
 class="cmtt-9">&#x00A0;</span><span 
 class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">goto</span><span 
 class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">9999</span><span 
-class="cmtt-9">&#x00A0;</span><br /><span class="label"><a 
- id="x86-123007r7"></a></span><span 
+class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span 
+class="cmtt-9">info</span></span><span style="color:#000000"><span 
+class="cmtt-9">=</span></span><span style="color:#000000"><span 
+class="cmtt-9">errcode2</span></span> 
+<span class="label"><a 
+ id="x13-123012r12"></a></span><span 
 class="cmtt-9">&#x00A0;</span><span 
 class="cmtt-9">&#x00A0;</span><span 
 class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">end</span><span 
 class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">if</span><span 
-class="cmtt-9">&#x00A0;</span><br /><span class="label"><a 
- id="x86-123008r8"></a></span><span 
 class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span 
+class="cmtt-9">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">psb_errpush</span></span><span style="color:#000000"><span 
+class="cmtt-9">(</span></span><span style="color:#000000"><span 
+class="cmtt-9">&#8217;</span></span><span style="color:#000000"><span 
+class="cmtt-9">psb_foo</span></span><span style="color:#000000"><span 
+class="cmtt-9">&#8217;</span></span><span style="color:#000000"><span 
+class="cmtt-9">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">errcode2</span></span><span style="color:#000000"><span 
+class="cmtt-9">)</span></span> 
+<span class="label"><a 
+ id="x13-123013r13"></a></span><span 
 class="cmtt-9">&#x00A0;</span><span 
 class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">...</span><span 
-class="cmtt-9">&#x00A0;</span><br /><span class="label"><a 
- id="x86-123009r9"></a></span><span 
 class="cmtt-9">&#x00A0;</span><span 
 class="cmtt-9">&#x00A0;</span><span 
 class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">call</span><span 
+class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span 
+class="cmtt-9">goto</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">9999</span></span> 
+<span class="label"><a 
+ id="x13-123014r14"></a></span><span 
 class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">psb_bar</span><span 
-class="cmtt-9">(</span><span 
-class="cmtt-9">some</span><span 
 class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">args</span><span 
-class="cmtt-9">,</span><span 
+class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span 
+class="cmtt-9">end</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">if</span></span> 
+<span class="label"><a 
+ id="x13-123015r15"></a></span><span 
 class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">info</span><span 
-class="cmtt-9">)</span><span 
-class="cmtt-9">&#x00A0;</span><br /><span class="label"><a 
- id="x86-123010r10"></a></span><span 
 class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span 
+class="cmtt-9">...</span></span> 
+<span class="label"><a 
+ id="x13-123016r16"></a></span><span style="color:#000000"><span 
+class="cmtt-9">9999</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">continue</span></span> 
+<span class="label"><a 
+ id="x13-123017r17"></a></span><span 
 class="cmtt-9">&#x00A0;</span><span 
 class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">if</span><span 
-class="cmtt-9">(</span><span 
-class="cmtt-9">info</span><span 
+class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span 
+class="cmtt-9">if</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">(</span></span><span style="color:#000000"><span 
+class="cmtt-9">err_act</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">.</span></span><span style="color:#000000"><span 
+class="cmtt-9">eq</span></span><span style="color:#000000"><span 
+class="cmtt-9">.</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">act_abort</span></span><span style="color:#000000"><span 
+class="cmtt-9">)</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">then</span></span> 
+<span class="label"><a 
+ id="x13-123018r18"></a></span><span 
 class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">.</span><span 
-class="cmtt-9">ne</span><span 
-class="cmtt-9">.</span><span 
 class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">zero</span><span 
-class="cmtt-9">)</span><span 
 class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">then</span><span 
-class="cmtt-9">&#x00A0;</span><br /><span class="label"><a 
- id="x86-123011r11"></a></span><span 
 class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span 
+class="cmtt-9">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">psb_error</span></span><span style="color:#000000"><span 
+class="cmtt-9">(</span></span><span style="color:#000000"><span 
+class="cmtt-9">icontxt</span></span><span style="color:#000000"><span 
+class="cmtt-9">)</span></span> 
+<span class="label"><a 
+ id="x13-123019r19"></a></span><span 
 class="cmtt-9">&#x00A0;</span><span 
 class="cmtt-9">&#x00A0;</span><span 
 class="cmtt-9">&#x00A0;</span><span 
 class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span 
+class="cmtt-9">return</span></span> 
+<span class="label"><a 
+ id="x13-123020r20"></a></span><span 
 class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">info</span><span 
-class="cmtt-9">=</span><span 
-class="cmtt-9">errcode2</span><span 
-class="cmtt-9">&#x00A0;</span><br /><span class="label"><a 
- id="x86-123012r12"></a></span><span 
 class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span 
+class="cmtt-9">else</span></span> 
+<span class="label"><a 
+ id="x13-123021r21"></a></span><span 
 class="cmtt-9">&#x00A0;</span><span 
 class="cmtt-9">&#x00A0;</span><span 
 class="cmtt-9">&#x00A0;</span><span 
 class="cmtt-9">&#x00A0;</span><span 
+class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span 
+class="cmtt-9">return</span></span> 
+<span class="label"><a 
+ id="x13-123022r22"></a></span><span 
 class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">call</span><span 
 class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">psb_errpush</span><span 
-class="cmtt-9">(</span><span 
-class="cmtt-9">&#8217;</span><span 
-class="cmtt-9">psb_foo</span><span 
-class="cmtt-9">&#8217;</span><span 
-class="cmtt-9">,</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">errcode2</span><span 
-class="cmtt-9">)</span><span 
-class="cmtt-9">&#x00A0;</span><br /><span class="label"><a 
- id="x86-123013r13"></a></span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">goto</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">9999</span><span 
-class="cmtt-9">&#x00A0;</span><br /><span class="label"><a 
- id="x86-123014r14"></a></span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">end</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">if</span><span 
-class="cmtt-9">&#x00A0;</span><br /><span class="label"><a 
- id="x86-123015r15"></a></span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">...</span><span 
-class="cmtt-9">&#x00A0;</span><br /><span class="label"><a 
- id="x86-123016r16"></a></span><span 
-class="cmtt-9">9999</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">continue</span><span 
-class="cmtt-9">&#x00A0;</span><br /><span class="label"><a 
- id="x86-123017r17"></a></span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">if</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">(</span><span 
-class="cmtt-9">err_act</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">.</span><span 
-class="cmtt-9">eq</span><span 
-class="cmtt-9">.</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">act_abort</span><span 
-class="cmtt-9">)</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">then</span><span 
-class="cmtt-9">&#x00A0;</span><br /><span class="label"><a 
- id="x86-123018r18"></a></span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">call</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">psb_error</span><span 
-class="cmtt-9">(</span><span 
-class="cmtt-9">icontxt</span><span 
-class="cmtt-9">)</span><span 
-class="cmtt-9">&#x00A0;</span><br /><span class="label"><a 
- id="x86-123019r19"></a></span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">return</span><span 
-class="cmtt-9">&#x00A0;</span><br /><span class="label"><a 
- id="x86-123020r20"></a></span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">else</span><span 
-class="cmtt-9">&#x00A0;</span><br /><span class="label"><a 
- id="x86-123021r21"></a></span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">return</span><span 
-class="cmtt-9">&#x00A0;</span><br /><span class="label"><a 
- id="x86-123022r22"></a></span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">end</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">if</span><span 
-class="cmtt-9">&#x00A0;</span><br /><span class="label"><a 
- id="x86-123023r23"></a></span><span 
-class="cmtt-9">&#x00A0;</span><br /><span class="label"><a 
- id="x86-123024r24"></a></span><span 
-class="cmtt-9">end</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">subroutine</span><span 
-class="cmtt-9">&#x00A0;</span><span 
-class="cmtt-9">psb_foo</span>
-</div>                                                              </div> </div>
+class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span 
+class="cmtt-9">end</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">if</span></span> 
+<span class="label"><a 
+ id="x13-123023r23"></a></span> 
+<span class="label"><a 
+ id="x13-123024r24"></a></span><span style="color:#000000"><span 
+class="cmtt-9">end</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">subroutine</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-9">psb_foo</span></span></pre></div></div>
 </div>
 <br /> <div class="caption" 
 ><span class="id">Listing 5: </span><span  
 class="content">The layout of a generic <span 
 class="cmtt-10">psb</span><span 
 class="cmtt-10">_foo </span>routine with respect to PSBLAS-2.0
-error handling policy.</span></div><!--tex4ht:label?: x86-123025r5 -->
+error handling policy.</span></div><!--tex4ht:label?: x13-123025r5 -->
                                                                  

                                                                  
   </div><hr class="endfloat" />
 <!--l. 112--><p class="indent" >   Figure&#x00A0;<a 
-href="#x86-123026r6">6<!--tex4ht:ref: fig:errormsg --></a> reports a sample error message generated by the PSBLAS-2.0
+href="#x13-123026r6">6<!--tex4ht:ref: fig:errormsg --></a> reports a sample error message generated by the PSBLAS-3.0
 library. This error has been generated by the fact that the user has chosen the
 invalid &#8220;FOO&#8221; storage format to represent the sparse matrix. From this
 error message it is possible to see that the error has been detected inside
@ -342,7 +313,7 @@ process).

                                                                  
 <!--l. 120--><p class="indent" >   <a 
- id="x86-123026r6"></a><hr class="float"><div class="float" 
+ id="x13-123026r6"></a><hr class="float"><div class="float" 
 >
                                                                  

@ -371,7 +342,7 @@ Aborting...
 <br /> <div class="caption" 
 ><span class="id">Listing 6: </span><span  
 class="content">A sample PSBLAS-3.0 error message. Process 0 detected an error
-condition inside the psb_cest subroutine</span></div><!--tex4ht:label?: x86-123026r6 -->
+condition inside the psb_cest subroutine</span></div><!--tex4ht:label?: x13-123026r6 -->
                                                                  

                                                                  
@ -379,28 +350,236 @@ condition inside the psb_cest subroutine</span></div><!--tex4ht:label?: x86-1230
                                                                  

                                                                  
-   <div class="subsectionTOCS">
-   &#x00A0;<span class="subsectionToc" >8.1 <a 
-href="userhtmlsu74.html#x87-1240008.1">psb_errpush &#8212; Pushes an error code onto the error stack</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >8.2 <a 
-href="userhtmlsu75.html#x88-1250008.2">psb_error &#8212; Prints the error stack content and aborts execution</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >8.3 <a 
-href="userhtmlsu76.html#x89-1260008.3">psb_set_errverbosity &#8212; Sets the verbosity of error messages</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >8.4 <a 
-href="userhtmlsu77.html#x90-1270008.4">psb_set_erraction &#8212; Set the type of action to be taken upon error condition</a></span>
-   </div>
+   <h4 class="subsectionHead"><span class="titlemark">8.1   </span> <a 
+ id="x13-1240008.1"></a>psb_errpush &#8212; Pushes an error code onto the error stack</h4>
+   <!--l. 174-->
+   <pre class="lstlisting" id="listing-155"><span class="label"><a 
+ id="x13-124001r1"></a></span><span style="color:#000000"><span 
+class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">psb_errpush</span></span><span style="color:#000000"><span 
+class="cmtt-10">(</span></span><span style="color:#000000"><span 
+class="cmtt-10">err_c</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">r_name</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">i_err</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">a_err</span></span><span style="color:#000000"><span 
+class="cmtt-10">)</span></span></pre>
+   
+<!--l. 178--><p class="noindent" >
+     <dl class="description"><dt class="description">
+     <!--l. 179--><p class="noindent" >
+<span 
+class="cmbx-10">Type:</span> </dt><dd 
+class="description">
+     <!--l. 179--><p class="noindent" >Asynchronous.
+     </dd><dt class="description">
+     <!--l. 180--><p class="noindent" >
+<span 
+class="cmbx-10">On Entry</span> </dt><dd 
+class="description">
+     <!--l. 180--><p class="noindent" >
+     </dd><dt class="description">
+     <!--l. 181--><p class="noindent" >
+<span 
+class="cmbx-10">err</span><span 
+class="cmbx-10">_c</span> </dt><dd 
+class="description">
+     <!--l. 181--><p class="noindent" >the error code<br 
+class="newline" />Scope: <span 
+class="cmbx-10">local </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">required</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">in</span>.<br 
+class="newline" />Specified as: an integer.
+     </dd><dt class="description">
+     <!--l. 186--><p class="noindent" >
+<span 
+class="cmbx-10">r</span><span 
+class="cmbx-10">_name</span> </dt><dd 
+class="description">
+     <!--l. 186--><p class="noindent" >the routine where the error has been caught.<br 
+class="newline" />Scope: <span 
+class="cmbx-10">local </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">required</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">in</span>.<br 
+class="newline" />Specified as: a string.<br 
+class="newline" />
+     </dd><dt class="description">
+     <!--l. 191--><p class="noindent" >
+<span 
+class="cmbx-10">i</span><span 
+class="cmbx-10">_err</span> </dt><dd 
+class="description">
+     <!--l. 191--><p class="noindent" >additional info for error code<br 
+class="newline" />Scope: <span 
+class="cmbx-10">local </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">optional</span><br 
+class="newline" />Specified as: an integer array<br 
+class="newline" />
+     </dd><dt class="description">
+     <!--l. 195--><p class="noindent" >
+<span 
+class="cmbx-10">a</span><span 
+class="cmbx-10">_err</span> </dt><dd 
+class="description">
+                                                                  

+                                                                  
+     <!--l. 195--><p class="noindent" >additional info for error code<br 
+class="newline" />Scope: <span 
+class="cmbx-10">local </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">optional</span><br 
+class="newline" />Specified as: a string.<br 
+class="newline" /></dd></dl>
+                                                                  
+
+                                                                  
+   <h4 class="subsectionHead"><span class="titlemark">8.2   </span> <a 
+ id="x13-1250008.2"></a>psb_error &#8212; Prints the error stack content and aborts execution</h4>
+   <!--l. 204-->
+   <pre class="lstlisting" id="listing-156"><span class="label"><a 
+ id="x13-125001r1"></a></span><span style="color:#000000"><span 
+class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">psb_error</span></span><span style="color:#000000"><span 
+class="cmtt-10">(</span></span><span style="color:#000000"><span 
+class="cmtt-10">icontxt</span></span><span style="color:#000000"><span 
+class="cmtt-10">)</span></span></pre>
+   
+<!--l. 208--><p class="noindent" >
+     <dl class="description"><dt class="description">
+     <!--l. 209--><p class="noindent" >
+<span 
+class="cmbx-10">Type:</span> </dt><dd 
+class="description">
+     <!--l. 209--><p class="noindent" >Asynchronous.
+     </dd><dt class="description">
+     <!--l. 210--><p class="noindent" >
+<span 
+class="cmbx-10">On Entry</span> </dt><dd 
+class="description">
+     <!--l. 210--><p class="noindent" >
+     </dd><dt class="description">
+     <!--l. 211--><p class="noindent" >
+<span 
+class="cmbx-10">icontxt</span> </dt><dd 
+class="description">
+     <!--l. 211--><p class="noindent" >the communication context.<br 
+class="newline" />Scope: <span 
+class="cmbx-10">global </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">optional</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">in</span>.<br 
+class="newline" />Specified as: an integer.</dd></dl>
+                                                                  
+
+                                                                  
+   <h4 class="subsectionHead"><span class="titlemark">8.3   </span> <a 
+ id="x13-1260008.3"></a>psb_set_errverbosity &#8212; Sets the verbosity of error messages</h4>
+   <!--l. 224-->
+   <pre class="lstlisting" id="listing-157"><span class="label"><a 
+ id="x13-126001r1"></a></span><span style="color:#000000"><span 
+class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">psb_set_errverbosity</span></span><span style="color:#000000"><span 
+class="cmtt-10">(</span></span><span style="color:#000000"><span 
+class="cmtt-10">v</span></span><span style="color:#000000"><span 
+class="cmtt-10">)</span></span></pre>
+   
+<!--l. 228--><p class="noindent" >
+     <dl class="description"><dt class="description">
+     <!--l. 229--><p class="noindent" >
+<span 
+class="cmbx-10">Type:</span> </dt><dd 
+class="description">
+     <!--l. 229--><p class="noindent" >Asynchronous.
+     </dd><dt class="description">
+     <!--l. 230--><p class="noindent" >
+<span 
+class="cmbx-10">On Entry</span> </dt><dd 
+class="description">
+     <!--l. 230--><p class="noindent" >
+     </dd><dt class="description">
+     <!--l. 231--><p class="noindent" >
+<span 
+class="cmbx-10">v</span> </dt><dd 
+class="description">
+     <!--l. 231--><p class="noindent" >the verbosity level<br 
+class="newline" />Scope: <span 
+class="cmbx-10">global</span><br 
+class="newline" />Type: <span 
+class="cmbx-10">required</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">in</span>.<br 
+class="newline" />Specified as: an integer.</dd></dl>
+                                                                  
+
+                                                                  
+   <h4 class="subsectionHead"><span class="titlemark">8.4   </span> <a 
+ id="x13-1270008.4"></a>psb_set_erraction &#8212; Set the type of action to be taken upon error
+condition</h4>
+   <!--l. 241-->
+   <pre class="lstlisting" id="listing-158"><span class="label"><a 
+ id="x13-127001r1"></a></span><span style="color:#000000"><span 
+class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">psb_set_erraction</span></span><span style="color:#000000"><span 
+class="cmtt-10">(</span></span><span style="color:#000000"><span 
+class="cmtt-10">err_act</span></span><span style="color:#000000"><span 
+class="cmtt-10">)</span></span></pre>
+   
+<!--l. 245--><p class="noindent" >
+     <dl class="description"><dt class="description">
+     <!--l. 246--><p class="noindent" >
+<span 
+class="cmbx-10">Type:</span> </dt><dd 
+class="description">
+     <!--l. 246--><p class="noindent" >Asynchronous.
+     </dd><dt class="description">
+     <!--l. 247--><p class="noindent" >
+<span 
+class="cmbx-10">On Entry</span> </dt><dd 
+class="description">
+     <!--l. 247--><p class="noindent" >
+     </dd><dt class="description">
+     <!--l. 248--><p class="noindent" >
+<span 
+class="cmbx-10">err</span><span 
+class="cmbx-10">_act</span> </dt><dd 
+class="description">
+     <!--l. 248--><p class="noindent" >the type of action.<br 
+class="newline" />Scope: <span 
+class="cmbx-10">global </span><br 
+class="newline" />Type: <span 
+class="cmbx-10">required</span><br 
+class="newline" />Intent: <span 
+class="cmbx-10">in</span>.<br 
+class="newline" />Specified as: an integer. Possible values: <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_act_ret</span></span></span>, <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">psb_act_abort</span></span></span>.</dd></dl>
+                                                                  
+
+                                                                  
+                                                                  

+                                                                  
+                                                                  

                                                                  
+                                                                  

                                                                  
   <!--l. 1--><div class="crosslinks"><p class="noindent">[<a 
-href="userhtmlsu80.html" >next</a>] [<a 
+href="userhtmlse12.html" >next</a>] [<a 
 href="userhtmlse7.html" >prev</a>] [<a 
 href="userhtmlse7.html#tailuserhtmlse7.html" >prev-tail</a>] [<a 
 href="userhtmlse8.html" >front</a>] [<a 
-href="userhtml.html#userhtmlsu76.html" >up</a>] </p></div>
+href="userhtml.html#userhtmlse11.html" >up</a>] </p></div>
 <!--l. 1--><p class="indent" >   <a 
- id="tailuserhtmlse8.html"></a>  
+ id="tailuserhtmlse8.html"></a>   
 </body></html> 
--- a/docs/html/userhtmlse9.html
+++ b/docs/html/userhtmlse9.html
@ -11,47 +11,728 @@
 </head><body 
 >
   <!--l. 1--><div class="crosslinks"><p class="noindent">[<a 
-href="userhtmlsu86.html" >next</a>] [<a 
+href="userhtmlse13.html" >next</a>] [<a 
 href="userhtmlse8.html" >prev</a>] [<a 
 href="userhtmlse8.html#tailuserhtmlse8.html" >prev-tail</a>] [<a 
-href="userhtmlsu75.html#tailuserhtmlse9.html">tail</a>] [<a 
-href="userhtml.html#userhtmlsu80.html" >up</a>] </p></div>
+href="userhtmlse6.html#tailuserhtmlse9.html">tail</a>] [<a 
+href="userhtml.html#userhtmlse12.html" >up</a>] </p></div>
   <h3 class="sectionHead"><span class="titlemark">9   </span> <a 
- id="x91-1280009"></a>Utilities</h3>
+ id="x14-1280009"></a>Utilities</h3>
 <!--l. 4--><p class="noindent" >We have some utilities available for input and output of sparse matrices; the
 interfaces to these routines are available in the module <span class="obeylines-h"><span class="verb"><span 
 class="cmtt-10">psb_util_mod</span></span></span>.
                                                                  

                                                                  
-   <div class="subsectionTOCS">
-   &#x00A0;<span class="subsectionToc" >9.1 <a 
-href="userhtmlsu78.html#x92-1290009.1"> hb_read &#8212; Read a sparse matrix from a file in the Harwell&#8211;Boeing format</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >9.2 <a 
-href="userhtmlsu79.html#x93-1300009.2">hb_write &#8212; Write a sparse matrix to a file in the Harwell&#8211;Boeing format</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >9.3 <a 
-href="userhtmlsu80.html#x94-1310009.3">mm_mat_read &#8212; Read a sparse matrix from a file in the MatrixMarket format</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >9.4 <a 
-href="userhtmlsu81.html#x95-1320009.4">mm_array_read &#8212; Read a dense array from a file in the MatrixMarket format</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >9.5 <a 
-href="userhtmlsu82.html#x96-1330009.5">mm_mat_write &#8212; Write a sparse matrix to a file in the MatrixMarket format</a></span>
-<br />   &#x00A0;<span class="subsectionToc" >9.6 <a 
-href="userhtmlsu83.html#x97-1340009.6">mm_array_write &#8212; Write a dense array from a file in the MatrixMarket format</a></span>
-   </div>
+   <h4 class="subsectionHead"><span class="titlemark">9.1   </span> <a 
+ id="x14-1290009.1"></a> hb_read &#8212; Read a sparse matrix from a file in the Harwell&#8211;Boeing
+format</h4>
+   <!--l. 16-->
+   <pre class="lstlisting" id="listing-159"><span class="label"><a 
+ id="x14-129001r1"></a></span><span style="color:#000000"><span 
+class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">hb_read</span></span><span style="color:#000000"><span 
+class="cmtt-10">(</span></span><span style="color:#000000"><span 
+class="cmtt-10">a</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">iret</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">iunit</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">filename</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">b</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">mtitle</span></span><span style="color:#000000"><span 
+class="cmtt-10">)</span></span></pre>
+   
+<!--l. 20--><p class="noindent" >
+     <dl class="description"><dt class="description">
+     <!--l. 21--><p class="noindent" >
+<span 
+class="cmbx-10">Type:</span> </dt><dd 
+class="description">
+     <!--l. 21--><p class="noindent" >Asynchronous.
+     </dd><dt class="description">
+     <!--l. 22--><p class="noindent" >
+<span 
+class="cmbx-10">On Entry</span>  </dt><dd 
+class="description">
+     <!--l. 22--><p class="noindent" >
+     </dd><dt class="description">
+     <!--l. 23--><p class="noindent" >
+<span 
+class="cmbx-10">filename</span> </dt><dd 
+class="description">
+     <!--l. 23--><p class="noindent" >The name of the file to be read.<br 
+class="newline" />Type:<span 
+class="cmbx-10">optional</span>.<br 
+class="newline" />Specified as: a character variable containing a valid file name, or <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">-</span></span></span>, in
+     which case the default input unit 5 (i.e. standard input in Unix jargon) is
+     used. Default: <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">-</span></span></span>.
+     </dd><dt class="description">
+     <!--l. 28--><p class="noindent" >
+<span 
+class="cmbx-10">iunit</span> </dt><dd 
+class="description">
+     <!--l. 28--><p class="noindent" >The Fortran file unit number.<br 
+class="newline" />Type:<span 
+class="cmbx-10">optional</span>.<br 
+class="newline" />Specified as: an integer value. Only meaningful if filename is not <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">-</span></span></span>.</dd></dl>
+<!--l. 33--><p class="noindent" >
+     <dl class="description"><dt class="description">
+     <!--l. 34--><p class="noindent" >
+<span 
+class="cmbx-10">On Return</span> </dt><dd 
+class="description">
+     <!--l. 34--><p class="noindent" >
+     </dd><dt class="description">
+     <!--l. 35--><p class="noindent" >
+<span 
+class="cmbx-10">a</span> </dt><dd 
+class="description">
+     <!--l. 35--><p class="noindent" >the sparse matrix read from file.<br 
+class="newline" />Type:<span 
+class="cmbx-10">required</span>.<br 
+class="newline" />Specified as: a structured data of type <a 
+href="userhtmlse3.html#spdata"><span 
+class="cmtt-10">psb</span><span 
+class="cmtt-10">_Tspmat</span><span 
+class="cmtt-10">_type</span></a>.
+                                                                  
+
+                                                                  
+     </dd><dt class="description">
+     <!--l. 38--><p class="noindent" >
+<span 
+class="cmbx-10">b</span> </dt><dd 
+class="description">
+     <!--l. 38--><p class="noindent" >Right hand side(s).<br 
+class="newline" />Type: <span 
+class="cmbx-10">Optional </span><br 
+class="newline" />An array of type real or complex, rank 2 and having the ALLOCATABLE
+     attribute; will be allocated and filled in if the input file contains a right
+     hand side, otherwise will be left in the UNALLOCATED state.
+     </dd><dt class="description">
+     <!--l. 43--><p class="noindent" >
+<span 
+class="cmbx-10">mtitle</span> </dt><dd 
+class="description">
+     <!--l. 43--><p class="noindent" >Matrix title.<br 
+class="newline" />Type: <span 
+class="cmbx-10">Optional </span><br 
+class="newline" />A character variable of length 72 holding a copy of the matrix title as
+     specified by the Harwell-Boeing format and contained in the input file.
+     </dd><dt class="description">
+     <!--l. 48--><p class="noindent" >
+<span 
+class="cmbx-10">iret</span> </dt><dd 
+class="description">
+     <!--l. 48--><p class="noindent" >Error code.<br 
+class="newline" />Type: <span 
+class="cmbx-10">required </span><br 
+class="newline" />An integer value; 0 means no error has been detected.</dd></dl>
+                                                                  
+
+                                                                  
+   <h4 class="subsectionHead"><span class="titlemark">9.2   </span> <a 
+ id="x14-1300009.2"></a>hb_write &#8212; Write a sparse matrix to a file in the Harwell&#8211;Boeing
+format</h4>
+   <!--l. 59-->
+   <pre class="lstlisting" id="listing-160"><span class="label"><a 
+ id="x14-130001r1"></a></span><span style="color:#000000"><span 
+class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">hb_write</span></span><span style="color:#000000"><span 
+class="cmtt-10">(</span></span><span style="color:#000000"><span 
+class="cmtt-10">a</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">iret</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">iunit</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">filename</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">key</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">rhs</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">mtitle</span></span><span style="color:#000000"><span 
+class="cmtt-10">)</span></span></pre>
+   
+<!--l. 65--><p class="noindent" >
+     <dl class="description"><dt class="description">
+     <!--l. 66--><p class="noindent" >
+<span 
+class="cmbx-10">Type:</span> </dt><dd 
+class="description">
+     <!--l. 66--><p class="noindent" >Asynchronous.
+     </dd><dt class="description">
+     <!--l. 67--><p class="noindent" >
+<span 
+class="cmbx-10">On Entry</span>  </dt><dd 
+class="description">
+     <!--l. 67--><p class="noindent" >
+     </dd><dt class="description">
+     <!--l. 68--><p class="noindent" >
+<span 
+class="cmbx-10">a</span> </dt><dd 
+class="description">
+     <!--l. 68--><p class="noindent" >the sparse matrix to be written.<br 
+class="newline" />Type:<span 
+class="cmbx-10">required</span>.<br 
+class="newline" />Specified as: a structured data of type <a 
+href="userhtmlse3.html#spdata"><span 
+class="cmtt-10">psb</span><span 
+class="cmtt-10">_Tspmat</span><span 
+class="cmtt-10">_type</span></a>.
+     </dd><dt class="description">
+     <!--l. 71--><p class="noindent" >
+<span 
+class="cmbx-10">rhs</span> </dt><dd 
+class="description">
+     <!--l. 71--><p class="noindent" >Right hand side.<br 
+class="newline" />Type: <span 
+class="cmbx-10">Optional </span><br 
+class="newline" />An array of type real or complex, rank 1 and having the ALLOCATABLE
+     attribute; if present, its contents are written to the output file as the
+     right hand side.
+     </dd><dt class="description">
+     <!--l. 76--><p class="noindent" >
+<span 
+class="cmbx-10">filename</span> </dt><dd 
+class="description">
+     <!--l. 76--><p class="noindent" >The name of the file to be written to.<br 
+class="newline" />Type:<span 
+class="cmbx-10">optional</span>.<br 
+class="newline" />Specified as: a character variable containing a valid file name, or <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">-</span></span></span>, in
+     which case the default output unit 6 (i.e. standard output in Unix jargon)
+     is used. Default: <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">-</span></span></span>.
+     </dd><dt class="description">
+     <!--l. 81--><p class="noindent" >
+<span 
+class="cmbx-10">iunit</span> </dt><dd 
+class="description">
+     <!--l. 81--><p class="noindent" >The Fortran file unit number.<br 
+class="newline" />Type:<span 
+class="cmbx-10">optional</span>.<br 
+class="newline" />Specified as: an integer value. Only meaningful if filename is not <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">-</span></span></span>.
+                                                                  
+
+                                                                  
+     </dd><dt class="description">
+     <!--l. 84--><p class="noindent" >
+<span 
+class="cmbx-10">key</span> </dt><dd 
+class="description">
+     <!--l. 84--><p class="noindent" >Matrix key.<br 
+class="newline" />Type: <span 
+class="cmbx-10">Optional </span><br 
+class="newline" />A character variable of length 8 holding the matrix key as specified by
+     the Harwell-Boeing format and to be written to file.
+     </dd><dt class="description">
+     <!--l. 89--><p class="noindent" >
+<span 
+class="cmbx-10">mtitle</span> </dt><dd 
+class="description">
+     <!--l. 89--><p class="noindent" >Matrix title.<br 
+class="newline" />Type: <span 
+class="cmbx-10">Optional </span><br 
+class="newline" />A character variable of length 72 holding the matrix title as specified by
+     the Harwell-Boeing format and to be written to file.</dd></dl>
+<!--l. 96--><p class="noindent" >
+     <dl class="description"><dt class="description">
+     <!--l. 97--><p class="noindent" >
+<span 
+class="cmbx-10">On Return</span> </dt><dd 
+class="description">
+     <!--l. 97--><p class="noindent" >
+     </dd><dt class="description">
+     <!--l. 98--><p class="noindent" >
+<span 
+class="cmbx-10">iret</span> </dt><dd 
+class="description">
+     <!--l. 98--><p class="noindent" >Error code.<br 
+class="newline" />Type: <span 
+class="cmbx-10">required </span><br 
+class="newline" />An integer value; 0 means no error has been detected.</dd></dl>
+                                                                  
+
+                                                                  
+   <h4 class="subsectionHead"><span class="titlemark">9.3   </span> <a 
+ id="x14-1310009.3"></a>mm_mat_read &#8212; Read a sparse matrix from a file in the MatrixMarket
+format</h4>
+   <!--l. 111-->
+   <pre class="lstlisting" id="listing-161"><span class="label"><a 
+ id="x14-131001r1"></a></span><span style="color:#000000"><span 
+class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">mm_mat_read</span></span><span style="color:#000000"><span 
+class="cmtt-10">(</span></span><span style="color:#000000"><span 
+class="cmtt-10">a</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">iret</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">iunit</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">filename</span></span><span style="color:#000000"><span 
+class="cmtt-10">)</span></span></pre>
+   
+<!--l. 115--><p class="noindent" >
+     <dl class="description"><dt class="description">
+     <!--l. 116--><p class="noindent" >
+<span 
+class="cmbx-10">Type:</span> </dt><dd 
+class="description">
+     <!--l. 116--><p class="noindent" >Asynchronous.
+     </dd><dt class="description">
+     <!--l. 117--><p class="noindent" >
+<span 
+class="cmbx-10">On Entry</span>  </dt><dd 
+class="description">
+     <!--l. 117--><p class="noindent" >
+     </dd><dt class="description">
+     <!--l. 118--><p class="noindent" >
+<span 
+class="cmbx-10">filename</span> </dt><dd 
+class="description">
+     <!--l. 118--><p class="noindent" >The name of the file to be read.<br 
+class="newline" />Type:<span 
+class="cmbx-10">optional</span>.<br 
+class="newline" />Specified as: a character variable containing a valid file name, or <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">-</span></span></span>, in
+     which case the default input unit 5 (i.e. standard input in Unix jargon) is
+     used. Default: <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">-</span></span></span>.
+     </dd><dt class="description">
+     <!--l. 123--><p class="noindent" >
+<span 
+class="cmbx-10">iunit</span> </dt><dd 
+class="description">
+     <!--l. 123--><p class="noindent" >The Fortran file unit number.<br 
+class="newline" />Type:<span 
+class="cmbx-10">optional</span>.<br 
+class="newline" />Specified as: an integer value. Only meaningful if filename is not <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">-</span></span></span>.</dd></dl>
+<!--l. 128--><p class="noindent" >
+     <dl class="description"><dt class="description">
+     <!--l. 129--><p class="noindent" >
+<span 
+class="cmbx-10">On Return</span> </dt><dd 
+class="description">
+     <!--l. 129--><p class="noindent" >
+     </dd><dt class="description">
+     <!--l. 130--><p class="noindent" >
+<span 
+class="cmbx-10">a</span> </dt><dd 
+class="description">
+     <!--l. 130--><p class="noindent" >the sparse matrix read from file.<br 
+class="newline" />Type:<span 
+class="cmbx-10">required</span>.<br 
+class="newline" />Specified as: a structured data of type <a 
+href="userhtmlse3.html#spdata"><span 
+class="cmtt-10">psb</span><span 
+class="cmtt-10">_Tspmat</span><span 
+class="cmtt-10">_type</span></a>.
+                                                                  

+                                                                  
+     </dd><dt class="description">
+     <!--l. 133--><p class="noindent" >
+<span 
+class="cmbx-10">iret</span> </dt><dd 
+class="description">
+     <!--l. 133--><p class="noindent" >Error code.<br 
+class="newline" />Type: <span 
+class="cmbx-10">required </span><br 
+class="newline" />An integer value; 0 means no error has been detected.</dd></dl>
+                                                                  

+                                                                  
+   <h4 class="subsectionHead"><span class="titlemark">9.4   </span> <a 
+ id="x14-1320009.4"></a>mm_array_read &#8212; Read a dense array from a file in the MatrixMarket
+format</h4>
+   <!--l. 142-->
+   <pre class="lstlisting" id="listing-162"><span class="label"><a 
+ id="x14-132001r1"></a></span><span style="color:#000000"><span 
+class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">mm_array_read</span></span><span style="color:#000000"><span 
+class="cmtt-10">(</span></span><span style="color:#000000"><span 
+class="cmtt-10">b</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">iret</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">iunit</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">filename</span></span><span style="color:#000000"><span 
+class="cmtt-10">)</span></span></pre>
+   
+<!--l. 146--><p class="noindent" >
+     <dl class="description"><dt class="description">
+     <!--l. 147--><p class="noindent" >
+<span 
+class="cmbx-10">Type:</span> </dt><dd 
+class="description">
+     <!--l. 147--><p class="noindent" >Asynchronous.
+     </dd><dt class="description">
+     <!--l. 148--><p class="noindent" >
+<span 
+class="cmbx-10">On Entry</span>  </dt><dd 
+class="description">
+     <!--l. 148--><p class="noindent" >
+     </dd><dt class="description">
+     <!--l. 149--><p class="noindent" >
+<span 
+class="cmbx-10">filename</span> </dt><dd 
+class="description">
+     <!--l. 149--><p class="noindent" >The name of the file to be read.<br 
+class="newline" />Type:<span 
+class="cmbx-10">optional</span>.<br 
+class="newline" />Specified as: a character variable containing a valid file name, or <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">-</span></span></span>, in
+     which case the default input unit 5 (i.e. standard input in Unix jargon) is
+     used. Default: <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">-</span></span></span>.
+     </dd><dt class="description">
+     <!--l. 154--><p class="noindent" >
+<span 
+class="cmbx-10">iunit</span> </dt><dd 
+class="description">
+     <!--l. 154--><p class="noindent" >The Fortran file unit number.<br 
+class="newline" />Type:<span 
+class="cmbx-10">optional</span>.<br 
+class="newline" />Specified as: an integer value. Only meaningful if filename is not <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">-</span></span></span>.</dd></dl>
+<!--l. 159--><p class="noindent" >
+     <dl class="description"><dt class="description">
+     <!--l. 160--><p class="noindent" >
+<span 
+class="cmbx-10">On Return</span> </dt><dd 
+class="description">
+     <!--l. 160--><p class="noindent" >
+     </dd><dt class="description">
+     <!--l. 161--><p class="noindent" >
+<span 
+class="cmbx-10">b</span> </dt><dd 
+class="description">
+     <!--l. 161--><p class="noindent" >Right hand side(s).<br 
+class="newline" />Type: <span 
+class="cmbx-10">required </span><br 
+class="newline" />An  array  of  type  real  or  complex,  rank  1  or  2  and  having  the
+                                                                  

+                                                                  
+     ALLOCATABLE attribute, or an object of type <a 
+href="userhtmlse3.html#vdata"><span 
+class="cmtt-10">psb</span><span 
+class="cmtt-10">_T</span><span 
+class="cmtt-10">_vect</span><span 
+class="cmtt-10">_type</span></a>, of
+     type real or complex.<br 
+class="newline" />Will be allocated and filled in if the input file contains a right hand side,
+     otherwise will be left in the UNALLOCATED state. <br 
+class="newline" />
+     </dd><dt class="description">
+     <!--l. 168--><p class="noindent" >
+<span 
+class="cmbx-10">iret</span> </dt><dd 
+class="description">
+     <!--l. 168--><p class="noindent" >Error code.<br 
+class="newline" />Type: <span 
+class="cmbx-10">required </span><br 
+class="newline" />An integer value; 0 means no error has been detected.</dd></dl>
+                                                                  

+                                                                  
+   <h4 class="subsectionHead"><span class="titlemark">9.5   </span> <a 
+ id="x14-1330009.5"></a>mm_mat_write &#8212; Write a sparse matrix to a file in the MatrixMarket
+format</h4>
+   <!--l. 179-->
+   <pre class="lstlisting" id="listing-163"><span class="label"><a 
+ id="x14-133001r1"></a></span><span style="color:#000000"><span 
+class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">mm_mat_write</span></span><span style="color:#000000"><span 
+class="cmtt-10">(</span></span><span style="color:#000000"><span 
+class="cmtt-10">a</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">mtitle</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">iret</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">iunit</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">filename</span></span><span style="color:#000000"><span 
+class="cmtt-10">)</span></span></pre>
+   
+<!--l. 182--><p class="noindent" >
+     <dl class="description"><dt class="description">
+     <!--l. 183--><p class="noindent" >
+<span 
+class="cmbx-10">Type:</span> </dt><dd 
+class="description">
+     <!--l. 183--><p class="noindent" >Asynchronous.
+     </dd><dt class="description">
+     <!--l. 184--><p class="noindent" >
+<span 
+class="cmbx-10">On Entry</span>  </dt><dd 
+class="description">
+     <!--l. 184--><p class="noindent" >
+     </dd><dt class="description">
+     <!--l. 185--><p class="noindent" >
+<span 
+class="cmbx-10">a</span> </dt><dd 
+class="description">
+     <!--l. 185--><p class="noindent" >the sparse matrix to be written.<br 
+class="newline" />Type:<span 
+class="cmbx-10">required</span>.<br 
+class="newline" />Specified as: a structured data of type <a 
+href="userhtmlse3.html#spdata"><span 
+class="cmtt-10">psb</span><span 
+class="cmtt-10">_Tspmat</span><span 
+class="cmtt-10">_type</span></a>.
+     </dd><dt class="description">
+     <!--l. 188--><p class="noindent" >
+<span 
+class="cmbx-10">mtitle</span> </dt><dd 
+class="description">
+     <!--l. 188--><p class="noindent" >Matrix title.<br 
+class="newline" />Type: <span 
+class="cmbx-10">required </span><br 
+class="newline" />A  character  variable  holding  a  descriptive  title  for  the  matrix  to  be
+     written to file.
+     </dd><dt class="description">
+     <!--l. 192--><p class="noindent" >
+<span 
+class="cmbx-10">filename</span> </dt><dd 
+class="description">
+     <!--l. 192--><p class="noindent" >The name of the file to be written to.<br 
+class="newline" />Type:<span 
+class="cmbx-10">optional</span>.<br 
+class="newline" />Specified as: a character variable containing a valid file name, or <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">-</span></span></span>, in
+     which case the default output unit 6 (i.e. standard output in Unix jargon)
+     is used. Default: <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">-</span></span></span>.
+     </dd><dt class="description">
+     <!--l. 197--><p class="noindent" >
+<span 
+class="cmbx-10">iunit</span> </dt><dd 
+class="description">
+     <!--l. 197--><p class="noindent" >The Fortran file unit number.<br 
+class="newline" />Type:<span 
+class="cmbx-10">optional</span>.<br 
+class="newline" />Specified as: an integer value. Only meaningful if filename is not <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">-</span></span></span>.</dd></dl>
+                                                                  

                                                                  
+<!--l. 202--><p class="noindent" >
+     <dl class="description"><dt class="description">
+     <!--l. 203--><p class="noindent" >
+<span 
+class="cmbx-10">On Return</span> </dt><dd 
+class="description">
+     <!--l. 203--><p class="noindent" >
+     </dd><dt class="description">
+     <!--l. 204--><p class="noindent" >
+<span 
+class="cmbx-10">iret</span> </dt><dd 
+class="description">
+     <!--l. 204--><p class="noindent" >Error code.<br 
+class="newline" />Type: <span 
+class="cmbx-10">required </span><br 
+class="newline" />An integer value; 0 means no error has been detected.</dd></dl>
+<!--l. 209--><p class="noindent" ><span 
+class="cmbx-12">Notes</span>
+<!--l. 211--><p class="indent" >   If this function is called on a matrix <code class="lstinline"><span style="color:#000000">a</span></code> on a distributed communicator only the
+local part is written in output. To get a single MatrixMarket file with the whole
+matrix when appropriate, e.g. for debugging purposes, one could <span 
+class="cmti-10">gather </span>the whole
+matrix on a single rank and then write it. Consider the following example for a
+<span 
+class="cmti-10">double </span>precision matrix
+<div class="center" 
+>
+<!--l. 227--><p class="noindent" >
+<div class="minipage"><pre class="verbatim" id="verbatim-93">
+type(psb_ldspmat_type)&#x00A0;::&#x00A0;aglobal
+
+call&#x00A0;psb_gather(aglobal,a,desc_a,info)
+if&#x00A0;(iam&#x00A0;==&#x00A0;psb_root_)&#x00A0;then
+call&#x00A0;mm_mat_write(aglobal,mtitle,info,filename)
+end&#x00A0;if
+call&#x00A0;psb_spfree(aglobal,&#x00A0;desc_a,&#x00A0;info)
+</pre>
+<!--l. 237--><p class="nopar" >                                                           </div></div>
+<!--l. 241--><p class="noindent" >To simplify this procedure in <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">C</span></span></span>, there is a utility function
+<div class="center" 
+>
+<!--l. 247--><p class="noindent" >
+<div class="minipage"><pre class="verbatim" id="verbatim-94">
+psb_i_t&#x00A0;psb_c_&#x003C;s,d,c,z&#x003E;global_mat_write(ah,cdh);
+</pre>
+<!--l. 251--><p class="nopar" >                                                           </div></div>
+<!--l. 255--><p class="noindent" >that produces exactly this result.
+                                                                  
+
+                                                                  
+   <h4 class="subsectionHead"><span class="titlemark">9.6   </span> <a 
+ id="x14-1340009.6"></a>mm_array_write &#8212; Write a dense array to a file in the MatrixMarket
+format</h4>
+   <!--l. 261-->
+   <pre class="lstlisting" id="listing-165"><span class="label"><a 
+ id="x14-134001r1"></a></span><span style="color:#000000"><span 
+class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">mm_array_write</span></span><span style="color:#000000"><span 
+class="cmtt-10">(</span></span><span style="color:#000000"><span 
+class="cmtt-10">b</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">vtitle</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">iret</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">iunit</span></span><span style="color:#000000"><span 
+class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span 
+class="cmtt-10">filename</span></span><span style="color:#000000"><span 
+class="cmtt-10">)</span></span></pre>
+   
+<!--l. 265--><p class="noindent" >
+     <dl class="description"><dt class="description">
+     <!--l. 266--><p class="noindent" >
+<span 
+class="cmbx-10">Type:</span> </dt><dd 
+class="description">
+     <!--l. 266--><p class="noindent" >Asynchronous.
+     </dd><dt class="description">
+     <!--l. 267--><p class="noindent" >
+<span 
+class="cmbx-10">On Entry</span>  </dt><dd 
+class="description">
+     <!--l. 267--><p class="noindent" >
+     </dd><dt class="description">
+     <!--l. 268--><p class="noindent" >
+<span 
+class="cmbx-10">b</span> </dt><dd 
+class="description">
+     <!--l. 268--><p class="noindent" >Right hand side(s).<br 
+class="newline" />Type: <span 
+class="cmbx-10">required </span><br 
+class="newline" />An  array  of  type  real  or  complex,  rank  1  or  2,  or  an  object  of  type
+     <a 
+href="userhtmlse3.html#vdata"><span 
+class="cmtt-10">psb</span><span 
+class="cmtt-10">_T</span><span 
+class="cmtt-10">_vect</span><span 
+class="cmtt-10">_type</span></a>, of type real or complex; its contents will be written to
+     disk.<br 
+class="newline" />
+     </dd><dt class="description">
+     <!--l. 273--><p class="noindent" >
+<span 
+class="cmbx-10">filename</span> </dt><dd 
+class="description">
+     <!--l. 273--><p class="noindent" >The name of the file to be written.<br 
+class="newline" />
+     </dd><dt class="description">
+     <!--l. 274--><p class="noindent" >
+<span 
+class="cmbx-10">vtitle</span> </dt><dd 
+class="description">
+     <!--l. 274--><p class="noindent" >Vector title.<br 
+class="newline" />Type: <span 
+class="cmbx-10">required </span><br 
+class="newline" />A character variable holding a descriptive title for the vector to be written
+     to file. Type:<span 
+class="cmbx-10">optional</span>.<br 
+class="newline" />Specified as: a character variable containing a valid file name, or <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">-</span></span></span>, in
+     which case the default input unit 5 (i.e. standard input in Unix jargon) is
+     used. Default: <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">-</span></span></span>.
+     </dd><dt class="description">
+     <!--l. 282--><p class="noindent" >
+<span 
+class="cmbx-10">iunit</span> </dt><dd 
+class="description">
+                                                                  
+
+                                                                  
+     <!--l. 282--><p class="noindent" >The Fortran file unit number.<br 
+class="newline" />Type:<span 
+class="cmbx-10">optional</span>.<br 
+class="newline" />Specified as: an integer value. Only meaningful if filename is not <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">-</span></span></span>.</dd></dl>
+<!--l. 287--><p class="noindent" >
+     <dl class="description"><dt class="description">
+     <!--l. 288--><p class="noindent" >
+<span 
+class="cmbx-10">On Return</span> </dt><dd 
+class="description">
+     <!--l. 288--><p class="noindent" >
+     </dd><dt class="description">
+     <!--l. 289--><p class="noindent" >
+<span 
+class="cmbx-10">iret</span> </dt><dd 
+class="description">
+     <!--l. 289--><p class="noindent" >Error code.<br 
+class="newline" />Type: <span 
+class="cmbx-10">required </span><br 
+class="newline" />An integer value; 0 means no error has been detected.</dd></dl>
+<!--l. 294--><p class="noindent" ><span 
+class="cmbx-12">Notes</span>
+<!--l. 296--><p class="indent" >   If this function is called on a vector <code class="lstinline"><span style="color:#000000">v</span></code> on a distributed communicator only the
+local part is written in output. To get a single MatrixMarket file with the whole
+vector when appropriate, e.g. for debugging purposes, one could <span 
+class="cmti-10">gather </span>the whole
+vector on a single rank and then write it. Consider the following example for a <span 
+class="cmti-10">double</span>
+precision vector
+<div class="center" 
+>
+<!--l. 312--><p class="noindent" >
+<div class="minipage"><pre class="verbatim" id="verbatim-95">
+real(psb_dpk_),&#x00A0;allocatable&#x00A0;::&#x00A0;vglobal(:)
+
+call&#x00A0;psb_gather(vglobal,v,desc,info)
+if&#x00A0;(iam&#x00A0;==&#x00A0;psb_root_)&#x00A0;then
+call&#x00A0;mm_array_write(vglobal,vtitle,info,filename)
+end&#x00A0;if
+deallocate(vglobal,&#x00A0;stat=info)
+</pre>
+<!--l. 322--><p class="nopar" >                                                           </div></div>
+<!--l. 326--><p class="noindent" >To simplify this procedure in <span class="obeylines-h"><span class="verb"><span 
+class="cmtt-10">C</span></span></span>, there is a utility function
+<div class="center" 
+>
+<!--l. 332--><p class="noindent" >
+<div class="minipage"><pre class="verbatim" id="verbatim-96">
+psb_i_t&#x00A0;psb_c_&#x003C;s,d,c,z&#x003E;global_vec_write(vh,cdh);
+</pre>
+<!--l. 336--><p class="nopar" >                                                           </div></div>
+<!--l. 340--><p class="noindent" >that produces exactly this result.
+                                                                  
+
+                                                                  
+                                                                  
+
+                                                                  
+                                                                  
+
+                                                                  
+                                                                  

                                                                  
   <!--l. 1--><div class="crosslinks"><p class="noindent">[<a 
-href="userhtmlsu86.html" >next</a>] [<a 
+href="userhtmlse13.html" >next</a>] [<a 
 href="userhtmlse8.html" >prev</a>] [<a 
 href="userhtmlse8.html#tailuserhtmlse8.html" >prev-tail</a>] [<a 
 href="userhtmlse9.html" >front</a>] [<a 
-href="userhtml.html#userhtmlsu80.html" >up</a>] </p></div>
+href="userhtml.html#userhtmlse12.html" >up</a>] </p></div>
 <!--l. 1--><p class="indent" >   <a 
- id="tailuserhtmlse9.html"></a>  
+ id="tailuserhtmlse9.html"></a>   
 </body></html> 
--- a/docs/psblas-3.9.pdf
+++ b/docs/psblas-3.9.pdf
--- a/docs/src/Makefile
+++ b/docs/src/Makefile
@ -86,7 +86,8 @@
 TOPFILE   = userguide.tex
 HTMLFILE  = userhtml.tex
 SECFILE   = intro.tex commrout.tex datastruct.tex psbrout.tex toolsrout.tex\
-	methods.tex precs.tex penv.tex error.tex util.tex biblio.tex
+	methods.tex precs.tex penv.tex error.tex util.tex biblio.tex \
+        ext-intro.tex cuda.tex
 FIGDIR    = figures

 XPDFFLAGS = 
@ -139,7 +140,7 @@ PDF      = $(join $(BASEFILE),.pdf)
 PS       = $(join $(BASEFILE),.ps)
 GXS      = $(join $(BASEFILE),.gxs)
 GLX      = $(join $(BASEFILE),.glx)
-TARGETPDF= ../psblas-3.8.pdf
+TARGETPDF= ../psblas-3.9.pdf
 BASEHTML = $(patsubst %.tex,%,$(HTMLFILE))
 HTML     = $(join $(BASEHTML),.html)
 HTMLDIR  = ../html
--- a/docs/src/biblio.tex
+++ b/docs/src/biblio.tex
@ -1,9 +1,5 @@

 \begin{thebibliography}{99}
-\bibitem{DesPat:11}
- D.~Barbieri, V.~Cardellini, S.~Filippone and D.~Rouson
-{\em Design Patterns for Scientific Computations on Sparse Matrices},
- HPSS 2011, Algorithms and Programming Tools for Next-Generation High-Performance Scientific Software, Bordeaux, Sep. 2011

 \bibitem{PARA04FOREST}
 G.~Bella, S.~Filippone, A.~De Maio and M.~Testa,
@ -154,6 +150,11 @@ Lawson, C.,  Hanson, R., Kincaid, D. and Krogh, F.,
 {\em Fortran 95/2003 explained.}
 {Oxford University Press}, 2004.
 %
+\bibitem{MRC:11}
+{Metcalf, M., Reid, J. and Cohen, M.}
+{\em Modern Fortran  explained.}
+{Oxford University Press}, 2011.
+%
 %% \bibitem{DD2}
 %% B.~Smith, P.~Bjorstad and W.~Gropp,
 %% {\em Domain Decomposition: Parallel Multilevel Methods for Elliptic
@ -169,4 +170,20 @@ M.~Snir, S.~Otto, S.~Huss-Lederman, D.~Walker and J.~Dongarra,
 {\em MPI: The Complete Reference. Volume 1 - The MPI Core}, second edition,
 MIT Press, 1998.
 %
+
+\bibitem{DesPat:11}
+ D.~Barbieri, V.~Cardellini, S.~Filippone and D.~Rouson
+{\em Design Patterns for Scientific Computations on Sparse Matrices},
+ HPSS 2011, Algorithms and Programming Tools for Next-Generation High-Performance Scientific Software, Bordeaux, Sep. 2011
+  
+\bibitem{CaFiRo:2014}
+{ Cardellini, V.}, { Filippone, S.}, { and} { Rouson, D.} 2014,
+ Design patterns for sparse-matrix computations on hybrid {CPU/GPU}
+  platforms,
+{\em Scientific Programming\/}~{\em 22,\/}~1, 1--19.
+\bibitem{OurTechRep}
+D.~Barbieri, V.~Cardellini, A.~Fanfarillo, S.~Filippone, Three storage formats
+  for sparse matrices on {GPGPUs}, Tech. Rep. DICII RR-15.6, Universit\`a di
+  Roma Tor Vergata (February 2015).
+
 \end{thebibliography}
--- a/docs/src/cuda.tex
+++ b/docs/src/cuda.tex
@ -0,0 +1,395 @@
+
+\subsection{CUDA-class extensions}
+
+For computing  with CUDA we define a dual memorization strategy in
+which each variable on the CPU (``host'') side has a GPU (``device'')
+side. When a GPU-type variable is initialized, the data contained is
+(usually) the same on both sides. Each operator invoked on the
+variable may change the data so that only the host side or the device
+side is up-to-date. 
+
+Keeping track of the updates to data in the variables  is essential: we want
+to perform most  computations on the GPU, but we cannot afford the time
+needed to move data between the host  memory and the device memory
+because the bandwidth of the interconnection bus would become the main
+bottleneck of the computation. Thus, each and every computational
+routine in the library is built according to the following principles: 
+\begin{itemize}
+\item If the data type being handled is {GPU}-enabled, make sure that
+  its device copy is up to date, perform any arithmetic operation on
+  the {GPU}, and if the data has been altered as a result, mark
+  the main-memory copy as outdated.
+\item The main-memory copy is never updated unless this is requested
+  by the user either 
+\begin{description}
+\item[explicitly] by invoking a synchronization method;
+\item[implicitly] by invoking a method that involves other data items
+  that are not {GPU}-enabled, e.g., by assignment of a vector to a
+  normal array. 
+\end{description}
+\end{itemize}
+In this way, data items are put on the {GPU} memory ``on demand'' and
+remain there as long as ``normal'' computations are carried out. 
+As an example, the following call to a matrix-vector product
+\ifpdf
+\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran}
+    call psb_spmm(alpha,a,x,beta,y,desc_a,info)
+\end{minted}
+\else
+\begin{center}
+    \begin{minipage}[tl]{0.9\textwidth}
+\begin{verbatim} 
+    call psb_spmm(alpha,a,x,beta,y,desc_a,info)
+\end{verbatim}
+    \end{minipage}
+  \end{center}
+\fi
+will transparently and automatically be performed on the {GPU} whenever
+all three data inputs \fortinline|a|, \fortinline|x|  and
+\fortinline|y| are {GPU}-enabled. If a program makes many such calls
+sequentially, then 
+\begin{itemize}
+\item The first kernel invocation will find the data in main memory,
+  and will copy it to the {GPU} memory, thus incurring a significant
+  overhead; the result is however \emph{not} copied back, and
+  therefore:
+\item Subsequent kernel invocations involving the same vector will
+  find the data on the {GPU} side so that they will run at full
+  speed.
+\end{itemize}
+For all invocations after the first the only data that will have to be
+transferred to/from the main memory will be the scalars \fortinline|alpha|
+and \fortinline|beta|, and the return code \fortinline|info|.  
+
+\begin{description}
+\item[Vectors:] The data type \fortinline|psb_T_vect_gpu| provides a
+  GPU-enabled extension of the inner type \fortinline|psb_T_base_vect_type|,
+  and must be used together with the other inner matrix type to make
+  full use of the GPU computational capabilities;
+\item[CSR:] The data type \fortinline|psb_T_csrg_sparse_mat| provides an
+  interface to the GPU version of CSR available in the NVIDIA CuSPARSE
+  library;
+\item[HYB:] The data type \fortinline|psb_T_hybg_sparse_mat| provides an
+  interface to the HYB GPU storage  available in the NVIDIA CuSPARSE
+  library. The internal structure is opaque, hence the host side is
+  just CSR; the HYB data format is only available up to CUDA version
+  10. 
+\item[ELL:] The data type \fortinline|psb_T_elg_sparse_mat| provides an
+  interface to the  ELLPACK implementation from SPGPU;
+
+\item[HLL:] The data type \fortinline|psb_T_hlg_sparse_mat| provides an
+  interface to the  Hacked ELLPACK implementation from SPGPU;
+\item[HDIA:] The data type \fortinline|psb_T_hdiag_sparse_mat| provides an
+  interface to the  Hacked DIAgonals implementation from SPGPU;
+\end{description}
+
+
+\section{CUDA Environment Routines}
+\label{sec:cudaenv}
+
+\subsection*{psb\_cuda\_init --- Initializes PSBLAS-CUDA
+  environment}
+\addcontentsline{toc}{subsection}{psb\_cuda\_init}
+
+\ifpdf
+\begin{minted}[breaklines=true]{fortran}
+call psb_cuda_init(ctxt [, device])
+\end{minted}
+\else
+\begin{center}
+    \begin{minipage}[tl]{0.9\textwidth}
+\begin{verbatim} 
+call psb_cuda_init(ctxt [, device])
+\end{verbatim}
+    \end{minipage}
+  \end{center}
+\fi
+
+This subroutine initializes the PSBLAS-CUDA  environment. 
+\begin{description}
+\item[Type:] Synchronous.
+\item[\bf  On Entry ]
+\item[device] ID of CUDA device to attach to.\\
+Scope: {\bf local}.\\
+Type: {\bf optional}.\\
+Intent: {\bf in}.\\
+Specified as: an integer value. \\
+Default: use \fortinline|mod(iam,ngpu)| where \fortinline|iam| is the calling
+process index and \fortinline|ngpu| is the total number of CUDA devices
+available on the current node. 
+\end{description}
+
+
+{\par\noindent\large\bfseries Notes}
+\begin{enumerate}
+\item A call to this routine must precede any other PSBLAS-CUDA call. 
+\end{enumerate}
+
+\subsection*{psb\_cuda\_exit --- Exit from  PSBLAS-CUDA
+  environment}
+\addcontentsline{toc}{subsection}{psb\_cuda\_exit}
+
+\ifpdf
+\begin{minted}[breaklines=true]{fortran}
+call psb_cuda_exit(ctxt)
+\end{minted}
+\else
+\begin{center}
+    \begin{minipage}[tl]{0.9\textwidth}
+\begin{verbatim} 
+call psb_cuda_exit(ctxt)
+\end{verbatim}
+    \end{minipage}
+  \end{center}
+\fi
+
+This subroutine exits from the  PSBLAS CUDA context.
+\begin{description}
+\item[Type:] Synchronous.
+\item[\bf  On Entry ]
+\item[ctxt] the communication context identifying the virtual
+  parallel machine.\\
+Scope: {\bf global}.\\
+Type: {\bf required}.\\
+Intent: {\bf in}.\\
+Specified as: an integer variable.
+\end{description}
+
+
+
+
+\subsection*{psb\_cuda\_DeviceSync ---  Synchronize CUDA device}
+\addcontentsline{toc}{subsection}{psb\_cuda\_DeviceSync}
+
+\ifpdf
+\begin{minted}[breaklines=true]{fortran}
+call psb_cuda_DeviceSync()
+\end{minted}
+\else
+\begin{center}
+    \begin{minipage}[tl]{0.9\textwidth}
+\begin{verbatim} 
+call psb_cuda_DeviceSync()
+\end{verbatim}
+    \end{minipage}
+  \end{center}
+\fi
+
+This subroutine ensures that all previously invoked kernels, i.e. all
+invocations of CUDA-side code, have completed.
+
+
+\subsection*{psb\_cuda\_getDeviceCount }
+\addcontentsline{toc}{subsection}{psb\_cuda\_getDeviceCount}
+
+\ifpdf
+\begin{minted}[breaklines=true]{fortran}
+ngpus =  psb_cuda_getDeviceCount()
+\end{minted}
+\else
+\begin{center}
+    \begin{minipage}[tl]{0.9\textwidth}
+\begin{verbatim} 
+ngpus =  psb_cuda_getDeviceCount()
+\end{verbatim}
+    \end{minipage}
+  \end{center}
+\fi
+
+Get number of devices available on current computing node. 
+
+\subsection*{psb\_cuda\_getDevice }
+\addcontentsline{toc}{subsection}{psb\_cuda\_getDevice}
+
+\ifpdf
+\begin{minted}[breaklines=true]{fortran}
+dev =  psb_cuda_getDevice()
+\end{minted}
+\else
+\begin{center}
+    \begin{minipage}[tl]{0.9\textwidth}
+\begin{verbatim} 
+dev =  psb_cuda_getDevice()
+\end{verbatim}
+    \end{minipage}
+  \end{center}
+\fi
+
+Get  device in use by current process. 
+
+\subsection*{psb\_cuda\_setDevice }
+\addcontentsline{toc}{subsection}{psb\_cuda\_setDevice}
+
+\ifpdf
+\begin{minted}[breaklines=true]{fortran}
+info = psb_cuda_setDevice(dev)
+\end{minted}
+\else
+\begin{center}
+    \begin{minipage}[tl]{0.9\textwidth}
+\begin{verbatim} 
+info = psb_cuda_setDevice(dev)
+\end{verbatim}
+    \end{minipage}
+  \end{center}
+\fi
+
+Set  device to be used  by current process. 
+
+\subsection*{psb\_cuda\_DeviceHasUVA }
+\addcontentsline{toc}{subsection}{psb\_cuda\_DeviceHasUVA}
+
+\ifpdf
+\begin{minted}[breaklines=true]{fortran}
+hasUva = psb_cuda_DeviceHasUVA()
+\end{minted}
+\else
+\begin{center}
+    \begin{minipage}[tl]{0.9\textwidth}
+\begin{verbatim} 
+hasUva = psb_cuda_DeviceHasUVA()
+\end{verbatim}
+    \end{minipage}
+  \end{center}
+\fi
+
+Returns true if device currently in use supports UVA
+(Unified Virtual Addressing).
+
+\subsection*{psb\_cuda\_WarpSize }
+\addcontentsline{toc}{subsection}{psb\_cuda\_WarpSize}
+
+\ifpdf
+\begin{minted}[breaklines=true]{fortran}
+nw = psb_cuda_WarpSize()
+\end{minted}
+\else
+\begin{center}
+    \begin{minipage}[tl]{0.9\textwidth}
+\begin{verbatim} 
+nw = psb_cuda_WarpSize()
+\end{verbatim}
+    \end{minipage}
+  \end{center}
+\fi
+
+Returns the warp size.
+
+
+\subsection*{psb\_cuda\_MultiProcessors }
+\addcontentsline{toc}{subsection}{psb\_cuda\_MultiProcessors}
+
+\ifpdf
+\begin{minted}[breaklines=true]{fortran}
+nmp = psb_cuda_MultiProcessors()
+\end{minted}
+\else
+\begin{center}
+    \begin{minipage}[tl]{0.9\textwidth}
+\begin{verbatim} 
+nmp = psb_cuda_MultiProcessors()
+\end{verbatim}
+    \end{minipage}
+  \end{center}
+\fi
+
+Returns the number of multiprocessors in the CUDA device.
+
+\subsection*{psb\_cuda\_MaxThreadsPerMP }
+\addcontentsline{toc}{subsection}{psb\_cuda\_MaxThreadsPerMP}
+
+\ifpdf
+\begin{minted}[breaklines=true]{fortran}
+nt = psb_cuda_MaxThreadsPerMP()
+\end{minted}
+\else
+\begin{center}
+    \begin{minipage}[tl]{0.9\textwidth}
+\begin{verbatim} 
+nt = psb_cuda_MaxThreadsPerMP()
+\end{verbatim}
+    \end{minipage}
+  \end{center}
+\fi
+
+Returns the maximum number of threads per multiprocessor. 
+
+
+\subsection*{psb\_cuda\_MaxRegistersPerBlock }
+\addcontentsline{toc}{subsection}{psb\_cuda\_MaxRegistersPerBlock}
+
+\ifpdf
+\begin{minted}[breaklines=true]{fortran}
+nr = psb_cuda_MaxRegistersPerBlock()
+\end{minted}
+\else
+\begin{center}
+    \begin{minipage}[tl]{0.9\textwidth}
+\begin{verbatim} 
+nr = psb_cuda_MaxRegistersPerBlock()
+\end{verbatim}
+    \end{minipage}
+  \end{center}
+\fi
+
+Returns the maximum number of registers per thread block. 
+
+
+\subsection*{psb\_cuda\_MemoryClockRate }
+\addcontentsline{toc}{subsection}{psb\_cuda\_MemoryClockRate}
+
+\ifpdf
+\begin{minted}[breaklines=true]{fortran}
+cl = psb_cuda_MemoryClockRate()
+\end{minted}
+\else
+\begin{center}
+    \begin{minipage}[tl]{0.9\textwidth}
+\begin{verbatim} 
+cl = psb_cuda_MemoryClockRate()
+\end{verbatim}
+    \end{minipage}
+  \end{center}
+\fi
+
+Returns the memory clock rate in kHz, as an integer. 
+
+\subsection*{psb\_cuda\_MemoryBusWidth }
+\addcontentsline{toc}{subsection}{psb\_cuda\_MemoryBusWidth}
+
+\ifpdf
+\begin{minted}[breaklines=true]{fortran}
+nb = psb_cuda_MemoryBusWidth()
+\end{minted}
+\else
+\begin{center}
+    \begin{minipage}[tl]{0.9\textwidth}
+\begin{verbatim} 
+nb = psb_cuda_MemoryBusWidth()
+\end{verbatim}
+    \end{minipage}
+  \end{center}
+\fi
+
+Returns the memory bus width in bits.
+
+\subsection*{psb\_cuda\_MemoryPeakBandwidth }
+\addcontentsline{toc}{subsection}{psb\_cuda\_MemoryPeakBandwidth}
+
+\ifpdf
+\begin{minted}[breaklines=true]{fortran}
+bw = psb_cuda_MemoryPeakBandwidth()
+\end{minted}
+\else
+\begin{center}
+    \begin{minipage}[tl]{0.9\textwidth}
+\begin{verbatim} 
+bw = psb_cuda_MemoryPeakBandwidth()
+\end{verbatim}
+    \end{minipage}
+  \end{center}
+\fi
+Returns the peak memory bandwidth in MB/s (real double precision).
+
+
+
--- a/docs/src/ext-intro.tex
+++ b/docs/src/ext-intro.tex
@ -0,0 +1,598 @@
+\section{Extensions}\label{sec:ext-intro}
+
+The EXT, CUDA and RSB  subdirectories contain a set of extensions to the base
+library. The extensions provide additional storage formats beyond the
+ones already contained in the base library, as well as interfaces
+to:
+\begin{description}
+\item[SPGPU] a CUDA library originally published as
+  \url{https://code.google.com/p/spgpu/} and now included in the
+  \verb|cuda| subdir, for computations on   NVIDIA GPUs;
+\item[LIBRSB] \url{http://sourceforge.net/projects/librsb/}, for
+  computations on multicore parallel machines. 
+\end{description}
+The infrastructure laid out in the base library to allow for these
+extensions is detailed in the references~\cite{DesPat:11,CaFiRo:2014,Sparse03};
+the CUDA-specific data formats are described in~\cite{OurTechRep}. 
+
+
+\subsection{Using the extensions}
+\label{sec:ext-appstruct}
+A sample application using the PSBLAS extensions will contain the
+following steps:
+\begin{itemize}
+\item \verb|USE| the appropriate modules (\verb|psb_ext_mod|,
+  \verb|psb_cuda_mod|);
+\item Declare a \emph{mold} variable of the necessary type
+  (e.g. \verb|psb_d_ell_sparse_mat|, \verb|psb_d_hlg_sparse_mat|,
+  \verb|psb_d_vect_cuda|);
+\item Pass the mold variable to the base library interface where
+  needed to ensure the appropriate dynamic type.
+\end{itemize}
+Suppose you want to use the CUDA-enabled ELLPACK data structure; you
+would use a piece of code like this (and don't forget, you need
+CUDA-side vectors along with the matrices):
+\ifpdf
+\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran}
+program my_cuda_test
+  use psb_base_mod
+  use psb_util_mod 
+  use psb_ext_mod
+  use psb_cuda_mod
+  type(psb_dspmat_type) :: a, agpu
+  type(psb_d_vect_type) :: x, xg, bg
+
+  real(psb_dpk_), allocatable :: xtmp(:)
+  type(psb_d_vect_cuda)       :: vmold
+  type(psb_d_elg_sparse_mat) :: aelg
+  type(psb_ctxt_type) :: ctxt
+  integer             :: iam, np
+
+
+  call psb_init(ctxt)
+  call psb_info(ctxt,iam,np)
+  call psb_cuda_init(ctxt, iam)
+
+  
+  ! My own home-grown matrix generator
+  call gen_matrix(ctxt,idim,desc_a,a,x,info)
+  if (info /= 0) goto 9999
+
+  call a%cscnv(agpu,info,mold=aelg)
+  if (info /= 0) goto 9999
+  xtmp = x%get_vect() 
+  call xg%bld(xtmp,mold=vmold)
+  call bg%bld(size(xtmp),mold=vmold)
+  
+  ! Do sparse MV
+  call psb_spmm(done,agpu,xg,dzero,bg,desc_a,info)
+
+ 
+9999 continue
+  if (info == 0) then 
+     write(*,*) '42'
+  else
+     write(*,*) 'Something went wrong ',info
+  end if
+  
+  
+  call psb_cuda_exit()
+  call psb_exit(ctxt)
+  stop
+end program my_cuda_test
+\end{minted}
+\else
+\begin{center}
+    \begin{minipage}[tl]{0.9\textwidth}
+\begin{verbatim} 
+program my_cuda_test
+  use psb_base_mod
+  use psb_util_mod 
+  use psb_ext_mod
+  use psb_cuda_mod
+  type(psb_dspmat_type) :: a, agpu
+  type(psb_d_vect_type) :: x, xg, bg
+
+  real(psb_dpk_), allocatable :: xtmp(:)
+  type(psb_d_vect_cuda)       :: vmold
+  type(psb_d_elg_sparse_mat) :: aelg
+  type(psb_ctxt_type) :: ctxt
+  integer             :: iam, np
+
+
+  call psb_init(ctxt)
+  call psb_info(ctxt,iam,np)
+  call psb_cuda_init(ctxt, iam)
+
+  
+  ! My own home-grown matrix generator
+  call gen_matrix(ctxt,idim,desc_a,a,x,info)
+  if (info /= 0) goto 9999
+
+  call a%cscnv(agpu,info,mold=aelg)
+  if (info /= 0) goto 9999
+  xtmp = x%get_vect() 
+  call xg%bld(xtmp,mold=vmold)
+  call bg%bld(size(xtmp),mold=vmold)
+  
+  ! Do sparse MV
+  call psb_spmm(done,agpu,xg,dzero,bg,desc_a,info)
+
+ 
+9999 continue
+  if (info == 0) then 
+     write(*,*) '42'
+  else
+     write(*,*) 'Something went wrong ',info
+  end if
+  
+  
+  call psb_cuda_exit()
+  call psb_exit(ctxt)
+  stop
+end program my_cuda_test
+\end{verbatim}
+    \end{minipage}
+  \end{center}
+\fi
+      
+A full example of this strategy can be seen in the
+\texttt{test/ext/kernel} and \texttt{test/\-cuda/\-kernel} subdirectories,
+where we provide  sample programs 
+to test the speed of the sparse matrix-vector product with the various
+data structures included in the library. 
+
+
+\subsection{Extensions' Data Structures}
+\label{sec:ext-datastruct}
+%\ifthenelse{\boolean{mtc}}{\minitoc}{}
+
+Access to the facilities provided by the EXT library is mainly
+achieved through the data types that are provided within. 
+The data classes are derived from the base  classes in PSBLAS, through 
+the Fortran~2003 mechanism of \emph{type extension}~\cite{MRC:11}.  
+
+The data classes are divided between the general purpose CPU
+extensions, the GPU interfaces and the RSB interfaces.
+In the description we will make use of the notation introduced in
+Table~\ref{tab:notation}. 
+
+\begin{table}[ht]
+\caption{Notation for parameters describing a sparse matrix}
+\begin{center}
+{\footnotesize
+\begin{tabular}{ll}
+\hline
+Name & Description \\
+\hline
+M		& Number of rows in matrix		 \\
+N		& Number of columns in matrix		 \\
+NZ              & Number of nonzeros in matrix   \\
+AVGNZR          & Average number of nonzeros per row  \\
+MAXNZR          & Maximum number of nonzeros per row  \\
+NDIAG           & Number of nonzero diagonals\\
+AS	        & Coefficients 	array		 \\
+IA	        & Row indices array			 \\
+JA	        & Column  indices array			 \\
+IRP	        & Row start pointers array			 \\
+JCP	        & Column start pointers array			 \\
+NZR 	        & Number of nonzeros per row array \\
+OFFSET          & Offset for diagonals			 \\
+\hline
+\end{tabular}
+}
+\end{center}
+\label{tab:notation}
+\end{table}
+
+\begin{figure}[ht]
+	\centering
+%		\includegraphics[width=5.2cm]{figures/mat.eps}
+\ifcase\pdfoutput
+  \includegraphics[width=5.2cm]{mat.png}
+\or
+  \includegraphics[width=5.2cm]{figures/mat.pdf}
+\fi
+	\caption{Example of sparse matrix}
+	\label{fig:dense}
+\end{figure} 
+
+\subsection{CPU-class extensions}
+
+
+\subsubsection*{ELLPACK}
+
+The ELLPACK/ITPACK format (shown in Figure~\ref{fig:ell}) 
+comprises  two 2-dimensional arrays \verb|AS| and
+\verb|JA|  with \verb|M| rows and \verb|MAXNZR| columns, where
+\verb|MAXNZR| is the maximum
+number of nonzeros in any row~\cite{ELLPACK}. 
+Each row of the arrays \verb|AS| and \verb|JA| contains the
+coefficients and column indices; rows shorter than
+\verb|MAXNZR| are padded with zero coefficients and appropriate column
+indices, e.g. the last valid one found in the same row.
+
+\begin{figure}[ht]
+	\centering
+%		\includegraphics[width=8.2cm]{figures/ell.eps}
+\ifcase\pdfoutput
+  \includegraphics[width=8.2cm]{ell.png}
+\or
+  \includegraphics[width=8.2cm]{figures/ell.pdf}
+\fi
+	\caption{ELLPACK compression of matrix in Figure~\ref{fig:dense}}
+	\label{fig:ell}
+\end{figure} 
+
+
+\begin{algorithm}
+\lstset{language=Fortran}
+\small
+  \begin{lstlisting}
+    do i=1,m
+      t=0
+      do j=1,maxnzr
+        t = t +  as(i,j)*x(ja(i,j))
+      end do
+      y(i) = t
+    end do
+  \end{lstlisting}
+  \caption{\label{alg:ell} Matrix-Vector product in ELL format}
+\end{algorithm}
+The matrix-vector product $y=Ax$ can be computed with the code shown in
+Alg.~\ref{alg:ell}; it costs  one  memory write per outer iteration, 
+plus three memory reads  and two floating-point operations per inner
+iteration.   
+
+Unless all rows have exactly the same number of nonzeros, some of the
+coefficients in the \verb|AS| array will be zeros; therefore this
+data structure will have  an overhead both in terms of memory space
+and redundant operations (multiplications by zero).  The overhead can
+be acceptable if: 
+\begin{enumerate}
+\item The maximum number of nonzeros per row is not much larger than
+  the    average;
+\item The regularity of the data structure allows for faster  code,
+  e.g. by allowing vectorization, thereby offsetting the additional
+  storage requirements.  
+\end{enumerate}
+In the extreme case where the input matrix has one full row, the
+ELLPACK structure would require more memory than the normal 2D array
+storage. The ELLPACK storage format was very popular in the vector
+computing days; in modern CPUs it is not quite as popular, but it
+is  the basis for many GPU formats. 
+
+The relevant data type is \verb|psb_T_ell_sparse_mat|:
+\ifpdf
+\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran}
+  type, extends(psb_d_base_sparse_mat) :: psb_d_ell_sparse_mat
+    !
+    ! ITPACK/ELL format, extended.
+    !     
+    
+    integer(psb_ipk_), allocatable :: irn(:), ja(:,:), idiag(:)
+    real(psb_dpk_), allocatable :: val(:,:)
+
+  contains
+    ....
+  end type psb_d_ell_sparse_mat
+\end{minted}
+\else
+\begin{center}
+    \begin{minipage}[tl]{0.9\textwidth}
+\begin{verbatim} 
+  type, extends(psb_d_base_sparse_mat) :: psb_d_ell_sparse_mat
+    !
+    ! ITPACK/ELL format, extended.
+    !     
+    
+    integer(psb_ipk_), allocatable :: irn(:), ja(:,:), idiag(:)
+    real(psb_dpk_), allocatable :: val(:,:)
+
+  contains
+    ....
+  end type psb_d_ell_sparse_mat
+\end{verbatim}
+    \end{minipage}
+  \end{center}
+\fi
+
+
+\subsubsection*{Hacked ELLPACK}
+
+The \textit{hacked ELLPACK} (\textbf{HLL}) format 
+alleviates the main problem of the ELLPACK format, that is, 
+the  amount of  memory required by  padding for  sparse matrices in
+which the maximum row length is  larger than the average.
+
+The number of  elements  allocated to padding is $[(m*maxNR) -
+(m*avgNR) = m*(maxNR-avgNR)]$ 
+for both \verb|AS|  and \verb|JA| arrays,
+where $m$ is equal to the number of rows of the matrix, $maxNR$ is the
+maximum number of nonzero elements 
+in any row and $avgNR$ is the average number of nonzeros. 
+Therefore a single densely populated row can seriously affect the
+total size of the allocation. 
+
+To limit this effect, in the HLL format  we break the original matrix
+into equally sized groups of rows (called \textit{hacks}), and then store
+these groups as independent matrices in ELLPACK format. 
+The groups can be arranged selecting rows in an arbitrary manner;
+indeed, if the rows are sorted by decreasing number of nonzeros we
+obtain essentially the JAgged Diagonals format. 
+If the rows are not in the original order, then an   additional vector
+\textit{rIdx} is required, storing the actual row index  for each row
+in the data structure.
+
+The multiple ELLPACK-like buffers are stacked together inside a
+single, one dimensional array; 
+an additional  vector \textit{hackOffsets} is provided to keep track
+of the individual submatrices.
+All hacks have the same number of rows  \textit{hackSize}; hence, 
+the \textit{hackOffsets} vector is  an array of
+$(m/hackSize)+1$ elements, each one pointing  to the first index of a
+submatrix inside the stacked \textit{cM}/\textit{rP} buffers, plus an
+additional element pointing past the end of the last block, where the
+next one would begin. 
+We thus have the property that  
+the elements of the $k$-th \textit{hack} are stored between
+\verb|hackOffsets[k]| and 
+\verb|hackOffsets[k+1]|, similarly to what happens in the CSR format. 
+
+\begin{figure}[ht]
+	\centering
+%		\includegraphics[width=8.2cm]{../figures/hll.eps}
+\ifcase\pdfoutput
+  \includegraphics[width=.72\textwidth]{hll.png}
+\or
+  \includegraphics[width=.72\textwidth]{../figures/hll.pdf}
+\fi
+	\caption{Hacked ELLPACK compression of matrix in Figure~\ref{fig:dense}}
+	\label{fig:hll}
+\end{figure} 
+
+With this data structure a very long row only affects one hack, and
+therefore the additional memory is limited to the hack in which the
+row appears.
+
+The relevant data type is \verb|psb_T_hll_sparse_mat|:
+\ifpdf
+\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran}
+  type, extends(psb_d_base_sparse_mat) :: psb_d_hll_sparse_mat
+    !
+    ! HLL format. (Hacked ELL) 
+    !     
+    integer(psb_ipk_) :: hksz
+    integer(psb_ipk_), allocatable :: irn(:), ja(:), idiag(:), hkoffs(:)
+    real(psb_dpk_), allocatable :: val(:)
+
+  contains
+   ....
+  end type
+\end{minted}
+\else
+\begin{center}
+    \begin{minipage}[tl]{0.9\textwidth}
+\begin{verbatim} 
+  type, extends(psb_d_base_sparse_mat) :: psb_d_hll_sparse_mat
+    !
+    ! HLL format. (Hacked ELL) 
+    !     
+    integer(psb_ipk_) :: hksz
+    integer(psb_ipk_), allocatable :: irn(:), ja(:), idiag(:), hkoffs(:)
+    real(psb_dpk_), allocatable :: val(:)
+
+  contains
+   ....
+  end type
+\end{verbatim}
+    \end{minipage}
+  \end{center}
+\fi
+
+\subsubsection*{Diagonal storage}
+
+
+The DIAgonal (DIA) format (shown in Figure~\ref{fig:dia}) 
+has   a 2-dimensional array \verb|AS| containing in each column the
+coefficients along a  diagonal of the matrix, and an integer array
+\verb|OFFSET|  that determines  where each diagonal starts. The
+diagonals in \verb|AS| are padded with zeros as necessary. 
+
+The code to compute the matrix-vector product $y=Ax$ is shown in Alg.~\ref{alg:dia};
+it costs one  memory read per outer iteration, 
+plus three memory reads, one memory write  and two floating-point
+operations per inner iteration. The accesses to  \verb|AS| and
+\verb|x| are in strict sequential order,  therefore no indirect
+addressing is required.  
+
+\begin{figure}[ht]
+	\centering
+%		\includegraphics[width=8.2cm]{figures/dia.eps}
+\ifcase\pdfoutput
+  \includegraphics[width=.72\textwidth]{dia.png}
+\or
+  \includegraphics[width=.72\textwidth]{figures/dia.pdf}
+\fi
+	\caption{DIA compression of matrix in Figure~\ref{fig:dense}}
+	\label{fig:dia}
+\end{figure} 
+
+
+\begin{algorithm}
+\ifpdf
+\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran}
+    do j=1,ndiag
+      if (offset(j) > 0) then 
+        ir1 = 1; ir2 = m - offset(j);
+      else
+        ir1 = 1 - offset(j); ir2 = m;
+      end if
+      do i=ir1,ir2
+        y(i) = y(i) + alpha*as(i,j)*x(i+offset(j))
+      end do
+    end do
+\end{minted}
+\else
+\begin{center}
+    \begin{minipage}[tl]{0.9\textwidth}
+\begin{verbatim} 
+    do j=1,ndiag
+      if (offset(j) > 0) then 
+        ir1 = 1; ir2 = m - offset(j);
+      else
+        ir1 = 1 - offset(j); ir2 = m;
+      end if
+      do i=ir1,ir2
+        y(i) = y(i) + alpha*as(i,j)*x(i+offset(j))
+      end do
+    end do
+\end{verbatim}
+    \end{minipage}
+  \end{center}
+\fi
+  \caption{\label{alg:dia} Matrix-Vector product in DIA format}
+\end{algorithm}
+
+
+The relevant data type is \verb|psb_T_dia_sparse_mat|:
+\ifpdf
+\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran}
+  type, extends(psb_d_base_sparse_mat) :: psb_d_dia_sparse_mat
+    !
+    ! DIA format, extended.
+    !     
+    
+    integer(psb_ipk_), allocatable :: offset(:)
+    integer(psb_ipk_) :: nzeros
+    real(psb_dpk_), allocatable :: data(:,:)
+
+  end type
+\end{minted}
+\else
+\begin{center}
+    \begin{minipage}[tl]{0.9\textwidth}
+\begin{verbatim} 
+  type, extends(psb_d_base_sparse_mat) :: psb_d_dia_sparse_mat
+    !
+    ! DIA format, extended.
+    !     
+    
+    integer(psb_ipk_), allocatable :: offset(:)
+    integer(psb_ipk_) :: nzeros
+    real(psb_dpk_), allocatable :: data(:,:)
+
+  end type
+\end{verbatim}
+    \end{minipage}
+  \end{center}
+\fi
+
+
+
+\subsubsection*{Hacked DIA}
+
+Storage by DIAgonals is an attractive option for matrices whose
+coefficients are located on a small set of diagonals, since it does
+away with explicitly storing the indices and therefore significantly
+reduces memory traffic. However, having a few coefficients
+outside of the main set of diagonals may  significantly increase the
+amount of needed padding; moreover, while the DIA code is easily
+vectorized, it does not necessarily make optimal use of the memory
+hierarchy. While processing each diagonal we are updating entries in
+the output vector \verb|y|, which is then accessed multiple times; if 
+the vector \verb|y| is too large to remain in the cache memory, the
+associated cache miss penalty is paid multiple times. 
+
+The \textit{hacked DIA} (\textbf{HDIA}) format was designed to contain
+the amount of padding, by  breaking  the original matrix
+into equally sized groups of rows (\textit{hacks}), and then storing
+these groups as independent matrices in DIA format. This approach is
+similar to that of HLL, and requires using an offset vector for each
+submatrix. Again, similarly to HLL, the various submatrices are
+stacked inside a linear array to improve memory management. The fact
+that the matrix is accessed in slices helps in reducing cache misses,
+especially regarding accesses to the %output 
+vector \verb|y|.  
+
+
+An additional vector \textit{hackOffsets} is provided to complete
+the matrix format; given that \textit{hackSize} is the number of rows of each hack,
+the \textit{hackOffsets} vector is an array of
+$(m/hackSize)+1$ elements: the first $m/hackSize$ entries each point to the
+first diagonal offset of a submatrix inside the stacked \textit{offsets}
+buffer, and the final entry is equal to the number of nonzero diagonals in the whole matrix. 
+We thus have the property that  
+the number of diagonals of the $k$-th \textit{hack} is given by
+\textit{hackOffsets[k+1] - hackOffsets[k]}.  
+
+\begin{figure}[ht]
+	\centering
+%		\includegraphics[width=8.2cm]{../figures/hdia.eps}
+\ifcase\pdfoutput
+  \includegraphics[width=.72\textwidth]{hdia.png}
+\or
+  \includegraphics[width=.72\textwidth]{../figures/hdia.pdf}
+\fi
+	\caption{Hacked DIA compression of matrix in Figure~\ref{fig:dense}}
+	\label{fig:hdia}
+\end{figure} 
+
+The relevant data type is \verb|psb_T_hdia_sparse_mat|:
+\ifpdf
+\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran}
+  type pm
+     real(psb_dpk_), allocatable  :: data(:,:)
+  end type pm
+
+  type po
+     integer(psb_ipk_), allocatable  :: off(:)
+  end type po
+
+  type, extends(psb_d_base_sparse_mat) :: psb_d_hdia_sparse_mat
+    !
+    ! HDIA format, extended.
+    !
+    
+    type(pm), allocatable :: hdia(:)
+    type(po), allocatable :: offset(:)
+    integer(psb_ipk_) :: nblocks, nzeros
+    integer(psb_ipk_) :: hack = 64
+    integer(psb_long_int_k_) :: dim=0
+
+  contains
+   ....
+  end type
+\end{minted}
+\else
+\begin{center}
+    \begin{minipage}[tl]{0.9\textwidth}
+\begin{verbatim} 
+  type pm
+     real(psb_dpk_), allocatable  :: data(:,:)
+  end type pm
+
+  type po
+     integer(psb_ipk_), allocatable  :: off(:)
+  end type po
+
+  type, extends(psb_d_base_sparse_mat) :: psb_d_hdia_sparse_mat
+    !
+    ! HDIA format, extended.
+    !
+    
+    type(pm), allocatable :: hdia(:)
+    type(po), allocatable :: offset(:)
+    integer(psb_ipk_) :: nblocks, nzeros
+    integer(psb_ipk_) :: hack = 64
+    integer(psb_long_int_k_) :: dim=0
+
+  contains
+   ....
+  end type
+\end{verbatim}
+    \end{minipage}
+  \end{center}
+\fi
+
+
--- a/docs/src/figures/dia.pdf
+++ b/docs/src/figures/dia.pdf
--- a/docs/src/figures/dia.png
+++ b/docs/src/figures/dia.png
--- a/docs/src/figures/ell.pdf
+++ b/docs/src/figures/ell.pdf
--- a/docs/src/figures/ell.png
+++ b/docs/src/figures/ell.png
--- a/docs/src/figures/hdia.pdf
+++ b/docs/src/figures/hdia.pdf
--- a/docs/src/figures/hdia.png
+++ b/docs/src/figures/hdia.png
--- a/docs/src/figures/hll.pdf
+++ b/docs/src/figures/hll.pdf
--- a/docs/src/figures/hll.png
+++ b/docs/src/figures/hll.png
--- a/docs/src/figures/mat.pdf
+++ b/docs/src/figures/mat.pdf
--- a/docs/src/figures/mat.png
+++ b/docs/src/figures/mat.png
--- a/docs/src/figures/psblaslibraryext.png
+++ b/docs/src/figures/psblaslibraryext.png
--- a/docs/src/userguide.tex
+++ b/docs/src/userguide.tex
@ -17,6 +17,7 @@
 \newtheorem{theorem}{Theorem}
 \newtheorem{corollary}{Corollary}
 \usepackage{listings}
+\usepackage{algorithm2e}
 \usepackage{minted}
 \usemintedstyle{friendly}
 \definecolor{bg}{rgb}{0.95,0.95,0.95}
@ -36,7 +37,7 @@
 \relax
 \pdfcompresslevel=0             %-- 0 = none, 9 = best
 \pdfinfo{                       %-- Info dictionary of PDF output  /Author (Alfredo Buttari)
-  /Title (Parallel Sparse BLAS V. 3.8.0)
+  /Title (Parallel Sparse BLAS V. 3.9.0)
  /Subject (Parallel Sparse Basic Linear Algebra Subroutines)
  /Keywords (Computer Science Linear Algebra Fluid Dynamics Parallel Linux MPI PSBLAS Iterative Solvers Preconditioners)
  /Creator (pdfLaTeX)
@ -99,7 +100,7 @@

 \begin{document}
 {
-\pdfbookmark{PSBLAS-v3.8.0 User's Guide}{title}
+\pdfbookmark{PSBLAS-v3.9.0 User's Guide}{title}
 \lstset{language=Fortran}
 \newlength{\centeroffset}
 \setlength{\centeroffset}{-0.5\oddsidemargin}
@ -109,7 +110,7 @@
 \vspace*{\stretch{1}}
 \noindent\hspace*{\centeroffset}\makebox[0pt][l]{\begin{minipage}{\textwidth}
 \flushright
-{\Huge\bfseries PSBLAS 3.8.0 User's guide
+{\Huge\bfseries PSBLAS 3.9.0 User's guide
 }
 \noindent\rule[-1ex]{\textwidth}{5pt}\\[2.5ex]
 \hfill\emph{\Large A reference guide for the Parallel Sparse BLAS library}
@ -130,7 +131,7 @@
 {\bfseries 
 by Salvatore Filippone\\
 and Alfredo Buttari}\\ 
-May 1st, 2022
+Aug 1st, 2024
 \end{minipage}}
 }
 %\addtolength{\textwidth}{\centeroffset}
@ -159,7 +160,8 @@ May 1st, 2022
 \include{util}
 \include{precs}
 \include{methods}
-
+\include{ext-intro}
+\include{cuda}
 \cleardoublepage
 \input{biblio}

--- a/docs/src/userhtml.tex
+++ b/docs/src/userhtml.tex
@ -17,6 +17,7 @@
 \newtheorem{theorem}{Theorem}
 \newtheorem{corollary}{Corollary}
 \usepackage{listings}
+\usepackage{algorithm2e}
 \usepackage{microtype}
 \ifpdf
 \newmintinline[fortinline]{fortran}{}
@ -94,9 +95,9 @@
   Alfredo Buttari } \\
 %\\[10ex]
 %\today
-Software version: 3.8.0\\
+Software version: 3.9.0\\
 %\today
-May 1st, 2022
+Aug 1st, 2024
 \cleardoublepage
 \begingroup
  \renewcommand*{\thepage}{toc}
@ -120,7 +121,8 @@ May 1st, 2022
 \include{util}
 \include{precs}
 \include{methods}
-
+\include{ext-intro}
+\include{cuda}
 \cleardoublepage

 \input{biblio}
--- a/rsb/Makefile
+++ b/rsb/Makefile
@ -0,0 +1,53 @@
+include ../Make.inc
+#
+# Build of the PSBLAS/librsb interface library (libpsb_rsb.a).
+#
+# Top-level targets:
+#   lib          build everything, archive it and install the library
+#   objs         compile the objects of this directory and of impl/,
+#                then install the module files
+#   clean        remove objects, module files and archives
+#   verycleanlib run "veryclean" in the parent directory
+#
+# Libraries used
+#
+PSBLIBDIR=$(PSBLASDIR)/lib/
+PSBINCDIR=$(PSBLASDIR)/include
+PSBMODDIR=$(PSBLASDIR)/modules
+LIBDIR=../lib
+INCDIR=../include
+MODDIR=../modules
+PSBLAS_LIB= -L$(PSBLIBDIR)  -lpsb_util  -lpsb_base
+#-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base 
+LDLIBS=$(PSBLDLIBS)
+#
+# Compilers and such
+#
+#CCOPT= -g
+FINCLUDES=$(FMFLAG). $(FMFLAG)$(PSBMODDIR) $(FMFLAG)$(PSBINCDIR) $(FIFLAG). $(LIBRSB_INCLUDES) $(LIBRSB_DEFINES)
+# NOTE(review): these are the GPU/CUDA include directories; the C sources
+# here presumably need the librsb headers instead -- confirm before changing.
+CINCLUDES=-I$(GPU_INCDIR) -I$(CUDA_INCDIR)
+LIBNAME=libpsb_rsb.a
+
+
+FOBJS=  rsb_mod.o psb_d_rsb_mat_mod.o \
+	psb_rsb_penv_mod.o psb_rsb_mod.o
+
+COBJS= rsb_int.o
+
+OBJS=$(COBJS) $(FOBJS)
+
+# None of these targets names a real file.
+.PHONY: lib objs iobjs ilib clean cclean iclean verycleanlib
+
+# impl/ only archives its own objects, so the objects of this directory
+# must be added to the library here before it is installed.
+lib: objs ilib 
+	ar cur $(LIBNAME) $(OBJS)
+	/bin/cp -p $(LIBNAME) $(LIBDIR)
+
+objs: $(OBJS) iobjs
+	/bin/cp -p *$(.mod) $(MODDIR)
+
+# The sources in impl/ look up module files in this directory, so the
+# local objects must exist first (this matters for parallel builds).
+iobjs: $(OBJS)
+	$(MAKE) -C impl objs
+ilib: iobjs
+	$(MAKE) -C impl lib LIBNAME=$(LIBNAME)
+
+clean: cclean iclean
+	/bin/rm -f  $(FOBJS) *$(.mod) *.a
+
+cclean: 
+	/bin/rm -f  $(COBJS) 
+iclean:
+	cd impl && $(MAKE) clean
+
+# Make.inc is included from "..", so that is where veryclean is run.
+verycleanlib: 
+	cd .. && $(MAKE) veryclean
+
+
+
--- a/rsb/impl/Makefile
+++ b/rsb/impl/Makefile
@ -0,0 +1,30 @@
+include ../../Make.inc
+#
+# Implementation files of the PSBLAS/librsb interface.
+# The objects built here are added to the archive that lives in the
+# parent directory (../$(LIBNAME)).
+#
+PSBLIBDIR=$(PSBLASDIR)/lib/
+PSBINCDIR=$(PSBLASDIR)/include
+PSBMODDIR=$(PSBLASDIR)/modules
+LIBDIR=../../lib
+INCDIR=../../include
+MODDIR=../../modules
+PSBLAS_LIB= -L$(PSBLIBDIR)  -lpsb_util  -lpsb_base
+#-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base 
+LDLIBS=$(PSBLDLIBS)
+#
+# Compilers and such
+#
+#CCOPT= -g
+# Module search path: parent directory first, then the installed modules.
+FINCLUDES=$(FMFLAG).. $(FMFLAG)$(INCDIR) $(FMFLAG)$(MODDIR) $(FMFLAG)$(PSBMODDIR) $(FMFLAG)$(PSBINCDIR) $(LIBRSB_INCLUDES) $(FIFLAG).. $(LIBRSB_DEFINES)
+CINCLUDES=
+LIBNAME=libpsb_rsb.a
+
+OBJS= \
+psb_d_cp_rsb_from_coo.o \
+psb_d_mv_rsb_from_coo.o \
+psb_d_cp_rsb_to_coo.o psb_d_rsb_csmv.o
+
+# None of these targets names a real file.
+.PHONY: objs lib clean
+
+objs: $(OBJS)
+
+lib: objs
+	ar cur ../$(LIBNAME) $(OBJS)
+
+clean:
+	/bin/rm -f $(OBJS)
--- a/rsb/impl/psb_d_cp_rsb_from_coo.F90
+++ b/rsb/impl/psb_d_cp_rsb_from_coo.F90
@ -0,0 +1,78 @@
+!                Parallel Sparse BLAS   GPU plugin 
+!      (C) Copyright 2013
+!  
+!                         Salvatore Filippone
+!                         Alessandro Fanfarillo
+!   
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!   
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!   
+  
+
+!
+! psb_d_cp_rsb_from_coo
+!   Build the RSB representation held in A from a copy of the COO matrix B.
+!
+!   a     (inout)  RSB matrix; its base-class part and rsbMat handle are set.
+!   b     (in)     COO matrix to be copied; it is not modified.
+!   info  (out)    psb_success_ on success; otherwise the code returned by
+!                  the failing step (copy, fix, or Rsb_from_coo).
+!
+!   When HAVE_RSB is not defined the routine does nothing and returns
+!   psb_success_.
+!
+subroutine psb_d_cp_rsb_from_coo(a,b,info) 
+  
+  use psb_base_mod
+  use rsb_mod
+  use psb_d_rsb_mat_mod, psb_protect_name => psb_d_cp_rsb_from_coo
+  implicit none 
+
+  class(psb_d_rsb_sparse_mat), intent(inout) :: a
+  class(psb_d_coo_sparse_mat), intent(in)    :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  !locals
+  type(psb_d_coo_sparse_mat) :: tmp
+  integer(psb_ipk_)          :: nza, nr, nc, bs
+
+  info = psb_success_
+#ifdef HAVE_RSB
+  ! B is intent(in): work on a private copy so that fix() can be applied.
+  call b%cp_to_coo(tmp,info)
+  if (info /= psb_success_) return
+
+  call tmp%fix(info)
+  if (info /= psb_success_) return
+
+  nr  = tmp%get_nrows()
+  nc  = tmp%get_ncols()
+  nza = tmp%get_nzeros()
+
+  a%psb_d_base_sparse_mat = tmp%psb_d_base_sparse_mat
+
+  bs = 1!RSB_DEFAULT_BLOCKING
+
+  ! The sizes above describe TMP (the fixed copy), so the entries handed
+  ! to librsb must come from TMP as well, not from the original B.
+  info = Rsb_from_coo(a%rsbMat,tmp%val,tmp%ia,tmp%ja,nza,nr,nc,bs,bs)
+
+  call tmp%free()
+#endif
+
+  return
+
+end subroutine psb_d_cp_rsb_from_coo
--- a/rsb/impl/psb_d_cp_rsb_to_coo.f90
+++ b/rsb/impl/psb_d_cp_rsb_to_coo.f90
@ -0,0 +1,77 @@
+!                Parallel Sparse BLAS   GPU plugin 
+!      (C) Copyright 2013
+!  
+!                         Salvatore Filippone
+!                         Alessandro Fanfarillo
+!   
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!   
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!   
+  
+
+!
+! psb_d_cp_rsb_to_coo
+!   Copy the RSB matrix A into the COO matrix B.
+!
+!   a     (in)     RSB matrix; only read.
+!   b     (inout)  COO matrix; reallocated and filled with the entries of A.
+!   info  (out)    psb_success_ on success, psb_err_alloc_dealloc_ if the
+!                  work buffer cannot be allocated, the librsb error code
+!                  if the extraction fails, otherwise the result of
+!                  b%fix().
+!
+subroutine psb_d_cp_rsb_to_coo(a,b,info) 
+  
+  use psb_base_mod
+  use rsb
+  use psb_d_rsb_mat_mod, psb_protect_name => psb_d_cp_rsb_to_coo
+  implicit none 
+
+  class(psb_d_rsb_sparse_mat), intent(in)    :: a
+  class(psb_d_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  !locals
+  real(psb_dpk_), pointer :: val_point(:)
+  type(c_ptr)             :: t_p, s_p
+  integer(psb_ipk_)       :: k, nr, nza, nc, ierr
+
+  info = psb_success_
+
+  nr  = a%get_nrows()
+  nc  = a%get_ncols()
+  nza = a%get_nzeros()
+
+  call b%allocate(nr,nc,nza)
+  b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat
+
+  ! With no entries there is nothing to extract, and val_point(1) would
+  ! not exist, so the librsb call is skipped altogether.
+  if (nza > 0) then 
+    ! The coefficients go through a pointer buffer so that c_loc can be
+    ! applied to it.
+    allocate(val_point(nza),stat=ierr)
+    if (ierr /= 0) then 
+      info = psb_err_alloc_dealloc_
+      return
+    end if
+
+    t_p = c_loc(val_point(1))
+
+    info = rsb_mtx_get_coo(a%rsbMat, t_p, b%ia, b%ja,RSB_FLAG_FORTRAN_INDICES_INTERFACE)
+
+    if (info /= psb_success_) then 
+      ! Report the failure and hand the librsb code back to the caller
+      ! instead of letting a later call overwrite it.
+      ! A null stream is understood to select the librsb default output
+      ! stream -- TODO confirm against the librsb documentation.
+      s_p = c_null_ptr
+      k = rsb_perror(s_p,info)
+      deallocate(val_point)
+      return
+    end if
+
+    b%val(1:nza) = val_point(1:nza)
+
+    deallocate(val_point)
+  end if
+
+  call b%set_nzeros(nza)
+  call b%fix(info)
+
+end subroutine psb_d_cp_rsb_to_coo
--- a/rsb/impl/psb_d_mv_rsb_from_coo.f90
+++ b/rsb/impl/psb_d_mv_rsb_from_coo.f90
@ -0,0 +1,114 @@
+!                Parallel Sparse BLAS   GPU plugin 
+!      (C) Copyright 2013
+!  
+!                         Salvatore Filippone
+!                         Alessandro Fanfarillo
+!   
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!   
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!   
+  
+
+!
+! psb_d_mv_rsb_from_coo
+!   Move the contents of the COO matrix B into the RSB matrix A.
+!
+!   a     (inout)  RSB matrix that receives the data.
+!   b     (inout)  COO matrix; fixed in place, then freed on success.
+!   info  (out)    psb_success_ on success; the code returned by b%fix()
+!                  if that step fails; psb_err_alloc_dealloc_ if the
+!                  conversion itself fails.
+!
+!   There is no in-place conversion: the data is copied with cp_from_coo
+!   and B is released afterwards.
+!
+subroutine psb_d_mv_rsb_from_coo(a,b,info) 
+  
+  use psb_base_mod
+  use psb_d_rsb_mat_mod, psb_protect_name => psb_d_mv_rsb_from_coo
+  implicit none 
+
+  class(psb_d_rsb_sparse_mat), intent(inout) :: a
+  class(psb_d_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  info = psb_success_
+
+  call b%fix(info)
+  if (info /= psb_success_) return
+
+  call a%cp_from_coo(b,info)
+  if (info /= 0) then 
+    ! Any conversion failure is reported with this code, as before.
+    info = psb_err_alloc_dealloc_
+    return
+  end if
+
+  call b%free()
+
+  return
+
+end subroutine psb_d_mv_rsb_from_coo
--- a/rsb/impl/psb_d_rsb_csmv.F90
+++ b/rsb/impl/psb_d_rsb_csmv.F90
@ -0,0 +1,138 @@
+!                Parallel Sparse BLAS   GPU plugin 
+!      (C) Copyright 2013
+!  
+!                         Salvatore Filippone
+!                         Alessandro Fanfarillo
+!   
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!   
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!   
+  
+
+!
+! psb_d_rsb_csmv
+!   Sparse matrix-vector product for a matrix stored in RSB format; the
+!   arithmetic is delegated to Rsb_spmv.
+!
+!   alpha, beta (in)     scalars passed on to Rsb_spmv.
+!   a           (in)     RSB matrix; must be in the assembled state.
+!   x           (in)     input vector.
+!   y           (inout)  output vector.
+!   info        (out)    psb_success_ on success,
+!                        psb_err_invalid_mat_state_ if A is not assembled,
+!                        36 if X or Y is too short, otherwise the code
+!                        returned by Rsb_spmv.
+!   trans       (in)     optional; 'N' (default), 'T' or 'C'.
+!
+!   When HAVE_RSB is not defined the routine does nothing and returns
+!   psb_success_.
+!
+subroutine psb_d_rsb_csmv(alpha,a,x,beta,y,info,trans) 
+  
+  use psb_base_mod
+  use rsb_mod
+  use psb_d_rsb_mat_mod, psb_protect_name => psb_d_rsb_csmv
+  implicit none 
+  class(psb_d_rsb_sparse_mat), intent(in) :: a
+  real(psb_dpk_), intent(in)          :: alpha, beta, x(:)
+  real(psb_dpk_), intent(inout)       :: y(:)
+  integer(psb_ipk_), intent(out)      :: info
+  character, optional, intent(in)     :: trans
+
+  ! Integers use psb_ipk_ like the other routines of this format; m and n
+  ! also appear next to ione/izero in the array constructors below.
+  character          :: trans_
+  integer(psb_ipk_)  :: m, n
+  logical            :: tra
+  integer(psb_ipk_)  :: err_act
+  character(len=20)  :: name='d_rsb_csmv'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+#ifdef HAVE_RSB
+  if (present(trans)) then
+    trans_ = trans
+  else
+    trans_ = 'N'
+  end if
+
+  if (.not.a%is_asb()) then 
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+
+  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
+
+  ! n is the length required of x, m the length required of y.
+  if (tra) then 
+    m = a%get_ncols()
+    n = a%get_nrows()
+  else
+    n = a%get_ncols()
+    m = a%get_nrows()
+  end if
+
+  ! i_err carries the position of the offending argument (3 for x,
+  ! 5 for y) followed by the required size.
+  if (size(x,1)<n) then 
+    info = 36
+    call psb_errpush(info,name,i_err=(/3*ione,n,izero,izero,izero/))
+    goto 9999
+  end if
+
+  if (size(y,1)<m) then 
+    info = 36
+    call psb_errpush(info,name,i_err=(/5*ione,m,izero,izero,izero/))
+    goto 9999
+  end if
+
+  info = Rsb_spmv(a%rsbMat,x,alpha,y,beta,trans_)
+
+  if (info /= 0) goto 9999
+#endif
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 continue
+  call psb_erractionrestore(err_act)
+
+  if (err_act == psb_act_abort_) then
+    call psb_error()
+  end if
+  return
+
+
+end subroutine psb_d_rsb_csmv
--- a/rsb/psb_d_rsb_mat_mod.f90
+++ b/rsb/psb_d_rsb_mat_mod.f90
@ -0,0 +1,487 @@
+!                Parallel Sparse BLAS   GPU plugin 
+!      (C) Copyright 2013
+!  
+!                         Salvatore Filippone
+!                         Alessandro Fanfarillo
+!   
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!   
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!   
+  
+module psb_d_rsb_mat_mod
+
+  use psb_d_base_mat_mod
+  use iso_c_binding
+
+  type, extends(psb_d_base_sparse_mat) :: psb_d_rsb_sparse_mat
+     
+     type(c_ptr) :: rsbMat = c_null_ptr
+
+  contains
+ !   procedure, pass(a) :: get_size     => d_rsb_get_size
+    procedure, pass(a) :: get_nzeros   => d_rsb_get_nzeros
+    procedure, nopass  :: get_fmt      => d_rsb_get_fmt
+    procedure, pass(a) :: sizeof       => d_rsb_sizeof
+ !   procedure, pass(a) :: csmm         => psb_d_rsb_csmm
+    procedure, pass(a) :: csmv         => psb_d_rsb_csmv
+    ! procedure, pass(a) :: inner_cssm   => psb_d_rsb_cssm
+    ! procedure, pass(a) :: inner_cssv   => psb_d_rsb_cssv
+    ! procedure, pass(a) :: scals        => psb_d_rsb_scals
+    ! procedure, pass(a) :: scalv        => psb_d_rsb_scal
+    ! procedure, pass(a) :: maxval       => psb_d_rsb_maxval
+    ! procedure, pass(a) :: csnmi        => psb_d_rsb_csnmi
+    ! procedure, pass(a) :: csnm1        => psb_d_rsb_csnm1
+    ! procedure, pass(a) :: rowsum       => psb_d_rsb_rowsum
+    ! procedure, pass(a) :: arwsum       => psb_d_rsb_arwsum
+    ! procedure, pass(a) :: colsum       => psb_d_rsb_colsum
+    ! procedure, pass(a) :: aclsum       => psb_d_rsb_aclsum
+    ! procedure, pass(a) :: reallocate_nz => psb_d_rsb_reallocate_nz
+    ! procedure, pass(a) :: allocate_mnnz => psb_d_rsb_allocate_mnnz
+    procedure, pass(a) :: cp_to_coo    => psb_d_cp_rsb_to_coo
+    procedure, pass(a) :: cp_from_coo  => psb_d_cp_rsb_from_coo
+    ! procedure, pass(a) :: cp_to_fmt    => psb_d_cp_rsb_to_fmt
+    ! procedure, pass(a) :: cp_from_fmt  => psb_d_cp_rsb_from_fmt
+!    procedure, pass(a) :: mv_to_coo    => psb_d_mv_rsb_to_coo
+    procedure, pass(a) :: mv_from_coo  => psb_d_mv_rsb_from_coo
+    ! procedure, pass(a) :: mv_to_fmt    => psb_d_mv_rsb_to_fmt
+    ! procedure, pass(a) :: mv_from_fmt  => psb_d_mv_rsb_from_fmt
+    ! procedure, pass(a) :: csput        => psb_d_rsb_csput
+    ! procedure, pass(a) :: get_diag     => psb_d_rsb_get_diag
+    ! procedure, pass(a) :: csgetptn     => psb_d_rsb_csgetptn
+    ! procedure, pass(a) :: csgetrow     => psb_d_rsb_csgetrow
+    ! procedure, pass(a) :: get_nz_row   => d_rsb_get_nz_row
+    ! procedure, pass(a) :: reinit       => psb_d_rsb_reinit
+    ! procedure, pass(a) :: trim         => psb_d_rsb_trim
+    ! procedure, pass(a) :: print        => psb_d_rsb_print
+    procedure, pass(a) :: free         => d_rsb_free
+    ! procedure, pass(a) :: mold         => psb_d_rsb_mold
+
+  end type psb_d_rsb_sparse_mat
+
+  private :: d_rsb_get_nzeros, d_rsb_free,  d_rsb_get_fmt, &
+       & d_rsb_get_size, d_rsb_sizeof, d_rsb_get_nz_row
+
+  interface
+    subroutine  psb_d_rsb_reallocate_nz(nz,a) 
+      import :: psb_d_rsb_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in) :: nz
+      class(psb_d_rsb_sparse_mat), intent(inout) :: a
+    end subroutine psb_d_rsb_reallocate_nz
+  end interface
+  
+  interface 
+    subroutine psb_d_rsb_reinit(a,clear)
+      import :: psb_d_rsb_sparse_mat
+      class(psb_d_rsb_sparse_mat), intent(inout) :: a   
+      logical, intent(in), optional :: clear
+    end subroutine psb_d_rsb_reinit
+  end interface
+  
+  interface
+    subroutine  psb_d_rsb_trim(a)
+      import :: psb_d_rsb_sparse_mat
+      class(psb_d_rsb_sparse_mat), intent(inout) :: a
+    end subroutine psb_d_rsb_trim
+  end interface
+  
+  interface 
+    subroutine psb_d_rsb_mold(a,b,info) 
+      import :: psb_d_rsb_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
+      class(psb_d_rsb_sparse_mat), intent(in)                  :: a
+      class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
+      integer(psb_ipk_), intent(out)                           :: info
+    end subroutine psb_d_rsb_mold
+  end interface
+
+  interface
+    subroutine  psb_d_rsb_allocate_mnnz(m,n,a,nz) 
+      import :: psb_d_rsb_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in) :: m,n
+      class(psb_d_rsb_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(in), optional :: nz
+    end subroutine psb_d_rsb_allocate_mnnz
+  end interface
+  
+  interface
+    subroutine psb_d_rsb_print(iout,a,iv,head,ivr,ivc)
+      import :: psb_d_rsb_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in)           :: iout
+      class(psb_d_rsb_sparse_mat), intent(in) :: a   
+      integer(psb_ipk_), intent(in), optional :: iv(:)
+      character(len=*), optional              :: head
+      integer(psb_ipk_), intent(in), optional :: ivr(:), ivc(:)
+    end subroutine psb_d_rsb_print
+  end interface
+  
+  interface 
+    subroutine psb_d_cp_rsb_to_coo(a,b,info) 
+      import :: psb_d_coo_sparse_mat, psb_d_rsb_sparse_mat, psb_ipk_
+      class(psb_d_rsb_sparse_mat), intent(in)    :: a
+      class(psb_d_coo_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out)             :: info
+    end subroutine psb_d_cp_rsb_to_coo
+  end interface
+  
+  interface 
+    subroutine psb_d_cp_rsb_from_coo(a,b,info) 
+      import :: psb_d_rsb_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_
+      class(psb_d_rsb_sparse_mat), intent(inout) :: a
+      class(psb_d_coo_sparse_mat), intent(in)    :: b
+      integer(psb_ipk_), intent(out)             :: info
+    end subroutine psb_d_cp_rsb_from_coo
+  end interface
+  
+  interface 
+    subroutine psb_d_cp_rsb_to_fmt(a,b,info) 
+      import :: psb_d_rsb_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
+      class(psb_d_rsb_sparse_mat), intent(in)     :: a
+      class(psb_d_base_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out)              :: info
+    end subroutine psb_d_cp_rsb_to_fmt
+  end interface
+  
+  ! Explicit interface for the external routine psb_d_cp_rsb_from_fmt.
+  ! a is updated; b is any extension of psb_d_base_sparse_mat and is
+  ! read-only; info is returned to the caller.
+  interface
+    subroutine psb_d_cp_rsb_from_fmt(a, b, info)
+      import :: psb_ipk_, psb_d_base_sparse_mat, psb_d_rsb_sparse_mat
+      class(psb_d_rsb_sparse_mat),  intent(inout) :: a
+      class(psb_d_base_sparse_mat), intent(in)    :: b
+      integer(psb_ipk_),            intent(out)   :: info
+    end subroutine psb_d_cp_rsb_from_fmt
+  end interface
+  
+  ! Explicit interface for the external routine psb_d_mv_rsb_to_coo.
+  ! Unlike the cp_ variant, both matrices are intent(inout)
+  ! (presumably the source is consumed by the move -- TODO confirm).
+  interface
+    subroutine psb_d_mv_rsb_to_coo(a, b, info)
+      import :: psb_ipk_, psb_d_coo_sparse_mat, psb_d_rsb_sparse_mat
+      class(psb_d_rsb_sparse_mat), intent(inout) :: a
+      class(psb_d_coo_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_),           intent(out)   :: info
+    end subroutine psb_d_mv_rsb_to_coo
+  end interface
+  
+  ! Explicit interface for the external routine psb_d_mv_rsb_from_coo.
+  ! Both matrices are intent(inout) (presumably b is consumed by the
+  ! move -- TODO confirm); info is returned to the caller.
+  interface
+    subroutine psb_d_mv_rsb_from_coo(a, b, info)
+      import :: psb_ipk_, psb_d_coo_sparse_mat, psb_d_rsb_sparse_mat
+      class(psb_d_rsb_sparse_mat), intent(inout) :: a
+      class(psb_d_coo_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_),           intent(out)   :: info
+    end subroutine psb_d_mv_rsb_from_coo
+  end interface
+  
+  ! Explicit interface for the external routine psb_d_mv_rsb_to_fmt.
+  ! a and the generic-format matrix b are both intent(inout);
+  ! info is returned to the caller.
+  interface
+    subroutine psb_d_mv_rsb_to_fmt(a, b, info)
+      import :: psb_ipk_, psb_d_base_sparse_mat, psb_d_rsb_sparse_mat
+      class(psb_d_rsb_sparse_mat),  intent(inout) :: a
+      class(psb_d_base_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_),            intent(out)   :: info
+    end subroutine psb_d_mv_rsb_to_fmt
+  end interface
+  
+  ! Explicit interface for the external routine psb_d_mv_rsb_from_fmt.
+  ! a and the generic-format matrix b are both intent(inout);
+  ! info is returned to the caller.
+  interface
+    subroutine psb_d_mv_rsb_from_fmt(a, b, info)
+      import :: psb_ipk_, psb_d_base_sparse_mat, psb_d_rsb_sparse_mat
+      class(psb_d_rsb_sparse_mat),  intent(inout) :: a
+      class(psb_d_base_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_),            intent(out)   :: info
+    end subroutine psb_d_mv_rsb_from_fmt
+  end interface
+  
+  ! Explicit interface for the external routine psb_d_rsb_csput.
+  ! nz, ia, ja, val and the four bounds imin/imax/jmin/jmax are inputs,
+  ! a is updated, info is returned; gtl is an optional integer array
+  ! (presumably an index translation map -- TODO confirm).
+  interface
+    subroutine psb_d_rsb_csput(nz, ia, ja, val, a, imin, imax, jmin, jmax, info, gtl)
+      import :: psb_ipk_, psb_dpk_, psb_d_rsb_sparse_mat
+      integer(psb_ipk_),           intent(in)           :: nz
+      integer(psb_ipk_),           intent(in)           :: ia(:)
+      integer(psb_ipk_),           intent(in)           :: ja(:)
+      real(psb_dpk_),              intent(in)           :: val(:)
+      class(psb_d_rsb_sparse_mat), intent(inout)        :: a
+      integer(psb_ipk_),           intent(in)           :: imin, imax
+      integer(psb_ipk_),           intent(in)           :: jmin, jmax
+      integer(psb_ipk_),           intent(out)          :: info
+      integer(psb_ipk_),           intent(in), optional :: gtl(:)
+    end subroutine psb_d_rsb_csput
+  end interface
+  
+  ! Explicit interface for the external routine psb_d_rsb_csgetptn.
+  ! Same argument list as psb_d_rsb_csgetrow minus the coefficient array:
+  ! only the index arrays ia/ja (allocatable, intent(inout)) and the
+  ! count nz come back, together with info.
+  interface
+    subroutine psb_d_rsb_csgetptn(imin, imax, a, nz, ia, ja, info, &
+         & jmin, jmax, iren, append, nzin, rscale, cscale)
+      import :: psb_ipk_, psb_dpk_, psb_d_rsb_sparse_mat
+      integer(psb_ipk_),              intent(in)           :: imin, imax
+      class(psb_d_rsb_sparse_mat),    intent(in)           :: a
+      integer(psb_ipk_),              intent(out)          :: nz
+      integer(psb_ipk_), allocatable, intent(inout)        :: ia(:)
+      integer(psb_ipk_), allocatable, intent(inout)        :: ja(:)
+      integer(psb_ipk_),              intent(out)          :: info
+      integer(psb_ipk_),              intent(in), optional :: jmin, jmax
+      integer(psb_ipk_),              intent(in), optional :: iren(:)
+      logical,                        intent(in), optional :: append
+      integer(psb_ipk_),              intent(in), optional :: nzin
+      logical,                        intent(in), optional :: rscale, cscale
+    end subroutine psb_d_rsb_csgetptn
+  end interface
+  
+  ! Explicit interface for the external routine psb_d_rsb_csgetrow.
+  ! imin/imax and the matrix a are inputs; nz and info are outputs;
+  ! ia, ja and val are allocatable intent(inout) arrays. All remaining
+  ! arguments are optional inputs.
+  interface
+    subroutine psb_d_rsb_csgetrow(imin, imax, a, nz, ia, ja, val, info, &
+         & jmin, jmax, iren, append, nzin, rscale, cscale)
+      import :: psb_ipk_, psb_dpk_, psb_d_rsb_sparse_mat
+      integer(psb_ipk_),              intent(in)           :: imin, imax
+      class(psb_d_rsb_sparse_mat),    intent(in)           :: a
+      integer(psb_ipk_),              intent(out)          :: nz
+      integer(psb_ipk_), allocatable, intent(inout)        :: ia(:)
+      integer(psb_ipk_), allocatable, intent(inout)        :: ja(:)
+      real(psb_dpk_),    allocatable, intent(inout)        :: val(:)
+      integer(psb_ipk_),              intent(out)          :: info
+      integer(psb_ipk_),              intent(in), optional :: jmin, jmax
+      integer(psb_ipk_),              intent(in), optional :: iren(:)
+      logical,                        intent(in), optional :: append
+      integer(psb_ipk_),              intent(in), optional :: nzin
+      logical,                        intent(in), optional :: rscale, cscale
+    end subroutine psb_d_rsb_csgetrow
+  end interface
+
+  ! Explicit interface for the external routine psb_d_rsb_csgetblk.
+  ! imin/imax and a are inputs, the COO matrix b is updated, info is
+  ! returned; jmin, jmax, iren, append, rscale and cscale are optional.
+  interface
+    subroutine psb_d_rsb_csgetblk(imin, imax, a, b, info, &
+         & jmin, jmax, iren, append, rscale, cscale)
+      import :: psb_ipk_, psb_dpk_, psb_d_coo_sparse_mat, psb_d_rsb_sparse_mat
+      integer(psb_ipk_),           intent(in)           :: imin, imax
+      class(psb_d_rsb_sparse_mat), intent(in)           :: a
+      class(psb_d_coo_sparse_mat), intent(inout)        :: b
+      integer(psb_ipk_),           intent(out)          :: info
+      integer(psb_ipk_),           intent(in), optional :: jmin, jmax
+      integer(psb_ipk_),           intent(in), optional :: iren(:)
+      logical,                     intent(in), optional :: append
+      logical,                     intent(in), optional :: rscale, cscale
+    end subroutine psb_d_rsb_csgetblk
+  end interface
+    
+  ! Explicit interfaces for the external routines psb_d_rsb_cssv and
+  ! psb_d_rsb_cssm. The two signatures differ only in the rank of x and y
+  ! (rank 1 vs rank 2). alpha, beta, x and a are inputs, y is updated,
+  ! info is returned, trans is an optional single character.
+  interface
+    subroutine psb_d_rsb_cssv(alpha, a, x, beta, y, info, trans)
+      import :: psb_ipk_, psb_dpk_, psb_d_rsb_sparse_mat
+      real(psb_dpk_),              intent(in)           :: alpha
+      class(psb_d_rsb_sparse_mat), intent(in)           :: a
+      real(psb_dpk_),              intent(in)           :: x(:)
+      real(psb_dpk_),              intent(in)           :: beta
+      real(psb_dpk_),              intent(inout)        :: y(:)
+      integer(psb_ipk_),           intent(out)          :: info
+      character,                   intent(in), optional :: trans
+    end subroutine psb_d_rsb_cssv
+    subroutine psb_d_rsb_cssm(alpha, a, x, beta, y, info, trans)
+      import :: psb_ipk_, psb_dpk_, psb_d_rsb_sparse_mat
+      real(psb_dpk_),              intent(in)           :: alpha
+      class(psb_d_rsb_sparse_mat), intent(in)           :: a
+      real(psb_dpk_),              intent(in)           :: x(:,:)
+      real(psb_dpk_),              intent(in)           :: beta
+      real(psb_dpk_),              intent(inout)        :: y(:,:)
+      integer(psb_ipk_),           intent(out)          :: info
+      character,                   intent(in), optional :: trans
+    end subroutine psb_d_rsb_cssm
+  end interface
+  
+  ! Explicit interfaces for the external routines psb_d_rsb_csmv and
+  ! psb_d_rsb_csmm. The two signatures differ only in the rank of x and y
+  ! (rank 1 vs rank 2). alpha, beta, x and a are inputs, y is updated,
+  ! info is returned, trans is an optional single character.
+  interface
+    subroutine psb_d_rsb_csmv(alpha, a, x, beta, y, info, trans)
+      import :: psb_ipk_, psb_dpk_, psb_d_rsb_sparse_mat
+      real(psb_dpk_),              intent(in)           :: alpha
+      class(psb_d_rsb_sparse_mat), intent(in)           :: a
+      real(psb_dpk_),              intent(in)           :: x(:)
+      real(psb_dpk_),              intent(in)           :: beta
+      real(psb_dpk_),              intent(inout)        :: y(:)
+      integer(psb_ipk_),           intent(out)          :: info
+      character,                   intent(in), optional :: trans
+    end subroutine psb_d_rsb_csmv
+    subroutine psb_d_rsb_csmm(alpha, a, x, beta, y, info, trans)
+      import :: psb_ipk_, psb_dpk_, psb_d_rsb_sparse_mat
+      real(psb_dpk_),              intent(in)           :: alpha
+      class(psb_d_rsb_sparse_mat), intent(in)           :: a
+      real(psb_dpk_),              intent(in)           :: x(:,:)
+      real(psb_dpk_),              intent(in)           :: beta
+      real(psb_dpk_),              intent(inout)        :: y(:,:)
+      integer(psb_ipk_),           intent(out)          :: info
+      character,                   intent(in), optional :: trans
+    end subroutine psb_d_rsb_csmm
+  end interface
+  
+  
+  ! Explicit interface for the external function psb_d_rsb_maxval:
+  ! takes a read-only matrix and yields one real(psb_dpk_) value.
+  interface
+    function psb_d_rsb_maxval(a) result(res)
+      import :: psb_dpk_, psb_d_rsb_sparse_mat
+      class(psb_d_rsb_sparse_mat), intent(in) :: a
+      real(psb_dpk_)                          :: res
+    end function psb_d_rsb_maxval
+  end interface
+  
+  ! Explicit interface for the external function psb_d_rsb_csnmi
+  ! (presumably the infinity norm -- TODO confirm): read-only matrix in,
+  ! one real(psb_dpk_) value out.
+  interface
+    function psb_d_rsb_csnmi(a) result(res)
+      import :: psb_dpk_, psb_d_rsb_sparse_mat
+      class(psb_d_rsb_sparse_mat), intent(in) :: a
+      real(psb_dpk_)                          :: res
+    end function psb_d_rsb_csnmi
+  end interface
+  
+  ! Explicit interface for the external function psb_d_rsb_csnm1
+  ! (presumably the 1-norm -- TODO confirm): read-only matrix in,
+  ! one real(psb_dpk_) value out.
+  interface
+    function psb_d_rsb_csnm1(a) result(res)
+      import :: psb_dpk_, psb_d_rsb_sparse_mat
+      class(psb_d_rsb_sparse_mat), intent(in) :: a
+      real(psb_dpk_)                          :: res
+    end function psb_d_rsb_csnm1
+  end interface
+
+  ! Explicit interface for the external routine psb_d_rsb_rowsum:
+  ! fills the output vector d from the read-only matrix a.
+  interface
+    subroutine psb_d_rsb_rowsum(d, a)
+      import :: psb_dpk_, psb_d_rsb_sparse_mat
+      real(psb_dpk_),              intent(out) :: d(:)
+      class(psb_d_rsb_sparse_mat), intent(in)  :: a
+    end subroutine psb_d_rsb_rowsum
+  end interface
+
+  ! Explicit interface for the external routine psb_d_rsb_arwsum
+  ! (presumably row sums of absolute values -- TODO confirm):
+  ! fills the output vector d from the read-only matrix a.
+  interface
+    subroutine psb_d_rsb_arwsum(d, a)
+      import :: psb_dpk_, psb_d_rsb_sparse_mat
+      real(psb_dpk_),              intent(out) :: d(:)
+      class(psb_d_rsb_sparse_mat), intent(in)  :: a
+    end subroutine psb_d_rsb_arwsum
+  end interface
+  
+  ! Explicit interface for the external routine psb_d_rsb_colsum:
+  ! fills the output vector d from the read-only matrix a.
+  interface
+    subroutine psb_d_rsb_colsum(d, a)
+      import :: psb_dpk_, psb_d_rsb_sparse_mat
+      real(psb_dpk_),              intent(out) :: d(:)
+      class(psb_d_rsb_sparse_mat), intent(in)  :: a
+    end subroutine psb_d_rsb_colsum
+  end interface
+
+  ! Explicit interface for the external routine psb_d_rsb_aclsum
+  ! (presumably column sums of absolute values -- TODO confirm):
+  ! fills the output vector d from the read-only matrix a.
+  interface
+    subroutine psb_d_rsb_aclsum(d, a)
+      import :: psb_dpk_, psb_d_rsb_sparse_mat
+      real(psb_dpk_),              intent(out) :: d(:)
+      class(psb_d_rsb_sparse_mat), intent(in)  :: a
+    end subroutine psb_d_rsb_aclsum
+  end interface
+    
+  ! Explicit interface for the external routine psb_d_rsb_get_diag:
+  ! a is read-only, the vector d and info are outputs.
+  interface
+    subroutine psb_d_rsb_get_diag(a, d, info)
+      import :: psb_ipk_, psb_dpk_, psb_d_rsb_sparse_mat
+      class(psb_d_rsb_sparse_mat), intent(in)  :: a
+      real(psb_dpk_),              intent(out) :: d(:)
+      integer(psb_ipk_),           intent(out) :: info
+    end subroutine psb_d_rsb_get_diag
+  end interface
+  
+  ! Explicit interface for the external routine psb_d_rsb_scal:
+  ! vector d is an input, a is updated in place, info is returned;
+  ! side is an optional single character (presumably selecting row or
+  ! column scaling -- TODO confirm).
+  interface
+    subroutine psb_d_rsb_scal(d, a, info, side)
+      import :: psb_ipk_, psb_dpk_, psb_d_rsb_sparse_mat
+      real(psb_dpk_),              intent(in)           :: d(:)
+      class(psb_d_rsb_sparse_mat), intent(inout)        :: a
+      integer(psb_ipk_),           intent(out)          :: info
+      character,                   intent(in), optional :: side
+    end subroutine psb_d_rsb_scal
+  end interface
+  
+  ! Explicit interface for the external routine psb_d_rsb_scals:
+  ! scalar counterpart of psb_d_rsb_scal (d is a single real value);
+  ! a is updated in place, info is returned.
+  interface
+    subroutine psb_d_rsb_scals(d, a, info)
+      import :: psb_ipk_, psb_dpk_, psb_d_rsb_sparse_mat
+      real(psb_dpk_),              intent(in)    :: d
+      class(psb_d_rsb_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_),           intent(out)   :: info
+    end subroutine psb_d_rsb_scals
+  end interface
+  
+
+
+contains 
+
+  ! == ===================================
+  !
+  !
+  !
+  ! Getters 
+  !
+  !
+  !
+  !
+  !
+  ! == ===================================
+
+  
+  !
+  ! Storage-size query for an RSB matrix.
+  !
+  ! No byte count is computed for this format; the function result is
+  ! explicitly set to zero so that callers always receive a defined value.
+  !
+  !   a    - matrix being queried (not inspected here)
+  !   res  - always 0
+  !
+  function d_rsb_sizeof(a) result(res)
+    implicit none 
+    class(psb_d_rsb_sparse_mat), intent(in) :: a
+    integer(psb_epk_) :: res
+
+    ! TODO(review): report the real footprint of the librsb object once a
+    ! suitable query is wired in.
+    res = 0_psb_epk_
+
+  end function d_rsb_sizeof
+
+  !
+  ! Name tag of this storage format: the literal 'RSB', blank-padded to
+  ! the 5-character result length.
+  !
+  function d_rsb_get_fmt() result(res)
+    implicit none
+    character(len=*), parameter :: fmt_tag = 'RSB'
+    character(len=5)            :: res
+
+    res = fmt_tag
+  end function d_rsb_get_fmt
+  
+  !
+  ! Number of nonzeros, as reported by the C helper bound to
+  ! Rsb_get_nzeros for the handle a%rsbMat. The C integer result is
+  ! converted to the psb_ipk_ kind.
+  !
+  function d_rsb_get_nzeros(a) result(res)
+    use rsb_mod
+    implicit none
+    class(psb_d_rsb_sparse_mat), intent(in) :: a
+    integer(psb_ipk_)                       :: res
+
+    res = int(Rsb_get_nzeros(a%rsbMat), kind=psb_ipk_)
+  end function d_rsb_get_nzeros
+
+  !
+  ! Storage capacity query.
+  !
+  ! The result is taken from d_rsb_get_nzeros, i.e. the nonzero count
+  ! reported for the underlying handle, so that the function always
+  ! returns a defined value.
+  ! NOTE(review): assumes an RSB matrix has no spare capacity beyond its
+  ! stored nonzeros -- confirm against the callers of get_size.
+  !
+  function d_rsb_get_size(a) result(res)
+    implicit none 
+    class(psb_d_rsb_sparse_mat), intent(in) :: a
+    integer(psb_ipk_) :: res
+
+    res = d_rsb_get_nzeros(a)
+
+  end function d_rsb_get_size
+
+
+  !
+  ! Per-row nonzero count: placeholder implementation.
+  ! The row index idx is accepted but not used; the result is always 0.
+  !
+  function d_rsb_get_nz_row(idx, a) result(res)
+    implicit none
+    integer(psb_ipk_),           intent(in) :: idx
+    class(psb_d_rsb_sparse_mat), intent(in) :: a
+    integer(psb_ipk_)                       :: res
+
+    res = 0_psb_ipk_
+  end function d_rsb_get_nz_row
+
+
+
+  ! == ===================================
+  !
+  !
+  !
+  ! Data management
+  !
+  !
+  !
+  !
+  !
+  ! == ===================================  
+
+  !
+  ! Release the librsb object owned by a and reset the matrix state.
+  !
+  ! The handle is freed only when it is associated and is then set to
+  ! c_null_ptr, so that calling free twice on the same matrix does not
+  ! hand an already released pointer back to the C layer.
+  ! Afterwards the matrix is marked null with zero rows and columns.
+  !
+  subroutine  d_rsb_free(a)
+    use rsb_mod
+    use iso_c_binding, only : c_associated, c_null_ptr
+    implicit none 
+
+    class(psb_d_rsb_sparse_mat), intent(inout) :: a
+
+    if (c_associated(a%rsbMat)) then
+      call freeRsbMat(a%rsbMat)
+      a%rsbMat = c_null_ptr
+    end if
+
+    call a%set_null()
+    call a%set_nrows(0)
+    call a%set_ncols(0)
+
+  end subroutine d_rsb_free
+
+
+end module psb_d_rsb_mat_mod
--- a/rsb/psb_rsb_mod.F90
+++ b/rsb/psb_rsb_mod.F90
@ -0,0 +1,50 @@
+!                Parallel Sparse BLAS   GPU plugin 
+!      (C) Copyright 2013
+!  
+!                         Salvatore Filippone
+!                         Alessandro Fanfarillo
+!   
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!   
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!   
+  
+
+!
+! Umbrella module for the RSB plugin: it defines nothing of its own and
+! only re-exports the modules listed below.
+!
+module psb_rsb_mod
+  use psb_const_mod
+  use psb_rsb_penv_mod
+  use rsb_mod
+  use psb_d_rsb_mat_mod
+
+  ! Currently disabled (kept as commented-out use statements):
+  ! use psb_d_ell_mat_mod
+  ! use psb_s_ell_mat_mod
+  ! use psb_z_ell_mat_mod
+  ! use psb_c_ell_mat_mod
+  ! use psb_s_hll_mat_mod
+  ! use psb_d_hll_mat_mod
+  ! use psb_c_hll_mat_mod
+  ! use psb_z_hll_mat_mod
+  ! use psb_d_dia_mat_mod
+  ! use psb_d_hdia_mat_mod
+end module psb_rsb_mod
--- a/rsb/psb_rsb_penv_mod.F90
+++ b/rsb/psb_rsb_penv_mod.F90
@ -0,0 +1,99 @@
+!                Parallel Sparse BLAS   GPU plugin 
+!      (C) Copyright 2013
+!  
+!                         Salvatore Filippone
+!                         Alessandro Fanfarillo
+!   
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!   
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!   
+  
+
+!
+! Environment handling for the RSB plugin.
+!
+! Provides psb_rsb_init / psb_rsb_exit, thin wrappers around the C entry
+! points rsbInit / rsbExit. When the preprocessor symbol HAVE_RSB is not
+! defined both wrappers are empty.
+!
+module psb_rsb_penv_mod
+  use psb_const_mod
+  use psb_penv_mod
+  use iso_c_binding
+  implicit none
+
+#if defined(HAVE_RSB)
+  ! C function bound to the name rsbInit; returns a C int.
+  interface
+    function psb_C_rsb_init() &
+         & result(res) bind(c,name='rsbInit')
+      import :: c_int
+      implicit none
+      integer(c_int) :: res
+    end function psb_C_rsb_init
+  end interface
+
+  ! C function bound to the name rsbExit; returns a C int.
+  interface
+    function psb_C_rsb_exit() &
+         & result(res) bind(c,name='rsbExit')
+      import :: c_int
+      implicit none
+      integer(c_int) :: res
+    end function psb_C_rsb_exit
+  end interface
+#endif
+
+contains
+
+  !
+  ! Calls the C initialisation routine and prints a message on standard
+  ! output when it returns a nonzero code. No error is propagated to the
+  ! caller.
+  !
+  subroutine psb_rsb_init()
+#if defined (HAVE_RSB)
+    integer :: info
+
+    info = psb_C_rsb_init()
+    if (info /= 0) write(*,*) 'error during rsb_init'
+#endif
+  end subroutine psb_rsb_init
+
+  !
+  ! Calls the C finalisation routine and prints a message on standard
+  ! output when it returns a nonzero code. No error is propagated to the
+  ! caller.
+  !
+  subroutine psb_rsb_exit()
+#if defined (HAVE_RSB)
+    integer :: info
+
+    info = psb_C_rsb_exit()
+    if (info /= 0) write(*,*) 'error during rsb_exit'
+#endif
+  end subroutine psb_rsb_exit
+
+end module psb_rsb_penv_mod
--- a/rsb/rsb_int.c
+++ b/rsb/rsb_int.c
@ -0,0 +1,110 @@
+  /*             Parallel Sparse BLAS   GPU plugin  */
+  /*   (C) Copyright 2013 */
+
+  /*                      Salvatore Filippone */
+  /*                      Alessandro Fanfarillo */
+ 
+  /* Redistribution and use in source and binary forms, with or without */
+  /* modification, are permitted provided that the following conditions */
+  /* are met: */
+  /*   1. Redistributions of source code must retain the above copyright */
+  /*      notice, this list of conditions and the following disclaimer. */
+  /*   2. Redistributions in binary form must reproduce the above copyright */
+  /*      notice, this list of conditions, and the following disclaimer in the */
+  /*      documentation and/or other materials provided with the distribution. */
+  /*   3. The name of the PSBLAS group or the names of its contributors may */
+  /*      not be used to endorse or promote products derived from this */
+  /*      software without specific written permission. */
+ 
+  /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
+  /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
+  /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
+  /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
+  /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
+  /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
+  /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
+  /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
+  /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
+  /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
+  /* POSSIBILITY OF SUCH DAMAGE. */
+ 
+#include <stdio.h>
+#include <sys/time.h>
+#if defined(HAVE_RSB)
+#include "rsb.h"
+#include "rsb_int.h"
+
+/* Initialise librsb with RSB_NULL_INIT_OPTIONS.
+ * Returns 0 on success; on failure prints a message and returns 1. */
+int rsbInit()
+{
+  const rsb_err_t status = rsb_lib_init(RSB_NULL_INIT_OPTIONS);
+
+  if (status == RSB_ERR_NO_ERROR)
+    return 0;
+
+  printf("Error initializing the library!\n");
+  return 1;
+}
+
+/* Finalise librsb.
+ * rsb_lib_exit is given RSB_NULL_EXIT_OPTIONS, the macro librsb provides
+ * for this call (the init counterpart is RSB_NULL_INIT_OPTIONS).
+ * Returns 0 on success; on failure prints a message and returns 1. */
+int rsbExit()
+{
+  rsb_err_t errval = rsb_lib_exit(RSB_NULL_EXIT_OPTIONS);
+
+  if (errval != RSB_ERR_NO_ERROR)
+    {
+      printf("Error finalizing the library!\n");
+      return 1;
+    }
+
+  return 0;
+}
+
+/* Build a double-precision RSB matrix from COO triplets.
+ *
+ *   rsbMat  out: receives the new matrix handle (NULL on failure)
+ *   va      coefficients, ia/ja row and column indices; they are handed to
+ *           rsb_mtx_alloc_from_coo_const together with
+ *           RSB_FLAG_FORTRAN_INDICES_INTERFACE
+ *   nnz     number of entries; nr, nc matrix dimensions; br, bc blocking
+ *
+ * Returns 0 on success, 1 on failure (a message is printed). On failure a
+ * partially built matrix is released so the caller never sees a handle it
+ * must not use. */
+int Rsb_double_from_coo(void **rsbMat, double *va, int *ia,int *ja,int nnz,int nr,
+			int nc, int br, int bc)
+{
+  rsb_err_t errval = RSB_ERR_NO_ERROR;
+
+  if(!rsbMat)
+    {
+      printf("Error while allocating the matrix!\n");
+      return 1;
+    }
+
+  *rsbMat = rsb_mtx_alloc_from_coo_const(va,ia,ja,nnz,RSB_NUMERICAL_TYPE_DOUBLE,
+					 nr,nc,br,bc,
+					 RSB_FLAG_FORTRAN_INDICES_INTERFACE,&errval);
+
+  if((!*rsbMat) || (errval != RSB_ERR_NO_ERROR))
+    {
+      /* Do not leak or expose a matrix that librsb flagged as faulty. */
+      if(*rsbMat)
+	{
+	  rsb_mtx_free((struct rsb_mtx_t *)*rsbMat);
+	  *rsbMat = NULL;
+	}
+      printf("Error while allocating the matrix!\n");
+      return 1;
+    }
+  return 0;
+}
+
+/* y <- beta*y + alfa*op(A)*x, computed by rsb_spmv with unit strides.
+ *
+ *   rsbMat  matrix handle
+ *   x       input vector, y input/output vector
+ *   trans   'N' or 'n' selects op(A) = A; any other value selects the
+ *           transpose
+ *
+ * Returns 0 on success, 1 on failure (a message is printed). */
+int Rsb_double_spmv(void *rsbMat, double *x, double alfa, double *y, double beta,char trans)
+{
+  rsb_err_t errval = RSB_ERR_NO_ERROR;
+  /* Accept both cases of 'N': callers are not required to upper-case. */
+  rsb_trans_t op = (trans=='N' || trans=='n') ? RSB_TRANSPOSITION_N
+					      : RSB_TRANSPOSITION_T;
+
+  errval = rsb_spmv(op,&alfa,(struct rsb_mtx_t *)rsbMat,x,1,&beta,y,1);
+
+  if(errval != RSB_ERR_NO_ERROR)
+    {
+      printf("Error performing a multiplication!\n");
+      return 1;
+    }
+
+  return 0;
+}
+
+/* Number of nonzeros stored in the matrix.
+ * The query RSB_MIF_MATRIX_NNZ__TO__RSB_NNZ_INDEX_T writes an
+ * rsb_nnz_idx_t, so the value is read into that type and only then
+ * converted to int for the Fortran caller.
+ * Returns 0 for a NULL handle or when librsb reports an error. */
+int Rsb_getNZeros(void *rsbMat)
+{
+  rsb_nnz_idx_t nnz = 0;
+
+  if(!rsbMat)
+    return 0;
+
+  if(rsb_mtx_get_info((struct rsb_mtx_t *)rsbMat,
+		      RSB_MIF_MATRIX_NNZ__TO__RSB_NNZ_INDEX_T,
+		      (void *)&nnz) != RSB_ERR_NO_ERROR)
+    return 0;
+
+  return (int)nnz;
+}
+
+/* Hand the matrix handle to rsb_mtx_free; the value it returns is ignored. */
+void freeRsbMat(void *rsbMat)
+{
+  struct rsb_mtx_t *mtx = (struct rsb_mtx_t *)rsbMat;
+
+  (void)rsb_mtx_free(mtx);
+}
+
+#endif
--- a/rsb/rsb_int.h
+++ b/rsb/rsb_int.h
@ -0,0 +1,2 @@
+int Rsb_double_from_coo(void **rsbMat,double *va, int *ia,int *ja,int nnz,int nr,
+			int nc, int br, int bc);
--- a/rsb/rsb_mod.F90
+++ b/rsb/rsb_mod.F90
@ -0,0 +1,235 @@
+!                Parallel Sparse BLAS   GPU plugin 
+!      (C) Copyright 2013
+!  
+!                         Salvatore Filippone
+!                         Alessandro Fanfarillo
+!   
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!   
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!   
+  
+
+module rsb_mod
+  use rsb
+  use iso_c_binding 
+
+#ifdef HAVE_RSB
+
+ ! Generic Rsb_from_coo, currently with the single specific
+ ! Rsb_double_from_coo bound to the C function of the same name.
+ ! rsbMat receives the new handle; va, ia, ja are read-only arrays;
+ ! the remaining integers are passed by value. The result is a C int.
+ interface Rsb_from_coo
+    function Rsb_double_from_coo(rsbMat,va,ia,ja,nnz,nr,nc,br,bc) &
+         & result(res) bind(c,name='Rsb_double_from_coo')
+      use iso_c_binding
+      integer(c_int) :: res
+      type(c_ptr), intent(out) :: rsbMat
+      real(c_double), intent(in) :: va(*)
+      integer(c_int), intent(in) :: ia(*),ja(*)
+      integer(c_int), value, intent(in) :: nnz,nr,nc,br,bc
+    end function Rsb_double_from_coo
+ end interface Rsb_from_coo
+
+ ! Binding to the C function Rsb_getNZeros: takes the matrix handle by
+ ! value and returns a C int.
+ interface
+    function Rsb_get_nzeros(rsbMat) &
+         & result(res) bind(c,name='Rsb_getNZeros')
+      use iso_c_binding
+      type(c_ptr), value :: rsbMat
+      integer(c_int)     :: res
+    end function Rsb_get_nzeros
+ end interface
+
+ ! Generic Rsb_spmv, currently with the single specific Rsb_double_spmv
+ ! bound to the C function of the same name.
+ ! x is read-only, y is updated; the handle, the two scalars and the
+ ! transposition character are passed by value. The result is a C int.
+ ! trans is declared with kind=c_char: the positional form
+ ! character(c_char) would set the LENGTH, not the kind.
+ interface Rsb_spmv
+    function Rsb_double_spmv(rsbMat,x,alfa,y,beta,trans) &
+         & result(res) bind(c,name='Rsb_double_spmv')
+      use iso_c_binding
+      integer(c_int) :: res
+      type(c_ptr), value, intent(in) :: rsbMat
+      real(c_double), intent(in) :: x(*)
+      real(c_double), intent(inout) :: y(*)
+      real(c_double), value, intent(in) :: alfa,beta
+      character(kind=c_char), value, intent(in) :: trans
+    end function Rsb_double_spmv
+ end interface Rsb_spmv
+
+  ! Binding to the C routine freeRsbMat: takes the matrix handle by value
+  ! and returns nothing.
+  interface
+    subroutine freeRsbMat(rsbMat) bind(c,name='freeRsbMat')
+      use iso_c_binding
+      type(c_ptr), value :: rsbMat
+    end subroutine freeRsbMat
+  end interface
+
+  ! interface writeEllDevice
+ 
+  !   function writeEllDeviceFloat(deviceMat,val,ja,ldj,irn) &
+  !        & result(res) bind(c,name='writeEllDeviceFloat')
+  !     use iso_c_binding
+  !     integer(c_int)      :: res
+  !     type(c_ptr), value  :: deviceMat
+  !     integer(c_int), value :: ldj
+  !     real(c_float)       :: val(ldj,*)
+  !     integer(c_int)      :: ja(ldj,*),irn(*)
+  !   end function writeEllDeviceFloat
+
+  !   function writeEllDeviceDouble(deviceMat,val,ja,ldj,irn) &
+  !        & result(res) bind(c,name='writeEllDeviceDouble')
+  !     use iso_c_binding
+  !     integer(c_int)      :: res
+  !     type(c_ptr), value  :: deviceMat
+  !     integer(c_int), value :: ldj
+  !     real(c_double)      :: val(ldj,*)
+  !     integer(c_int)      :: ja(ldj,*),irn(*)
+  !   end function writeEllDeviceDouble
+
+  !   function writeEllDeviceFloatComplex(deviceMat,val,ja,ldj,irn) &
+  !        & result(res) bind(c,name='writeEllDeviceFloatComplex')
+  !     use iso_c_binding
+  !     integer(c_int)           :: res
+  !     type(c_ptr), value       :: deviceMat
+  !     integer(c_int), value    :: ldj
+  !     complex(c_float_complex) :: val(ldj,*)
+  !     integer(c_int)           :: ja(ldj,*),irn(*)
+  !   end function writeEllDeviceFloatComplex
+
+  !   function writeEllDeviceDoubleComplex(deviceMat,val,ja,ldj,irn) &
+  !        & result(res) bind(c,name='writeEllDeviceDoubleComplex')
+  !     use iso_c_binding
+  !     integer(c_int)            :: res
+  !     type(c_ptr), value        :: deviceMat
+  !     integer(c_int), value     :: ldj
+  !     complex(c_double_complex) :: val(ldj,*)
+  !     integer(c_int)            :: ja(ldj,*),irn(*)
+  !   end function writeEllDeviceDoubleComplex
+
+  ! end interface writeEllDevice
+
+  ! interface readEllDevice 
+
+  !   function readEllDeviceFloat(deviceMat,val,ja,ldj,irn) &
+  !        & result(res) bind(c,name='readEllDeviceFloat')
+  !     use iso_c_binding
+  !     integer(c_int)      :: res
+  !     type(c_ptr), value  :: deviceMat
+  !     integer(c_int), value :: ldj
+  !     real(c_float)       :: val(ldj,*)
+  !     integer(c_int)      :: ja(ldj,*),irn(*)
+  !   end function readEllDeviceFloat
+
+  !   function readEllDeviceDouble(deviceMat,val,ja,ldj,irn) &
+  !        & result(res) bind(c,name='readEllDeviceDouble')
+  !     use iso_c_binding
+  !     integer(c_int)      :: res
+  !     type(c_ptr), value  :: deviceMat
+  !     integer(c_int), value :: ldj
+  !     real(c_double)      :: val(ldj,*)
+  !     integer(c_int)      :: ja(ldj,*),irn(*)
+  !   end function readEllDeviceDouble
+
+  !   function readEllDeviceFloatComplex(deviceMat,val,ja,ldj,irn) &
+  !        & result(res) bind(c,name='readEllDeviceFloatComplex')
+  !     use iso_c_binding
+  !     integer(c_int)           :: res
+  !     type(c_ptr), value       :: deviceMat
+  !     integer(c_int), value    :: ldj
+  !     complex(c_float_complex) :: val(ldj,*)
+  !     integer(c_int)           :: ja(ldj,*),irn(*)
+  !   end function readEllDeviceFloatComplex
+
+  !   function readEllDeviceDoubleComplex(deviceMat,val,ja,ldj,irn) &
+  !        & result(res) bind(c,name='readEllDeviceDoubleComplex')
+  !     use iso_c_binding
+  !     integer(c_int)           :: res
+  !     type(c_ptr), value       :: deviceMat
+  !     integer(c_int), value    :: ldj
+  !     complex(c_double_complex) :: val(ldj,*)
+  !     integer(c_int)           :: ja(ldj,*),irn(*)
+  !   end function readEllDeviceDoubleComplex
+
+  ! end interface readEllDevice
+
+  ! interface 
+  !   subroutine resetEllTimer() bind(c,name='resetEllTimer')
+  !     use iso_c_binding
+  !   end subroutine resetEllTimer
+  ! end interface
+  ! interface 
+  !   function  getEllTimer() &
+  !        & bind(c,name='getEllTimer') result(res)
+  !     use iso_c_binding
+  !     real(c_double)      :: res
+  !   end function getEllTimer
+  ! end interface
+
+
+  ! interface 
+  !   function  getEllDevicePitch(deviceMat) &
+  !        & bind(c,name='getEllDevicePitch') result(res)
+  !     use iso_c_binding
+  !     type(c_ptr), value  :: deviceMat
+  !     integer(c_int)      :: res
+  !   end function getEllDevicePitch
+  ! end interface
+
+  ! interface 
+  !   function  getEllDeviceMaxRowSize(deviceMat) &
+  !        & bind(c,name='getEllDeviceMaxRowSize') result(res)
+  !     use iso_c_binding
+  !     type(c_ptr), value  :: deviceMat
+  !     integer(c_int)      :: res
+  !   end function getEllDeviceMaxRowSize
+  ! end interface
+
+
+  ! interface spmvEllDevice
+  !   function spmvEllDeviceFloat(deviceMat,alpha,x,beta,y) &
+  !        & result(res) bind(c,name='spmvEllDeviceFloat')
+  !     use iso_c_binding
+  !     integer(c_int)		:: res
+  !     type(c_ptr), value 	:: deviceMat, x, y
+  !     real(c_float),value     	:: alpha, beta
+  !   end function spmvEllDeviceFloat
+  !   function spmvEllDeviceDouble(deviceMat,alpha,x,beta,y) &
+  !        & result(res) bind(c,name='spmvEllDeviceDouble')
+  !     use iso_c_binding
+  !     integer(c_int)		:: res
+  !     type(c_ptr), value	:: deviceMat, x, y 
+  !     real(c_double),value     	:: alpha,  beta
+  !   end function spmvEllDeviceDouble
+  !   function spmvEllDeviceFloatComplex(deviceMat,alpha,x,beta,y) &
+  !        & result(res) bind(c,name='spmvEllDeviceFloatComplex')
+  !     use iso_c_binding
+  !     integer(c_int)		     :: res
+  !     type(c_ptr), value	     :: deviceMat, x, y 
+  !     complex(c_float_complex),value :: alpha,  beta
+  !   end function spmvEllDeviceFloatComplex
+  !   function spmvEllDeviceDoubleComplex(deviceMat,alpha,x,beta,y) &
+  !        & result(res) bind(c,name='spmvEllDeviceDoubleComplex')
+  !     use iso_c_binding
+  !     integer(c_int)		      :: res
+  !     type(c_ptr), value	      :: deviceMat, x, y 
+  !     complex(c_double_complex),value :: alpha,  beta
+  !   end function spmvEllDeviceDoubleComplex
+  ! end interface spmvEllDevice
+    
+#endif
+
+
+end module rsb_mod