Merge branch 'repackage' into oacc_loloum

oacc_loloum
sfilippone 6 months ago
commit 99a334d93d

@ -25,7 +25,7 @@ Harwell-Boeing and MatrixMarket file formats.
DOCUMENTATION
-------------
See docs/psblas-3.8.pdf; an HTML version of the same document is
See docs/psblas-3.9.pdf; an HTML version of the same document is
available in docs/html. Please consult the sample programs, especially
test/pargen/psb_[sd]_pde[23]d.f90

@ -2018,6 +2018,75 @@ CPPFLAGS="$SAVE_CPPFLAGS";
])dnl
dnl @synopsis PAC_ARG_WITH_LIBRSB
dnl
dnl Test for --with-librsb="pathname".
dnl
dnl Defines the path to LIBRSB build dir.
dnl
dnl note: Renamed after PAC_ARG_WITH_LIBS as in the Trilinos package.
dnl
dnl Example use:
dnl
dnl PAC_ARG_WITH_LIBRSB
dnl
dnl Stores the --with-librsb argument in pac_cv_librsb_dir (empty when the
dnl option is not given) and, when a directory was given, checks for rsb.h
dnl inside it. The result is left in pac_rsb_header_ok (yes/no).
dnl
dnl When the header is found the macro sets RSB_LIBS, RSBLIBNAME,
dnl LIBRSB_DIR, LIBRSB_INCDIR, LIBRSB_INCLUDES, LIBRSB_LIBS,
dnl LIBRSB_DEFINES and LRSB, and prepends -DHAVE_RSB to FDEFINES and
dnl CDEFINES. LIBS and CPPFLAGS are restored to their entry values
dnl before the macro returns.
dnl
dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
dnl
AC_DEFUN([PAC_ARG_WITH_LIBRSB],
[SAVE_LIBS="$LIBS"
SAVE_CPPFLAGS="$CPPFLAGS"
AC_ARG_WITH(librsb,
AC_HELP_STRING([--with-librsb], [The directory for LIBRSB, for example,
 --with-librsb=/opt/packages/librsb]),
[pac_cv_librsb_dir=$withval],
[pac_cv_librsb_dir=''])
# Assume the header is missing until the check below proves otherwise.
pac_rsb_header_ok=no
if test "x$pac_cv_librsb_dir" != "x"; then
  LIBS="-L$pac_cv_librsb_dir $LIBS"
  RSB_INCLUDES="-I$pac_cv_librsb_dir"
  RSB_LIBDIR="-L$pac_cv_librsb_dir"
  # Probe for rsb.h only when a directory was supplied; with an empty
  # directory the check would look for /rsb.h in the filesystem root.
  AC_CHECK_HEADER([$pac_cv_librsb_dir/rsb.h],
   [pac_rsb_header_ok=yes],
   [pac_rsb_header_ok=no; RSB_INCLUDES=""])
fi
# Use the portable = operator here: == is not accepted by the test
# builtin of every POSIX shell.
if test "x$pac_rsb_header_ok" = "xyes" ; then
  RSB_LIBS="-lrsb $RSB_LIBDIR"
  RSBLIBNAME="librsb.a";
  LIBRSB_DIR="$pac_cv_librsb_dir";
  LIBRSB_INCDIR="$LIBRSB_DIR";
  LIBRSB_INCLUDES="-I$LIBRSB_INCDIR";
  LIBRSB_LIBS="-lrsb -L$LIBRSB_DIR";
  LIBRSB_DEFINES="-DHAVE_RSB";
  LRSB=-lpsb_rsb
  FDEFINES="$LIBRSB_DEFINES $psblas_cv_define_prepend $FDEFINES";
  CDEFINES="$LIBRSB_DEFINES $CDEFINES";
fi
# Do not leak the temporary search path into later configure checks.
LIBS="$SAVE_LIBS"
CPPFLAGS="$SAVE_CPPFLAGS"
])
dnl
dnl @synopsis PAC_CHECK_SPGPU
dnl
dnl Will try to find the spgpu library and headers.

@ -204,7 +204,7 @@ PAC_ARG_WITH_FLAGS(module-path,MODULE_PATH)
# we just gave the user the chance to append values to these variables
###############################################################################
dnl Library oriented Autotools facilities (we don't care about this for now)
@ -858,6 +858,30 @@ if test x"$pac_cv_openacc" == x"yes" ; then
#FLINK="$FLINK $FCOPENACC";
fi
###############################################################################
# Process --with-librsb, then report whether a LIBRSB static library is
# present in the directory the user supplied.
PAC_ARG_WITH_LIBRSB
# LIBRSB_DIR is taken from the user-supplied directory unconditionally,
# whatever value the macro call above may have left in it.
LIBRSB_DIR="$pac_cv_librsb_dir";
AC_MSG_CHECKING([for LIBRSB install dir])
# Absolute paths are accepted as they are. The error for any other value
# is currently disabled, so the second branch does nothing.
case $LIBRSB_DIR in
/*) ;;
*) dnl AC_MSG_ERROR([The LIBRSB installation dir must be an absolute pathname
dnl specified with --with-librsb=/path/to/librsb])
esac
dnl if test ! -d "$LIBRSB_DIR" ; then
dnl AC_MSG_ERROR([Could not find LIBRSB build dir $LIBRSB_DIR!])
dnl fi
# The presence of librsb.a in LIBRSB_DIR decides the reported result.
pac_cv_status_file="$LIBRSB_DIR/librsb.a"
if test ! -f "$pac_cv_status_file" ; then
# A missing library is reported but is not fatal: the error below is disabled.
AC_MSG_RESULT([no])
#AC_MSG_ERROR([Could not find an installation in $LIBRSB_DIR.])
else
AC_MSG_RESULT([$LIBRSB_DIR])
# NOTE(review): presumably the names of the make targets that build the
# RSB interface - confirm against the Makefiles that consume them.
RSBTARGETLIB=rsbd;
RSBTARGETOBJ=rsbobj;
fi
###############################################################################
@ -964,6 +988,12 @@ AC_SUBST(CUDEFINES)
AC_SUBST(CUDAD)
AC_SUBST(CUDALD)
AC_SUBST(LCUDA)
AC_SUBST(LIBRSB_LIBS)
AC_SUBST(LIBRSB_INCLUDES)
AC_SUBST(LIBRSB_INCDIR)
AC_SUBST(LIBRSB_DIR)
AC_SUBST(LIBRSB_DEFINES)
AC_SUBST(LRSB)
###############################################################################
# the following files will be created by Automake

@ -1,8 +1,6 @@
#include <stdlib.h>
#include <stdio.h>
#include "cintrf.h"
#include "vectordev.h"
#include "psi_cuda_common.cuh"
@ -62,11 +60,9 @@ __global__ void CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_krn)(int ii, int nrws,
ir += ldv;
}
idiag[i]=idval;
}
void CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_)(spgpuHandle_t handle, int nrws, int i, int nr, int nza,
int baseIdx, int hacksz, int ldv, int nzm,
int *rS,int *devIdisp, int *devJa, VALUE_TYPE *devVal,
@ -76,8 +72,10 @@ void CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_)(spgpuHandle_t handle, int nrws, i
dim3 grid ((nrws + THREAD_BLOCK - 1) / THREAD_BLOCK);
CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_krn)
<<< grid, block, 0, handle->currentStream >>>(i,nrws, nr, nza, baseIdx, hacksz, ldv, nzm,
rS,devIdisp,devJa,devVal,idiag, rP,cM);
<<< grid, block, 0, handle->currentStream >>>(i,nrws, nr, nza, baseIdx,
hacksz, ldv, nzm,
rS,devIdisp,devJa,devVal,
idiag, rP,cM);
}
@ -89,16 +87,17 @@ GEN_PSI_FUNC_NAME(TYPE_SYMBOL)
(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, int hacksz, int ldv, int nzm,
int *rS,int *devIdisp, int *devJa, VALUE_TYPE *devVal,
int *idiag, int *rP, VALUE_TYPE *cM)
{ int i,j, nrws;
{ int i, nrws;
//int maxNForACall = THREAD_BLOCK*handle->maxGridSizeX;
int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX);
//fprintf(stderr,"Loop on j: %d\n",j);
for (i=0; i<nr; i+=nrws) {
nrws = MIN(maxNForACall, nr - i);
//fprintf(stderr,"ifirst: %d i : %d nrws: %d i + ifirst + (nrws -1) -1 %d \n",ifirst,i,nrws,i + ifirst + (nrws -1) -1);
CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_)(handle,nrws,i, nr, nza, baseIdx, hacksz, ldv, nzm,
rS,devIdisp, devJa, devVal, idiag, rP, cM);
CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_)(handle,nrws,i, nr, nza, baseIdx,
hacksz, ldv, nzm,
rS,devIdisp, devJa, devVal,
idiag, rP, cM);
}
}

@ -1,8 +1,6 @@
#include <stdlib.h>
#include <stdio.h>
#include "cintrf.h"
#include "vectordev.h"
#include "psi_cuda_common.cuh"

@ -2,9 +2,7 @@
#include <stdio.h>
#include "cintrf.h"
#include "vectordev.h"
#define VALUE_TYPE cuFloatComplex
#define TYPE_SYMBOL c
#include "psi_cuda_CopyCooToElg.cuh"

@ -2,9 +2,6 @@
#include <stdio.h>
#include "cintrf.h"
#include "vectordev.h"
#define VALUE_TYPE cuFloatComplex
#define TYPE_SYMBOL c
#include "psi_cuda_CopyCooToHlg.cuh"

@ -2,9 +2,6 @@
#include <stdio.h>
#include "cintrf.h"
#include "vectordev.h"
#define VALUE_TYPE double
#define TYPE_SYMBOL d
#include "psi_cuda_CopyCooToElg.cuh"

@ -2,9 +2,6 @@
#include <stdio.h>
#include "cintrf.h"
#include "vectordev.h"
#define VALUE_TYPE double
#define TYPE_SYMBOL d
#include "psi_cuda_CopyCooToHlg.cuh"

@ -2,9 +2,6 @@
#include <stdio.h>
#include "cintrf.h"
#include "vectordev.h"
#define VALUE_TYPE float
#define TYPE_SYMBOL s
#include "psi_cuda_CopyCooToElg.cuh"

@ -2,9 +2,6 @@
#include <stdio.h>
#include "cintrf.h"
#include "vectordev.h"
#define VALUE_TYPE float
#define TYPE_SYMBOL s
#include "psi_cuda_CopyCooToHlg.cuh"

@ -2,9 +2,6 @@
#include <stdio.h>
#include "cintrf.h"
#include "vectordev.h"
#define VALUE_TYPE cuDoubleComplex
#define TYPE_SYMBOL z
#include "psi_cuda_CopyCooToElg.cuh"

@ -2,9 +2,6 @@
#include <stdio.h>
#include "cintrf.h"
#include "vectordev.h"
#define VALUE_TYPE cuDoubleComplex
#define TYPE_SYMBOL z
#include "psi_cuda_CopyCooToHlg.cuh"

@ -35,7 +35,6 @@
#include <cuda_runtime.h>
#include <cusparse_v2.h>
#include "cintrf.h"
#include "fcusparse.h"
#include "ccusparse.h"

@ -37,9 +37,6 @@
#include <stdio.h>
#include "core.h"
#include "cuda_util.h"
#include "vector.h"
#include "vectordev.h"
#define ELL_PITCH_ALIGN_S 32
#define ELL_PITCH_ALIGN_D 16

@ -35,7 +35,6 @@
#include <cuda_runtime.h>
#include <cusparse_v2.h>
#include "cintrf.h"
#include "fcusparse.h"
#include "dcusparse.h"

@ -33,6 +33,7 @@
#define _DIAGDEV_H_
#include "cintrf.h"
#include "vectordev.h"
#include "dia.h"
struct DiagDevice

@ -34,6 +34,7 @@
#define _DNSDEV_H_
#include "cintrf.h"
#include "vectordev.h"
#include "cuComplex.h"
#include "cublas_v2.h"

@ -34,6 +34,7 @@
#define _ELLDEV_H_
#include "cintrf.h"
#include "vectordev.h"
#include "cuComplex.h"
#include "ell.h"

@ -34,7 +34,6 @@
#include <stdlib.h>
#include <cuda_runtime.h>
#include "cintrf.h"
#include "fcusparse.h"
static cusparseHandle_t *cusparse_handle=NULL;

@ -40,6 +40,7 @@
#include <cusparse.h>
#endif
#include "cintrf.h"
#include "vectordev.h"
int FcusparseCreate();
int FcusparseDestroy();

@ -33,6 +33,7 @@
#define _HDIAGDEV_H_
#include "cintrf.h"
#include "vectordev.h"
#include "hdia.h"
struct HdiagDevice

@ -34,7 +34,7 @@
#include "cintrf.h"
#include "hell.h"
#include "vectordev.h"
typedef struct hlldevice
{

@ -35,7 +35,6 @@
#include <cuda_runtime.h>
#include <cusparse_v2.h>
#include "cintrf.h"
#include "fcusparse.h"
#include "scusparse.h"

@ -29,12 +29,11 @@
/* POSSIBILITY OF SUCH DAMAGE. */
#pragma once
//#include "utils.h"
#include "cuda_runtime.h"
//#include "common.h"
#include "cintrf.h"
//#include "cintrf.h"
#include <complex.h>
struct MultiVectDevice
@ -85,3 +84,4 @@ int allocMultiVecDevice(void ** remoteMultiVec, struct MultiVectorDeviceParams *
int getMultiVecDeviceSize(void* deviceVec);
int getMultiVecDeviceCount(void* deviceVec);
int getMultiVecDevicePitch(void* deviceVec);

@ -35,7 +35,6 @@
#include <cuda_runtime.h>
#include <cusparse_v2.h>
#include "cintrf.h"
#include "fcusparse.h"
#include "zcusparse.h"

Binary file not shown.

Before

Width:  |  Height:  |  Size: 334 B

After

Width:  |  Height:  |  Size: 328 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 382 B

After

Width:  |  Height:  |  Size: 366 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 296 B

After

Width:  |  Height:  |  Size: 289 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 58 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 61 KiB

@ -10,7 +10,7 @@
<link rel="stylesheet" type="text/css" href="userhtml.css">
</head><body
>
<!--l. 90--><p class="noindent" ><span
<!--l. 91--><p class="noindent" ><span
class="cmbx-12x-x-144">PSBLAS</span><br
class="newline" /> <span
class="cmbx-12x-x-144">User&#8217;s and Reference Guide</span><br
@ -20,8 +20,8 @@ class="newline" /> <span
class="cmbx-10">Salvatore Filippone</span><br
class="newline" /><span
class="cmbx-10">Alfredo Buttari </span><br
class="newline" />Software version: 3.8.0<br
class="newline" />May 1st, 2022
class="newline" />Software version: 3.9.0<br
class="newline" />Aug 1st, 2024
@ -29,219 +29,39 @@ class="newline" />May 1st, 2022
<div class="tableofcontents">
<span class="likesectionToc" ><a
&#x00A0;<span class="likesectionToc" ><a
href="userhtmlli1.html#x2-1000" id="QQ2-2-1">Contents</a></span>
<br /> <span class="sectionToc" >1 <a
<br /> &#x00A0;<span class="sectionToc" >1 <a
href="userhtmlse1.html#x3-20001" id="QQ2-3-2">Introduction</a></span>
<br /> <span class="sectionToc" >2 <a
<br /> &#x00A0;<span class="sectionToc" >2 <a
href="userhtmlse2.html#x4-30002" id="QQ2-4-3">General overview</a></span>
<br /> &#x00A0;<span class="subsectionToc" >2.1 <a
href="userhtmlsu1.html#x6-40002.1" id="QQ2-6-5">Basic Nomenclature</a></span>
<br /> &#x00A0;<span class="subsectionToc" >2.2 <a
href="userhtmlsu2.html#x8-50002.2" id="QQ2-8-7">Library contents</a></span>
<br /> &#x00A0;<span class="subsectionToc" >2.3 <a
href="userhtmlsu3.html#x9-60002.3" id="QQ2-9-8">Application structure</a></span>
<br /> &#x00A0;<span class="subsectionToc" >2.4 <a
href="userhtmlsu4.html#x11-80002.4" id="QQ2-11-10">Programming model</a></span>
<br /> <span class="sectionToc" >3 <a
href="userhtmlse3.html#x12-90003" id="QQ2-12-11">Data Structures and Classes</a></span>
<br /> &#x00A0;<span class="subsectionToc" >3.1 <a
href="userhtmlsu5.html#x13-100003.1" id="QQ2-13-12">Descriptor data structure</a></span>
<br /> &#x00A0;<span class="subsectionToc" >3.2 <a
href="userhtmlsu6.html#x14-260003.2" id="QQ2-14-29">Sparse Matrix class</a></span>
<br /> &#x00A0;<span class="subsectionToc" >3.3 <a
href="userhtmlsu7.html#x15-460003.3" id="QQ2-15-50">Dense Vector Data Structure</a></span>
<br /> &#x00A0;<span class="subsectionToc" >3.4 <a
href="userhtmlsu8.html#x16-530003.4" id="QQ2-16-58">Preconditioner data structure</a></span>
<br /> &#x00A0;<span class="subsectionToc" >3.5 <a
href="userhtmlsu9.html#x17-540003.5" id="QQ2-17-60">Heap data structure</a></span>
<br /> <span class="sectionToc" >4 <a
href="userhtmlse4.html#x18-550004" id="QQ2-18-61">Computational routines</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.1 <a
href="userhtmlsu10.html#x19-560004.1" id="QQ2-19-62">psb_geaxpby &#8212; General Dense Matrix Sum</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.2 <a
href="userhtmlsu11.html#x20-570004.2" id="QQ2-20-64">psb_gedot &#8212; Dot Product</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.3 <a
href="userhtmlsu12.html#x21-580004.3" id="QQ2-21-66">psb_gedots &#8212; Generalized Dot Product</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.4 <a
href="userhtmlsu13.html#x22-590004.4" id="QQ2-22-68">psb_normi &#8212; Infinity-Norm of Vector</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.5 <a
href="userhtmlsu14.html#x23-600004.5" id="QQ2-23-70">psb_geamaxs &#8212; Generalized Infinity Norm</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.6 <a
href="userhtmlsu15.html#x24-610004.6" id="QQ2-24-72">psb_norm1 &#8212; 1-Norm of Vector</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.7 <a
href="userhtmlsu16.html#x25-620004.7" id="QQ2-25-74">psb_geasums &#8212; Generalized 1-Norm of Vector</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.8 <a
href="userhtmlsu17.html#x26-630004.8" id="QQ2-26-76">psb_norm2 &#8212; 2-Norm of Vector</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.9 <a
href="userhtmlsu18.html#x27-640004.9" id="QQ2-27-78">psb_genrm2s &#8212; Generalized 2-Norm of Vector</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.10 <a
href="userhtmlsu19.html#x28-650004.10" id="QQ2-28-80">psb_norm1 &#8212; 1-Norm of Sparse Matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.11 <a
href="userhtmlsu20.html#x29-660004.11" id="QQ2-29-82">psb_normi &#8212; Infinity Norm of Sparse Matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.12 <a
href="userhtmlsu21.html#x30-670004.12" id="QQ2-30-84">psb_spmm &#8212; Sparse Matrix by Dense Matrix Product</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.13 <a
href="userhtmlsu22.html#x31-680004.13" id="QQ2-31-86">psb_spsm &#8212; Triangular System Solve</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.14 <a
href="userhtmlsu23.html#x32-690004.14" id="QQ2-32-88">psb_gemlt &#8212; Entrywise Product</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.15 <a
href="userhtmlsu24.html#x33-700004.15" id="QQ2-33-90">psb_gediv &#8212; Entrywise Division</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.16 <a
href="userhtmlsu25.html#x34-710004.16" id="QQ2-34-92">psb_geinv &#8212; Entrywise Inversion</a></span>
<br /> <span class="sectionToc" >5 <a
href="userhtmlse5.html#x35-720005" id="QQ2-35-94">Communication routines</a></span>
<br /> &#x00A0;<span class="subsectionToc" >5.1 <a
href="userhtmlsu26.html#x36-730005.1" id="QQ2-36-95">psb_halo &#8212; Halo Data Communication</a></span>
<br /> &#x00A0;<span class="subsectionToc" >5.2 <a
href="userhtmlsu27.html#x37-740005.2" id="QQ2-37-98">psb_ovrl &#8212; Overlap Update</a></span>
<br /> &#x00A0;<span class="subsectionToc" >5.3 <a
href="userhtmlsu28.html#x38-750005.3" id="QQ2-38-101">psb_gather &#8212; Gather Global Dense Matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >5.4 <a
href="userhtmlsu29.html#x39-760005.4" id="QQ2-39-103">psb_scatter &#8212; Scatter Global Dense Matrix</a></span>
<br /> <span class="sectionToc" >6 <a
href="userhtmlse6.html#x40-770006" id="QQ2-40-105">Data management routines</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.1 <a
href="userhtmlsu30.html#x41-780006.1" id="QQ2-41-106">psb_cdall &#8212; Allocates a communication descriptor</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.2 <a
href="userhtmlsu31.html#x42-790006.2" id="QQ2-42-107">psb_cdins &#8212; Communication descriptor insert routine</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.3 <a
href="userhtmlsu32.html#x43-800006.3" id="QQ2-43-108">psb_cdasb &#8212; Communication descriptor assembly routine</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.4 <a
href="userhtmlsu33.html#x44-810006.4" id="QQ2-44-109">psb_cdcpy &#8212; Copies a communication descriptor</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.5 <a
href="userhtmlsu34.html#x45-820006.5" id="QQ2-45-110">psb_cdfree &#8212; Frees a communication descriptor</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.6 <a
href="userhtmlsu35.html#x46-830006.6" id="QQ2-46-111">psb_cdbldext &#8212; Build an extended communication descriptor</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.7 <a
href="userhtmlsu36.html#x47-840006.7" id="QQ2-47-112">psb_spall &#8212; Allocates a sparse matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.8 <a
href="userhtmlsu37.html#x48-850006.8" id="QQ2-48-113">psb_spins &#8212; Insert a set of coefficients into a sparse matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.9 <a
href="userhtmlsu38.html#x49-860006.9" id="QQ2-49-114">psb_spasb &#8212; Sparse matrix assembly routine</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.10 <a
href="userhtmlsu39.html#x50-870006.10" id="QQ2-50-115">psb_spfree &#8212; Frees a sparse matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.11 <a
href="userhtmlsu40.html#x51-880006.11" id="QQ2-51-116">psb_sprn &#8212; Reinit sparse matrix structure for psblas routines.</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.12 <a
href="userhtmlsu41.html#x52-890006.12" id="QQ2-52-117">psb_geall &#8212; Allocates a dense matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.13 <a
href="userhtmlsu42.html#x53-900006.13" id="QQ2-53-118">psb_geins &#8212; Dense matrix insertion routine</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.14 <a
href="userhtmlsu43.html#x54-910006.14" id="QQ2-54-119">psb_geasb &#8212; Assembly a dense matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.15 <a
href="userhtmlsu44.html#x55-920006.15" id="QQ2-55-120">psb_gefree &#8212; Frees a dense matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.16 <a
href="userhtmlsu45.html#x56-930006.16" id="QQ2-56-121">psb_gelp &#8212; Applies a left permutation to a dense matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.17 <a
href="userhtmlsu46.html#x57-940006.17" id="QQ2-57-122">psb_glob_to_loc &#8212; Global to local indices conversion</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.18 <a
href="userhtmlsu47.html#x58-950006.18" id="QQ2-58-123">psb_loc_to_glob &#8212; Local to global indices conversion</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.19 <a
href="userhtmlsu48.html#x59-960006.19" id="QQ2-59-124">psb_is_owned &#8212; </a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.20 <a
href="userhtmlsu49.html#x60-970006.20" id="QQ2-60-125">psb_owned_index &#8212; </a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.21 <a
href="userhtmlsu50.html#x61-980006.21" id="QQ2-61-126">psb_is_local &#8212; </a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.22 <a
href="userhtmlsu51.html#x62-990006.22" id="QQ2-62-127">psb_local_index &#8212; </a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.23 <a
href="userhtmlsu52.html#x63-1000006.23" id="QQ2-63-128">psb_get_boundary &#8212; Extract list of boundary elements</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.24 <a
href="userhtmlsu53.html#x64-1010006.24" id="QQ2-64-129">psb_get_overlap &#8212; Extract list of overlap elements</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.25 <a
href="userhtmlsu54.html#x65-1020006.25" id="QQ2-65-130">psb_sp_getrow &#8212; Extract row(s) from a sparse matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.26 <a
href="userhtmlsu55.html#x66-1030006.26" id="QQ2-66-131">psb_sizeof &#8212; Memory occupation</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.27 <a
href="userhtmlsu56.html#x67-1040006.27" id="QQ2-67-132">Sorting utilities &#8212; </a></span>
<br /> <span class="sectionToc" >7 <a
href="userhtmlse7.html#x68-1050007" id="QQ2-68-133">Parallel environment routines</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.1 <a
href="userhtmlsu57.html#x69-1060007.1" id="QQ2-69-134">psb_init &#8212; Initializes PSBLAS parallel environment</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.2 <a
href="userhtmlsu58.html#x70-1070007.2" id="QQ2-70-135">psb_info &#8212; Return information about PSBLAS parallel environment</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.3 <a
href="userhtmlsu59.html#x71-1080007.3" id="QQ2-71-136">psb_exit &#8212; Exit from PSBLAS parallel environment</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.4 <a
href="userhtmlsu60.html#x72-1090007.4" id="QQ2-72-137">psb_get_mpi_comm &#8212; Get the MPI communicator</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.5 <a
href="userhtmlsu61.html#x73-1100007.5" id="QQ2-73-138">psb_get_mpi_rank &#8212; Get the MPI rank</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.6 <a
href="userhtmlsu62.html#x74-1110007.6" id="QQ2-74-139">psb_wtime &#8212; Wall clock timing</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.7 <a
href="userhtmlsu63.html#x75-1120007.7" id="QQ2-75-140">psb_barrier &#8212; Synchronization point parallel environment</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.8 <a
href="userhtmlsu64.html#x76-1130007.8" id="QQ2-76-141">psb_abort &#8212; Abort a computation</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.9 <a
href="userhtmlsu65.html#x77-1140007.9" id="QQ2-77-142">psb_bcast &#8212; Broadcast data</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.10 <a
href="userhtmlsu66.html#x78-1150007.10" id="QQ2-78-143">psb_sum &#8212; Global sum</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.11 <a
href="userhtmlsu67.html#x79-1160007.11" id="QQ2-79-144">psb_max &#8212; Global maximum</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.12 <a
href="userhtmlsu68.html#x80-1170007.12" id="QQ2-80-145">psb_min &#8212; Global minimum</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.13 <a
href="userhtmlsu69.html#x81-1180007.13" id="QQ2-81-146">psb_amx &#8212; Global maximum absolute value</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.14 <a
href="userhtmlsu70.html#x82-1190007.14" id="QQ2-82-147">psb_amn &#8212; Global minimum absolute value</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.15 <a
href="userhtmlsu71.html#x83-1200007.15" id="QQ2-83-148">psb_nrm2 &#8212; Global 2-norm reduction</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.16 <a
href="userhtmlsu72.html#x84-1210007.16" id="QQ2-84-149">psb_snd &#8212; Send data</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.17 <a
href="userhtmlsu73.html#x85-1220007.17" id="QQ2-85-150">psb_rcv &#8212; Receive data</a></span>
<br /> <span class="sectionToc" >8 <a
href="userhtmlse8.html#x86-1230008" id="QQ2-86-151">Error handling</a></span>
<br /> &#x00A0;<span class="subsectionToc" >8.1 <a
href="userhtmlsu74.html#x87-1240008.1" id="QQ2-87-154">psb_errpush &#8212; Pushes an error code onto the error stack</a></span>
<br /> &#x00A0;<span class="subsectionToc" >8.2 <a
href="userhtmlsu75.html#x88-1250008.2" id="QQ2-88-155">psb_error &#8212; Prints the error stack content and aborts execution</a></span>
<br /> &#x00A0;<span class="subsectionToc" >8.3 <a
href="userhtmlsu76.html#x89-1260008.3" id="QQ2-89-156">psb_set_errverbosity &#8212; Sets the verbosity of error messages</a></span>
<br /> &#x00A0;<span class="subsectionToc" >8.4 <a
href="userhtmlsu77.html#x90-1270008.4" id="QQ2-90-157">psb_set_erraction &#8212; Set the type of action to be taken upon error condition</a></span>
<br /> <span class="sectionToc" >9 <a
href="userhtmlse9.html#x91-1280009" id="QQ2-91-158">Utilities</a></span>
<br /> &#x00A0;<span class="subsectionToc" >9.1 <a
href="userhtmlsu78.html#x92-1290009.1" id="QQ2-92-159"> hb_read &#8212; Read a sparse matrix from a file in the Harwell&#8211;Boeing format</a></span>
<br /> &#x00A0;<span class="subsectionToc" >9.2 <a
href="userhtmlsu79.html#x93-1300009.2" id="QQ2-93-160">hb_write &#8212; Write a sparse matrix to a file in the Harwell&#8211;Boeing format</a></span>
<br /> &#x00A0;<span class="subsectionToc" >9.3 <a
href="userhtmlsu80.html#x94-1310009.3" id="QQ2-94-161">mm_mat_read &#8212; Read a sparse matrix from a file in the MatrixMarket format</a></span>
<br /> &#x00A0;<span class="subsectionToc" >9.4 <a
href="userhtmlsu81.html#x95-1320009.4" id="QQ2-95-162">mm_array_read &#8212; Read a dense array from a file in the MatrixMarket format</a></span>
<br /> &#x00A0;<span class="subsectionToc" >9.5 <a
href="userhtmlsu82.html#x96-1330009.5" id="QQ2-96-163">mm_mat_write &#8212; Write a sparse matrix to a file in the MatrixMarket format</a></span>
<br /> &#x00A0;<span class="subsectionToc" >9.6 <a
href="userhtmlsu83.html#x97-1340009.6" id="QQ2-97-164">mm_array_write &#8212; Write a dense array from a file in the MatrixMarket format</a></span>
<br /> <span class="sectionToc" >10 <a
href="userhtmlse10.html#x98-13500010" id="QQ2-98-165">Preconditioner routines</a></span>
<br /> &#x00A0;<span class="subsectionToc" >10.1 <a
href="userhtmlsu84.html#x99-13600010.1" id="QQ2-99-166">init &#8212; Initialize a preconditioner</a></span>
<br /> &#x00A0;<span class="subsectionToc" >10.2 <a
href="userhtmlsu85.html#x101-13700010.2" id="QQ2-101-167">build &#8212; Builds a preconditioner</a></span>
<br /> &#x00A0;<span class="subsectionToc" >10.3 <a
href="userhtmlsu86.html#x102-13800010.3" id="QQ2-102-168">apply &#8212; Preconditioner application routine</a></span>
<br /> &#x00A0;<span class="subsectionToc" >10.4 <a
href="userhtmlsu87.html#x103-13900010.4" id="QQ2-103-169">descr &#8212; Prints a description of current preconditioner</a></span>
<br /> &#x00A0;<span class="sectionToc" >3 <a
href="userhtmlse3.html#x8-90003" id="QQ2-8-11">Data Structures and Classes</a></span>
<br /> &#x00A0;<span class="sectionToc" >4 <a
href="userhtmlse4.html#x9-550004" id="QQ2-9-61">Computational routines</a></span>
<br /> &#x00A0;<span class="sectionToc" >5 <a
href="userhtmlse5.html#x10-720005" id="QQ2-10-94">Communication routines</a></span>
<br /> &#x00A0;<span class="sectionToc" >6 <a
href="userhtmlse6.html#x11-770006" id="QQ2-11-105">Data management routines</a></span>
<br /> &#x00A0;<span class="sectionToc" >7 <a
href="userhtmlse7.html#x12-1050007" id="QQ2-12-133">Parallel environment routines</a></span>
<br /> &#x00A0;<span class="sectionToc" >8 <a
href="userhtmlse8.html#x13-1230008" id="QQ2-13-151">Error handling</a></span>
<br /> &#x00A0;<span class="sectionToc" >9 <a
href="userhtmlse9.html#x14-1280009" id="QQ2-14-158">Utilities</a></span>
<br /> &#x00A0;<span class="sectionToc" >10 <a
href="userhtmlse10.html#x15-13500010" id="QQ2-15-165">Preconditioner routines</a></span>
<br /> &#x00A0;<span class="sectionToc" >11 <a
href="userhtmlse11.html#x17-14200011" id="QQ2-17-172">Iterative Methods</a></span>
<br /> &#x00A0;<span class="sectionToc" >12 <a
href="userhtmlse12.html#x19-14400012" id="QQ2-19-174">Extensions</a></span>
<br /> &#x00A0;<span class="sectionToc" >13 <a
href="userhtmlse13.html#x20-15300013" id="QQ2-20-189">CUDA Environment Routines</a></span>
<br /> &#x00A0;<span class="likesectionToc" ><a
href="userhtmlli2.html#x21-168000" id="QQ2-21-218">References</a></span>
</div>
<br /> &#x00A0;<span class="subsectionToc" >10.5 <a
href="userhtmlsu88.html#x104-14000010.5" id="QQ2-104-170">clone &#8212; clone current preconditioner</a></span>
<br /> &#x00A0;<span class="subsectionToc" >10.6 <a
href="userhtmlsu89.html#x105-14100010.6" id="QQ2-105-171">free &#8212; Free a preconditioner</a></span>
<br /> <span class="sectionToc" >11 <a
href="userhtmlse11.html#x106-14200011" id="QQ2-106-172">Iterative Methods</a></span>
<br /> &#x00A0;<span class="subsectionToc" >11.1 <a
href="userhtmlsu90.html#x107-14300011.1" id="QQ2-107-173">psb_krylov &#8212; Krylov Methods Driver Routine</a></span>
<br /> <span class="likesectionToc" ><a
href="userhtmlli2.html#x109-14400011.1" id="QQ2-109-174">References</a></span>
</div>

Binary file not shown.

After

Width:  |  Height:  |  Size: 90 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

@ -33,21 +33,26 @@ p.indent{text-indent:0;}
p + p{margin-top:1em;}
p + div, p + pre {margin-top:1em;}
div + p, pre + p {margin-top:1em;}
a { overflow-wrap: break-word; word-wrap: break-word; word-break: break-word; hyphens: auto; }
@media print {div.crosslinks {visibility:hidden;}}
table.tabular{border-collapse: collapse; border-spacing: 0;}
a img { border-top: 0; border-left: 0; border-right: 0; }
center { margin-top:1em; margin-bottom:1em; }
td center { margin-top:0em; margin-bottom:0em; }
.Canvas { position:relative; }
img.math{vertical-align:middle;}
div.par-math-display, div.math-display{text-align:center;}
li p.indent { text-indent: 0em }
li p:first-child{ margin-top:0em; }
li p:last-child, li div:last-child { margin-bottom:0.5em; }
li p:first-child{ margin-bottom:0; }
li p~ul:last-child, li p~ol:last-child{ margin-bottom:0.5em; }
.enumerate1 {list-style-type:decimal;}
.enumerate2 {list-style-type:lower-alpha;}
.enumerate3 {list-style-type:lower-roman;}
.enumerate4 {list-style-type:upper-alpha;}
div.newtheorem { margin-bottom: 2em; margin-top: 2em;}
div.newtheorem .head{font-weight: bold;}
.obeylines-h,.obeylines-v {white-space: nowrap; }
div.obeylines-v p { margin-top:0; margin-bottom:0; }
.overline{ text-decoration:overline; }
@ -91,6 +96,9 @@ table[rules] {border-left:solid black 0.4pt; border-right:solid black 0.4pt; }
.hline hr, .cline hr{ height : 0px; margin:0px; }
.hline td, .cline td{ padding: 0; }
.hline hr, .cline hr{border:none;border-top:1px solid black;}
.hline {border-top: 1px solid black;}
.hline + .vspace:last-child{display:none;}
.hline:first-child{border-bottom:1px solid black;border-top:none;}
.tabbing-right {text-align:right;}
div.float, div.figure {margin-left: auto; margin-right: auto;}
div.float img {text-align:center;}
@ -115,15 +123,16 @@ table.pmatrix {width:100%;}
span.bar-css {text-decoration:overline;}
img.cdots{vertical-align:middle;}
.partToc a, .partToc, .likepartToc a, .likepartToc {line-height: 200%; font-weight:bold; font-size:110%;}
.chapterToc a, .chapterToc, .likechapterToc a, .likechapterToc, .appendixToc a, .appendixToc {line-height: 200%; font-weight:bold;}
.index-item, .index-subitem, .index-subsubitem {display:block}
div.caption {text-indent:-2em; margin-left:3em; margin-right:1em; text-align:left;}
div.caption span.id{font-weight: bold; white-space: nowrap; }
h1.partHead{text-align: center}
p.bibitem { text-indent: -2em; margin-left: 2em; margin-top:0.6em; margin-bottom:0.6em; }
p.bibitem-p { text-indent: 0em; margin-left: 2em; margin-top:0.6em; margin-bottom:0.6em; }
.subsubsectionHead, .likesubsubsectionHead { font-size: 1em; }
.paragraphHead, .likeparagraphHead { margin-top:2em; font-weight: bold;}
.subparagraphHead, .likesubparagraphHead { font-weight: bold;}
.quote {margin-bottom:0.25em; margin-top:0.25em; margin-left:1em; margin-right:1em; text-align:justify;}
.verse{white-space:nowrap; margin-left:2em}
div.maketitle {text-align:center;}
h2.titleHead{text-align:center;}
@ -131,19 +140,23 @@ div.maketitle{ margin-bottom: 2em; }
div.author, div.date {text-align:center;}
div.thanks{text-align:left; margin-left:10%; font-size:85%; font-style:italic; }
div.author{white-space: nowrap;}
.quotation {margin-bottom:0.25em; margin-top:0.25em; margin-left:1em; }
.abstract p {margin-left:5%; margin-right:5%;}
div.abstract p {margin-left:5%; margin-right:5%;}
div.abstract {width:100%;}
.abstracttitle{text-align:center;margin-bottom:1em;}
.subsectionToc, .likesubsectionToc {margin-left:2em;}
.subsubsectionToc, .likesubsubsectionToc {margin-left:4em;}
.paragraphToc, .likeparagraphToc {margin-left:6em;}
.subparagraphToc, .likesubparagraphToc {margin-left:8em;}
.ovalbox { padding-left:3pt; padding-right:3pt; border:solid thin; }
.Ovalbox-thick { padding-left:3pt; padding-right:3pt; border:solid thick; }
.shadowbox { padding-left:3pt; padding-right:3pt; border:solid thin; border-right:solid thick; border-bottom:solid thick; }
.doublebox { padding-left:3pt; padding-right:3pt; border-style:double; border:solid thick; }
.rotatebox{display: inline-block;}
code.lstinline{font-family:monospace,monospace;}
pre.listings{font-family: monospace,monospace; white-space: pre-wrap; margin-top:0.5em; margin-bottom:0.5em; }
.lstlisting .label{margin-right:0.5em; }
div.lstlisting{font-family: monospace,monospace; white-space: nowrap; margin-top:0.5em; margin-bottom:0.5em; }
div.lstinputlisting{ font-family: monospace,monospace; white-space: nowrap; }
pre.lstlisting{font-family: monospace,monospace; white-space: pre-wrap; margin-top:0.5em; margin-bottom:0.5em; }
pre.lstinputlisting{ font-family: monospace,monospace; white-space: pre-wrap; }
.lstinputlisting .label{margin-right:0.5em;}
/* end css.sty */

@ -10,7 +10,7 @@
<link rel="stylesheet" type="text/css" href="userhtml.css">
</head><body
>
<!--l. 90--><p class="noindent" ><span
<!--l. 91--><p class="noindent" ><span
class="cmbx-12x-x-144">PSBLAS</span><br
class="newline" /> <span
class="cmbx-12x-x-144">User&#8217;s and Reference Guide</span><br
@ -20,8 +20,8 @@ class="newline" /> <span
class="cmbx-10">Salvatore Filippone</span><br
class="newline" /><span
class="cmbx-10">Alfredo Buttari </span><br
class="newline" />Software version: 3.8.0<br
class="newline" />May 1st, 2022
class="newline" />Software version: 3.9.0<br
class="newline" />Aug 1st, 2024
@ -29,219 +29,39 @@ class="newline" />May 1st, 2022
<div class="tableofcontents">
<span class="likesectionToc" ><a
&#x00A0;<span class="likesectionToc" ><a
href="userhtmlli1.html#x2-1000" id="QQ2-2-1">Contents</a></span>
<br /> <span class="sectionToc" >1 <a
<br /> &#x00A0;<span class="sectionToc" >1 <a
href="userhtmlse1.html#x3-20001" id="QQ2-3-2">Introduction</a></span>
<br /> <span class="sectionToc" >2 <a
<br /> &#x00A0;<span class="sectionToc" >2 <a
href="userhtmlse2.html#x4-30002" id="QQ2-4-3">General overview</a></span>
<br /> &#x00A0;<span class="subsectionToc" >2.1 <a
href="userhtmlsu1.html#x6-40002.1" id="QQ2-6-5">Basic Nomenclature</a></span>
<br /> &#x00A0;<span class="subsectionToc" >2.2 <a
href="userhtmlsu2.html#x8-50002.2" id="QQ2-8-7">Library contents</a></span>
<br /> &#x00A0;<span class="subsectionToc" >2.3 <a
href="userhtmlsu3.html#x9-60002.3" id="QQ2-9-8">Application structure</a></span>
<br /> &#x00A0;<span class="subsectionToc" >2.4 <a
href="userhtmlsu4.html#x11-80002.4" id="QQ2-11-10">Programming model</a></span>
<br /> <span class="sectionToc" >3 <a
href="userhtmlse3.html#x12-90003" id="QQ2-12-11">Data Structures and Classes</a></span>
<br /> &#x00A0;<span class="subsectionToc" >3.1 <a
href="userhtmlsu5.html#x13-100003.1" id="QQ2-13-12">Descriptor data structure</a></span>
<br /> &#x00A0;<span class="subsectionToc" >3.2 <a
href="userhtmlsu6.html#x14-260003.2" id="QQ2-14-29">Sparse Matrix class</a></span>
<br /> &#x00A0;<span class="subsectionToc" >3.3 <a
href="userhtmlsu7.html#x15-460003.3" id="QQ2-15-50">Dense Vector Data Structure</a></span>
<br /> &#x00A0;<span class="subsectionToc" >3.4 <a
href="userhtmlsu8.html#x16-530003.4" id="QQ2-16-58">Preconditioner data structure</a></span>
<br /> &#x00A0;<span class="subsectionToc" >3.5 <a
href="userhtmlsu9.html#x17-540003.5" id="QQ2-17-60">Heap data structure</a></span>
<br /> <span class="sectionToc" >4 <a
href="userhtmlse4.html#x18-550004" id="QQ2-18-61">Computational routines</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.1 <a
href="userhtmlsu10.html#x19-560004.1" id="QQ2-19-62">psb_geaxpby &#8212; General Dense Matrix Sum</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.2 <a
href="userhtmlsu11.html#x20-570004.2" id="QQ2-20-64">psb_gedot &#8212; Dot Product</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.3 <a
href="userhtmlsu12.html#x21-580004.3" id="QQ2-21-66">psb_gedots &#8212; Generalized Dot Product</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.4 <a
href="userhtmlsu13.html#x22-590004.4" id="QQ2-22-68">psb_normi &#8212; Infinity-Norm of Vector</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.5 <a
href="userhtmlsu14.html#x23-600004.5" id="QQ2-23-70">psb_geamaxs &#8212; Generalized Infinity Norm</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.6 <a
href="userhtmlsu15.html#x24-610004.6" id="QQ2-24-72">psb_norm1 &#8212; 1-Norm of Vector</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.7 <a
href="userhtmlsu16.html#x25-620004.7" id="QQ2-25-74">psb_geasums &#8212; Generalized 1-Norm of Vector</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.8 <a
href="userhtmlsu17.html#x26-630004.8" id="QQ2-26-76">psb_norm2 &#8212; 2-Norm of Vector</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.9 <a
href="userhtmlsu18.html#x27-640004.9" id="QQ2-27-78">psb_genrm2s &#8212; Generalized 2-Norm of Vector</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.10 <a
href="userhtmlsu19.html#x28-650004.10" id="QQ2-28-80">psb_norm1 &#8212; 1-Norm of Sparse Matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.11 <a
href="userhtmlsu20.html#x29-660004.11" id="QQ2-29-82">psb_normi &#8212; Infinity Norm of Sparse Matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.12 <a
href="userhtmlsu21.html#x30-670004.12" id="QQ2-30-84">psb_spmm &#8212; Sparse Matrix by Dense Matrix Product</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.13 <a
href="userhtmlsu22.html#x31-680004.13" id="QQ2-31-86">psb_spsm &#8212; Triangular System Solve</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.14 <a
href="userhtmlsu23.html#x32-690004.14" id="QQ2-32-88">psb_gemlt &#8212; Entrywise Product</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.15 <a
href="userhtmlsu24.html#x33-700004.15" id="QQ2-33-90">psb_gediv &#8212; Entrywise Division</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.16 <a
href="userhtmlsu25.html#x34-710004.16" id="QQ2-34-92">psb_geinv &#8212; Entrywise Inversion</a></span>
<br /> <span class="sectionToc" >5 <a
href="userhtmlse5.html#x35-720005" id="QQ2-35-94">Communication routines</a></span>
<br /> &#x00A0;<span class="subsectionToc" >5.1 <a
href="userhtmlsu26.html#x36-730005.1" id="QQ2-36-95">psb_halo &#8212; Halo Data Communication</a></span>
<br /> &#x00A0;<span class="subsectionToc" >5.2 <a
href="userhtmlsu27.html#x37-740005.2" id="QQ2-37-98">psb_ovrl &#8212; Overlap Update</a></span>
<br /> &#x00A0;<span class="subsectionToc" >5.3 <a
href="userhtmlsu28.html#x38-750005.3" id="QQ2-38-101">psb_gather &#8212; Gather Global Dense Matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >5.4 <a
href="userhtmlsu29.html#x39-760005.4" id="QQ2-39-103">psb_scatter &#8212; Scatter Global Dense Matrix</a></span>
<br /> <span class="sectionToc" >6 <a
href="userhtmlse6.html#x40-770006" id="QQ2-40-105">Data management routines</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.1 <a
href="userhtmlsu30.html#x41-780006.1" id="QQ2-41-106">psb_cdall &#8212; Allocates a communication descriptor</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.2 <a
href="userhtmlsu31.html#x42-790006.2" id="QQ2-42-107">psb_cdins &#8212; Communication descriptor insert routine</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.3 <a
href="userhtmlsu32.html#x43-800006.3" id="QQ2-43-108">psb_cdasb &#8212; Communication descriptor assembly routine</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.4 <a
href="userhtmlsu33.html#x44-810006.4" id="QQ2-44-109">psb_cdcpy &#8212; Copies a communication descriptor</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.5 <a
href="userhtmlsu34.html#x45-820006.5" id="QQ2-45-110">psb_cdfree &#8212; Frees a communication descriptor</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.6 <a
href="userhtmlsu35.html#x46-830006.6" id="QQ2-46-111">psb_cdbldext &#8212; Build an extended communication descriptor</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.7 <a
href="userhtmlsu36.html#x47-840006.7" id="QQ2-47-112">psb_spall &#8212; Allocates a sparse matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.8 <a
href="userhtmlsu37.html#x48-850006.8" id="QQ2-48-113">psb_spins &#8212; Insert a set of coefficients into a sparse matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.9 <a
href="userhtmlsu38.html#x49-860006.9" id="QQ2-49-114">psb_spasb &#8212; Sparse matrix assembly routine</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.10 <a
href="userhtmlsu39.html#x50-870006.10" id="QQ2-50-115">psb_spfree &#8212; Frees a sparse matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.11 <a
href="userhtmlsu40.html#x51-880006.11" id="QQ2-51-116">psb_sprn &#8212; Reinit sparse matrix structure for psblas routines.</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.12 <a
href="userhtmlsu41.html#x52-890006.12" id="QQ2-52-117">psb_geall &#8212; Allocates a dense matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.13 <a
href="userhtmlsu42.html#x53-900006.13" id="QQ2-53-118">psb_geins &#8212; Dense matrix insertion routine</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.14 <a
href="userhtmlsu43.html#x54-910006.14" id="QQ2-54-119">psb_geasb &#8212; Assemble a dense matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.15 <a
href="userhtmlsu44.html#x55-920006.15" id="QQ2-55-120">psb_gefree &#8212; Frees a dense matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.16 <a
href="userhtmlsu45.html#x56-930006.16" id="QQ2-56-121">psb_gelp &#8212; Applies a left permutation to a dense matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.17 <a
href="userhtmlsu46.html#x57-940006.17" id="QQ2-57-122">psb_glob_to_loc &#8212; Global to local indices conversion</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.18 <a
href="userhtmlsu47.html#x58-950006.18" id="QQ2-58-123">psb_loc_to_glob &#8212; Local to global indices conversion</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.19 <a
href="userhtmlsu48.html#x59-960006.19" id="QQ2-59-124">psb_is_owned &#8212; </a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.20 <a
href="userhtmlsu49.html#x60-970006.20" id="QQ2-60-125">psb_owned_index &#8212; </a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.21 <a
href="userhtmlsu50.html#x61-980006.21" id="QQ2-61-126">psb_is_local &#8212; </a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.22 <a
href="userhtmlsu51.html#x62-990006.22" id="QQ2-62-127">psb_local_index &#8212; </a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.23 <a
href="userhtmlsu52.html#x63-1000006.23" id="QQ2-63-128">psb_get_boundary &#8212; Extract list of boundary elements</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.24 <a
href="userhtmlsu53.html#x64-1010006.24" id="QQ2-64-129">psb_get_overlap &#8212; Extract list of overlap elements</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.25 <a
href="userhtmlsu54.html#x65-1020006.25" id="QQ2-65-130">psb_sp_getrow &#8212; Extract row(s) from a sparse matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.26 <a
href="userhtmlsu55.html#x66-1030006.26" id="QQ2-66-131">psb_sizeof &#8212; Memory occupation</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.27 <a
href="userhtmlsu56.html#x67-1040006.27" id="QQ2-67-132">Sorting utilities &#8212; </a></span>
<br /> <span class="sectionToc" >7 <a
href="userhtmlse7.html#x68-1050007" id="QQ2-68-133">Parallel environment routines</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.1 <a
href="userhtmlsu57.html#x69-1060007.1" id="QQ2-69-134">psb_init &#8212; Initializes PSBLAS parallel environment</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.2 <a
href="userhtmlsu58.html#x70-1070007.2" id="QQ2-70-135">psb_info &#8212; Return information about PSBLAS parallel environment</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.3 <a
href="userhtmlsu59.html#x71-1080007.3" id="QQ2-71-136">psb_exit &#8212; Exit from PSBLAS parallel environment</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.4 <a
href="userhtmlsu60.html#x72-1090007.4" id="QQ2-72-137">psb_get_mpi_comm &#8212; Get the MPI communicator</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.5 <a
href="userhtmlsu61.html#x73-1100007.5" id="QQ2-73-138">psb_get_mpi_rank &#8212; Get the MPI rank</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.6 <a
href="userhtmlsu62.html#x74-1110007.6" id="QQ2-74-139">psb_wtime &#8212; Wall clock timing</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.7 <a
href="userhtmlsu63.html#x75-1120007.7" id="QQ2-75-140">psb_barrier &#8212; Synchronization point parallel environment</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.8 <a
href="userhtmlsu64.html#x76-1130007.8" id="QQ2-76-141">psb_abort &#8212; Abort a computation</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.9 <a
href="userhtmlsu65.html#x77-1140007.9" id="QQ2-77-142">psb_bcast &#8212; Broadcast data</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.10 <a
href="userhtmlsu66.html#x78-1150007.10" id="QQ2-78-143">psb_sum &#8212; Global sum</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.11 <a
href="userhtmlsu67.html#x79-1160007.11" id="QQ2-79-144">psb_max &#8212; Global maximum</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.12 <a
href="userhtmlsu68.html#x80-1170007.12" id="QQ2-80-145">psb_min &#8212; Global minimum</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.13 <a
href="userhtmlsu69.html#x81-1180007.13" id="QQ2-81-146">psb_amx &#8212; Global maximum absolute value</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.14 <a
href="userhtmlsu70.html#x82-1190007.14" id="QQ2-82-147">psb_amn &#8212; Global minimum absolute value</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.15 <a
href="userhtmlsu71.html#x83-1200007.15" id="QQ2-83-148">psb_nrm2 &#8212; Global 2-norm reduction</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.16 <a
href="userhtmlsu72.html#x84-1210007.16" id="QQ2-84-149">psb_snd &#8212; Send data</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.17 <a
href="userhtmlsu73.html#x85-1220007.17" id="QQ2-85-150">psb_rcv &#8212; Receive data</a></span>
<br /> <span class="sectionToc" >8 <a
href="userhtmlse8.html#x86-1230008" id="QQ2-86-151">Error handling</a></span>
<br /> &#x00A0;<span class="subsectionToc" >8.1 <a
href="userhtmlsu74.html#x87-1240008.1" id="QQ2-87-154">psb_errpush &#8212; Pushes an error code onto the error stack</a></span>
<br /> &#x00A0;<span class="subsectionToc" >8.2 <a
href="userhtmlsu75.html#x88-1250008.2" id="QQ2-88-155">psb_error &#8212; Prints the error stack content and aborts execution</a></span>
<br /> &#x00A0;<span class="subsectionToc" >8.3 <a
href="userhtmlsu76.html#x89-1260008.3" id="QQ2-89-156">psb_set_errverbosity &#8212; Sets the verbosity of error messages</a></span>
<br /> &#x00A0;<span class="subsectionToc" >8.4 <a
href="userhtmlsu77.html#x90-1270008.4" id="QQ2-90-157">psb_set_erraction &#8212; Set the type of action to be taken upon error condition</a></span>
<br /> <span class="sectionToc" >9 <a
href="userhtmlse9.html#x91-1280009" id="QQ2-91-158">Utilities</a></span>
<br /> &#x00A0;<span class="subsectionToc" >9.1 <a
href="userhtmlsu78.html#x92-1290009.1" id="QQ2-92-159"> hb_read &#8212; Read a sparse matrix from a file in the Harwell&#8211;Boeing format</a></span>
<br /> &#x00A0;<span class="subsectionToc" >9.2 <a
href="userhtmlsu79.html#x93-1300009.2" id="QQ2-93-160">hb_write &#8212; Write a sparse matrix to a file in the Harwell&#8211;Boeing format</a></span>
<br /> &#x00A0;<span class="subsectionToc" >9.3 <a
href="userhtmlsu80.html#x94-1310009.3" id="QQ2-94-161">mm_mat_read &#8212; Read a sparse matrix from a file in the MatrixMarket format</a></span>
<br /> &#x00A0;<span class="subsectionToc" >9.4 <a
href="userhtmlsu81.html#x95-1320009.4" id="QQ2-95-162">mm_array_read &#8212; Read a dense array from a file in the MatrixMarket format</a></span>
<br /> &#x00A0;<span class="subsectionToc" >9.5 <a
href="userhtmlsu82.html#x96-1330009.5" id="QQ2-96-163">mm_mat_write &#8212; Write a sparse matrix to a file in the MatrixMarket format</a></span>
<br /> &#x00A0;<span class="subsectionToc" >9.6 <a
href="userhtmlsu83.html#x97-1340009.6" id="QQ2-97-164">mm_array_write &#8212; Write a dense array to a file in the MatrixMarket format</a></span>
<br /> <span class="sectionToc" >10 <a
href="userhtmlse10.html#x98-13500010" id="QQ2-98-165">Preconditioner routines</a></span>
<br /> &#x00A0;<span class="subsectionToc" >10.1 <a
href="userhtmlsu84.html#x99-13600010.1" id="QQ2-99-166">init &#8212; Initialize a preconditioner</a></span>
<br /> &#x00A0;<span class="subsectionToc" >10.2 <a
href="userhtmlsu85.html#x101-13700010.2" id="QQ2-101-167">build &#8212; Builds a preconditioner</a></span>
<br /> &#x00A0;<span class="subsectionToc" >10.3 <a
href="userhtmlsu86.html#x102-13800010.3" id="QQ2-102-168">apply &#8212; Preconditioner application routine</a></span>
<br /> &#x00A0;<span class="subsectionToc" >10.4 <a
href="userhtmlsu87.html#x103-13900010.4" id="QQ2-103-169">descr &#8212; Prints a description of current preconditioner</a></span>
<br /> &#x00A0;<span class="sectionToc" >3 <a
href="userhtmlse3.html#x8-90003" id="QQ2-8-11">Data Structures and Classes</a></span>
<br /> &#x00A0;<span class="sectionToc" >4 <a
href="userhtmlse4.html#x9-550004" id="QQ2-9-61">Computational routines</a></span>
<br /> &#x00A0;<span class="sectionToc" >5 <a
href="userhtmlse5.html#x10-720005" id="QQ2-10-94">Communication routines</a></span>
<br /> &#x00A0;<span class="sectionToc" >6 <a
href="userhtmlse6.html#x11-770006" id="QQ2-11-105">Data management routines</a></span>
<br /> &#x00A0;<span class="sectionToc" >7 <a
href="userhtmlse7.html#x12-1050007" id="QQ2-12-133">Parallel environment routines</a></span>
<br /> &#x00A0;<span class="sectionToc" >8 <a
href="userhtmlse8.html#x13-1230008" id="QQ2-13-151">Error handling</a></span>
<br /> &#x00A0;<span class="sectionToc" >9 <a
href="userhtmlse9.html#x14-1280009" id="QQ2-14-158">Utilities</a></span>
<br /> &#x00A0;<span class="sectionToc" >10 <a
href="userhtmlse10.html#x15-13500010" id="QQ2-15-165">Preconditioner routines</a></span>
<br /> &#x00A0;<span class="sectionToc" >11 <a
href="userhtmlse11.html#x17-14200011" id="QQ2-17-172">Iterative Methods</a></span>
<br /> &#x00A0;<span class="sectionToc" >12 <a
href="userhtmlse12.html#x19-14400012" id="QQ2-19-174">Extensions</a></span>
<br /> &#x00A0;<span class="sectionToc" >13 <a
href="userhtmlse13.html#x20-15300013" id="QQ2-20-189">CUDA Environment Routines</a></span>
<br /> &#x00A0;<span class="likesectionToc" ><a
href="userhtmlli2.html#x21-168000" id="QQ2-21-218">References</a></span>
</div>
<br /> &#x00A0;<span class="subsectionToc" >10.5 <a
href="userhtmlsu88.html#x104-14000010.5" id="QQ2-104-170">clone &#8212; clone current preconditioner</a></span>
<br /> &#x00A0;<span class="subsectionToc" >10.6 <a
href="userhtmlsu89.html#x105-14100010.6" id="QQ2-105-171">free &#8212; Free a preconditioner</a></span>
<br /> <span class="sectionToc" >11 <a
href="userhtmlse11.html#x106-14200011" id="QQ2-106-172">Iterative Methods</a></span>
<br /> &#x00A0;<span class="subsectionToc" >11.1 <a
href="userhtmlsu90.html#x107-14300011.1" id="QQ2-107-173">psb_krylov &#8212; Krylov Methods Driver Routine</a></span>
<br /> <span class="likesectionToc" ><a
href="userhtmlli2.html#x109-14400011.1" id="QQ2-109-174">References</a></span>
</div>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.6 KiB

After

Width:  |  Height:  |  Size: 1.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.1 KiB

After

Width:  |  Height:  |  Size: 1.8 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 KiB

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.0 KiB

After

Width:  |  Height:  |  Size: 1.7 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 KiB

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 KiB

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.6 KiB

After

Width:  |  Height:  |  Size: 1.3 KiB

@ -0,0 +1,19 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<html >
<head><title></title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="generator" content="TeX4ht (https://tug.org/tex4ht/)">
<meta name="originator" content="TeX4ht (https://tug.org/tex4ht/)">
<!-- html,3 -->
<meta name="src" content="userhtml.tex">
<link rel="stylesheet" type="text/css" href="userhtml.css">
</head><body
>
<div class="footnote-text">
<!--l. 72--><p class="indent" > <span class="footnote-mark"><a
id="fn4x0"><a
id="x16-136002x10.1"></a> <sup class="textsuperscript">4</sup></a></span><span
class="cmr-8">The string is case-insensitive</span></div>
</body></html>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.1 KiB

After

Width:  |  Height:  |  Size: 968 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 KiB

After

Width:  |  Height:  |  Size: 1.1 KiB

@ -0,0 +1,20 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<html >
<head><title></title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="generator" content="TeX4ht (https://tug.org/tex4ht/)">
<meta name="originator" content="TeX4ht (https://tug.org/tex4ht/)">
<!-- html,3 -->
<meta name="src" content="userhtml.tex">
<link rel="stylesheet" type="text/css" href="userhtml.css">
</head><body
>
<div class="footnote-text">
<!--l. 53--><p class="noindent" ><span class="footnote-mark"><a
id="fn5x0"><a
id="x18-143004x11.1"></a> <sup class="textsuperscript">5</sup></a></span><span
class="cmr-8">Note: the implementation is for </span><span
class="cmmi-8">FCG</span><span
class="cmr-8">(1).</span></div>
</body></html>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 KiB

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.4 KiB

After

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.1 KiB

After

Width:  |  Height:  |  Size: 1.0 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.4 KiB

After

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 10 KiB

After

Width:  |  Height:  |  Size: 7.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.4 KiB

After

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 KiB

After

Width:  |  Height:  |  Size: 1.0 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.0 KiB

After

Width:  |  Height:  |  Size: 970 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 439 B

After

Width:  |  Height:  |  Size: 420 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 763 B

After

Width:  |  Height:  |  Size: 710 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.9 KiB

After

Width:  |  Height:  |  Size: 1.7 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.0 KiB

After

Width:  |  Height:  |  Size: 1.8 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.4 KiB

After

Width:  |  Height:  |  Size: 2.0 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.1 KiB

After

Width:  |  Height:  |  Size: 1016 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.6 KiB

After

Width:  |  Height:  |  Size: 2.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.7 KiB

After

Width:  |  Height:  |  Size: 1.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.8 KiB

After

Width:  |  Height:  |  Size: 1.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.1 KiB

After

Width:  |  Height:  |  Size: 1.0 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.8 KiB

After

Width:  |  Height:  |  Size: 1.6 KiB

@ -12,7 +12,8 @@
>
<div class="footnote-text">
<!--l. 151--><p class="indent" > <span class="footnote-mark"><a
id="fn1x0"> <sup class="textsuperscript">1</sup></a></span><span
id="fn1x0"><a
id="x5-3003x2"></a> <sup class="textsuperscript">1</sup></a></span><span
class="cmr-8">In our prototype implementation we provide sample scatter/gather routines.</span></div>
</body></html>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.5 KiB

After

Width:  |  Height:  |  Size: 1.3 KiB

@ -0,0 +1,24 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<html >
<head><title></title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="generator" content="TeX4ht (https://tug.org/tex4ht/)">
<meta name="originator" content="TeX4ht (https://tug.org/tex4ht/)">
<!-- html,3 -->
<meta name="src" content="userhtml.tex">
<link rel="stylesheet" type="text/css" href="userhtml.css">
</head><body
>
<div class="footnote-text">
<!--l. 195--><p class="noindent" ><span class="footnote-mark"><a
id="fn2x0"><a
id="x6-4002x2.1"></a> <sup class="textsuperscript">2</sup></a></span><span
class="cmr-8">This is the normal situation when the pattern of the sparse matrix is symmetric, which is</span>
<span
class="cmr-8">equivalent to saying that the interaction between two variables is reciprocal. If the matrix pattern is</span>
<span
class="cmr-8">non-symmetric we may have one-way interactions, and these could cause a situation in which a</span>
<span
class="cmr-8">boundary point is not a halo point for its neighbour.</span></div>
</body></html>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.5 KiB

After

Width:  |  Height:  |  Size: 2.1 KiB

@ -11,13 +11,16 @@
</head><body
>
<div class="footnote-text">
<!--l. 195--><p class="noindent" ><span class="footnote-mark"><a
id="fn2x0"> <sup class="textsuperscript">2</sup></a></span><span
class="cmr-8">This is the normal situation when the pattern of the sparse matrix is symmetric, which is</span>
<!--l. 362--><p class="noindent" ><span class="footnote-mark"><a
id="fn3x0"><a
id="x7-6020x3"></a> <sup class="textsuperscript">3</sup></a></span><span
class="cmr-8">The subroutine style </span><span
class="cmtt-8">psb</span><span
class="cmtt-8">_precinit </span><span
class="cmr-8">and </span><span
class="cmtt-8">psb</span><span
class="cmtt-8">_precbld </span><span
class="cmr-8">are still supported for backward</span>
<span
class="cmr-8">equivalent to saying that the interaction between two variables is reciprocal. If the matrix pattern is</span>
<span
class="cmr-8">non-symmetric we may have one-way interactions, and these could cause a situation in which a</span>
<span
class="cmr-8">boundary point is not a halo point for its neighbour.</span></div>
class="cmr-8">compatibility</span></div>
</body></html>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.1 KiB

After

Width:  |  Height:  |  Size: 1.8 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 KiB

After

Width:  |  Height:  |  Size: 1.0 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.0 KiB

After

Width:  |  Height:  |  Size: 1.7 KiB

@ -10,306 +10,346 @@
<link rel="stylesheet" type="text/css" href="userhtml.css">
</head><body
>
<!--l. 105--><div class="crosslinks"><p class="noindent">[<a
<!--l. 106--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlse1.html" >next</a>] [<a
href="#tailuserhtmlli1.html">tail</a>] [<a
href="userhtml.html#userhtmlli1.html" >up</a>] </p></div>
<h3 class="likesectionHead"><a
id="x2-1000"></a>Contents</h3>
<div class="tableofcontents">
<span class="sectionToc" >1 <a
&#x00A0;<span class="sectionToc" >1 <a
href="userhtmlse1.html#x3-20001">Introduction</a></span>
<br /> <span class="sectionToc" >2 <a
<br /> &#x00A0;<span class="sectionToc" >2 <a
href="userhtmlse2.html#x4-30002">General overview</a></span>
<br /> &#x00A0;<span class="subsectionToc" >2.1 <a
href="userhtmlsu1.html#x6-40002.1">Basic Nomenclature</a></span>
<br /> &#x00A0;<span class="subsectionToc" >2.2 <a
href="userhtmlsu2.html#x8-50002.2">Library contents</a></span>
<br /> &#x00A0;<span class="subsectionToc" >2.3 <a
href="userhtmlsu3.html#x9-60002.3">Application structure</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >2.3.1 <a
href="userhtmlsu3.html#x9-70002.3.1" id="QQ2-9-9">User-defined index mappings</a></span>
<br /> &#x00A0;<span class="subsectionToc" >2.4 <a
href="userhtmlsu4.html#x11-80002.4">Programming model</a></span>
<br /> <span class="sectionToc" >3 <a
href="userhtmlse3.html#x12-90003">Data Structures and Classes</a></span>
<br /> &#x00A0;<span class="subsectionToc" >3.1 <a
href="userhtmlsu5.html#x13-100003.1">Descriptor data structure</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.1 <a
href="userhtmlsu5.html#x13-110003.1.1" id="QQ2-13-14">Descriptor Methods</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.2 <a
href="userhtmlsu5.html#x13-120003.1.2" id="QQ2-13-15">get_local_rows &#8212; Get number of local rows</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.3 <a
href="userhtmlsu5.html#x13-130003.1.3" id="QQ2-13-16">get_local_cols &#8212; Get number of local cols</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.4 <a
href="userhtmlsu5.html#x13-140003.1.4" id="QQ2-13-17">get_global_rows &#8212; Get number of global rows</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.5 <a
href="userhtmlsu5.html#x13-150003.1.5" id="QQ2-13-18">get_global_cols &#8212; Get number of global cols</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.6 <a
href="userhtmlsu5.html#x13-160003.1.6" id="QQ2-13-19">get_global_indices &#8212; Get vector of global indices</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.7 <a
href="userhtmlsu5.html#x13-170003.1.7" id="QQ2-13-20">get_context &#8212; Get communication context</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.8 <a
href="userhtmlsu5.html#x13-180003.1.8" id="QQ2-13-21">Clone &#8212; clone current object</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.9 <a
href="userhtmlsu5.html#x13-190003.1.9" id="QQ2-13-22">CNV &#8212; convert internal storage format</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.10 <a
href="userhtmlsu5.html#x13-200003.1.10" id="QQ2-13-23">psb_cd_get_large_threshold &#8212; Get threshold for index mapping switch</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.11 <a
href="userhtmlsu5.html#x13-210003.1.11" id="QQ2-13-24">psb_cd_set_large_threshold &#8212; Set threshold for index mapping switch</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.12 <a
href="userhtmlsu5.html#x13-220003.1.12" id="QQ2-13-25">get_p_adjcncy &#8212; Get process adjacency list</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.13 <a
href="userhtmlsu5.html#x13-230003.1.13" id="QQ2-13-26">set_p_adjcncy &#8212; Set process adjacency list</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.14 <a
href="userhtmlsu5.html#x13-240003.1.14" id="QQ2-13-27">fnd_owner &#8212; Find the owner process of a set of indices</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.15 <a
href="userhtmlsu5.html#x13-250003.1.15" id="QQ2-13-28">Named Constants</a></span>
<br /> &#x00A0;<span class="subsectionToc" >3.2 <a
href="userhtmlsu6.html#x14-260003.2">Sparse Matrix class</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.1 <a
href="userhtmlsu6.html#x14-270003.2.1" id="QQ2-14-31">Sparse Matrix Methods</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.2 <a
href="userhtmlsu6.html#x14-280003.2.2" id="QQ2-14-32">get_nrows &#8212; Get number of rows in a sparse matrix</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.3 <a
href="userhtmlsu6.html#x14-290003.2.3" id="QQ2-14-33">get_ncols &#8212; Get number of columns in a sparse matrix</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.4 <a
href="userhtmlsu6.html#x14-300003.2.4" id="QQ2-14-34">get_nnzeros &#8212; Get number of nonzero elements in a sparse matrix</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.5 <a
href="userhtmlsu6.html#x14-310003.2.5" id="QQ2-14-35">get_size &#8212; Get maximum number of nonzero elements in a sparse matrix</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.6 <a
href="userhtmlsu6.html#x14-320003.2.6" id="QQ2-14-36">sizeof &#8212; Get memory occupation in bytes of a sparse matrix</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.7 <a
href="userhtmlsu6.html#x14-330003.2.7" id="QQ2-14-37">get_fmt &#8212; Short description of the dynamic type</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.8 <a
href="userhtmlsu6.html#x14-340003.2.8" id="QQ2-14-38">is_bld, is_upd, is_asb &#8212; Status check</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.9 <a
href="userhtmlsu6.html#x14-350003.2.9" id="QQ2-14-39">is_lower, is_upper, is_triangle, is_unit &#8212; Format check</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.10 <a
href="userhtmlsu6.html#x14-360003.2.10" id="QQ2-14-40">cscnv &#8212; Convert to a different storage format</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.11 <a
href="userhtmlsu6.html#x14-370003.2.11" id="QQ2-14-41">csclip &#8212; Reduce to a submatrix</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.12 <a
href="userhtmlsu6.html#x14-380003.2.12" id="QQ2-14-42">clean_zeros &#8212; Eliminate zero coefficients</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.13 <a
href="userhtmlsu6.html#x14-390003.2.13" id="QQ2-14-43">get_diag &#8212; Get main diagonal</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.14 <a
href="userhtmlsu6.html#x14-400003.2.14" id="QQ2-14-44">clip_diag &#8212; Cut out main diagonal</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >2.1 <a
href="userhtmlse2.html#x4-40002.1" id="QQ2-4-5">Basic Nomenclature</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >2.2 <a
href="userhtmlse2.html#x4-50002.2" id="QQ2-4-7">Library contents</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >2.3 <a
href="userhtmlse2.html#x4-60002.3" id="QQ2-4-8">Application structure</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >2.3.1 <a
href="userhtmlse2.html#x4-70002.3.1" id="QQ2-4-9">User-defined index mappings</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >2.4 <a
href="userhtmlse2.html#x4-80002.4" id="QQ2-4-10">Programming model</a></span>
<br /> &#x00A0;<span class="sectionToc" >3 <a
href="userhtmlse3.html#x8-90003">Data Structures and Classes</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >3.1 <a
href="userhtmlse3.html#x8-100003.1" id="QQ2-8-12">Descriptor data structure</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.1 <a
href="userhtmlse3.html#x8-110003.1.1" id="QQ2-8-14">Descriptor Methods</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.2 <a
href="userhtmlse3.html#x8-120003.1.2" id="QQ2-8-15">get_local_rows &#8212; Get number of local rows</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.3 <a
href="userhtmlse3.html#x8-130003.1.3" id="QQ2-8-16">get_local_cols &#8212; Get number of local cols</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.4 <a
href="userhtmlse3.html#x8-140003.1.4" id="QQ2-8-17">get_global_rows &#8212; Get number of global rows</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.5 <a
href="userhtmlse3.html#x8-150003.1.5" id="QQ2-8-18">get_global_cols &#8212; Get number of global cols</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.6 <a
href="userhtmlse3.html#x8-160003.1.6" id="QQ2-8-19">get_global_indices &#8212; Get vector of global indices</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.7 <a
href="userhtmlse3.html#x8-170003.1.7" id="QQ2-8-20">get_context &#8212; Get communication context</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.8 <a
href="userhtmlse3.html#x8-180003.1.8" id="QQ2-8-21">Clone &#8212; clone current object</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.9 <a
href="userhtmlse3.html#x8-190003.1.9" id="QQ2-8-22">CNV &#8212; convert internal storage format</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.10 <a
href="userhtmlse3.html#x8-200003.1.10" id="QQ2-8-23">psb_cd_get_large_threshold &#8212; Get threshold for index mapping switch</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.11 <a
href="userhtmlse3.html#x8-210003.1.11" id="QQ2-8-24">psb_cd_set_large_threshold &#8212; Set threshold for index mapping switch</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.12 <a
href="userhtmlse3.html#x8-220003.1.12" id="QQ2-8-25">get_p_adjcncy &#8212; Get process adjacency list</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.13 <a
href="userhtmlse3.html#x8-230003.1.13" id="QQ2-8-26">set_p_adjcncy &#8212; Set process adjacency list</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.14 <a
href="userhtmlse3.html#x8-240003.1.14" id="QQ2-8-27">fnd_owner &#8212; Find the owner process of a set of indices</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.1.15 <a
href="userhtmlse3.html#x8-250003.1.15" id="QQ2-8-28">Named Constants</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >3.2 <a
href="userhtmlse3.html#x8-260003.2" id="QQ2-8-29">Sparse Matrix class</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.1 <a
href="userhtmlse3.html#x8-270003.2.1" id="QQ2-8-31">Sparse Matrix Methods</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.2 <a
href="userhtmlse3.html#x8-280003.2.2" id="QQ2-8-32">get_nrows &#8212; Get number of rows in a sparse matrix</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.15 <a
href="userhtmlsu6.html#x14-410003.2.15" id="QQ2-14-45">tril &#8212; Return the lower triangle</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.16 <a
href="userhtmlsu6.html#x14-420003.2.16" id="QQ2-14-46">triu &#8212; Return the upper triangle</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.17 <a
href="userhtmlsu6.html#x14-430003.2.17" id="QQ2-14-47">psb_set_mat_default &#8212; Set default storage format</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.18 <a
href="userhtmlsu6.html#x14-440003.2.18" id="QQ2-14-48">clone &#8212; Clone current object</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.19 <a
href="userhtmlsu6.html#x14-450003.2.19" id="QQ2-14-49">Named Constants</a></span>
<br /> &#x00A0;<span class="subsectionToc" >3.3 <a
href="userhtmlsu7.html#x15-460003.3">Dense Vector Data Structure</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.3.1 <a
href="userhtmlsu7.html#x15-470003.3.1" id="QQ2-15-52">Vector Methods</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.3.2 <a
href="userhtmlsu7.html#x15-480003.3.2" id="QQ2-15-53">get_nrows &#8212; Get number of rows in a dense vector</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.3.3 <a
href="userhtmlsu7.html#x15-490003.3.3" id="QQ2-15-54">sizeof &#8212; Get memory occupation in bytes of a dense vector</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.3.4 <a
href="userhtmlsu7.html#x15-500003.3.4" id="QQ2-15-55">set &#8212; Set contents of the vector</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.3.5 <a
href="userhtmlsu7.html#x15-510003.3.5" id="QQ2-15-56">get_vect &#8212; Get a copy of the vector contents</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >3.3.6 <a
href="userhtmlsu7.html#x15-520003.3.6" id="QQ2-15-57">clone &#8212; Clone current object</a></span>
<br /> &#x00A0;<span class="subsectionToc" >3.4 <a
href="userhtmlsu8.html#x16-530003.4">Preconditioner data structure</a></span>
<br /> &#x00A0;<span class="subsectionToc" >3.5 <a
href="userhtmlsu9.html#x17-540003.5">Heap data structure</a></span>
<br /> <span class="sectionToc" >4 <a
href="userhtmlse4.html#x18-550004">Computational routines</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.1 <a
href="userhtmlsu10.html#x19-560004.1">psb_geaxpby &#8212; General Dense Matrix Sum</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.2 <a
href="userhtmlsu11.html#x20-570004.2">psb_gedot &#8212; Dot Product</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.3 <a
href="userhtmlsu12.html#x21-580004.3">psb_gedots &#8212; Generalized Dot Product</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.4 <a
href="userhtmlsu13.html#x22-590004.4">psb_normi &#8212; Infinity-Norm of Vector</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.5 <a
href="userhtmlsu14.html#x23-600004.5">psb_geamaxs &#8212; Generalized Infinity Norm</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.6 <a
href="userhtmlsu15.html#x24-610004.6">psb_norm1 &#8212; 1-Norm of Vector</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.7 <a
href="userhtmlsu16.html#x25-620004.7">psb_geasums &#8212; Generalized 1-Norm of Vector</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.8 <a
href="userhtmlsu17.html#x26-630004.8">psb_norm2 &#8212; 2-Norm of Vector</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.9 <a
href="userhtmlsu18.html#x27-640004.9">psb_genrm2s &#8212; Generalized 2-Norm of Vector</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.10 <a
href="userhtmlsu19.html#x28-650004.10">psb_norm1 &#8212; 1-Norm of Sparse Matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.11 <a
href="userhtmlsu20.html#x29-660004.11">psb_normi &#8212; Infinity Norm of Sparse Matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.12 <a
href="userhtmlsu21.html#x30-670004.12">psb_spmm &#8212; Sparse Matrix by Dense Matrix Product</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.13 <a
href="userhtmlsu22.html#x31-680004.13">psb_spsm &#8212; Triangular System Solve</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.14 <a
href="userhtmlsu23.html#x32-690004.14">psb_gemlt &#8212; Entrywise Product</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.15 <a
href="userhtmlsu24.html#x33-700004.15">psb_gediv &#8212; Entrywise Division</a></span>
<br /> &#x00A0;<span class="subsectionToc" >4.16 <a
href="userhtmlsu25.html#x34-710004.16">psb_geinv &#8212; Entrywise Inversion</a></span>
<br /> <span class="sectionToc" >5 <a
href="userhtmlse5.html#x35-720005">Communication routines</a></span>
<br /> &#x00A0;<span class="subsectionToc" >5.1 <a
href="userhtmlsu26.html#x36-730005.1">psb_halo &#8212; Halo Data Communication</a></span>
<br /> &#x00A0;<span class="subsectionToc" >5.2 <a
href="userhtmlsu27.html#x37-740005.2">psb_ovrl &#8212; Overlap Update</a></span>
<br /> &#x00A0;<span class="subsectionToc" >5.3 <a
href="userhtmlsu28.html#x38-750005.3">psb_gather &#8212; Gather Global Dense Matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >5.4 <a
href="userhtmlsu29.html#x39-760005.4">psb_scatter &#8212; Scatter Global Dense Matrix</a></span>
<br /> <span class="sectionToc" >6 <a
href="userhtmlse6.html#x40-770006">Data management routines</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.1 <a
href="userhtmlsu30.html#x41-780006.1">psb_cdall &#8212; Allocates a communication descriptor</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.2 <a
href="userhtmlsu31.html#x42-790006.2">psb_cdins &#8212; Communication descriptor insert routine</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.3 <a
href="userhtmlsu32.html#x43-800006.3">psb_cdasb &#8212; Communication descriptor assembly routine</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.4 <a
href="userhtmlsu33.html#x44-810006.4">psb_cdcpy &#8212; Copies a communication descriptor</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.5 <a
href="userhtmlsu34.html#x45-820006.5">psb_cdfree &#8212; Frees a communication descriptor</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.6 <a
href="userhtmlsu35.html#x46-830006.6">psb_cdbldext &#8212; Build an extended communication descriptor</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.7 <a
href="userhtmlsu36.html#x47-840006.7">psb_spall &#8212; Allocates a sparse matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.8 <a
href="userhtmlsu37.html#x48-850006.8">psb_spins &#8212; Insert a set of coefficients into a sparse matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.9 <a
href="userhtmlsu38.html#x49-860006.9">psb_spasb &#8212; Sparse matrix assembly routine</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.10 <a
href="userhtmlsu39.html#x50-870006.10">psb_spfree &#8212; Frees a sparse matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.11 <a
href="userhtmlsu40.html#x51-880006.11">psb_sprn &#8212; Reinit sparse matrix structure for psblas routines.</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.12 <a
href="userhtmlsu41.html#x52-890006.12">psb_geall &#8212; Allocates a dense matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.13 <a
href="userhtmlsu42.html#x53-900006.13">psb_geins &#8212; Dense matrix insertion routine</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.3 <a
href="userhtmlse3.html#x8-290003.2.3" id="QQ2-8-33">get_ncols &#8212; Get number of columns in a sparse matrix</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.4 <a
href="userhtmlse3.html#x8-300003.2.4" id="QQ2-8-34">get_nnzeros &#8212; Get number of nonzero elements in a sparse matrix</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.5 <a
href="userhtmlse3.html#x8-310003.2.5" id="QQ2-8-35">get_size &#8212; Get maximum number of nonzero elements in a sparse matrix</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.6 <a
href="userhtmlse3.html#x8-320003.2.6" id="QQ2-8-36">sizeof &#8212; Get memory occupation in bytes of a sparse matrix</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.7 <a
href="userhtmlse3.html#x8-330003.2.7" id="QQ2-8-37">get_fmt &#8212; Short description of the dynamic type</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.8 <a
href="userhtmlse3.html#x8-340003.2.8" id="QQ2-8-38">is_bld, is_upd, is_asb &#8212; Status check</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.9 <a
href="userhtmlse3.html#x8-350003.2.9" id="QQ2-8-39">is_lower, is_upper, is_triangle, is_unit &#8212; Format check</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.10 <a
href="userhtmlse3.html#x8-360003.2.10" id="QQ2-8-40">cscnv &#8212; Convert to a different storage format</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.11 <a
href="userhtmlse3.html#x8-370003.2.11" id="QQ2-8-41">csclip &#8212; Reduce to a submatrix</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.12 <a
href="userhtmlse3.html#x8-380003.2.12" id="QQ2-8-42">clean_zeros &#8212; Eliminate zero coefficients</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.13 <a
href="userhtmlse3.html#x8-390003.2.13" id="QQ2-8-43">get_diag &#8212; Get main diagonal</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.14 <a
href="userhtmlse3.html#x8-400003.2.14" id="QQ2-8-44">clip_diag &#8212; Cut out main diagonal</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.15 <a
href="userhtmlse3.html#x8-410003.2.15" id="QQ2-8-45">tril &#8212; Return the lower triangle</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.16 <a
href="userhtmlse3.html#x8-420003.2.16" id="QQ2-8-46">triu &#8212; Return the upper triangle</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.17 <a
href="userhtmlse3.html#x8-430003.2.17" id="QQ2-8-47">psb_set_mat_default &#8212; Set default storage format</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.18 <a
href="userhtmlse3.html#x8-440003.2.18" id="QQ2-8-48">clone &#8212; Clone current object</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.2.19 <a
href="userhtmlse3.html#x8-450003.2.19" id="QQ2-8-49">Named Constants</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >3.3 <a
href="userhtmlse3.html#x8-460003.3" id="QQ2-8-50">Dense Vector Data Structure</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.3.1 <a
href="userhtmlse3.html#x8-470003.3.1" id="QQ2-8-52">Vector Methods</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.3.2 <a
href="userhtmlse3.html#x8-480003.3.2" id="QQ2-8-53">get_nrows &#8212; Get number of rows in a dense vector</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.3.3 <a
href="userhtmlse3.html#x8-490003.3.3" id="QQ2-8-54">sizeof &#8212; Get memory occupation in bytes of a dense vector</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.3.4 <a
href="userhtmlse3.html#x8-500003.3.4" id="QQ2-8-55">set &#8212; Set contents of the vector</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.3.5 <a
href="userhtmlse3.html#x8-510003.3.5" id="QQ2-8-56">get_vect &#8212; Get a copy of the vector contents</a></span>
<br /> &#x00A0;&#x00A0;&#x00A0;<span class="subsubsectionToc" >3.3.6 <a
href="userhtmlse3.html#x8-520003.3.6" id="QQ2-8-57">clone &#8212; Clone current object</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >3.4 <a
href="userhtmlse3.html#x8-530003.4" id="QQ2-8-58">Preconditioner data structure</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >3.5 <a
href="userhtmlse3.html#x8-540003.5" id="QQ2-8-60">Heap data structure</a></span>
<br /> &#x00A0;<span class="sectionToc" >4 <a
href="userhtmlse4.html#x9-550004">Computational routines</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >4.1 <a
href="userhtmlse4.html#x9-560004.1" id="QQ2-9-62">psb_geaxpby &#8212; General Dense Matrix Sum</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >4.2 <a
href="userhtmlse4.html#x9-570004.2" id="QQ2-9-64">psb_gedot &#8212; Dot Product</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >4.3 <a
href="userhtmlse4.html#x9-580004.3" id="QQ2-9-66">psb_gedots &#8212; Generalized Dot Product</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >4.4 <a
href="userhtmlse4.html#x9-590004.4" id="QQ2-9-68">psb_normi &#8212; Infinity-Norm of Vector</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >4.5 <a
href="userhtmlse4.html#x9-600004.5" id="QQ2-9-70">psb_geamaxs &#8212; Generalized Infinity Norm</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >4.6 <a
href="userhtmlse4.html#x9-610004.6" id="QQ2-9-72">psb_norm1 &#8212; 1-Norm of Vector</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >4.7 <a
href="userhtmlse4.html#x9-620004.7" id="QQ2-9-74">psb_geasums &#8212; Generalized 1-Norm of Vector</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >4.8 <a
href="userhtmlse4.html#x9-630004.8" id="QQ2-9-76">psb_norm2 &#8212; 2-Norm of Vector</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >4.9 <a
href="userhtmlse4.html#x9-640004.9" id="QQ2-9-78">psb_genrm2s &#8212; Generalized 2-Norm of Vector</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >4.10 <a
href="userhtmlse4.html#x9-650004.10" id="QQ2-9-80">psb_norm1 &#8212; 1-Norm of Sparse Matrix</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >4.11 <a
href="userhtmlse4.html#x9-660004.11" id="QQ2-9-82">psb_normi &#8212; Infinity Norm of Sparse Matrix</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >4.12 <a
href="userhtmlse4.html#x9-670004.12" id="QQ2-9-84">psb_spmm &#8212; Sparse Matrix by Dense Matrix Product</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >4.13 <a
href="userhtmlse4.html#x9-680004.13" id="QQ2-9-86">psb_spsm &#8212; Triangular System Solve</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >4.14 <a
href="userhtmlse4.html#x9-690004.14" id="QQ2-9-88">psb_gemlt &#8212; Entrywise Product</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >4.15 <a
href="userhtmlse4.html#x9-700004.15" id="QQ2-9-90">psb_gediv &#8212; Entrywise Division</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >4.16 <a
href="userhtmlse4.html#x9-710004.16" id="QQ2-9-92">psb_geinv &#8212; Entrywise Inversion</a></span>
<br /> &#x00A0;<span class="sectionToc" >5 <a
href="userhtmlse5.html#x10-720005">Communication routines</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >5.1 <a
href="userhtmlse5.html#x10-730005.1" id="QQ2-10-95">psb_halo &#8212; Halo Data Communication</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >5.2 <a
href="userhtmlse5.html#x10-740005.2" id="QQ2-10-98">psb_ovrl &#8212; Overlap Update</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >5.3 <a
href="userhtmlse5.html#x10-750005.3" id="QQ2-10-101">psb_gather &#8212; Gather Global Dense Matrix</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >5.4 <a
href="userhtmlse5.html#x10-760005.4" id="QQ2-10-103">psb_scatter &#8212; Scatter Global Dense Matrix</a></span>
<br /> &#x00A0;<span class="sectionToc" >6 <a
href="userhtmlse6.html#x11-770006">Data management routines</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.1 <a
href="userhtmlse6.html#x11-780006.1" id="QQ2-11-106">psb_cdall &#8212; Allocates a communication descriptor</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.14 <a
href="userhtmlsu43.html#x54-910006.14">psb_geasb &#8212; Assemble a dense matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.15 <a
href="userhtmlsu44.html#x55-920006.15">psb_gefree &#8212; Frees a dense matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.16 <a
href="userhtmlsu45.html#x56-930006.16">psb_gelp &#8212; Applies a left permutation to a dense matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.17 <a
href="userhtmlsu46.html#x57-940006.17">psb_glob_to_loc &#8212; Global to local indices conversion</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.18 <a
href="userhtmlsu47.html#x58-950006.18">psb_loc_to_glob &#8212; Local to global indices conversion</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.19 <a
href="userhtmlsu48.html#x59-960006.19">psb_is_owned &#8212; </a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.20 <a
href="userhtmlsu49.html#x60-970006.20">psb_owned_index &#8212; </a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.21 <a
href="userhtmlsu50.html#x61-980006.21">psb_is_local &#8212; </a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.22 <a
href="userhtmlsu51.html#x62-990006.22">psb_local_index &#8212; </a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.23 <a
href="userhtmlsu52.html#x63-1000006.23">psb_get_boundary &#8212; Extract list of boundary elements</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.24 <a
href="userhtmlsu53.html#x64-1010006.24">psb_get_overlap &#8212; Extract list of overlap elements</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.25 <a
href="userhtmlsu54.html#x65-1020006.25">psb_sp_getrow &#8212; Extract row(s) from a sparse matrix</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.26 <a
href="userhtmlsu55.html#x66-1030006.26">psb_sizeof &#8212; Memory occupation</a></span>
<br /> &#x00A0;<span class="subsectionToc" >6.27 <a
href="userhtmlsu56.html#x67-1040006.27">Sorting utilities &#8212; </a></span>
<br /> <span class="sectionToc" >7 <a
href="userhtmlse7.html#x68-1050007">Parallel environment routines</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.1 <a
href="userhtmlsu57.html#x69-1060007.1">psb_init &#8212; Initializes PSBLAS parallel environment</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.2 <a
href="userhtmlsu58.html#x70-1070007.2">psb_info &#8212; Return information about PSBLAS parallel environment</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.3 <a
href="userhtmlsu59.html#x71-1080007.3">psb_exit &#8212; Exit from PSBLAS parallel environment</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.4 <a
href="userhtmlsu60.html#x72-1090007.4">psb_get_mpi_comm &#8212; Get the MPI communicator</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.5 <a
href="userhtmlsu61.html#x73-1100007.5">psb_get_mpi_rank &#8212; Get the MPI rank</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.6 <a
href="userhtmlsu62.html#x74-1110007.6">psb_wtime &#8212; Wall clock timing</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.7 <a
href="userhtmlsu63.html#x75-1120007.7">psb_barrier &#8212; Synchronization point parallel environment</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.8 <a
href="userhtmlsu64.html#x76-1130007.8">psb_abort &#8212; Abort a computation</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.9 <a
href="userhtmlsu65.html#x77-1140007.9">psb_bcast &#8212; Broadcast data</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.10 <a
href="userhtmlsu66.html#x78-1150007.10">psb_sum &#8212; Global sum</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.11 <a
href="userhtmlsu67.html#x79-1160007.11">psb_max &#8212; Global maximum</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.12 <a
href="userhtmlsu68.html#x80-1170007.12">psb_min &#8212; Global minimum</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.13 <a
href="userhtmlsu69.html#x81-1180007.13">psb_amx &#8212; Global maximum absolute value</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.14 <a
href="userhtmlsu70.html#x82-1190007.14">psb_amn &#8212; Global minimum absolute value</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.15 <a
href="userhtmlsu71.html#x83-1200007.15">psb_nrm2 &#8212; Global 2-norm reduction</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.16 <a
href="userhtmlsu72.html#x84-1210007.16">psb_snd &#8212; Send data</a></span>
<br /> &#x00A0;<span class="subsectionToc" >7.17 <a
href="userhtmlsu73.html#x85-1220007.17">psb_rcv &#8212; Receive data</a></span>
<br /> <span class="sectionToc" >8 <a
href="userhtmlse8.html#x86-1230008">Error handling</a></span>
<br /> &#x00A0;<span class="subsectionToc" >8.1 <a
href="userhtmlsu74.html#x87-1240008.1">psb_errpush &#8212; Pushes an error code onto the error stack</a></span>
<br /> &#x00A0;<span class="subsectionToc" >8.2 <a
href="userhtmlsu75.html#x88-1250008.2">psb_error &#8212; Prints the error stack content and aborts execution</a></span>
<br /> &#x00A0;<span class="subsectionToc" >8.3 <a
href="userhtmlsu76.html#x89-1260008.3">psb_set_errverbosity &#8212; Sets the verbosity of error messages</a></span>
<br /> &#x00A0;<span class="subsectionToc" >8.4 <a
href="userhtmlsu77.html#x90-1270008.4">psb_set_erraction &#8212; Set the type of action to be taken upon error condition</a></span>
<br /> <span class="sectionToc" >9 <a
href="userhtmlse9.html#x91-1280009">Utilities</a></span>
<br /> &#x00A0;<span class="subsectionToc" >9.1 <a
href="userhtmlsu78.html#x92-1290009.1"> hb_read &#8212; Read a sparse matrix from a file in the Harwell&#8211;Boeing format</a></span>
<br /> &#x00A0;<span class="subsectionToc" >9.2 <a
href="userhtmlsu79.html#x93-1300009.2">hb_write &#8212; Write a sparse matrix to a file in the Harwell&#8211;Boeing format</a></span>
<br /> &#x00A0;<span class="subsectionToc" >9.3 <a
href="userhtmlsu80.html#x94-1310009.3">mm_mat_read &#8212; Read a sparse matrix from a file in the MatrixMarket format</a></span>
<br /> &#x00A0;<span class="subsectionToc" >9.4 <a
href="userhtmlsu81.html#x95-1320009.4">mm_array_read &#8212; Read a dense array from a file in the MatrixMarket format</a></span>
<br /> &#x00A0;<span class="subsectionToc" >9.5 <a
href="userhtmlsu82.html#x96-1330009.5">mm_mat_write &#8212; Write a sparse matrix to a file in the MatrixMarket format</a></span>
<br /> &#x00A0;<span class="subsectionToc" >9.6 <a
href="userhtmlsu83.html#x97-1340009.6">mm_array_write &#8212; Write a dense array to a file in the MatrixMarket format</a></span>
<br /> <span class="sectionToc" >10 <a
href="userhtmlse10.html#x98-13500010">Preconditioner routines</a></span>
<br /> &#x00A0;<span class="subsectionToc" >10.1 <a
href="userhtmlsu84.html#x99-13600010.1">init &#8212; Initialize a preconditioner</a></span>
<br /> &#x00A0;<span class="subsectionToc" >10.2 <a
href="userhtmlsu85.html#x101-13700010.2">build &#8212; Builds a preconditioner</a></span>
<br /> &#x00A0;<span class="subsectionToc" >10.3 <a
href="userhtmlsu86.html#x102-13800010.3">apply &#8212; Preconditioner application routine</a></span>
<br /> &#x00A0;<span class="subsectionToc" >10.4 <a
href="userhtmlsu87.html#x103-13900010.4">descr &#8212; Prints a description of current preconditioner</a></span>
<br /> &#x00A0;<span class="subsectionToc" >10.5 <a
href="userhtmlsu88.html#x104-14000010.5">clone &#8212; clone current preconditioner</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.2 <a
href="userhtmlse6.html#x11-790006.2" id="QQ2-11-107">psb_cdins &#8212; Communication descriptor insert routine</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.3 <a
href="userhtmlse6.html#x11-800006.3" id="QQ2-11-108">psb_cdasb &#8212; Communication descriptor assembly routine</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.4 <a
href="userhtmlse6.html#x11-810006.4" id="QQ2-11-109">psb_cdcpy &#8212; Copies a communication descriptor</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.5 <a
href="userhtmlse6.html#x11-820006.5" id="QQ2-11-110">psb_cdfree &#8212; Frees a communication descriptor</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.6 <a
href="userhtmlse6.html#x11-830006.6" id="QQ2-11-111">psb_cdbldext &#8212; Build an extended communication descriptor</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.7 <a
href="userhtmlse6.html#x11-840006.7" id="QQ2-11-112">psb_spall &#8212; Allocates a sparse matrix</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.8 <a
href="userhtmlse6.html#x11-850006.8" id="QQ2-11-113">psb_spins &#8212; Insert a set of coefficients into a sparse matrix</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.9 <a
href="userhtmlse6.html#x11-860006.9" id="QQ2-11-114">psb_spasb &#8212; Sparse matrix assembly routine</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.10 <a
href="userhtmlse6.html#x11-870006.10" id="QQ2-11-115">psb_spfree &#8212; Frees a sparse matrix</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.11 <a
href="userhtmlse6.html#x11-880006.11" id="QQ2-11-116">psb_sprn &#8212; Reinit sparse matrix structure for psblas routines.</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.12 <a
href="userhtmlse6.html#x11-890006.12" id="QQ2-11-117">psb_geall &#8212; Allocates a dense matrix</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.13 <a
href="userhtmlse6.html#x11-900006.13" id="QQ2-11-118">psb_geins &#8212; Dense matrix insertion routine</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.14 <a
href="userhtmlse6.html#x11-910006.14" id="QQ2-11-119">psb_geasb &#8212; Assemble a dense matrix</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.15 <a
href="userhtmlse6.html#x11-920006.15" id="QQ2-11-120">psb_gefree &#8212; Frees a dense matrix</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.16 <a
href="userhtmlse6.html#x11-930006.16" id="QQ2-11-121">psb_gelp &#8212; Applies a left permutation to a dense matrix</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.17 <a
href="userhtmlse6.html#x11-940006.17" id="QQ2-11-122">psb_glob_to_loc &#8212; Global to local indices conversion</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.18 <a
href="userhtmlse6.html#x11-950006.18" id="QQ2-11-123">psb_loc_to_glob &#8212; Local to global indices conversion</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.19 <a
href="userhtmlse6.html#x11-960006.19" id="QQ2-11-124">psb_is_owned &#8212; </a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.20 <a
href="userhtmlse6.html#x11-970006.20" id="QQ2-11-125">psb_owned_index &#8212; </a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.21 <a
href="userhtmlse6.html#x11-980006.21" id="QQ2-11-126">psb_is_local &#8212; </a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.22 <a
href="userhtmlse6.html#x11-990006.22" id="QQ2-11-127">psb_local_index &#8212; </a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.23 <a
href="userhtmlse6.html#x11-1000006.23" id="QQ2-11-128">psb_get_boundary &#8212; Extract list of boundary elements</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.24 <a
href="userhtmlse6.html#x11-1010006.24" id="QQ2-11-129">psb_get_overlap &#8212; Extract list of overlap elements</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.25 <a
href="userhtmlse6.html#x11-1020006.25" id="QQ2-11-130">psb_sp_getrow &#8212; Extract row(s) from a sparse matrix</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.26 <a
href="userhtmlse6.html#x11-1030006.26" id="QQ2-11-131">psb_sizeof &#8212; Memory occupation</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >6.27 <a
href="userhtmlse6.html#x11-1040006.27" id="QQ2-11-132">Sorting utilities &#8212; </a></span>
<br /> &#x00A0;<span class="sectionToc" >7 <a
href="userhtmlse7.html#x12-1050007">Parallel environment routines</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.1 <a
href="userhtmlse7.html#x12-1060007.1" id="QQ2-12-134">psb_init &#8212; Initializes PSBLAS parallel environment</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.2 <a
href="userhtmlse7.html#x12-1070007.2" id="QQ2-12-135">psb_info &#8212; Return information about PSBLAS parallel environment</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.3 <a
href="userhtmlse7.html#x12-1080007.3" id="QQ2-12-136">psb_exit &#8212; Exit from PSBLAS parallel environment</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.4 <a
href="userhtmlse7.html#x12-1090007.4" id="QQ2-12-137">psb_get_mpi_comm &#8212; Get the MPI communicator</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.5 <a
href="userhtmlse7.html#x12-1100007.5" id="QQ2-12-138">psb_get_mpi_rank &#8212; Get the MPI rank</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.6 <a
href="userhtmlse7.html#x12-1110007.6" id="QQ2-12-139">psb_wtime &#8212; Wall clock timing</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.7 <a
href="userhtmlse7.html#x12-1120007.7" id="QQ2-12-140">psb_barrier &#8212; Synchronization point parallel environment</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.8 <a
href="userhtmlse7.html#x12-1130007.8" id="QQ2-12-141">psb_abort &#8212; Abort a computation</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.9 <a
href="userhtmlse7.html#x12-1140007.9" id="QQ2-12-142">psb_bcast &#8212; Broadcast data</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.10 <a
href="userhtmlse7.html#x12-1150007.10" id="QQ2-12-143">psb_sum &#8212; Global sum</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.11 <a
href="userhtmlse7.html#x12-1160007.11" id="QQ2-12-144">psb_max &#8212; Global maximum</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.12 <a
href="userhtmlse7.html#x12-1170007.12" id="QQ2-12-145">psb_min &#8212; Global minimum</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.13 <a
href="userhtmlse7.html#x12-1180007.13" id="QQ2-12-146">psb_amx &#8212; Global maximum absolute value</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.14 <a
href="userhtmlse7.html#x12-1190007.14" id="QQ2-12-147">psb_amn &#8212; Global minimum absolute value</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.15 <a
href="userhtmlse7.html#x12-1200007.15" id="QQ2-12-148">psb_nrm2 &#8212; Global 2-norm reduction</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.16 <a
href="userhtmlse7.html#x12-1210007.16" id="QQ2-12-149">psb_snd &#8212; Send data</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >7.17 <a
href="userhtmlse7.html#x12-1220007.17" id="QQ2-12-150">psb_rcv &#8212; Receive data</a></span>
<br /> &#x00A0;<span class="sectionToc" >8 <a
href="userhtmlse8.html#x13-1230008">Error handling</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >8.1 <a
href="userhtmlse8.html#x13-1240008.1" id="QQ2-13-154">psb_errpush &#8212; Pushes an error code onto the error stack</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >8.2 <a
href="userhtmlse8.html#x13-1250008.2" id="QQ2-13-155">psb_error &#8212; Prints the error stack content and aborts execution</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >8.3 <a
href="userhtmlse8.html#x13-1260008.3" id="QQ2-13-156">psb_set_errverbosity &#8212; Sets the verbosity of error messages</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >8.4 <a
href="userhtmlse8.html#x13-1270008.4" id="QQ2-13-157">psb_set_erraction &#8212; Set the type of action to be taken upon error condition</a></span>
<br /> &#x00A0;<span class="sectionToc" >9 <a
href="userhtmlse9.html#x14-1280009">Utilities</a></span>
<br /> &#x00A0;<span class="subsectionToc" >10.6 <a
href="userhtmlsu89.html#x105-14100010.6">free &#8212; Free a preconditioner</a></span>
<br /> <span class="sectionToc" >11 <a
href="userhtmlse11.html#x106-14200011">Iterative Methods</a></span>
<br /> &#x00A0;<span class="subsectionToc" >11.1 <a
href="userhtmlsu90.html#x107-14300011.1">psb_krylov &#8212; Krylov Methods Driver Routine</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >9.1 <a
href="userhtmlse9.html#x14-1290009.1" id="QQ2-14-159"> hb_read &#8212; Read a sparse matrix from a file in the Harwell&#8211;Boeing format</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >9.2 <a
href="userhtmlse9.html#x14-1300009.2" id="QQ2-14-160">hb_write &#8212; Write a sparse matrix to a file in the Harwell&#8211;Boeing format</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >9.3 <a
href="userhtmlse9.html#x14-1310009.3" id="QQ2-14-161">mm_mat_read &#8212; Read a sparse matrix from a file in the MatrixMarket format</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >9.4 <a
href="userhtmlse9.html#x14-1320009.4" id="QQ2-14-162">mm_array_read &#8212; Read a dense array from a file in the MatrixMarket format</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >9.5 <a
href="userhtmlse9.html#x14-1330009.5" id="QQ2-14-163">mm_mat_write &#8212; Write a sparse matrix to a file in the MatrixMarket format</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >9.6 <a
href="userhtmlse9.html#x14-1340009.6" id="QQ2-14-164">mm_array_write &#8212; Write a dense array to a file in the MatrixMarket format</a></span>
<br /> &#x00A0;<span class="sectionToc" >10 <a
href="userhtmlse10.html#x15-13500010">Preconditioner routines</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >10.1 <a
href="userhtmlse10.html#x15-13600010.1" id="QQ2-15-166">init &#8212; Initialize a preconditioner</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >10.2 <a
href="userhtmlse10.html#x15-13700010.2" id="QQ2-15-167">build &#8212; Builds a preconditioner</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >10.3 <a
href="userhtmlse10.html#x15-13800010.3" id="QQ2-15-168">apply &#8212; Preconditioner application routine</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >10.4 <a
href="userhtmlse10.html#x15-13900010.4" id="QQ2-15-169">descr &#8212; Prints a description of current preconditioner</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >10.5 <a
href="userhtmlse10.html#x15-14000010.5" id="QQ2-15-170">clone &#8212; clone current preconditioner</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >10.6 <a
href="userhtmlse10.html#x15-14100010.6" id="QQ2-15-171">free &#8212; Free a preconditioner</a></span>
<br /> &#x00A0;<span class="sectionToc" >11 <a
href="userhtmlse11.html#x17-14200011">Iterative Methods</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >11.1 <a
href="userhtmlse11.html#x17-14300011.1" id="QQ2-17-173">psb_krylov &#8212; Krylov Methods Driver Routine</a></span>
<br /> &#x00A0;<span class="sectionToc" >12 <a
href="userhtmlse12.html#x19-14400012">Extensions</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >12.1 <a
href="userhtmlse12.html#x19-14500012.1" id="QQ2-19-175">Using the extensions</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >12.2 <a
href="userhtmlse12.html#x19-14600012.2" id="QQ2-19-176">Extensions&#8217; Data Structures</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >12.3 <a
href="userhtmlse12.html#x19-14700012.3" id="QQ2-19-179">CPU-class extensions</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" >12.4 <a
href="userhtmlse12.html#x19-15200012.4" id="QQ2-19-188">CUDA-class extensions</a></span>
<br /> &#x00A0;<span class="sectionToc" >13 <a
href="userhtmlse13.html#x20-15300013">CUDA Environment Routines</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-191">psb_cuda_init</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-193">psb_cuda_exit</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-195">psb_cuda_DeviceSync</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-197">psb_cuda_getDeviceCount</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-199">psb_cuda_getDevice</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-201">psb_cuda_setDevice</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-203">psb_cuda_DeviceHasUVA</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-205">psb_cuda_WarpSize</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-207">psb_cuda_MultiProcessors</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-209">psb_cuda_MaxThreadsPerMP</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-211">psb_cuda_MaxRegisterPerBlock</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-213">psb_cuda_MemoryClockRate</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-215">psb_cuda_MemoryBusWidth</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsectionToc" ><a
href="userhtmlse13.html#Q1-20-217">psb_cuda_MemoryPeakBandwidth</a></span>
</div>

@ -11,26 +11,16 @@
</head><body
>
<!--l. 2--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlse11.html" >prev</a>] [<a
href="userhtmlse11.html#tailuserhtmlse11.html" >prev-tail</a>] [<a
href="userhtmlsu88.html#tailuserhtmlli2.html">tail</a>] [<a
href="userhtmlse13.html" >prev</a>] [<a
href="userhtmlse13.html#tailuserhtmlse13.html" >prev-tail</a>] [<a
href="#tailuserhtmlli2.html">tail</a>] [<a
href="userhtml.html# " >up</a>] </p></div>
<h3 class="likesectionHead"><a
id="x109-14400011.1"></a>References</h3>
id="x21-168000"></a>References</h3>
<!--l. 2--><p class="noindent" >
<div class="thebibliography">
<p class="bibitem" ><span class="biblabel">
[1]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span>
<a
id="XDesPat:11"></a>D.&#x00A0;Barbieri, V.&#x00A0;Cardellini, S.&#x00A0;Filippone and D.&#x00A0;Rouson <span
class="cmti-10">Design Patterns</span>
<span
class="cmti-10">for Scientific Computations on Sparse Matrices</span>, HPSS 2011, Algorithms
and Programming Tools for Next-Generation High-Performance Scientific
Software, Bordeaux, Sep. 2011
</p>
<p class="bibitem" ><span class="biblabel">
[2]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
[1]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XPARA04FOREST"></a>G.&#x00A0;Bella, S.&#x00A0;Filippone, A.&#x00A0;De Maio and M.&#x00A0;Testa, <span
class="cmti-10">A Simulation Model</span>
<span
@ -40,45 +30,45 @@ class="cmti-10">for Forest Fires</span>, in J.&#x00A0;Dongarra, K.&#x00A0;M
2005.
</p>
<p class="bibitem" ><span class="biblabel">
[3]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
[2]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="X2007d"></a>A. Buttari, D. di Serafino, P. D&#8217;Ambra, S. Filippone, 2LEV-D2P4:
a package of high-performance preconditioners, Applicable Algebra in
Engineering, Communications and Computing, Volume 18, Number 3, May,
2007, pp. 223-239
</p>
<p class="bibitem" ><span class="biblabel">
[4]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
[3]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="X2007c"></a>P. D&#8217;Ambra, S. Filippone, D. Di Serafino, On the Development
of PSBLAS-based Parallel Two-level Schwarz Preconditioners, Applied
Numerical Mathematics, Elsevier Science, Volume 57, Issues 11-12,
November-December 2007, Pages 1181-1196.
</p>
<p class="bibitem" ><span class="biblabel">
[5]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
[4]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XBLAS2"></a>Dongarra, J. J., DuCroz, J., Hammarling, S. and Hanson, R., An
Extended Set of Fortran Basic Linear Algebra Subprograms, ACM Trans.
Math. Softw. vol.&#x00A0;14, 1&#8211;17, 1988.
</p>
<p class="bibitem" ><span class="biblabel">
[6]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
[5]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XBLAS3"></a>Dongarra, J., DuCroz, J., Hammarling, S. and Duff, I., A Set of level
3 Basic Linear Algebra Subprograms, ACM Trans. Math. Softw. vol.&#x00A0;16,
1&#8211;17, 1990.
</p>
<p class="bibitem" ><span class="biblabel">
[7]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
[6]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XBLACS"></a>J.&#x00A0;J.&#x00A0;Dongarra and R.&#x00A0;C.&#x00A0;Whaley, <span
class="cmti-10">A User&#8217;s Guide to the BLACS</span>
<span
class="cmti-10">v.</span><span
class="cmti-10">&#x00A0;1.1</span>, Lapack Working Note 94, Tech.&#x00A0;Rep.&#x00A0;UT-CS-95-281, University of
Tennessee, March 1995 (updated May 1997).
</p>
<p class="bibitem" ><span class="biblabel">
[8]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
[7]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="Xsblas97"></a>I.&#x00A0;Duff, M.&#x00A0;Marrone, G.&#x00A0;Radicati and C.&#x00A0;Vittoli, <span
class="cmti-10">Level 3 Basic Linear</span>
<span
@ -86,7 +76,7 @@ class="cmti-10">Algebra Subprograms for Sparse Matrices: a User Level Interface<
Transactions on Mathematical Software, 23(3), pp.&#x00A0;379&#8211;401, 1997.
</p>
<p class="bibitem" ><span class="biblabel">
[9]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
[8]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="Xsblas02"></a>I.&#x00A0;Duff, M.&#x00A0;Heroux and R.&#x00A0;Pozo, <span
class="cmti-10">An Overview of the Sparse Basic</span>
<span
@ -96,7 +86,7 @@ class="cmti-10">Forum</span>, ACM Transactions on Mathematical Software, 28(2),
2002.
</p>
<p class="bibitem" ><span class="biblabel">
[10]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
[9]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XPSBLAS"></a>S.&#x00A0;Filippone and M.&#x00A0;Colajanni, <span
class="cmti-10">PSBLAS: A Library for Parallel</span>
<span
@ -104,7 +94,7 @@ class="cmti-10">Linear Algebra Computation on Sparse Matrices</span>, ACM Trans
Mathematical Software, 26(4), pp.&#x00A0;527&#8211;550, 2000.
</p>
<p class="bibitem" ><span class="biblabel">
[11]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
[10]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XSparse03"></a>S.&#x00A0;Filippone and A.&#x00A0;Buttari, <span
class="cmti-10">Object-Oriented Techniques for Sparse</span>
<span
@ -112,7 +102,7 @@ class="cmti-10">Matrix Computations in Fortran 2003</span>, ACM Transactions on
Software, 38(4), 2012.
</p>
<p class="bibitem" ><span class="biblabel">
[12]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
[11]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XKIVA3PSBLAS"></a>S.&#x00A0;Filippone, P.&#x00A0;D&#8217;Ambra, M.&#x00A0;Colajanni, <span
class="cmti-10">Using a Parallel Library</span>
<span
@ -123,14 +113,14 @@ class="cmti-10">Linux Clusters</span>, in G.&#x00A0;Joubert, A.&#x00A0;Murli, F.
College Press, 2002.
</p>
<p class="bibitem" ><span class="biblabel">
[13]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
[12]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XDesignPatterns"></a> Gamma, E., Helm, R., Johnson, R., and Vlissides, J. 1995. <span
class="cmti-10">Design</span>
<span
class="cmti-10">Patterns: Elements of Reusable Object-Oriented Software</span>. Addison-Wesley.
</p>
<p class="bibitem" ><span class="biblabel">
[14]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
[13]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XMETIS"></a>Karypis, G. and Kumar, V., <span
class="cmti-10">METIS: Unstructured Graph Partitioning</span>
<span
@ -138,18 +128,18 @@ class="cmti-10">and Sparse Matrix Ordering System</span>. Minneapolis, MN 55455:
of Minnesota, Department of Computer Science, 1995. Internet Address:
<span class="obeylines-h"><span class="verb"><span
class="cmtt-10">http://www.cs.umn.edu/~karypis</span></span></span>.
</p>
<p class="bibitem" ><span class="biblabel">
[15]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
[14]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XBLAS1"></a>Lawson, C., Hanson, R., Kincaid, D. and Krogh, F., Basic Linear
Algebra Subprograms for Fortran usage, ACM Trans. Math. Softw. vol.&#x00A0;5,
38&#8211;329, 1979.
</p>
<p class="bibitem" ><span class="biblabel">
[16]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
[15]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="Xmachiels"></a>Machiels, L. and Deville, M. <span
class="cmti-10">Fortran 90: An entry to object-oriented</span>
<span
@ -157,12 +147,18 @@ class="cmti-10">programming for the solution of partial differential equations.
Math. Softw. vol.&#x00A0;23, 32&#8211;49.
</p>
<p class="bibitem" ><span class="biblabel">
[17]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
[16]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="Xmetcalf"></a>Metcalf, M., Reid, J. and Cohen, M. <span
class="cmti-10">Fortran 95/2003 explained. </span>Oxford
University Press, 2004.
</p>
<p class="bibitem" ><span class="biblabel">
[17]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XMRC:11"></a>Metcalf, M., Reid, J. and Cohen, M. <span
class="cmti-10">Modern Fortran explained. </span>Oxford
University Press, 2011.
</p>
<p class="bibitem" ><span class="biblabel">
[18]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XRouXiaXu:11"></a>Rouson, D.W.I., Xia, J., Xu, X.: Scientific Software Design: The
Object-Oriented Way. Cambridge University Press (2011)
@ -172,15 +168,42 @@ class="cmti-10">Fortran 95/2003 explained. </span>Oxford
id="XMPI1"></a>M.&#x00A0;Snir, S.&#x00A0;Otto, S.&#x00A0;Huss-Lederman, D.&#x00A0;Walker and J.&#x00A0;Dongarra,
<span
class="cmti-10">MPI: The Complete Reference. Volume 1 - The MPI Core</span>, second edition,
MIT Press, 1998.</p></div>
MIT Press, 1998.
</p>
<p class="bibitem" ><span class="biblabel">
[20]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span>
<a
id="XDesPat:11"></a>D.&#x00A0;Barbieri, V.&#x00A0;Cardellini, S.&#x00A0;Filippone and D.&#x00A0;Rouson <span
class="cmti-10">Design Patterns</span>
<span
class="cmti-10">for Scientific Computations on Sparse Matrices</span>, HPSS 2011, Algorithms
and Programming Tools for Next-Generation High-Performance Scientific
Software, Bordeaux, Sep. 2011
</p>
<p class="bibitem" ><span class="biblabel">
[21]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XCaFiRo:2014"></a> Cardellini, V., Filippone, S., and Rouson, D. 2014, Design patterns
for sparse-matrix computations on hybrid CPU/GPU platforms, <span
class="cmti-10">Scientific</span>
<span
class="cmti-10">Programming</span>&#x00A0;<span
class="cmti-10">22,</span>&#x00A0;1, 1&#8211;19.
</p>
<p class="bibitem" ><span class="biblabel">
[22]<span class="bibsp">&#x00A0;&#x00A0;&#x00A0;</span></span><a
id="XOurTechRep"></a>D.&#x00A0;Barbieri, V.&#x00A0;Cardellini, A.&#x00A0;Fanfarillo, S.&#x00A0;Filippone, Three storage
formats for sparse matrices on GPGPUs, Tech. Rep. DICII RR-15.6,
Università di Roma Tor Vergata (February 2015).
</p>
</div>
<!--l. 128--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlse11.html" >prev</a>] [<a
href="userhtmlse11.html#tailuserhtmlse11.html" >prev-tail</a>] [<a
<!--l. 130--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlse13.html" >prev</a>] [<a
href="userhtmlse13.html#tailuserhtmlse13.html" >prev-tail</a>] [<a
href="userhtmlli2.html" >front</a>] [<a
href="userhtml.html# " >up</a>] </p></div>
<!--l. 128--><p class="indent" > <a
<!--l. 130--><p class="indent" > <a
id="tailuserhtmlli2.html"></a>
</body></html>

@ -27,35 +27,35 @@ preprocessing sparse matrices, and contains additional routines for dense matrix
operations. The current implementation of PSBLAS addresses a distributed memory
execution model operating with message passing.
<!--l. 14--><p class="indent" > The PSBLAS library version 3 is implemented in the Fortran&#x00A0;2003&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#Xmetcalf">17</a>]</span>
href="userhtmlli2.html#Xmetcalf">16</a>]</span>
programming language, with reuse and/or adaptation of existing Fortran&#x00A0;77 and
Fortran&#x00A0;95 software, plus a handful of C routines.
<!--l. 19--><p class="indent" > The use of Fortran&#x00A0;2003 offers a number of advantages over Fortran&#x00A0;95, mostly in
the handling of requirements for evolution and adaptation of the library to new
computing architectures and integration of new algorithms. For a detailed discussion
of our design see&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#XSparse03">11</a>]</span>; other works discussing advanced programming in Fortran&#x00A0;2003
href="userhtmlli2.html#XSparse03">10</a>]</span>; other works discussing advanced programming in Fortran&#x00A0;2003
include&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#XDesPat:11">1</a>,&#x00A0;<a
href="userhtmlli2.html#XDesPat:11">20</a>,&#x00A0;<a
href="userhtmlli2.html#XRouXiaXu:11">18</a>]</span>; sufficient support for Fortran&#x00A0;2003 is now available from many
compilers, including the GNU Fortran compiler from the Free Software Foundation
(as of version 4.8).
<!--l. 30--><p class="indent" > Previous approaches have been based on mixing Fortran&#x00A0;95, with its support for
object-based design, with other languages; these have been advocated by a number of
authors, e.g.&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#Xmachiels">16</a>]</span>. Moreover, the Fortran&#x00A0;95 facilities for dynamic memory
href="userhtmlli2.html#Xmachiels">15</a>]</span>. Moreover, the Fortran&#x00A0;95 facilities for dynamic memory
management and interface overloading greatly enhance the usability of the PSBLAS
subroutines. In this way, the library can take care of runtime memory requirements
that are quite difficult or even impossible to predict at implementation or
compilation time.
<!--l. 40--><p class="indent" > The presentation of the PSBLAS library follows the general structure of the
proposal for serial Sparse BLAS&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#Xsblas97">8</a>,&#x00A0;<a
href="userhtmlli2.html#Xsblas02">9</a>]</span>, which in its turn is based on the proposal for
href="userhtmlli2.html#Xsblas97">7</a>,&#x00A0;<a
href="userhtmlli2.html#Xsblas02">8</a>]</span>, which in its turn is based on the proposal for
BLAS on dense matrices&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#XBLAS1">15</a>,&#x00A0;<a
href="userhtmlli2.html#XBLAS2">5</a>,&#x00A0;<a
href="userhtmlli2.html#XBLAS3">6</a>]</span>.
href="userhtmlli2.html#XBLAS1">14</a>,&#x00A0;<a
href="userhtmlli2.html#XBLAS2">4</a>,&#x00A0;<a
href="userhtmlli2.html#XBLAS3">5</a>]</span>.
<!--l. 45--><p class="indent" > The applicability of sparse iterative solvers to many different areas causes some
terminology problems because the same concept may be denoted through different
names depending on the application area. The PSBLAS features presented in this

@ -13,16 +13,18 @@
<!--l. 1--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlse9.html" >prev</a>] [<a
href="userhtmlse9.html#tailuserhtmlse9.html" >prev-tail</a>] [<a
href="userhtmlsu81.html#tailuserhtmlse10.html">tail</a>] [<a
href="userhtml.html#userhtmlsu86.html" >up</a>] </p></div>
href="userhtmlse7.html#tailuserhtmlse10.html">tail</a>] [<a
href="userhtml.html#userhtmlse13.html" >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">10 </span> <a
id="x98-13500010"></a>Preconditioner routines</h3>
id="x15-13500010"></a>Preconditioner routines</h3>
<!--l. 6--><p class="noindent" >The base PSBLAS library contains the implementation of two simple preconditioning
techniques:
<ul class="itemize1">
<li class="itemize">Diagonal Scaling
<li class="itemize">
<!--l. 9--><p class="noindent" >Diagonal Scaling
</li>
<li class="itemize">Block Jacobi with ILU(0) factorization</li></ul>
<li class="itemize">
<!--l. 10--><p class="noindent" >Block Jacobi with ILU(0) factorization</li></ul>
<!--l. 14--><p class="noindent" >The supporting data type and subroutine interfaces are defined in the module
<span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_prec_mod</span></span></span>. The old interfaces <span class="obeylines-h"><span class="verb"><span
@ -32,20 +34,673 @@ supported for backward compatibility
<div class="subsectionTOCS">
&#x00A0;<span class="subsectionToc" >10.1 <a
href="userhtmlsu84.html#x99-13600010.1">init &#8212; Initialize a preconditioner</a></span>
<br /> &#x00A0;<span class="subsectionToc" >10.2 <a
href="userhtmlsu85.html#x101-13700010.2">build &#8212; Builds a preconditioner</a></span>
<br /> &#x00A0;<span class="subsectionToc" >10.3 <a
href="userhtmlsu86.html#x102-13800010.3">apply &#8212; Preconditioner application routine</a></span>
<br /> &#x00A0;<span class="subsectionToc" >10.4 <a
href="userhtmlsu87.html#x103-13900010.4">descr &#8212; Prints a description of current preconditioner</a></span>
<br /> &#x00A0;<span class="subsectionToc" >10.5 <a
href="userhtmlsu88.html#x104-14000010.5">clone &#8212; clone current preconditioner</a></span>
<br /> &#x00A0;<span class="subsectionToc" >10.6 <a
href="userhtmlsu89.html#x105-14100010.6">free &#8212; Free a preconditioner</a></span>
</div>
<h4 class="subsectionHead"><span class="titlemark">10.1 </span> <a
id="x15-13600010.1"></a>init &#8212; Initialize a preconditioner</h4>
<pre class="verbatim" id="verbatim-97">
call&#x00A0;prec%init(icontxt,ptype,&#x00A0;info)
</pre>
<!--l. 30--><p class="nopar" >
<!--l. 32--><p class="indent" >
<dl class="description"><dt class="description">
<!--l. 33--><p class="noindent" >
<span
class="cmbx-10">Type:</span> </dt><dd
class="description">
<!--l. 33--><p class="noindent" >Asynchronous.
</dd><dt class="description">
<!--l. 34--><p class="noindent" >
<span
class="cmbx-10">On Entry</span> </dt><dd
class="description">
<!--l. 34--><p class="noindent" >
</dd><dt class="description">
<!--l. 35--><p class="noindent" >
<span
class="cmbx-10">icontxt</span> </dt><dd
class="description">
<!--l. 35--><p class="noindent" >the communication context.<br
class="newline" />Scope: <span
class="cmbx-10">global</span>.<br
class="newline" />Type: <span
class="cmbx-10">required</span>.<br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Specified as: an integer value.
</dd><dt class="description">
<!--l. 40--><p class="noindent" >
<span
class="cmbx-10">ptype</span> </dt><dd
class="description">
<!--l. 40--><p class="noindent" >the type of preconditioner. Scope: <span
class="cmbx-10">global </span><br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Specified as: a character string, see usage notes.
</dd><dt class="description">
<!--l. 53--><p class="noindent" >
<span
class="cmbx-10">On Exit</span> </dt><dd
class="description">
<!--l. 53--><p class="noindent" >
</dd><dt class="description">
<!--l. 55--><p class="noindent" >
<span
class="cmbx-10">prec</span> </dt><dd
class="description">
<!--l. 55--><p class="noindent" >Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">inout</span>.<br
class="newline" />Specified as: a preconditioner data structure <a
href="userhtmlse3.html#precdata"><span
class="cmtt-10">psb</span><span
class="cmtt-10">_prec</span><span
class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 60--><p class="noindent" >
<span
class="cmbx-10">info</span> </dt><dd
class="description">
<!--l. 60--><p class="noindent" >Scope: <span
class="cmbx-10">global </span><br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">out</span>.<br
class="newline" />Error code: if no error, 0 is returned.</dd></dl>
<!--l. 66--><p class="noindent" ><span
class="cmbx-12">Notes </span>Legal inputs to this subroutine are interpreted depending on the <span
class="cmmi-10">ptype </span>string as
follows<span class="footnote-mark"><a
href="userhtml16.html#fn4x0"><sup class="textsuperscript">4</sup></a></span><a
id="x15-136001f4"></a> :
<dl class="description"><dt class="description">
<!--l. 74--><p class="noindent" >
<span
class="cmbx-10">NONE</span> </dt><dd
class="description">
<!--l. 74--><p class="noindent" >No preconditioning, i.e. the preconditioner is just a copy operator.
</dd><dt class="description">
<!--l. 76--><p class="noindent" >
<span
class="cmbx-10">DIAG</span> </dt><dd
class="description">
<!--l. 76--><p class="noindent" >Diagonal scaling; each entry of the input vector is multiplied by the
reciprocal of the sum of the absolute values of the coefficients in the
corresponding row of matrix <span
class="cmmi-10">A</span>;
</dd><dt class="description">
<!--l. 79--><p class="noindent" >
<span
class="cmbx-10">BJAC</span> </dt><dd
class="description">
<!--l. 79--><p class="noindent" >Precondition by a factorization of the block-diagonal of matrix <span
class="cmmi-10">A</span>, where
block boundaries are determined by the data allocation boundaries
for each process; requires no communication. Only the incomplete
factorization <span
class="cmmi-10">ILU</span>(0) is currently implemented.</dd></dl>
<h4 class="subsectionHead"><span class="titlemark">10.2 </span> <a
id="x15-13700010.2"></a>build &#8212; Builds a preconditioner</h4>
<pre class="verbatim" id="verbatim-98">
call&#x00A0;prec%build(a,&#x00A0;desc_a,&#x00A0;info[,amold,vmold,imold])
</pre>
<!--l. 91--><p class="nopar" >
<!--l. 93--><p class="indent" >
<dl class="description"><dt class="description">
<!--l. 94--><p class="noindent" >
<span
class="cmbx-10">Type:</span> </dt><dd
class="description">
<!--l. 94--><p class="noindent" >Synchronous.
</dd><dt class="description">
<!--l. 95--><p class="noindent" >
<span
class="cmbx-10">On Entry</span> </dt><dd
class="description">
<!--l. 95--><p class="noindent" >
</dd><dt class="description">
<!--l. 96--><p class="noindent" >
<span
class="cmbx-10">a</span> </dt><dd
class="description">
<!--l. 96--><p class="noindent" >the system sparse matrix. Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">in</span>, target.<br
class="newline" />Specified as: a sparse matrix data structure <a
href="userhtmlse3.html#spdata"><span
class="cmtt-10">psb</span><span
class="cmtt-10">_Tspmat</span><span
class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 101--><p class="noindent" >
<span
class="cmbx-10">prec</span> </dt><dd
class="description">
<!--l. 101--><p class="noindent" >the preconditioner.<br
class="newline" />Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">inout</span>.<br
class="newline" />Specified as: an already initialized preconditioner data structure
<a
href="userhtmlse3.html#precdata"><span
class="cmtt-10">psb</span><span
class="cmtt-10">_prec</span><span
class="cmtt-10">_type</span></a><br
class="newline" />
</dd><dt class="description">
<!--l. 106--><p class="noindent" >
<span
class="cmbx-10">desc</span><span
class="cmbx-10">_a</span> </dt><dd
class="description">
<!--l. 106--><p class="noindent" >the problem communication descriptor. Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">in</span>, target.<br
class="newline" />Specified as: a communication descriptor data structure <a
href="userhtmlse3.html#descdata"><span
class="cmtt-10">psb</span><span
class="cmtt-10">_desc</span><span
class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 116--><p class="noindent" >
<span
class="cmbx-10">amold</span> </dt><dd
class="description">
<!--l. 116--><p class="noindent" >The desired dynamic type for the internal matrix storage.<br
class="newline" />Scope: <span
class="cmbx-10">local</span>.<br
class="newline" />Type: <span
class="cmbx-10">optional</span>.<br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Specified as: an object of a class derived from <a
id="spbasedata"></a><span
class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_base</span><span
class="cmtt-10">_sparse</span><span
class="cmtt-10">_mat</span>.
</dd><dt class="description">
<!--l. 121--><p class="noindent" >
<span
class="cmbx-10">vmold</span> </dt><dd
class="description">
<!--l. 121--><p class="noindent" >The desired dynamic type for the internal vector storage.<br
class="newline" />Scope: <span
class="cmbx-10">local</span>.<br
class="newline" />Type: <span
class="cmbx-10">optional</span>.<br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Specified as: an object of a class derived from <a
id="vbasedata"></a><span
class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_base</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span>.
</dd><dt class="description">
<!--l. 126--><p class="noindent" >
<span
class="cmbx-10">imold</span> </dt><dd
class="description">
<!--l. 126--><p class="noindent" >The desired dynamic type for the internal integer vector storage.<br
class="newline" />Scope: <span
class="cmbx-10">local</span>.<br
class="newline" />Type: <span
class="cmbx-10">optional</span>.<br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Specified as: an object of a class derived from (integer)
<a
id="vbasedata"></a><span
class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_base</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span>.</dd></dl>
<!--l. 133--><p class="indent" >
<dl class="description"><dt class="description">
<!--l. 134--><p class="noindent" >
<span
class="cmbx-10">On Return</span> </dt><dd
class="description">
<!--l. 134--><p class="noindent" >
</dd><dt class="description">
<!--l. 135--><p class="noindent" >
<span
class="cmbx-10">prec</span> </dt><dd
class="description">
<!--l. 135--><p class="noindent" >the preconditioner.<br
class="newline" />Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">inout</span>.<br
class="newline" />Specified as: a preconditioner data structure <a
href="userhtmlse3.html#precdata"><span
class="cmtt-10">psb</span><span
class="cmtt-10">_prec</span><span
class="cmtt-10">_type</span></a><br
class="newline" />
</dd><dt class="description">
<!--l. 140--><p class="noindent" >
<span
class="cmbx-10">info</span> </dt><dd
class="description">
<!--l. 140--><p class="noindent" >Error code.<br
class="newline" />Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">required </span><br
class="newline" />Intent: <span
class="cmbx-10">out</span>.<br
class="newline" />An integer value; 0 means no error has been detected.</dd></dl>
<!--l. 146--><p class="noindent" >The <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">amold</span></span></span>, <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">vmold</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">imold</span></span></span> arguments may be employed to interface with special
devices, such as GPUs and other accelerators.
<h4 class="subsectionHead"><span class="titlemark">10.3 </span> <a
id="x15-13800010.3"></a>apply &#8212; Preconditioner application routine</h4>
<pre class="verbatim" id="verbatim-99">
call&#x00A0;prec%apply(x,y,desc_a,info,trans,work)
call&#x00A0;prec%apply(x,desc_a,info,trans)
</pre>
<!--l. 158--><p class="nopar" >
<!--l. 160--><p class="indent" >
<dl class="description"><dt class="description">
<!--l. 161--><p class="noindent" >
<span
class="cmbx-10">Type:</span> </dt><dd
class="description">
<!--l. 161--><p class="noindent" >Synchronous.
</dd><dt class="description">
<!--l. 162--><p class="noindent" >
<span
class="cmbx-10">On Entry</span> </dt><dd
class="description">
<!--l. 162--><p class="noindent" >
</dd><dt class="description">
<!--l. 163--><p class="noindent" >
<span
class="cmbx-10">prec</span> </dt><dd
class="description">
<!--l. 163--><p class="noindent" >the preconditioner. Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Specified as: a preconditioner data structure <a
href="userhtmlse3.html#precdata"><span
class="cmtt-10">psb</span><span
class="cmtt-10">_prec</span><span
class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 168--><p class="noindent" >
<span
class="cmbx-10">x</span> </dt><dd
class="description">
<!--l. 168--><p class="noindent" >the source vector. Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">inout</span>.<br
class="newline" />Specified as: a rank one array or an object of type <a
href="userhtmlse3.html#vdata"><span
class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 173--><p class="noindent" >
<span
class="cmbx-10">desc</span><span
class="cmbx-10">_a</span> </dt><dd
class="description">
<!--l. 173--><p class="noindent" >the problem communication descriptor. Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Specified as: a communication data structure <a
href="userhtmlse3.html#descdata"><span
class="cmtt-10">psb</span><span
class="cmtt-10">_desc</span><span
class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 178--><p class="noindent" >
<span
class="cmbx-10">trans</span> </dt><dd
class="description">
<!--l. 178--><p class="noindent" >Scope: <br
class="newline" />Type: <span
class="cmbx-10">optional</span><br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Specified as: a character.
</dd><dt class="description">
<!--l. 183--><p class="noindent" >
<span
class="cmbx-10">work</span> </dt><dd
class="description">
<!--l. 183--><p class="noindent" >an optional work space. Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">optional</span><br
class="newline" />Intent: <span
class="cmbx-10">inout</span>.<br
class="newline" />Specified as: a double precision array.</dd></dl>
<!--l. 190--><p class="indent" >
<dl class="description"><dt class="description">
<!--l. 191--><p class="noindent" >
<span
class="cmbx-10">On Return</span> </dt><dd
class="description">
<!--l. 191--><p class="noindent" >
</dd><dt class="description">
<!--l. 192--><p class="noindent" >
<span
class="cmbx-10">y</span> </dt><dd
class="description">
<!--l. 192--><p class="noindent" >the destination vector. Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">inout</span>.<br
class="newline" />Specified as: a rank one array or an object of type <a
href="userhtmlse3.html#vdata"><span
class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 197--><p class="noindent" >
<span
class="cmbx-10">info</span> </dt><dd
class="description">
<!--l. 197--><p class="noindent" >Error code.<br
class="newline" />Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">required </span><br
class="newline" />Intent: <span
class="cmbx-10">out</span>.<br
class="newline" />An integer value; 0 means no error has been detected.</dd></dl>
<h4 class="subsectionHead"><span class="titlemark">10.4 </span> <a
id="x15-13900010.4"></a>descr &#8212; Prints a description of current preconditioner</h4>
<pre class="verbatim" id="verbatim-100">
call&#x00A0;prec%descr(info)
call&#x00A0;prec%descr(info,iout,&#x00A0;root)
</pre>
<!--l. 212--><p class="nopar" >
<!--l. 214--><p class="indent" >
<dl class="description"><dt class="description">
<!--l. 215--><p class="noindent" >
<span
class="cmbx-10">Type:</span> </dt><dd
class="description">
<!--l. 215--><p class="noindent" >Asynchronous.
</dd><dt class="description">
<!--l. 216--><p class="noindent" >
<span
class="cmbx-10">On Entry</span> </dt><dd
class="description">
<!--l. 216--><p class="noindent" >
</dd><dt class="description">
<!--l. 217--><p class="noindent" >
<span
class="cmbx-10">prec</span> </dt><dd
class="description">
<!--l. 217--><p class="noindent" >the preconditioner. Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Specified as: a preconditioner data structure <a
href="userhtmlse3.html#precdata"><span
class="cmtt-10">psb</span><span
class="cmtt-10">_prec</span><span
class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 222--><p class="noindent" >
<span
class="cmbx-10">iout</span> </dt><dd
class="description">
<!--l. 222--><p class="noindent" >output unit. Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">optional</span><br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Specified as: an integer number. Default: default output unit.
</dd><dt class="description">
<!--l. 227--><p class="noindent" >
<span
class="cmbx-10">root</span> </dt><dd
class="description">
<!--l. 227--><p class="noindent" >Process from which to print. Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">optional</span><br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Specified as: an integer number between 0 and <span
class="cmmi-10">np </span><span
class="cmsy-10">- </span>1, in which case
the specified process will print the description, or <span
class="cmsy-10">-</span>1, in which case all
processes will print. Default: 0.
</dd><dt class="description">
<!--l. 234--><p class="noindent" >
<span
class="cmbx-10">On Return</span> </dt><dd
class="description">
<!--l. 234--><p class="noindent" >
</dd><dt class="description">
<!--l. 235--><p class="noindent" >
<span
class="cmbx-10">info</span> </dt><dd
class="description">
<!--l. 235--><p class="noindent" >Error code.<br
class="newline" />Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">required </span><br
class="newline" />Intent: <span
class="cmbx-10">out</span>.<br
class="newline" />An integer value; 0 means no error has been detected.</dd></dl>
<h4 class="subsectionHead"><span class="titlemark">10.5 </span> <a
id="x15-14000010.5"></a>clone &#8212; clone current preconditioner</h4>
<pre class="verbatim" id="verbatim-101">
call&#x00A0;&#x00A0;prec%clone(precout,info)
</pre>
<!--l. 248--><p class="nopar" >
<!--l. 250--><p class="indent" >
<dl class="description"><dt class="description">
<!--l. 251--><p class="noindent" >
<span
class="cmbx-10">Type:</span> </dt><dd
class="description">
<!--l. 251--><p class="noindent" >Asynchronous.
</dd><dt class="description">
<!--l. 252--><p class="noindent" >
<span
class="cmbx-10">On Entry</span> </dt><dd
class="description">
<!--l. 252--><p class="noindent" >
</dd><dt class="description">
<!--l. 253--><p class="noindent" >
<span
class="cmbx-10">prec</span> </dt><dd
class="description">
<!--l. 253--><p class="noindent" >the preconditioner.<br
class="newline" />Scope: <span
class="cmbx-10">local</span>.<br
class="newline" /></dd></dl>
<!--l. 260--><p class="indent" >
<dl class="description"><dt class="description">
<!--l. 261--><p class="noindent" >
<span
class="cmbx-10">On Return</span> </dt><dd
class="description">
<!--l. 261--><p class="noindent" >
</dd><dt class="description">
<!--l. 262--><p class="noindent" >
<span
class="cmbx-10">precout</span> </dt><dd
class="description">
<!--l. 262--><p class="noindent" >A copy of the input object.
</dd><dt class="description">
<!--l. 263--><p class="noindent" >
<span
class="cmbx-10">info</span> </dt><dd
class="description">
<!--l. 263--><p class="noindent" >Return code.</dd></dl>
<h4 class="subsectionHead"><span class="titlemark">10.6 </span> <a
id="x15-14100010.6"></a>free &#8212; Free a preconditioner</h4>
<pre class="verbatim" id="verbatim-102">
call&#x00A0;prec%free(info)
</pre>
<!--l. 271--><p class="nopar" >
<!--l. 273--><p class="indent" >
<dl class="description"><dt class="description">
<!--l. 274--><p class="noindent" >
<span
class="cmbx-10">Type:</span> </dt><dd
class="description">
<!--l. 274--><p class="noindent" >Asynchronous.
</dd><dt class="description">
<!--l. 275--><p class="noindent" >
<span
class="cmbx-10">On Entry</span> </dt><dd
class="description">
<!--l. 275--><p class="noindent" >
</dd><dt class="description">
<!--l. 276--><p class="noindent" >
<span
class="cmbx-10">prec</span> </dt><dd
class="description">
<!--l. 276--><p class="noindent" >the preconditioner.<br
class="newline" />Scope: <span
class="cmbx-10">local</span>.<br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">inout</span>.<br
class="newline" />Specified as: a preconditioner data structure <a
href="userhtmlse3.html#precdata"><span
class="cmtt-10">psb</span><span
class="cmtt-10">_prec</span><span
class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 289--><p class="noindent" >
<span
class="cmbx-10">On Exit</span> </dt><dd
class="description">
<!--l. 289--><p class="noindent" >
</dd><dt class="description">
<!--l. 291--><p class="noindent" >
<span
class="cmbx-10">prec</span> </dt><dd
class="description">
<!--l. 291--><p class="noindent" >Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">inout</span>.<br
class="newline" />Specified as: a preconditioner data structure <a
href="userhtmlse3.html#precdata"><span
class="cmtt-10">psb</span><span
class="cmtt-10">_prec</span><span
class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 296--><p class="noindent" >
<span
class="cmbx-10">info</span> </dt><dd
class="description">
<!--l. 296--><p class="noindent" >Scope: <span
class="cmbx-10">global </span><br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">out</span>.<br
class="newline" />Error code: if no error, 0 is returned.</dd></dl>
<!--l. 302--><p class="noindent" ><span
class="cmbx-12">Notes </span>Releases all internal storage.
@ -58,7 +713,7 @@ href="userhtmlsu89.html#x105-14100010.6">free &#8212; Free a preconditioner</a><
href="userhtmlse9.html" >prev</a>] [<a
href="userhtmlse9.html#tailuserhtmlse9.html" >prev-tail</a>] [<a
href="userhtmlse10.html" >front</a>] [<a
href="userhtml.html#userhtmlsu86.html" >up</a>] </p></div>
href="userhtml.html#userhtmlse13.html" >up</a>] </p></div>
<!--l. 1--><p class="indent" > <a
id="tailuserhtmlse10.html"></a>
</body></html>

@ -13,10 +13,10 @@
<!--l. 1--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlse10.html" >prev</a>] [<a
href="userhtmlse10.html#tailuserhtmlse10.html" >prev-tail</a>] [<a
href="userhtmlsu86.html#tailuserhtmlse11.html">tail</a>] [<a
href="userhtmlse8.html#tailuserhtmlse11.html">tail</a>] [<a
href="userhtml.html# " >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">11 </span> <a
id="x106-14200011"></a>Iterative Methods</h3>
id="x17-14200011"></a>Iterative Methods</h3>
<!--l. 4--><p class="noindent" >In this chapter we provide routines for preconditioners and iterative methods.
The interfaces for Krylov subspace methods are available in the module
<span class="obeylines-h"><span class="verb"><span
@ -24,18 +24,452 @@ class="cmtt-10">psb_krylov_mod</span></span></span>.
<div class="subsectionTOCS">
&#x00A0;<span class="subsectionToc" >11.1 <a
href="userhtmlsu90.html#x107-14300011.1">psb_krylov &#8212; Krylov Methods Driver Routine</a></span>
</div>
<h4 class="subsectionHead"><span class="titlemark">11.1 </span> <a
id="x17-14300011.1"></a>psb_krylov &#8212; Krylov Methods Driver Routine</h4>
<!--l. 17--><p class="noindent" >This subroutine is a driver that provides a general interface for all the Krylov-Subspace
family methods implemented in PSBLAS version 2.
<!--l. 20--><p class="indent" > The stopping criterion can take the following values:
<dl class="description"><dt class="description">
<!--l. 22--><p class="noindent" >
<span
class="cmbx-10">1</span> </dt><dd
class="description">
<!--l. 22--><p class="noindent" >normwise backward error in the infinity norm; the iteration is stopped
when
<div class="math-display" >
<img
src="userhtml30x.png" alt="err = \frac{\|r_i\|}{(\|A\| \|x_i\| + \|b\|)} &#x003C; eps"
class="math-display" ></div>
<!--l. 24--><p class="nopar" >
</dd><dt class="description">
<!--l. 25--><p class="noindent" >
<span
class="cmbx-10">2</span> </dt><dd
class="description">
<!--l. 25--><p class="noindent" >Relative residual in the 2-norm; the iteration is stopped when
<div class="math-display" >
<img
src="userhtml31x.png" alt="err = \frac{\|r_i\|}{\|b\|_2} &#x003C; eps"
class="math-display" ></div>
<!--l. 27--><p class="nopar" >
</dd><dt class="description">
<!--l. 28--><p class="noindent" >
<span
class="cmbx-10">3</span> </dt><dd
class="description">
<!--l. 28--><p class="noindent" >Relative residual reduction in the 2-norm; the iteration is stopped when
<div class="math-display" >
<img
src="userhtml32x.png" alt="err = \frac{\|r_i\|}{\|r_0\|_2} &#x003C; eps"
class="math-display" ></div>
<!--l. 30--><p class="nopar" ></dd></dl>
<!--l. 32--><p class="noindent" >The behaviour is controlled by the istop argument (see later). In the above formulae, <span
class="cmmi-10">x</span><sub><span
class="cmmi-7">i</span></sub>
is the tentative solution and <span
class="cmmi-10">r</span><sub><span
class="cmmi-7">i</span></sub> = <span
class="cmmi-10">b </span><span
class="cmsy-10">- </span><span
class="cmmi-10">Ax</span><sub><span
class="cmmi-7">i</span></sub> the corresponding residual at the <span
class="cmmi-10">i</span>-th
iteration.
<!--l. 37-->
<!--l. 2--><div class="crosslinks"><p class="noindent">[<a
<pre class="lstlisting" id="listing-167"><span class="label"><a
id="x17-143001r1"></a></span><span style="color:#000000"><span
class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">psb_krylov</span></span><span style="color:#000000"><span
class="cmtt-10">(</span></span><span style="color:#000000"><span
class="cmtt-10">method</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"><span
class="cmtt-10">a</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"><span
class="cmtt-10">prec</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"><span
class="cmtt-10">b</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"><span
class="cmtt-10">x</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"><span
class="cmtt-10">eps</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"><span
class="cmtt-10">desc_a</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"><span
class="cmtt-10">info</span></span><span style="color:#000000"><span
class="cmtt-10">,&amp;</span></span>
<span class="label"><a
id="x17-143002r2"></a></span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span
class="cmtt-10">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-10">&amp;</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">itmax</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"><span
class="cmtt-10">iter</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"><span
class="cmtt-10">err</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"><span
class="cmtt-10">itrace</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"><span
class="cmtt-10">irst</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"><span
class="cmtt-10">istop</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"><span
class="cmtt-10">cond</span></span><span style="color:#000000"><span
class="cmtt-10">)</span></span></pre>
<!--l. 42--><p class="indent" >
<dl class="description"><dt class="description">
<!--l. 43--><p class="noindent" >
<span
class="cmbx-10">Type:</span> </dt><dd
class="description">
<!--l. 43--><p class="noindent" >Synchronous.
</dd><dt class="description">
<!--l. 44--><p class="noindent" >
<span
class="cmbx-10">On Entry</span> </dt><dd
class="description">
<!--l. 44--><p class="noindent" >
</dd><dt class="description">
<!--l. 45--><p class="noindent" >
<span
class="cmbx-10">method</span> </dt><dd
class="description">
<!--l. 45--><p class="noindent" >a string that defines the iterative method to be used. Supported values
are:
<dl class="description"><dt class="description">
<!--l. 48--><p class="noindent" >
<span
class="cmbx-10">CG:</span> </dt><dd
class="description">
<!--l. 48--><p class="noindent" >the Conjugate Gradient method;
</dd><dt class="description">
<!--l. 49--><p class="noindent" >
<span
class="cmbx-10">CGS:</span> </dt><dd
class="description">
<!--l. 49--><p class="noindent" >the Conjugate Gradient Squared method;
</dd><dt class="description">
<!--l. 51--><p class="noindent" >
<span
class="cmbx-10">GCR:</span> </dt><dd
class="description">
<!--l. 51--><p class="noindent" >the Generalized Conjugate Residual method;
</dd><dt class="description">
<!--l. 52--><p class="noindent" >
<span
class="cmbx-10">FCG:</span> </dt><dd
class="description">
<!--l. 52--><p class="noindent" >the Flexible Conjugate Gradient method<span class="footnote-mark"><a
href="userhtml18.html#fn5x0"><sup class="textsuperscript">5</sup></a></span><a
id="x17-143003f5"></a> ;
</dd><dt class="description">
<!--l. 55--><p class="noindent" >
<span
class="cmbx-10">BICG:</span> </dt><dd
class="description">
<!--l. 55--><p class="noindent" >the Bi-Conjugate Gradient method;
</dd><dt class="description">
<!--l. 56--><p class="noindent" >
<span
class="cmbx-10">BICGSTAB:</span> </dt><dd
class="description">
<!--l. 56--><p class="noindent" >the Bi-Conjugate Gradient Stabilized method;
</dd><dt class="description">
<!--l. 57--><p class="noindent" >
<span
class="cmbx-10">BICGSTABL:</span> </dt><dd
class="description">
<!--l. 57--><p class="noindent" >the Bi-Conjugate Gradient Stabilized method with restarting;
</dd><dt class="description">
<!--l. 58--><p class="noindent" >
<span
class="cmbx-10">RGMRES:</span> </dt><dd
class="description">
<!--l. 58--><p class="noindent" >the Generalized Minimal Residual method with restarting.</dd></dl>
</dd><dt class="description">
<!--l. 60--><p class="noindent" >
<span
class="cmbx-10">a</span> </dt><dd
class="description">
<!--l. 60--><p class="noindent" >the local portion of global sparse matrix <span
class="cmmi-10">A</span>. <br
class="newline" />Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Specified as: a structured data of type <a
href="userhtmlse3.html#spdata"><span
class="cmtt-10">psb</span><span
class="cmtt-10">_Tspmat</span><span
class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 66--><p class="noindent" >
<span
class="cmbx-10">prec</span> </dt><dd
class="description">
<!--l. 66--><p class="noindent" >The data structure containing the preconditioner.<br
class="newline" />Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Specified as: a structured data of type <a
href="userhtmlse3.html#precdata"><span
class="cmtt-10">psb</span><span
class="cmtt-10">_prec</span><span
class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 71--><p class="noindent" >
<span
class="cmbx-10">b</span> </dt><dd
class="description">
<!--l. 71--><p class="noindent" >The RHS vector. <br
class="newline" />Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Specified as: a rank one array or an object of type <a
href="userhtmlse3.html#vdata"><span
class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 76--><p class="noindent" >
<span
class="cmbx-10">x</span> </dt><dd
class="description">
<!--l. 76--><p class="noindent" >The initial guess. <br
class="newline" />Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">inout</span>.<br
class="newline" />Specified as: a rank one array or an object of type <a
href="userhtmlse3.html#vdata"><span
class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 81--><p class="noindent" >
<span
class="cmbx-10">eps</span> </dt><dd
class="description">
<!--l. 81--><p class="noindent" >The stopping tolerance. <br
class="newline" />Scope: <span
class="cmbx-10">global </span><br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Specified as: a real number.
</dd><dt class="description">
<!--l. 86--><p class="noindent" >
<span
class="cmbx-10">desc</span><span
class="cmbx-10">_a</span> </dt><dd
class="description">
<!--l. 86--><p class="noindent" >contains data structures for communications.<br
class="newline" />Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Specified as: a structured data of type <a
href="userhtmlse3.html#descdata"><span
class="cmtt-10">psb</span><span
class="cmtt-10">_desc</span><span
class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 91--><p class="noindent" >
<span
class="cmbx-10">itmax</span> </dt><dd
class="description">
<!--l. 91--><p class="noindent" >The maximum number of iterations to perform.<br
class="newline" />Scope: <span
class="cmbx-10">global </span><br
class="newline" />Type: <span
class="cmbx-10">optional</span><br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Default: <span
class="cmmi-10">itmax </span>= 1000.<br
class="newline" />Specified as: an integer variable <span
class="cmmi-10">itmax </span><span
class="cmsy-10">&#x2265; </span>1.
</dd><dt class="description">
<!--l. 97--><p class="noindent" >
<span
class="cmbx-10">itrace</span> </dt><dd
class="description">
<!--l. 97--><p class="noindent" >If <span
class="cmmi-10">&#x003E; </span>0 print out an informational message about convergence every <span
class="cmmi-10">itrace</span>
iterations. If = 0 print a message in case of convergence failure.<br
class="newline" />Scope: <span
class="cmbx-10">global </span><br
class="newline" />Type: <span
class="cmbx-10">optional</span><br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Default: <span
class="cmmi-10">itrace </span>= <span
class="cmsy-10">-</span>1.<br
class="newline" />
</dd><dt class="description">
<!--l. 104--><p class="noindent" >
<span
class="cmbx-10">irst</span> </dt><dd
class="description">
<!--l. 104--><p class="noindent" >An integer specifying the restart parameter.<br
class="newline" />Scope: <span
class="cmbx-10">global </span><br
class="newline" />Type: <span
class="cmbx-10">optional</span>.<br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Values: <span
class="cmmi-10">irst &#x003E; </span>0. This is employed for the BiCGSTABL or RGMRES methods,
otherwise it is ignored.
</dd><dt class="description">
<!--l. 111--><p class="noindent" >
<span
class="cmbx-10">istop</span> </dt><dd
class="description">
<!--l. 111--><p class="noindent" >An integer specifying the stopping criterion.<br
class="newline" />Scope: <span
class="cmbx-10">global </span><br
class="newline" />Type: <span
class="cmbx-10">optional</span>.<br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Values: 1: use the normwise backward error, 2: use the scaled 2-norm
of the residual, 3: use the residual reduction in the 2-norm. Default:
2.
</dd><dt class="description">
<!--l. 117--><p class="noindent" >
<span
class="cmbx-10">On Return</span> </dt><dd
class="description">
<!--l. 117--><p class="noindent" >
</dd><dt class="description">
<!--l. 118--><p class="noindent" >
<span
class="cmbx-10">x</span> </dt><dd
class="description">
<!--l. 118--><p class="noindent" >The computed solution. <br
class="newline" />Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">inout</span>.<br
class="newline" />Specified as: a rank one array or an object of type <a
href="userhtmlse3.html#vdata"><span
class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 123--><p class="noindent" >
<span
class="cmbx-10">iter</span> </dt><dd
class="description">
<!--l. 123--><p class="noindent" >The number of iterations performed.<br
class="newline" />Scope: <span
class="cmbx-10">global </span><br
class="newline" />Type: <span
class="cmbx-10">optional</span><br
class="newline" />Intent: <span
class="cmbx-10">out</span>.<br
class="newline" />Returned as: an integer variable.
</dd><dt class="description">
<!--l. 128--><p class="noindent" >
<span
class="cmbx-10">err</span> </dt><dd
class="description">
<!--l. 128--><p class="noindent" >The convergence estimate on exit.<br
class="newline" />Scope: <span
class="cmbx-10">global </span><br
class="newline" />Type: <span
class="cmbx-10">optional</span><br
class="newline" />Intent: <span
class="cmbx-10">out</span>.<br
class="newline" />Returned as: a real number.
</dd><dt class="description">
<!--l. 133--><p class="noindent" >
<span
class="cmbx-10">cond</span> </dt><dd
class="description">
<!--l. 133--><p class="noindent" >An estimate of the condition number of matrix <span
class="cmmi-10">A</span>; only available with the <span
class="cmmi-10">CG</span>
method on real data.<br
class="newline" />Scope: <span
class="cmbx-10">global </span><br
class="newline" />Type: <span
class="cmbx-10">optional</span><br
class="newline" />Intent: <span
class="cmbx-10">out</span>.<br
class="newline" />Returned as: a real number. A correct result will be greater than or
equal to one; if specified for non-real data, or an error occurred, zero is
returned.
</dd><dt class="description">
<!--l. 141--><p class="noindent" >
<span
class="cmbx-10">info</span> </dt><dd
class="description">
<!--l. 141--><p class="noindent" >Error code.<br
class="newline" />Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">required </span><br
class="newline" />Intent: <span
class="cmbx-10">out</span>.<br
class="newline" />An integer value; 0 means no error has been detected.</dd></dl>
<!--l. 1--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlse10.html" >prev</a>] [<a
href="userhtmlse10.html#tailuserhtmlse10.html" >prev-tail</a>] [<a
href="userhtmlse11.html" >front</a>] [<a
href="userhtml.html# " >up</a>] </p></div>
<!--l. 1--><p class="indent" > <a
id="tailuserhtmlse11.html"></a>
</body></html>

@ -0,0 +1,921 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<html >
<head><title>Extensions</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="generator" content="TeX4ht (https://tug.org/tex4ht/)">
<meta name="originator" content="TeX4ht (https://tug.org/tex4ht/)">
<!-- html,3 -->
<meta name="src" content="userhtml.tex">
<link rel="stylesheet" type="text/css" href="userhtml.css">
</head><body
>
<!--l. 1--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlse11.html" >prev</a>] [<a
href="userhtmlse11.html#tailuserhtmlse11.html" >prev-tail</a>] [<a
href="userhtmlse12.html#tailuserhtmlse12.html">tail</a>] [<a
href="userhtml.html# " >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">12 </span> <a
id="x19-14400012"></a>Extensions</h3>
<!--l. 3--><p class="noindent" >The EXT, CUDA and RSB subdirectories contain a set of extensions to the base
library. The extensions provide additional storage formats beyond the ones already
contained in the base library, as well as interfaces to:
<dl class="description"><dt class="description">
<!--l. 8--><p class="noindent" >
<span
class="cmbx-10">SPGPU</span> </dt><dd
class="description">
<!--l. 8--><p class="noindent" >a CUDA library originally
published as <a
href="https://code.google.com/p/spgpu/" class="url" ><span
class="cmtt-10">https://code.google.com/p/spgpu/</span></a> and now included
in the <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">cuda</span></span></span> subdir, for computations on NVIDIA GPUs;
</dd><dt class="description">
<!--l. 11--><p class="noindent" >
<span
class="cmbx-10">LIBRSB</span> </dt><dd
class="description">
<!--l. 11--><p class="noindent" ><a
href="http://sourceforge.net/projects/librsb/" class="url" ><span
class="cmtt-10">http://sourceforge.net/projects/librsb/</span></a>, for computations on
multicore parallel machines.</dd></dl>
<!--l. 14--><p class="noindent" >The infrastructure laid out in the base library to allow for these extensions is detailed in
the references&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#XDesPat:11">20</a>,&#x00A0;<a
href="userhtmlli2.html#XCaFiRo:2014">21</a>,&#x00A0;<a
href="userhtmlli2.html#XSparse03">10</a>]</span>; the CUDA-specific data formats are described
in&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#XOurTechRep">22</a>]</span>.
<!--l. 19--><p class="noindent" >
<h4 class="subsectionHead"><span class="titlemark">12.1 </span> <a
id="x19-14500012.1"></a>Using the extensions</h4>
<!--l. 21--><p class="noindent" >A sample application using the PSBLAS extensions will contain the following
steps:
<ul class="itemize1">
<li class="itemize">
<!--l. 24--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span
class="cmtt-10">USE</span></span></span> the appropriate modules (<span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_ext_mod</span></span></span>, <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cuda_mod</span></span></span>);
</li>
<li class="itemize">
<!--l. 26--><p class="noindent" >Declare a <span
class="cmti-10">mold </span>variable of the necessary type (e.g.
<span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_d_ell_sparse_mat</span></span></span>, <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_d_hlg_sparse_mat</span></span></span>, <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_d_vect_cuda</span></span></span>);
</li>
<li class="itemize">
<!--l. 29--><p class="noindent" >Pass the mold variable to the base library interface where needed to ensure
the appropriate dynamic type.</li></ul>
<!--l. 32--><p class="noindent" >Suppose you want to use the CUDA-enabled ELLPACK data structure; you would use a
piece of code like this (and don&#8217;t forget, you need CUDA-side vectors along with the
matrices):
<div class="center"
>
<!--l. 85--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-103">
program&#x00A0;my_cuda_test
&#x00A0;&#x00A0;use&#x00A0;psb_base_mod
&#x00A0;&#x00A0;use&#x00A0;psb_util_mod
&#x00A0;&#x00A0;use&#x00A0;psb_ext_mod
&#x00A0;&#x00A0;use&#x00A0;psb_cuda_mod
&#x00A0;&#x00A0;type(psb_dspmat_type)&#x00A0;::&#x00A0;a,&#x00A0;agpu
&#x00A0;&#x00A0;type(psb_d_vect_type)&#x00A0;::&#x00A0;x,&#x00A0;xg,&#x00A0;bg
&#x00A0;&#x00A0;real(psb_dpk_),&#x00A0;allocatable&#x00A0;::&#x00A0;xtmp(:)
&#x00A0;&#x00A0;type(psb_d_vect_cuda)&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;::&#x00A0;vmold
&#x00A0;&#x00A0;type(psb_d_elg_sparse_mat)&#x00A0;::&#x00A0;aelg
&#x00A0;&#x00A0;type(psb_ctxt_type)&#x00A0;::&#x00A0;ctxt
&#x00A0;&#x00A0;integer&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;::&#x00A0;iam,&#x00A0;np
&#x00A0;&#x00A0;call&#x00A0;psb_init(ctxt)
&#x00A0;&#x00A0;call&#x00A0;psb_info(ctxt,iam,np)
&#x00A0;&#x00A0;call&#x00A0;psb_cuda_init(ctxt,&#x00A0;iam)
&#x00A0;&#x00A0;!&#x00A0;My&#x00A0;own&#x00A0;home-grown&#x00A0;matrix&#x00A0;generator
&#x00A0;&#x00A0;call&#x00A0;gen_matrix(ctxt,idim,desc_a,a,x,info)
&#x00A0;&#x00A0;if&#x00A0;(info&#x00A0;/=&#x00A0;0)&#x00A0;goto&#x00A0;9999
&#x00A0;&#x00A0;call&#x00A0;a%cscnv(agpu,info,mold=aelg)
&#x00A0;&#x00A0;if&#x00A0;(info&#x00A0;/=&#x00A0;0)&#x00A0;goto&#x00A0;9999
&#x00A0;&#x00A0;xtmp&#x00A0;=&#x00A0;x%get_vect()
&#x00A0;&#x00A0;call&#x00A0;xg%bld(xtmp,mold=vmold)
&#x00A0;&#x00A0;call&#x00A0;bg%bld(size(xtmp),mold=vmold)
&#x00A0;&#x00A0;!&#x00A0;Do&#x00A0;sparse&#x00A0;MV
&#x00A0;&#x00A0;call&#x00A0;psb_spmm(done,agpu,xg,dzero,bg,desc_a,info)
9999&#x00A0;continue
&#x00A0;&#x00A0;if&#x00A0;(info&#x00A0;==&#x00A0;0)&#x00A0;then
&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;write(*,*)&#x00A0;&#8217;42&#8217;
&#x00A0;&#x00A0;else
&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;write(*,*)&#x00A0;&#8217;Something&#x00A0;went&#x00A0;wrong&#x00A0;&#8217;,info
&#x00A0;&#x00A0;end&#x00A0;if
&#x00A0;&#x00A0;call&#x00A0;psb_cuda_exit()
&#x00A0;&#x00A0;call&#x00A0;psb_exit(ctxt)
&#x00A0;&#x00A0;stop
end&#x00A0;program&#x00A0;my_cuda_test
</pre>
<!--l. 134--><p class="nopar" > </div></div>
<!--l. 139--><p class="indent" > A full example of this strategy can be seen in the <span
class="cmtt-10">test/ext/kernel </span>and
<span
class="cmtt-10">test/cuda/kernel </span>subdirectories, where we provide sample programs to test the
speed of the sparse matrix-vector product with the various data structures included
in the library.
<!--l. 146--><p class="noindent" >
<h4 class="subsectionHead"><span class="titlemark">12.2 </span> <a
id="x19-14600012.2"></a>Extensions&#8217; Data Structures</h4>
<!--l. 150--><p class="noindent" >Access to the facilities provided by the EXT library is mainly achieved through
the data types that are provided within. The data classes are derived from
the base classes in PSBLAS, through the Fortran&#x00A0;2003 mechanism of <span
class="cmti-10">type</span>
<span
class="cmti-10">extension</span>&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#XMRC:11">17</a>]</span>.
<!--l. 155--><p class="indent" > The data classes are divided between the general purpose CPU extensions, the
GPU interfaces and the RSB interfaces. In the description we will make use of the
notation introduced in Table&#x00A0;<a
href="#x19-146001r21">21<!--tex4ht:ref: tab:notation --></a>.
<div class="table">
<!--l. 160--><p class="indent" > <a
id="x19-146001r21"></a><hr class="float"><div class="float"
>
<div class="caption"
><span class="id">Table&#x00A0;21: </span><span
class="content">Notation for parameters describing a sparse matrix</span></div><!--tex4ht:label?: x19-146001r21 -->
<div class="center"
>
<!--l. 162--><p class="noindent" >
<div class="tabular"> <table id="TBL-23" class="tabular"
><colgroup id="TBL-23-1g"><col
id="TBL-23-1"><col
id="TBL-23-2"></colgroup><tr
class="hline"><td><hr></td><td><hr></td></tr><tr
style="vertical-align:baseline;" id="TBL-23-1-"><td style="white-space:nowrap; text-align:left;" id="TBL-23-1-1"
class="td11"><span
class="cmr-8">Name </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-23-1-2"
class="td11"><span
class="cmr-8">Description </span></td>
</tr><tr
class="hline"><td><hr></td><td><hr></td></tr><tr
style="vertical-align:baseline;" id="TBL-23-2-"><td style="white-space:nowrap; text-align:left;" id="TBL-23-2-1"
class="td11"><span
class="cmr-8">M </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-23-2-2"
class="td11"><span
class="cmr-8">Number of rows in matrix </span></td></tr><tr
style="vertical-align:baseline;" id="TBL-23-3-"><td style="white-space:nowrap; text-align:left;" id="TBL-23-3-1"
class="td11"><span
class="cmr-8">N </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-23-3-2"
class="td11"><span
class="cmr-8">Number of columns in matrix</span></td>
</tr><tr
style="vertical-align:baseline;" id="TBL-23-4-"><td style="white-space:nowrap; text-align:left;" id="TBL-23-4-1"
class="td11"><span
class="cmr-8">NZ </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-23-4-2"
class="td11"><span
class="cmr-8">Number of nonzeros in matrix </span></td></tr><tr
style="vertical-align:baseline;" id="TBL-23-5-"><td style="white-space:nowrap; text-align:left;" id="TBL-23-5-1"
class="td11"><span
class="cmr-8">AVGNZR </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-23-5-2"
class="td11"><span
class="cmr-8">Average number of nonzeros per row</span></td>
</tr><tr
style="vertical-align:baseline;" id="TBL-23-6-"><td style="white-space:nowrap; text-align:left;" id="TBL-23-6-1"
class="td11"><span
class="cmr-8">MAXNZR</span></td><td style="white-space:nowrap; text-align:left;" id="TBL-23-6-2"
class="td11"><span
class="cmr-8">Maximum number of nonzeros per row</span></td>
</tr><tr
style="vertical-align:baseline;" id="TBL-23-7-"><td style="white-space:nowrap; text-align:left;" id="TBL-23-7-1"
class="td11"><span
class="cmr-8">NDIAG </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-23-7-2"
class="td11"><span
class="cmr-8">Number of nonzero diagonals </span></td>
</tr><tr
style="vertical-align:baseline;" id="TBL-23-8-"><td style="white-space:nowrap; text-align:left;" id="TBL-23-8-1"
class="td11"><span
class="cmr-8">AS </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-23-8-2"
class="td11"><span
class="cmr-8">Coefficients array </span></td>
</tr><tr
style="vertical-align:baseline;" id="TBL-23-9-"><td style="white-space:nowrap; text-align:left;" id="TBL-23-9-1"
class="td11"><span
class="cmr-8">IA </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-23-9-2"
class="td11"><span
class="cmr-8">Row indices array </span></td>
</tr><tr
style="vertical-align:baseline;" id="TBL-23-10-"><td style="white-space:nowrap; text-align:left;" id="TBL-23-10-1"
class="td11"><span
class="cmr-8">JA </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-23-10-2"
class="td11"><span
class="cmr-8">Column indices array </span></td>
</tr><tr
style="vertical-align:baseline;" id="TBL-23-11-"><td style="white-space:nowrap; text-align:left;" id="TBL-23-11-1"
class="td11"><span
class="cmr-8">IRP </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-23-11-2"
class="td11"><span
class="cmr-8">Row start pointers array </span></td>
</tr><tr
style="vertical-align:baseline;" id="TBL-23-12-"><td style="white-space:nowrap; text-align:left;" id="TBL-23-12-1"
class="td11"><span
class="cmr-8">JCP </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-23-12-2"
class="td11"><span
class="cmr-8">Column start pointers array </span></td>
</tr><tr
style="vertical-align:baseline;" id="TBL-23-13-"><td style="white-space:nowrap; text-align:left;" id="TBL-23-13-1"
class="td11"><span
class="cmr-8">NZR </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-23-13-2"
class="td11"><span
class="cmr-8">Number of nonzeros per row array </span></td>
</tr><tr
style="vertical-align:baseline;" id="TBL-23-14-"><td style="white-space:nowrap; text-align:left;" id="TBL-23-14-1"
class="td11"><span
class="cmr-8">OFFSET </span></td><td style="white-space:nowrap; text-align:left;" id="TBL-23-14-2"
class="td11"><span
class="cmr-8">Offset for diagonals </span></td>
</tr><tr
class="hline"><td><hr></td><td><hr></td></tr><tr
style="vertical-align:baseline;" id="TBL-23-15-"><td style="white-space:nowrap; text-align:left;" id="TBL-23-15-1"
class="td11"> </td></tr></table> </div>
</div>
</div><hr class="endfloat" />
</div>
<!--l. 188--><p class="indent" > <hr class="figure"><div class="figure"
>
<a
id="x19-146002r5"></a>
<!--l. 192--><p class="noindent" ><img
src="mat.png" alt="PIC"
width="147" height="147" >
<br /> <div class="caption"
><span class="id">Figure&#x00A0;5: </span><span
class="content">Example of sparse matrix</span></div><!--tex4ht:label?: x19-146002r5 -->
<!--l. 198--><p class="indent" > </div><hr class="endfigure">
<h4 class="subsectionHead"><span class="titlemark">12.3 </span> <a
id="x19-14700012.3"></a>CPU-class extensions</h4>
<!--l. 203--><p class="noindent" >
<h5 class="likesubsubsectionHead"><a
id="x19-148000"></a>ELLPACK</h5>
<!--l. 205--><p class="noindent" >The ELLPACK/ITPACK format (shown in Figure&#x00A0;<a
href="#x19-148001r6">6<!--tex4ht:ref: fig:ell --></a>) comprises two 2-dimensional
arrays <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">AS</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">JA</span></span></span> with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">M</span></span></span> rows and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">MAXNZR</span></span></span> columns, where <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">MAXNZR</span></span></span> is the maximum
number of nonzeros in any row&#x00A0;<span class="cite">[<span
class="cmbx-10">?</span>]</span>. Each row of the arrays <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">AS</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">JA</span></span></span> contains the
coefficients and column indices; rows shorter than <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">MAXNZR</span></span></span> are padded with zero
coefficients and appropriate column indices, e.g. the last valid one found in the same
row.
<!--l. 215--><p class="indent" > <hr class="figure"><div class="figure"
>
<a
id="x19-148001r6"></a>
<!--l. 219--><p class="noindent" ><img
src="ell.png" alt="PIC"
width="233" height="233" >
<br /> <div class="caption"
><span class="id">Figure&#x00A0;6: </span><span
class="content">ELLPACK compression of matrix in Figure&#x00A0;<a
href="#x19-146002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-148001r6 -->
<!--l. 225--><p class="indent" > </div><hr class="endfigure">
<a
id="x19-148002r1"></a>
<!--l. 229--><p class="indent" > <hr class="float"><div class="float"
>
<!--l. 231-->
<pre class="lstlisting" id="listing-168"><span class="label"><a
id="x19-148003r1"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">do</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">i</span></span><span style="color:#000000"><span
class="cmtt-9">=1,</span></span><span style="color:#000000"><span
class="cmtt-9">n</span></span>
<span class="label"><a
id="x19-148004r2"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">t</span></span><span style="color:#000000"><span
class="cmtt-9">=0</span></span>
<span class="label"><a
id="x19-148005r3"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">do</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">j</span></span><span style="color:#000000"><span
class="cmtt-9">=1,</span></span><span style="color:#000000"><span
class="cmtt-9">maxnzr</span></span>
<span class="label"><a
id="x19-148006r4"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">t</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">=</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">t</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">+</span></span><span style="color:#000000"> </span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">as</span></span><span style="color:#000000"><span
class="cmtt-9">(</span></span><span style="color:#000000"><span
class="cmtt-9">i</span></span><span style="color:#000000"><span
class="cmtt-9">,</span></span><span style="color:#000000"><span
class="cmtt-9">j</span></span><span style="color:#000000"><span
class="cmtt-9">)*</span></span><span style="color:#000000"><span
class="cmtt-9">x</span></span><span style="color:#000000"><span
class="cmtt-9">(</span></span><span style="color:#000000"><span
class="cmtt-9">ja</span></span><span style="color:#000000"><span
class="cmtt-9">(</span></span><span style="color:#000000"><span
class="cmtt-9">i</span></span><span style="color:#000000"><span
class="cmtt-9">,</span></span><span style="color:#000000"><span
class="cmtt-9">j</span></span><span style="color:#000000"><span
class="cmtt-9">))</span></span>
<span class="label"><a
id="x19-148007r5"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">end</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">do</span></span>
<span class="label"><a
id="x19-148008r6"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">y</span></span><span style="color:#000000"><span
class="cmtt-9">(</span></span><span style="color:#000000"><span
class="cmtt-9">i</span></span><span style="color:#000000"><span
class="cmtt-9">)</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">=</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">t</span></span>
<span class="label"><a
id="x19-148009r7"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">end</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">do</span></span></pre>
<a
id="x19-148010r1"></a>
<a
id="x19-148011"></a>
<span
class="cmbx-10">Algorithm</span><span
class="cmbx-10">&#x00A0;1:</span>&#x00A0; Matrix-Vector product in ELL format
</div><hr class="endfloat" />
<!--l. 242--><p class="indent" > The matrix-vector product <span
class="cmmi-10">y </span>= <span
class="cmmi-10">Ax </span>can be computed with the code shown in
Alg.&#x00A0;<a
href="#x19-148010r1">1<!--tex4ht:ref: alg:ell --></a>; it costs one memory write per outer iteration, plus three memory reads and
two floating-point operations per inner iteration.
<!--l. 247--><p class="indent" > Unless all rows have exactly the same number of nonzeros, some of the coefficients
in the <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">AS</span></span></span> array will be zeros; therefore this data structure will have an overhead both
in terms of memory space and redundant operations (multiplications by zero). The
overhead can be acceptable if:
<ol class="enumerate1" >
<li
class="enumerate" id="x19-148013x1">
<!--l. 253--><p class="noindent" >The maximum number of nonzeros per row is not much larger than the
average;
</li>
<li
class="enumerate" id="x19-148015x2">
<!--l. 255--><p class="noindent" >The regularity of the data structure allows for faster code, e.g. by allowing
vectorization, thereby offsetting the additional storage requirements.</li></ol>
<!--l. 259--><p class="noindent" >In the extreme case where the input matrix has one full row, the ELLPACK
structure would require more memory than the normal 2D array storage. The
ELLPACK storage format was very popular in the vector computing days; in
modern CPUs it is not quite as popular, but it is the basis for many GPU
formats.
<!--l. 265--><p class="indent" > The relevant data type is <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_T_ell_sparse_mat</span></span></span>:
<div class="center"
>
<!--l. 281--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-104">
&#x00A0;&#x00A0;type,&#x00A0;extends(psb_d_base_sparse_mat)&#x00A0;::&#x00A0;psb_d_ell_sparse_mat
&#x00A0;&#x00A0;&#x00A0;&#x00A0;!
&#x00A0;&#x00A0;&#x00A0;&#x00A0;!&#x00A0;ITPACK/ELL&#x00A0;format,&#x00A0;extended.
&#x00A0;&#x00A0;&#x00A0;&#x00A0;!
&#x00A0;&#x00A0;&#x00A0;&#x00A0;integer(psb_ipk_),&#x00A0;allocatable&#x00A0;::&#x00A0;irn(:),&#x00A0;ja(:,:),&#x00A0;idiag(:)
&#x00A0;&#x00A0;&#x00A0;&#x00A0;real(psb_dpk_),&#x00A0;allocatable&#x00A0;::&#x00A0;val(:,:)
&#x00A0;&#x00A0;contains
&#x00A0;&#x00A0;&#x00A0;&#x00A0;....
&#x00A0;&#x00A0;end&#x00A0;type&#x00A0;psb_d_ell_sparse_mat
</pre>
<!--l. 295--><p class="nopar" > </div></div>
<h5 class="likesubsubsectionHead"><a
id="x19-149000"></a>Hacked ELLPACK</h5>
<!--l. 303--><p class="noindent" >The <span
class="cmti-10">hacked ELLPACK </span>(<span
class="cmbx-10">HLL</span>) format alleviates the main problem of the ELLPACK
format, that is, the amount of memory required by padding for sparse matrices in
which the maximum row length is larger than the average.
<!--l. 308--><p class="indent" > The number of elements allocated to padding is
[(<span
class="cmmi-10">m</span><span
class="cmsy-10">*</span><span
class="cmmi-10">maxNR</span>) <span
class="cmsy-10">- </span>(<span
class="cmmi-10">m</span><span
class="cmsy-10">*</span><span
class="cmmi-10">avgNR</span>) = <span
class="cmmi-10">m</span><span
class="cmsy-10">* </span>(<span
class="cmmi-10">maxNR</span><span
class="cmsy-10">-</span><span
class="cmmi-10">avgNR</span>)] for both <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">AS</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">JA</span></span></span> arrays,
where <span
class="cmmi-10">m </span>is equal to the number of rows of the matrix, <span
class="cmmi-10">maxNR </span>is the maximum
number of nonzero elements in any row and <span
class="cmmi-10">avgNR </span>is the average number of
nonzeros. Therefore a single densely populated row can seriously affect the total size
of the allocation.
<!--l. 317--><p class="indent" > To limit this effect, in the HLL format we break the original matrix into equally
sized groups of rows (called <span
class="cmti-10">hacks</span>), and then store these groups as independent
matrices in ELLPACK format. The groups can be arranged selecting rows in an
arbitrary manner; indeed, if the rows are sorted by decreasing number of nonzeros
we obtain essentially the JAgged Diagonals format. If the rows are not in the original
order, then an additional vector <span
class="cmti-10">rIdx </span>is required, storing the actual row index for
each row in the data structure.
<!--l. 327--><p class="indent" > The multiple ELLPACK-like buffers are stacked together inside a single, one
dimensional array; an additional vector <span
class="cmti-10">hackOffsets </span>is provided to keep track of the
individual submatrices. All hacks have the same number of rows <span
class="cmti-10">hackSize</span>; hence, the
<span
class="cmti-10">hackOffsets </span>vector is an array of (<span
class="cmmi-10">m&#x2215;hackSize</span>) + 1 elements, each one pointing to
the first index of a submatrix inside the stacked <span
class="cmti-10">cM</span>/<span
class="cmti-10">rP </span>buffers, plus an additional
element pointing past the end of the last block, where the next one would begin. We
thus have the property that the elements of the <span
class="cmmi-10">k</span>-th <span
class="cmti-10">hack </span>are stored between
<span class="obeylines-h"><span class="verb"><span
class="cmtt-10">hackOffsets[k]</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">hackOffsets[k+1]</span></span></span>, similarly to what happens in the CSR
format.
<!--l. 342--><p class="indent" > <hr class="figure"><div class="figure"
>
<a
id="x19-149001r7"></a>
<!--l. 346--><p class="noindent" ><img
src="hll.png" alt="PIC"
width="248" height="248" >
<br /> <div class="caption"
><span class="id">Figure&#x00A0;7: </span><span
class="content">Hacked ELLPACK compression of matrix in Figure&#x00A0;<a
href="#x19-146002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-149001r7 -->
<!--l. 352--><p class="indent" > </div><hr class="endfigure">
<!--l. 354--><p class="indent" > With this data structure a very long row only affects one hack, and therefore the
additional memory is limited to the hack in which the row appears.
<!--l. 358--><p class="indent" > The relevant data type is <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_T_hll_sparse_mat</span></span></span>:
<div class="center"
>
<!--l. 374--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-105">
&#x00A0;&#x00A0;type,&#x00A0;extends(psb_d_base_sparse_mat)&#x00A0;::&#x00A0;psb_d_hll_sparse_mat
&#x00A0;&#x00A0;&#x00A0;&#x00A0;!
&#x00A0;&#x00A0;&#x00A0;&#x00A0;!&#x00A0;HLL&#x00A0;format.&#x00A0;(Hacked&#x00A0;ELL)
&#x00A0;&#x00A0;&#x00A0;&#x00A0;!
&#x00A0;&#x00A0;&#x00A0;&#x00A0;integer(psb_ipk_)&#x00A0;::&#x00A0;hksz
&#x00A0;&#x00A0;&#x00A0;&#x00A0;integer(psb_ipk_),&#x00A0;allocatable&#x00A0;::&#x00A0;irn(:),&#x00A0;ja(:),&#x00A0;idiag(:),&#x00A0;hkoffs(:)
&#x00A0;&#x00A0;&#x00A0;&#x00A0;real(psb_dpk_),&#x00A0;allocatable&#x00A0;::&#x00A0;val(:)
&#x00A0;&#x00A0;contains
&#x00A0;&#x00A0;&#x00A0;....
&#x00A0;&#x00A0;end&#x00A0;type
</pre>
<!--l. 388--><p class="nopar" > </div></div>
<h5 class="likesubsubsectionHead"><a
id="x19-150000"></a>Diagonal storage</h5>
<!--l. 396--><p class="noindent" >The DIAgonal (DIA) format (shown in Figure&#x00A0;<a
href="#x19-150001r8">8<!--tex4ht:ref: fig:dia --></a>) has a 2-dimensional array <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">AS</span></span></span>
containing in each column the coefficients along a diagonal of the matrix, and an
integer array <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">OFFSET</span></span></span> that determines where each diagonal starts. The diagonals in <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">AS</span></span></span>
are padded with zeros as necessary.
<!--l. 402--><p class="indent" > The code to compute the matrix-vector product <span
class="cmmi-10">y </span>= <span
class="cmmi-10">Ax </span>is shown in Alg.&#x00A0;<a
href="#x19-150003r2">2<!--tex4ht:ref: alg:dia --></a>; it
costs one memory read per outer iteration, plus three memory reads, one memory
write and two floating-point operations per inner iteration. The accesses to
<span class="obeylines-h"><span class="verb"><span
class="cmtt-10">AS</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">x</span></span></span> are in strict sequential order, therefore no indirect addressing is
required.
<!--l. 409--><p class="indent" > <hr class="figure"><div class="figure"
>
<a
id="x19-150001r8"></a>
<!--l. 413--><p class="noindent" ><img
src="dia.png" alt="PIC"
width="248" height="248" >
<br /> <div class="caption"
><span class="id">Figure&#x00A0;8: </span><span
class="content">DIA compression of matrix in Figure&#x00A0;<a
href="#x19-146002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-150001r8 -->
<!--l. 419--><p class="indent" > </div><hr class="endfigure">
<a
id="x19-150002r2"></a>
<!--l. 423--><p class="indent" > <hr class="float"><div class="float"
>
<div class="center"
>
<!--l. 437--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-106">
&#x00A0;&#x00A0;&#x00A0;&#x00A0;do&#x00A0;j=1,ndiag
&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;if&#x00A0;(offset(j)&#x00A0;&#x003E;&#x00A0;0)&#x00A0;then
&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;ir1&#x00A0;=&#x00A0;1;&#x00A0;ir2&#x00A0;=&#x00A0;m&#x00A0;-&#x00A0;offset(j);
&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;else
&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;ir1&#x00A0;=&#x00A0;1&#x00A0;-&#x00A0;offset(j);&#x00A0;ir2&#x00A0;=&#x00A0;m;
&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;end&#x00A0;if
&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;do&#x00A0;i=ir1,ir2
&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;y(i)&#x00A0;=&#x00A0;y(i)&#x00A0;+&#x00A0;alpha*as(i,j)*x(i+offset(j))
&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;end&#x00A0;do
&#x00A0;&#x00A0;&#x00A0;&#x00A0;end&#x00A0;do
</pre>
<!--l. 450--><p class="nopar" > </div></div>
<a
id="x19-150003r2"></a>
<a
id="x19-150004"></a>
<span
class="cmbx-10">Algorithm</span><span
class="cmbx-10">&#x00A0;2:</span>&#x00A0; Matrix-Vector product in DIA format
</div><hr class="endfloat" />
<!--l. 458--><p class="indent" > The relevant data type is <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_T_dia_sparse_mat</span></span></span>:
<div class="center"
>
<!--l. 473--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-107">
&#x00A0;&#x00A0;type,&#x00A0;extends(psb_d_base_sparse_mat)&#x00A0;::&#x00A0;psb_d_dia_sparse_mat
&#x00A0;&#x00A0;&#x00A0;&#x00A0;!
&#x00A0;&#x00A0;&#x00A0;&#x00A0;!&#x00A0;DIA&#x00A0;format,&#x00A0;extended.
&#x00A0;&#x00A0;&#x00A0;&#x00A0;!
&#x00A0;&#x00A0;&#x00A0;&#x00A0;integer(psb_ipk_),&#x00A0;allocatable&#x00A0;::&#x00A0;offset(:)
&#x00A0;&#x00A0;&#x00A0;&#x00A0;integer(psb_ipk_)&#x00A0;::&#x00A0;nzeros
&#x00A0;&#x00A0;&#x00A0;&#x00A0;real(psb_dpk_),&#x00A0;allocatable&#x00A0;::&#x00A0;data(:,:)
&#x00A0;&#x00A0;end&#x00A0;type
</pre>
<!--l. 486--><p class="nopar" > </div></div>
<h5 class="likesubsubsectionHead"><a
id="x19-151000"></a>Hacked DIA</h5>
<!--l. 495--><p class="noindent" >Storage by DIAgonals is an attractive option for matrices whose coefficients are
located on a small set of diagonals, since they do away with storing explicitly the
indices and therefore reduce significantly memory traffic. However, having a few
coefficients outside of the main set of diagonals may significantly increase the
amount of needed padding; moreover, while the DIA code is easily vectorized,
it does not necessarily make optimal use of the memory hierarchy. While
processing each diagonal we are updating entries in the output vector <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">y</span></span></span>,
which is then accessed multiple times; if the vector <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">y</span></span></span> is too large to remain
in the cache memory, the associated cache miss penalty is paid multiple
times.
<!--l. 507--><p class="indent" > The <span
class="cmti-10">hacked DIA </span>(<span
class="cmbx-10">HDIA</span>) format was designed to contain the amount of padding,
by breaking the original matrix into equally sized groups of rows (<span
class="cmti-10">hacks</span>), and then
storing these groups as independent matrices in DIA format. This approach is similar
to that of HLL, and requires using an offset vector for each submatrix. Again,
similarly to HLL, the various submatrices are stacked inside a linear array to
improve memory management. The fact that the matrix is accessed in slices
helps in reducing cache misses, especially regarding accesses to the vector
<span class="obeylines-h"><span class="verb"><span
class="cmtt-10">y</span></span></span>.
<!--l. 519--><p class="indent" > An additional vector <span
class="cmti-10">hackOffsets </span>is provided to complete the matrix format; given
that <span
class="cmti-10">hackSize </span>is the number of rows of each hack, the <span
class="cmti-10">hackOffsets </span>vector is made by
an array of (<span
class="cmmi-10">m&#x2215;hackSize</span>) + 1 elements, pointing to the first diagonal offset of a
submatrix inside the stacked <span
class="cmti-10">offsets </span>buffers, plus an additional element equal to the
number of nonzero diagonals in the whole matrix. We thus have the property that
the number of diagonals of the <span
class="cmmi-10">k</span>-th <span
class="cmti-10">hack </span>is given by <span
class="cmti-10">hackOffsets[k+1] -</span>
<span
class="cmti-10">hackOffsets[k]</span>.
<!--l. 529--><p class="indent" > <hr class="figure"><div class="figure"
>
<a
id="x19-151001r9"></a>
<!--l. 533--><p class="noindent" ><img
src="hdia.png" alt="PIC"
width="248" height="248" >
<br /> <div class="caption"
><span class="id">Figure&#x00A0;9: </span><span
class="content">Hacked DIA compression of matrix in Figure&#x00A0;<a
href="#x19-146002r5">5<!--tex4ht:ref: fig:dense --></a></span></div><!--tex4ht:label?: x19-151001r9 -->
<!--l. 539--><p class="indent" > </div><hr class="endfigure">
<!--l. 541--><p class="indent" > The relevant data type is <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_T_hdia_sparse_mat</span></span></span>:
<div class="center"
>
<!--l. 568--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-108">
&#x00A0;&#x00A0;type&#x00A0;pm
&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;real(psb_dpk_),&#x00A0;allocatable&#x00A0;&#x00A0;::&#x00A0;data(:,:)
&#x00A0;&#x00A0;end&#x00A0;type&#x00A0;pm
&#x00A0;&#x00A0;type&#x00A0;po
&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;integer(psb_ipk_),&#x00A0;allocatable&#x00A0;&#x00A0;::&#x00A0;off(:)
&#x00A0;&#x00A0;end&#x00A0;type&#x00A0;po
&#x00A0;&#x00A0;type,&#x00A0;extends(psb_d_base_sparse_mat)&#x00A0;::&#x00A0;psb_d_hdia_sparse_mat
&#x00A0;&#x00A0;&#x00A0;&#x00A0;!
&#x00A0;&#x00A0;&#x00A0;&#x00A0;!&#x00A0;HDIA&#x00A0;format,&#x00A0;extended.
&#x00A0;&#x00A0;&#x00A0;&#x00A0;!
&#x00A0;&#x00A0;&#x00A0;&#x00A0;type(pm),&#x00A0;allocatable&#x00A0;::&#x00A0;hdia(:)
&#x00A0;&#x00A0;&#x00A0;&#x00A0;type(po),&#x00A0;allocatable&#x00A0;::&#x00A0;offset(:)
&#x00A0;&#x00A0;&#x00A0;&#x00A0;integer(psb_ipk_)&#x00A0;::&#x00A0;nblocks,&#x00A0;nzeros
&#x00A0;&#x00A0;&#x00A0;&#x00A0;integer(psb_ipk_)&#x00A0;::&#x00A0;hack&#x00A0;=&#x00A0;64
&#x00A0;&#x00A0;&#x00A0;&#x00A0;integer(psb_long_int_k_)&#x00A0;::&#x00A0;dim=0
&#x00A0;&#x00A0;contains
&#x00A0;&#x00A0;&#x00A0;....
&#x00A0;&#x00A0;end&#x00A0;type
</pre>
<!--l. 593--><p class="nopar" > </div></div>
<h4 class="subsectionHead"><span class="titlemark">12.4 </span> <a
id="x19-15200012.4"></a>CUDA-class extensions</h4>
<!--l. 4--><p class="noindent" >For computing with CUDA we define a dual memorization strategy in which each
variable on the CPU (&#8220;host&#8221;) side has a GPU (&#8220;device&#8221;) side. When a GPU-type
variable is initialized, the data contained is (usually) the same on both sides. Each
operator invoked on the variable may change the data so that only the host side or
the device side is up-to-date.
<!--l. 11--><p class="indent" > Keeping track of the updates to data in the variables is essential: we want to
perform most computations on the GPU, but we cannot afford the time needed to
move data between the host memory and the device memory because the bandwidth
of the interconnection bus would become the main bottleneck of the computation.
Thus, each and every computational routine in the library is built according to the
following principles:
<ul class="itemize1">
<li class="itemize">
<!--l. 18--><p class="noindent" >If the data type being handled is GPU-enabled, make sure that its device
copy is up to date, perform any arithmetic operation on the GPU, and
if the data has been altered as a result, mark the main-memory copy as
outdated.
</li>
<li class="itemize">
<!--l. 22--><p class="noindent" >The main-memory copy is never updated unless this is requested by the user
either
<dl class="description"><dt class="description">
<!--l. 25--><p class="noindent" >
<span
class="cmbx-10">explicitly</span> </dt><dd
class="description">
<!--l. 25--><p class="noindent" >by invoking a synchronization method;
</dd><dt class="description">
<!--l. 26--><p class="noindent" >
<span
class="cmbx-10">implicitly</span> </dt><dd
class="description">
<!--l. 26--><p class="noindent" >by invoking a method that involves other data items that are not
GPU-enabled, e.g., by assignment of a vector to a normal array.</dd></dl>
</li></ul>
<!--l. 31--><p class="noindent" >In this way, data items are put on the GPU memory &#8220;on demand&#8221; and remain there as
long as &#8220;normal&#8221; computations are carried out. As an example, the following call to a
matrix-vector product
<div class="center"
>
<!--l. 39--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-109">
&#x00A0;&#x00A0;&#x00A0;&#x00A0;call&#x00A0;psb_spmm(alpha,a,x,beta,y,desc_a,info)
</pre>
<!--l. 43--><p class="nopar" > </div></div>
<!--l. 47--><p class="noindent" >will transparently and automatically be performed on the GPU whenever all three data
inputs <code class="lstinline"><span style="color:#000000">a</span></code>, <code class="lstinline"><span style="color:#000000">x</span></code> and <code class="lstinline"><span style="color:#000000">y</span></code> are GPU-enabled. If a program makes many such calls sequentially,
then
<ul class="itemize1">
<li class="itemize">
<!--l. 52--><p class="noindent" >The first kernel invocation will find the data in main memory, and will
copy it to the GPU memory, thus incurring a significant overhead; the
result is however <span
class="cmti-10">not </span>copied back, and therefore:
</li>
<li class="itemize">
<!--l. 56--><p class="noindent" >Subsequent kernel invocations involving the same vector will find the data
on the GPU side so that they will run at full speed.</li></ul>
<!--l. 60--><p class="noindent" >For all invocations after the first the only data that will have to be transferred to/from
the main memory will be the scalars <code class="lstinline"><span style="color:#000000">alpha</span></code> and <code class="lstinline"><span style="color:#000000">beta</span></code>, and the return code
<code class="lstinline"><span style="color:#000000">info</span></code>.
<!--l. 64--><p class="indent" >
<dl class="description"><dt class="description">
<!--l. 65--><p class="noindent" >
<span
class="cmbx-10">Vectors:</span> </dt><dd
class="description">
<!--l. 65--><p class="noindent" >The data type <code class="lstinline"><span style="color:#000000">psb_T_vect_gpu</span></code> provides a GPU-enabled extension of
the inner type <code class="lstinline"><span style="color:#000000">psb_T_base_vect_type</span></code>, and must be used together with
the other inner matrix type to make full use of the GPU computational
capabilities;
</dd><dt class="description">
<!--l. 69--><p class="noindent" >
<span
class="cmbx-10">CSR:</span> </dt><dd
class="description">
<!--l. 69--><p class="noindent" >The data type <code class="lstinline"><span style="color:#000000">psb_T_csrg_sparse_mat</span></code> provides an interface to the GPU
version of CSR available in the NVIDIA CuSPARSE library;
</dd><dt class="description">
<!--l. 72--><p class="noindent" >
<span
class="cmbx-10">HYB:</span> </dt><dd
class="description">
<!--l. 72--><p class="noindent" >The data type <code class="lstinline"><span style="color:#000000">psb_T_hybg_sparse_mat</span></code> provides an interface to the HYB
GPU storage available in the NVIDIA CuSPARSE library. The internal
structure is opaque, hence the host side is just CSR; the HYB data format
is only available up to CUDA version 10.
</dd><dt class="description">
<!--l. 77--><p class="noindent" >
<span
class="cmbx-10">ELL:</span> </dt><dd
class="description">
<!--l. 77--><p class="noindent" >The data type <code class="lstinline"><span style="color:#000000">psb_T_elg_sparse_mat</span></code> provides an interface to the
ELLPACK implementation from SPGPU;
</dd><dt class="description">
<!--l. 80--><p class="noindent" >
<span
class="cmbx-10">HLL:</span> </dt><dd
class="description">
<!--l. 80--><p class="noindent" >The data type <code class="lstinline"><span style="color:#000000">psb_T_hlg_sparse_mat</span></code> provides an interface to the Hacked
ELLPACK implementation from SPGPU;
</dd><dt class="description">
<!--l. 82--><p class="noindent" >
<span
class="cmbx-10">HDIA:</span> </dt><dd
class="description">
<!--l. 82--><p class="noindent" >The data type <code class="lstinline"><span style="color:#000000">psb_T_hdiag_sparse_mat</span></code> provides an interface to the
Hacked DIAgonals implementation from SPGPU;</dd></dl>
<!--l. 87--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlse11.html" >prev</a>] [<a
href="userhtmlse11.html#tailuserhtmlse11.html" >prev-tail</a>] [<a
href="userhtmlse12.html" >front</a>] [<a
href="userhtml.html# " >up</a>] </p></div>
<!--l. 87--><p class="indent" > <a
id="tailuserhtmlse12.html"></a>
</body></html>

@ -0,0 +1,299 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<html >
<head><title>CUDA Environment Routines</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="generator" content="TeX4ht (https://tug.org/tex4ht/)">
<meta name="originator" content="TeX4ht (https://tug.org/tex4ht/)">
<!-- html,3 -->
<meta name="src" content="userhtml.tex">
<link rel="stylesheet" type="text/css" href="userhtml.css">
</head><body
>
<!--l. 87--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlse12.html" >prev</a>] [<a
href="userhtmlse12.html#tailuserhtmlse12.html" >prev-tail</a>] [<a
href="#tailuserhtmlse13.html">tail</a>] [<a
href="userhtml.html# " >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">13 </span> <a
id="x20-15300013"></a>CUDA Environment Routines</h3>
<!--l. 91--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-154000"></a>psb_cuda_init &#8212; Initializes PSBLAS-CUDA environment</h4>
<a
id="Q1-20-191"></a>
<div class="center"
>
<!--l. 99--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-110">
call&#x00A0;psb_cuda_init(ctxt&#x00A0;[,&#x00A0;device])
</pre>
<!--l. 103--><p class="nopar" > </div></div>
<!--l. 108--><p class="noindent" >This subroutine initializes the PSBLAS-CUDA environment.
<dl class="description"><dt class="description">
<!--l. 110--><p class="noindent" >
<span
class="cmbx-10">Type:</span> </dt><dd
class="description">
<!--l. 110--><p class="noindent" >Synchronous.
</dd><dt class="description">
<!--l. 111--><p class="noindent" >
<span
class="cmbx-10">On Entry</span> </dt><dd
class="description">
<!--l. 111--><p class="noindent" >
</dd><dt class="description">
<!--l. 112--><p class="noindent" >
<span
class="cmbx-10">device</span> </dt><dd
class="description">
<!--l. 112--><p class="noindent" >ID of CUDA device to attach to.<br
class="newline" />Scope: <span
class="cmbx-10">local</span>.<br
class="newline" />Type: <span
class="cmbx-10">optional</span>.<br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Specified as: an integer value. &#x00A0;Default: use <code class="lstinline"><span style="color:#000000">mod</span><span style="color:#000000">(</span><span style="color:#000000">iam</span><span style="color:#000000">,</span><span style="color:#000000">ngpu</span><span style="color:#000000">)</span></code> where <code class="lstinline"><span style="color:#000000">iam</span></code> is
the calling process index and <code class="lstinline"><span style="color:#000000">ngpu</span></code> is the total number of CUDA devices
available on the current node.</dd></dl>
<!--l. 123--><p class="noindent" ><span
class="cmbx-12">Notes</span>
<ol class="enumerate1" >
<li
class="enumerate" id="x20-154002x1">
<!--l. 125--><p class="noindent" >A call to this routine must precede any other PSBLAS-CUDA call.</li></ol>
<!--l. 129--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-155000"></a>psb_cuda_exit &#8212; Exit from PSBLAS-CUDA environment</h4>
<a
id="Q1-20-193"></a>
<div class="center"
>
<!--l. 137--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-111">
call&#x00A0;psb_cuda_exit(ctxt)
</pre>
<!--l. 141--><p class="nopar" > </div></div>
<!--l. 146--><p class="noindent" >This subroutine exits from the PSBLAS CUDA context.
<dl class="description"><dt class="description">
<!--l. 148--><p class="noindent" >
<span
class="cmbx-10">Type:</span> </dt><dd
class="description">
<!--l. 148--><p class="noindent" >Synchronous.
</dd><dt class="description">
<!--l. 149--><p class="noindent" >
<span
class="cmbx-10">On Entry</span> </dt><dd
class="description">
<!--l. 149--><p class="noindent" >
</dd><dt class="description">
<!--l. 150--><p class="noindent" >
<span
class="cmbx-10">ctxt</span> </dt><dd
class="description">
<!--l. 150--><p class="noindent" >the communication context identifying the virtual parallel machine.<br
class="newline" />Scope: <span
class="cmbx-10">global</span>.<br
class="newline" />Type: <span
class="cmbx-10">required</span>.<br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Specified as: an integer variable.</dd></dl>
<!--l. 161--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-156000"></a>psb_cuda_DeviceSync &#8212; Synchronize CUDA device</h4>
<a
id="Q1-20-195"></a>
<div class="center"
>
<!--l. 169--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-112">
call&#x00A0;psb_cuda_DeviceSync()
</pre>
<!--l. 173--><p class="nopar" > </div></div>
<!--l. 178--><p class="noindent" >This subroutine ensures that all previously invoked kernels, i.e. all invocations of
CUDA-side code, have completed.
<!--l. 182--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-157000"></a>psb_cuda_getDeviceCount </h4>
<a
id="Q1-20-197"></a>
<div class="center"
>
<!--l. 190--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-113">
ngpus&#x00A0;=&#x00A0;&#x00A0;psb_cuda_getDeviceCount()
</pre>
<!--l. 194--><p class="nopar" > </div></div>
<!--l. 199--><p class="noindent" >Get number of devices available on current computing node.
<!--l. 201--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-158000"></a>psb_cuda_getDevice </h4>
<a
id="Q1-20-199"></a>
<div class="center"
>
<!--l. 209--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-114">
ngpus&#x00A0;=&#x00A0;&#x00A0;psb_cuda_getDevice()
</pre>
<!--l. 213--><p class="nopar" > </div></div>
<!--l. 218--><p class="noindent" >Get device in use by current process.
<!--l. 220--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-159000"></a>psb_cuda_setDevice </h4>
<a
id="Q1-20-201"></a>
<div class="center"
>
<!--l. 228--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-115">
info&#x00A0;=&#x00A0;psb_cuda_setDevice(dev)
</pre>
<!--l. 232--><p class="nopar" > </div></div>
<!--l. 237--><p class="noindent" >Set device to be used by current process.
<!--l. 239--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-160000"></a>psb_cuda_DeviceHasUVA </h4>
<a
id="Q1-20-203"></a>
<div class="center"
>
<!--l. 247--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-116">
hasUva&#x00A0;=&#x00A0;psb_cuda_DeviceHasUVA()
</pre>
<!--l. 251--><p class="nopar" > </div></div>
<!--l. 256--><p class="noindent" >Returns true if device currently in use supports UVA (Unified Virtual Addressing).
<!--l. 259--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-161000"></a>psb_cuda_WarpSize </h4>
<a
id="Q1-20-205"></a>
<div class="center"
>
<!--l. 267--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-117">
nw&#x00A0;=&#x00A0;psb_cuda_WarpSize()
</pre>
<!--l. 271--><p class="nopar" > </div></div>
<!--l. 276--><p class="noindent" >Returns the warp size.
<!--l. 279--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-162000"></a>psb_cuda_MultiProcessors </h4>
<a
id="Q1-20-207"></a>
<div class="center"
>
<!--l. 287--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-118">
nmp&#x00A0;=&#x00A0;psb_cuda_MultiProcessors()
</pre>
<!--l. 291--><p class="nopar" > </div></div>
<!--l. 296--><p class="noindent" >Returns the number of multiprocessors in the CUDA device.
<!--l. 298--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-163000"></a>psb_cuda_MaxThreadsPerMP </h4>
<a
id="Q1-20-209"></a>
<div class="center"
>
<!--l. 306--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-119">
nt&#x00A0;=&#x00A0;psb_cuda_MaxThreadsPerMP()
</pre>
<!--l. 310--><p class="nopar" > </div></div>
<!--l. 315--><p class="noindent" >Returns the maximum number of threads per multiprocessor.
<!--l. 318--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-164000"></a>psb_cuda_MaxRegistersPerBlock </h4>
<a
id="Q1-20-211"></a>
<div class="center"
>
<!--l. 326--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-120">
nr&#x00A0;=&#x00A0;psb_cuda_MaxRegistersPerBlock()
</pre>
<!--l. 330--><p class="nopar" > </div></div>
<!--l. 335--><p class="noindent" >Returns the maximum number of registers per thread block.
<!--l. 338--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-165000"></a>psb_cuda_MemoryClockRate </h4>
<a
id="Q1-20-213"></a>
<div class="center"
>
<!--l. 346--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-121">
cl&#x00A0;=&#x00A0;psb_cuda_MemoryClockRate()
</pre>
<!--l. 350--><p class="nopar" > </div></div>
<!--l. 355--><p class="noindent" >Returns the memory clock rate in KHz, as an integer.
<!--l. 357--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-166000"></a>psb_cuda_MemoryBusWidth </h4>
<a
id="Q1-20-215"></a>
<div class="center"
>
<!--l. 365--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-122">
nb&#x00A0;=&#x00A0;psb_cuda_MemoryBusWidth()
</pre>
<!--l. 369--><p class="nopar" > </div></div>
<!--l. 374--><p class="noindent" >Returns the memory bus width in bits.
<!--l. 376--><p class="noindent" >
<h4 class="likesubsectionHead"><a
id="x20-167000"></a>psb_cuda_MemoryPeakBandwidth </h4>
<a
id="Q1-20-217"></a>
<div class="center"
>
<!--l. 384--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-123">
bw&#x00A0;=&#x00A0;psb_cuda_MemoryPeakBandwidth()
</pre>
<!--l. 388--><p class="nopar" > </div></div>
<!--l. 392--><p class="noindent" >Returns the peak memory bandwidth in MB/s (real double precision).
<!--l. 126--><p class="indent" >
<!--l. 2--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlse12.html" >prev</a>] [<a
href="userhtmlse12.html#tailuserhtmlse12.html" >prev-tail</a>] [<a
href="userhtmlse13.html" >front</a>] [<a
href="userhtml.html# " >up</a>] </p></div>
<!--l. 2--><p class="indent" > <a
id="tailuserhtmlse13.html"></a>
</body></html>

@ -11,7 +11,7 @@
</head><body
>
<!--l. 72--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlsu7.html" >next</a>] [<a
href="userhtmlse6.html" >next</a>] [<a
href="userhtmlse1.html" >prev</a>] [<a
href="userhtmlse1.html#tailuserhtmlse1.html" >prev-tail</a>] [<a
href="#tailuserhtmlse2.html">tail</a>] [<a
@ -35,11 +35,11 @@ process are executed through calls to the serial sparse BLAS subroutines. In a
similar way, the inter-process message exchanges are encapsulated in an
application layer that has been strongly inspired by the Basic Linear Algebra
Communication Subroutines (BLACS) library&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#XBLACS">7</a>]</span>. Usually there is no need to deal
href="userhtmlli2.html#XBLACS">6</a>]</span>. Usually there is no need to deal
directly with MPI; however, in some cases, MPI routines are used directly
to improve efficiency. For further details on our communication layer see
Sec.&#x00A0;<a
href="userhtmlse7.html#x68-1050007">7<!--tex4ht:ref: sec:parenv --></a>.
href="userhtmlse7.html#x12-1050007">7<!--tex4ht:ref: sec:parenv --></a>.
<!--l. 101--><p class="indent" > <hr class="figure"><div class="figure"
>
@ -85,7 +85,7 @@ class="cmtt-10">BLOCK</span></span></span>, as well as completely
arbitrary assignments of equation indices to processes. In particular it is
consistent with the usage of graph partitioning tools commonly available in
the literature, e.g. METIS&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#XMETIS">14</a>]</span>. Dense vectors conform to sparse matrices,
href="userhtmlli2.html#XMETIS">13</a>]</span>. Dense vectors conform to sparse matrices,
that is, the entries of a vector follow the same distribution of the matrix
rows.
<!--l. 146--><p class="indent" > We assume that the sparse matrix is built in parallel, where each process generates
@ -96,18 +96,598 @@ href="userhtml5.html#fn1x0"><sup class="textsuperscript">1</sup></a></span><a
id="x4-3002f1"></a> ,
even though the resulting memory bottleneck would make this option unattractive in
most cases.
<div class="subsectionTOCS">
&#x00A0;<span class="subsectionToc" >2.1 <a
href="userhtmlsu1.html#x6-40002.1">Basic Nomenclature</a></span>
<br /> &#x00A0;<span class="subsectionToc" >2.2 <a
href="userhtmlsu2.html#x8-50002.2">Library contents</a></span>
<br /> &#x00A0;<span class="subsectionToc" >2.3 <a
href="userhtmlsu3.html#x9-60002.3">Application structure</a></span>
<br /> &#x00A0;&#x00A0;<span class="subsubsectionToc" >2.3.1 <a
href="userhtmlsu3.html#x9-70002.3.1">User-defined index mappings</a></span>
<br /> &#x00A0;<span class="subsectionToc" >2.4 <a
href="userhtmlsu4.html#x11-80002.4">Programming model</a></span>
</div>
<h4 class="subsectionHead"><span class="titlemark">2.1 </span> <a
id="x4-40002.1"></a>Basic Nomenclature</h4>
<!--l. 158--><p class="noindent" >Our computational model implies that the data allocation on the parallel distributed
memory machine is guided by the structure of the physical model, and specifically by
the discretization mesh of the PDE.
<!--l. 163--><p class="indent" > Each point of the discretization mesh will have (at least) one associated
equation/variable, and therefore one index. We say that point <span
class="cmmi-10">i </span><span
class="cmti-10">depends </span>on point <span
class="cmmi-10">j </span>if
the equation for a variable associated with <span
class="cmmi-10">i </span>contains a term in <span
class="cmmi-10">j</span>, or equivalently if
<span
class="cmmi-10">a</span><sub><span
class="cmmi-7">ij</span></sub><span
class="cmmi-10">&#x2260;</span>0. After the partition of the discretization mesh into <span
class="cmti-10">sub-domains </span>assigned
to the parallel processes, we classify the points of a given sub-domain as
follows.
<dl class="description"><dt class="description">
<!--l. 172--><p class="noindent" >
<span
class="cmbx-10">Internal.</span> </dt><dd
class="description">
<!--l. 172--><p class="noindent" >An internal point of a given domain <span
class="cmti-10">depends </span>only on points of the same
domain. If all points of a domain are assigned to one process, then
a computational step (e.g., a matrix-vector product) of the equations
associated with the internal points requires no data items from other
domains and no communications.
</dd><dt class="description">
<!--l. 181--><p class="noindent" >
<span
class="cmbx-10">Boundary.</span> </dt><dd
class="description">
<!--l. 181--><p class="noindent" >A point of a given domain is a boundary point if it <span
class="cmti-10">depends </span>on points
belonging to other domains.
</dd><dt class="description">
<!--l. 185--><p class="noindent" >
<span
class="cmbx-10">Halo.</span> </dt><dd
class="description">
<!--l. 185--><p class="noindent" >A halo point for a given domain is a point belonging to another domain
such that there is a boundary point which <span
class="cmti-10">depends </span>on it. Whenever performing
a computational step, such as a matrix-vector product, the values associated
with halo points are requested from other domains. A boundary point of a
given domain is usually a halo point for some other domain<span class="footnote-mark"><a
href="userhtml6.html#fn2x0"><sup class="textsuperscript">2</sup></a></span><a
id="x4-4001f2"></a> ;
therefore the cardinality of the boundary points set denotes the amount
of data sent to other domains.
</dd><dt class="description">
<!--l. 198--><p class="noindent" >
<span
class="cmbx-10">Overlap.</span> </dt><dd
class="description">
<!--l. 198--><p class="noindent" >An overlap point is a boundary point assigned to multiple domains. Any
operation that involves an overlap point has to be replicated for each
assignment.</dd></dl>
<!--l. 202--><p class="noindent" >Overlap points do not usually exist in the basic data distributions; however they are a
feature of Domain Decomposition Schwarz preconditioners which are the subject of
related research work&#x00A0;<span class="cite">[<a
href="userhtmlli2.html#X2007c">3</a>,&#x00A0;<a
href="userhtmlli2.html#X2007d">2</a>]</span>.
<!--l. 207--><p class="indent" > We denote the sets of internal, boundary and halo points for a given subdomain
by <span
class="cmsy-10"><img
src="cmsy10-49.png" alt="I" class="10x-x-49" /></span>, <span
class="cmsy-10"><img
src="cmsy10-42.png" alt="B" class="10x-x-42" /> </span>and <span
class="cmsy-10"><img
src="cmsy10-48.png" alt="H" class="10x-x-48" /></span>. Each subdomain is assigned to one process; each process usually owns
one subdomain, although the user may choose to assign more than one subdomain to
a process. If each process <span
class="cmmi-10">i </span>owns one subdomain, the number of rows in
the local sparse matrix is <span
class="cmsy-10">|<img
src="cmsy10-49.png" alt="I" class="10x-x-49" /></span><sub><span
class="cmmi-7">i</span></sub><span
class="cmsy-10">| </span>+ <span
class="cmsy-10">|<img
src="cmsy10-42.png" alt="B" class="10x-x-42" /></span><sub><span
class="cmmi-7">i</span></sub><span
class="cmsy-10">|</span>, and the number of local columns (i.e.
those for which there exists at least one non-zero entry in the local rows) is
<span
class="cmsy-10">|<img
src="cmsy10-49.png" alt="I" class="10x-x-49" /></span><sub><span
class="cmmi-7">i</span></sub><span
class="cmsy-10">| </span>+ <span
class="cmsy-10">|<img
src="cmsy10-42.png" alt="B" class="10x-x-42" /></span><sub><span
class="cmmi-7">i</span></sub><span
class="cmsy-10">| </span>+ <span
class="cmsy-10">|<img
src="cmsy10-48.png" alt="H" class="10x-x-48" /></span><sub><span
class="cmmi-7">i</span></sub><span
class="cmsy-10">|</span>.
<!--l. 217--><p class="indent" > <hr class="figure"><div class="figure"
>
<a
id="x4-4003r2"></a>
<div class="center"
>
<!--l. 218--><p class="noindent" >
<!--l. 221--><p class="noindent" ><img
src="points.png" alt="PIC"
width="46" height="46" ></div>
<br /> <div class="caption"
><span class="id">Figure&#x00A0;2: </span><span
class="content">Point classification.</span></div><!--tex4ht:label?: x4-4003r2 -->
<!--l. 227--><p class="indent" > </div><hr class="endfigure">
<!--l. 229--><p class="indent" > This classification of mesh points guides the naming scheme that we adopted in
the library internals and in the data structures. We explicitly note that &#8220;Halo&#8221; points
are also often called &#8220;ghost&#8221; points in the literature.
<h4 class="subsectionHead"><span class="titlemark">2.2 </span> <a
id="x4-50002.2"></a>Library contents</h4>
<!--l. 238--><p class="noindent" >The PSBLAS library consists of various classes of subroutines:
<dl class="description"><dt class="description">
<!--l. 240--><p class="noindent" >
<span
class="cmbx-10">Computational routines</span> </dt><dd
class="description">
<!--l. 240--><p class="noindent" >comprising:
<ul class="itemize1">
<li class="itemize">
<!--l. 242--><p class="noindent" >Sparse matrix by dense matrix product;
</li>
<li class="itemize">
<!--l. 243--><p class="noindent" >Sparse triangular systems solution for block diagonal matrices;
</li>
<li class="itemize">
<!--l. 245--><p class="noindent" >Vector and matrix norms;
</li>
<li class="itemize">
<!--l. 246--><p class="noindent" >Dense matrix sums;
</li>
<li class="itemize">
<!--l. 247--><p class="noindent" >Dot products.</li></ul>
</dd><dt class="description">
<!--l. 249--><p class="noindent" >
<span
class="cmbx-10">Communication routines</span> </dt><dd
class="description">
<!--l. 249--><p class="noindent" >handling halo and overlap communications;
</dd><dt class="description">
<!--l. 251--><p class="noindent" >
<span
class="cmbx-10">Data management and auxiliary routines</span> </dt><dd
class="description">
<!--l. 251--><p class="noindent" >including:
<ul class="itemize1">
<li class="itemize">
<!--l. 253--><p class="noindent" >Parallel environment management
</li>
<li class="itemize">
<!--l. 254--><p class="noindent" >Communication descriptors allocation;
</li>
<li class="itemize">
<!--l. 255--><p class="noindent" >Dense and sparse matrix allocation;
</li>
<li class="itemize">
<!--l. 256--><p class="noindent" >Dense and sparse matrix build and update;
</li>
<li class="itemize">
<!--l. 257--><p class="noindent" >Sparse matrix and data distribution preprocessing.</li></ul>
</dd><dt class="description">
<!--l. 259--><p class="noindent" >
<span
class="cmbx-10">Preconditioner routines</span> </dt><dd
class="description">
<!--l. 259--><p class="noindent" >
</dd><dt class="description">
<!--l. 260--><p class="noindent" >
<span
class="cmbx-10">Iterative methods</span> </dt><dd
class="description">
<!--l. 260--><p class="noindent" >a subset of Krylov subspace iterative methods</dd></dl>
<!--l. 263--><p class="noindent" >The following naming scheme has been adopted for all the symbols internally defined in
the PSBLAS software package:
<ul class="itemize1">
<li class="itemize">
<!--l. 266--><p class="noindent" >all symbols (i.e. subroutine names, data types...) are prefixed by <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_</span></span></span>
</li>
<li class="itemize">
<!--l. 268--><p class="noindent" >all data type names are suffixed by <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">_type</span></span></span>
</li>
<li class="itemize">
<!--l. 269--><p class="noindent" >all constants are suffixed by <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">_</span></span></span>
</li>
<li class="itemize">
<!--l. 270--><p class="noindent" >all top-level subroutine names follow the rule <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_xxname</span></span></span> where <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">xx</span></span></span> can be
either:
<ul class="itemize2">
<li class="itemize">
<!--l. 273--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span
class="cmtt-10">ge</span></span></span>: the routine is related to dense data,
</li>
<li class="itemize">
<!--l. 274--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span
class="cmtt-10">sp</span></span></span>: the routine is related to sparse data,
</li>
<li class="itemize">
<!--l. 275--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span
class="cmtt-10">cd</span></span></span>: the routine is related to communication descriptor (see&#x00A0;<a
href="userhtmlse3.html#x8-90003">3<!--tex4ht:ref: sec:datastruct --></a>).</li></ul>
<!--l. 278--><p class="noindent" >For example the <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geins</span></span></span>, <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spins</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdins</span></span></span> perform the same
action (see&#x00A0;<a
href="userhtmlse6.html#x11-770006">6<!--tex4ht:ref: sec:toolsrout --></a>) on dense matrices, sparse matrices and communication
descriptors respectively. Interface overloading allows the usage of the same
subroutine names for both real and complex data.</li></ul>
<!--l. 285--><p class="noindent" >In the description of the subroutines, arguments or argument entries are classified
as:
<dl class="description"><dt class="description">
<!--l. 288--><p class="noindent" >
<span
class="cmbx-10">global</span> </dt><dd
class="description">
<!--l. 288--><p class="noindent" >For input arguments, the value must be the same on all processes
participating in the subroutine call; for output arguments the value is
guaranteed to be the same.
</dd><dt class="description">
<!--l. 291--><p class="noindent" >
<span
class="cmbx-10">local</span> </dt><dd
class="description">
<!--l. 291--><p class="noindent" >Each process has its own value(s) independently.</dd></dl>
<!--l. 293--><p class="noindent" >To finish our general description, we define a version string with the constant
<div class="math-display" >
<img
src="userhtml0x.png" alt="psb_version_string_
" class="math-display" ></div>
<!--l. 295--><p class="nopar" > whose current value is <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">3.8.0</span></span></span>
<!--l. 298--><p class="noindent" >
<h4 class="subsectionHead"><span class="titlemark">2.3 </span> <a
id="x4-60002.3"></a>Application structure</h4>
<!--l. 301--><p class="noindent" >The main underlying principle of the PSBLAS library is that the library objects are
created and exist with reference to a discretized space to which there corresponds
an index space and a matrix sparsity pattern. As an example, consider a
cell-centered finite-volume discretization of the Navier-Stokes equations on a
simulation domain; the index space 1<span
class="cmmi-10">&#x2026;</span><span
class="cmmi-10">n </span>is isomorphic to the set of cell centers,
whereas the pattern of the associated linear system matrix is isomorphic to the
adjacency graph imposed on the discretization mesh by the discretization
stencil.
<!--l. 311--><p class="indent" > Thus the first order of business is to establish an index space, and this is done
with a call to <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdall</span></span></span> in which we specify the size of the index space <span
class="cmmi-10">n </span>and the
allocation of the elements of the index space to the various processes making up the
MPI (virtual) parallel machine.
<!--l. 317--><p class="indent" > The index space is partitioned among processes, and this creates a mapping from
the &#8220;global&#8221; numbering 1<span
class="cmmi-10">&#x2026;</span><span
class="cmmi-10">n </span>to a numbering &#8220;local&#8221; to each process; each process <span
class="cmmi-10">i</span>
will own a certain subset 1<span
class="cmmi-10">&#x2026;</span><span
class="cmmi-10">n</span><sub>row<sub><span
class="cmmi-5">i</span></sub></sub>, each element of which corresponds to a certain
element of 1<span
class="cmmi-10">&#x2026;</span><span
class="cmmi-10">n</span>. The user does not explicitly set this mapping; when the application
needs to indicate to which element of the index space a certain item is related,
such as the row and column index of a matrix coefficient, it does so in the
&#8220;global&#8221; numbering, and the library will translate into the appropriate &#8220;local&#8221;
numbering.
<!--l. 327--><p class="indent" > For a given index space 1<span
class="cmmi-10">&#x2026;</span><span
class="cmmi-10">n </span>there are many possible associated topologies, i.e.
many different discretization stencils; thus the description of the index space is not
completed until the user has defined a sparsity pattern, either explicitly through
<span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdins</span></span></span> or implicitly through <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spins</span></span></span>. The descriptor is finalized with a call to
<span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdasb</span></span></span> and a sparse matrix with a call to <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spasb</span></span></span>. After <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdasb</span></span></span> each
process <span
class="cmmi-10">i </span>will have defined a set of &#8220;halo&#8221; (or &#8220;ghost&#8221;) indices <span
class="cmmi-10">n</span><sub>row<sub><span
class="cmmi-5">i</span></sub></sub> + 1<span
class="cmmi-10">&#x2026;</span><span
class="cmmi-10">n</span><sub>col<sub>
<span
class="cmmi-5">i</span></sub></sub>,
denoting elements of the index space that are <span
class="cmti-10">not </span>assigned to process <span
class="cmmi-10">i</span>; however the
variables associated with them are needed to complete computations associated with
the sparse matrix <span
class="cmmi-10">A</span>, and thus they have to be fetched from (neighbouring)
processes. The descriptor of the index space is built exactly for the purpose
of properly sequencing the communication steps required to achieve this
objective.
<!--l. 343--><p class="indent" > A simple application structure will walk through the index space allocation,
matrix/vector creation and linear system solution as follows:
<ol class="enumerate1" >
<li
class="enumerate" id="x4-6002x1">
<!--l. 347--><p class="noindent" >Initialize parallel environment with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_init</span></span></span>
</li>
<li
class="enumerate" id="x4-6004x2">
<!--l. 348--><p class="noindent" >Initialize index space with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdall</span></span></span>
</li>
<li
class="enumerate" id="x4-6006x3">
<!--l. 349--><p class="noindent" >Allocate sparse matrix and dense vectors with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spall</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geall</span></span></span>
</li>
<li
class="enumerate" id="x4-6008x4">
<!--l. 351--><p class="noindent" >Loop over all local rows, generate matrix and vector entries, and insert
them with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spins</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geins</span></span></span>
</li>
<li
class="enumerate" id="x4-6010x5">
<!--l. 353--><p class="noindent" >Assemble the various entities:
<ol class="enumerate2" >
<li
class="enumerate" id="x4-6012x1">
<!--l. 355--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdasb</span></span></span>
</li>
<li
class="enumerate" id="x4-6014x2">
<!--l. 356--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spasb</span></span></span>
</li>
<li
class="enumerate" id="x4-6016x3">
<!--l. 357--><p class="noindent" ><span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geasb</span></span></span></li></ol>
</li>
<li
class="enumerate" id="x4-6018x6">
<!--l. 359--><p class="noindent" >Choose the preconditioner to be used with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">prec%init</span></span></span> and build it with
<span class="obeylines-h"><span class="verb"><span
class="cmtt-10">prec%build</span></span></span><span class="footnote-mark"><a
href="userhtml7.html#fn3x0"><sup class="textsuperscript">3</sup></a></span><a
id="x4-6019f3"></a> .
</li>
<li
class="enumerate" id="x4-6022x7">
<!--l. 363--><p class="noindent" >Call the iterative driver <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_krylov</span></span></span> with the method of choice, e.g.
<span class="obeylines-h"><span class="verb"><span
class="cmtt-10">bicgstab</span></span></span>.</li></ol>
<!--l. 366--><p class="noindent" >This is the structure of the sample programs in the directory <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">test/pargen/</span></span></span>.
<!--l. 369--><p class="indent" > For a simulation in which the same discretization mesh is used over multiple time
steps, the following structure may be more appropriate:
<ol class="enumerate1" >
<li
class="enumerate" id="x4-6024x1">
<!--l. 372--><p class="noindent" >Initialize parallel environment with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_init</span></span></span>
</li>
<li
class="enumerate" id="x4-6026x2">
<!--l. 373--><p class="noindent" >Initialize index space with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdall</span></span></span>
</li>
<li
class="enumerate" id="x4-6028x3">
<!--l. 374--><p class="noindent" >Loop over the topology of the discretization mesh and build the descriptor
with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdins</span></span></span>
</li>
<li
class="enumerate" id="x4-6030x4">
<!--l. 376--><p class="noindent" >Assemble the descriptor with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdasb</span></span></span>
</li>
<li
class="enumerate" id="x4-6032x5">
<!--l. 377--><p class="noindent" >Allocate the sparse matrices and dense vectors with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spall</span></span></span> and
<span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geall</span></span></span>
</li>
<li
class="enumerate" id="x4-6034x6">
<!--l. 379--><p class="noindent" >Loop over the time steps:
<ol class="enumerate2" >
<li
class="enumerate" id="x4-6036x1">
<!--l. 381--><p class="noindent" >If after first time step, reinitialize the sparse matrix with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_sprn</span></span></span>;
also zero out the dense vectors;
</li>
<li
class="enumerate" id="x4-6038x2">
<!--l. 384--><p class="noindent" >Loop over the mesh, generate the coefficients and insert/update them
with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spins</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geins</span></span></span>
</li>
<li
class="enumerate" id="x4-6040x3">
<!--l. 386--><p class="noindent" >Assemble with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spasb</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geasb</span></span></span>
</li>
<li
class="enumerate" id="x4-6042x4">
<!--l. 387--><p class="noindent" >Choose and build preconditioner with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">prec%init</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">prec%build</span></span></span>
</li>
<li
class="enumerate" id="x4-6044x5">
<!--l. 389--><p class="noindent" >Call the iterative method of choice, e.g. <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_bicgstab</span></span></span></li></ol>
</li></ol>
<!--l. 392--><p class="noindent" >The insertion routines will be called as many times as needed; they only need to be
called on the data that is actually allocated to the current process, i.e. each process
generates its own data.
<!--l. 397--><p class="indent" > In principle there is no specific order in the calls to <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spins</span></span></span>, nor is there a
requirement to build a matrix row in its entirety before calling the routine; this
allows the application programmer to walk through the discretization mesh element
by element, generating the main part of a given matrix row but also contributions to
the rows corresponding to neighbouring elements.
<!--l. 404--><p class="indent" > From a functional point of view it is even possible to execute one call for each
nonzero coefficient; however this would have a substantial computational
overhead. It is therefore advisable to pack a certain amount of data into each
call to the insertion routine, say touching on a few tens of rows; the best
performing value would depend on both the architecture of the computer being
used and on the problem structure. At the opposite extreme, it would be
possible to generate the entire part of a coefficient matrix residing on a
process and pass it in a single call to <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spins</span></span></span>; this, however, would entail a
doubling of memory occupation, and thus would be almost always far from
optimal.
<!--l. 417--><p class="noindent" >
<h5 class="subsubsectionHead"><span class="titlemark">2.3.1 </span> <a
id="x4-70002.3.1"></a>User-defined index mappings</h5>
<!--l. 419--><p class="noindent" >PSBLAS supports user-defined global to local index mappings, subject to the
constraints outlined in sec.&#x00A0;<a
href="#x4-60002.3">2.3<!--tex4ht:ref: sec:appstruct --></a>:
<ol class="enumerate1" >
<li
class="enumerate" id="x4-7002x1">
<!--l. 422--><p class="noindent" >The set of indices owned locally must be mapped to the set 1<span
class="cmmi-10">&#x2026;</span><span
class="cmmi-10">n</span><sub>row<sub><span
class="cmmi-5">i</span></sub></sub>;
</li>
<li
class="enumerate" id="x4-7004x2">
<!--l. 424--><p class="noindent" >The set of halo points must be mapped to the set <span
class="cmmi-10">n</span><sub>row<sub><span
class="cmmi-5">i</span></sub></sub> + 1<span
class="cmmi-10">&#x2026;</span><span
class="cmmi-10">n</span><sub>col<sub>
<span
class="cmmi-5">i</span></sub></sub>;</li></ol>
<!--l. 427--><p class="noindent" >but otherwise the mapping is arbitrary. The user application is responsible for ensuring
consistency of this mapping; some errors may be caught by the library, but
this is not guaranteed. The application structure to support this usage is as
follows:
<ol class="enumerate1" >
<li
class="enumerate" id="x4-7006x1">
<!--l. 433--><p class="noindent" >Initialize index
space with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdall(ictx,desc,info,vl=vl,lidx=lidx)</span></span></span> passing the
vectors <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">vl(:)</span></span></span> containing the set of global indices owned by the current
process and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">lidx(:)</span></span></span> containing the corresponding local indices;
</li>
<li
class="enumerate" id="x4-7008x2">
<!--l. 438--><p class="noindent" >Add the halo points <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">ja(:)</span></span></span> and their associated local indices <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">lidx(:)</span></span></span> with
one or more calls to <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdins(nz,ja,desc,info,lidx=lidx)</span></span></span>;
</li>
<li
class="enumerate" id="x4-7010x3">
<!--l. 441--><p class="noindent" >Assemble the descriptor with <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_cdasb</span></span></span>;
</li>
<li
class="enumerate" id="x4-7012x4">
<!--l. 442--><p class="noindent" >Build the sparse matrices and vectors, optionally making use in <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_spins</span></span></span>
and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_geins</span></span></span> of the <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">local</span></span></span> argument specifying that the indices in <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">ia</span></span></span>,
<span class="obeylines-h"><span class="verb"><span
class="cmtt-10">ja</span></span></span> and <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">irw</span></span></span>, respectively, are already local indices.</li></ol>
<!--l. 449--><p class="noindent" >
<h4 class="subsectionHead"><span class="titlemark">2.4 </span> <a
id="x4-80002.4"></a>Programming model</h4>
<!--l. 451--><p class="noindent" >The PSBLAS library is based on the Single Program Multiple Data (SPMD)
programming model: each process participating in the computation performs the
same actions on a chunk of data. Parallelism is thus data-driven.
<!--l. 456--><p class="indent" > Because of this structure, many subroutines coordinate their action across the
various processes, thus providing an implicit synchronization point, and therefore
<span
class="cmti-10">must </span>be called simultaneously by all processes participating in the computation. This
is certainly true for the data allocation and assembly routines, for all the
computational routines and for some of the tools routines.
<!--l. 464--><p class="indent" > However there are many cases where no synchronization, and indeed no
communication among processes, is implied; for instance, all the routines in sec.&#x00A0;<a
href="userhtmlse3.html#x8-90003">3<!--tex4ht:ref: sec:datastruct --></a>
are only acting on the local data structures, and thus may be called independently.
The most important case is that of the coefficient insertion routines: since the
number of coefficients in the sparse and dense matrices varies among the processors,
and since the user is free to choose an arbitrary order in building the matrix entries,
these routines cannot imply a synchronization.
<!--l. 474--><p class="indent" > Throughout this user&#8217;s guide each subroutine will be clearly indicated
as:
<dl class="description"><dt class="description">
<!--l. 477--><p class="noindent" >
<span
class="cmbx-10">Synchronous:</span> </dt><dd
class="description">
<!--l. 477--><p class="noindent" >must be called simultaneously by all the processes in the relevant
communication context;
</dd><dt class="description">
<!--l. 479--><p class="noindent" >
<span
class="cmbx-10">Asynchronous:</span> </dt><dd
class="description">
<!--l. 479--><p class="noindent" >may be called in a totally independent manner.</dd></dl>
@ -115,7 +695,7 @@ href="userhtmlsu4.html#x11-80002.4">Programming model</a></span>
<!--l. 1--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlsu7.html" >next</a>] [<a
href="userhtmlse6.html" >next</a>] [<a
href="userhtmlse1.html" >prev</a>] [<a
href="userhtmlse1.html#tailuserhtmlse1.html" >prev-tail</a>] [<a
href="userhtmlse2.html" >front</a>] [<a

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -11,13 +11,13 @@
</head><body
>
<!--l. 3--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlsu80.html" >next</a>] [<a
href="userhtmlse12.html" >next</a>] [<a
href="userhtmlse7.html" >prev</a>] [<a
href="userhtmlse7.html#tailuserhtmlse7.html" >prev-tail</a>] [<a
href="userhtmlsu71.html#tailuserhtmlse8.html">tail</a>] [<a
href="userhtml.html#userhtmlsu76.html" >up</a>] </p></div>
href="userhtmlse5.html#tailuserhtmlse8.html">tail</a>] [<a
href="userhtml.html#userhtmlse11.html" >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">8 </span> <a
id="x86-1230008"></a>Error handling</h3>
id="x13-1230008"></a>Error handling</h3>
<!--l. 5--><p class="noindent" >The PSBLAS library error handling policy has been completely rewritten in version
2.0. The idea behind the design of this new error handling strategy is to keep error
messages on a stack allowing the user to trace back up to the point where the first
@ -36,7 +36,7 @@ zero, an error condition is raised. This process continues on all the levels of
nested calls until the level where the user decides to abort the program
execution.
<!--l. 23--><p class="indent" > Figure&#x00A0;<a
href="#x86-123025r5">5<!--tex4ht:ref: fig:routerr --></a> shows the layout of a generic <span class="obeylines-h"><span class="verb"><span
href="#x13-123025r5">5<!--tex4ht:ref: fig:routerr --></a> shows the layout of a generic <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_foo</span></span></span> routine with respect to the
PSBLAS-2.0 error handling policy. It is possible to see how, whenever an error
condition is detected, the <span class="obeylines-h"><span class="verb"><span
@ -58,7 +58,7 @@ explicitly.
<!--l. 40--><p class="indent" > <a
id="x86-123025r5"></a><hr class="float"><div class="float"
id="x13-123025r5"></a><hr class="float"><div class="float"
>
@ -67,270 +67,241 @@ explicitly.
>
<!--l. 101--><p class="noindent" >
<div class="fbox"><div class="minipage"><!--l. 72-->
<div class="lstlisting" id="listing-5"><span class="label"><a
id="x86-123001r1"></a></span><span
class="cmtt-9">subroutine</span><span
<pre class="lstlisting" id="listing-154"><span class="label"><a
id="x13-123001r1"></a></span><span style="color:#000000"><span
class="cmtt-9">subroutine</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">psb_foo</span></span><span style="color:#000000"><span
class="cmtt-9">(</span></span><span style="color:#000000"><span
class="cmtt-9">some</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">args</span></span><span style="color:#000000"><span
class="cmtt-9">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">info</span></span><span style="color:#000000"><span
class="cmtt-9">)</span></span>
<span class="label"><a
id="x13-123002r2"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">psb_foo</span><span
class="cmtt-9">(</span><span
class="cmtt-9">some</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">args</span><span
class="cmtt-9">,</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">...</span></span>
<span class="label"><a
id="x13-123003r3"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">info</span><span
class="cmtt-9">)</span><span
class="cmtt-9">&#x00A0;</span><br /><span class="label"><a
id="x86-123002r2"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">if</span></span><span style="color:#000000"><span
class="cmtt-9">(</span></span><span style="color:#000000"><span
class="cmtt-9">error</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">detected</span></span><span style="color:#000000"><span
class="cmtt-9">)</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">then</span></span>
<span class="label"><a
id="x13-123004r4"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">...</span><span
class="cmtt-9">&#x00A0;</span><br /><span class="label"><a
id="x86-123003r3"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">if</span><span
class="cmtt-9">(</span><span
class="cmtt-9">error</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">info</span></span><span style="color:#000000"><span
class="cmtt-9">=</span></span><span style="color:#000000"><span
class="cmtt-9">errcode1</span></span>
<span class="label"><a
id="x13-123005r5"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">detected</span><span
class="cmtt-9">)</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">then</span><span
class="cmtt-9">&#x00A0;</span><br /><span class="label"><a
id="x86-123004r4"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">psb_errpush</span></span><span style="color:#000000"><span
class="cmtt-9">(</span></span><span style="color:#000000"><span
class="cmtt-9">&#8217;</span></span><span style="color:#000000"><span
class="cmtt-9">psb_foo</span></span><span style="color:#000000"><span
class="cmtt-9">&#8217;</span></span><span style="color:#000000"><span
class="cmtt-9">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">errcode1</span></span><span style="color:#000000"><span
class="cmtt-9">)</span></span>
<span class="label"><a
id="x13-123006r6"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">info</span><span
class="cmtt-9">=</span><span
class="cmtt-9">errcode1</span><span
class="cmtt-9">&#x00A0;</span><br /><span class="label"><a
id="x86-123005r5"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">goto</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">9999</span></span>
<span class="label"><a
id="x13-123007r7"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">end</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">if</span></span>
<span class="label"><a
id="x13-123008r8"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">call</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">...</span></span>
<span class="label"><a
id="x13-123009r9"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">psb_errpush</span><span
class="cmtt-9">(</span><span
class="cmtt-9">&#8217;</span><span
class="cmtt-9">psb_foo</span><span
class="cmtt-9">&#8217;</span><span
class="cmtt-9">,</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">errcode1</span><span
class="cmtt-9">)</span><span
class="cmtt-9">&#x00A0;</span><br /><span class="label"><a
id="x86-123006r6"></a></span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">psb_bar</span></span><span style="color:#000000"><span
class="cmtt-9">(</span></span><span style="color:#000000"><span
class="cmtt-9">some</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">args</span></span><span style="color:#000000"><span
class="cmtt-9">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">info</span></span><span style="color:#000000"><span
class="cmtt-9">)</span></span>
<span class="label"><a
id="x13-123010r10"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">if</span></span><span style="color:#000000"><span
class="cmtt-9">(</span></span><span style="color:#000000"><span
class="cmtt-9">info</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">.</span></span><span style="color:#000000"><span
class="cmtt-9">ne</span></span><span style="color:#000000"><span
class="cmtt-9">.</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">zero</span></span><span style="color:#000000"><span
class="cmtt-9">)</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">then</span></span>
<span class="label"><a
id="x13-123011r11"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">goto</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">9999</span><span
class="cmtt-9">&#x00A0;</span><br /><span class="label"><a
id="x86-123007r7"></a></span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">info</span></span><span style="color:#000000"><span
class="cmtt-9">=</span></span><span style="color:#000000"><span
class="cmtt-9">errcode2</span></span>
<span class="label"><a
id="x13-123012r12"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">end</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">if</span><span
class="cmtt-9">&#x00A0;</span><br /><span class="label"><a
id="x86-123008r8"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">psb_errpush</span></span><span style="color:#000000"><span
class="cmtt-9">(</span></span><span style="color:#000000"><span
class="cmtt-9">&#8217;</span></span><span style="color:#000000"><span
class="cmtt-9">psb_foo</span></span><span style="color:#000000"><span
class="cmtt-9">&#8217;</span></span><span style="color:#000000"><span
class="cmtt-9">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">errcode2</span></span><span style="color:#000000"><span
class="cmtt-9">)</span></span>
<span class="label"><a
id="x13-123013r13"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">...</span><span
class="cmtt-9">&#x00A0;</span><br /><span class="label"><a
id="x86-123009r9"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">call</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">goto</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">9999</span></span>
<span class="label"><a
id="x13-123014r14"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">psb_bar</span><span
class="cmtt-9">(</span><span
class="cmtt-9">some</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">args</span><span
class="cmtt-9">,</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">end</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">if</span></span>
<span class="label"><a
id="x13-123015r15"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">info</span><span
class="cmtt-9">)</span><span
class="cmtt-9">&#x00A0;</span><br /><span class="label"><a
id="x86-123010r10"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">...</span></span>
<span class="label"><a
id="x13-123016r16"></a></span><span style="color:#000000"><span
class="cmtt-9">9999</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">continue</span></span>
<span class="label"><a
id="x13-123017r17"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">if</span><span
class="cmtt-9">(</span><span
class="cmtt-9">info</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">if</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">(</span></span><span style="color:#000000"><span
class="cmtt-9">err_act</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">.</span></span><span style="color:#000000"><span
class="cmtt-9">eq</span></span><span style="color:#000000"><span
class="cmtt-9">.</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">act_abort</span></span><span style="color:#000000"><span
class="cmtt-9">)</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">then</span></span>
<span class="label"><a
id="x13-123018r18"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">.</span><span
class="cmtt-9">ne</span><span
class="cmtt-9">.</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">zero</span><span
class="cmtt-9">)</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">then</span><span
class="cmtt-9">&#x00A0;</span><br /><span class="label"><a
id="x86-123011r11"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">psb_error</span></span><span style="color:#000000"><span
class="cmtt-9">(</span></span><span style="color:#000000"><span
class="cmtt-9">icontxt</span></span><span style="color:#000000"><span
class="cmtt-9">)</span></span>
<span class="label"><a
id="x13-123019r19"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">return</span></span>
<span class="label"><a
id="x13-123020r20"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">info</span><span
class="cmtt-9">=</span><span
class="cmtt-9">errcode2</span><span
class="cmtt-9">&#x00A0;</span><br /><span class="label"><a
id="x86-123012r12"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">else</span></span>
<span class="label"><a
id="x13-123021r21"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">return</span></span>
<span class="label"><a
id="x13-123022r22"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">call</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">psb_errpush</span><span
class="cmtt-9">(</span><span
class="cmtt-9">&#8217;</span><span
class="cmtt-9">psb_foo</span><span
class="cmtt-9">&#8217;</span><span
class="cmtt-9">,</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">errcode2</span><span
class="cmtt-9">)</span><span
class="cmtt-9">&#x00A0;</span><br /><span class="label"><a
id="x86-123013r13"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">goto</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">9999</span><span
class="cmtt-9">&#x00A0;</span><br /><span class="label"><a
id="x86-123014r14"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">end</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">if</span><span
class="cmtt-9">&#x00A0;</span><br /><span class="label"><a
id="x86-123015r15"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">...</span><span
class="cmtt-9">&#x00A0;</span><br /><span class="label"><a
id="x86-123016r16"></a></span><span
class="cmtt-9">9999</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">continue</span><span
class="cmtt-9">&#x00A0;</span><br /><span class="label"><a
id="x86-123017r17"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">if</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">(</span><span
class="cmtt-9">err_act</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">.</span><span
class="cmtt-9">eq</span><span
class="cmtt-9">.</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">act_abort</span><span
class="cmtt-9">)</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">then</span><span
class="cmtt-9">&#x00A0;</span><br /><span class="label"><a
id="x86-123018r18"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">call</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">psb_error</span><span
class="cmtt-9">(</span><span
class="cmtt-9">icontxt</span><span
class="cmtt-9">)</span><span
class="cmtt-9">&#x00A0;</span><br /><span class="label"><a
id="x86-123019r19"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">return</span><span
class="cmtt-9">&#x00A0;</span><br /><span class="label"><a
id="x86-123020r20"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">else</span><span
class="cmtt-9">&#x00A0;</span><br /><span class="label"><a
id="x86-123021r21"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">return</span><span
class="cmtt-9">&#x00A0;</span><br /><span class="label"><a
id="x86-123022r22"></a></span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">end</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">if</span><span
class="cmtt-9">&#x00A0;</span><br /><span class="label"><a
id="x86-123023r23"></a></span><span
class="cmtt-9">&#x00A0;</span><br /><span class="label"><a
id="x86-123024r24"></a></span><span
class="cmtt-9">end</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">subroutine</span><span
class="cmtt-9">&#x00A0;</span><span
class="cmtt-9">psb_foo</span>
</div> </div> </div>
class="cmtt-9">&#x00A0;</span><span style="color:#000000"><span
class="cmtt-9">end</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">if</span></span>
<span class="label"><a
id="x13-123023r23"></a></span>
<span class="label"><a
id="x13-123024r24"></a></span><span style="color:#000000"><span
class="cmtt-9">end</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">subroutine</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-9">psb_foo</span></span></pre></div></div>
</div>
<br /> <div class="caption"
><span class="id">Listing 5: </span><span
class="content">The layout of a generic <span
class="cmtt-10">psb</span><span
class="cmtt-10">_foo </span>routine with respect to PSBLAS-2.0
error handling policy.</span></div><!--tex4ht:label?: x86-123025r5 -->
error handling policy.</span></div><!--tex4ht:label?: x13-123025r5 -->
</div><hr class="endfloat" />
<!--l. 112--><p class="indent" > Figure&#x00A0;<a
href="#x86-123026r6">6<!--tex4ht:ref: fig:errormsg --></a> reports a sample error message generated by the PSBLAS-2.0
href="#x13-123026r6">6<!--tex4ht:ref: fig:errormsg --></a> reports a sample error message generated by the PSBLAS-2.0
library. This error has been generated by the fact that the user has chosen the
invalid &#8220;FOO&#8221; storage format to represent the sparse matrix. From this
error message it is possible to see that the error has been detected inside
@ -342,7 +313,7 @@ process).
<!--l. 120--><p class="indent" > <a
id="x86-123026r6"></a><hr class="float"><div class="float"
id="x13-123026r6"></a><hr class="float"><div class="float"
>
@ -371,7 +342,7 @@ Aborting...
<br /> <div class="caption"
><span class="id">Listing 6: </span><span
class="content">A sample PSBLAS-3.0 error message. Process 0 detected an error
condition inside the psb_cest subroutine</span></div><!--tex4ht:label?: x86-123026r6 -->
condition inside the psb_cest subroutine</span></div><!--tex4ht:label?: x13-123026r6 -->
@ -379,16 +350,224 @@ condition inside the psb_cest subroutine</span></div><!--tex4ht:label?: x86-1230
<div class="subsectionTOCS">
&#x00A0;<span class="subsectionToc" >8.1 <a
href="userhtmlsu74.html#x87-1240008.1">psb_errpush &#8212; Pushes an error code onto the error stack</a></span>
<br /> &#x00A0;<span class="subsectionToc" >8.2 <a
href="userhtmlsu75.html#x88-1250008.2">psb_error &#8212; Prints the error stack content and aborts execution</a></span>
<br /> &#x00A0;<span class="subsectionToc" >8.3 <a
href="userhtmlsu76.html#x89-1260008.3">psb_set_errverbosity &#8212; Sets the verbosity of error messages</a></span>
<br /> &#x00A0;<span class="subsectionToc" >8.4 <a
href="userhtmlsu77.html#x90-1270008.4">psb_set_erraction &#8212; Set the type of action to be taken upon error condition</a></span>
</div>
<h4 class="subsectionHead"><span class="titlemark">8.1 </span> <a
id="x13-1240008.1"></a>psb_errpush &#8212; Pushes an error code onto the error stack</h4>
<!--l. 174-->
<pre class="lstlisting" id="listing-155"><span class="label"><a
id="x13-124001r1"></a></span><span style="color:#000000"><span
class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">psb_errpush</span></span><span style="color:#000000"><span
class="cmtt-10">(</span></span><span style="color:#000000"><span
class="cmtt-10">err_c</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">r_name</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">i_err</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">a_err</span></span><span style="color:#000000"><span
class="cmtt-10">)</span></span></pre>
<!--l. 178--><p class="noindent" >
<dl class="description"><dt class="description">
<!--l. 179--><p class="noindent" >
<span
class="cmbx-10">Type:</span> </dt><dd
class="description">
<!--l. 179--><p class="noindent" >Asynchronous.
</dd><dt class="description">
<!--l. 180--><p class="noindent" >
<span
class="cmbx-10">On Entry</span> </dt><dd
class="description">
<!--l. 180--><p class="noindent" >
</dd><dt class="description">
<!--l. 181--><p class="noindent" >
<span
class="cmbx-10">err</span><span
class="cmbx-10">_c</span> </dt><dd
class="description">
<!--l. 181--><p class="noindent" >the error code<br
class="newline" />Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Specified as: an integer.
</dd><dt class="description">
<!--l. 186--><p class="noindent" >
<span
class="cmbx-10">r</span><span
class="cmbx-10">_name</span> </dt><dd
class="description">
<!--l. 186--><p class="noindent" >the routine where the error has been caught.<br
class="newline" />Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Specified as: a string.<br
class="newline" />
</dd><dt class="description">
<!--l. 191--><p class="noindent" >
<span
class="cmbx-10">i</span><span
class="cmbx-10">_err</span> </dt><dd
class="description">
<!--l. 191--><p class="noindent" >additional info for error code<br
class="newline" />Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">optional</span><br
class="newline" />Specified as: an integer array<br
class="newline" />
</dd><dt class="description">
<!--l. 195--><p class="noindent" >
<span
class="cmbx-10">a</span><span
class="cmbx-10">_err</span> </dt><dd
class="description">
<!--l. 195--><p class="noindent" >additional info for error code<br
class="newline" />Scope: <span
class="cmbx-10">local </span><br
class="newline" />Type: <span
class="cmbx-10">optional</span><br
class="newline" />Specified as: a string.<br
class="newline" /></dd></dl>
<h4 class="subsectionHead"><span class="titlemark">8.2 </span> <a
id="x13-1250008.2"></a>psb_error &#8212; Prints the error stack content and aborts execution</h4>
<!--l. 204-->
<pre class="lstlisting" id="listing-156"><span class="label"><a
id="x13-125001r1"></a></span><span style="color:#000000"><span
class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">psb_error</span></span><span style="color:#000000"><span
class="cmtt-10">(</span></span><span style="color:#000000"><span
class="cmtt-10">icontxt</span></span><span style="color:#000000"><span
class="cmtt-10">)</span></span></pre>
<!--l. 208--><p class="noindent" >
<dl class="description"><dt class="description">
<!--l. 209--><p class="noindent" >
<span
class="cmbx-10">Type:</span> </dt><dd
class="description">
<!--l. 209--><p class="noindent" >Asynchronous.
</dd><dt class="description">
<!--l. 210--><p class="noindent" >
<span
class="cmbx-10">On Entry</span> </dt><dd
class="description">
<!--l. 210--><p class="noindent" >
</dd><dt class="description">
<!--l. 211--><p class="noindent" >
<span
class="cmbx-10">icontxt</span> </dt><dd
class="description">
<!--l. 211--><p class="noindent" >the communication context.<br
class="newline" />Scope: <span
class="cmbx-10">global </span><br
class="newline" />Type: <span
class="cmbx-10">optional</span><br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Specified as: an integer.</dd></dl>
<h4 class="subsectionHead"><span class="titlemark">8.3 </span> <a
id="x13-1260008.3"></a>psb_set_errverbosity &#8212; Sets the verbosity of error messages</h4>
<!--l. 224-->
<pre class="lstlisting" id="listing-157"><span class="label"><a
id="x13-126001r1"></a></span><span style="color:#000000"><span
class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">psb_set_errverbosity</span></span><span style="color:#000000"><span
class="cmtt-10">(</span></span><span style="color:#000000"><span
class="cmtt-10">v</span></span><span style="color:#000000"><span
class="cmtt-10">)</span></span></pre>
<!--l. 228--><p class="noindent" >
<dl class="description"><dt class="description">
<!--l. 229--><p class="noindent" >
<span
class="cmbx-10">Type:</span> </dt><dd
class="description">
<!--l. 229--><p class="noindent" >Asynchronous.
</dd><dt class="description">
<!--l. 230--><p class="noindent" >
<span
class="cmbx-10">On Entry</span> </dt><dd
class="description">
<!--l. 230--><p class="noindent" >
</dd><dt class="description">
<!--l. 231--><p class="noindent" >
<span
class="cmbx-10">v</span> </dt><dd
class="description">
<!--l. 231--><p class="noindent" >the verbosity level<br
class="newline" />Scope: <span
class="cmbx-10">global</span><br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Specified as: an integer.</dd></dl>
<h4 class="subsectionHead"><span class="titlemark">8.4 </span> <a
id="x13-1270008.4"></a>psb_set_erraction &#8212; Set the type of action to be taken upon error
condition</h4>
<!--l. 241-->
<pre class="lstlisting" id="listing-158"><span class="label"><a
id="x13-127001r1"></a></span><span style="color:#000000"><span
class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">psb_set_erraction</span></span><span style="color:#000000"><span
class="cmtt-10">(</span></span><span style="color:#000000"><span
class="cmtt-10">err_act</span></span><span style="color:#000000"><span
class="cmtt-10">)</span></span></pre>
<!--l. 245--><p class="noindent" >
<dl class="description"><dt class="description">
<!--l. 246--><p class="noindent" >
<span
class="cmbx-10">Type:</span> </dt><dd
class="description">
<!--l. 246--><p class="noindent" >Asynchronous.
</dd><dt class="description">
<!--l. 247--><p class="noindent" >
<span
class="cmbx-10">On Entry</span> </dt><dd
class="description">
<!--l. 247--><p class="noindent" >
</dd><dt class="description">
<!--l. 248--><p class="noindent" >
<span
class="cmbx-10">err</span><span
class="cmbx-10">_act</span> </dt><dd
class="description">
<!--l. 248--><p class="noindent" >the type of action.<br
class="newline" />Scope: <span
class="cmbx-10">global </span><br
class="newline" />Type: <span
class="cmbx-10">required</span><br
class="newline" />Intent: <span
class="cmbx-10">in</span>.<br
class="newline" />Specified as: an integer. Possible values: <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_act_ret</span></span></span>, <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_act_abort</span></span></span>.</dd></dl>
@ -396,11 +575,11 @@ href="userhtmlsu77.html#x90-1270008.4">psb_set_erraction &#8212; Set the type of
<!--l. 1--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlsu80.html" >next</a>] [<a
href="userhtmlse12.html" >next</a>] [<a
href="userhtmlse7.html" >prev</a>] [<a
href="userhtmlse7.html#tailuserhtmlse7.html" >prev-tail</a>] [<a
href="userhtmlse8.html" >front</a>] [<a
href="userhtml.html#userhtmlsu76.html" >up</a>] </p></div>
href="userhtml.html#userhtmlse11.html" >up</a>] </p></div>
<!--l. 1--><p class="indent" > <a
id="tailuserhtmlse8.html"></a>
</body></html>

@ -11,33 +11,714 @@
</head><body
>
<!--l. 1--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlsu86.html" >next</a>] [<a
href="userhtmlse13.html" >next</a>] [<a
href="userhtmlse8.html" >prev</a>] [<a
href="userhtmlse8.html#tailuserhtmlse8.html" >prev-tail</a>] [<a
href="userhtmlsu75.html#tailuserhtmlse9.html">tail</a>] [<a
href="userhtml.html#userhtmlsu80.html" >up</a>] </p></div>
href="userhtmlse6.html#tailuserhtmlse9.html">tail</a>] [<a
href="userhtml.html#userhtmlse12.html" >up</a>] </p></div>
<h3 class="sectionHead"><span class="titlemark">9 </span> <a
id="x91-1280009"></a>Utilities</h3>
id="x14-1280009"></a>Utilities</h3>
<!--l. 4--><p class="noindent" >We have some utilities available for input and output of sparse matrices; the
interfaces to these routines are available in the module <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">psb_util_mod</span></span></span>.
<div class="subsectionTOCS">
&#x00A0;<span class="subsectionToc" >9.1 <a
href="userhtmlsu78.html#x92-1290009.1"> hb_read &#8212; Read a sparse matrix from a file in the Harwell&#8211;Boeing format</a></span>
<br /> &#x00A0;<span class="subsectionToc" >9.2 <a
href="userhtmlsu79.html#x93-1300009.2">hb_write &#8212; Write a sparse matrix to a file in the Harwell&#8211;Boeing format</a></span>
<br /> &#x00A0;<span class="subsectionToc" >9.3 <a
href="userhtmlsu80.html#x94-1310009.3">mm_mat_read &#8212; Read a sparse matrix from a file in the MatrixMarket format</a></span>
<br /> &#x00A0;<span class="subsectionToc" >9.4 <a
href="userhtmlsu81.html#x95-1320009.4">mm_array_read &#8212; Read a dense array from a file in the MatrixMarket format</a></span>
<br /> &#x00A0;<span class="subsectionToc" >9.5 <a
href="userhtmlsu82.html#x96-1330009.5">mm_mat_write &#8212; Write a sparse matrix to a file in the MatrixMarket format</a></span>
<br /> &#x00A0;<span class="subsectionToc" >9.6 <a
href="userhtmlsu83.html#x97-1340009.6">mm_array_write &#8212; Write a dense array to a file in the MatrixMarket format</a></span>
</div>
<h4 class="subsectionHead"><span class="titlemark">9.1 </span> <a
id="x14-1290009.1"></a> hb_read &#8212; Read a sparse matrix from a file in the Harwell&#8211;Boeing
format</h4>
<!--l. 16-->
<pre class="lstlisting" id="listing-159"><span class="label"><a
id="x14-129001r1"></a></span><span style="color:#000000"><span
class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">hb_read</span></span><span style="color:#000000"><span
class="cmtt-10">(</span></span><span style="color:#000000"><span
class="cmtt-10">a</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">iret</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">iunit</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">filename</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">b</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">mtitle</span></span><span style="color:#000000"><span
class="cmtt-10">)</span></span></pre>
<!--l. 20--><p class="noindent" >
<dl class="description"><dt class="description">
<!--l. 21--><p class="noindent" >
<span
class="cmbx-10">Type:</span> </dt><dd
class="description">
<!--l. 21--><p class="noindent" >Asynchronous.
</dd><dt class="description">
<!--l. 22--><p class="noindent" >
<span
class="cmbx-10">On Entry</span> </dt><dd
class="description">
<!--l. 22--><p class="noindent" >
</dd><dt class="description">
<!--l. 23--><p class="noindent" >
<span
class="cmbx-10">filename</span> </dt><dd
class="description">
<!--l. 23--><p class="noindent" >The name of the file to be read.<br
class="newline" />Type:<span
class="cmbx-10">optional</span>.<br
class="newline" />Specified as: a character variable containing a valid file name, or <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>, in
which case the default input unit 5 (i.e. standard input in Unix jargon) is
used. Default: <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>.
</dd><dt class="description">
<!--l. 28--><p class="noindent" >
<span
class="cmbx-10">iunit</span> </dt><dd
class="description">
<!--l. 28--><p class="noindent" >The Fortran file unit number.<br
class="newline" />Type:<span
class="cmbx-10">optional</span>.<br
class="newline" />Specified as: an integer value. Only meaningful if filename is not <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>.</dd></dl>
<!--l. 33--><p class="noindent" >
<dl class="description"><dt class="description">
<!--l. 34--><p class="noindent" >
<span
class="cmbx-10">On Return</span> </dt><dd
class="description">
<!--l. 34--><p class="noindent" >
</dd><dt class="description">
<!--l. 35--><p class="noindent" >
<span
class="cmbx-10">a</span> </dt><dd
class="description">
<!--l. 35--><p class="noindent" >the sparse matrix read from file.<br
class="newline" />Type:<span
class="cmbx-10">required</span>.<br
class="newline" />Specified as: a structured data of type <a
href="userhtmlse3.html#spdata"><span
class="cmtt-10">psb</span><span
class="cmtt-10">_Tspmat</span><span
class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 38--><p class="noindent" >
<span
class="cmbx-10">b</span> </dt><dd
class="description">
<!--l. 38--><p class="noindent" >Right hand side(s).<br
class="newline" />Type: <span
class="cmbx-10">Optional </span><br
class="newline" />An array of type real or complex, rank 2 and having the ALLOCATABLE
attribute; will be allocated and filled in if the input file contains a right
hand side, otherwise will be left in the UNALLOCATED state.
</dd><dt class="description">
<!--l. 43--><p class="noindent" >
<span
class="cmbx-10">mtitle</span> </dt><dd
class="description">
<!--l. 43--><p class="noindent" >Matrix title.<br
class="newline" />Type: <span
class="cmbx-10">Optional </span><br
class="newline" />A character variable of length 72 holding a copy of the matrix title as
specified by the Harwell-Boeing format and contained in the input file.
</dd><dt class="description">
<!--l. 48--><p class="noindent" >
<span
class="cmbx-10">iret</span> </dt><dd
class="description">
<!--l. 48--><p class="noindent" >Error code.<br
class="newline" />Type: <span
class="cmbx-10">required </span><br
class="newline" />An integer value; 0 means no error has been detected.</dd></dl>
<h4 class="subsectionHead"><span class="titlemark">9.2 </span> <a
id="x14-1300009.2"></a>hb_write &#8212; Write a sparse matrix to a file in the Harwell&#8211;Boeing
format</h4>
<!--l. 59-->
<pre class="lstlisting" id="listing-160"><span class="label"><a
id="x14-130001r1"></a></span><span style="color:#000000"><span
class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">hb_write</span></span><span style="color:#000000"><span
class="cmtt-10">(</span></span><span style="color:#000000"><span
class="cmtt-10">a</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">iret</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">iunit</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">filename</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">key</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">rhs</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">mtitle</span></span><span style="color:#000000"><span
class="cmtt-10">)</span></span></pre>
<!--l. 65--><p class="noindent" >
<dl class="description"><dt class="description">
<!--l. 66--><p class="noindent" >
<span
class="cmbx-10">Type:</span> </dt><dd
class="description">
<!--l. 66--><p class="noindent" >Asynchronous.
</dd><dt class="description">
<!--l. 67--><p class="noindent" >
<span
class="cmbx-10">On Entry</span> </dt><dd
class="description">
<!--l. 67--><p class="noindent" >
</dd><dt class="description">
<!--l. 68--><p class="noindent" >
<span
class="cmbx-10">a</span> </dt><dd
class="description">
<!--l. 68--><p class="noindent" >the sparse matrix to be written.<br
class="newline" />Type:<span
class="cmbx-10">required</span>.<br
class="newline" />Specified as: a structured data of type <a
href="userhtmlse3.html#spdata"><span
class="cmtt-10">psb</span><span
class="cmtt-10">_Tspmat</span><span
class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 71--><p class="noindent" >
<span
class="cmbx-10">rhs</span> </dt><dd
class="description">
<!--l. 71--><p class="noindent" >Right hand side.<br
class="newline" />Type: <span
class="cmbx-10">Optional </span><br
class="newline" />An array of type real or complex, rank 1 and having the ALLOCATABLE
attribute; if present, its contents are written to the file as the right
hand side.
</dd><dt class="description">
<!--l. 76--><p class="noindent" >
<span
class="cmbx-10">filename</span> </dt><dd
class="description">
<!--l. 76--><p class="noindent" >The name of the file to be written to.<br
class="newline" />Type:<span
class="cmbx-10">optional</span>.<br
class="newline" />Specified as: a character variable containing a valid file name, or <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>, in
which case the default output unit 6 (i.e. standard output in Unix jargon)
is used. Default: <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>.
</dd><dt class="description">
<!--l. 81--><p class="noindent" >
<span
class="cmbx-10">iunit</span> </dt><dd
class="description">
<!--l. 81--><p class="noindent" >The Fortran file unit number.<br
class="newline" />Type:<span
class="cmbx-10">optional</span>.<br
class="newline" />Specified as: an integer value. Only meaningful if filename is not <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>.
</dd><dt class="description">
<!--l. 84--><p class="noindent" >
<span
class="cmbx-10">key</span> </dt><dd
class="description">
<!--l. 84--><p class="noindent" >Matrix key.<br
class="newline" />Type: <span
class="cmbx-10">Optional </span><br
class="newline" />A character variable of length 8 holding the matrix key as specified by
the Harwell-Boeing format and to be written to file.
</dd><dt class="description">
<!--l. 89--><p class="noindent" >
<span
class="cmbx-10">mtitle</span> </dt><dd
class="description">
<!--l. 89--><p class="noindent" >Matrix title.<br
class="newline" />Type: <span
class="cmbx-10">Optional </span><br
class="newline" />A character variable of length 72 holding the matrix title as specified by
the Harwell-Boeing format and to be written to file.</dd></dl>
<!--l. 96--><p class="noindent" >
<dl class="description"><dt class="description">
<!--l. 97--><p class="noindent" >
<span
class="cmbx-10">On Return</span> </dt><dd
class="description">
<!--l. 97--><p class="noindent" >
</dd><dt class="description">
<!--l. 98--><p class="noindent" >
<span
class="cmbx-10">iret</span> </dt><dd
class="description">
<!--l. 98--><p class="noindent" >Error code.<br
class="newline" />Type: <span
class="cmbx-10">required </span><br
class="newline" />An integer value; 0 means no error has been detected.</dd></dl>
<h4 class="subsectionHead"><span class="titlemark">9.3 </span> <a
id="x14-1310009.3"></a>mm_mat_read &#8212; Read a sparse matrix from a file in the MatrixMarket
format</h4>
<!--l. 111-->
<pre class="lstlisting" id="listing-161"><span class="label"><a
id="x14-131001r1"></a></span><span style="color:#000000"><span
class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">mm_mat_read</span></span><span style="color:#000000"><span
class="cmtt-10">(</span></span><span style="color:#000000"><span
class="cmtt-10">a</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">iret</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">iunit</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">filename</span></span><span style="color:#000000"><span
class="cmtt-10">)</span></span></pre>
<!--l. 115--><p class="noindent" >
<dl class="description"><dt class="description">
<!--l. 116--><p class="noindent" >
<span
class="cmbx-10">Type:</span> </dt><dd
class="description">
<!--l. 116--><p class="noindent" >Asynchronous.
</dd><dt class="description">
<!--l. 117--><p class="noindent" >
<span
class="cmbx-10">On Entry</span> </dt><dd
class="description">
<!--l. 117--><p class="noindent" >
</dd><dt class="description">
<!--l. 118--><p class="noindent" >
<span
class="cmbx-10">filename</span> </dt><dd
class="description">
<!--l. 118--><p class="noindent" >The name of the file to be read.<br
class="newline" />Type:<span
class="cmbx-10">optional</span>.<br
class="newline" />Specified as: a character variable containing a valid file name, or <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>, in
which case the default input unit 5 (i.e. standard input in Unix jargon) is
used. Default: <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>.
</dd><dt class="description">
<!--l. 123--><p class="noindent" >
<span
class="cmbx-10">iunit</span> </dt><dd
class="description">
<!--l. 123--><p class="noindent" >The Fortran file unit number.<br
class="newline" />Type:<span
class="cmbx-10">optional</span>.<br
class="newline" />Specified as: an integer value. Only meaningful if filename is not <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>.</dd></dl>
<!--l. 128--><p class="noindent" >
<dl class="description"><dt class="description">
<!--l. 129--><p class="noindent" >
<span
class="cmbx-10">On Return</span> </dt><dd
class="description">
<!--l. 129--><p class="noindent" >
</dd><dt class="description">
<!--l. 130--><p class="noindent" >
<span
class="cmbx-10">a</span> </dt><dd
class="description">
<!--l. 130--><p class="noindent" >the sparse matrix read from file.<br
class="newline" />Type:<span
class="cmbx-10">required</span>.<br
class="newline" />Specified as: a structured data of type <a
href="userhtmlse3.html#spdata"><span
class="cmtt-10">psb</span><span
class="cmtt-10">_Tspmat</span><span
class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 133--><p class="noindent" >
<span
class="cmbx-10">iret</span> </dt><dd
class="description">
<!--l. 133--><p class="noindent" >Error code.<br
class="newline" />Type: <span
class="cmbx-10">required </span><br
class="newline" />An integer value; 0 means no error has been detected.</dd></dl>
<h4 class="subsectionHead"><span class="titlemark">9.4 </span> <a
id="x14-1320009.4"></a>mm_array_read &#8212; Read a dense array from a file in the MatrixMarket
format</h4>
<!--l. 142-->
<pre class="lstlisting" id="listing-162"><span class="label"><a
id="x14-132001r1"></a></span><span style="color:#000000"><span
class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">mm_array_read</span></span><span style="color:#000000"><span
class="cmtt-10">(</span></span><span style="color:#000000"><span
class="cmtt-10">b</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">iret</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">iunit</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">filename</span></span><span style="color:#000000"><span
class="cmtt-10">)</span></span></pre>
<!--l. 146--><p class="noindent" >
<dl class="description"><dt class="description">
<!--l. 147--><p class="noindent" >
<span
class="cmbx-10">Type:</span> </dt><dd
class="description">
<!--l. 147--><p class="noindent" >Asynchronous.
</dd><dt class="description">
<!--l. 148--><p class="noindent" >
<span
class="cmbx-10">On Entry</span> </dt><dd
class="description">
<!--l. 148--><p class="noindent" >
</dd><dt class="description">
<!--l. 149--><p class="noindent" >
<span
class="cmbx-10">filename</span> </dt><dd
class="description">
<!--l. 149--><p class="noindent" >The name of the file to be read.<br
class="newline" />Type:<span
class="cmbx-10">optional</span>.<br
class="newline" />Specified as: a character variable containing a valid file name, or <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>, in
which case the default input unit 5 (i.e. standard input in Unix jargon) is
used. Default: <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>.
</dd><dt class="description">
<!--l. 154--><p class="noindent" >
<span
class="cmbx-10">iunit</span> </dt><dd
class="description">
<!--l. 154--><p class="noindent" >The Fortran file unit number.<br
class="newline" />Type:<span
class="cmbx-10">optional</span>.<br
class="newline" />Specified as: an integer value. Only meaningful if filename is not <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>.</dd></dl>
<!--l. 159--><p class="noindent" >
<dl class="description"><dt class="description">
<!--l. 160--><p class="noindent" >
<span
class="cmbx-10">On Return</span> </dt><dd
class="description">
<!--l. 160--><p class="noindent" >
</dd><dt class="description">
<!--l. 161--><p class="noindent" >
<span
class="cmbx-10">b</span> </dt><dd
class="description">
<!--l. 161--><p class="noindent" >Right hand side(s).<br
class="newline" />Type: <span
class="cmbx-10">required </span><br
class="newline" />An array of type real or complex, rank 1 or 2 and having the
ALLOCATABLE attribute, or an object of type <a
href="userhtmlse3.html#vdata"><span
class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>, of
type real or complex.<br
class="newline" />Will be allocated and filled in if the input file contains a right hand side,
otherwise will be left in the UNALLOCATED state. <br
class="newline" />
</dd><dt class="description">
<!--l. 168--><p class="noindent" >
<span
class="cmbx-10">iret</span> </dt><dd
class="description">
<!--l. 168--><p class="noindent" >Error code.<br
class="newline" />Type: <span
class="cmbx-10">required </span><br
class="newline" />An integer value; 0 means no error has been detected.</dd></dl>
<h4 class="subsectionHead"><span class="titlemark">9.5 </span> <a
id="x14-1330009.5"></a>mm_mat_write &#8212; Write a sparse matrix to a file in the MatrixMarket
format</h4>
<!--l. 179-->
<pre class="lstlisting" id="listing-163"><span class="label"><a
id="x14-133001r1"></a></span><span style="color:#000000"><span
class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">mm_mat_write</span></span><span style="color:#000000"><span
class="cmtt-10">(</span></span><span style="color:#000000"><span
class="cmtt-10">a</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">mtitle</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">iret</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">iunit</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">filename</span></span><span style="color:#000000"><span
class="cmtt-10">)</span></span></pre>
<!--l. 182--><p class="noindent" >
<dl class="description"><dt class="description">
<!--l. 183--><p class="noindent" >
<span
class="cmbx-10">Type:</span> </dt><dd
class="description">
<!--l. 183--><p class="noindent" >Asynchronous.
</dd><dt class="description">
<!--l. 184--><p class="noindent" >
<span
class="cmbx-10">On Entry</span> </dt><dd
class="description">
<!--l. 184--><p class="noindent" >
</dd><dt class="description">
<!--l. 185--><p class="noindent" >
<span
class="cmbx-10">a</span> </dt><dd
class="description">
<!--l. 185--><p class="noindent" >the sparse matrix to be written.<br
class="newline" />Type:<span
class="cmbx-10">required</span>.<br
class="newline" />Specified as: a structured data of type <a
href="userhtmlse3.html#spdata"><span
class="cmtt-10">psb</span><span
class="cmtt-10">_Tspmat</span><span
class="cmtt-10">_type</span></a>.
</dd><dt class="description">
<!--l. 188--><p class="noindent" >
<span
class="cmbx-10">mtitle</span> </dt><dd
class="description">
<!--l. 188--><p class="noindent" >Matrix title.<br
class="newline" />Type: <span
class="cmbx-10">required </span><br
class="newline" />A character variable holding a descriptive title for the matrix to be
written to file.
</dd><dt class="description">
<!--l. 192--><p class="noindent" >
<span
class="cmbx-10">filename</span> </dt><dd
class="description">
<!--l. 192--><p class="noindent" >The name of the file to be written to.<br
class="newline" />Type:<span
class="cmbx-10">optional</span>.<br
class="newline" />Specified as: a character variable containing a valid file name, or <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>, in
which case the default output unit 6 (i.e. standard output in Unix jargon)
is used. Default: <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>.
</dd><dt class="description">
<!--l. 197--><p class="noindent" >
<span
class="cmbx-10">iunit</span> </dt><dd
class="description">
<!--l. 197--><p class="noindent" >The Fortran file unit number.<br
class="newline" />Type:<span
class="cmbx-10">optional</span>.<br
class="newline" />Specified as: an integer value. Only meaningful if filename is not <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>.</dd></dl>
<!--l. 202--><p class="noindent" >
<dl class="description"><dt class="description">
<!--l. 203--><p class="noindent" >
<span
class="cmbx-10">On Return</span> </dt><dd
class="description">
<!--l. 203--><p class="noindent" >
</dd><dt class="description">
<!--l. 204--><p class="noindent" >
<span
class="cmbx-10">iret</span> </dt><dd
class="description">
<!--l. 204--><p class="noindent" >Error code.<br
class="newline" />Type: <span
class="cmbx-10">required </span><br
class="newline" />An integer value; 0 means no error has been detected.</dd></dl>
<!--l. 209--><p class="noindent" ><span
class="cmbx-12">Notes</span>
<!--l. 211--><p class="indent" > If this function is called on a matrix <code class="lstinline"><span style="color:#000000">a</span></code> on a distributed communicator only the
local part is written in output. To get a single MatrixMarket file with the whole
matrix when appropriate, e.g. for debugging purposes, one could <span
class="cmti-10">gather </span>the whole
matrix on a single rank and then write it. Consider the following example for a
<span
class="cmti-10">double </span>precision matrix
<div class="center"
>
<!--l. 227--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-93">
type(psb_ldspmat_type)&#x00A0;::&#x00A0;aglobal
call&#x00A0;psb_gather(aglobal,a,desc_a,info)
if&#x00A0;(iam&#x00A0;==&#x00A0;psb_root_)&#x00A0;then
call&#x00A0;mm_mat_write(aglobal,mtitle,info,filename)
end&#x00A0;if
call&#x00A0;psb_spfree(aglobal,&#x00A0;desc_a,&#x00A0;info)
</pre>
<!--l. 237--><p class="nopar" > </div></div>
<!--l. 241--><p class="noindent" >To simplify this procedure in <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">C</span></span></span>, there is a utility function
<div class="center"
>
<!--l. 247--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-94">
psb_i_t&#x00A0;psb_c_&#x003C;s,d,c,z&#x003E;global_mat_write(ah,cdh);
</pre>
<!--l. 251--><p class="nopar" > </div></div>
<!--l. 255--><p class="noindent" >that produces exactly this result.
<h4 class="subsectionHead"><span class="titlemark">9.6 </span> <a
id="x14-1340009.6"></a>mm_array_write &#8212; Write a dense array to a file in the MatrixMarket
format</h4>
<!--l. 261-->
<pre class="lstlisting" id="listing-165"><span class="label"><a
id="x14-134001r1"></a></span><span style="color:#000000"><span
class="cmtt-10">call</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">mm_array_write</span></span><span style="color:#000000"><span
class="cmtt-10">(</span></span><span style="color:#000000"><span
class="cmtt-10">b</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">vtitle</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">iret</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">iunit</span></span><span style="color:#000000"><span
class="cmtt-10">,</span></span><span style="color:#000000"> </span><span style="color:#000000"><span
class="cmtt-10">filename</span></span><span style="color:#000000"><span
class="cmtt-10">)</span></span></pre>
<!--l. 265--><p class="noindent" >
<dl class="description"><dt class="description">
<!--l. 266--><p class="noindent" >
<span
class="cmbx-10">Type:</span> </dt><dd
class="description">
<!--l. 266--><p class="noindent" >Asynchronous.
</dd><dt class="description">
<!--l. 267--><p class="noindent" >
<span
class="cmbx-10">On Entry</span> </dt><dd
class="description">
<!--l. 267--><p class="noindent" >
</dd><dt class="description">
<!--l. 268--><p class="noindent" >
<span
class="cmbx-10">b</span> </dt><dd
class="description">
<!--l. 268--><p class="noindent" >Right hand side(s).<br
class="newline" />Type: <span
class="cmbx-10">required </span><br
class="newline" />An array of type real or complex, rank 1 or 2, or an object of type
<a
href="userhtmlse3.html#vdata"><span
class="cmtt-10">psb</span><span
class="cmtt-10">_T</span><span
class="cmtt-10">_vect</span><span
class="cmtt-10">_type</span></a>, of type real or complex; its contents will be written to
disk.<br
class="newline" />
</dd><dt class="description">
<!--l. 273--><p class="noindent" >
<span
class="cmbx-10">filename</span> </dt><dd
class="description">
<!--l. 273--><p class="noindent" >The name of the file to be written to.<br
class="newline" />Type:<span
class="cmbx-10">optional</span>.<br
class="newline" />Specified as: a character variable containing a valid file name, or <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>, in
which case the default output unit 6 (i.e. standard output in Unix jargon)
is used. Default: <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>.
</dd><dt class="description">
<!--l. 274--><p class="noindent" >
<span
class="cmbx-10">vtitle</span> </dt><dd
class="description">
<!--l. 274--><p class="noindent" >Vector title.<br
class="newline" />Type: <span
class="cmbx-10">required </span><br
class="newline" />A character variable holding a descriptive title for the vector to be written
to file.
</dd><dt class="description">
<!--l. 282--><p class="noindent" >
<span
class="cmbx-10">iunit</span> </dt><dd
class="description">
<!--l. 282--><p class="noindent" >The Fortran file unit number.<br
class="newline" />Type:<span
class="cmbx-10">optional</span>.<br
class="newline" />Specified as: an integer value. Only meaningful if filename is not <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">-</span></span></span>.</dd></dl>
<!--l. 287--><p class="noindent" >
<dl class="description"><dt class="description">
<!--l. 288--><p class="noindent" >
<span
class="cmbx-10">On Return</span> </dt><dd
class="description">
<!--l. 288--><p class="noindent" >
</dd><dt class="description">
<!--l. 289--><p class="noindent" >
<span
class="cmbx-10">iret</span> </dt><dd
class="description">
<!--l. 289--><p class="noindent" >Error code.<br
class="newline" />Type: <span
class="cmbx-10">required </span><br
class="newline" />An integer value; 0 means no error has been detected.</dd></dl>
<!--l. 294--><p class="noindent" ><span
class="cmbx-12">Notes</span>
<!--l. 296--><p class="indent" > If this function is called on a vector <code class="lstinline"><span style="color:#000000">v</span></code> on a distributed communicator only the
local part is written in output. To get a single MatrixMarket file with the whole
vector when appropriate, e.g. for debugging purposes, one could <span
class="cmti-10">gather </span>the whole
vector on a single rank and then write it. Consider the following example for a <span
class="cmti-10">double</span>
precision vector
<div class="center"
>
<!--l. 312--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-95">
real(psb_dpk_),&#x00A0;allocatable&#x00A0;::&#x00A0;vglobal(:)
call&#x00A0;psb_gather(vglobal,v,desc,info)
if&#x00A0;(iam&#x00A0;==&#x00A0;psb_root_)&#x00A0;then
call&#x00A0;mm_array_write(vglobal,vtitle,info,filename)
end&#x00A0;if
deallocate(vglobal,&#x00A0;stat=info)
</pre>
<!--l. 322--><p class="nopar" > </div></div>
<!--l. 326--><p class="noindent" >To simplify this procedure in <span class="obeylines-h"><span class="verb"><span
class="cmtt-10">C</span></span></span>, there is a utility function
<div class="center"
>
<!--l. 332--><p class="noindent" >
<div class="minipage"><pre class="verbatim" id="verbatim-96">
psb_i_t&#x00A0;psb_c_&#x003C;s,d,c,z&#x003E;global_vec_write(vh,cdh);
</pre>
<!--l. 336--><p class="nopar" > </div></div>
<!--l. 340--><p class="noindent" >that produces exactly this result.
@ -47,11 +728,11 @@ href="userhtmlsu83.html#x97-1340009.6">mm_array_write &#8212; Write a dense arra
<!--l. 1--><div class="crosslinks"><p class="noindent">[<a
href="userhtmlsu86.html" >next</a>] [<a
href="userhtmlse13.html" >next</a>] [<a
href="userhtmlse8.html" >prev</a>] [<a
href="userhtmlse8.html#tailuserhtmlse8.html" >prev-tail</a>] [<a
href="userhtmlse9.html" >front</a>] [<a
href="userhtml.html#userhtmlsu80.html" >up</a>] </p></div>
href="userhtml.html#userhtmlse12.html" >up</a>] </p></div>
<!--l. 1--><p class="indent" > <a
id="tailuserhtmlse9.html"></a>
</body></html>

File diff suppressed because one or more lines are too long

@ -86,7 +86,8 @@
TOPFILE = userguide.tex
HTMLFILE = userhtml.tex
SECFILE = intro.tex commrout.tex datastruct.tex psbrout.tex toolsrout.tex\
methods.tex precs.tex penv.tex error.tex util.tex biblio.tex
methods.tex precs.tex penv.tex error.tex util.tex biblio.tex \
ext-intro.tex cuda.tex
FIGDIR = figures
XPDFFLAGS =
@ -139,7 +140,7 @@ PDF = $(join $(BASEFILE),.pdf)
PS = $(join $(BASEFILE),.ps)
GXS = $(join $(BASEFILE),.gxs)
GLX = $(join $(BASEFILE),.glx)
TARGETPDF= ../psblas-3.8.pdf
TARGETPDF= ../psblas-3.9.pdf
BASEHTML = $(patsubst %.tex,%,$(HTMLFILE))
HTML = $(join $(BASEHTML),.html)
HTMLDIR = ../html

@ -1,9 +1,5 @@
\begin{thebibliography}{99}
\bibitem{DesPat:11}
D.~Barbieri, V.~Cardellini, S.~Filippone and D.~Rouson
{\em Design Patterns for Scientific Computations on Sparse Matrices},
HPSS 2011, Algorithms and Programming Tools for Next-Generation High-Performance Scientific Software, Bordeaux, Sep. 2011
\bibitem{PARA04FOREST}
G.~Bella, S.~Filippone, A.~De Maio and M.~Testa,
@ -154,6 +150,11 @@ Lawson, C., Hanson, R., Kincaid, D. and Krogh, F.,
{\em Fortran 95/2003 explained.}
{Oxford University Press}, 2004.
%
\bibitem{MRC:11}
{Metcalf, M., Reid, J. and Cohen, M.}
{\em Modern Fortran explained.}
{Oxford University Press}, 2011.
%
%% \bibitem{DD2}
%% B.~Smith, P.~Bjorstad and W.~Gropp,
%% {\em Domain Decomposition: Parallel Multilevel Methods for Elliptic
@ -169,4 +170,20 @@ M.~Snir, S.~Otto, S.~Huss-Lederman, D.~Walker and J.~Dongarra,
{\em MPI: The Complete Reference. Volume 1 - The MPI Core}, second edition,
MIT Press, 1998.
%
\bibitem{DesPat:11}
D.~Barbieri, V.~Cardellini, S.~Filippone and D.~Rouson
{\em Design Patterns for Scientific Computations on Sparse Matrices},
HPSS 2011, Algorithms and Programming Tools for Next-Generation High-Performance Scientific Software, Bordeaux, Sep. 2011
\bibitem{CaFiRo:2014}
{ Cardellini, V.}, { Filippone, S.}, { and} { Rouson, D.} 2014,
Design patterns for sparse-matrix computations on hybrid {CPU/GPU}
platforms,
{\em Scientific Programming\/}~{\em 22,\/}~1, 1--19.
\bibitem{OurTechRep}
D.~Barbieri, V.~Cardellini, A.~Fanfarillo, S.~Filippone, Three storage formats
for sparse matrices on {GPGPUs}, Tech. Rep. DICII RR-15.6, Universit\`a di
Roma Tor Vergata (February 2015).
\end{thebibliography}

@ -0,0 +1,395 @@
\subsection{CUDA-class extensions}
For computing with CUDA we define a dual storage strategy in
which each variable on the CPU (``host'') side has a GPU (``device'')
counterpart. When a GPU-type variable is initialized, the data it contains is
(usually) the same on both sides. Each operator invoked on the
variable may change the data so that only the host side or only the device
side is up to date.
Keeping track of the updates to data in the variables is essential: we want
to perform most computations on the GPU, but we cannot afford the time
needed to move data between the host memory and the device memory
because the bandwidth of the interconnection bus would become the main
bottleneck of the computation. Thus, each and every computational
routine in the library is built according to the following principles:
\begin{itemize}
\item If the data type being handled is {GPU}-enabled, make sure that
its device copy is up to date, perform any arithmetic operation on
the {GPU}, and if the data has been altered as a result, mark
the main-memory copy as outdated.
\item The main-memory copy is never updated unless this is requested
by the user either
\begin{description}
\item[explicitly] by invoking a synchronization method;
\item[implicitly] by invoking a method that involves other data items
that are not {GPU}-enabled, e.g., by assignment of a vector to a
normal array.
\end{description}
\end{itemize}
In this way, data items are put on the {GPU} memory ``on demand'' and
remain there as long as ``normal'' computations are carried out.
As an example, the following call to a matrix-vector product
\ifpdf
\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran}
call psb_spmm(alpha,a,x,beta,y,desc_a,info)
\end{minted}
\else
\begin{center}
\begin{minipage}[tl]{0.9\textwidth}
\begin{verbatim}
call psb_spmm(alpha,a,x,beta,y,desc_a,info)
\end{verbatim}
\end{minipage}
\end{center}
\fi
will transparently and automatically be performed on the {GPU} whenever
all three data inputs \fortinline|a|, \fortinline|x| and
\fortinline|y| are {GPU}-enabled. If a program makes many such calls
sequentially, then
\begin{itemize}
\item The first kernel invocation will find the data in main memory,
and will copy it to the {GPU} memory, thus incurring a significant
overhead; the result is however \emph{not} copied back, and
therefore:
\item Subsequent kernel invocations involving the same vector will
find the data on the {GPU} side so that they will run at full
speed.
\end{itemize}
For all invocations after the first the only data that will have to be
transferred to/from the main memory will be the scalars \fortinline|alpha|
and \fortinline|beta|, and the return code \fortinline|info|.
\begin{description}
\item[Vectors:] The data type \fortinline|psb_T_vect_cuda| provides a
GPU-enabled extension of the inner type \fortinline|psb_T_base_vect_type|,
and must be used together with the GPU-enabled matrix types listed below
to make full use of the GPU computational capabilities;
\item[CSR:] The data type \fortinline|psb_T_csrg_sparse_mat| provides an
interface to the GPU version of CSR available in the NVIDIA CuSPARSE
library;
\item[HYB:] The data type \fortinline|psb_T_hybg_sparse_mat| provides an
interface to the HYB GPU storage available in the NVIDIA CuSPARSE
library. The internal structure is opaque, hence the host side is
just CSR; the HYB data format is only available up to CUDA version
10.
\item[ELL:] The data type \fortinline|psb_T_elg_sparse_mat| provides an
interface to the ELLPACK implementation from SPGPU;
\item[HLL:] The data type \fortinline|psb_T_hlg_sparse_mat| provides an
interface to the Hacked ELLPACK implementation from SPGPU;
\item[HDIA:] The data type \fortinline|psb_T_hdiag_sparse_mat| provides an
interface to the Hacked DIAgonals implementation from SPGPU;
\end{description}
\section{CUDA Environment Routines}
\label{sec:cudaenv}
\subsection*{psb\_cuda\_init --- Initializes PSBLAS-CUDA
environment}
\addcontentsline{toc}{subsection}{psb\_cuda\_init}
\ifpdf
\begin{minted}[breaklines=true]{fortran}
call psb_cuda_init(ctxt [, device])
\end{minted}
\else
\begin{center}
\begin{minipage}[tl]{0.9\textwidth}
\begin{verbatim}
call psb_cuda_init(ctxt [, device])
\end{verbatim}
\end{minipage}
\end{center}
\fi
This subroutine initializes the PSBLAS-CUDA environment.
\begin{description}
\item[Type:] Synchronous.
\item[\bf On Entry ]
\item[device] ID of CUDA device to attach to.\\
Scope: {\bf local}.\\
Type: {\bf optional}.\\
Intent: {\bf in}.\\
Specified as: an integer value.\\
Default: use \fortinline|mod(iam,ngpu)| where \fortinline|iam| is the calling
process index and \fortinline|ngpu| is the total number of CUDA devices
available on the current node.
\end{description}
{\par\noindent\large\bfseries Notes}
\begin{enumerate}
\item A call to this routine must precede any other PSBLAS-CUDA call.
\end{enumerate}
\subsection*{psb\_cuda\_exit --- Exit from PSBLAS-CUDA
environment}
\addcontentsline{toc}{subsection}{psb\_cuda\_exit}
\ifpdf
\begin{minted}[breaklines=true]{fortran}
call psb_cuda_exit(ctxt)
\end{minted}
\else
\begin{center}
\begin{minipage}[tl]{0.9\textwidth}
\begin{verbatim}
call psb_cuda_exit(ctxt)
\end{verbatim}
\end{minipage}
\end{center}
\fi
This subroutine exits from the PSBLAS CUDA context.
\begin{description}
\item[Type:] Synchronous.
\item[\bf On Entry ]
\item[ctxt] the communication context identifying the virtual
parallel machine.\\
Scope: {\bf global}.\\
Type: {\bf required}.\\
Intent: {\bf in}.\\
Specified as: an integer variable.
\end{description}
\subsection*{psb\_cuda\_DeviceSync --- Synchronize CUDA device}
\addcontentsline{toc}{subsection}{psb\_cuda\_DeviceSync}
\ifpdf
\begin{minted}[breaklines=true]{fortran}
call psb_cuda_DeviceSync()
\end{minted}
\else
\begin{center}
\begin{minipage}[tl]{0.9\textwidth}
\begin{verbatim}
call psb_cuda_DeviceSync()
\end{verbatim}
\end{minipage}
\end{center}
\fi
This subroutine ensures that all previously invoked kernels, i.e. all
invocations of CUDA-side code, have completed.
\subsection*{psb\_cuda\_getDeviceCount }
\addcontentsline{toc}{subsection}{psb\_cuda\_getDeviceCount}
\ifpdf
\begin{minted}[breaklines=true]{fortran}
ngpus = psb_cuda_getDeviceCount()
\end{minted}
\else
\begin{center}
\begin{minipage}[tl]{0.9\textwidth}
\begin{verbatim}
ngpus = psb_cuda_getDeviceCount()
\end{verbatim}
\end{minipage}
\end{center}
\fi
Get number of devices available on current computing node.
\subsection*{psb\_cuda\_getDevice }
\addcontentsline{toc}{subsection}{psb\_cuda\_getDevice}
\ifpdf
\begin{minted}[breaklines=true]{fortran}
dev = psb_cuda_getDevice()
\end{minted}
\else
\begin{center}
\begin{minipage}[tl]{0.9\textwidth}
\begin{verbatim}
dev = psb_cuda_getDevice()
\end{verbatim}
\end{minipage}
\end{center}
\fi
Get device in use by current process.
\subsection*{psb\_cuda\_setDevice }
\addcontentsline{toc}{subsection}{psb\_cuda\_setDevice}
\ifpdf
\begin{minted}[breaklines=true]{fortran}
info = psb_cuda_setDevice(dev)
\end{minted}
\else
\begin{center}
\begin{minipage}[tl]{0.9\textwidth}
\begin{verbatim}
info = psb_cuda_setDevice(dev)
\end{verbatim}
\end{minipage}
\end{center}
\fi
Set device to be used by current process.
\subsection*{psb\_cuda\_DeviceHasUVA }
\addcontentsline{toc}{subsection}{psb\_cuda\_DeviceHasUVA}
\ifpdf
\begin{minted}[breaklines=true]{fortran}
hasUva = psb_cuda_DeviceHasUVA()
\end{minted}
\else
\begin{center}
\begin{minipage}[tl]{0.9\textwidth}
\begin{verbatim}
hasUva = psb_cuda_DeviceHasUVA()
\end{verbatim}
\end{minipage}
\end{center}
\fi
Returns true if device currently in use supports UVA
(Unified Virtual Addressing).
\subsection*{psb\_cuda\_WarpSize }
\addcontentsline{toc}{subsection}{psb\_cuda\_WarpSize}
\ifpdf
\begin{minted}[breaklines=true]{fortran}
nw = psb_cuda_WarpSize()
\end{minted}
\else
\begin{center}
\begin{minipage}[tl]{0.9\textwidth}
\begin{verbatim}
nw = psb_cuda_WarpSize()
\end{verbatim}
\end{minipage}
\end{center}
\fi
Returns the warp size.
\subsection*{psb\_cuda\_MultiProcessors }
\addcontentsline{toc}{subsection}{psb\_cuda\_MultiProcessors}
\ifpdf
\begin{minted}[breaklines=true]{fortran}
nmp = psb_cuda_MultiProcessors()
\end{minted}
\else
\begin{center}
\begin{minipage}[tl]{0.9\textwidth}
\begin{verbatim}
nmp = psb_cuda_MultiProcessors()
\end{verbatim}
\end{minipage}
\end{center}
\fi
Returns the number of multiprocessors in the CUDA device.
\subsection*{psb\_cuda\_MaxThreadsPerMP }
\addcontentsline{toc}{subsection}{psb\_cuda\_MaxThreadsPerMP}
\ifpdf
\begin{minted}[breaklines=true]{fortran}
nt = psb_cuda_MaxThreadsPerMP()
\end{minted}
\else
\begin{center}
\begin{minipage}[tl]{0.9\textwidth}
\begin{verbatim}
nt = psb_cuda_MaxThreadsPerMP()
\end{verbatim}
\end{minipage}
\end{center}
\fi
Returns the maximum number of threads per multiprocessor.
\subsection*{psb\_cuda\_MaxRegistersPerBlock }
\addcontentsline{toc}{subsection}{psb\_cuda\_MaxRegistersPerBlock}
\ifpdf
\begin{minted}[breaklines=true]{fortran}
nr = psb_cuda_MaxRegistersPerBlock()
\end{minted}
\else
\begin{center}
\begin{minipage}[tl]{0.9\textwidth}
\begin{verbatim}
nr = psb_cuda_MaxRegistersPerBlock()
\end{verbatim}
\end{minipage}
\end{center}
\fi
Returns the maximum number of registers per thread block.
\subsection*{psb\_cuda\_MemoryClockRate }
\addcontentsline{toc}{subsection}{psb\_cuda\_MemoryClockRate}
\ifpdf
\begin{minted}[breaklines=true]{fortran}
cl = psb_cuda_MemoryClockRate()
\end{minted}
\else
\begin{center}
\begin{minipage}[tl]{0.9\textwidth}
\begin{verbatim}
cl = psb_cuda_MemoryClockRate()
\end{verbatim}
\end{minipage}
\end{center}
\fi
Returns the memory clock rate in KHz, as an integer.
\subsection*{psb\_cuda\_MemoryBusWidth }
\addcontentsline{toc}{subsection}{psb\_cuda\_MemoryBusWidth}
\ifpdf
\begin{minted}[breaklines=true]{fortran}
nb = psb_cuda_MemoryBusWidth()
\end{minted}
\else
\begin{center}
\begin{minipage}[tl]{0.9\textwidth}
\begin{verbatim}
nb = psb_cuda_MemoryBusWidth()
\end{verbatim}
\end{minipage}
\end{center}
\fi
Returns the memory bus width in bits.
\subsection*{psb\_cuda\_MemoryPeakBandwidth }
\addcontentsline{toc}{subsection}{psb\_cuda\_MemoryPeakBandwidth}
\ifpdf
\begin{minted}[breaklines=true]{fortran}
bw = psb_cuda_MemoryPeakBandwidth()
\end{minted}
\else
\begin{center}
\begin{minipage}[tl]{0.9\textwidth}
\begin{verbatim}
bw = psb_cuda_MemoryPeakBandwidth()
\end{verbatim}
\end{minipage}
\end{center}
\fi
Returns the peak memory bandwidth in MB/s (real double precision).

@ -0,0 +1,598 @@
\section{Extensions}\label{sec:ext-intro}
The EXT, CUDA and RSB subdirectories contain a set of extensions to the base
library. The extensions provide additional storage formats beyond the
ones already contained in the base library, as well as interfaces
to:
\begin{description}
\item[SPGPU] a CUDA library originally published as
\url{https://code.google.com/p/spgpu/} and now included in the
\verb|cuda| subdir, for computations on NVIDIA GPUs;
\item[LIBRSB] \url{http://sourceforge.net/projects/librsb/}, for
computations on multicore parallel machines.
\end{description}
The infrastructure laid out in the base library to allow for these
extensions is detailed in the references~\cite{DesPat:11,CaFiRo:2014,Sparse03};
the CUDA-specific data formats are described in~\cite{OurTechRep}.
\subsection{Using the extensions}
\label{sec:ext-appstruct}
A sample application using the PSBLAS extensions will contain the
following steps:
\begin{itemize}
\item \verb|USE| the appropriate modules (\verb|psb_ext_mod|,
\verb|psb_cuda_mod|);
\item Declare a \emph{mold} variable of the necessary type
(e.g. \verb|psb_d_ell_sparse_mat|, \verb|psb_d_hlg_sparse_mat|,
\verb|psb_d_vect_cuda|);
\item Pass the mold variable to the base library interface where
needed to ensure the appropriate dynamic type.
\end{itemize}
Suppose you want to use the CUDA-enabled ELLPACK data structure; you
would use a piece of code like this (and don't forget, you need
CUDA-side vectors along with the matrices):
\ifpdf
\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran}
program my_cuda_test
use psb_base_mod
use psb_util_mod
use psb_ext_mod
use psb_cuda_mod
type(psb_dspmat_type) :: a, agpu
type(psb_d_vect_type) :: x, xg, bg
real(psb_dpk_), allocatable :: xtmp(:)
type(psb_d_vect_cuda) :: vmold
type(psb_d_elg_sparse_mat) :: aelg
type(psb_ctxt_type) :: ctxt
integer :: iam, np
call psb_init(ctxt)
call psb_info(ctxt,iam,np)
call psb_cuda_init(ctxt, iam)
! My own home-grown matrix generator
call gen_matrix(ctxt,idim,desc_a,a,x,info)
if (info /= 0) goto 9999
call a%cscnv(agpu,info,mold=aelg)
if (info /= 0) goto 9999
xtmp = x%get_vect()
call xg%bld(xtmp,mold=vmold)
call bg%bld(size(xtmp),mold=vmold)
! Do sparse MV
call psb_spmm(done,agpu,xg,dzero,bg,desc_a,info)
9999 continue
if (info == 0) then
write(*,*) '42'
else
write(*,*) 'Something went wrong ',info
end if
call psb_cuda_exit()
call psb_exit(ctxt)
stop
end program my_cuda_test
\end{minted}
\else
\begin{center}
\begin{minipage}[tl]{0.9\textwidth}
\begin{verbatim}
program my_cuda_test
use psb_base_mod
use psb_util_mod
use psb_ext_mod
use psb_cuda_mod
type(psb_dspmat_type) :: a, agpu
type(psb_d_vect_type) :: x, xg, bg
real(psb_dpk_), allocatable :: xtmp(:)
type(psb_d_vect_cuda) :: vmold
type(psb_d_elg_sparse_mat) :: aelg
type(psb_ctxt_type) :: ctxt
integer :: iam, np
call psb_init(ctxt)
call psb_info(ctxt,iam,np)
call psb_cuda_init(ctxt, iam)
! My own home-grown matrix generator
call gen_matrix(ctxt,idim,desc_a,a,x,info)
if (info /= 0) goto 9999
call a%cscnv(agpu,info,mold=aelg)
if (info /= 0) goto 9999
xtmp = x%get_vect()
call xg%bld(xtmp,mold=vmold)
call bg%bld(size(xtmp),mold=vmold)
! Do sparse MV
call psb_spmm(done,agpu,xg,dzero,bg,desc_a,info)
9999 continue
if (info == 0) then
write(*,*) '42'
else
write(*,*) 'Something went wrong ',info
end if
call psb_cuda_exit()
call psb_exit(ctxt)
stop
end program my_cuda_test
\end{verbatim}
\end{minipage}
\end{center}
\fi
A full example of this strategy can be seen in the
\texttt{test/ext/kernel} and \texttt{test/\-cuda/\-kernel} subdirectories,
where we provide sample programs
to test the speed of the sparse matrix-vector product with the various
data structures included in the library.
\subsection{Extensions' Data Structures}
\label{sec:ext-datastruct}
%\ifthenelse{\boolean{mtc}}{\minitoc}{}
Access to the facilities provided by the EXT library is mainly
achieved through the data types that are provided within.
The data classes are derived from the base classes in PSBLAS, through
the Fortran~2003 mechanism of \emph{type extension}~\cite{MRC:11}.
The data classes are divided between the general purpose CPU
extensions, the GPU interfaces and the RSB interfaces.
In the description we will make use of the notation introduced in
Table~\ref{tab:notation}.
\begin{table}[ht]
\caption{Notation for parameters describing a sparse matrix}
\begin{center}
{\footnotesize
\begin{tabular}{ll}
\hline
Name & Description \\
\hline
M & Number of rows in matrix \\
N & Number of columns in matrix \\
NZ & Number of nonzeros in matrix \\
AVGNZR & Average number of nonzeros per row \\
MAXNZR & Maximum number of nonzeros per row \\
NDIAG & Number of nonzero diagonals\\
AS & Coefficients array \\
IA & Row indices array \\
JA & Column indices array \\
IRP & Row start pointers array \\
JCP & Column start pointers array \\
NZR & Number of nonzeros per row array \\
OFFSET & Offset for diagonals \\
\hline
\end{tabular}
}
\end{center}
\label{tab:notation}
\end{table}
\begin{figure}[ht]
\centering
% \includegraphics[width=5.2cm]{figures/mat.eps}
\ifcase\pdfoutput
\includegraphics[width=5.2cm]{mat.png}
\or
\includegraphics[width=5.2cm]{figures/mat.pdf}
\fi
\caption{Example of sparse matrix}
\label{fig:dense}
\end{figure}
\subsection{CPU-class extensions}
\subsubsection*{ELLPACK}
The ELLPACK/ITPACK format (shown in Figure~\ref{fig:ell})
comprises two 2-dimensional arrays \verb|AS| and
\verb|JA| with \verb|M| rows and \verb|MAXNZR| columns, where
\verb|MAXNZR| is the maximum
number of nonzeros in any row~\cite{ELLPACK}.
Each row of the arrays \verb|AS| and \verb|JA| contains the
coefficients and column indices; rows shorter than
\verb|MAXNZR| are padded with zero coefficients and appropriate column
indices, e.g. the last valid one found in the same row.
\begin{figure}[ht]
\centering
% \includegraphics[width=8.2cm]{figures/ell.eps}
\ifcase\pdfoutput
\includegraphics[width=8.2cm]{ell.png}
\or
\includegraphics[width=8.2cm]{figures/ell.pdf}
\fi
\caption{ELLPACK compression of matrix in Figure~\ref{fig:dense}}
\label{fig:ell}
\end{figure}
\begin{algorithm}
\lstset{language=Fortran}
\small
\begin{lstlisting}
do i=1,m
t=0
do j=1,maxnzr
t = t + as(i,j)*x(ja(i,j))
end do
y(i) = t
end do
\end{lstlisting}
\caption{\label{alg:ell} Matrix-Vector product in ELL format}
\end{algorithm}
The matrix-vector product $y=Ax$ can be computed with the code shown in
Alg.~\ref{alg:ell}; it costs one memory write per outer iteration,
plus three memory reads and two floating-point operations per inner
iteration.
Unless all rows have exactly the same number of nonzeros, some of the
coefficients in the \verb|AS| array will be zeros; therefore this
data structure will have an overhead both in terms of memory space
and redundant operations (multiplications by zero). The overhead can
be acceptable if:
\begin{enumerate}
\item The maximum number of nonzeros per row is not much larger than
the average;
\item The regularity of the data structure allows for faster code,
e.g. by allowing vectorization, thereby offsetting the additional
storage requirements.
\end{enumerate}
In the extreme case where the input matrix has one full row, the
ELLPACK structure would require more memory than the normal 2D array
storage. The ELLPACK storage format was very popular in the vector
computing days; in modern CPUs it is not quite as popular, but it
is the basis for many GPU formats.
The relevant data type is \verb|psb_T_ell_sparse_mat|:
\ifpdf
\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran}
type, extends(psb_d_base_sparse_mat) :: psb_d_ell_sparse_mat
!
! ITPACK/ELL format, extended.
!
integer(psb_ipk_), allocatable :: irn(:), ja(:,:), idiag(:)
real(psb_dpk_), allocatable :: val(:,:)
contains
....
end type psb_d_ell_sparse_mat
\end{minted}
\else
\begin{center}
\begin{minipage}[tl]{0.9\textwidth}
\begin{verbatim}
type, extends(psb_d_base_sparse_mat) :: psb_d_ell_sparse_mat
!
! ITPACK/ELL format, extended.
!
integer(psb_ipk_), allocatable :: irn(:), ja(:,:), idiag(:)
real(psb_dpk_), allocatable :: val(:,:)
contains
....
end type psb_d_ell_sparse_mat
\end{verbatim}
\end{minipage}
\end{center}
\fi
\subsubsection*{Hacked ELLPACK}
The \textit{hacked ELLPACK} (\textbf{HLL}) format
alleviates the main problem of the ELLPACK format, that is,
the amount of memory required by padding for sparse matrices in
which the maximum row length is larger than the average.
The number of elements allocated to padding is $[(m*maxNR) -
(m*avgNR) = m*(maxNR-avgNR)]$
for both \verb|AS| and \verb|JA| arrays,
where $m$ is equal to the number of rows of the matrix, $maxNR$ is the
maximum number of nonzero elements
in any row and $avgNR$ is the average number of nonzeros per row.
Therefore a single densely populated row can seriously affect the
total size of the allocation.
To limit this effect, in the HLL format we break the original matrix
into equally sized groups of rows (called \textit{hacks}), and then store
these groups as independent matrices in ELLPACK format.
The groups can be arranged selecting rows in an arbitrary manner;
indeed, if the rows are sorted by decreasing number of nonzeros we
obtain essentially the JAgged Diagonals format.
If the rows are not in the original order, then an additional vector
\textit{rIdx} is required, storing the actual row index for each row
in the data structure.
The multiple ELLPACK-like buffers are stacked together inside a
single, one dimensional array;
an additional vector \textit{hackOffsets} is provided to keep track
of the individual submatrices.
All hacks have the same number of rows \textit{hackSize}; hence,
the \textit{hackOffsets} vector is an array of
$(m/hackSize)+1$ elements, each one pointing to the first index of a
submatrix inside the stacked \textit{cM}/\textit{rP} buffers, plus an
additional element pointing past the end of the last block, where the
next one would begin.
We thus have the property that
the elements of the $k$-th \textit{hack} are stored between
\verb|hackOffsets[k]| and
\verb|hackOffsets[k+1]|, similarly to what happens in the CSR format.
\begin{figure}[ht]
\centering
% \includegraphics[width=8.2cm]{../figures/hll.eps}
\ifcase\pdfoutput
\includegraphics[width=.72\textwidth]{hll.png}
\or
\includegraphics[width=.72\textwidth]{../figures/hll.pdf}
\fi
\caption{Hacked ELLPACK compression of matrix in Figure~\ref{fig:dense}}
\label{fig:hll}
\end{figure}
With this data structure a very long row only affects one hack, and
therefore the additional memory is limited to the hack in which the
row appears.
The relevant data type is \verb|psb_T_hll_sparse_mat|:
\ifpdf
\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran}
type, extends(psb_d_base_sparse_mat) :: psb_d_hll_sparse_mat
!
! HLL format. (Hacked ELL)
!
integer(psb_ipk_) :: hksz
integer(psb_ipk_), allocatable :: irn(:), ja(:), idiag(:), hkoffs(:)
real(psb_dpk_), allocatable :: val(:)
contains
....
end type
\end{minted}
\else
\begin{center}
\begin{minipage}[tl]{0.9\textwidth}
\begin{verbatim}
type, extends(psb_d_base_sparse_mat) :: psb_d_hll_sparse_mat
!
! HLL format. (Hacked ELL)
!
integer(psb_ipk_) :: hksz
integer(psb_ipk_), allocatable :: irn(:), ja(:), idiag(:), hkoffs(:)
real(psb_dpk_), allocatable :: val(:)
contains
....
end type
\end{verbatim}
\end{minipage}
\end{center}
\fi
\subsubsection*{Diagonal storage}
The DIAgonal (DIA) format (shown in Figure~\ref{fig:dia})
has a 2-dimensional array \verb|AS| containing in each column the
coefficients along a diagonal of the matrix, and an integer array
\verb|OFFSET| that determines where each diagonal starts. The
diagonals in \verb|AS| are padded with zeros as necessary.
The code to compute the matrix-vector product $y=Ax$ is shown in Alg.~\ref{alg:dia};
it costs one memory read per outer iteration,
plus three memory reads, one memory write and two floating-point
operations per inner iteration. The accesses to \verb|AS| and
\verb|x| are in strict sequential order, therefore no indirect
addressing is required.
\begin{figure}[ht]
\centering
% \includegraphics[width=8.2cm]{figures/dia.eps}
\ifcase\pdfoutput
\includegraphics[width=.72\textwidth]{dia.png}
\or
\includegraphics[width=.72\textwidth]{figures/dia.pdf}
\fi
\caption{DIA compression of matrix in Figure~\ref{fig:dense}}
\label{fig:dia}
\end{figure}
\begin{algorithm}
\ifpdf
\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran}
do j=1,ndiag
if (offset(j) > 0) then
ir1 = 1; ir2 = m - offset(j);
else
ir1 = 1 - offset(j); ir2 = m;
end if
do i=ir1,ir2
y(i) = y(i) + alpha*as(i,j)*x(i+offset(j))
end do
end do
\end{minted}
\else
\begin{center}
\begin{minipage}[tl]{0.9\textwidth}
\begin{verbatim}
do j=1,ndiag
if (offset(j) > 0) then
ir1 = 1; ir2 = m - offset(j);
else
ir1 = 1 - offset(j); ir2 = m;
end if
do i=ir1,ir2
y(i) = y(i) + alpha*as(i,j)*x(i+offset(j))
end do
end do
\end{verbatim}
\end{minipage}
\end{center}
\fi
\caption{\label{alg:dia} Matrix-Vector product in DIA format}
\end{algorithm}
The relevant data type is \verb|psb_T_dia_sparse_mat|:
\ifpdf
\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran}
type, extends(psb_d_base_sparse_mat) :: psb_d_dia_sparse_mat
!
! DIA format, extended.
!
integer(psb_ipk_), allocatable :: offset(:)
integer(psb_ipk_) :: nzeros
real(psb_dpk_), allocatable :: data(:,:)
end type
\end{minted}
\else
\begin{center}
\begin{minipage}[tl]{0.9\textwidth}
\begin{verbatim}
type, extends(psb_d_base_sparse_mat) :: psb_d_dia_sparse_mat
!
! DIA format, extended.
!
integer(psb_ipk_), allocatable :: offset(:)
integer(psb_ipk_) :: nzeros
real(psb_dpk_), allocatable :: data(:,:)
end type
\end{verbatim}
\end{minipage}
\end{center}
\fi
\subsubsection*{Hacked DIA}
Storage by DIAgonals is an attractive option for matrices whose
coefficients are located on a small set of diagonals, since it does
away with explicitly storing the indices and therefore significantly
reduces memory traffic. However, having a few coefficients
outside of the main set of diagonals may significantly increase the
amount of needed padding; moreover, while the DIA code is easily
vectorized, it does not necessarily make optimal use of the memory
hierarchy. While processing each diagonal we are updating entries in
the output vector \verb|y|, which is then accessed multiple times; if
the vector \verb|y| is too large to remain in the cache memory, the
associated cache miss penalty is paid multiple times.
The \textit{hacked DIA} (\textbf{HDIA}) format was designed to contain
the amount of padding, by breaking the original matrix
into equally sized groups of rows (\textit{hacks}), and then storing
these groups as independent matrices in DIA format. This approach is
similar to that of HLL, and requires using an offset vector for each
submatrix. Again, similarly to HLL, the various submatrices are
stacked inside a linear array to improve memory management. The fact
that the matrix is accessed in slices helps in reducing cache misses,
especially regarding accesses to the %output
vector \verb|y|.
An additional vector \textit{hackOffsets} is provided to complete
the matrix format; given that \textit{hackSize} is the number of rows of each hack,
the \textit{hackOffsets} vector is an array of
$(m/hackSize)+1$ elements, pointing to the first diagonal offset of a
submatrix inside the stacked \textit{offsets} buffers, plus an
additional element equal to the number of nonzero diagonals in the whole matrix.
We thus have the property that
the number of diagonals of the $k$-th \textit{hack} is given by
\textit{hackOffsets[k+1] - hackOffsets[k]}.
\begin{figure}[ht]
\centering
% \includegraphics[width=8.2cm]{../figures/hdia.eps}
\ifcase\pdfoutput
\includegraphics[width=.72\textwidth]{hdia.png}
\or
\includegraphics[width=.72\textwidth]{../figures/hdia.pdf}
\fi
\caption{Hacked DIA compression of matrix in Figure~\ref{fig:dense}}
\label{fig:hdia}
\end{figure}
The relevant data type is \verb|psb_T_hdia_sparse_mat|:
\ifpdf
\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran}
type pm
real(psb_dpk_), allocatable :: data(:,:)
end type pm
type po
integer(psb_ipk_), allocatable :: off(:)
end type po
type, extends(psb_d_base_sparse_mat) :: psb_d_hdia_sparse_mat
!
! HDIA format, extended.
!
type(pm), allocatable :: hdia(:)
type(po), allocatable :: offset(:)
integer(psb_ipk_) :: nblocks, nzeros
integer(psb_ipk_) :: hack = 64
integer(psb_long_int_k_) :: dim=0
contains
....
end type
\end{minted}
\else
\begin{center}
\begin{minipage}[tl]{0.9\textwidth}
\begin{verbatim}
type pm
real(psb_dpk_), allocatable :: data(:,:)
end type pm
type po
integer(psb_ipk_), allocatable :: off(:)
end type po
type, extends(psb_d_base_sparse_mat) :: psb_d_hdia_sparse_mat
!
! HDIA format, extended.
!
type(pm), allocatable :: hdia(:)
type(po), allocatable :: offset(:)
integer(psb_ipk_) :: nblocks, nzeros
integer(psb_ipk_) :: hack = 64
integer(psb_long_int_k_) :: dim=0
contains
....
end type
\end{verbatim}
\end{minipage}
\end{center}
\fi

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save