Updated docs

repack-newsolve
sfilippone 7 months ago
parent 9f2b8a2623
commit 681ea2fff7

@ -2018,6 +2018,75 @@ CPPFLAGS="$SAVE_CPPFLAGS";
])dnl
dnl @synopsis PAC_ARG_WITH_LIBRSB
dnl
dnl Test for --with-librsb="pathname".
dnl
dnl Defines the path to LIBRSB build dir.
dnl
dnl note: Renamed after PAC_ARG_WITH_LIBS as in the Trilinos package.
dnl
dnl Example use:
dnl
dnl PAC_ARG_WITH_LIBRSB
dnl
dnl tests for --with-librsb and pre-pends to LIBRSB_PATH
dnl
dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
dnl
AC_DEFUN(PAC_ARG_WITH_LIBRSB,
[SAVE_LIBS="$LIBS"
SAVE_CPPFLAGS="$CPPFLAGS"
AC_ARG_WITH(librsb,
AC_HELP_STRING([--with-librsb], [The directory for LIBRSB, for example,
--with-librsb=/opt/packages/librsb]),
[pac_cv_librsb_dir=$withval],
[pac_cv_librsb_dir=''])
if test "x$pac_cv_librsb_dir" != "x"; then
LIBS="-L$pac_cv_librsb_dir $LIBS"
RSB_INCLUDES="-I$pac_cv_librsb_dir"
# CPPFLAGS="$GPU_INCLUDES $CUDA_INCLUDES $CPPFLAGS"
RSB_LIBDIR="-L$pac_cv_librsb_dir"
fi
#AC_MSG_CHECKING([librsb dir $pac_cv_librsb_dir])
AC_CHECK_HEADER([$pac_cv_librsb_dir/rsb.h],
[pac_rsb_header_ok=yes],
[pac_rsb_header_ok=no; RSB_INCLUDES=""])
if test "x$pac_rsb_header_ok" == "xyes" ; then
RSB_LIBS="-lrsb $RSB_LIBDIR"
# LIBS="$GPU_LIBS $CUDA_LIBS -lm $LIBS";
# AC_MSG_CHECKING([for spgpuCreate in $GPU_LIBS])
# AC_TRY_LINK_FUNC(spgpuCreate,
# [pac_cv_have_spgpu=yes;pac_gpu_lib_ok=yes; ],
# [pac_cv_have_spgpu=no;pac_gpu_lib_ok=no; GPU_LIBS=""])
# AC_MSG_RESULT($pac_gpu_lib_ok)
# if test "x$pac_cv_have_spgpu" == "xyes" ; then
# AC_MSG_NOTICE([Have found SPGPU])
RSBLIBNAME="librsb.a";
LIBRSB_DIR="$pac_cv_librsb_dir";
# SPGPU_DEFINES="-DHAVE_SPGPU";
LIBRSB_INCDIR="$LIBRSB_DIR";
LIBRSB_INCLUDES="-I$LIBRSB_INCDIR";
LIBRSB_LIBS="-lrsb -L$LIBRSB_DIR";
# CUDA_DIR="$pac_cv_cuda_dir";
LIBRSB_DEFINES="-DHAVE_RSB";
LRSB=-lpsb_rsb
# CUDA_INCLUDES="-I$pac_cv_cuda_dir/include"
# CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib"
FDEFINES="$LIBRSB_DEFINES $psblas_cv_define_prepend $FDEFINES";
CDEFINES="$LIBRSB_DEFINES $CDEFINES";#CDEFINES="-DHAVE_SPGPU -DHAVE_CUDA $CDEFINES";
fi
# fi
LIBS="$SAVE_LIBS"
CPPFLAGS="$SAVE_CPPFLAGS"
])
dnl
dnl @synopsis PAC_CHECK_SPGPU
dnl
dnl Will try to find the spgpu library and headers.

@ -204,7 +204,7 @@ PAC_ARG_WITH_FLAGS(module-path,MODULE_PATH)
# we just gave the user the chance to append values to these variables
###############################################################################
dnl Library oriented Autotools facilities (we don't care about this for now)
@ -844,6 +844,30 @@ if test "x$pac_cv_ipk_size" != "x4"; then
fi
###############################################################################
PAC_ARG_WITH_LIBRSB
LIBRSB_DIR="$pac_cv_librsb_dir";
AC_MSG_CHECKING([for LIBRSB install dir])
case $LIBRSB_DIR in
/*) ;;
*) dnl AC_MSG_ERROR([The LIBRSB installation dir must be an absolute pathname
dnl specified with --with-librsb=/path/to/librsb])
esac
dnl if test ! -d "$LIBRSB_DIR" ; then
dnl AC_MSG_ERROR([Could not find LIBRSB build dir $LIBRSB_DIR!])
dnl fi
pac_cv_status_file="$LIBRSB_DIR/librsb.a"
if test ! -f "$pac_cv_status_file" ; then
AC_MSG_RESULT([no])
#AC_MSG_ERROR([Could not find an installation in $LIBRSB_DIR.])
else
AC_MSG_RESULT([$LIBRSB_DIR])
RSBTARGETLIB=rsbd;
RSBTARGETOBJ=rsbobj;
fi
###############################################################################
@ -944,6 +968,14 @@ AC_SUBST(CUDEFINES)
AC_SUBST(CUDAD)
AC_SUBST(CUDALD)
AC_SUBST(LCUDA)
AC_SUBST(LIBRSB_LIBS)
AC_SUBST(LIBRSB_INCLUDES)
AC_SUBST(LIBRSB_INCDIR)
AC_SUBST(LIBRSB_DIR)
AC_SUBST(LIBRSB_DEFINES)
AC_SUBST(LRSB)
###############################################################################
# the following files will be created by Automake

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

File diff suppressed because one or more lines are too long

@ -86,7 +86,8 @@
TOPFILE = userguide.tex
HTMLFILE = userhtml.tex
SECFILE = intro.tex commrout.tex datastruct.tex psbrout.tex toolsrout.tex\
methods.tex precs.tex penv.tex error.tex util.tex biblio.tex
methods.tex precs.tex penv.tex error.tex util.tex biblio.tex \
ext-intro.tex cuda.tex
FIGDIR = figures
XPDFFLAGS =
@ -139,7 +140,7 @@ PDF = $(join $(BASEFILE),.pdf)
PS = $(join $(BASEFILE),.ps)
GXS = $(join $(BASEFILE),.gxs)
GLX = $(join $(BASEFILE),.glx)
TARGETPDF= ../psblas-3.8.pdf
TARGETPDF= ../psblas-3.9.pdf
BASEHTML = $(patsubst %.tex,%,$(HTMLFILE))
HTML = $(join $(BASEHTML),.html)
HTMLDIR = ../html

@ -1,9 +1,5 @@
\begin{thebibliography}{99}
\bibitem{DesPat:11}
D.~Barbieri, V.~Cardellini, S.~Filippone and D.~Rouson
{\em Design Patterns for Scientific Computations on Sparse Matrices},
HPSS 2011, Algorithms and Programming Tools for Next-Generation High-Performance Scientific Software, Bordeaux, Sep. 2011
\bibitem{PARA04FOREST}
G.~Bella, S.~Filippone, A.~De Maio and M.~Testa,
@ -154,6 +150,11 @@ Lawson, C., Hanson, R., Kincaid, D. and Krogh, F.,
{\em Fortran 95/2003 explained.}
{Oxford University Press}, 2004.
%
\bibitem{MRC:11}
{Metcalf, M., Reid, J. and Cohen, M.}
{\em Modern Fortran explained.}
{Oxford University Press}, 2011.
%
%% \bibitem{DD2}
%% B.~Smith, P.~Bjorstad and W.~Gropp,
%% {\em Domain Decomposition: Parallel Multilevel Methods for Elliptic
@ -169,4 +170,20 @@ M.~Snir, S.~Otto, S.~Huss-Lederman, D.~Walker and J.~Dongarra,
{\em MPI: The Complete Reference. Volume 1 - The MPI Core}, second edition,
MIT Press, 1998.
%
\bibitem{DesPat:11}
D.~Barbieri, V.~Cardellini, S.~Filippone and D.~Rouson
{\em Design Patterns for Scientific Computations on Sparse Matrices},
HPSS 2011, Algorithms and Programming Tools for Next-Generation High-Performance Scientific Software, Bordeaux, Sep. 2011
\bibitem{CaFiRo:2014}
{ Cardellini, V.}, { Filippone, S.}, { and} { Rouson, D.} 2014,
Design patterns for sparse-matrix computations on hybrid {CPU/GPU}
platforms,
{\em Scientific Programming\/}~{\em 22,\/}~1, 1--19.
\bibitem{OurTechRep}
D.~Barbieri, V.~Cardellini, A.~Fanfarillo, S.~Filippone, Three storage formats
for sparse matrices on {GPGPUs}, Tech. Rep. DICII RR-15.6, Universit\`a di
Roma Tor Vergata (February 2015).
\end{thebibliography}

@ -0,0 +1,244 @@
\subsection{CUDA-class extensions}
For computing with CUDA we define a dual memorization strategy in
which each variable on the CPU (``host'') side has a GPU (``device'')
side. When a GPU-type variable is initialized, the data contained is
(usually) the same on both sides. Each operator invoked on the
variable may change the data so that only the host side or the device
side are up-to-date.
Keeping track of the updates to data in the variables is essential: we want
to perform most computations on the GPU, but we cannot afford the time
needed to move data between the host memory and the device memory
because the bandwidth of the interconnection bus would become the main
bottleneck of the computation. Thus, each and every computational
routine in the library is built according to the following principles:
\begin{itemize}
\item If the data type being handled is {GPU}-enabled, make sure that
its device copy is up to date, perform any arithmetic operation on
the {GPU}, and if the data has been altered as a result, mark
the main-memory copy as outdated.
\item The main-memory copy is never updated unless this is requested
by the user either
\begin{description}
\item[explicitly] by invoking a synchronization method;
\item[implicitly] by invoking a method that involves other data items
that are not {GPU}-enabled, e.g., by assignment ov a vector to a
normal array.
\end{description}
\end{itemize}
In this way, data items are put on the {GPU} memory ``on demand'' and
remain there as long as ``normal'' computations are carried out.
As an example, the following call to a matrix-vector product
\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran}
call psb_spmm(alpha,a,x,beta,y,desc_a,info)
\end{minted}
will transparently and automatically be performed on the {GPU} whenever
all three data inputs \fortinline|a|, \fortinline|x| and
\fortinline|y| are {GPU}-enabled. If a program makes many such calls
sequentially, then
\begin{itemize}
\item The first kernel invocation will find the data in main memory,
and will copy it to the {GPU} memory, thus incurring a significant
overhead; the result is however \emph{not} copied back, and
therefore:
\item Subsequent kernel invocations involving the same vector will
find the data on the {GPU} side so that they will run at full
speed.
\end{itemize}
For all invocations after the first the only data that will have to be
transferred to/from the main memory will be the scalars \fortinline|alpha|
and \fortinline|beta|, and the return code \fortinline|info|.
\begin{description}
\item[Vectors:] The data type \fortinline|psb_T_vect_gpu| provides a
GPU-enabled extension of the inner type \fortinline|psb_T_base_vect_type|,
and must be used together with the other inner matrix type to make
full use of the GPU computational capabilities;
\item[CSR:] The data type \fortinline|psb_T_csrg_sparse_mat| provides an
interface to the GPU version of CSR available in the NVIDIA CuSPARSE
library;
\item[HYB:] The data type \fortinline|psb_T_hybg_sparse_mat| provides an
interface to the HYB GPU storage available in the NVIDIA CuSPARSE
library. The internal structure is opaque, hence the host side is
just CSR; the HYB data format is only available up to CUDA version
10.
\item[ELL:] The data type \fortinline|psb_T_elg_sparse_mat| provides an
interface to the ELLPACK implementation from SPGPU;
\item[HLL:] The data type \fortinline|psb_T_hlg_sparse_mat| provides an
interface to the Hacked ELLPACK implementation from SPGPU;
\item[HDIA:] The data type \fortinline|psb_T_hdiag_sparse_mat| provides an
interface to the Hacked DIAgonals implementation from SPGPU;
\end{description}
\section{CUDA Environment Routines}
\label{sec:cudaenv}
\subsection*{psb\_cuda\_init --- Initializes PSBLAS-CUDA
environment}
\addcontentsline{toc}{subsection}{psb\_cuda\_init}
\begin{minted}[breaklines=true]{fortran}
call psb_cuda_init(ctxt [, device])
\end{minted}
This subroutine initializes the PSBLAS-CUDA environment.
\begin{description}
\item[Type:] Synchronous.
\item[\bf On Entry ]
\item[device] ID of CUDA device to attach to.\\
Scope: {\bf local}.\\
Type: {\bf optional}.\\
Intent: {\bf in}.\\
Specified as: an integer value. \
Default: use \fortinline|mod(iam,ngpu)| where \fortinline|iam| is the calling
process index and \fortinline|ngpu| is the total number of CUDA devices
available on the current node.
\end{description}
{\par\noindent\large\bfseries Notes}
\begin{enumerate}
\item A call to this routine must precede any other PSBLAS-CUDA call.
\end{enumerate}
\subsection*{psb\_cuda\_exit --- Exit from PSBLAS-CUDA
environment}
\addcontentsline{toc}{subsection}{psb\_cuda\_exit}
\begin{minted}[breaklines=true]{fortran}
call psb_cuda_exit(ctxt)
\end{minted}
This subroutine exits from the PSBLAS CUDA context.
\begin{description}
\item[Type:] Synchronous.
\item[\bf On Entry ]
\item[ctxt] the communication context identifying the virtual
parallel machine.\\
Scope: {\bf global}.\\
Type: {\bf required}.\\
Intent: {\bf in}.\\
Specified as: an integer variable.
\end{description}
\subsection*{psb\_cuda\_DeviceSync --- Synchronize CUDA device}
\addcontentsline{toc}{subsection}{psb\_cuda\_DeviceSync}
\begin{minted}[breaklines=true]{fortran}
call psb_cuda_DeviceSync()
\end{minted}
This subroutine ensures that all previosly invoked kernels, i.e. all
invocation of CUDA-side code, have completed.
\subsection*{psb\_cuda\_getDeviceCount }
\addcontentsline{toc}{subsection}{psb\_cuda\_getDeviceCount}
\begin{minted}[breaklines=true]{fortran}
ngpus = psb_cuda_getDeviceCount()
\end{minted}
Get number of devices available on current computing node.
\subsection*{psb\_cuda\_getDevice }
\addcontentsline{toc}{subsection}{psb\_cuda\_getDevice}
\begin{minted}[breaklines=true]{fortran}
ngpus = psb_cuda_getDevice()
\end{minted}
Get device in use by current process.
\subsection*{psb\_cuda\_setDevice }
\addcontentsline{toc}{subsection}{psb\_cuda\_setDevice}
\begin{minted}[breaklines=true]{fortran}
info = psb_cuda_setDevice(dev)
\end{minted}
Set device to be used by current process.
\subsection*{psb\_cuda\_DeviceHasUVA }
\addcontentsline{toc}{subsection}{psb\_cuda\_DeviceHasUVA}
\begin{minted}[breaklines=true]{fortran}
hasUva = psb_cuda_DeviceHasUVA()
\end{minted}
Returns true if device currently in use supports UVA (Unified Virtual Addressing).
\subsection*{psb\_cuda\_WarpSize }
\addcontentsline{toc}{subsection}{psb\_cuda\_WarpSize}
\begin{minted}[breaklines=true]{fortran}
nw = psb_cuda_WarpSize()
\end{minted}
Returns the warp size.
\subsection*{psb\_cuda\_MultiProcessors }
\addcontentsline{toc}{subsection}{psb\_cuda\_MultiProcessors}
\begin{minted}[breaklines=true]{fortran}
nmp = psb_cuda_MultiProcessors()
\end{minted}
Returns the number of multiprocessors in the CUDA device.
\subsection*{psb\_cuda\_MaxThreadsPerMP }
\addcontentsline{toc}{subsection}{psb\_cuda\_MaxThreadsPerMP}
\begin{minted}[breaklines=true]{fortran}
nt = psb_cuda_MaxThreadsPerMP()
\end{minted}
Returns the maximum number of threads per multiprocessor.
\subsection*{psb\_cuda\_MaxRegistersPerBlock }
\addcontentsline{toc}{subsection}{psb\_cuda\_MaxRegisterPerBlock}
\begin{minted}[breaklines=true]{fortran}
nr = psb_cuda_MaxRegistersPerBlock()
\end{minted}
Returns the maximum number of register per thread block.
\subsection*{psb\_cuda\_MemoryClockRate }
\addcontentsline{toc}{subsection}{psb\_cuda\_MemoryClockRate}
\begin{minted}[breaklines=true]{fortran}
cl = psb_cuda_MemoryClockRate()
\end{minted}
Returns the memory clock rate in KHz, as an integer.
\subsection*{psb\_cuda\_MemoryBusWidth }
\addcontentsline{toc}{subsection}{psb\_cuda\_MemoryBusWidth}
\begin{minted}[breaklines=true]{fortran}
nb = psb_cuda_MemoryBusWidth()
\end{minted}
Returns the memory bus width in bits.
\subsection*{psb\_cuda\_MemoryPeakBandwidth }
\addcontentsline{toc}{subsection}{psb\_cuda\_MemoryPeakBandwidth}
\begin{minted}[breaklines=true]{fortran}
bw = psb_cuda_MemoryPeakBandwidth()
\end{minted}
Returns the peak memory bandwidth in MB/s (real double precision).

@ -0,0 +1,412 @@
\section{Extensions}\label{sec:ext-intro}
The EXT, CUDA and RSB subdirectories contains a set of extensions to the base
library. The extensions provide additional storage formats beyond the
ones already contained in the base library, as well as interfaces
to:
\begin{description}
\item[SPGPU] a CUDA library originally published as
\url{https://code.google.com/p/spgpu/} and now included in the
\verb|cuda| subdir, for computations on NVIDIA GPUs;
\item[LIBRSB] \url{http://sourceforge.net/projects/librsb/}, for
computations on multicore parallel machines.
\end{description}
The infrastructure laid out in the base library to allow for these
extensions is detailed in the references~\cite{DesPat:11,CaFiRo:2014,Sparse03};
the CUDA-specific data formats are described in~\cite{OurTechRep}.
\subsection{Using the extensions}
\label{sec:ext-appstruct}
A sample application using the PSBLAS extensions will contain the
following steps:
\begin{itemize}
\item \verb|USE| the appropriat modules (\verb|psb_ext_mod|,
\verb|psb_cuda_mod|);
\item Declare a \emph{mold} variable of the necessary type
(e.g. \verb|psb_d_ell_sparse_mat|, \verb|psb_d_hlg_sparse_mat|,
\verb|psb_d_vect_cuda|);
\item Pass the mold variable to the base library interface where
needed to ensure the appropriate dynamic type.
\end{itemize}
Suppose you want to use the CUDA-enabled ELLPACK data structure; you
would use a piece of code like this (and don't forget, you need
CUDA-side vectors along with the matrices):
\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran}
program my_cuda_test
use psb_base_mod
use psb_util_mod
use psb_ext_mod
use psb_cuda_mod
type(psb_dspmat_type) :: a, agpu
type(psb_d_vect_type) :: x, xg, bg
real(psb_dpk_), allocatable :: xtmp(:)
type(psb_d_vect_cuda) :: vmold
type(psb_d_elg_sparse_mat) :: aelg
type(psb_ctxt_type) :: ctxt
integer :: iam, np
call psb_init(ctxt)
call psb_info(ctxt,iam,np)
call psb_cuda_init(ctxt, iam)
! My own home-grown matrix generator
call gen_matrix(ctxt,idim,desc_a,a,x,info)
if (info /= 0) goto 9999
call a%cscnv(agpu,info,mold=aelg)
if (info /= 0) goto 9999
xtmp = x%get_vect()
call xg%bld(xtmp,mold=vmold)
call bg%bld(size(xtmp),mold=vmold)
! Do sparse MV
call psb_spmm(done,agpu,xg,dzero,bg,desc_a,info)
9999 continue
if (info == 0) then
write(*,*) '42'
else
write(*,*) 'Something went wrong ',info
end if
call psb_cuda_exit()
call psb_exit(ctxt)
stop
end program my_cuda_test
\end{minted}
A full example of this strategy can be seen in the
\texttt{test/ext/kernel} and \texttt{test/\-cuda/\-kernel} subdirectories,
where we provide sample programs
to test the speed of the sparse matrix-vector product with the various
data structures included in the library.
\subsection{Extensions' Data Structures}
\label{sec:ext-datastruct}
%\ifthenelse{\boolean{mtc}}{\minitoc}{}
Access to the facilities provided by the EXT library is mainly
achieved through the data types that are provided within.
The data classes are derived from the base classes in PSBLAS, through
the Fortran~2003 mechanism of \emph{type extension}~\cite{MRC:11}.
The data classes are divided between the general purpose CPU
extensions, the GPU interfaces and the RSB interfaces.
In the description we will make use of the notation introduced in
Table~\ref{tab:notation}.
\begin{table}[ht]
\caption{Notation for parameters describing a sparse matrix}
\begin{center}
{\footnotesize
\begin{tabular}{ll}
\hline
Name & Description \\
\hline
M & Number of rows in matrix \\
N & Number of columns in matrix \\
NZ & Number of nonzeros in matrix \\
AVGNZR & Average number of nonzeros per row \\
MAXNZR & Maximum number of nonzeros per row \\
NDIAG & Numero of nonzero diagonals\\
AS & Coefficients array \\
IA & Row indices array \\
JA & Column indices array \\
IRP & Row start pointers array \\
JCP & Column start pointers array \\
NZR & Number of nonzeros per row array \\
OFFSET & Offset for diagonals \\
\hline
\end{tabular}
}
\end{center}
\label{tab:notation}
\end{table}
\begin{figure}[ht]
\centering
% \includegraphics[width=5.2cm]{figures/mat.eps}
\includegraphics[width=5.2cm]{figures/mat.pdf}
\caption{Example of sparse matrix}
\label{fig:dense}
\end{figure}
\subsection{CPU-class extensions}
\subsubsection*{ELLPACK}
The ELLPACK/ITPACK format (shown in Figure~\ref{fig:ell})
comprises two 2-dimensional arrays \verb|AS| and
\verb|JA| with \verb|M| rows and \verb|MAXNZR| columns, where
\verb|MAXNZR| is the maximum
number of nonzeros in any row~\cite{ELLPACK}.
Each row of the arrays \verb|AS| and \verb|JA| contains the
coefficients and column indices; rows shorter than
\verb|MAXNZR| are padded with zero coefficients and appropriate column
indices, e.g. the last valid one found in the same row.
\begin{figure}[ht]
\centering
% \includegraphics[width=8.2cm]{figures/ell.eps}
\includegraphics[width=8.2cm]{figures/ell.pdf}
\caption{ELLPACK compression of matrix in Figure~\ref{fig:dense}}
\label{fig:ell}
\end{figure}
\begin{algorithm}
\lstset{language=Fortran}
\small
\begin{lstlisting}
do i=1,n
t=0
do j=1,maxnzr
t = t + as(i,j)*x(ja(i,j))
end do
y(i) = t
end do
\end{lstlisting}
\caption{\label{alg:ell} Matrix-Vector product in ELL format}
\end{algorithm}
The matrix-vector product $y=Ax$ can be computed with the code shown in
Alg.~\ref{alg:ell}; it costs one memory write per outer iteration,
plus three memory reads and two floating-point operations per inner
iteration.
Unless all rows have exactly the same number of nonzeros, some of the
coefficients in the \verb|AS| array will be zeros; therefore this
data structure will have an overhead both in terms of memory space
and redundant operations (multiplications by zero). The overhead can
be acceptable if:
\begin{enumerate}
\item The maximum number of nonzeros per row is not much larger than
the average;
\item The regularity of the data structure allows for faster code,
e.g. by allowing vectorization, thereby offsetting the additional
storage requirements.
\end{enumerate}
In the extreme case where the input matrix has one full row, the
ELLPACK structure would require more memory than the normal 2D array
storage. The ELLPACK storage format was very popular in the vector
computing days; in modern CPUs it is not quite as popular, but it
is the basis for many GPU formats.
The relevant data type is \verb|psb_T_ell_sparse_mat|:
\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran}
type, extends(psb_d_base_sparse_mat) :: psb_d_ell_sparse_mat
!
! ITPACK/ELL format, extended.
!
integer(psb_ipk_), allocatable :: irn(:), ja(:,:), idiag(:)
real(psb_dpk_), allocatable :: val(:,:)
contains
....
end type psb_d_ell_sparse_mat
\end{minted}
\subsubsection*{Hacked ELLPACK}
The \textit{hacked ELLPACK} (\textbf{HLL}) format
alleviates the main problem of the ELLPACK format, that is,
the amount of memory required by padding for sparse matrices in
which the maximum row length is larger than the average.
The number of elements allocated to padding is $[(m*maxNR) -
(m*avgNR) = m*(maxNR-avgNR)]$
for both \verb|AS| and \verb|JA| arrays,
where $m$ is equal to the number of rows of the matrix, $maxNR$ is the
maximum number of nonzero elements
in every row and $avgNR$ is the average number of nonzeros.
Therefore a single densely populated row can seriously affect the
total size of the allocation.
To limit this effect, in the HLL format we break the original matrix
into equally sized groups of rows (called \textit{hacks}), and then store
these groups as independent matrices in ELLPACK format.
The groups can be arranged selecting rows in an arbitrarily manner;
indeed, if the rows are sorted by decreasing number of nonzeros we
obtain essentially the JAgged Diagonals format.
If the rows are not in the original order, then an additional vector
\textit{rIdx} is required, storing the actual row index for each row
in the data structure.
The multiple ELLPACK-like buffers are stacked together inside a
single, one dimensional array;
an additional vector \textit{hackOffsets} is provided to keep track
of the individual submatrices.
All hacks have the same number of rows \textit{hackSize}; hence,
the \textit{hackOffsets} vector is an array of
$(m/hackSize)+1$ elements, each one pointing to the first index of a
submatrix inside the stacked \textit{cM}/\textit{rP} buffers, plus an
additional element pointing past the end of the last block, where the
next one would begin.
We thus have the property that
the elements of the $k$-th \textit{hack} are stored between \verb|hackOffsets[k]| and
\verb|hackOffsets[k+1]|, similarly to what happens in the CSR format.
\begin{figure}[ht]
\centering
% \includegraphics[width=8.2cm]{../figures/hll.eps}
\includegraphics[width=.72\textwidth]{../figures/hll.pdf}
\caption{Hacked ELLPACK compression of matrix in Figure~\ref{fig:dense}}
\label{fig:hll}
\end{figure}
With this data structure a very long row only affects one hack, and
therefore the additional memory is limited to the hack in which the
row appears.
The relevant data type is \verb|psb_T_hll_sparse_mat|:
\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran}
type, extends(psb_d_base_sparse_mat) :: psb_d_hll_sparse_mat
!
! HLL format. (Hacked ELL)
!
integer(psb_ipk_) :: hksz
integer(psb_ipk_), allocatable :: irn(:), ja(:), idiag(:), hkoffs(:)
real(psb_dpk_), allocatable :: val(:)
contains
....
end type
\end{minted}
\subsubsection*{Diagonal storage}
The DIAgonal (DIA) format (shown in Figure~\ref{fig:dia})
has a 2-dimensional array \verb|AS| containing in each column the
coefficients along a diagonal of the matrix, and an integer array
\verb|OFFSET| that determines where each diagonal starts. The
diagonals in \verb|AS| are padded with zeros as necessary.
The code to compute the matrix-vector product $y=Ax$ is shown in Alg.~\ref{alg:dia};
it costs one memory read per outer iteration,
plus three memory reads, one memory write and two floating-point
operations per inner iteration. The accesses to \verb|AS| and
\verb|x| are in strict sequential order, therefore no indirect
addressing is required.
\begin{figure}[ht]
\centering
% \includegraphics[width=8.2cm]{figures/dia.eps}
\includegraphics[width=.72\textwidth]{figures/dia.pdf}
\caption{DIA compression of matrix in Figure~\ref{fig:dense}}
\label{fig:dia}
\end{figure}
\begin{algorithm}
\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran}
do j=1,ndiag
if (offset(j) > 0) then
ir1 = 1; ir2 = m - offset(j);
else
ir1 = 1 - offset(j); ir2 = m;
end if
do i=ir1,ir2
y(i) = y(i) + alpha*as(i,j)*x(i+offset(j))
end do
end do
\end{minted}
\caption{\label{alg:dia} Matrix-Vector product in DIA format}
\end{algorithm}
The relevant data type is \verb|psb_T_dia_sparse_mat|:
\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran}
type, extends(psb_d_base_sparse_mat) :: psb_d_dia_sparse_mat
!
! DIA format, extended.
!
integer(psb_ipk_), allocatable :: offset(:)
integer(psb_ipk_) :: nzeros
real(psb_dpk_), allocatable :: data(:,:)
end type
\end{minted}
\subsubsection*{Hacked DIA}
Storage by DIAgonals is an attractive option for matrices whose
coefficients are located on a small set of diagonals, since they do
away with storing explicitly the indices and therefore reduce
significantly memory traffic. However, having a few coefficients
outside of the main set of diagonals may significantly increase the
amount of needed padding; moreover, while the DIA code is easily
vectorized, it does not necessarily make optimal use of the memory
hierarchy. While processing each diagonal we are updating entries in
the output vector \verb|y|, which is then accessed multiple times; if
the vector \verb|y| is too large to remain in the cache memory, the
associated cache miss penalty is paid multiple times.
The \textit{hacked DIA} (\textbf{HDIA}) format was designed to contain
the amount of padding, by breaking the original matrix
into equally sized groups of rows (\textit{hacks}), and then storing
these groups as independent matrices in DIA format. This approach is
similar to that of HLL, and requires using an offset vector for each
submatrix. Again, similarly to HLL, the various submatrices are
stacked inside a linear array to improve memory management. The fact
that the matrix is accessed in slices helps in reducing cache misses,
especially regarding accesses to the %output
vector \verb|y|.
An additional vector \textit{hackOffsets} is provided to complete
the matrix format; given that \textit{hackSize} is the number of rows of each hack,
the \textit{hackOffsets} vector is made by an array of
$(m/hackSize)+1$ elements, pointing to the first diagonal offset of a
submatrix inside the stacked \textit{offsets} buffers, plus an
additional element equal to the number of nonzero diagonals in the whole matrix.
We thus have the property that
the number of diagonals of the $k$-th \textit{hack} is given by
\textit{hackOffsets[k+1] - hackOffsets[k]}.
\begin{figure}[ht]
\centering
% \includegraphics[width=8.2cm]{../figures/hdia.eps}
\includegraphics[width=.72\textwidth]{../figures/hdia.pdf}
\caption{Hacked DIA compression of matrix in Figure~\ref{fig:dense}}
\label{fig:hdia}
\end{figure}
The relevant data type is \verb|psb_T_hdia_sparse_mat|:
\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran}
type pm
real(psb_dpk_), allocatable :: data(:,:)
end type pm
type po
integer(psb_ipk_), allocatable :: off(:)
end type po
type, extends(psb_d_base_sparse_mat) :: psb_d_hdia_sparse_mat
!
! HDIA format, extended.
!
type(pm), allocatable :: hdia(:)
type(po), allocatable :: offset(:)
integer(psb_ipk_) :: nblocks, nzeros
integer(psb_ipk_) :: hack = 64
integer(psb_long_int_k_) :: dim=0
contains
....
end type
\end{minted}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

@ -0,0 +1 @@
tmp/userguide.pdf

@ -17,6 +17,7 @@
\newtheorem{theorem}{Theorem}
\newtheorem{corollary}{Corollary}
\usepackage{listings}
\usepackage{algorithm2e}
\usepackage{minted}
\usemintedstyle{friendly}
\definecolor{bg}{rgb}{0.95,0.95,0.95}
@ -36,7 +37,7 @@
\relax
\pdfcompresslevel=0 %-- 0 = none, 9 = best
\pdfinfo{ %-- Info dictionary of PDF output /Author (Alfredo Buttari)
/Title (Parallel Sparse BLAS V. 3.8.0)
/Title (Parallel Sparse BLAS V. 3.9.0)
/Subject (Parallel Sparse Basic Linear Algebra Subroutines)
/Keywords (Computer Science Linear Algebra Fluid Dynamics Parallel Linux MPI PSBLAS Iterative Solvers Preconditioners)
/Creator (pdfLaTeX)
@ -99,7 +100,7 @@
\begin{document}
{
\pdfbookmark{PSBLAS-v3.8.0 User's Guide}{title}
\pdfbookmark{PSBLAS-v3.9.0 User's Guide}{title}
\lstset{language=Fortran}
\newlength{\centeroffset}
\setlength{\centeroffset}{-0.5\oddsidemargin}
@ -109,7 +110,7 @@
\vspace*{\stretch{1}}
\noindent\hspace*{\centeroffset}\makebox[0pt][l]{\begin{minipage}{\textwidth}
\flushright
{\Huge\bfseries PSBLAS 3.8.0 User's guide
{\Huge\bfseries PSBLAS 3.9.0 User's guide
}
\noindent\rule[-1ex]{\textwidth}{5pt}\\[2.5ex]
\hfill\emph{\Large A reference guide for the Parallel Sparse BLAS library}
@ -130,7 +131,7 @@
{\bfseries
by Salvatore Filippone\\
and Alfredo Buttari}\\
May 1st, 2022
Aug 1st, 2024
\end{minipage}}
}
%\addtolength{\textwidth}{\centeroffset}
@ -159,7 +160,8 @@ May 1st, 2022
\include{util}
\include{precs}
\include{methods}
\include{ext-intro}
\include{cuda}
\cleardoublepage
\input{biblio}

@ -17,6 +17,9 @@
\newtheorem{theorem}{Theorem}
\newtheorem{corollary}{Corollary}
\usepackage{listings}
\usepackage{algorithm2e}
\usepackage{minted}
\usemintedstyle{friendly}
\usepackage{microtype}
\ifpdf
\newmintinline[fortinline]{fortran}{}
@ -94,9 +97,9 @@
Alfredo Buttari } \\
%\\[10ex]
%\today
Software version: 3.8.0\\
Software version: 3.9.0\\
%\today
May 1st, 2022
Aug 1st, 2024
\cleardoublepage
\begingroup
\renewcommand*{\thepage}{toc}
@ -120,7 +123,8 @@ May 1st, 2022
\include{util}
\include{precs}
\include{methods}
\include{ext-intro}
\include{cuda}
\cleardoublepage
\input{biblio}

@ -0,0 +1,53 @@
include ../Make.inc
#
# Libraries used
#
PSBLIBDIR=$(PSBLASDIR)/lib/
PSBINCDIR=$(PSBLASDIR)/include
PSBMODDIR=$(PSBLASDIR)/modules
LIBDIR=../lib
INCDIR=../include
MODDIR=../modules
PSBLAS_LIB= -L$(PSBLIBDIR) -lpsb_util -lpsb_base
#-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base
LDLIBS=$(PSBLDLIBS)
#
# Compilers and such
#
#CCOPT= -g
FINCLUDES=$(FMFLAG). $(FMFLAG)$(PSBMODDIR) $(FMFLAG)$(PSBINCDIR) $(FIFLAG). $(LIBRSB_INCLUDES) $(LIBRSB_DEFINES)
CINCLUDES=-I$(GPU_INCDIR) -I$(CUDA_INCDIR)
LIBNAME=libpsb_rsb.a
FOBJS= rsb_mod.o psb_d_rsb_mat_mod.o \
psb_rsb_penv_mod.o psb_rsb_mod.o
COBJS= rsb_int.o
OBJS=$(COBJS) $(FOBJS)
lib: objs ilib
/bin/cp -p $(LIBNAME) $(LIBDIR)
objs: $(OBJS) iobjs
/bin/cp -p *$(.mod) $(MODDIR)
iobjs:
$(MAKE) -C impl objs
ilib: iobjs
$(MAKE) -C impl lib LIBNAME=$(LIBNAME)
clean: cclean iclean
/bin/rm -f $(FOBJS) *$(.mod) *.a
cclean:
/bin/rm -f $(COBJS)
iclean:
cd impl && $(MAKE) clean
verycleanlib:
(cd ../..; make veryclean)

@ -0,0 +1,30 @@
include ../../Make.inc
PSBLIBDIR=$(PSBLASDIR)/lib/
PSBINCDIR=$(PSBLASDIR)/include
PSBMODDIR=$(PSBLASDIR)/modules
LIBDIR=../../lib
INCDIR=../../include
MODDIR=../../modules
PSBLAS_LIB= -L$(PSBLIBDIR) -lpsb_util -lpsb_base
#-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base
LDLIBS=$(PSBLDLIBS)
#
# Compilers and such
#
#CCOPT= -g
FINCLUDES=$(FMFLAG).. $(FMFLAG)$(INCDIR) $(FMFLAG)$(MODDIR) $(FMFLAG)$(PSBMODDIR) $(FMFLAG)$(PSBINCDIR) $(LIBRSB_INCLUDES) $(FIFLAG).. $(LIBRSB_DEFINES)
CINCLUDES=
LIBNAME=libpsb_rsb.a
OBJS= \
psb_d_cp_rsb_from_coo.o \
psb_d_mv_rsb_from_coo.o \
psb_d_cp_rsb_to_coo.o psb_d_rsb_csmv.o
objs: $(OBJS)
lib: objs
ar cur ../$(LIBNAME) $(OBJS)
clean:
/bin/rm -f $(OBJS)

@ -0,0 +1,78 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_d_cp_rsb_from_coo(a,b,info)
use psb_base_mod
use rsb_mod
use psb_d_rsb_mat_mod, psb_protect_name => psb_d_cp_rsb_from_coo
implicit none
class(psb_d_rsb_sparse_mat), intent(inout) :: a
class(psb_d_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
!locals
type(psb_d_coo_sparse_mat) :: tmp
Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc
integer(psb_ipk_) :: nzm, ir, ic, k ,bs
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name
info = psb_success_
#ifdef HAVE_RSB
! This is to have fix_coo called behind the scenes
call b%cp_to_coo(tmp,info)
call tmp%fix(info)
if (info /= psb_success_) return
nr = tmp%get_nrows()
nc = tmp%get_ncols()
nza = tmp%get_nzeros()
! If it is sorted then we can lessen memory impact
a%psb_d_base_sparse_mat = tmp%psb_d_base_sparse_mat
bs = 1!RSB_DEFAULT_BLOCKING
info = Rsb_from_coo(a%rsbMat,b%val,b%ia,b%ja,nza,nr,nc,bs,bs)
call tmp%free()
#endif
return
9999 continue
info = psb_err_alloc_dealloc_
return
end subroutine psb_d_cp_rsb_from_coo

@ -0,0 +1,77 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_d_cp_rsb_to_coo(a,b,info)
use psb_base_mod
use rsb
use psb_d_rsb_mat_mod, psb_protect_name => psb_d_cp_rsb_to_coo
implicit none
class(psb_d_rsb_sparse_mat), intent(in) :: a
class(psb_d_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
real(psb_dpk_), pointer :: val_point(:)
type(c_ptr) :: t_p,s_p
!locals
integer(psb_ipk_) :: i, j, k,nr,nza,nc
info = psb_success_
nr = a%get_nrows()
nc = a%get_ncols()
nza = a%get_nzeros()
call b%allocate(nr,nc,nza)
b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat
allocate(val_point(nza))
t_p = c_loc(val_point(1))
info = rsb_mtx_get_coo(a%rsbMat, t_p, b%ia, b%ja,RSB_FLAG_FORTRAN_INDICES_INTERFACE)
!info = rsb_mtx_switch_to_coo(a%rsbMat,t_p,b%ia,b%ja,RSB_FLAG_FORTRAN_INDICES_INTERFACE)
k = rsb_perror(s_p,info)
do i=1,nza
b%val(i)=val_point(i)
enddo
deallocate(val_point)
call b%set_nzeros(nza)
call b%fix(info)
end subroutine psb_d_cp_rsb_to_coo

@ -0,0 +1,114 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_d_mv_rsb_from_coo(a,b,info)
use psb_base_mod
use psb_d_rsb_mat_mod, psb_protect_name => psb_d_mv_rsb_from_coo
implicit none
class(psb_d_rsb_sparse_mat), intent(inout) :: a
class(psb_d_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
!locals
Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, ir, ic
info = psb_success_
call b%fix(info)
if (info /= psb_success_) return
nr = b%get_nrows()
nc = b%get_ncols()
nza = b%get_nzeros()
! if (b%is_sorted()) then
! ! If it is sorted then we can lessen memory impact
! a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat
! ! First compute the number of nonzeros in each row.
! call psb_realloc(nr,a%irn,info)
! if (info /= 0) goto 9999
! a%irn = 0
! do i=1, nza
! a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1
! end do
! nzm = 0
! do i=1, nr
! nzm = max(nzm,a%irn(i))
! a%irn(i) = 0
! end do
! ! Second: copy the column indices.
! call psb_realloc(nr,a%idiag,info)
! if (info == 0) call psb_realloc(nr,nzm,a%ja,info)
! if (info /= 0) goto 9999
! do i=1, nza
! ir = b%ia(i)
! ic = b%ja(i)
! j = a%irn(ir) + 1
! a%ja(ir,j) = ic
! a%irn(ir) = j
! end do
! ! Third copy the other stuff
! deallocate(b%ia,b%ja,stat=info)
! if (info == 0) call psb_realloc(nr,a%idiag,info)
! if (info == 0) call psb_realloc(nr,nzm,a%val,info)
! if (info /= 0) goto 9999
! k = 0
! do i=1, nr
! a%idiag(i) = 0
! do j=1, a%irn(i)
! k = k + 1
! a%val(i,j) = b%val(k)
! if (i==a%ja(i,j)) a%idiag(i)=j
! end do
! do j=a%irn(i)+1, nzm
! a%ja(i,j) = i
! a%val(i,j) = dzero
! end do
! end do
! else
! If b is not sorted, the only way is to copy.
call a%cp_from_coo(b,info)
if (info /= 0) goto 9999
! end if
call b%free()
return
9999 continue
info = psb_err_alloc_dealloc_
return
end subroutine psb_d_mv_rsb_from_coo

@ -0,0 +1,138 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_d_rsb_csmv(alpha,a,x,beta,y,info,trans)
use psb_base_mod
use rsb_mod
use psb_d_rsb_mat_mod, psb_protect_name => psb_d_rsb_csmv
implicit none
class(psb_d_rsb_sparse_mat), intent(in) :: a
real(psb_dpk_), intent(in) :: alpha, beta, x(:)
real(psb_dpk_), intent(inout) :: y(:)
integer, intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer :: i,j,k,m,n, nnz, ir, jc
real(psb_dpk_) :: acc
type(c_ptr) :: gpX, gpY
logical :: tra
Integer :: err_act
character(len=20) :: name='d_rsb_csmv'
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
#ifdef HAVE_RSB
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
if (tra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1)<n) then
info = 36
call psb_errpush(info,name,i_err=(/3*ione,n,izero,izero,izero/))
goto 9999
end if
if (size(y,1)<m) then
info = 36
call psb_errpush(info,name,i_err=(/5*ione,m,izero,izero,izero/))
goto 9999
end if
! if (tra) then
! call a%psb_d_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans)
! else
info = Rsb_spmv(a%rsbMat,x,alpha,y,beta,trans_)
if (info /= 0) goto 9999
! if (info == 0) &
! & info = FallocMultiVecDevice(gpX,1,size(x,1),spgpu_type_double)
! if (alpha /= dzero) then
! if (info == 0) &
! & info = writeMultiVecDevice(gpX,x)
! end if
! if (info == 0) &
! & info = FallocMultiVecDevice(gpY,1,size(y,1),spgpu_type_double)
! if (beta /= dzero) then
! if (info == 0) &
! & info = writeMultiVecDevice(gpY,y)
! end if
! if (info == 0) &
! & info = spmvHllDevice(a%deviceMat,alpha,gpX,beta,gpY)
! if (info == 0) &
! & info = readMultiVecDevice(gpY,y)
! if (info /= 0) goto 9999
! call freeMultiVecDevice(gpX)
! call freeMultiVecDevice(gpY)
! endif
! #else
! call a%psb_d_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans)
! write(*,*) y
#endif
call psb_erractionrestore(err_act)
return
9999 continue
call psb_erractionrestore(err_act)
if (err_act == psb_act_abort_) then
call psb_error()
return
end if
return
end subroutine psb_d_rsb_csmv

@ -0,0 +1,487 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
module psb_d_rsb_mat_mod
use psb_d_base_mat_mod
use iso_c_binding
type, extends(psb_d_base_sparse_mat) :: psb_d_rsb_sparse_mat
type(c_ptr) :: rsbMat = c_null_ptr
contains
! procedure, pass(a) :: get_size => d_rsb_get_size
procedure, pass(a) :: get_nzeros => d_rsb_get_nzeros
procedure, nopass :: get_fmt => d_rsb_get_fmt
procedure, pass(a) :: sizeof => d_rsb_sizeof
! procedure, pass(a) :: csmm => psb_d_rsb_csmm
procedure, pass(a) :: csmv => psb_d_rsb_csmv
! procedure, pass(a) :: inner_cssm => psb_d_rsb_cssm
! procedure, pass(a) :: inner_cssv => psb_d_rsb_cssv
! procedure, pass(a) :: scals => psb_d_rsb_scals
! procedure, pass(a) :: scalv => psb_d_rsb_scal
! procedure, pass(a) :: maxval => psb_d_rsb_maxval
! procedure, pass(a) :: csnmi => psb_d_rsb_csnmi
! procedure, pass(a) :: csnm1 => psb_d_rsb_csnm1
! procedure, pass(a) :: rowsum => psb_d_rsb_rowsum
! procedure, pass(a) :: arwsum => psb_d_rsb_arwsum
! procedure, pass(a) :: colsum => psb_d_rsb_colsum
! procedure, pass(a) :: aclsum => psb_d_rsb_aclsum
! procedure, pass(a) :: reallocate_nz => psb_d_rsb_reallocate_nz
! procedure, pass(a) :: allocate_mnnz => psb_d_rsb_allocate_mnnz
procedure, pass(a) :: cp_to_coo => psb_d_cp_rsb_to_coo
procedure, pass(a) :: cp_from_coo => psb_d_cp_rsb_from_coo
! procedure, pass(a) :: cp_to_fmt => psb_d_cp_rsb_to_fmt
! procedure, pass(a) :: cp_from_fmt => psb_d_cp_rsb_from_fmt
! procedure, pass(a) :: mv_to_coo => psb_d_mv_rsb_to_coo
procedure, pass(a) :: mv_from_coo => psb_d_mv_rsb_from_coo
! procedure, pass(a) :: mv_to_fmt => psb_d_mv_rsb_to_fmt
! procedure, pass(a) :: mv_from_fmt => psb_d_mv_rsb_from_fmt
! procedure, pass(a) :: csput => psb_d_rsb_csput
! procedure, pass(a) :: get_diag => psb_d_rsb_get_diag
! procedure, pass(a) :: csgetptn => psb_d_rsb_csgetptn
! procedure, pass(a) :: csgetrow => psb_d_rsb_csgetrow
! procedure, pass(a) :: get_nz_row => d_rsb_get_nz_row
! procedure, pass(a) :: reinit => psb_d_rsb_reinit
! procedure, pass(a) :: trim => psb_d_rsb_trim
! procedure, pass(a) :: print => psb_d_rsb_print
procedure, pass(a) :: free => d_rsb_free
! procedure, pass(a) :: mold => psb_d_rsb_mold
end type psb_d_rsb_sparse_mat
private :: d_rsb_get_nzeros, d_rsb_free, d_rsb_get_fmt, &
& d_rsb_get_size, d_rsb_sizeof, d_rsb_get_nz_row
interface
subroutine psb_d_rsb_reallocate_nz(nz,a)
import :: psb_d_rsb_sparse_mat, psb_ipk_
integer(psb_ipk_), intent(in) :: nz
class(psb_d_rsb_sparse_mat), intent(inout) :: a
end subroutine psb_d_rsb_reallocate_nz
end interface
interface
subroutine psb_d_rsb_reinit(a,clear)
import :: psb_d_rsb_sparse_mat
class(psb_d_rsb_sparse_mat), intent(inout) :: a
logical, intent(in), optional :: clear
end subroutine psb_d_rsb_reinit
end interface
interface
subroutine psb_d_rsb_trim(a)
import :: psb_d_rsb_sparse_mat
class(psb_d_rsb_sparse_mat), intent(inout) :: a
end subroutine psb_d_rsb_trim
end interface
interface
subroutine psb_d_rsb_mold(a,b,info)
import :: psb_d_rsb_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
class(psb_d_rsb_sparse_mat), intent(in) :: a
class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_rsb_mold
end interface
interface
subroutine psb_d_rsb_allocate_mnnz(m,n,a,nz)
import :: psb_d_rsb_sparse_mat, psb_ipk_
integer(psb_ipk_), intent(in) :: m,n
class(psb_d_rsb_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(in), optional :: nz
end subroutine psb_d_rsb_allocate_mnnz
end interface
interface
subroutine psb_d_rsb_print(iout,a,iv,head,ivr,ivc)
import :: psb_d_rsb_sparse_mat, psb_ipk_
integer(psb_ipk_), intent(in) :: iout
class(psb_d_rsb_sparse_mat), intent(in) :: a
integer(psb_ipk_), intent(in), optional :: iv(:)
character(len=*), optional :: head
integer(psb_ipk_), intent(in), optional :: ivr(:), ivc(:)
end subroutine psb_d_rsb_print
end interface
interface
subroutine psb_d_cp_rsb_to_coo(a,b,info)
import :: psb_d_coo_sparse_mat, psb_d_rsb_sparse_mat, psb_ipk_
class(psb_d_rsb_sparse_mat), intent(in) :: a
class(psb_d_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_cp_rsb_to_coo
end interface
interface
subroutine psb_d_cp_rsb_from_coo(a,b,info)
import :: psb_d_rsb_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_
class(psb_d_rsb_sparse_mat), intent(inout) :: a
class(psb_d_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_cp_rsb_from_coo
end interface
interface
subroutine psb_d_cp_rsb_to_fmt(a,b,info)
import :: psb_d_rsb_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
class(psb_d_rsb_sparse_mat), intent(in) :: a
class(psb_d_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_cp_rsb_to_fmt
end interface
interface
subroutine psb_d_cp_rsb_from_fmt(a,b,info)
import :: psb_d_rsb_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
class(psb_d_rsb_sparse_mat), intent(inout) :: a
class(psb_d_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_cp_rsb_from_fmt
end interface
interface
subroutine psb_d_mv_rsb_to_coo(a,b,info)
import :: psb_d_rsb_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_
class(psb_d_rsb_sparse_mat), intent(inout) :: a
class(psb_d_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_mv_rsb_to_coo
end interface
interface
subroutine psb_d_mv_rsb_from_coo(a,b,info)
import :: psb_d_rsb_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_
class(psb_d_rsb_sparse_mat), intent(inout) :: a
class(psb_d_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_mv_rsb_from_coo
end interface
interface
subroutine psb_d_mv_rsb_to_fmt(a,b,info)
import :: psb_d_rsb_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
class(psb_d_rsb_sparse_mat), intent(inout) :: a
class(psb_d_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_mv_rsb_to_fmt
end interface
interface
subroutine psb_d_mv_rsb_from_fmt(a,b,info)
import :: psb_d_rsb_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
class(psb_d_rsb_sparse_mat), intent(inout) :: a
class(psb_d_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_mv_rsb_from_fmt
end interface
interface
subroutine psb_d_rsb_csput(nz,ia,ja,val,a,imin,imax,jmin,jmax,info,gtl)
import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_
class(psb_d_rsb_sparse_mat), intent(inout) :: a
real(psb_dpk_), intent(in) :: val(:)
integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),&
& imin,imax,jmin,jmax
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in), optional :: gtl(:)
end subroutine psb_d_rsb_csput
end interface
interface
subroutine psb_d_rsb_csgetptn(imin,imax,a,nz,ia,ja,info,&
& jmin,jmax,iren,append,nzin,rscale,cscale)
import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_
class(psb_d_rsb_sparse_mat), intent(in) :: a
integer(psb_ipk_), intent(in) :: imin,imax
integer(psb_ipk_), intent(out) :: nz
integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:)
integer(psb_ipk_),intent(out) :: info
logical, intent(in), optional :: append
integer(psb_ipk_), intent(in), optional :: iren(:)
integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin
logical, intent(in), optional :: rscale,cscale
end subroutine psb_d_rsb_csgetptn
end interface
interface
subroutine psb_d_rsb_csgetrow(imin,imax,a,nz,ia,ja,val,info,&
& jmin,jmax,iren,append,nzin,rscale,cscale)
import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_
class(psb_d_rsb_sparse_mat), intent(in) :: a
integer(psb_ipk_), intent(in) :: imin,imax
integer(psb_ipk_), intent(out) :: nz
integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:)
real(psb_dpk_), allocatable, intent(inout) :: val(:)
integer(psb_ipk_),intent(out) :: info
logical, intent(in), optional :: append
integer(psb_ipk_), intent(in), optional :: iren(:)
integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin
logical, intent(in), optional :: rscale,cscale
end subroutine psb_d_rsb_csgetrow
end interface
interface
subroutine psb_d_rsb_csgetblk(imin,imax,a,b,info,&
& jmin,jmax,iren,append,rscale,cscale)
import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_d_coo_sparse_mat, psb_ipk_
class(psb_d_rsb_sparse_mat), intent(in) :: a
class(psb_d_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(in) :: imin,imax
integer(psb_ipk_),intent(out) :: info
logical, intent(in), optional :: append
integer(psb_ipk_), intent(in), optional :: iren(:)
integer(psb_ipk_), intent(in), optional :: jmin,jmax
logical, intent(in), optional :: rscale,cscale
end subroutine psb_d_rsb_csgetblk
end interface
interface
subroutine psb_d_rsb_cssv(alpha,a,x,beta,y,info,trans)
import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_
class(psb_d_rsb_sparse_mat), intent(in) :: a
real(psb_dpk_), intent(in) :: alpha, beta, x(:)
real(psb_dpk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
end subroutine psb_d_rsb_cssv
subroutine psb_d_rsb_cssm(alpha,a,x,beta,y,info,trans)
import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_
class(psb_d_rsb_sparse_mat), intent(in) :: a
real(psb_dpk_), intent(in) :: alpha, beta, x(:,:)
real(psb_dpk_), intent(inout) :: y(:,:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
end subroutine psb_d_rsb_cssm
end interface
interface
subroutine psb_d_rsb_csmv(alpha,a,x,beta,y,info,trans)
import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_
class(psb_d_rsb_sparse_mat), intent(in) :: a
real(psb_dpk_), intent(in) :: alpha, beta, x(:)
real(psb_dpk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
end subroutine psb_d_rsb_csmv
subroutine psb_d_rsb_csmm(alpha,a,x,beta,y,info,trans)
import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_
class(psb_d_rsb_sparse_mat), intent(in) :: a
real(psb_dpk_), intent(in) :: alpha, beta, x(:,:)
real(psb_dpk_), intent(inout) :: y(:,:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
end subroutine psb_d_rsb_csmm
end interface
interface
function psb_d_rsb_maxval(a) result(res)
import :: psb_d_rsb_sparse_mat, psb_dpk_
class(psb_d_rsb_sparse_mat), intent(in) :: a
real(psb_dpk_) :: res
end function psb_d_rsb_maxval
end interface
interface
function psb_d_rsb_csnmi(a) result(res)
import :: psb_d_rsb_sparse_mat, psb_dpk_
class(psb_d_rsb_sparse_mat), intent(in) :: a
real(psb_dpk_) :: res
end function psb_d_rsb_csnmi
end interface
interface
function psb_d_rsb_csnm1(a) result(res)
import :: psb_d_rsb_sparse_mat, psb_dpk_
class(psb_d_rsb_sparse_mat), intent(in) :: a
real(psb_dpk_) :: res
end function psb_d_rsb_csnm1
end interface
interface
subroutine psb_d_rsb_rowsum(d,a)
import :: psb_d_rsb_sparse_mat, psb_dpk_
class(psb_d_rsb_sparse_mat), intent(in) :: a
real(psb_dpk_), intent(out) :: d(:)
end subroutine psb_d_rsb_rowsum
end interface
interface
subroutine psb_d_rsb_arwsum(d,a)
import :: psb_d_rsb_sparse_mat, psb_dpk_
class(psb_d_rsb_sparse_mat), intent(in) :: a
real(psb_dpk_), intent(out) :: d(:)
end subroutine psb_d_rsb_arwsum
end interface
interface
subroutine psb_d_rsb_colsum(d,a)
import :: psb_d_rsb_sparse_mat, psb_dpk_
class(psb_d_rsb_sparse_mat), intent(in) :: a
real(psb_dpk_), intent(out) :: d(:)
end subroutine psb_d_rsb_colsum
end interface
interface
subroutine psb_d_rsb_aclsum(d,a)
import :: psb_d_rsb_sparse_mat, psb_dpk_
class(psb_d_rsb_sparse_mat), intent(in) :: a
real(psb_dpk_), intent(out) :: d(:)
end subroutine psb_d_rsb_aclsum
end interface
interface
subroutine psb_d_rsb_get_diag(a,d,info)
import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_
class(psb_d_rsb_sparse_mat), intent(in) :: a
real(psb_dpk_), intent(out) :: d(:)
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_rsb_get_diag
end interface
interface
subroutine psb_d_rsb_scal(d,a,info,side)
import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_
class(psb_d_rsb_sparse_mat), intent(inout) :: a
real(psb_dpk_), intent(in) :: d(:)
integer(psb_ipk_), intent(out) :: info
character, intent(in), optional :: side
end subroutine psb_d_rsb_scal
end interface
interface
subroutine psb_d_rsb_scals(d,a,info)
import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_
class(psb_d_rsb_sparse_mat), intent(inout) :: a
real(psb_dpk_), intent(in) :: d
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_rsb_scals
end interface
contains
! == ===================================
!
!
!
! Getters
!
!
!
!
!
! == ===================================
function d_rsb_sizeof(a) result(res)
implicit none
class(psb_d_rsb_sparse_mat), intent(in) :: a
integer(psb_epk_) :: res
end function d_rsb_sizeof
function d_rsb_get_fmt() result(res)
implicit none
character(len=5) :: res
res = 'RSB'
end function d_rsb_get_fmt
function d_rsb_get_nzeros(a) result(res)
use rsb_mod
implicit none
class(psb_d_rsb_sparse_mat), intent(in) :: a
integer(psb_ipk_) :: res
res = Rsb_get_nzeros(a%rsbMat)
end function d_rsb_get_nzeros
function d_rsb_get_size(a) result(res)
implicit none
class(psb_d_rsb_sparse_mat), intent(in) :: a
integer(psb_ipk_) :: res
end function d_rsb_get_size
function d_rsb_get_nz_row(idx,a) result(res)
implicit none
class(psb_d_rsb_sparse_mat), intent(in) :: a
integer(psb_ipk_), intent(in) :: idx
integer(psb_ipk_) :: res
res = 0
end function d_rsb_get_nz_row
! == ===================================
!
!
!
! Data management
!
!
!
!
!
! == ===================================
subroutine d_rsb_free(a)
use rsb_mod
implicit none
class(psb_d_rsb_sparse_mat), intent(inout) :: a
call freeRsbMat(a%rsbMat)
call a%set_null()
call a%set_nrows(0)
call a%set_ncols(0)
return
end subroutine d_rsb_free
end module psb_d_rsb_mat_mod

@ -0,0 +1,50 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
module psb_rsb_mod
use psb_const_mod
use rsb_mod
use psb_rsb_penv_mod
! use psb_d_ell_mat_mod
! use psb_s_ell_mat_mod
! use psb_z_ell_mat_mod
! use psb_c_ell_mat_mod
! use psb_s_hll_mat_mod
! use psb_d_hll_mat_mod
! use psb_c_hll_mat_mod
! use psb_z_hll_mat_mod
! use psb_d_dia_mat_mod
! use psb_d_hdia_mat_mod
use psb_d_rsb_mat_mod
end module psb_rsb_mod

@ -0,0 +1,99 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
module psb_rsb_penv_mod
use psb_const_mod
use psb_penv_mod
!use psi_comm_buffers_mod, only : psb_buffer_queue
use iso_c_binding
! interface psb_rsb_init
! module procedure psb_rsb_init
! end interface
#if defined(HAVE_RSB)
interface
function psb_C_rsb_init() &
& result(res) bind(c,name='rsbInit')
use iso_c_binding
integer(c_int) :: res
end function psb_C_rsb_init
end interface
interface
function psb_C_rsb_exit() &
& result(res) bind(c,name='rsbExit')
use iso_c_binding
integer(c_int) :: res
end function psb_C_rsb_exit
end interface
#endif
contains
! !!!!!!!!!!!!!!!!!!!!!!
!
! Environment handling
!
! !!!!!!!!!!!!!!!!!!!!!!
subroutine psb_rsb_init()
use psb_penv_mod
use psb_const_mod
use psb_error_mod
! type(psb_ctxt_type), intent(in) :: ctxt
! integer, intent(in), optional :: dev
integer :: info
#if defined (HAVE_RSB)
info = psb_C_rsb_init()
if (info/=0) write(*,*) 'error during rsb_init'
#endif
end subroutine psb_rsb_init
subroutine psb_rsb_exit()
use psb_penv_mod
use psb_const_mod
use psb_error_mod
! type(psb_ctxt_type), intent(in) :: ctxt
! integer, intent(in), optional :: dev
integer :: info
#if defined (HAVE_RSB)
info = psb_C_rsb_exit()
if (info/=0) write(*,*) 'error during rsb_exit'
#endif
end subroutine psb_rsb_exit
end module psb_rsb_penv_mod

@ -0,0 +1,110 @@
/* Parallel Sparse BLAS GPU plugin */
/* (C) Copyright 2013 */
/* Salvatore Filippone */
/* Alessandro Fanfarillo */
/* Redistribution and use in source and binary forms, with or without */
/* modification, are permitted provided that the following conditions */
/* are met: */
/* 1. Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* 2. Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions, and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution. */
/* 3. The name of the PSBLAS group or the names of its contributors may */
/* not be used to endorse or promote products derived from this */
/* software without specific written permission. */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
#include <sys/time.h>
#if defined(HAVE_RSB)
#include "rsb.h"
#include "rsb_int.h"
int rsbInit()
{
rsb_err_t errval = RSB_ERR_NO_ERROR;
if((errval = rsb_lib_init(RSB_NULL_INIT_OPTIONS))!=RSB_ERR_NO_ERROR)
{
printf("Error initializing the library!\n");
return 1;
}
return 0;
}
int rsbExit()
{
rsb_err_t errval = RSB_ERR_NO_ERROR;
if((errval = rsb_lib_exit(RSB_NULL_INIT_OPTIONS))!=RSB_ERR_NO_ERROR)
{
printf("Error finalizing the library!\n");
return 1;
}
return 0;
}
int Rsb_double_from_coo(void **rsbMat, double *va, int *ia,int *ja,int nnz,int nr,
int nc, int br, int bc)
{
int i=0;
rsb_err_t errval = RSB_ERR_NO_ERROR;
*rsbMat = rsb_mtx_alloc_from_coo_const(va,ia,ja,nnz,RSB_NUMERICAL_TYPE_DOUBLE,nr,nc,br,bc,RSB_FLAG_FORTRAN_INDICES_INTERFACE,&errval);
if((!*rsbMat) || (errval != RSB_ERR_NO_ERROR))
{
printf("Error while allocating the matrix!\n");
return 1;
}
return 0;
}
//X is the input and y is the output
int Rsb_double_spmv(void *rsbMat, double *x, double alfa, double *y, double beta,char trans)
{
rsb_err_t errval = RSB_ERR_NO_ERROR;
if(trans=='N')
errval = rsb_spmv(RSB_TRANSPOSITION_N,&alfa,(struct rsb_mtx_t *)rsbMat,x,1,&beta,y,1);
else
errval = rsb_spmv(RSB_TRANSPOSITION_T,&alfa,(struct rsb_mtx_t *)rsbMat,x,1,&beta,y,1);
if(errval != RSB_ERR_NO_ERROR)
{
printf("Error performing a multiplication!\n");
return 1;
}
return 0;
}
//Should it return a long instead of integer?
int Rsb_getNZeros(void *rsbMat)
{
int res = 0;
rsb_mtx_get_info((struct rsb_mtx_t *)rsbMat,RSB_MIF_MATRIX_NNZ__TO__RSB_NNZ_INDEX_T,(void *)&res);
return res;
}
void freeRsbMat(void *rsbMat)
{
rsb_mtx_free(rsbMat);
}
#endif

@ -0,0 +1,2 @@
int Rsb_double_from_coo(void **rsbMat,double *va, int *ia,int *ja,int nnz,int nr,
int nc, int br, int bc);

@ -0,0 +1,235 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
module rsb_mod
use rsb
use iso_c_binding
#ifdef HAVE_RSB
interface Rsb_from_coo
function Rsb_double_from_coo(rsbMat,va,ia,ja,nnz,nr,nc,br,bc) &
& result(res) bind(c,name='Rsb_double_from_coo')
use iso_c_binding
integer(c_int) :: res
type(c_ptr) :: rsbMat
real(c_double) :: va(*)
integer(c_int) :: ia(*),ja(*)
integer(c_int),value :: nnz,nr,nc,br,bc
end function Rsb_double_from_coo
end interface Rsb_from_coo
interface
function Rsb_get_nzeros(rsbMat) &
& result(res) bind(c,name='Rsb_getNZeros')
use iso_c_binding
integer(c_int) :: res
type(c_ptr),value :: rsbMat
end function Rsb_get_nzeros
end interface
interface Rsb_spmv
function Rsb_double_spmv(rsbMat,x,alfa,y,beta,trans) &
& result(res) bind(c,name='Rsb_double_spmv')
use iso_c_binding
integer(c_int) :: res
type(c_ptr),value :: rsbMat
real(c_double) :: x(*),y(*)
real(c_double),value :: alfa,beta
character(c_char),value :: trans
end function Rsb_double_spmv
end interface Rsb_spmv
interface
subroutine freeRsbMat(rsbMat) &
& bind(c,name='freeRsbMat')
use iso_c_binding
type(c_ptr), value :: rsbMat
end subroutine freeRsbMat
end interface
! interface writeEllDevice
! function writeEllDeviceFloat(deviceMat,val,ja,ldj,irn) &
! & result(res) bind(c,name='writeEllDeviceFloat')
! use iso_c_binding
! integer(c_int) :: res
! type(c_ptr), value :: deviceMat
! integer(c_int), value :: ldj
! real(c_float) :: val(ldj,*)
! integer(c_int) :: ja(ldj,*),irn(*)
! end function writeEllDeviceFloat
! function writeEllDeviceDouble(deviceMat,val,ja,ldj,irn) &
! & result(res) bind(c,name='writeEllDeviceDouble')
! use iso_c_binding
! integer(c_int) :: res
! type(c_ptr), value :: deviceMat
! integer(c_int), value :: ldj
! real(c_double) :: val(ldj,*)
! integer(c_int) :: ja(ldj,*),irn(*)
! end function writeEllDeviceDouble
! function writeEllDeviceFloatComplex(deviceMat,val,ja,ldj,irn) &
! & result(res) bind(c,name='writeEllDeviceFloatComplex')
! use iso_c_binding
! integer(c_int) :: res
! type(c_ptr), value :: deviceMat
! integer(c_int), value :: ldj
! complex(c_float_complex) :: val(ldj,*)
! integer(c_int) :: ja(ldj,*),irn(*)
! end function writeEllDeviceFloatComplex
! function writeEllDeviceDoubleComplex(deviceMat,val,ja,ldj,irn) &
! & result(res) bind(c,name='writeEllDeviceDoubleComplex')
! use iso_c_binding
! integer(c_int) :: res
! type(c_ptr), value :: deviceMat
! integer(c_int), value :: ldj
! complex(c_double_complex) :: val(ldj,*)
! integer(c_int) :: ja(ldj,*),irn(*)
! end function writeEllDeviceDoubleComplex
! end interface writeEllDevice
! interface readEllDevice
! function readEllDeviceFloat(deviceMat,val,ja,ldj,irn) &
! & result(res) bind(c,name='readEllDeviceFloat')
! use iso_c_binding
! integer(c_int) :: res
! type(c_ptr), value :: deviceMat
! integer(c_int), value :: ldj
! real(c_float) :: val(ldj,*)
! integer(c_int) :: ja(ldj,*),irn(*)
! end function readEllDeviceFloat
! function readEllDeviceDouble(deviceMat,val,ja,ldj,irn) &
! & result(res) bind(c,name='readEllDeviceDouble')
! use iso_c_binding
! integer(c_int) :: res
! type(c_ptr), value :: deviceMat
! integer(c_int), value :: ldj
! real(c_double) :: val(ldj,*)
! integer(c_int) :: ja(ldj,*),irn(*)
! end function readEllDeviceDouble
! function readEllDeviceFloatComplex(deviceMat,val,ja,ldj,irn) &
! & result(res) bind(c,name='readEllDeviceFloatComplex')
! use iso_c_binding
! integer(c_int) :: res
! type(c_ptr), value :: deviceMat
! integer(c_int), value :: ldj
! complex(c_float_complex) :: val(ldj,*)
! integer(c_int) :: ja(ldj,*),irn(*)
! end function readEllDeviceFloatComplex
! function readEllDeviceDoubleComplex(deviceMat,val,ja,ldj,irn) &
! & result(res) bind(c,name='readEllDeviceDoubleComplex')
! use iso_c_binding
! integer(c_int) :: res
! type(c_ptr), value :: deviceMat
! integer(c_int), value :: ldj
! complex(c_double_complex) :: val(ldj,*)
! integer(c_int) :: ja(ldj,*),irn(*)
! end function readEllDeviceDoubleComplex
! end interface readEllDevice
! interface
! subroutine resetEllTimer() bind(c,name='resetEllTimer')
! use iso_c_binding
! end subroutine resetEllTimer
! end interface
! interface
! function getEllTimer() &
! & bind(c,name='getEllTimer') result(res)
! use iso_c_binding
! real(c_double) :: res
! end function getEllTimer
! end interface
! interface
! function getEllDevicePitch(deviceMat) &
! & bind(c,name='getEllDevicePitch') result(res)
! use iso_c_binding
! type(c_ptr), value :: deviceMat
! integer(c_int) :: res
! end function getEllDevicePitch
! end interface
! interface
! function getEllDeviceMaxRowSize(deviceMat) &
! & bind(c,name='getEllDeviceMaxRowSize') result(res)
! use iso_c_binding
! type(c_ptr), value :: deviceMat
! integer(c_int) :: res
! end function getEllDeviceMaxRowSize
! end interface
! interface spmvEllDevice
! function spmvEllDeviceFloat(deviceMat,alpha,x,beta,y) &
! & result(res) bind(c,name='spmvEllDeviceFloat')
! use iso_c_binding
! integer(c_int) :: res
! type(c_ptr), value :: deviceMat, x, y
! real(c_float),value :: alpha, beta
! end function spmvEllDeviceFloat
! function spmvEllDeviceDouble(deviceMat,alpha,x,beta,y) &
! & result(res) bind(c,name='spmvEllDeviceDouble')
! use iso_c_binding
! integer(c_int) :: res
! type(c_ptr), value :: deviceMat, x, y
! real(c_double),value :: alpha, beta
! end function spmvEllDeviceDouble
! function spmvEllDeviceFloatComplex(deviceMat,alpha,x,beta,y) &
! & result(res) bind(c,name='spmvEllDeviceFloatComplex')
! use iso_c_binding
! integer(c_int) :: res
! type(c_ptr), value :: deviceMat, x, y
! complex(c_float_complex),value :: alpha, beta
! end function spmvEllDeviceFloatComplex
! function spmvEllDeviceDoubleComplex(deviceMat,alpha,x,beta,y) &
! & result(res) bind(c,name='spmvEllDeviceDoubleComplex')
! use iso_c_binding
! integer(c_int) :: res
! type(c_ptr), value :: deviceMat, x, y
! complex(c_double_complex),value :: alpha, beta
! end function spmvEllDeviceDoubleComplex
! end interface spmvEllDevice
#endif
end module rsb_mod
Loading…
Cancel
Save