Updated docs

7 months ago · 681ea2fff7
parent 9f2b8a2623
commit 681ea2fff7
29 changed files with 25661 additions and 16733 deletions
--- a/config/pac.m4
+++ b/config/pac.m4
@ -2018,6 +2018,75 @@ CPPFLAGS="$SAVE_CPPFLAGS";
 ])dnl 


+dnl @synopsis PAC_ARG_WITH_LIBRSB
+dnl
+dnl Test for --with-librsb="pathname".
+dnl 
+dnl Defines the path to LIBRSB build dir.
+dnl
+dnl note: Renamed after PAC_ARG_WITH_LIBS as in the Trilinos package.
+dnl
+dnl Example use:
+dnl
+dnl PAC_ARG_WITH_LIBRSB
+dnl 
+dnl tests for --with-librsb and pre-pends to LIBRSB_PATH
+dnl
+dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
+dnl
+
+AC_DEFUN(PAC_ARG_WITH_LIBRSB,
+	 [SAVE_LIBS="$LIBS"
+	  SAVE_CPPFLAGS="$CPPFLAGS"
+
+	  AC_ARG_WITH(librsb,
+	  AC_HELP_STRING([--with-librsb], [The directory for LIBRSB, for example,
+ 	  --with-librsb=/opt/packages/librsb]),
+	  [pac_cv_librsb_dir=$withval],
+	  [pac_cv_librsb_dir=''])
+	  
+	  if test "x$pac_cv_librsb_dir" != "x"; then 
+	  LIBS="-L$pac_cv_librsb_dir $LIBS"
+	  RSB_INCLUDES="-I$pac_cv_librsb_dir"
+	  # CPPFLAGS="$GPU_INCLUDES $CUDA_INCLUDES $CPPFLAGS"
+	  RSB_LIBDIR="-L$pac_cv_librsb_dir"
+	  fi
+	  #AC_MSG_CHECKING([librsb dir $pac_cv_librsb_dir])
+	  AC_CHECK_HEADER([$pac_cv_librsb_dir/rsb.h],
+			  [pac_rsb_header_ok=yes],
+			  [pac_rsb_header_ok=no; RSB_INCLUDES=""])
+	  
+	  if test "x$pac_rsb_header_ok" == "xyes" ; then 
+	  RSB_LIBS="-lrsb $RSB_LIBDIR"
+	  # LIBS="$GPU_LIBS $CUDA_LIBS -lm $LIBS";
+	  # AC_MSG_CHECKING([for spgpuCreate in $GPU_LIBS])
+	  # AC_TRY_LINK_FUNC(spgpuCreate, 
+	  # 		   [pac_cv_have_spgpu=yes;pac_gpu_lib_ok=yes; ],
+	  # 		   [pac_cv_have_spgpu=no;pac_gpu_lib_ok=no; GPU_LIBS=""])
+	  # AC_MSG_RESULT($pac_gpu_lib_ok)
+	  # if test "x$pac_cv_have_spgpu" == "xyes" ; then 
+	  # AC_MSG_NOTICE([Have found SPGPU])
+	  RSBLIBNAME="librsb.a";
+	  LIBRSB_DIR="$pac_cv_librsb_dir";
+	  # SPGPU_DEFINES="-DHAVE_SPGPU";
+	  LIBRSB_INCDIR="$LIBRSB_DIR";
+	  LIBRSB_INCLUDES="-I$LIBRSB_INCDIR";
+	  LIBRSB_LIBS="-lrsb -L$LIBRSB_DIR";
+	  # CUDA_DIR="$pac_cv_cuda_dir";
+	  LIBRSB_DEFINES="-DHAVE_RSB";
+	  LRSB=-lpsb_rsb
+	  # CUDA_INCLUDES="-I$pac_cv_cuda_dir/include"
+	  # CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib"
+	  FDEFINES="$LIBRSB_DEFINES $psblas_cv_define_prepend $FDEFINES";
+	  CDEFINES="$LIBRSB_DEFINES $CDEFINES";#CDEFINES="-DHAVE_SPGPU -DHAVE_CUDA $CDEFINES";
+	  fi
+#  fi
+LIBS="$SAVE_LIBS"
+CPPFLAGS="$SAVE_CPPFLAGS"
+])
+dnl
+
+
 dnl @synopsis PAC_CHECK_SPGPU
 dnl
 dnl Will try to find the spgpu library and headers.
--- a/configure.ac
+++ b/configure.ac
@ -204,7 +204,7 @@ PAC_ARG_WITH_FLAGS(module-path,MODULE_PATH)

 # we just gave the user the chance to append values to these variables

-###############################################################################
+

 dnl  Library oriented Autotools facilities (we don't care about this for now)

@ -844,6 +844,30 @@ if test "x$pac_cv_ipk_size" != "x4"; then
 fi


+###############################################################################
+PAC_ARG_WITH_LIBRSB
+LIBRSB_DIR="$pac_cv_librsb_dir";
+AC_MSG_CHECKING([for LIBRSB install dir])
+case $LIBRSB_DIR in 
+     /*) ;; 
+     *) dnl AC_MSG_ERROR([The LIBRSB installation dir must be an absolute pathname
+ dnl specified with --with-librsb=/path/to/librsb])
+esac
+dnl if test ! -d "$LIBRSB_DIR" ; then 
+dnl    AC_MSG_ERROR([Could not find LIBRSB build dir $LIBRSB_DIR!])	
+dnl fi
+pac_cv_status_file="$LIBRSB_DIR/librsb.a"
+if test ! -f "$pac_cv_status_file" ; then 
+  AC_MSG_RESULT([no])
+  #AC_MSG_ERROR([Could not find an installation  in $LIBRSB_DIR.])
+else
+  AC_MSG_RESULT([$LIBRSB_DIR])
+  RSBTARGETLIB=rsbd;
+  RSBTARGETOBJ=rsbobj;	
+fi
+
+
+


 ###############################################################################
@ -944,6 +968,14 @@ AC_SUBST(CUDEFINES)
 AC_SUBST(CUDAD)
 AC_SUBST(CUDALD)
 AC_SUBST(LCUDA)
+AC_SUBST(LIBRSB_LIBS)
+AC_SUBST(LIBRSB_INCLUDES)
+AC_SUBST(LIBRSB_INCDIR)
+AC_SUBST(LIBRSB_DIR)
+AC_SUBST(LIBRSB_DEFINES)
+AC_SUBST(LRSB)
+
+
 ###############################################################################
 # the following files will be created by Automake

--- a/docs/html/psblaslibraryext.png
+++ b/docs/html/psblaslibraryext.png
--- a/docs/psblas-3.9.pdf
+++ b/docs/psblas-3.9.pdf
--- a/docs/src/Makefile
+++ b/docs/src/Makefile
@ -86,7 +86,8 @@
 TOPFILE   = userguide.tex
 HTMLFILE  = userhtml.tex
 SECFILE   = intro.tex commrout.tex datastruct.tex psbrout.tex toolsrout.tex\
-	methods.tex precs.tex penv.tex error.tex util.tex biblio.tex
+	methods.tex precs.tex penv.tex error.tex util.tex biblio.tex \
+        ext-intro.tex cuda.tex
 FIGDIR    = figures

 XPDFFLAGS = 
@ -139,7 +140,7 @@ PDF      = $(join $(BASEFILE),.pdf)
 PS       = $(join $(BASEFILE),.ps)
 GXS      = $(join $(BASEFILE),.gxs)
 GLX      = $(join $(BASEFILE),.glx)
-TARGETPDF= ../psblas-3.8.pdf
+TARGETPDF= ../psblas-3.9.pdf
 BASEHTML = $(patsubst %.tex,%,$(HTMLFILE))
 HTML     = $(join $(BASEHTML),.html)
 HTMLDIR  = ../html
--- a/docs/src/biblio.tex
+++ b/docs/src/biblio.tex
@ -1,9 +1,5 @@

 \begin{thebibliography}{99}
-\bibitem{DesPat:11}
- D.~Barbieri, V.~Cardellini, S.~Filippone and D.~Rouson
-{\em Design Patterns for Scientific Computations on Sparse Matrices},
- HPSS 2011, Algorithms and Programming Tools for Next-Generation High-Performance Scientific Software, Bordeaux, Sep. 2011

 \bibitem{PARA04FOREST}
 G.~Bella, S.~Filippone, A.~De Maio and M.~Testa,
@ -154,6 +150,11 @@ Lawson, C.,  Hanson, R., Kincaid, D. and Krogh, F.,
 {\em Fortran 95/2003 explained.}
 {Oxford University Press}, 2004.
 %
+\bibitem{MRC:11}
+{Metcalf, M., Reid, J. and Cohen, M.}
+{\em Modern Fortran  explained.}
+{Oxford University Press}, 2011.
+%
 %% \bibitem{DD2}
 %% B.~Smith, P.~Bjorstad and W.~Gropp,
 %% {\em Domain Decomposition: Parallel Multilevel Methods for Elliptic
@ -169,4 +170,20 @@ M.~Snir, S.~Otto, S.~Huss-Lederman, D.~Walker and J.~Dongarra,
 {\em MPI: The Complete Reference. Volume 1 - The MPI Core}, second edition,
 MIT Press, 1998.
 %
+
+\bibitem{DesPat:11}
+ D.~Barbieri, V.~Cardellini, S.~Filippone and D.~Rouson
+{\em Design Patterns for Scientific Computations on Sparse Matrices},
+ HPSS 2011, Algorithms and Programming Tools for Next-Generation High-Performance Scientific Software, Bordeaux, Sep. 2011
+  
+\bibitem{CaFiRo:2014}
+{ Cardellini, V.}, { Filippone, S.}, { and} { Rouson, D.} 2014,
+ Design patterns for sparse-matrix computations on hybrid {CPU/GPU}
+  platforms,
+{\em Scientific Programming\/}~{\em 22,\/}~1, 1--19.
+\bibitem{OurTechRep}
+D.~Barbieri, V.~Cardellini, A.~Fanfarillo, S.~Filippone, Three storage formats
+  for sparse matrices on {GPGPUs}, Tech. Rep. DICII RR-15.6, Universit\`a di
+  Roma Tor Vergata (February 2015).
+
 \end{thebibliography}
--- a/docs/src/cuda.tex
+++ b/docs/src/cuda.tex
@ -0,0 +1,244 @@
+
+\subsection{CUDA-class extensions}
+
+For computing  with CUDA we define a dual memorization strategy in
+which each variable on the CPU (``host'') side has a GPU (``device'')
+side. When a GPU-type variable is initialized, the data contained is
+(usually) the same on both sides. Each operator invoked on the
+variable may change the data so that only the host side or the device
+side are up-to-date. 
+
+Keeping track of the updates to data in the variables  is essential: we want
+to perform most  computations on the GPU, but we cannot afford the time
+needed to move data between the host  memory and the device memory
+because the bandwidth of the interconnection bus would become the main
+bottleneck of the computation. Thus, each and every computational
+routine in the library is built according to the following principles: 
+\begin{itemize}
+\item If the data type being handled is {GPU}-enabled, make sure that
+  its device copy is up to date, perform any arithmetic operation on
+  the {GPU}, and if the data has been altered as a result, mark
+  the main-memory copy as outdated.
+\item The main-memory copy is never updated unless this is requested
+  by the user either 
+\begin{description}
+\item[explicitly] by invoking a synchronization method;
+\item[implicitly] by invoking a method that involves other data items
+  that are not {GPU}-enabled, e.g., by assignment ov a vector to a
+  normal array. 
+\end{description}
+\end{itemize}
+In this way, data items are put on the {GPU} memory ``on demand'' and
+remain there as long as ``normal'' computations are carried out. 
+As an example, the following call to a matrix-vector product
+\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran}
+    call psb_spmm(alpha,a,x,beta,y,desc_a,info)
+\end{minted}
+will transparently and automatically be performed on the {GPU} whenever
+all three data inputs \fortinline|a|, \fortinline|x|  and
+\fortinline|y| are {GPU}-enabled. If a program makes many such calls
+sequentially, then 
+\begin{itemize}
+\item The first kernel invocation will find the data in main memory,
+  and will copy it to the {GPU} memory, thus incurring a significant
+  overhead; the result is however \emph{not} copied back, and
+  therefore:
+\item Subsequent kernel invocations involving the same vector will
+  find the data on the {GPU} side so that they will run at full
+  speed.
+\end{itemize}
+For all invocations after the first the only data that will have to be
+transferred to/from the main memory will be the scalars \fortinline|alpha|
+and \fortinline|beta|, and the return code \fortinline|info|.  
+
+\begin{description}
+\item[Vectors:] The data type \fortinline|psb_T_vect_gpu| provides a
+  GPU-enabled extension of the inner type \fortinline|psb_T_base_vect_type|,
+  and must be used together with the other inner matrix type to make
+  full use of the GPU computational capabilities;
+\item[CSR:] The data type \fortinline|psb_T_csrg_sparse_mat| provides an
+  interface to the GPU version of CSR available in the NVIDIA CuSPARSE
+  library;
+\item[HYB:] The data type \fortinline|psb_T_hybg_sparse_mat| provides an
+  interface to the HYB GPU storage  available in the NVIDIA CuSPARSE
+  library. The internal structure is opaque, hence the host side is
+  just CSR; the HYB data format is only available up to CUDA version
+  10. 
+\item[ELL:] The data type \fortinline|psb_T_elg_sparse_mat| provides an
+  interface to the  ELLPACK implementation from SPGPU;
+
+\item[HLL:] The data type \fortinline|psb_T_hlg_sparse_mat| provides an
+  interface to the  Hacked ELLPACK implementation from SPGPU;
+\item[HDIA:] The data type \fortinline|psb_T_hdiag_sparse_mat| provides an
+  interface to the  Hacked DIAgonals implementation from SPGPU;
+\end{description}
+
+
+\section{CUDA Environment Routines}
+\label{sec:cudaenv}
+
+\subsection*{psb\_cuda\_init --- Initializes PSBLAS-CUDA
+  environment}
+\addcontentsline{toc}{subsection}{psb\_cuda\_init}
+
+\begin{minted}[breaklines=true]{fortran}
+call psb_cuda_init(ctxt [, device])
+\end{minted}
+
+This subroutine initializes the PSBLAS-CUDA  environment. 
+\begin{description}
+\item[Type:] Synchronous.
+\item[\bf  On Entry ]
+\item[device] ID of CUDA device to attach to.\\
+Scope: {\bf local}.\\
+Type: {\bf optional}.\\
+Intent: {\bf in}.\\
+Specified as: an integer value. \
+Default: use \fortinline|mod(iam,ngpu)| where \fortinline|iam| is the calling
+process index and \fortinline|ngpu| is the total number of CUDA devices
+available on the current node. 
+\end{description}
+
+
+{\par\noindent\large\bfseries Notes}
+\begin{enumerate}
+\item A call to this routine must precede any other PSBLAS-CUDA call. 
+\end{enumerate}
+
+\subsection*{psb\_cuda\_exit --- Exit from  PSBLAS-CUDA
+  environment}
+\addcontentsline{toc}{subsection}{psb\_cuda\_exit}
+
+\begin{minted}[breaklines=true]{fortran}
+call psb_cuda_exit(ctxt)
+\end{minted}
+
+This subroutine exits from the  PSBLAS CUDA context.
+\begin{description}
+\item[Type:] Synchronous.
+\item[\bf  On Entry ]
+\item[ctxt] the communication context identifying the virtual
+  parallel machine.\\
+Scope: {\bf global}.\\
+Type: {\bf required}.\\
+Intent: {\bf in}.\\
+Specified as: an integer variable.
+\end{description}
+
+
+
+
+\subsection*{psb\_cuda\_DeviceSync ---  Synchronize CUDA device}
+\addcontentsline{toc}{subsection}{psb\_cuda\_DeviceSync}
+
+\begin{minted}[breaklines=true]{fortran}
+call psb_cuda_DeviceSync()
+\end{minted}
+
+This subroutine ensures that all previosly invoked kernels, i.e. all
+invocation of CUDA-side code, have completed.
+
+
+\subsection*{psb\_cuda\_getDeviceCount }
+\addcontentsline{toc}{subsection}{psb\_cuda\_getDeviceCount}
+
+\begin{minted}[breaklines=true]{fortran}
+ngpus =  psb_cuda_getDeviceCount()
+\end{minted}
+
+Get number of devices available on current computing node. 
+
+\subsection*{psb\_cuda\_getDevice }
+\addcontentsline{toc}{subsection}{psb\_cuda\_getDevice}
+
+\begin{minted}[breaklines=true]{fortran}
+ngpus =  psb_cuda_getDevice()
+\end{minted}
+
+Get  device in use by current process. 
+
+\subsection*{psb\_cuda\_setDevice }
+\addcontentsline{toc}{subsection}{psb\_cuda\_setDevice}
+
+\begin{minted}[breaklines=true]{fortran}
+info = psb_cuda_setDevice(dev)
+\end{minted}
+
+Set  device to be used  by current process. 
+
+\subsection*{psb\_cuda\_DeviceHasUVA }
+\addcontentsline{toc}{subsection}{psb\_cuda\_DeviceHasUVA}
+
+\begin{minted}[breaklines=true]{fortran}
+hasUva = psb_cuda_DeviceHasUVA()
+\end{minted}
+
+Returns true if device currently in use supports UVA (Unified Virtual Addressing).
+
+\subsection*{psb\_cuda\_WarpSize }
+\addcontentsline{toc}{subsection}{psb\_cuda\_WarpSize}
+
+\begin{minted}[breaklines=true]{fortran}
+nw = psb_cuda_WarpSize()
+\end{minted}
+
+Returns the warp size.
+
+
+\subsection*{psb\_cuda\_MultiProcessors }
+\addcontentsline{toc}{subsection}{psb\_cuda\_MultiProcessors}
+
+\begin{minted}[breaklines=true]{fortran}
+nmp = psb_cuda_MultiProcessors()
+\end{minted}
+
+Returns the number of multiprocessors in the CUDA device.
+
+\subsection*{psb\_cuda\_MaxThreadsPerMP }
+\addcontentsline{toc}{subsection}{psb\_cuda\_MaxThreadsPerMP}
+
+\begin{minted}[breaklines=true]{fortran}
+nt = psb_cuda_MaxThreadsPerMP()
+\end{minted}
+
+Returns the maximum number of threads per multiprocessor. 
+
+
+\subsection*{psb\_cuda\_MaxRegistersPerBlock }
+\addcontentsline{toc}{subsection}{psb\_cuda\_MaxRegisterPerBlock}
+
+\begin{minted}[breaklines=true]{fortran}
+nr = psb_cuda_MaxRegistersPerBlock()
+\end{minted}
+
+Returns the maximum number of register per thread block. 
+
+
+\subsection*{psb\_cuda\_MemoryClockRate }
+\addcontentsline{toc}{subsection}{psb\_cuda\_MemoryClockRate}
+
+\begin{minted}[breaklines=true]{fortran}
+cl = psb_cuda_MemoryClockRate()
+\end{minted}
+
+Returns the memory clock rate in KHz, as an integer. 
+
+\subsection*{psb\_cuda\_MemoryBusWidth }
+\addcontentsline{toc}{subsection}{psb\_cuda\_MemoryBusWidth}
+
+\begin{minted}[breaklines=true]{fortran}
+nb = psb_cuda_MemoryBusWidth()
+\end{minted}
+
+Returns the memory bus width in bits.
+
+\subsection*{psb\_cuda\_MemoryPeakBandwidth }
+\addcontentsline{toc}{subsection}{psb\_cuda\_MemoryPeakBandwidth}
+
+\begin{minted}[breaklines=true]{fortran}
+bw = psb_cuda_MemoryPeakBandwidth()
+\end{minted}
+Returns the peak memory bandwidth in MB/s (real double precision).
+
+
+
--- a/docs/src/ext-intro.tex
+++ b/docs/src/ext-intro.tex
@ -0,0 +1,412 @@
+\section{Extensions}\label{sec:ext-intro}
+
+The EXT, CUDA and RSB  subdirectories contains a set of extensions to the base
+library. The extensions provide additional storage formats beyond the
+ones already contained in the base library, as well as interfaces
+to:
+\begin{description}
+\item[SPGPU] a CUDA library originally published as
+  \url{https://code.google.com/p/spgpu/} and now included in the
+  \verb|cuda| subdir, for computations on   NVIDIA GPUs;
+\item[LIBRSB] \url{http://sourceforge.net/projects/librsb/}, for
+  computations on multicore parallel machines. 
+\end{description}
+The infrastructure laid out in the base library to allow for these
+extensions is detailed in the references~\cite{DesPat:11,CaFiRo:2014,Sparse03};
+the CUDA-specific data formats are described in~\cite{OurTechRep}. 
+
+
+\subsection{Using the extensions}
+\label{sec:ext-appstruct}
+A sample application using the PSBLAS extensions will contain the
+following steps:
+\begin{itemize}
+\item \verb|USE| the appropriat modules (\verb|psb_ext_mod|,
+  \verb|psb_cuda_mod|);
+\item Declare a \emph{mold} variable of the necessary type
+  (e.g. \verb|psb_d_ell_sparse_mat|, \verb|psb_d_hlg_sparse_mat|,
+  \verb|psb_d_vect_cuda|);
+\item Pass the mold variable to the base library interface where
+  needed to ensure the appropriate dynamic type.
+\end{itemize}
+Suppose you want to use the CUDA-enabled ELLPACK data structure; you
+would use a piece of code like this (and don't forget, you need
+CUDA-side vectors along with the matrices):
+\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran}
+program my_cuda_test
+  use psb_base_mod
+  use psb_util_mod 
+  use psb_ext_mod
+  use psb_cuda_mod
+  type(psb_dspmat_type) :: a, agpu
+  type(psb_d_vect_type) :: x, xg, bg
+
+  real(psb_dpk_), allocatable :: xtmp(:)
+  type(psb_d_vect_cuda)       :: vmold
+  type(psb_d_elg_sparse_mat) :: aelg
+  type(psb_ctxt_type) :: ctxt
+  integer             :: iam, np
+
+
+  call psb_init(ctxt)
+  call psb_info(ctxt,iam,np)
+  call psb_cuda_init(ctxt, iam)
+
+  
+  ! My own home-grown matrix generator
+  call gen_matrix(ctxt,idim,desc_a,a,x,info)
+  if (info /= 0) goto 9999
+
+  call a%cscnv(agpu,info,mold=aelg)
+  if (info /= 0) goto 9999
+  xtmp = x%get_vect() 
+  call xg%bld(xtmp,mold=vmold)
+  call bg%bld(size(xtmp),mold=vmold)
+  
+  ! Do sparse MV
+  call psb_spmm(done,agpu,xg,dzero,bg,desc_a,info)
+
+ 
+9999 continue
+  if (info == 0) then 
+     write(*,*) '42'
+  else
+     write(*,*) 'Something went wrong ',info
+  end if
+  
+  
+  call psb_cuda_exit()
+  call psb_exit(ctxt)
+  stop
+end program my_cuda_test
+\end{minted}
+A full example of this strategy can be seen in the
+\texttt{test/ext/kernel} and \texttt{test/\-cuda/\-kernel} subdirectories,
+where we provide  sample programs 
+to test the speed of the sparse matrix-vector product with the various
+data structures included in the library. 
+
+
+\subsection{Extensions' Data Structures}
+\label{sec:ext-datastruct}
+%\ifthenelse{\boolean{mtc}}{\minitoc}{}
+
+Access to the facilities provided by the EXT library is mainly
+achieved through the data types that are provided within. 
+The data classes are derived from the base  classes in PSBLAS, through 
+the Fortran~2003 mechanism of \emph{type extension}~\cite{MRC:11}.  
+
+The data classes are divided between the general purpose CPU
+extensions, the GPU interfaces and the RSB interfaces.
+In the description we will make use of the notation introduced in
+Table~\ref{tab:notation}. 
+
+\begin{table}[ht]
+\caption{Notation for parameters describing a sparse matrix}
+\begin{center}
+{\footnotesize
+\begin{tabular}{ll}
+\hline
+Name & Description \\
+\hline
+M		& Number of rows in matrix		 \\
+N		& Number of columns in matrix		 \\
+NZ              & Number of nonzeros in matrix   \\
+AVGNZR          & Average number of nonzeros per row  \\
+MAXNZR          & Maximum number of nonzeros per row  \\
+NDIAG           & Numero of nonzero diagonals\\
+AS	        & Coefficients 	array		 \\
+IA	        & Row indices array			 \\
+JA	        & Column  indices array			 \\
+IRP	        & Row start pointers array			 \\
+JCP	        & Column start pointers array			 \\
+NZR 	        & Number of nonzeros per row array \\
+OFFSET          & Offset for diagonals			 \\
+\hline
+\end{tabular}
+}
+\end{center}
+\label{tab:notation}
+\end{table}
+
+\begin{figure}[ht]
+	\centering
+%		\includegraphics[width=5.2cm]{figures/mat.eps}
+		\includegraphics[width=5.2cm]{figures/mat.pdf}
+	\caption{Example of sparse matrix}
+	\label{fig:dense}
+\end{figure} 
+
+\subsection{CPU-class extensions}
+
+
+\subsubsection*{ELLPACK}
+
+The ELLPACK/ITPACK format (shown in Figure~\ref{fig:ell}) 
+comprises  two 2-dimensional arrays \verb|AS| and
+\verb|JA|  with \verb|M| rows and \verb|MAXNZR| columns, where
+\verb|MAXNZR| is the maximum
+number of nonzeros in any row~\cite{ELLPACK}. 
+Each row of the arrays \verb|AS| and \verb|JA| contains the
+coefficients and column indices; rows shorter than
+\verb|MAXNZR| are padded with zero coefficients and appropriate column
+indices, e.g. the last valid one found in the same row.
+
+\begin{figure}[ht]
+	\centering
+%		\includegraphics[width=8.2cm]{figures/ell.eps}
+		\includegraphics[width=8.2cm]{figures/ell.pdf}
+	\caption{ELLPACK compression of matrix in Figure~\ref{fig:dense}}
+	\label{fig:ell}
+\end{figure} 
+
+
+\begin{algorithm}
+\lstset{language=Fortran}
+\small
+  \begin{lstlisting}
+    do i=1,n
+      t=0
+      do j=1,maxnzr
+        t = t +  as(i,j)*x(ja(i,j))
+      end do
+      y(i) = t
+    end do
+  \end{lstlisting}
+  \caption{\label{alg:ell} Matrix-Vector product in ELL format}
+\end{algorithm}
+The matrix-vector product $y=Ax$ can be computed with the code shown in
+Alg.~\ref{alg:ell}; it costs  one  memory write per outer iteration, 
+plus three memory reads  and two floating-point operations per inner
+iteration.   
+
+Unless all rows have exactly the same number of nonzeros, some of the
+coefficients in the \verb|AS| array will be zeros; therefore this
+data structure will have  an overhead both in terms of memory space
+and redundant operations (multiplications by zero).  The overhead can
+be acceptable if: 
+\begin{enumerate}
+\item The maximum number of nonzeros per row is not much larger than
+  the    average;
+\item The regularity of the data structure allows for faster  code,
+  e.g. by allowing vectorization, thereby offsetting the additional
+  storage requirements.  
+\end{enumerate}
+In the extreme case where the input matrix has one full row, the
+ELLPACK structure would require more memory than the normal 2D array
+storage. The ELLPACK storage format was very popular in the vector
+computing days; in modern CPUs it is not quite as popular, but it
+is  the basis for many GPU formats. 
+
+The relevant data type is \verb|psb_T_ell_sparse_mat|:
+\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran}
+  type, extends(psb_d_base_sparse_mat) :: psb_d_ell_sparse_mat
+    !
+    ! ITPACK/ELL format, extended.
+    !     
+    
+    integer(psb_ipk_), allocatable :: irn(:), ja(:,:), idiag(:)
+    real(psb_dpk_), allocatable :: val(:,:)
+
+  contains
+    ....
+  end type psb_d_ell_sparse_mat
+\end{minted}
+
+
+\subsubsection*{Hacked ELLPACK}
+
+The \textit{hacked ELLPACK} (\textbf{HLL}) format 
+alleviates the main problem of the ELLPACK format, that is, 
+the  amount of  memory required by  padding for  sparse matrices in
+which the maximum row length is  larger than the average.
+
+The number of  elements  allocated to padding is $[(m*maxNR) -
+(m*avgNR) = m*(maxNR-avgNR)]$ 
+for both \verb|AS|  and \verb|JA| arrays,
+where $m$ is equal to the number of rows of the matrix, $maxNR$ is the
+maximum number of nonzero elements 
+in every row and $avgNR$ is the average number of nonzeros. 
+Therefore a single densely populated row can seriously affect the
+total size of the allocation. 
+
+To limit this effect, in the HLL format  we break the original matrix
+into equally sized groups of rows (called \textit{hacks}), and then store
+these groups as independent matrices in ELLPACK format. 
+The groups can be arranged selecting rows in an arbitrarily manner;
+indeed, if the rows are sorted by decreasing number of nonzeros we
+obtain essentially the JAgged Diagonals format. 
+If the rows are not in the original order, then an   additional vector
+\textit{rIdx} is required, storing the actual row index  for each row
+in the data structure.
+
+The multiple ELLPACK-like buffers are stacked together inside a
+single, one dimensional array; 
+an additional  vector \textit{hackOffsets} is provided to keep track
+of the individual submatrices.
+All hacks have the same number of rows  \textit{hackSize}; hence, 
+the \textit{hackOffsets} vector is  an array of
+$(m/hackSize)+1$ elements, each one pointing  to the first index of a
+submatrix inside the stacked \textit{cM}/\textit{rP} buffers, plus an
+additional element pointing past the end of the last block, where the
+next one would begin. 
+We thus have the property that  
+the elements of the $k$-th \textit{hack} are stored between \verb|hackOffsets[k]| and
+\verb|hackOffsets[k+1]|, similarly to what happens in the CSR format. 
+
+\begin{figure}[ht]
+	\centering
+%		\includegraphics[width=8.2cm]{../figures/hll.eps}
+	 \includegraphics[width=.72\textwidth]{../figures/hll.pdf}
+	\caption{Hacked ELLPACK compression of matrix in Figure~\ref{fig:dense}}
+	\label{fig:hll}
+\end{figure} 
+
+With this data structure a very long row only affects one hack, and
+therefore the additional memory is limited to the hack in which the
+row appears.
+
+The relevant data type is \verb|psb_T_hll_sparse_mat|:
+\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran}
+  type, extends(psb_d_base_sparse_mat) :: psb_d_hll_sparse_mat
+    !
+    ! HLL format. (Hacked ELL) 
+    !     
+    integer(psb_ipk_) :: hksz
+    integer(psb_ipk_), allocatable :: irn(:), ja(:), idiag(:), hkoffs(:)
+    real(psb_dpk_), allocatable :: val(:)
+
+  contains
+   ....
+  end type
+\end{minted}
+
+\subsubsection*{Diagonal storage}
+
+
+The DIAgonal (DIA) format (shown in Figure~\ref{fig:dia}) 
+has   a 2-dimensional array \verb|AS| containing in each column the
+coefficients along a  diagonal of the matrix, and an integer array
+\verb|OFFSET|  that determines  where each diagonal starts. The
+diagonals in \verb|AS| are padded with zeros as necessary. 
+
+The code to compute the matrix-vector product $y=Ax$ is shown in Alg.~\ref{alg:dia};
+it costs one  memory read per outer iteration, 
+plus three memory reads, one memory write  and two floating-point
+operations per inner iteration. The accesses to  \verb|AS| and
+\verb|x| are in strict sequential order,  therefore no indirect
+addressing is required.  
+
+\begin{figure}[ht]
+	\centering
+%		\includegraphics[width=8.2cm]{figures/dia.eps}
+		\includegraphics[width=.72\textwidth]{figures/dia.pdf}
+	\caption{DIA compression of matrix in Figure~\ref{fig:dense}}
+	\label{fig:dia}
+\end{figure} 
+
+
+\begin{algorithm}
+\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran}
+    do j=1,ndiag
+      if (offset(j) > 0) then 
+        ir1 = 1; ir2 = m - offset(j);
+      else
+        ir1 = 1 - offset(j); ir2 = m;
+      end if
+      do i=ir1,ir2
+        y(i) = y(i) + alpha*as(i,j)*x(i+offset(j))
+      end do
+    end do
+\end{minted}
+  \caption{\label{alg:dia} Matrix-Vector product in DIA format}
+\end{algorithm}
+
+
+The relevant data type is \verb|psb_T_dia_sparse_mat|:
+\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran}
+  type, extends(psb_d_base_sparse_mat) :: psb_d_dia_sparse_mat
+    !
+    ! DIA format, extended.
+    !     
+    
+    integer(psb_ipk_), allocatable :: offset(:)
+    integer(psb_ipk_) :: nzeros
+    real(psb_dpk_), allocatable :: data(:,:)
+
+  end type
+\end{minted}
+
+
+
+\subsubsection*{Hacked DIA}
+
+Storage by DIAgonals is an attractive option for matrices whose
+coefficients are located on a small set of diagonals, since they do
+away with storing explicitly the indices and therefore reduce
+significantly memory traffic. However, having a few coefficients
+outside of the main set of diagonals may  significantly increase the
+amount of needed padding; moreover, while the DIA code is easily
+vectorized, it does not necessarily make optimal use of the memory
+hierarchy. While processing each diagonal we are updating entries in
+the output vector \verb|y|, which is then accessed multiple times; if 
+the vector \verb|y| is too large to remain in the cache memory, the
+associated cache miss penalty is paid multiple times. 
+
+The \textit{hacked DIA} (\textbf{HDIA}) format was designed to contain
+the amount of padding, by  breaking  the original matrix
+into equally sized groups of rows (\textit{hacks}), and then storing
+these groups as independent matrices in DIA format. This approach is
+similar to that of HLL, and requires using an offset vector for each
+submatrix. Again, similarly to HLL, the various submatrices are
+stacked inside a linear array to improve memory management. The fact
+that the matrix is accessed in slices helps in reducing cache misses,
+especially regarding accesses to the %output 
+vector \verb|y|.  
+
+
+An additional vector \textit{hackOffsets} is provided to complete
+the matrix format; given  that \textit{hackSize} is the number of rows of each hack,
+the \textit{hackOffsets} vector is made by an array of
+$(m/hackSize)+1$ elements,  pointing to the first diagonal offset of a
+submatrix inside the stacked \textit{offsets} buffers, plus an
+additional element equal to the number of nonzero diagonals in the whole matrix. 
+We thus have the property that  
+the number of diagonals of the $k$-th \textit{hack} is given by
+\textit{hackOffsets[k+1] - hackOffsets[k]}.  
+
+\begin{figure}[ht]
+	\centering
+%		\includegraphics[width=8.2cm]{../figures/hdia.eps}
+		\includegraphics[width=.72\textwidth]{../figures/hdia.pdf}
+	\caption{Hacked DIA compression of matrix in Figure~\ref{fig:dense}}
+	\label{fig:hdia}
+\end{figure} 
+
+The relevant data type is \verb|psb_T_hdia_sparse_mat|:
+\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran}
+  type pm
+     real(psb_dpk_), allocatable  :: data(:,:)
+  end type pm
+
+  type po
+     integer(psb_ipk_), allocatable  :: off(:)
+  end type po
+
+  type, extends(psb_d_base_sparse_mat) :: psb_d_hdia_sparse_mat
+    !
+    ! HDIA format, extended.
+    !
+    
+    type(pm), allocatable :: hdia(:)
+    type(po), allocatable :: offset(:)
+    integer(psb_ipk_) :: nblocks, nzeros
+    integer(psb_ipk_) :: hack = 64
+    integer(psb_long_int_k_) :: dim=0
+
+  contains
+   ....
+  end type
+\end{minted}
+
+
--- a/docs/src/figures/dia.pdf
+++ b/docs/src/figures/dia.pdf
--- a/docs/src/figures/ell.pdf
+++ b/docs/src/figures/ell.pdf
--- a/docs/src/figures/hdia.pdf
+++ b/docs/src/figures/hdia.pdf
--- a/docs/src/figures/hll.pdf
+++ b/docs/src/figures/hll.pdf
--- a/docs/src/figures/mat.pdf
+++ b/docs/src/figures/mat.pdf
--- a/docs/src/figures/psblaslibraryext.png
+++ b/docs/src/figures/psblaslibraryext.png
--- a/docs/src/userguide.pdf
+++ b/docs/src/userguide.pdf
@ -0,0 +1 @@
+tmp/userguide.pdf
--- a/docs/src/userguide.tex
+++ b/docs/src/userguide.tex
@ -17,6 +17,7 @@
 \newtheorem{theorem}{Theorem}
 \newtheorem{corollary}{Corollary}
 \usepackage{listings}
+\usepackage{algorithm2e}
 \usepackage{minted}
 \usemintedstyle{friendly}
 \definecolor{bg}{rgb}{0.95,0.95,0.95}
@ -36,7 +37,7 @@
 \relax
 \pdfcompresslevel=0             %-- 0 = none, 9 = best
 \pdfinfo{                       %-- Info dictionary of PDF output  /Author (Alfredo Buttari)
-  /Title (Parallel Sparse BLAS V. 3.8.0)
+  /Title (Parallel Sparse BLAS V. 3.9.0)
  /Subject (Parallel Sparse Basic Linear Algebra Subroutines)
  /Keywords (Computer Science Linear Algebra Fluid Dynamics Parallel Linux MPI PSBLAS Iterative Solvers Preconditioners)
  /Creator (pdfLaTeX)
@ -99,7 +100,7 @@

 \begin{document}
 {
-\pdfbookmark{PSBLAS-v3.8.0 User's Guide}{title}
+\pdfbookmark{PSBLAS-v3.9.0 User's Guide}{title}
 \lstset{language=Fortran}
 \newlength{\centeroffset}
 \setlength{\centeroffset}{-0.5\oddsidemargin}
@ -109,7 +110,7 @@
 \vspace*{\stretch{1}}
 \noindent\hspace*{\centeroffset}\makebox[0pt][l]{\begin{minipage}{\textwidth}
 \flushright
-{\Huge\bfseries PSBLAS 3.8.0 User's guide
+{\Huge\bfseries PSBLAS 3.9.0 User's guide
 }
 \noindent\rule[-1ex]{\textwidth}{5pt}\\[2.5ex]
 \hfill\emph{\Large A reference guide for the Parallel Sparse BLAS library}
@ -130,7 +131,7 @@
 {\bfseries 
 by Salvatore Filippone\\
 and Alfredo Buttari}\\ 
-May 1st, 2022
+Aug 1st, 2024
 \end{minipage}}
 }
 %\addtolength{\textwidth}{\centeroffset}
@ -159,7 +160,8 @@ May 1st, 2022
 \include{util}
 \include{precs}
 \include{methods}
-
+\include{ext-intro}
+\include{cuda}
 \cleardoublepage
 \input{biblio}

--- a/docs/src/userhtml.tex
+++ b/docs/src/userhtml.tex
@ -17,6 +17,9 @@
 \newtheorem{theorem}{Theorem}
 \newtheorem{corollary}{Corollary}
 \usepackage{listings}
+\usepackage{algorithm2e}
+\usepackage{minted}
+\usemintedstyle{friendly}
 \usepackage{microtype}
 \ifpdf
 \newmintinline[fortinline]{fortran}{}
@ -94,9 +97,9 @@
   Alfredo Buttari } \\
 %\\[10ex]
 %\today
-Software version: 3.8.0\\
+Software version: 3.9.0\\
 %\today
-May 1st, 2022
+Aug 1st, 2024
 \cleardoublepage
 \begingroup
  \renewcommand*{\thepage}{toc}
@ -120,7 +123,8 @@ May 1st, 2022
 \include{util}
 \include{precs}
 \include{methods}
-
+\include{ext-intro}
+\include{cuda}
 \cleardoublepage

 \input{biblio}
--- a/rsb/Makefile
+++ b/rsb/Makefile
@ -0,0 +1,53 @@
+include ../Make.inc
+#
+# Libraries used
+#
+PSBLIBDIR=$(PSBLASDIR)/lib/
+PSBINCDIR=$(PSBLASDIR)/include
+PSBMODDIR=$(PSBLASDIR)/modules
+LIBDIR=../lib
+INCDIR=../include
+MODDIR=../modules
+PSBLAS_LIB= -L$(PSBLIBDIR)  -lpsb_util  -lpsb_base
+#-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base 
+LDLIBS=$(PSBLDLIBS)
+#
+# Compilers and such
+#
+#CCOPT= -g
+FINCLUDES=$(FMFLAG). $(FMFLAG)$(PSBMODDIR) $(FMFLAG)$(PSBINCDIR) $(FIFLAG). $(LIBRSB_INCLUDES) $(LIBRSB_DEFINES)
+CINCLUDES=-I$(GPU_INCDIR) -I$(CUDA_INCDIR)
+LIBNAME=libpsb_rsb.a
+
+
+FOBJS=  rsb_mod.o psb_d_rsb_mat_mod.o \
+	psb_rsb_penv_mod.o psb_rsb_mod.o
+
+COBJS= rsb_int.o
+
+OBJS=$(COBJS) $(FOBJS)
+
+lib: objs ilib 
+	/bin/cp -p $(LIBNAME) $(LIBDIR)
+
+objs: $(OBJS) iobjs
+	/bin/cp -p *$(.mod) $(MODDIR)
+
+iobjs:
+	$(MAKE) -C impl objs
+ilib: iobjs
+	$(MAKE) -C impl lib LIBNAME=$(LIBNAME)
+
+clean: cclean iclean
+	/bin/rm -f  $(FOBJS) *$(.mod) *.a
+
+cclean: 
+	/bin/rm -f  $(COBJS) 
+iclean:
+	cd impl && $(MAKE) clean
+
+verycleanlib: 
+	(cd ../..; make veryclean)
+
+
+
--- a/rsb/impl/Makefile
+++ b/rsb/impl/Makefile
@ -0,0 +1,30 @@
+include ../../Make.inc
+PSBLIBDIR=$(PSBLASDIR)/lib/
+PSBINCDIR=$(PSBLASDIR)/include
+PSBMODDIR=$(PSBLASDIR)/modules
+LIBDIR=../../lib
+INCDIR=../../include
+MODDIR=../../modules
+PSBLAS_LIB= -L$(PSBLIBDIR)  -lpsb_util  -lpsb_base
+#-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base 
+LDLIBS=$(PSBLDLIBS)
+#
+# Compilers and such
+#
+#CCOPT= -g
+FINCLUDES=$(FMFLAG).. $(FMFLAG)$(INCDIR) $(FMFLAG)$(MODDIR) $(FMFLAG)$(PSBMODDIR) $(FMFLAG)$(PSBINCDIR) $(LIBRSB_INCLUDES) $(FIFLAG).. $(LIBRSB_DEFINES)
+CINCLUDES=
+LIBNAME=libpsb_rsb.a
+
+OBJS= \
+psb_d_cp_rsb_from_coo.o \
+psb_d_mv_rsb_from_coo.o \
+psb_d_cp_rsb_to_coo.o psb_d_rsb_csmv.o
+
+objs: $(OBJS)
+
+lib: objs
+	ar cur ../$(LIBNAME) $(OBJS)
+
+clean:
+	/bin/rm -f $(OBJS)
--- a/rsb/impl/psb_d_cp_rsb_from_coo.F90
+++ b/rsb/impl/psb_d_cp_rsb_from_coo.F90
@ -0,0 +1,78 @@
+!                Parallel Sparse BLAS   GPU plugin 
+!      (C) Copyright 2013
+!  
+!                         Salvatore Filippone
+!                         Alessandro Fanfarillo
+!   
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!   
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!   
+  
+
+subroutine psb_d_cp_rsb_from_coo(a,b,info) 
+  
+  use psb_base_mod
+  use rsb_mod
+  use psb_d_rsb_mat_mod, psb_protect_name => psb_d_cp_rsb_from_coo
+  implicit none 
+
+  class(psb_d_rsb_sparse_mat), intent(inout) :: a
+  class(psb_d_coo_sparse_mat), intent(in)    :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  !locals
+  type(psb_d_coo_sparse_mat) :: tmp
+  Integer(Psb_ipk_)            :: nza, nr, i,j,irw, idl,err_act, nc
+  integer(psb_ipk_)            :: nzm, ir, ic, k ,bs
+  integer(psb_ipk_)            :: debug_level, debug_unit
+  character(len=20)            :: name
+
+  info = psb_success_
+#ifdef HAVE_RSB
+  ! This is to have fix_coo called behind the scenes
+  call b%cp_to_coo(tmp,info)
+
+  call tmp%fix(info)
+  if (info /= psb_success_) return
+
+  nr  = tmp%get_nrows()
+  nc  = tmp%get_ncols()
+  nza = tmp%get_nzeros()
+  ! If it is sorted then we can lessen memory impact 
+  a%psb_d_base_sparse_mat = tmp%psb_d_base_sparse_mat
+
+  bs = 1!RSB_DEFAULT_BLOCKING
+
+  info = Rsb_from_coo(a%rsbMat,b%val,b%ia,b%ja,nza,nr,nc,bs,bs)
+
+  call tmp%free()
+#endif
+
+  return
+
+9999 continue
+  info = psb_err_alloc_dealloc_
+  return
+
+end subroutine psb_d_cp_rsb_from_coo
--- a/rsb/impl/psb_d_cp_rsb_to_coo.f90
+++ b/rsb/impl/psb_d_cp_rsb_to_coo.f90
@ -0,0 +1,77 @@
+!                Parallel Sparse BLAS   GPU plugin 
+!      (C) Copyright 2013
+!  
+!                         Salvatore Filippone
+!                         Alessandro Fanfarillo
+!   
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!   
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!   
+  
+
+subroutine psb_d_cp_rsb_to_coo(a,b,info) 
+  
+  use psb_base_mod
+  use rsb
+  use psb_d_rsb_mat_mod, psb_protect_name => psb_d_cp_rsb_to_coo
+  implicit none 
+
+  class(psb_d_rsb_sparse_mat), intent(in)    :: a
+  class(psb_d_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+  real(psb_dpk_), pointer :: val_point(:)
+  type(c_ptr) :: t_p,s_p
+
+  !locals
+  integer(psb_ipk_)   :: i, j, k,nr,nza,nc
+
+  info = psb_success_
+
+  nr  = a%get_nrows()
+  nc  = a%get_ncols()
+  nza = a%get_nzeros()
+
+  call b%allocate(nr,nc,nza)
+  b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat
+
+  allocate(val_point(nza))
+
+  t_p = c_loc(val_point(1))
+
+  info = rsb_mtx_get_coo(a%rsbMat, t_p, b%ia, b%ja,RSB_FLAG_FORTRAN_INDICES_INTERFACE)
+
+  !info = rsb_mtx_switch_to_coo(a%rsbMat,t_p,b%ia,b%ja,RSB_FLAG_FORTRAN_INDICES_INTERFACE)
+
+  k = rsb_perror(s_p,info)
+
+  do i=1,nza
+     b%val(i)=val_point(i)
+  enddo
+
+  deallocate(val_point)
+
+  call b%set_nzeros(nza)
+  call b%fix(info)
+
+end subroutine psb_d_cp_rsb_to_coo
--- a/rsb/impl/psb_d_mv_rsb_from_coo.f90
+++ b/rsb/impl/psb_d_mv_rsb_from_coo.f90
@ -0,0 +1,114 @@
+!                Parallel Sparse BLAS   GPU plugin 
+!      (C) Copyright 2013
+!  
+!                         Salvatore Filippone
+!                         Alessandro Fanfarillo
+!   
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!   
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!   
+  
+
+subroutine psb_d_mv_rsb_from_coo(a,b,info) 
+  
+  use psb_base_mod
+  use psb_d_rsb_mat_mod, psb_protect_name => psb_d_mv_rsb_from_coo
+  implicit none 
+
+  class(psb_d_rsb_sparse_mat), intent(inout) :: a
+  class(psb_d_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  !locals
+  Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, ir, ic
+
+  info = psb_success_
+
+  call b%fix(info)
+  if (info /= psb_success_) return
+
+  nr  = b%get_nrows()
+  nc  = b%get_ncols()
+  nza = b%get_nzeros()
+  ! if (b%is_sorted()) then 
+  !   ! If it is sorted then we can lessen memory impact 
+  !   a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat
+
+  !   ! First compute the number of nonzeros in each row.
+  !   call psb_realloc(nr,a%irn,info) 
+  !   if (info /= 0) goto 9999
+  !   a%irn = 0
+  !   do i=1, nza
+  !     a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1
+  !   end do
+  !   nzm = 0 
+  !   do i=1, nr
+  !     nzm = max(nzm,a%irn(i))
+  !     a%irn(i) = 0
+  !   end do
+  !   ! Second: copy the column indices.
+  !   call psb_realloc(nr,a%idiag,info) 
+  !   if (info == 0) call psb_realloc(nr,nzm,a%ja,info) 
+  !   if (info /= 0) goto 9999
+  !   do i=1, nza
+  !     ir = b%ia(i)
+  !     ic = b%ja(i)
+  !     j  = a%irn(ir) + 1 
+  !     a%ja(ir,j) = ic
+  !     a%irn(ir)  = j
+  !   end do
+  !   ! Third copy the other stuff
+  !   deallocate(b%ia,b%ja,stat=info) 
+  !   if (info == 0) call psb_realloc(nr,a%idiag,info)
+  !   if (info == 0) call psb_realloc(nr,nzm,a%val,info)
+  !   if (info /= 0) goto 9999
+  !   k = 0 
+  !   do i=1, nr
+  !     a%idiag(i) = 0 
+  !     do j=1, a%irn(i)
+  !       k = k + 1 
+  !       a%val(i,j) = b%val(k)
+  !       if (i==a%ja(i,j)) a%idiag(i)=j
+  !     end do
+  !     do j=a%irn(i)+1, nzm
+  !       a%ja(i,j) = i
+  !       a%val(i,j) = dzero
+  !     end do
+  !   end do
+
+  ! else 
+    ! If b is not sorted, the only way is to copy. 
+    call a%cp_from_coo(b,info)
+    if (info /= 0) goto 9999
+  ! end if
+
+  call b%free()
+
+  return
+
+9999 continue
+  info = psb_err_alloc_dealloc_
+  return
+
+end subroutine psb_d_mv_rsb_from_coo
--- a/rsb/impl/psb_d_rsb_csmv.F90
+++ b/rsb/impl/psb_d_rsb_csmv.F90
@ -0,0 +1,138 @@
+!                Parallel Sparse BLAS   GPU plugin 
+!      (C) Copyright 2013
+!  
+!                         Salvatore Filippone
+!                         Alessandro Fanfarillo
+!   
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!   
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!   
+  
+
+subroutine psb_d_rsb_csmv(alpha,a,x,beta,y,info,trans) 
+  
+  use psb_base_mod
+  use rsb_mod
+  use psb_d_rsb_mat_mod, psb_protect_name => psb_d_rsb_csmv
+  implicit none 
+  class(psb_d_rsb_sparse_mat), intent(in) :: a
+  real(psb_dpk_), intent(in)          :: alpha, beta, x(:)
+  real(psb_dpk_), intent(inout)       :: y(:)
+  integer, intent(out)                :: info
+  character, optional, intent(in)     :: trans
+
+  character :: trans_
+  integer   :: i,j,k,m,n, nnz, ir, jc
+  real(psb_dpk_)  :: acc
+  type(c_ptr)    :: gpX, gpY
+  logical        :: tra
+  Integer        :: err_act
+  character(len=20)  :: name='d_rsb_csmv'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+#ifdef HAVE_RSB
+  if (present(trans)) then
+    trans_ = trans
+  else
+    trans_ = 'N'
+  end if
+
+  if (.not.a%is_asb()) then 
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+
+  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
+
+  if (tra) then 
+    m = a%get_ncols()
+    n = a%get_nrows()
+  else
+    n = a%get_ncols()
+    m = a%get_nrows()
+  end if
+
+  if (size(x,1)<n) then 
+    info = 36
+    call psb_errpush(info,name,i_err=(/3*ione,n,izero,izero,izero/))
+    goto 9999
+  end if
+
+  if (size(y,1)<m) then 
+    info = 36
+    call psb_errpush(info,name,i_err=(/5*ione,m,izero,izero,izero/))
+    goto 9999
+  end if
+
+  ! if (tra) then 
+  !   call a%psb_d_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) 
+  ! else
+
+  info = Rsb_spmv(a%rsbMat,x,alpha,y,beta,trans_)
+
+  if (info /= 0) goto 9999
+ 
+    ! if (info == 0) &
+    !      & info = FallocMultiVecDevice(gpX,1,size(x,1),spgpu_type_double)
+    ! if (alpha /= dzero) then 
+    !   if (info == 0) &
+    !        & info = writeMultiVecDevice(gpX,x)
+    ! end if
+    ! if (info == 0) &
+    !      & info = FallocMultiVecDevice(gpY,1,size(y,1),spgpu_type_double)
+    ! if (beta /= dzero) then 
+    !   if (info == 0) &
+    !        & info = writeMultiVecDevice(gpY,y)
+    ! end if
+    ! if (info == 0)  &
+    !      & info = spmvHllDevice(a%deviceMat,alpha,gpX,beta,gpY)
+    ! if (info == 0) &
+    !      & info = readMultiVecDevice(gpY,y)
+    ! if (info /= 0) goto 9999
+    ! call freeMultiVecDevice(gpX)
+    ! call freeMultiVecDevice(gpY)
+  ! endif
+! #else
+!   call a%psb_d_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans)
+!    write(*,*) y
+#endif
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 continue
+  call psb_erractionrestore(err_act)
+
+  if (err_act == psb_act_abort_) then
+    call psb_error()
+    return
+  end if
+  return
+
+
+end subroutine psb_d_rsb_csmv
--- a/rsb/psb_d_rsb_mat_mod.f90
+++ b/rsb/psb_d_rsb_mat_mod.f90
@ -0,0 +1,487 @@
+!                Parallel Sparse BLAS   GPU plugin 
+!      (C) Copyright 2013
+!  
+!                         Salvatore Filippone
+!                         Alessandro Fanfarillo
+!   
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!   
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!   
+  
+module psb_d_rsb_mat_mod
+
+  use psb_d_base_mat_mod
+  use iso_c_binding
+
+  type, extends(psb_d_base_sparse_mat) :: psb_d_rsb_sparse_mat
+     
+     type(c_ptr) :: rsbMat = c_null_ptr
+
+  contains
+ !   procedure, pass(a) :: get_size     => d_rsb_get_size
+    procedure, pass(a) :: get_nzeros   => d_rsb_get_nzeros
+    procedure, nopass  :: get_fmt      => d_rsb_get_fmt
+    procedure, pass(a) :: sizeof       => d_rsb_sizeof
+ !   procedure, pass(a) :: csmm         => psb_d_rsb_csmm
+    procedure, pass(a) :: csmv         => psb_d_rsb_csmv
+    ! procedure, pass(a) :: inner_cssm   => psb_d_rsb_cssm
+    ! procedure, pass(a) :: inner_cssv   => psb_d_rsb_cssv
+    ! procedure, pass(a) :: scals        => psb_d_rsb_scals
+    ! procedure, pass(a) :: scalv        => psb_d_rsb_scal
+    ! procedure, pass(a) :: maxval       => psb_d_rsb_maxval
+    ! procedure, pass(a) :: csnmi        => psb_d_rsb_csnmi
+    ! procedure, pass(a) :: csnm1        => psb_d_rsb_csnm1
+    ! procedure, pass(a) :: rowsum       => psb_d_rsb_rowsum
+    ! procedure, pass(a) :: arwsum       => psb_d_rsb_arwsum
+    ! procedure, pass(a) :: colsum       => psb_d_rsb_colsum
+    ! procedure, pass(a) :: aclsum       => psb_d_rsb_aclsum
+    ! procedure, pass(a) :: reallocate_nz => psb_d_rsb_reallocate_nz
+    ! procedure, pass(a) :: allocate_mnnz => psb_d_rsb_allocate_mnnz
+    procedure, pass(a) :: cp_to_coo    => psb_d_cp_rsb_to_coo
+    procedure, pass(a) :: cp_from_coo  => psb_d_cp_rsb_from_coo
+    ! procedure, pass(a) :: cp_to_fmt    => psb_d_cp_rsb_to_fmt
+    ! procedure, pass(a) :: cp_from_fmt  => psb_d_cp_rsb_from_fmt
+!    procedure, pass(a) :: mv_to_coo    => psb_d_mv_rsb_to_coo
+    procedure, pass(a) :: mv_from_coo  => psb_d_mv_rsb_from_coo
+    ! procedure, pass(a) :: mv_to_fmt    => psb_d_mv_rsb_to_fmt
+    ! procedure, pass(a) :: mv_from_fmt  => psb_d_mv_rsb_from_fmt
+    ! procedure, pass(a) :: csput        => psb_d_rsb_csput
+    ! procedure, pass(a) :: get_diag     => psb_d_rsb_get_diag
+    ! procedure, pass(a) :: csgetptn     => psb_d_rsb_csgetptn
+    ! procedure, pass(a) :: csgetrow     => psb_d_rsb_csgetrow
+    ! procedure, pass(a) :: get_nz_row   => d_rsb_get_nz_row
+    ! procedure, pass(a) :: reinit       => psb_d_rsb_reinit
+    ! procedure, pass(a) :: trim         => psb_d_rsb_trim
+    ! procedure, pass(a) :: print        => psb_d_rsb_print
+    procedure, pass(a) :: free         => d_rsb_free
+    ! procedure, pass(a) :: mold         => psb_d_rsb_mold
+
+  end type psb_d_rsb_sparse_mat
+
+  private :: d_rsb_get_nzeros, d_rsb_free,  d_rsb_get_fmt, &
+       & d_rsb_get_size, d_rsb_sizeof, d_rsb_get_nz_row
+
+  interface
+    subroutine  psb_d_rsb_reallocate_nz(nz,a) 
+      import :: psb_d_rsb_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in) :: nz
+      class(psb_d_rsb_sparse_mat), intent(inout) :: a
+    end subroutine psb_d_rsb_reallocate_nz
+  end interface
+  
+  interface 
+    subroutine psb_d_rsb_reinit(a,clear)
+      import :: psb_d_rsb_sparse_mat
+      class(psb_d_rsb_sparse_mat), intent(inout) :: a   
+      logical, intent(in), optional :: clear
+    end subroutine psb_d_rsb_reinit
+  end interface
+  
+  interface
+    subroutine  psb_d_rsb_trim(a)
+      import :: psb_d_rsb_sparse_mat
+      class(psb_d_rsb_sparse_mat), intent(inout) :: a
+    end subroutine psb_d_rsb_trim
+  end interface
+  
+  interface 
+    subroutine psb_d_rsb_mold(a,b,info) 
+      import :: psb_d_rsb_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
+      class(psb_d_rsb_sparse_mat), intent(in)                  :: a
+      class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
+      integer(psb_ipk_), intent(out)                           :: info
+    end subroutine psb_d_rsb_mold
+  end interface
+
+  interface
+    subroutine  psb_d_rsb_allocate_mnnz(m,n,a,nz) 
+      import :: psb_d_rsb_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in) :: m,n
+      class(psb_d_rsb_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(in), optional :: nz
+    end subroutine psb_d_rsb_allocate_mnnz
+  end interface
+  
+  interface
+    subroutine psb_d_rsb_print(iout,a,iv,head,ivr,ivc)
+      import :: psb_d_rsb_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in)           :: iout
+      class(psb_d_rsb_sparse_mat), intent(in) :: a   
+      integer(psb_ipk_), intent(in), optional :: iv(:)
+      character(len=*), optional              :: head
+      integer(psb_ipk_), intent(in), optional :: ivr(:), ivc(:)
+    end subroutine psb_d_rsb_print
+  end interface
+  
+  interface 
+    subroutine psb_d_cp_rsb_to_coo(a,b,info) 
+      import :: psb_d_coo_sparse_mat, psb_d_rsb_sparse_mat, psb_ipk_
+      class(psb_d_rsb_sparse_mat), intent(in)    :: a
+      class(psb_d_coo_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out)             :: info
+    end subroutine psb_d_cp_rsb_to_coo
+  end interface
+  
+  interface 
+    subroutine psb_d_cp_rsb_from_coo(a,b,info) 
+      import :: psb_d_rsb_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_
+      class(psb_d_rsb_sparse_mat), intent(inout) :: a
+      class(psb_d_coo_sparse_mat), intent(in)    :: b
+      integer(psb_ipk_), intent(out)             :: info
+    end subroutine psb_d_cp_rsb_from_coo
+  end interface
+  
+  interface 
+    subroutine psb_d_cp_rsb_to_fmt(a,b,info) 
+      import :: psb_d_rsb_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
+      class(psb_d_rsb_sparse_mat), intent(in)     :: a
+      class(psb_d_base_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out)              :: info
+    end subroutine psb_d_cp_rsb_to_fmt
+  end interface
+  
+  interface 
+    subroutine psb_d_cp_rsb_from_fmt(a,b,info) 
+      import :: psb_d_rsb_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
+      class(psb_d_rsb_sparse_mat), intent(inout) :: a
+      class(psb_d_base_sparse_mat), intent(in)   :: b
+      integer(psb_ipk_), intent(out)             :: info
+    end subroutine psb_d_cp_rsb_from_fmt
+  end interface
+  
+  interface 
+    subroutine psb_d_mv_rsb_to_coo(a,b,info) 
+      import :: psb_d_rsb_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_
+      class(psb_d_rsb_sparse_mat), intent(inout) :: a
+      class(psb_d_coo_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out)             :: info
+    end subroutine psb_d_mv_rsb_to_coo
+  end interface
+  
+  interface 
+    subroutine psb_d_mv_rsb_from_coo(a,b,info) 
+      import :: psb_d_rsb_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_
+      class(psb_d_rsb_sparse_mat), intent(inout) :: a
+      class(psb_d_coo_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out)             :: info
+    end subroutine psb_d_mv_rsb_from_coo
+  end interface
+  
+  interface 
+    subroutine psb_d_mv_rsb_to_fmt(a,b,info) 
+      import :: psb_d_rsb_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
+      class(psb_d_rsb_sparse_mat), intent(inout)  :: a
+      class(psb_d_base_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out)              :: info
+    end subroutine psb_d_mv_rsb_to_fmt
+  end interface
+  
+  interface 
+    subroutine psb_d_mv_rsb_from_fmt(a,b,info) 
+      import :: psb_d_rsb_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
+      class(psb_d_rsb_sparse_mat), intent(inout)  :: a
+      class(psb_d_base_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out)              :: info
+    end subroutine psb_d_mv_rsb_from_fmt
+  end interface
+  
+  interface 
+    subroutine psb_d_rsb_csput(nz,ia,ja,val,a,imin,imax,jmin,jmax,info,gtl) 
+      import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_
+      class(psb_d_rsb_sparse_mat), intent(inout) :: a
+      real(psb_dpk_), intent(in)      :: val(:)
+      integer(psb_ipk_), intent(in)             :: nz,ia(:), ja(:),&
+           &  imin,imax,jmin,jmax
+      integer(psb_ipk_), intent(out)            :: info
+      integer(psb_ipk_), intent(in), optional   :: gtl(:)
+    end subroutine psb_d_rsb_csput
+  end interface
+  
+  interface 
+    subroutine psb_d_rsb_csgetptn(imin,imax,a,nz,ia,ja,info,&
+         & jmin,jmax,iren,append,nzin,rscale,cscale)
+      import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_
+      class(psb_d_rsb_sparse_mat), intent(in)        :: a
+      integer(psb_ipk_), intent(in)                  :: imin,imax
+      integer(psb_ipk_), intent(out)                 :: nz
+      integer(psb_ipk_), allocatable, intent(inout)  :: ia(:), ja(:)
+      integer(psb_ipk_),intent(out)                  :: info
+      logical, intent(in), optional        :: append
+      integer(psb_ipk_), intent(in), optional        :: iren(:)
+      integer(psb_ipk_), intent(in), optional        :: jmin,jmax, nzin
+      logical, intent(in), optional        :: rscale,cscale
+    end subroutine psb_d_rsb_csgetptn
+  end interface
+  
+  interface 
+    subroutine psb_d_rsb_csgetrow(imin,imax,a,nz,ia,ja,val,info,&
+         & jmin,jmax,iren,append,nzin,rscale,cscale)
+      import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_
+      class(psb_d_rsb_sparse_mat), intent(in)        :: a
+      integer(psb_ipk_), intent(in)                  :: imin,imax
+      integer(psb_ipk_), intent(out)                 :: nz
+      integer(psb_ipk_), allocatable, intent(inout)  :: ia(:), ja(:)
+      real(psb_dpk_), allocatable,  intent(inout)    :: val(:)
+      integer(psb_ipk_),intent(out)                  :: info
+      logical, intent(in), optional        :: append
+      integer(psb_ipk_), intent(in), optional        :: iren(:)
+      integer(psb_ipk_), intent(in), optional        :: jmin,jmax, nzin
+      logical, intent(in), optional        :: rscale,cscale
+    end subroutine psb_d_rsb_csgetrow
+  end interface
+
+  interface 
+    subroutine psb_d_rsb_csgetblk(imin,imax,a,b,info,&
+       & jmin,jmax,iren,append,rscale,cscale)
+      import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_d_coo_sparse_mat, psb_ipk_
+      class(psb_d_rsb_sparse_mat), intent(in)    :: a
+      class(psb_d_coo_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(in)              :: imin,imax
+      integer(psb_ipk_),intent(out)              :: info
+      logical, intent(in), optional        :: append
+      integer(psb_ipk_), intent(in), optional    :: iren(:)
+      integer(psb_ipk_), intent(in), optional    :: jmin,jmax
+      logical, intent(in), optional        :: rscale,cscale
+    end subroutine psb_d_rsb_csgetblk
+  end interface
+    
+  interface 
+    subroutine psb_d_rsb_cssv(alpha,a,x,beta,y,info,trans) 
+      import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_
+      class(psb_d_rsb_sparse_mat), intent(in) :: a
+      real(psb_dpk_), intent(in)          :: alpha, beta, x(:)
+      real(psb_dpk_), intent(inout)       :: y(:)
+      integer(psb_ipk_), intent(out)           :: info
+      character, optional, intent(in)          :: trans
+    end subroutine psb_d_rsb_cssv
+    subroutine psb_d_rsb_cssm(alpha,a,x,beta,y,info,trans) 
+      import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_
+      class(psb_d_rsb_sparse_mat), intent(in) :: a
+      real(psb_dpk_), intent(in)          :: alpha, beta, x(:,:)
+      real(psb_dpk_), intent(inout)       :: y(:,:)
+      integer(psb_ipk_), intent(out)       :: info
+      character, optional, intent(in)      :: trans
+    end subroutine psb_d_rsb_cssm
+  end interface
+  
+  interface 
+    subroutine psb_d_rsb_csmv(alpha,a,x,beta,y,info,trans) 
+      import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_
+      class(psb_d_rsb_sparse_mat), intent(in) :: a
+      real(psb_dpk_), intent(in)          :: alpha, beta, x(:)
+      real(psb_dpk_), intent(inout)       :: y(:)
+      integer(psb_ipk_), intent(out)       :: info
+      character, optional, intent(in)      :: trans
+    end subroutine psb_d_rsb_csmv
+    subroutine psb_d_rsb_csmm(alpha,a,x,beta,y,info,trans) 
+      import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_
+      class(psb_d_rsb_sparse_mat), intent(in) :: a
+      real(psb_dpk_), intent(in)          :: alpha, beta, x(:,:)
+      real(psb_dpk_), intent(inout)       :: y(:,:)
+      integer(psb_ipk_), intent(out)       :: info
+      character, optional, intent(in)      :: trans
+    end subroutine psb_d_rsb_csmm
+  end interface
+  
+  
+  interface 
+    function psb_d_rsb_maxval(a) result(res)
+      import :: psb_d_rsb_sparse_mat, psb_dpk_
+      class(psb_d_rsb_sparse_mat), intent(in) :: a
+      real(psb_dpk_)         :: res
+    end function psb_d_rsb_maxval
+  end interface
+  
+  interface 
+    function psb_d_rsb_csnmi(a) result(res)
+      import :: psb_d_rsb_sparse_mat, psb_dpk_
+      class(psb_d_rsb_sparse_mat), intent(in) :: a
+      real(psb_dpk_)         :: res
+    end function psb_d_rsb_csnmi
+  end interface
+  
+  interface 
+    function psb_d_rsb_csnm1(a) result(res)
+      import :: psb_d_rsb_sparse_mat, psb_dpk_
+      class(psb_d_rsb_sparse_mat), intent(in) :: a
+      real(psb_dpk_)         :: res
+    end function psb_d_rsb_csnm1
+  end interface
+
+  interface 
+    subroutine psb_d_rsb_rowsum(d,a) 
+      import :: psb_d_rsb_sparse_mat, psb_dpk_
+      class(psb_d_rsb_sparse_mat), intent(in) :: a
+      real(psb_dpk_), intent(out)              :: d(:)
+    end subroutine psb_d_rsb_rowsum
+  end interface
+
+  interface 
+    subroutine psb_d_rsb_arwsum(d,a) 
+      import :: psb_d_rsb_sparse_mat, psb_dpk_
+      class(psb_d_rsb_sparse_mat), intent(in) :: a
+      real(psb_dpk_), intent(out)              :: d(:)
+    end subroutine psb_d_rsb_arwsum
+  end interface
+  
+  interface 
+    subroutine psb_d_rsb_colsum(d,a) 
+      import :: psb_d_rsb_sparse_mat, psb_dpk_
+      class(psb_d_rsb_sparse_mat), intent(in) :: a
+      real(psb_dpk_), intent(out)              :: d(:)
+    end subroutine psb_d_rsb_colsum
+  end interface
+
+  interface 
+    subroutine psb_d_rsb_aclsum(d,a) 
+      import :: psb_d_rsb_sparse_mat, psb_dpk_
+      class(psb_d_rsb_sparse_mat), intent(in) :: a
+      real(psb_dpk_), intent(out)              :: d(:)
+    end subroutine psb_d_rsb_aclsum
+  end interface
+    
+  interface 
+    subroutine psb_d_rsb_get_diag(a,d,info) 
+      import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_
+      class(psb_d_rsb_sparse_mat), intent(in) :: a
+      real(psb_dpk_), intent(out)     :: d(:)
+      integer(psb_ipk_), intent(out)   :: info
+    end subroutine psb_d_rsb_get_diag
+  end interface
+  
+  interface 
+    subroutine psb_d_rsb_scal(d,a,info,side) 
+      import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_
+      class(psb_d_rsb_sparse_mat), intent(inout) :: a
+      real(psb_dpk_), intent(in)      :: d(:)
+      integer(psb_ipk_), intent(out)   :: info
+      character, intent(in), optional  :: side
+    end subroutine psb_d_rsb_scal
+  end interface
+  
+  interface
+    subroutine psb_d_rsb_scals(d,a,info) 
+      import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_
+      class(psb_d_rsb_sparse_mat), intent(inout) :: a
+      real(psb_dpk_), intent(in)      :: d
+      integer(psb_ipk_), intent(out)   :: info
+    end subroutine psb_d_rsb_scals
+  end interface
+  
+
+
+contains 
+
+  ! == ===================================
+  !
+  !
+  !
+  ! Getters 
+  !
+  !
+  !
+  !
+  !
+  ! == ===================================
+
+  
+  function d_rsb_sizeof(a) result(res)
+    implicit none 
+    class(psb_d_rsb_sparse_mat), intent(in) :: a
+    integer(psb_epk_) :: res
+
+    
+
+      
+  end function d_rsb_sizeof
+
+  function d_rsb_get_fmt() result(res)
+    implicit none 
+    character(len=5) :: res
+    res = 'RSB'
+  end function d_rsb_get_fmt
+  
+  function d_rsb_get_nzeros(a) result(res)
+    use rsb_mod
+    implicit none
+    class(psb_d_rsb_sparse_mat), intent(in) :: a
+    integer(psb_ipk_) :: res
+
+    res = Rsb_get_nzeros(a%rsbMat)
+    
+  end function d_rsb_get_nzeros
+
+  function d_rsb_get_size(a) result(res)
+    implicit none 
+    class(psb_d_rsb_sparse_mat), intent(in) :: a
+    integer(psb_ipk_) :: res
+
+  end function d_rsb_get_size
+
+
+  function  d_rsb_get_nz_row(idx,a) result(res)
+
+    implicit none
+    
+    class(psb_d_rsb_sparse_mat), intent(in) :: a
+    integer(psb_ipk_), intent(in)           :: idx
+    integer(psb_ipk_)                       :: res
+    
+    res = 0 
+ 
+    
+  end function d_rsb_get_nz_row
+
+
+
+  ! == ===================================
+  !
+  !
+  !
+  ! Data management
+  !
+  !
+  !
+  !
+  !
+  ! == ===================================  
+
+  subroutine  d_rsb_free(a)
+    use rsb_mod
+    implicit none 
+
+    class(psb_d_rsb_sparse_mat), intent(inout) :: a
+
+    call freeRsbMat(a%rsbMat)
+    
+    call a%set_null()
+    call a%set_nrows(0)
+    call a%set_ncols(0)
+    
+    return
+
+  end subroutine d_rsb_free
+
+
+end module psb_d_rsb_mat_mod
--- a/rsb/psb_rsb_mod.F90
+++ b/rsb/psb_rsb_mod.F90
@ -0,0 +1,50 @@
+!                Parallel Sparse BLAS   GPU plugin 
+!      (C) Copyright 2013
+!  
+!                         Salvatore Filippone
+!                         Alessandro Fanfarillo
+!   
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!   
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!   
+  
+
+module psb_rsb_mod
+  use psb_const_mod
+  use rsb_mod
+  use psb_rsb_penv_mod
+  ! use psb_d_ell_mat_mod
+  ! use psb_s_ell_mat_mod
+  ! use psb_z_ell_mat_mod
+  ! use psb_c_ell_mat_mod
+
+  ! use psb_s_hll_mat_mod
+  ! use psb_d_hll_mat_mod
+  ! use psb_c_hll_mat_mod
+  ! use psb_z_hll_mat_mod
+  
+  ! use psb_d_dia_mat_mod
+  ! use psb_d_hdia_mat_mod
+  use psb_d_rsb_mat_mod
+end module psb_rsb_mod
--- a/rsb/psb_rsb_penv_mod.F90
+++ b/rsb/psb_rsb_penv_mod.F90
@ -0,0 +1,99 @@
+!                Parallel Sparse BLAS   GPU plugin 
+!      (C) Copyright 2013
+!  
+!                         Salvatore Filippone
+!                         Alessandro Fanfarillo
+!   
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!   
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!   
+  
+
+module psb_rsb_penv_mod
+  use psb_const_mod
+  use psb_penv_mod
+  !use psi_comm_buffers_mod, only : psb_buffer_queue
+  use iso_c_binding
+
+!  interface psb_rsb_init
+!    module procedure  psb_rsb_init
+!  end interface
+#if defined(HAVE_RSB)
+  interface 
+    function psb_C_rsb_init() &
+         & result(res) bind(c,name='rsbInit')
+      use iso_c_binding
+      integer(c_int)		:: res
+    end function psb_C_rsb_init
+ end interface
+ 
+  interface 
+     function psb_C_rsb_exit() &
+         & result(res) bind(c,name='rsbExit')
+       use iso_c_binding
+       integer(c_int)		:: res
+     end function psb_C_rsb_exit
+  end interface
+
+#endif
+
+contains
+  ! !!!!!!!!!!!!!!!!!!!!!!
+  !
+  ! Environment handling 
+  !
+  ! !!!!!!!!!!!!!!!!!!!!!!
+
+
+  subroutine psb_rsb_init()
+    use psb_penv_mod
+    use psb_const_mod
+    use psb_error_mod
+    ! type(psb_ctxt_type), intent(in) :: ctxt
+    ! integer, intent(in), optional :: dev
+
+    integer :: info
+
+#if defined (HAVE_RSB)
+    info = psb_C_rsb_init()
+    if (info/=0) write(*,*) 'error during rsb_init'
+#endif
+  end subroutine psb_rsb_init
+
+  subroutine psb_rsb_exit()
+    use psb_penv_mod
+    use psb_const_mod
+    use psb_error_mod
+    ! type(psb_ctxt_type), intent(in) :: ctxt
+    ! integer, intent(in), optional :: dev
+
+    integer :: info
+
+#if defined (HAVE_RSB)
+    info = psb_C_rsb_exit()
+    if (info/=0) write(*,*) 'error during rsb_exit'
+#endif
+  end subroutine psb_rsb_exit
+
+end module psb_rsb_penv_mod
--- a/rsb/rsb_int.c
+++ b/rsb/rsb_int.c
@ -0,0 +1,110 @@
+  /*             Parallel Sparse BLAS   GPU plugin  */
+  /*   (C) Copyright 2013 */
+
+  /*                      Salvatore Filippone */
+  /*                      Alessandro Fanfarillo */
+ 
+  /* Redistribution and use in source and binary forms, with or without */
+  /* modification, are permitted provided that the following conditions */
+  /* are met: */
+  /*   1. Redistributions of source code must retain the above copyright */
+  /*      notice, this list of conditions and the following disclaimer. */
+  /*   2. Redistributions in binary form must reproduce the above copyright */
+  /*      notice, this list of conditions, and the following disclaimer in the */
+  /*      documentation and/or other materials provided with the distribution. */
+  /*   3. The name of the PSBLAS group or the names of its contributors may */
+  /*      not be used to endorse or promote products derived from this */
+  /*      software without specific written permission. */
+ 
+  /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
+  /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
+  /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
+  /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
+  /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
+  /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
+  /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
+  /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
+  /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
+  /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
+  /* POSSIBILITY OF SUCH DAMAGE. */
+ 
+#include <sys/time.h>
+#if defined(HAVE_RSB)
+#include "rsb.h"
+#include "rsb_int.h"
+
+int rsbInit()
+{
+  rsb_err_t errval = RSB_ERR_NO_ERROR;
+
+  if((errval = rsb_lib_init(RSB_NULL_INIT_OPTIONS))!=RSB_ERR_NO_ERROR)
+    {
+      printf("Error initializing the library!\n");
+      return 1;
+    }
+  
+  return 0;
+}
+
+int rsbExit()
+{
+  rsb_err_t errval = RSB_ERR_NO_ERROR;
+
+  if((errval = rsb_lib_exit(RSB_NULL_INIT_OPTIONS))!=RSB_ERR_NO_ERROR)
+    {
+      printf("Error finalizing the library!\n");
+      return 1;
+    }
+  
+  return 0;
+}
+
+int Rsb_double_from_coo(void **rsbMat, double *va, int *ia,int *ja,int nnz,int nr,
+			int nc, int br, int bc)
+{
+  int i=0;
+  rsb_err_t errval = RSB_ERR_NO_ERROR;
+
+  *rsbMat = rsb_mtx_alloc_from_coo_const(va,ia,ja,nnz,RSB_NUMERICAL_TYPE_DOUBLE,nr,nc,br,bc,RSB_FLAG_FORTRAN_INDICES_INTERFACE,&errval);
+
+  if((!*rsbMat) || (errval != RSB_ERR_NO_ERROR))
+    {
+      printf("Error while allocating the matrix!\n");
+      return 1;
+    }
+  return 0;
+}
+
+//X is the input and y is the output
+int Rsb_double_spmv(void *rsbMat, double *x, double alfa, double *y, double beta,char trans)
+{
+  rsb_err_t errval = RSB_ERR_NO_ERROR;
+
+  if(trans=='N')
+    errval = rsb_spmv(RSB_TRANSPOSITION_N,&alfa,(struct rsb_mtx_t *)rsbMat,x,1,&beta,y,1);
+  else
+    errval = rsb_spmv(RSB_TRANSPOSITION_T,&alfa,(struct rsb_mtx_t *)rsbMat,x,1,&beta,y,1);
+  
+  if(errval != RSB_ERR_NO_ERROR)
+    {
+      printf("Error performing a multiplication!\n");
+      return 1;
+    }
+  
+  return 0;
+}
+
+//Should it return a long instead of integer?
+int Rsb_getNZeros(void *rsbMat)
+{
+  int res = 0;
+  rsb_mtx_get_info((struct rsb_mtx_t *)rsbMat,RSB_MIF_MATRIX_NNZ__TO__RSB_NNZ_INDEX_T,(void *)&res);
+  return res;
+}
+
+void freeRsbMat(void *rsbMat)
+{
+  rsb_mtx_free(rsbMat);
+}
+
+#endif
--- a/rsb/rsb_int.h
+++ b/rsb/rsb_int.h
@ -0,0 +1,2 @@
+int Rsb_double_from_coo(void **rsbMat,double *va, int *ia,int *ja,int nnz,int nr,
+			int nc, int br, int bc);
--- a/rsb/rsb_mod.F90
+++ b/rsb/rsb_mod.F90
@ -0,0 +1,235 @@
+!                Parallel Sparse BLAS   GPU plugin 
+!      (C) Copyright 2013
+!  
+!                         Salvatore Filippone
+!                         Alessandro Fanfarillo
+!   
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!   
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!   
+  
+
+module rsb_mod
+  use rsb
+  use iso_c_binding 
+
+#ifdef HAVE_RSB
+
+ interface Rsb_from_coo
+    function Rsb_double_from_coo(rsbMat,va,ia,ja,nnz,nr,nc,br,bc) &
+         & result(res) bind(c,name='Rsb_double_from_coo')
+      use iso_c_binding
+      integer(c_int) :: res
+      type(c_ptr) :: rsbMat
+      real(c_double) :: va(*)
+      integer(c_int) :: ia(*),ja(*)
+      integer(c_int),value :: nnz,nr,nc,br,bc
+    end function Rsb_double_from_coo
+ end interface Rsb_from_coo
+
+ interface
+    function Rsb_get_nzeros(rsbMat) &
+         & result(res) bind(c,name='Rsb_getNZeros')
+      use iso_c_binding
+      integer(c_int) :: res
+      type(c_ptr),value :: rsbMat
+    end function Rsb_get_nzeros
+ end interface
+
+ interface Rsb_spmv
+    function Rsb_double_spmv(rsbMat,x,alfa,y,beta,trans) &
+         & result(res) bind(c,name='Rsb_double_spmv')
+      use iso_c_binding
+      integer(c_int) :: res
+      type(c_ptr),value :: rsbMat
+      real(c_double) :: x(*),y(*)
+      real(c_double),value :: alfa,beta
+      character(c_char),value :: trans
+    end function Rsb_double_spmv
+ end interface Rsb_spmv
+
+  interface 
+    subroutine  freeRsbMat(rsbMat) &
+         & bind(c,name='freeRsbMat')
+      use iso_c_binding
+      type(c_ptr), value  :: rsbMat
+    end subroutine freeRsbMat
+  end interface
+
+  ! interface writeEllDevice
+ 
+  !   function writeEllDeviceFloat(deviceMat,val,ja,ldj,irn) &
+  !        & result(res) bind(c,name='writeEllDeviceFloat')
+  !     use iso_c_binding
+  !     integer(c_int)      :: res
+  !     type(c_ptr), value  :: deviceMat
+  !     integer(c_int), value :: ldj
+  !     real(c_float)       :: val(ldj,*)
+  !     integer(c_int)      :: ja(ldj,*),irn(*)
+  !   end function writeEllDeviceFloat
+
+  !   function writeEllDeviceDouble(deviceMat,val,ja,ldj,irn) &
+  !        & result(res) bind(c,name='writeEllDeviceDouble')
+  !     use iso_c_binding
+  !     integer(c_int)      :: res
+  !     type(c_ptr), value  :: deviceMat
+  !     integer(c_int), value :: ldj
+  !     real(c_double)      :: val(ldj,*)
+  !     integer(c_int)      :: ja(ldj,*),irn(*)
+  !   end function writeEllDeviceDouble
+
+  !   function writeEllDeviceFloatComplex(deviceMat,val,ja,ldj,irn) &
+  !        & result(res) bind(c,name='writeEllDeviceFloatComplex')
+  !     use iso_c_binding
+  !     integer(c_int)           :: res
+  !     type(c_ptr), value       :: deviceMat
+  !     integer(c_int), value    :: ldj
+  !     complex(c_float_complex) :: val(ldj,*)
+  !     integer(c_int)           :: ja(ldj,*),irn(*)
+  !   end function writeEllDeviceFloatComplex
+
+  !   function writeEllDeviceDoubleComplex(deviceMat,val,ja,ldj,irn) &
+  !        & result(res) bind(c,name='writeEllDeviceDoubleComplex')
+  !     use iso_c_binding
+  !     integer(c_int)            :: res
+  !     type(c_ptr), value        :: deviceMat
+  !     integer(c_int), value     :: ldj
+  !     complex(c_double_complex) :: val(ldj,*)
+  !     integer(c_int)            :: ja(ldj,*),irn(*)
+  !   end function writeEllDeviceDoubleComplex
+
+  ! end interface writeEllDevice
+
+  ! interface readEllDevice 
+
+  !   function readEllDeviceFloat(deviceMat,val,ja,ldj,irn) &
+  !        & result(res) bind(c,name='readEllDeviceFloat')
+  !     use iso_c_binding
+  !     integer(c_int)      :: res
+  !     type(c_ptr), value  :: deviceMat
+  !     integer(c_int), value :: ldj
+  !     real(c_float)       :: val(ldj,*)
+  !     integer(c_int)      :: ja(ldj,*),irn(*)
+  !   end function readEllDeviceFloat
+
+  !   function readEllDeviceDouble(deviceMat,val,ja,ldj,irn) &
+  !        & result(res) bind(c,name='readEllDeviceDouble')
+  !     use iso_c_binding
+  !     integer(c_int)      :: res
+  !     type(c_ptr), value  :: deviceMat
+  !     integer(c_int), value :: ldj
+  !     real(c_double)      :: val(ldj,*)
+  !     integer(c_int)      :: ja(ldj,*),irn(*)
+  !   end function readEllDeviceDouble
+
+  !   function readEllDeviceFloatComplex(deviceMat,val,ja,ldj,irn) &
+  !        & result(res) bind(c,name='readEllDeviceFloatComplex')
+  !     use iso_c_binding
+  !     integer(c_int)           :: res
+  !     type(c_ptr), value       :: deviceMat
+  !     integer(c_int), value    :: ldj
+  !     complex(c_float_complex) :: val(ldj,*)
+  !     integer(c_int)           :: ja(ldj,*),irn(*)
+  !   end function readEllDeviceFloatComplex
+
+  !   function readEllDeviceDoubleComplex(deviceMat,val,ja,ldj,irn) &
+  !        & result(res) bind(c,name='readEllDeviceDoubleComplex')
+  !     use iso_c_binding
+  !     integer(c_int)           :: res
+  !     type(c_ptr), value       :: deviceMat
+  !     integer(c_int), value    :: ldj
+  !     complex(c_double_complex) :: val(ldj,*)
+  !     integer(c_int)           :: ja(ldj,*),irn(*)
+  !   end function readEllDeviceDoubleComplex
+
+  ! end interface readEllDevice
+
+  ! interface 
+  !   subroutine resetEllTimer() bind(c,name='resetEllTimer')
+  !     use iso_c_binding
+  !   end subroutine resetEllTimer
+  ! end interface
+  ! interface 
+  !   function  getEllTimer() &
+  !        & bind(c,name='getEllTimer') result(res)
+  !     use iso_c_binding
+  !     real(c_double)      :: res
+  !   end function getEllTimer
+  ! end interface
+
+
+  ! interface 
+  !   function  getEllDevicePitch(deviceMat) &
+  !        & bind(c,name='getEllDevicePitch') result(res)
+  !     use iso_c_binding
+  !     type(c_ptr), value  :: deviceMat
+  !     integer(c_int)      :: res
+  !   end function getEllDevicePitch
+  ! end interface
+
+  ! interface 
+  !   function  getEllDeviceMaxRowSize(deviceMat) &
+  !        & bind(c,name='getEllDeviceMaxRowSize') result(res)
+  !     use iso_c_binding
+  !     type(c_ptr), value  :: deviceMat
+  !     integer(c_int)      :: res
+  !   end function getEllDeviceMaxRowSize
+  ! end interface
+
+
+  ! interface spmvEllDevice
+  !   function spmvEllDeviceFloat(deviceMat,alpha,x,beta,y) &
+  !        & result(res) bind(c,name='spmvEllDeviceFloat')
+  !     use iso_c_binding
+  !     integer(c_int)		:: res
+  !     type(c_ptr), value 	:: deviceMat, x, y
+  !     real(c_float),value     	:: alpha, beta
+  !   end function spmvEllDeviceFloat
+  !   function spmvEllDeviceDouble(deviceMat,alpha,x,beta,y) &
+  !        & result(res) bind(c,name='spmvEllDeviceDouble')
+  !     use iso_c_binding
+  !     integer(c_int)		:: res
+  !     type(c_ptr), value	:: deviceMat, x, y 
+  !     real(c_double),value     	:: alpha,  beta
+  !   end function spmvEllDeviceDouble
+  !   function spmvEllDeviceFloatComplex(deviceMat,alpha,x,beta,y) &
+  !        & result(res) bind(c,name='spmvEllDeviceFloatComplex')
+  !     use iso_c_binding
+  !     integer(c_int)		     :: res
+  !     type(c_ptr), value	     :: deviceMat, x, y 
+  !     complex(c_float_complex),value :: alpha,  beta
+  !   end function spmvEllDeviceFloatComplex
+  !   function spmvEllDeviceDoubleComplex(deviceMat,alpha,x,beta,y) &
+  !        & result(res) bind(c,name='spmvEllDeviceDoubleComplex')
+  !     use iso_c_binding
+  !     integer(c_int)		      :: res
+  !     type(c_ptr), value	      :: deviceMat, x, y 
+  !     complex(c_double_complex),value :: alpha,  beta
+  !   end function spmvEllDeviceDoubleComplex
+  ! end interface spmvEllDevice
+    
+#endif
+
+
+end module rsb_mod