Merge branch 'repackage' into non-diag

non-diag
sfilippone 10 months ago
commit 74cf138a6c

@ -67,6 +67,26 @@ UTILMODNAME=@UTILMODNAME@
CBINDLIBNAME=libpsb_cbind.a CBINDLIBNAME=libpsb_cbind.a
CUDAD=@CUDAD@
CUDALD=@CUDALD@
LCUDA=@LCUDA@
SPGPU_LIBS=@SPGPU_LIBS@
CUDA_DIR=@CUDA_DIR@
CUDA_DEFINES=@CUDA_DEFINES@
CUDA_INCLUDES=@CUDA_INCLUDES@
CUDA_LIBS=@CUDA_LIBS@
CUDA_VERSION=@CUDA_VERSION@
CUDA_SHORT_VERSION=@CUDA_SHORT_VERSION@
NVCC=@CUDA_NVCC@
CUDEFINES=@CUDEFINES@
.SUFFIXES: .cu
.cu.o:
$(NVCC) $(CINCLUDES) $(CDEFINES) $(CUDEFINES) -c $<
@PSBLASRULES@ @PSBLASRULES@
PSBGPULDLIBS=$(LCUDA) $(SPGPU_LIBS) $(CUDA_LIBS) $(PSBLDLIBS) $(LIBS)

@ -1,6 +1,6 @@
include Make.inc include Make.inc
all: dirs based precd kryld utild cbindd libd all: dirs based precd kryld utild cbindd extd $(CUDAD) libd
@echo "=====================================" @echo "====================================="
@echo "PSBLAS libraries Compilation Successful." @echo "PSBLAS libraries Compilation Successful."
@ -12,15 +12,20 @@ dirs:
precd: based precd: based
utild: based utild: based
kryld: precd kryld: precd
extd: based
cudad: extd
cbindd: based precd kryld utild cbindd: based precd kryld utild
libd: based precd kryld utild cbindd libd: based precd kryld utild cbindd extd $(CUDALD)
$(MAKE) -C base lib $(MAKE) -C base lib
$(MAKE) -C prec lib $(MAKE) -C prec lib
$(MAKE) -C krylov lib $(MAKE) -C krylov lib
$(MAKE) -C util lib $(MAKE) -C util lib
$(MAKE) -C cbind lib $(MAKE) -C cbind lib
$(MAKE) -C ext lib
cudald: cudad
$(MAKE) -C cuda lib
based: based:
$(MAKE) -C base objs $(MAKE) -C base objs
@ -32,6 +37,10 @@ utild:
$(MAKE) -C util objs $(MAKE) -C util objs
cbindd: cbindd:
$(MAKE) -C cbind objs $(MAKE) -C cbind objs
extd: based
$(MAKE) -C ext objs
cudad: based extd
$(MAKE) -C cuda objs
install: all install: all
@ -56,6 +65,8 @@ clean:
$(MAKE) -C krylov clean $(MAKE) -C krylov clean
$(MAKE) -C util clean $(MAKE) -C util clean
$(MAKE) -C cbind clean $(MAKE) -C cbind clean
$(MAKE) -C ext clean
$(MAKE) -C cuda clean
check: all check: all
make check -C test/serial make check -C test/serial
@ -71,6 +82,8 @@ veryclean: cleanlib
cd krylov && $(MAKE) veryclean cd krylov && $(MAKE) veryclean
cd util && $(MAKE) veryclean cd util && $(MAKE) veryclean
cd cbind && $(MAKE) veryclean cd cbind && $(MAKE) veryclean
cd ext && $(MAKE) veryclean
cd cuda && $(MAKE) veryclean
cd test/fileread && $(MAKE) clean cd test/fileread && $(MAKE) clean
cd test/pargen && $(MAKE) clean cd test/pargen && $(MAKE) clean
cd test/util && $(MAKE) clean cd test/util && $(MAKE) clean

@ -1,4 +1,4 @@
PSBLAS library, version 3.8 PSBLAS library, version 3.9
=========================== ===========================
The architecture of the Fortran 2003 sparse BLAS is described in: The architecture of the Fortran 2003 sparse BLAS is described in:
@ -40,6 +40,15 @@ The main reference for the serial sparse BLAS is:
>linear algebra subprograms for sparse matrices: a user level interface, >linear algebra subprograms for sparse matrices: a user level interface,
>ACM Trans. Math. Softw., 23(3), 379-401, 1997. >ACM Trans. Math. Softw., 23(3), 379-401, 1997.
CUDA and GPU support
--------------------
This version of PSBLAS incorporates into a single package three
entities that were previouslty separated:
1. PSBLAS -- the base library
2. PSBLAS-EXT -- a library providing additional storage formats
3. SPGPU -- a package of kernels for NVIDIA GPUs originally
written by Davide Barbieri and Salvatore Filippone;
see the license file cuda/License-spgpu.md
INSTALLING INSTALLING
---------- ----------
@ -61,6 +70,11 @@ prerequisites (see also SERIAL below):
specify `--with-amd` (see `./configure --help` for more details). specify `--with-amd` (see `./configure --help` for more details).
We use the C interface to AMD. We use the C interface to AMD.
5. If you have CUDA available, use
--with-cuda=<path> to specify the CUDA toolkit location
--with-cudacc=XX,YY,ZZ to specify a list of target CCs (compute
capabilities) to compile the CUDA code for.
The configure script will generate a Make.inc file suitable for building The configure script will generate a Make.inc file suitable for building
the library. The script is capable of recognizing the needed libraries the library. The script is capable of recognizing the needed libraries
with their default names; if they are in unusual places consider adding with their default names; if they are in unusual places consider adding

@ -136,9 +136,9 @@ module psb_const_mod
! !
! Version ! Version
! !
character(len=*), parameter :: psb_version_string_ = "3.8.0" character(len=*), parameter :: psb_version_string_ = "3.9.0"
integer(psb_ipk_), parameter :: psb_version_major_ = 3 integer(psb_ipk_), parameter :: psb_version_major_ = 3
integer(psb_ipk_), parameter :: psb_version_minor_ = 8 integer(psb_ipk_), parameter :: psb_version_minor_ = 9
integer(psb_ipk_), parameter :: psb_patchlevel_ = 0 integer(psb_ipk_), parameter :: psb_patchlevel_ = 0
! !

@ -2869,6 +2869,7 @@ subroutine psb_c_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
! Hence the call to set_nzeros done here. ! Hence the call to set_nzeros done here.
!$omp critical !$omp critical
nza = a%get_nzeros() nza = a%get_nzeros()
nzaold = nza
isza = a%get_size() isza = a%get_size()
! Build phase. Must handle reallocations in a sensible way. ! Build phase. Must handle reallocations in a sensible way.
if (isza < (nza+nz)) then if (isza < (nza+nz)) then
@ -2878,14 +2879,19 @@ subroutine psb_c_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
if (isza < (nza+nz)) then if (isza < (nza+nz)) then
info = psb_err_alloc_dealloc_; call psb_errpush(info,name) info = psb_err_alloc_dealloc_; call psb_errpush(info,name)
else else
nzaold = nza #if defined(OPENMP)
nza = nza + nz nza = nza + nz
#endif
call a%set_nzeros(nza) call a%set_nzeros(nza)
end if end if
!$omp end critical !$omp end critical
if (info /= 0) goto 9999 if (info /= 0) goto 9999
call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,& call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,&
& imin,imax,jmin,jmax,info) & imin,imax,jmin,jmax,info)
#if !defined(OPENMP)
nza = nzaold
call a%set_nzeros(nza)
#endif
call a%set_sorted(.false.) call a%set_sorted(.false.)
else if (a%is_upd()) then else if (a%is_upd()) then

@ -2869,6 +2869,7 @@ subroutine psb_d_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
! Hence the call to set_nzeros done here. ! Hence the call to set_nzeros done here.
!$omp critical !$omp critical
nza = a%get_nzeros() nza = a%get_nzeros()
nzaold = nza
isza = a%get_size() isza = a%get_size()
! Build phase. Must handle reallocations in a sensible way. ! Build phase. Must handle reallocations in a sensible way.
if (isza < (nza+nz)) then if (isza < (nza+nz)) then
@ -2878,14 +2879,19 @@ subroutine psb_d_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
if (isza < (nza+nz)) then if (isza < (nza+nz)) then
info = psb_err_alloc_dealloc_; call psb_errpush(info,name) info = psb_err_alloc_dealloc_; call psb_errpush(info,name)
else else
nzaold = nza #if defined(OPENMP)
nza = nza + nz nza = nza + nz
#endif
call a%set_nzeros(nza) call a%set_nzeros(nza)
end if end if
!$omp end critical !$omp end critical
if (info /= 0) goto 9999 if (info /= 0) goto 9999
call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,& call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,&
& imin,imax,jmin,jmax,info) & imin,imax,jmin,jmax,info)
#if !defined(OPENMP)
nza = nzaold
call a%set_nzeros(nza)
#endif
call a%set_sorted(.false.) call a%set_sorted(.false.)
else if (a%is_upd()) then else if (a%is_upd()) then

@ -2869,6 +2869,7 @@ subroutine psb_s_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
! Hence the call to set_nzeros done here. ! Hence the call to set_nzeros done here.
!$omp critical !$omp critical
nza = a%get_nzeros() nza = a%get_nzeros()
nzaold = nza
isza = a%get_size() isza = a%get_size()
! Build phase. Must handle reallocations in a sensible way. ! Build phase. Must handle reallocations in a sensible way.
if (isza < (nza+nz)) then if (isza < (nza+nz)) then
@ -2878,14 +2879,19 @@ subroutine psb_s_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
if (isza < (nza+nz)) then if (isza < (nza+nz)) then
info = psb_err_alloc_dealloc_; call psb_errpush(info,name) info = psb_err_alloc_dealloc_; call psb_errpush(info,name)
else else
nzaold = nza #if defined(OPENMP)
nza = nza + nz nza = nza + nz
#endif
call a%set_nzeros(nza) call a%set_nzeros(nza)
end if end if
!$omp end critical !$omp end critical
if (info /= 0) goto 9999 if (info /= 0) goto 9999
call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,& call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,&
& imin,imax,jmin,jmax,info) & imin,imax,jmin,jmax,info)
#if !defined(OPENMP)
nza = nzaold
call a%set_nzeros(nza)
#endif
call a%set_sorted(.false.) call a%set_sorted(.false.)
else if (a%is_upd()) then else if (a%is_upd()) then

@ -2869,6 +2869,7 @@ subroutine psb_z_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
! Hence the call to set_nzeros done here. ! Hence the call to set_nzeros done here.
!$omp critical !$omp critical
nza = a%get_nzeros() nza = a%get_nzeros()
nzaold = nza
isza = a%get_size() isza = a%get_size()
! Build phase. Must handle reallocations in a sensible way. ! Build phase. Must handle reallocations in a sensible way.
if (isza < (nza+nz)) then if (isza < (nza+nz)) then
@ -2878,14 +2879,19 @@ subroutine psb_z_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
if (isza < (nza+nz)) then if (isza < (nza+nz)) then
info = psb_err_alloc_dealloc_; call psb_errpush(info,name) info = psb_err_alloc_dealloc_; call psb_errpush(info,name)
else else
nzaold = nza #if defined(OPENMP)
nza = nza + nz nza = nza + nz
#endif
call a%set_nzeros(nza) call a%set_nzeros(nza)
end if end if
!$omp end critical !$omp end critical
if (info /= 0) goto 9999 if (info /= 0) goto 9999
call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,& call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,&
& imin,imax,jmin,jmax,info) & imin,imax,jmin,jmax,info)
#if !defined(OPENMP)
nza = nzaold
call a%set_nzeros(nza)
#endif
call a%set_sorted(.false.) call a%set_sorted(.false.)
else if (a%is_upd()) then else if (a%is_upd()) then

@ -2018,3 +2018,252 @@ CPPFLAGS="$SAVE_CPPFLAGS";
])dnl ])dnl
dnl @synopsis PAC_CHECK_SPGPU
dnl
dnl Will try to find the spgpu library and headers.
dnl
dnl Will use $CC
dnl
dnl If the test passes, will execute ACTION-IF-FOUND. Otherwise, ACTION-IF-NOT-FOUND.
dnl Note : This file will be likely to induce the compiler to create a module file
dnl (for a module called conftest).
dnl Depending on the compiler flags, this could cause a conftest.mod file to appear
dnl in the present directory, or in another, or with another name. So be warned!
dnl
dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
dnl
AC_DEFUN(PAC_CHECK_SPGPU,
[SAVE_LIBS="$LIBS"
SAVE_CPPFLAGS="$CPPFLAGS"
if test "x$pac_cv_have_cuda" == "x"; then
PAC_CHECK_CUDA()
fi
dnl AC_MSG_NOTICE([From CUDA: $pac_cv_have_cuda ])
if test "x$pac_cv_have_cuda" == "xyes"; then
AC_ARG_WITH(spgpu, AC_HELP_STRING([--with-spgpu=DIR], [Specify the directory for SPGPU library and includes.]),
[pac_cv_spgpudir=$withval],
[pac_cv_spgpudir=''])
AC_LANG([C])
if test "x$pac_cv_spgpudir" != "x"; then
LIBS="-L$pac_cv_spgpudir/lib $LIBS"
GPU_INCLUDES="-I$pac_cv_spgpudir/include"
CPPFLAGS="$GPU_INCLUDES $CUDA_INCLUDES $CPPFLAGS"
GPU_LIBDIR="-L$pac_cv_spgpudir/lib"
fi
AC_MSG_CHECKING([spgpu dir $pac_cv_spgpudir])
AC_CHECK_HEADER([core.h],
[pac_gpu_header_ok=yes],
[pac_gpu_header_ok=no; GPU_INCLUDES=""])
if test "x$pac_gpu_header_ok" == "xyes" ; then
GPU_LIBS="-lspgpu $GPU_LIBDIR"
LIBS="$GPU_LIBS $CUDA_LIBS -lm $LIBS";
AC_MSG_CHECKING([for spgpuCreate in $GPU_LIBS])
AC_TRY_LINK_FUNC(spgpuCreate,
[pac_cv_have_spgpu=yes;pac_gpu_lib_ok=yes; ],
[pac_cv_have_spgpu=no;pac_gpu_lib_ok=no; GPU_LIBS=""])
AC_MSG_RESULT($pac_gpu_lib_ok)
if test "x$pac_cv_have_spgpu" == "xyes" ; then
AC_MSG_NOTICE([Have found SPGPU])
SPGPULIBNAME="libpsbgpu.a";
SPGPU_DIR="$pac_cv_spgpudir";
SPGPU_DEFINES="-DHAVE_SPGPU";
SPGPU_INCDIR="$SPGPU_DIR/include";
SPGPU_INCLUDES="-I$SPGPU_INCDIR";
SPGPU_LIBS="-lspgpu -L$SPGPU_DIR/lib";
LGPU=-lpsb_gpu
CUDA_DIR="$pac_cv_cuda_dir";
CUDA_DEFINES="-DHAVE_CUDA";
CUDA_INCLUDES="-I$pac_cv_cuda_dir/include"
CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib"
FDEFINES="$psblas_cv_define_prepend-DHAVE_GPU $psblas_cv_define_prepend-DHAVE_SPGPU $psblas_cv_define_prepend-DHAVE_CUDA $FDEFINES";
CDEFINES="-DHAVE_SPGPU -DHAVE_CUDA $CDEFINES" ;
fi
fi
fi
LIBS="$SAVE_LIBS"
CPPFLAGS="$SAVE_CPPFLAGS"
])dnl
dnl @synopsis PAC_CHECK_CUDA
dnl
dnl Will try to find the cuda library and headers.
dnl
dnl Will use $CC
dnl
dnl If the test passes, will execute ACTION-IF-FOUND. Otherwise, ACTION-IF-NOT-FOUND.
dnl Note : This file will be likely to induce the compiler to create a module file
dnl (for a module called conftest).
dnl Depending on the compiler flags, this could cause a conftest.mod file to appear
dnl in the present directory, or in another, or with another name. So be warned!
dnl
dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
dnl
AC_DEFUN(PAC_CHECK_CUDA,
[AC_ARG_WITH(cuda, AC_HELP_STRING([--with-cuda=DIR], [Specify the CUDA install directory.]),
[pac_cv_cuda_dir=$withval],
[pac_cv_cuda_dir=''])
AC_LANG([C])
SAVE_LIBS="$LIBS"
SAVE_CPPFLAGS="$CPPFLAGS"
if test "x$pac_cv_cuda_dir" != "x"; then
CUDA_DIR="$pac_cv_cuda_dir"
LIBS="-L$pac_cv_cuda_dir/lib $LIBS"
CUDA_INCLUDES="-I$pac_cv_cuda_dir/include"
CUDA_DEFINES="-DHAVE_CUDA"
CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS"
CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib"
if test -f "$pac_cv_cuda_dir/bin/nvcc"; then
CUDA_NVCC="$pac_cv_cuda_dir/bin/nvcc"
else
CUDA_NVCC="nvcc"
fi
fi
AC_MSG_CHECKING([cuda dir $pac_cv_cuda_dir])
AC_CHECK_HEADER([cuda_runtime.h],
[pac_cuda_header_ok=yes],
[pac_cuda_header_ok=no; CUDA_INCLUDES=""])
if test "x$pac_cuda_header_ok" == "xyes" ; then
CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR"
LIBS="$CUDA_LIBS -lm $LIBS";
AC_MSG_CHECKING([for cudaMemcpy in $CUDA_LIBS])
AC_TRY_LINK_FUNC(cudaMemcpy,
[pac_cv_have_cuda=yes;pac_cuda_lib_ok=yes; ],
[pac_cv_have_cuda=no;pac_cuda_lib_ok=no; CUDA_LIBS=""])
AC_MSG_RESULT($pac_cuda_lib_ok)
fi
LIBS="$SAVE_LIBS"
CPPFLAGS="$SAVE_CPPFLAGS"
])dnl
dnl @synopsis PAC_ARG_WITH_CUDACC
dnl
dnl Test for --with-cudacc="set_of_cc".
dnl
dnl Defines the CC to compile for
dnl
dnl
dnl Example use:
dnl
dnl PAC_ARG_WITH_CUDACC
dnl
dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
dnl
AC_DEFUN([PAC_ARG_WITH_CUDACC],
[
AC_ARG_WITH(cudacc,
AC_HELP_STRING([--with-cudacc], [A comma-separated list of CCs to compile to, for example,
--with-cudacc=50,60,70,75]),
[pac_cv_cudacc=$withval],
[pac_cv_cudacc=''])
])
AC_DEFUN(PAC_ARG_WITH_LIBRSB,
[SAVE_LIBS="$LIBS"
SAVE_CPPFLAGS="$CPPFLAGS"
AC_ARG_WITH(librsb,
AC_HELP_STRING([--with-librsb], [The directory for LIBRSB, for example,
--with-librsb=/opt/packages/librsb]),
[pac_cv_librsb_dir=$withval],
[pac_cv_librsb_dir=''])
if test "x$pac_cv_librsb_dir" != "x"; then
LIBS="-L$pac_cv_librsb_dir $LIBS"
RSB_INCLUDES="-I$pac_cv_librsb_dir"
# CPPFLAGS="$GPU_INCLUDES $CUDA_INCLUDES $CPPFLAGS"
RSB_LIBDIR="-L$pac_cv_librsb_dir"
fi
#AC_MSG_CHECKING([librsb dir $pac_cv_librsb_dir])
AC_CHECK_HEADER([$pac_cv_librsb_dir/rsb.h],
[pac_rsb_header_ok=yes],
[pac_rsb_header_ok=no; RSB_INCLUDES=""])
if test "x$pac_rsb_header_ok" == "xyes" ; then
RSB_LIBS="-lrsb $RSB_LIBDIR"
# LIBS="$GPU_LIBS $CUDA_LIBS -lm $LIBS";
# AC_MSG_CHECKING([for spgpuCreate in $GPU_LIBS])
# AC_TRY_LINK_FUNC(spgpuCreate,
# [pac_cv_have_spgpu=yes;pac_gpu_lib_ok=yes; ],
# [pac_cv_have_spgpu=no;pac_gpu_lib_ok=no; GPU_LIBS=""])
# AC_MSG_RESULT($pac_gpu_lib_ok)
# if test "x$pac_cv_have_spgpu" == "xyes" ; then
# AC_MSG_NOTICE([Have found SPGPU])
RSBLIBNAME="librsb.a";
LIBRSB_DIR="$pac_cv_librsb_dir";
# SPGPU_DEFINES="-DHAVE_SPGPU";
LIBRSB_INCDIR="$LIBRSB_DIR";
LIBRSB_INCLUDES="-I$LIBRSB_INCDIR";
LIBRSB_LIBS="-lrsb -L$LIBRSB_DIR";
# CUDA_DIR="$pac_cv_cuda_dir";
LIBRSB_DEFINES="-DHAVE_RSB";
LRSB=-lpsb_rsb
# CUDA_INCLUDES="-I$pac_cv_cuda_dir/include"
# CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib"
FDEFINES="$LIBRSB_DEFINES $psblas_cv_define_prepend $FDEFINES";
CDEFINES="$LIBRSB_DEFINES $CDEFINES";#CDEFINES="-DHAVE_SPGPU -DHAVE_CUDA $CDEFINES";
fi
# fi
LIBS="$SAVE_LIBS"
CPPFLAGS="$SAVE_CPPFLAGS"
])
dnl
dnl @synopsis PAC_CHECK_CUDA_VERSION
dnl
dnl Will try to find the cuda version
dnl
dnl Will use $CC
dnl
dnl If the test passes, will execute ACTION-IF-FOUND. Otherwise, ACTION-IF-NOT-FOUND.
dnl Note : This file will be likely to induce the compiler to create a module file
dnl (for a module called conftest).
dnl Depending on the compiler flags, this could cause a conftest.mod file to appear
dnl in the present directory, or in another, or with another name. So be warned!
dnl
dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
dnl
AC_DEFUN(PAC_CHECK_CUDA_VERSION,
[AC_LANG_PUSH([C])
SAVE_LIBS="$LIBS"
SAVE_CPPFLAGS="$CPPFLAGS"
if test "x$pac_cv_have_cuda" == "x"; then
PAC_CHECK_CUDA()
fi
if test "x$pac_cv_have_cuda" == "xyes"; then
CUDA_DIR="$pac_cv_cuda_dir"
LIBS="-L$pac_cv_cuda_dir/lib $LIBS"
CUDA_INCLUDES="-I$pac_cv_cuda_dir/include"
CUDA_DEFINES="-DHAVE_CUDA"
CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS"
CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib"
CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR"
LIBS="$CUDA_LIBS -lm $LIBS";
AC_MSG_CHECKING([for CUDA version])
AC_LINK_IFELSE([AC_LANG_SOURCE([
#include <stdio.h>
#include <cuda.h>
int main(int argc, char *argv[])
{
printf("%d",CUDA_VERSION);
return(0);
} ])],
[pac_cv_cuda_version=`./conftest${ac_exeext} | sed 's/^ *//'`;],
[pac_cv_cuda_version="unknown";])
AC_MSG_RESULT($pac_cv_cuda_version)
fi
AC_LANG_POP([C])
LIBS="$SAVE_LIBS"
CPPFLAGS="$SAVE_CPPFLAGS"
])dnl

366
configure vendored

@ -1,6 +1,6 @@
#! /bin/sh #! /bin/sh
# Guess values for system-dependent variables and create Makefiles. # Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.71 for PSBLAS 3.7.0. # Generated by GNU Autoconf 2.71 for PSBLAS 3.8.1.
# #
# Report bugs to <https://github.com/sfilippone/psblas3/issues>. # Report bugs to <https://github.com/sfilippone/psblas3/issues>.
# #
@ -611,8 +611,8 @@ MAKEFLAGS=
# Identity of this package. # Identity of this package.
PACKAGE_NAME='PSBLAS' PACKAGE_NAME='PSBLAS'
PACKAGE_TARNAME='psblas' PACKAGE_TARNAME='psblas'
PACKAGE_VERSION='3.7.0' PACKAGE_VERSION='3.8.1'
PACKAGE_STRING='PSBLAS 3.7.0' PACKAGE_STRING='PSBLAS 3.8.1'
PACKAGE_BUGREPORT='https://github.com/sfilippone/psblas3/issues' PACKAGE_BUGREPORT='https://github.com/sfilippone/psblas3/issues'
PACKAGE_URL='' PACKAGE_URL=''
@ -653,6 +653,20 @@ ac_subst_vars='am__EXEEXT_FALSE
am__EXEEXT_TRUE am__EXEEXT_TRUE
LTLIBOBJS LTLIBOBJS
LIBOBJS LIBOBJS
LCUDA
CUDALD
CUDAD
CUDEFINES
CUDA_NVCC
CUDA_SHORT_VERSION
CUDA_VERSION
CUDA_LIBS
CUDA_INCLUDES
CUDA_DEFINES
CUDA_DIR
EXTRALDLIBS
SPGPU_LIBS
SPGPU_FLAGS
METISINCFILE METISINCFILE
UTILLIBNAME UTILLIBNAME
METHDLIBNAME METHDLIBNAME
@ -815,7 +829,6 @@ enable_openmp
with_blas with_blas
with_blasdir with_blasdir
with_lapack with_lapack
with_rsb
with_metis with_metis
with_metisincfile with_metisincfile
with_metisdir with_metisdir
@ -825,6 +838,8 @@ with_amd
with_amddir with_amddir
with_amdincdir with_amdincdir
with_amdlibdir with_amdlibdir
with_cuda
with_cudacc
' '
ac_precious_vars='build_alias ac_precious_vars='build_alias
host_alias host_alias
@ -1390,7 +1405,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing. # Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh. # This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF cat <<_ACEOF
\`configure' configures PSBLAS 3.7.0 to adapt to many kinds of systems. \`configure' configures PSBLAS 3.8.1 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]... Usage: $0 [OPTION]... [VAR=VALUE]...
@ -1457,7 +1472,7 @@ fi
if test -n "$ac_init_help"; then if test -n "$ac_init_help"; then
case $ac_init_help in case $ac_init_help in
short | recursive ) echo "Configuration of PSBLAS 3.7.0:";; short | recursive ) echo "Configuration of PSBLAS 3.8.1:";;
esac esac
cat <<\_ACEOF cat <<\_ACEOF
@ -1505,12 +1520,6 @@ Optional Packages:
--with-blas=<lib> use BLAS library <lib> --with-blas=<lib> use BLAS library <lib>
--with-blasdir=<dir> search for BLAS library in <dir> --with-blasdir=<dir> search for BLAS library in <dir>
--with-lapack=<lib> use LAPACK library <lib> --with-lapack=<lib> use LAPACK library <lib>
--with-rsb Specify Recursive Sparse BLAS library linkage info
(that is, the output of librsb-config --static
--ldflags, or a directory where the usual
bin/include/lib subdirs with a regular RSB
installation resides, or nothing to make the
configure script invoke librsb-config)
--with-metis=LIBNAME Specify the library name for METIS library. Default: --with-metis=LIBNAME Specify the library name for METIS library. Default:
"-lmetis" "-lmetis"
--with-metisincfile=DIR Specify the name for METIS include file. --with-metisincfile=DIR Specify the name for METIS include file.
@ -1523,6 +1532,9 @@ Optional Packages:
--with-amddir=DIR Specify the directory for AMD library and includes. --with-amddir=DIR Specify the directory for AMD library and includes.
--with-amdincdir=DIR Specify the directory for AMD includes. --with-amdincdir=DIR Specify the directory for AMD includes.
--with-amdlibdir=DIR Specify the directory for AMD library. --with-amdlibdir=DIR Specify the directory for AMD library.
--with-cuda=DIR Specify the CUDA install directory.
--with-cudacc A comma-separated list of CCs to compile to, for
example, --with-cudacc=50,60,70,75
Some influential environment variables: Some influential environment variables:
FC Fortran compiler command FC Fortran compiler command
@ -1607,7 +1619,7 @@ fi
test -n "$ac_init_help" && exit $ac_status test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then if $ac_init_version; then
cat <<\_ACEOF cat <<\_ACEOF
PSBLAS configure 3.7.0 PSBLAS configure 3.8.1
generated by GNU Autoconf 2.71 generated by GNU Autoconf 2.71
Copyright (C) 2021 Free Software Foundation, Inc. Copyright (C) 2021 Free Software Foundation, Inc.
@ -2291,7 +2303,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake. running configure, to aid debugging if configure makes a mistake.
It was created by PSBLAS $as_me 3.7.0, which was It was created by PSBLAS $as_me 3.8.1, which was
generated by GNU Autoconf 2.71. Invocation command line was generated by GNU Autoconf 2.71. Invocation command line was
$ $0$ac_configure_args_raw $ $0$ac_configure_args_raw
@ -3265,7 +3277,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
# VERSION is the file containing the PSBLAS version code # VERSION is the file containing the PSBLAS version code
# FIXME # FIXME
psblas_cv_version="3.7.0" psblas_cv_version="3.8.1"
# A sample source file # A sample source file
@ -6393,7 +6405,7 @@ fi
# Define the identity of the package. # Define the identity of the package.
PACKAGE='psblas' PACKAGE='psblas'
VERSION='3.7.0' VERSION='3.8.1'
printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
@ -9999,31 +10011,6 @@ fi
#AC_CHECK_LIB(umf,umfpack_di_solve,psblas_cv_have_umfpack=yes,psblas_cv_have_umfpack=no,[amd]) #AC_CHECK_LIB(umf,umfpack_di_solve,psblas_cv_have_umfpack=yes,psblas_cv_have_umfpack=no,[amd])
# Check whether --with-rsb was given.
if test ${with_rsb+y}
then :
withval=$with_rsb; if test x"$withval" = xno; then
want_rsb_libs= ; else if test x"$withval" = xyes ; then want_rsb_libs=yes ; else want_rsb_libs="$withval" ; fi ; fi
else $as_nop
want_rsb_libs=""
fi
if test x"$want_rsb_libs" != x ; then
if test x"$want_rsb_libs" = xyes ; then
want_rsb_libs="`librsb-config --static --ldflags`"
else
if test -d "$want_rsb_libs" ; then
want_rsb_libs="`$want_rsb_libs/bin/librsb-config --static --ldflags`"
else
true;
# we assume want_rsb_libs are linkage parameters
fi
fi
FDEFINES="$FDEFINES $psblas_cv_define_prepend-DHAVE_LIBRSB"
fi
RSB_LIBS="$want_rsb_libs"
LIBS="$RSB_LIBS ${LIBS}"
# Check whether --with-metis was given. # Check whether --with-metis was given.
if test ${with_metis+y} if test ${with_metis+y}
@ -10610,6 +10597,274 @@ fi
# Check whether --with-cuda was given.
if test ${with_cuda+y}
then :
withval=$with_cuda; pac_cv_cuda_dir=$withval
else $as_nop
pac_cv_cuda_dir=''
fi
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
SAVE_LIBS="$LIBS"
SAVE_CPPFLAGS="$CPPFLAGS"
if test "x$pac_cv_cuda_dir" != "x"; then
CUDA_DIR="$pac_cv_cuda_dir"
LIBS="-L$pac_cv_cuda_dir/lib $LIBS"
CUDA_INCLUDES="-I$pac_cv_cuda_dir/include"
CUDA_DEFINES="-DHAVE_CUDA"
CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS"
CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib"
if test -f "$pac_cv_cuda_dir/bin/nvcc"; then
CUDA_NVCC="$pac_cv_cuda_dir/bin/nvcc"
else
CUDA_NVCC="nvcc"
fi
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking cuda dir $pac_cv_cuda_dir" >&5
printf %s "checking cuda dir $pac_cv_cuda_dir... " >&6; }
ac_fn_c_check_header_compile "$LINENO" "cuda_runtime.h" "ac_cv_header_cuda_runtime_h" "$ac_includes_default"
if test "x$ac_cv_header_cuda_runtime_h" = xyes
then :
pac_cuda_header_ok=yes
else $as_nop
pac_cuda_header_ok=no; CUDA_INCLUDES=""
fi
if test "x$pac_cuda_header_ok" == "xyes" ; then
CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR"
LIBS="$CUDA_LIBS -lm $LIBS";
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cudaMemcpy in $CUDA_LIBS" >&5
printf %s "checking for cudaMemcpy in $CUDA_LIBS... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
/* Override any GCC internal prototype to avoid an error.
Use char because int might match the return type of a GCC
builtin and then its argument prototype would still apply. */
char cudaMemcpy ();
int
main (void)
{
return cudaMemcpy ();
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"
then :
pac_cv_have_cuda=yes;pac_cuda_lib_ok=yes;
else $as_nop
pac_cv_have_cuda=no;pac_cuda_lib_ok=no; CUDA_LIBS=""
fi
rm -f core conftest.err conftest.$ac_objext conftest.beam \
conftest$ac_exeext conftest.$ac_ext
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pac_cuda_lib_ok" >&5
printf "%s\n" "$pac_cuda_lib_ok" >&6; }
fi
LIBS="$SAVE_LIBS"
CPPFLAGS="$SAVE_CPPFLAGS"
if test "x$pac_cv_have_cuda" == "xyes"; then
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
SAVE_LIBS="$LIBS"
SAVE_CPPFLAGS="$CPPFLAGS"
if test "x$pac_cv_have_cuda" == "x"; then
# Check whether --with-cuda was given.
if test ${with_cuda+y}
then :
withval=$with_cuda; pac_cv_cuda_dir=$withval
else $as_nop
pac_cv_cuda_dir=''
fi
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
SAVE_LIBS="$LIBS"
SAVE_CPPFLAGS="$CPPFLAGS"
if test "x$pac_cv_cuda_dir" != "x"; then
CUDA_DIR="$pac_cv_cuda_dir"
LIBS="-L$pac_cv_cuda_dir/lib $LIBS"
CUDA_INCLUDES="-I$pac_cv_cuda_dir/include"
CUDA_DEFINES="-DHAVE_CUDA"
CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS"
CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib"
if test -f "$pac_cv_cuda_dir/bin/nvcc"; then
CUDA_NVCC="$pac_cv_cuda_dir/bin/nvcc"
else
CUDA_NVCC="nvcc"
fi
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking cuda dir $pac_cv_cuda_dir" >&5
printf %s "checking cuda dir $pac_cv_cuda_dir... " >&6; }
ac_fn_c_check_header_compile "$LINENO" "cuda_runtime.h" "ac_cv_header_cuda_runtime_h" "$ac_includes_default"
if test "x$ac_cv_header_cuda_runtime_h" = xyes
then :
pac_cuda_header_ok=yes
else $as_nop
pac_cuda_header_ok=no; CUDA_INCLUDES=""
fi
if test "x$pac_cuda_header_ok" == "xyes" ; then
CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR"
LIBS="$CUDA_LIBS -lm $LIBS";
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cudaMemcpy in $CUDA_LIBS" >&5
printf %s "checking for cudaMemcpy in $CUDA_LIBS... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
/* Override any GCC internal prototype to avoid an error.
Use char because int might match the return type of a GCC
builtin and then its argument prototype would still apply. */
char cudaMemcpy ();
int
main (void)
{
return cudaMemcpy ();
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"
then :
pac_cv_have_cuda=yes;pac_cuda_lib_ok=yes;
else $as_nop
pac_cv_have_cuda=no;pac_cuda_lib_ok=no; CUDA_LIBS=""
fi
rm -f core conftest.err conftest.$ac_objext conftest.beam \
conftest$ac_exeext conftest.$ac_ext
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pac_cuda_lib_ok" >&5
printf "%s\n" "$pac_cuda_lib_ok" >&6; }
fi
LIBS="$SAVE_LIBS"
CPPFLAGS="$SAVE_CPPFLAGS"
fi
if test "x$pac_cv_have_cuda" == "xyes"; then
CUDA_DIR="$pac_cv_cuda_dir"
LIBS="-L$pac_cv_cuda_dir/lib $LIBS"
CUDA_INCLUDES="-I$pac_cv_cuda_dir/include"
CUDA_DEFINES="-DHAVE_CUDA"
CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS"
CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib"
CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR"
LIBS="$CUDA_LIBS -lm $LIBS";
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for CUDA version" >&5
printf %s "checking for CUDA version... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <stdio.h>
#include <cuda.h>
int main(int argc, char *argv)
{
printf("%d",CUDA_VERSION);
return(0);
}
_ACEOF
if ac_fn_c_try_link "$LINENO"
then :
pac_cv_cuda_version=`./conftest${ac_exeext} | sed 's/^ *//'`;
else $as_nop
pac_cv_cuda_version="unknown";
fi
rm -f core conftest.err conftest.$ac_objext conftest.beam \
conftest$ac_exeext conftest.$ac_ext
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pac_cv_cuda_version" >&5
printf "%s\n" "$pac_cv_cuda_version" >&6; }
fi
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
LIBS="$SAVE_LIBS"
CPPFLAGS="$SAVE_CPPFLAGS"
CUDA_VERSION="$pac_cv_cuda_version";
CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000);
HAVE_CUDA="yes";
SPGPU_LIBS="-lspgpu";
CUDAD=cudad;
CUDALD=cudald;
LCUDA="-lpsb_cuda";
EXTRALDLIBS="-lstdc++";
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: At this point GPUTARGET is $CUDAD $CUDALD" >&5
printf "%s\n" "$as_me: At this point GPUTARGET is $CUDAD $CUDALD" >&6;}
# Check whether --with-cudacc was given.
if test ${with_cudacc+y}
then :
withval=$with_cudacc; pac_cv_cudacc=$withval
else $as_nop
pac_cv_cudacc=''
fi
if test "x$pac_cv_cudacc" == "x"; then
pac_cv_cudacc="50,60,70,75";
CUDA_CC="$pac_cv_cudacc";
fi
if (( $pac_cv_cuda_version >= 11070 ))
then
CUDEFINES="--dopt=on";
fi
for cc in `echo $pac_cv_cudacc|sed 's/,/ /gi'`
do
CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc";
done
if test "x$pac_cv_cuda_version" != "xunknown"; then
CUDEFINES="$CUDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
FDEFINES="$FDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
CDEFINES="$CDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
fi
fi
if test "x$pac_cv_ipk_size" != "x4"; then
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU" >&5
printf "%s\n" "$as_me: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU" >&6;}
HAVE_CUDA="no";
CUDA_CC="";
SPGPU_LIBS="";
CUDAD="";
CUDALD="";
CUDEFINES="";
CUDA_INCLUDES="";
CUDA_LIBS="";
fi
############################################################################### ###############################################################################
# Library target directory and archive files. # Library target directory and archive files.
############################################################################### ###############################################################################
@ -10667,9 +10922,9 @@ UTILLIBNAME=libpsb_util.a
PSBLASRULES=' PSBLASRULES='
PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS) PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS)
CXXDEFINES=$(PSBCXXDEFINES) CXXDEFINES=$(PSBCXXDEFINES) $(CUDA_DEFINES)
CDEFINES=$(PSBCDEFINES) CDEFINES=$(PSBCDEFINES) $(CUDA_DEFINES)
FDEFINES=$(PSBFDEFINES) FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES)
# These should be portable rules, arent they? # These should be portable rules, arent they?
@ -10688,6 +10943,19 @@ FDEFINES=$(PSBFDEFINES)
@ -11262,7 +11530,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their # report actual input values of CONFIG_FILES etc. instead of their
# values after options handling. # values after options handling.
ac_log=" ac_log="
This file was extended by PSBLAS $as_me 3.7.0, which was This file was extended by PSBLAS $as_me 3.8.1, which was
generated by GNU Autoconf 2.71. Invocation command line was generated by GNU Autoconf 2.71. Invocation command line was
CONFIG_FILES = $CONFIG_FILES CONFIG_FILES = $CONFIG_FILES
@ -11321,7 +11589,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped' ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\ ac_cs_version="\\
PSBLAS config.status 3.7.0 PSBLAS config.status 3.8.1
configured by $0, generated by GNU Autoconf 2.71, configured by $0, generated by GNU Autoconf 2.71,
with options \\"\$ac_cs_config\\" with options \\"\$ac_cs_config\\"
@ -12022,6 +12290,9 @@ fi
FCOPT : ${FCOPT} FCOPT : ${FCOPT}
CCOPT : ${CCOPT} CCOPT : ${CCOPT}
CUDA : ${HAVE_CUDA}
CUDA_CC : ${CUDA_CC}
BLAS : ${BLAS_LIBS} BLAS : ${BLAS_LIBS}
METIS usable : ${psblas_cv_have_metis} METIS usable : ${psblas_cv_have_metis}
@ -12052,6 +12323,9 @@ printf "%s\n" "$as_me:
FCOPT : ${FCOPT} FCOPT : ${FCOPT}
CCOPT : ${CCOPT} CCOPT : ${CCOPT}
CUDA : ${HAVE_CUDA}
CUDA_CC : ${CUDA_CC}
BLAS : ${BLAS_LIBS} BLAS : ${BLAS_LIBS}
METIS usable : ${psblas_cv_have_metis} METIS usable : ${psblas_cv_have_metis}

@ -36,11 +36,11 @@ dnl NOTE : There is no cross compilation support.
############################################################################### ###############################################################################
# NOTE: the literal for version (the second argument to AC_INIT should be a literal!) # NOTE: the literal for version (the second argument to AC_INIT should be a literal!)
AC_INIT([PSBLAS],3.7.0, [https://github.com/sfilippone/psblas3/issues]) AC_INIT([PSBLAS],3.8.1, [https://github.com/sfilippone/psblas3/issues])
# VERSION is the file containing the PSBLAS version code # VERSION is the file containing the PSBLAS version code
# FIXME # FIXME
psblas_cv_version="3.7.0" psblas_cv_version="3.8.1"
# A sample source file # A sample source file
AC_CONFIG_SRCDIR([base/modules/psb_base_mod.f90]) AC_CONFIG_SRCDIR([base/modules/psb_base_mod.f90])
@ -729,23 +729,23 @@ PAC_MAKE_IS_GNUMAKE
# Note : also umfdi_local_search, ... # Note : also umfdi_local_search, ...
#AC_CHECK_LIB(umf,umfpack_di_solve,psblas_cv_have_umfpack=yes,psblas_cv_have_umfpack=no,[amd]) #AC_CHECK_LIB(umf,umfpack_di_solve,psblas_cv_have_umfpack=yes,psblas_cv_have_umfpack=no,[amd])
AC_ARG_WITH(rsb, AS_HELP_STRING([--with-rsb], [Specify Recursive Sparse BLAS library linkage info (that is, the output of librsb-config --static --ldflags, or a directory where the usual bin/include/lib subdirs with a regular RSB installation resides, or nothing to make the configure script invoke librsb-config)]), [if test x"$withval" = xno; then dnl AC_ARG_WITH(rsb, AS_HELP_STRING([--with-rsb], [Specify Recursive Sparse BLAS library linkage info (that is, the output of librsb-config --static --ldflags, or a directory where the usual bin/include/lib subdirs with a regular RSB installation resides, or nothing to make the configure script invoke librsb-config)]), [if test x"$withval" = xno; then
want_rsb_libs= ; else if test x"$withval" = xyes ; then want_rsb_libs=yes ; else want_rsb_libs="$withval" ; fi ; fi], [want_rsb_libs=""]) dnl want_rsb_libs= ; else if test x"$withval" = xyes ; then want_rsb_libs=yes ; else want_rsb_libs="$withval" ; fi ; fi], [want_rsb_libs=""])
if test x"$want_rsb_libs" != x ; then dnl if test x"$want_rsb_libs" != x ; then
if test x"$want_rsb_libs" = xyes ; then dnl if test x"$want_rsb_libs" = xyes ; then
want_rsb_libs="`librsb-config --static --ldflags`" dnl want_rsb_libs="`librsb-config --static --ldflags`"
else dnl else
if test -d "$want_rsb_libs" ; then dnl if test -d "$want_rsb_libs" ; then
want_rsb_libs="`$want_rsb_libs/bin/librsb-config --static --ldflags`" dnl want_rsb_libs="`$want_rsb_libs/bin/librsb-config --static --ldflags`"
else dnl else
true; dnl true;
# we assume want_rsb_libs are linkage parameters dnl # we assume want_rsb_libs are linkage parameters
fi dnl fi
fi dnl fi
FDEFINES="$FDEFINES $psblas_cv_define_prepend-DHAVE_LIBRSB" dnl FDEFINES="$FDEFINES $psblas_cv_define_prepend-DHAVE_LIBRSB"
fi dnl fi
RSB_LIBS="$want_rsb_libs" dnl RSB_LIBS="$want_rsb_libs"
LIBS="$RSB_LIBS ${LIBS}" dnl LIBS="$RSB_LIBS ${LIBS}"
dnl AC_CHECK_HEADERS([rsb.h], [ LIBS="${LIBS} $want_rsb_libs"], []) dnl AC_CHECK_HEADERS([rsb.h], [ LIBS="${LIBS} $want_rsb_libs"], [])
PAC_CHECK_METIS PAC_CHECK_METIS
@ -790,6 +790,57 @@ fi
PAC_CHECK_CUDA()
if test "x$pac_cv_have_cuda" == "xyes"; then
PAC_CHECK_CUDA_VERSION()
CUDA_VERSION="$pac_cv_cuda_version";
CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000);
dnl PAC_CHECK_SPGPU()
HAVE_CUDA="yes";
SPGPU_LIBS="-lspgpu";
CUDAD=cudad;
CUDALD=cudald;
LCUDA="-lpsb_cuda";
EXTRALDLIBS="-lstdc++";
AC_MSG_NOTICE([At this point GPUTARGET is $CUDAD $CUDALD])
PAC_ARG_WITH_CUDACC()
if test "x$pac_cv_cudacc" == "x"; then
pac_cv_cudacc="50,60,70,75";
CUDA_CC="$pac_cv_cudacc";
fi
if (( $pac_cv_cuda_version >= 11070 ))
then
CUDEFINES="--dopt=on";
fi
for cc in `echo $pac_cv_cudacc|sed 's/,/ /gi'`
do
CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc";
done
if test "x$pac_cv_cuda_version" != "xunknown"; then
CUDEFINES="$CUDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
FDEFINES="$FDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
CDEFINES="$CDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
fi
fi
if test "x$pac_cv_ipk_size" != "x4"; then
AC_MSG_NOTICE([For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU])
HAVE_CUDA="no";
CUDA_CC="";
SPGPU_LIBS="";
CUDAD="";
CUDALD="";
CUDEFINES="";
CUDA_INCLUDES="";
CUDA_LIBS="";
fi
############################################################################### ###############################################################################
# Library target directory and archive files. # Library target directory and archive files.
############################################################################### ###############################################################################
@ -847,9 +898,9 @@ AC_SUBST(FINCLUDES)
PSBLASRULES=' PSBLASRULES='
PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS) PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS)
CXXDEFINES=$(PSBCXXDEFINES) CXXDEFINES=$(PSBCXXDEFINES) $(CUDA_DEFINES)
CDEFINES=$(PSBCDEFINES) CDEFINES=$(PSBCDEFINES) $(CUDA_DEFINES)
FDEFINES=$(PSBFDEFINES) FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES)
# These should be portable rules, arent they? # These should be portable rules, arent they?
@ -871,7 +922,23 @@ AC_SUBST(PRECLIBNAME)
AC_SUBST(METHDLIBNAME) AC_SUBST(METHDLIBNAME)
AC_SUBST(UTILLIBNAME) AC_SUBST(UTILLIBNAME)
AC_SUBST(METISINCFILE) AC_SUBST(METISINCFILE)
AC_SUBST(SPGPU_FLAGS)
AC_SUBST(SPGPU_LIBS)
dnl AC_SUBST(SPGPU_DIR)
dnl AC_SUBST(SPGPU_INCLUDES)
dnl AC_SUBST(SPGPU_INCDIR)
AC_SUBST(EXTRALDLIBS)
AC_SUBST(CUDA_DIR)
AC_SUBST(CUDA_DEFINES)
AC_SUBST(CUDA_INCLUDES)
AC_SUBST(CUDA_LIBS)
AC_SUBST(CUDA_VERSION)
AC_SUBST(CUDA_SHORT_VERSION)
AC_SUBST(CUDA_NVCC)
AC_SUBST(CUDEFINES)
AC_SUBST(CUDAD)
AC_SUBST(CUDALD)
AC_SUBST(LCUDA)
############################################################################### ###############################################################################
# the following files will be created by Automake # the following files will be created by Automake
@ -896,6 +963,9 @@ AC_MSG_NOTICE([
FCOPT : ${FCOPT} FCOPT : ${FCOPT}
CCOPT : ${CCOPT} CCOPT : ${CCOPT}
CUDA : ${HAVE_CUDA}
CUDA_CC : ${CUDA_CC}
BLAS : ${BLAS_LIBS} BLAS : ${BLAS_LIBS}
METIS usable : ${psblas_cv_have_metis} METIS usable : ${psblas_cv_have_metis}

@ -0,0 +1,37 @@
TOPDIR=../..
include $(TOPDIR)/Make.inc
#
# Libraries used
#
PSBLIBDIR=$(PSBLASDIR)/lib/
PSBINCDIR=$(PSBLASDIR)/include
LIBDIR=$(TOPDIR)/lib
INCDIR=$(TOPDIR)/include
PSBLAS_LIB= -L$(PSBLIBDIR) -lpsb_util -lpsb_base
#-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base
LDLIBS=$(PSBLDLIBS)
#
# Compilers and such
#
#CCOPT= -g
FINCLUDES=$(FMFLAG). $(FMFLAG)$(INCDIR) $(FMFLAG)$(PSBINCDIR) $(FIFLAG).
CINCLUDES=$(SPGPU_INCLUDES) $(CUDA_INCLUDES) -I.. -I$(INCDIR)
LIBNAME=libpsb_gpu.a
CUDAOBJS=psi_cuda_c_CopyCooToElg.o psi_cuda_c_CopyCooToHlg.o \
psi_cuda_d_CopyCooToElg.o psi_cuda_d_CopyCooToHlg.o \
psi_cuda_s_CopyCooToElg.o psi_cuda_s_CopyCooToHlg.o \
psi_cuda_z_CopyCooToElg.o psi_cuda_z_CopyCooToHlg.o
objs: $(CUDAOBJS)
lib: objs
ar cur ../$(LIBNAME) $(CUDAOBJS)
$(CUDAOBJS): psi_cuda_common.cuh psi_cuda_CopyCooToElg.cuh psi_cuda_CopyCooToHlg.cuh
clean:
/bin/rm -f $(CUDAOBJS)

@ -0,0 +1,104 @@
#include <stdlib.h>
#include <stdio.h>
#include "cintrf.h"
#include "vectordev.h"
#include "psi_cuda_common.cuh"
#undef GEN_PSI_FUNC_NAME
#define GEN_PSI_FUNC_NAME(x) CONCAT(CONCAT(psi_cuda_,x),_CopyCooToElg)
#define THREAD_BLOCK 256
#ifdef __cplusplus
extern "C" {
#endif
void GEN_PSI_FUNC_NAME(TYPE_SYMBOL)(spgpuHandle_t handle, int nr, int nc, int nza,
int baseIdx, int hacksz, int ldv, int nzm,
int *rS,int *devIdisp, int *devJa, VALUE_TYPE *devVal,
int *idiag, int *rP, VALUE_TYPE *cM);
#ifdef __cplusplus
}
#endif
__global__ void CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_krn)(int ii, int nrws, int nr, int nza,
int baseIdx, int hacksz, int ldv, int nzm,
int *rS, int *devIdisp, int *devJa, VALUE_TYPE *devVal,
int *idiag, int *rP, VALUE_TYPE *cM)
{
int ir, k, ipnt, rsz,jc;
int ki = threadIdx.x + blockIdx.x * (THREAD_BLOCK);
int i=ii+ki;
int idval=0;
if (ki >= nrws) return;
if (i >= nr) return;
ipnt=devIdisp[i];
rsz=rS[i];
ir = i;
for (k=0; k<rsz; k++) {
if (devJa[ipnt] == i+baseIdx) idval = ipnt-devIdisp[i]+baseIdx;
rP[ir] = devJa[ipnt];
cM[ir] = devVal[ipnt];
ir += ldv;
ipnt++;
}
// Here we are assuming that devJa[] has at least one valid entry
// Pick one valid value.
jc = devJa[devIdisp[1]];
for (k=rsz; k<nzm; k++) {
rP[ir] = jc;
cM[ir] = CONCAT(zero_,VALUE_TYPE)();
ir += ldv;
}
idiag[i]=idval;
}
void CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_)(spgpuHandle_t handle, int nrws, int i, int nr, int nza,
int baseIdx, int hacksz, int ldv, int nzm,
int *rS,int *devIdisp, int *devJa, VALUE_TYPE *devVal,
int *idiag, int *rP, VALUE_TYPE *cM)
{
dim3 block (THREAD_BLOCK, 1);
dim3 grid ((nrws + THREAD_BLOCK - 1) / THREAD_BLOCK);
CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_krn)
<<< grid, block, 0, handle->currentStream >>>(i,nrws, nr, nza, baseIdx, hacksz, ldv, nzm,
rS,devIdisp,devJa,devVal,idiag, rP,cM);
}
void
GEN_PSI_FUNC_NAME(TYPE_SYMBOL)
(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, int hacksz, int ldv, int nzm,
int *rS,int *devIdisp, int *devJa, VALUE_TYPE *devVal,
int *idiag, int *rP, VALUE_TYPE *cM)
{ int i,j, nrws;
//int maxNForACall = THREAD_BLOCK*handle->maxGridSizeX;
int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX);
//fprintf(stderr,"Loop on j: %d\n",j);
for (i=0; i<nr; i+=nrws) {
nrws = MIN(maxNForACall, nr - i);
//fprintf(stderr,"ifirst: %d i : %d nrws: %d i + ifirst + (nrws -1) -1 %d \n",ifirst,i,nrws,i + ifirst + (nrws -1) -1);
CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_)(handle,nrws,i, nr, nza, baseIdx, hacksz, ldv, nzm,
rS,devIdisp, devJa, devVal, idiag, rP, cM);
}
}

@ -0,0 +1,108 @@
#include <stdlib.h>
#include <stdio.h>
#include "cintrf.h"
#include "vectordev.h"
#include "psi_cuda_common.cuh"
#undef GEN_PSI_FUNC_NAME
#define GEN_PSI_FUNC_NAME(x) CONCAT(CONCAT(psi_cuda_,x),_CopyCooToHlg)
#define THREAD_BLOCK 256
#ifdef __cplusplus
extern "C" {
#endif
void GEN_PSI_FUNC_NAME(TYPE_SYMBOL)(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, int hacksz,
int noffs, int isz, int *rS, int *hackOffs, int *devIdisp,
int *devJa, VALUE_TYPE *devVal,
int *idiag, int *rP, VALUE_TYPE *cM);
#ifdef __cplusplus
}
#endif
__global__ void CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_krn)(int ii, int nrws, int nr, int nza,
int baseIdx, int hacksz, int noffs, int isz,
int *rS, int *hackOffs, int *devIdisp,
int *devJa, VALUE_TYPE *devVal,
int *idiag, int *rP, VALUE_TYPE *cM)
{
int ir, k, ipnt, rsz,jc;
int ki = threadIdx.x + blockIdx.x * (THREAD_BLOCK);
int i=ii+ki;
if (ki >= nrws) return;
if (i<nr) {
int hackId = i / hacksz;
int hackLaneId = i % hacksz;
int hackOffset = hackOffs[hackId] + hackLaneId;
int nzm = (hackOffs[hackId+1]-hackOffs[hackId])/hacksz;
int idval=0;
rsz = rS[i];
ipnt = devIdisp[i];
ir = hackOffset;
for (k=0; k<rsz; k++) {
cM[ir] = devVal[ipnt];
if (devJa[ipnt] == i+baseIdx) idval = ipnt-devIdisp[i]+baseIdx;
rP[ir] = devJa[ipnt];
ir += hacksz;
ipnt++;
}
// Here we are assuming that devJa[] has at least one valid entry
// Pick one valid value.
jc = devJa[devIdisp[1]];
for (k=rsz; k<nzm; k++) {
rP[ir] = jc;
cM[ir] = CONCAT(zero_,VALUE_TYPE)();
ir += hacksz;
}
idiag[i]=idval;
}
}
void CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_)(spgpuHandle_t handle, int nrws, int i,
int nr, int nza, int baseIdx, int hacksz, int noffs, int isz,
int *rS, int *hackOffs, int *devIdisp, int *devJa,
VALUE_TYPE *devVal, int *idiag, int *rP, VALUE_TYPE *cM)
{
dim3 block (THREAD_BLOCK, 1);
dim3 grid ((nrws + THREAD_BLOCK - 1) / THREAD_BLOCK);
CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_krn)
<<< grid, block, 0, handle->currentStream >>>(i,nrws,nr, nza, baseIdx, hacksz, noffs, isz,
rS,hackOffs,devIdisp,devJa,devVal,idiag,rP,cM);
}
void GEN_PSI_FUNC_NAME(TYPE_SYMBOL)(spgpuHandle_t handle, int nr, int nc, int nza,
int baseIdx, int hacksz, int noffs, int isz,
int *rS, int *hackOffs, int *devIdisp,
int *devJa, VALUE_TYPE *devVal,
int *idiag, int *rP, VALUE_TYPE *cM)
{ int i, nrws;
//int maxNForACall = THREAD_BLOCK*handle->maxGridSizeX;
int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX);
//fprintf(stderr,"Loop on j: %d\n",j);
for (i=0; i<nr; i+=nrws) {
nrws = MIN(maxNForACall, nr - i);
//fprintf(stderr,"cpy_coo_2_hlg: i : %d nrws: %d \n", i,nrws);
CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_)(handle,nrws,i, nr, nza, baseIdx, hacksz, noffs, isz,
rS, hackOffs, devIdisp, devJa, devVal, idiag, rP, cM);
}
}

@ -0,0 +1,10 @@
#include <stdlib.h>
#include <stdio.h>
#include "cintrf.h"
#include "vectordev.h"
#define VALUE_TYPE cuFloatComplex
#define TYPE_SYMBOL c
#include "psi_cuda_CopyCooToElg.cuh"

@ -0,0 +1,10 @@
#include <stdlib.h>
#include <stdio.h>
#include "cintrf.h"
#include "vectordev.h"
#define VALUE_TYPE cuFloatComplex
#define TYPE_SYMBOL c
#include "psi_cuda_CopyCooToHlg.cuh"

@ -0,0 +1,16 @@
#pragma once
#define PRE_CONCAT(A, B) A ## B
#define CONCAT(A, B) PRE_CONCAT(A, B)
#define MIN(A,B) ( (A)<(B) ? (A) : (B) )
#define SQUARE(x) ((x)*(x))
#define GET_ADDR(a,ix,iy,nc) a[(nc)*(ix)+(iy)]
#define GET_VAL(a,ix,iy,nc) (GET_ADDR(a,ix,iy,nc))
__device__ __host__ static float zero_float() { return 0.0f; }
__device__ __host__ static cuFloatComplex zero_cuFloatComplex() { return make_cuFloatComplex(0.0, 0.0); }
#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__)
__device__ __host__ static double zero_double() { return 0.0; }
__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); }
#endif

@ -0,0 +1,10 @@
#include <stdlib.h>
#include <stdio.h>
#include "cintrf.h"
#include "vectordev.h"
#define VALUE_TYPE double
#define TYPE_SYMBOL d
#include "psi_cuda_CopyCooToElg.cuh"

@ -0,0 +1,10 @@
#include <stdlib.h>
#include <stdio.h>
#include "cintrf.h"
#include "vectordev.h"
#define VALUE_TYPE double
#define TYPE_SYMBOL d
#include "psi_cuda_CopyCooToHlg.cuh"

@ -0,0 +1,10 @@
#include <stdlib.h>
#include <stdio.h>
#include "cintrf.h"
#include "vectordev.h"
#define VALUE_TYPE float
#define TYPE_SYMBOL s
#include "psi_cuda_CopyCooToElg.cuh"

@ -0,0 +1,10 @@
#include <stdlib.h>
#include <stdio.h>
#include "cintrf.h"
#include "vectordev.h"
#define VALUE_TYPE float
#define TYPE_SYMBOL s
#include "psi_cuda_CopyCooToHlg.cuh"

@ -0,0 +1,10 @@
#include <stdlib.h>
#include <stdio.h>
#include "cintrf.h"
#include "vectordev.h"
#define VALUE_TYPE cuDoubleComplex
#define TYPE_SYMBOL z
#include "psi_cuda_CopyCooToElg.cuh"

@ -0,0 +1,10 @@
#include <stdlib.h>
#include <stdio.h>
#include "cintrf.h"
#include "vectordev.h"
#define VALUE_TYPE cuDoubleComplex
#define TYPE_SYMBOL z
#include "psi_cuda_CopyCooToHlg.cuh"

@ -0,0 +1,21 @@
(c) Copyright 2011-2021 Davide Barbieri, Salvatore Filippone
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@ -0,0 +1,143 @@
include ../Make.inc
#
# Libraries used
#
LIBDIR=../lib
INCDIR=../include
MODDIR=../modules
PSBLAS_LIB= -lpsb_util -lpsb_base
#-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base
LDLIBS=$(PSBLDLIBS)
#
# Compilers and such
#
#CCOPT= -g
FINCLUDES=$(FMFLAG). $(FMFLAG)$(INCDIR) $(FMFLAG)$(MODDIR) $(FIFLAG).
CINCLUDES=$(SPGPU_INCLUDES) $(CUDA_INCLUDES) -I$(INCDIR)
LIBNAME=libpsb_cuda.a
FOBJS=cusparse_mod.o base_cusparse_mod.o \
s_cusparse_mod.o d_cusparse_mod.o c_cusparse_mod.o z_cusparse_mod.o \
psb_vectordev_mod.o core_mod.o \
psb_s_vectordev_mod.o psb_d_vectordev_mod.o psb_i_vectordev_mod.o\
psb_c_vectordev_mod.o psb_z_vectordev_mod.o psb_base_vectordev_mod.o \
elldev_mod.o hlldev_mod.o diagdev_mod.o hdiagdev_mod.o \
psb_i_cuda_vect_mod.o \
psb_d_cuda_vect_mod.o psb_s_cuda_vect_mod.o\
psb_z_cuda_vect_mod.o psb_c_cuda_vect_mod.o\
psb_d_cuda_elg_mat_mod.o psb_d_cuda_hlg_mat_mod.o \
psb_d_cuda_hybg_mat_mod.o psb_d_cuda_csrg_mat_mod.o\
psb_s_cuda_elg_mat_mod.o psb_s_cuda_hlg_mat_mod.o \
psb_s_cuda_hybg_mat_mod.o psb_s_cuda_csrg_mat_mod.o\
psb_c_cuda_elg_mat_mod.o psb_c_cuda_hlg_mat_mod.o \
psb_c_cuda_hybg_mat_mod.o psb_c_cuda_csrg_mat_mod.o\
psb_z_cuda_elg_mat_mod.o psb_z_cuda_hlg_mat_mod.o \
psb_z_cuda_hybg_mat_mod.o psb_z_cuda_csrg_mat_mod.o\
psb_cuda_env_mod.o psb_cuda_mod.o \
psb_d_cuda_diag_mat_mod.o\
psb_d_cuda_hdiag_mat_mod.o psb_s_cuda_hdiag_mat_mod.o\
psb_s_cuda_dnsg_mat_mod.o psb_d_cuda_dnsg_mat_mod.o \
psb_c_cuda_dnsg_mat_mod.o psb_z_cuda_dnsg_mat_mod.o \
dnsdev_mod.o
COBJS= elldev.o hlldev.o diagdev.o hdiagdev.o vectordev.o ivectordev.o dnsdev.o\
svectordev.o dvectordev.o cvectordev.o zvectordev.o cuda_util.o \
fcusparse.o scusparse.o dcusparse.o ccusparse.o zcusparse.o
OBJS=$(COBJS) $(FOBJS)
lib: objs
objs: spgpuinc $(OBJS) iobjs cudaobjs spgpuobjs
/bin/cp -p *$(.mod) $(MODDIR)
/bin/cp -p *.h $(INCDIR)
spgpuinc:
$(MAKE) -C spgpu includes
spgpuobjs:
$(MAKE) -C spgpu objs
spgpulib:
$(MAKE) -C spgpu lib
lib: ilib cudalib spgpulib
ar cur $(LIBNAME) $(OBJS)
/bin/cp -p $(LIBNAME) $(LIBDIR)
dnsdev_mod.o hlldev_mod.o elldev_mod.o psb_base_vectordev_mod.o: core_mod.o
psb_d_cuda_vect_mod.o psb_s_cuda_vect_mod.o psb_z_cuda_vect_mod.o psb_c_cuda_vect_mod.o: psb_i_cuda_vect_mod.o
psb_i_cuda_vect_mod.o : psb_vectordev_mod.o psb_cuda_env_mod.o
cusparse_mod.o: s_cusparse_mod.o d_cusparse_mod.o c_cusparse_mod.o z_cusparse_mod.o
s_cusparse_mod.o d_cusparse_mod.o c_cusparse_mod.o z_cusparse_mod.o : base_cusparse_mod.o
psb_d_cuda_hlg_mat_mod.o: hlldev_mod.o psb_d_cuda_vect_mod.o psb_cuda_env_mod.o
psb_d_cuda_elg_mat_mod.o: elldev_mod.o psb_d_cuda_vect_mod.o
psb_d_cuda_diag_mat_mod.o: diagdev_mod.o psb_d_cuda_vect_mod.o
psb_d_cuda_hdiag_mat_mod.o: hdiagdev_mod.o psb_d_cuda_vect_mod.o
psb_s_cuda_dnsg_mat_mod.o: dnsdev_mod.o psb_s_cuda_vect_mod.o
psb_d_cuda_dnsg_mat_mod.o: dnsdev_mod.o psb_d_cuda_vect_mod.o
psb_c_cuda_dnsg_mat_mod.o: dnsdev_mod.o psb_c_cuda_vect_mod.o
psb_z_cuda_dnsg_mat_mod.o: dnsdev_mod.o psb_z_cuda_vect_mod.o
psb_s_cuda_hlg_mat_mod.o: hlldev_mod.o psb_s_cuda_vect_mod.o psb_cuda_env_mod.o
psb_s_cuda_elg_mat_mod.o: elldev_mod.o psb_s_cuda_vect_mod.o
psb_s_cuda_diag_mat_mod.o: diagdev_mod.o psb_s_cuda_vect_mod.o
psb_s_cuda_hdiag_mat_mod.o: hdiagdev_mod.o psb_s_cuda_vect_mod.o
psb_s_cuda_csrg_mat_mod.o psb_s_cuda_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o
psb_d_cuda_csrg_mat_mod.o psb_d_cuda_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o
psb_z_cuda_hlg_mat_mod.o: hlldev_mod.o psb_z_cuda_vect_mod.o psb_cuda_env_mod.o
psb_z_cuda_elg_mat_mod.o: elldev_mod.o psb_z_cuda_vect_mod.o
psb_c_cuda_hlg_mat_mod.o: hlldev_mod.o psb_c_cuda_vect_mod.o psb_cuda_env_mod.o
psb_c_cuda_elg_mat_mod.o: elldev_mod.o psb_c_cuda_vect_mod.o
psb_c_cuda_csrg_mat_mod.o psb_c_cuda_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o
psb_z_cuda_csrg_mat_mod.o psb_z_cuda_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o
psb_vectordev_mod.o: psb_s_vectordev_mod.o psb_d_vectordev_mod.o psb_c_vectordev_mod.o psb_z_vectordev_mod.o psb_i_vectordev_mod.o
psb_i_vectordev_mod.o psb_s_vectordev_mod.o psb_d_vectordev_mod.o psb_c_vectordev_mod.o psb_z_vectordev_mod.o: psb_base_vectordev_mod.o
vectordev.o: cuda_util.o vectordev.h
elldev.o: elldev.c
dnsdev.o: dnsdev.c
fcusparse.h elldev.c: elldev.h vectordev.h
fcusparse.o scusparse.o dcusparse.o ccusparse.o zcusparse.o : fcusparse.h
fcusparse.o scusparse.o dcusparse.o ccusparse.o zcusparse.o : fcusparse_fct.h
svectordev.o: svectordev.h vectordev.h
dvectordev.o: dvectordev.h vectordev.h
cvectordev.o: cvectordev.h vectordev.h
zvectordev.o: zvectordev.h vectordev.h
psb_cuda_env_mod.o: base_cusparse_mod.o
psb_cuda_mod.o: psb_cuda_env_mod.o psb_i_cuda_vect_mod.o\
psb_d_cuda_vect_mod.o psb_s_cuda_vect_mod.o\
psb_z_cuda_vect_mod.o psb_c_cuda_vect_mod.o\
psb_d_cuda_elg_mat_mod.o psb_d_cuda_hlg_mat_mod.o \
psb_d_cuda_hybg_mat_mod.o psb_d_cuda_csrg_mat_mod.o\
psb_s_cuda_elg_mat_mod.o psb_s_cuda_hlg_mat_mod.o \
psb_s_cuda_hybg_mat_mod.o psb_s_cuda_csrg_mat_mod.o\
psb_c_cuda_elg_mat_mod.o psb_c_cuda_hlg_mat_mod.o \
psb_c_cuda_hybg_mat_mod.o psb_c_cuda_csrg_mat_mod.o\
psb_z_cuda_elg_mat_mod.o psb_z_cuda_hlg_mat_mod.o \
psb_z_cuda_hybg_mat_mod.o psb_z_cuda_csrg_mat_mod.o\
psb_d_cuda_diag_mat_mod.o \
psb_d_cuda_hdiag_mat_mod.o psb_s_cuda_hdiag_mat_mod.o\
psb_s_cuda_dnsg_mat_mod.o psb_d_cuda_dnsg_mat_mod.o \
psb_c_cuda_dnsg_mat_mod.o psb_z_cuda_dnsg_mat_mod.o
iobjs: $(FOBJS)
$(MAKE) -C impl objs
cudaobjs: $(FOBJS)
$(MAKE) -C CUDA objs
ilib: objs
$(MAKE) -C impl lib LIBNAME=$(LIBNAME)
cudalib: objs ilib
$(MAKE) -C CUDA lib LIBNAME=$(LIBNAME)
clean: cclean iclean cudaclean spgpuclean
/bin/rm -f $(FOBJS) *$(.mod) *.a
cclean:
/bin/rm -f $(COBJS)
iclean:
$(MAKE) -C impl clean
cudaclean:
$(MAKE) -C CUDA clean
spgpuclean:
$(MAKE) -C spgpu clean
veryclean: clean

@ -0,0 +1,113 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
module base_cusparse_mod
use iso_c_binding
! Interface to CUSPARSE.
enum, bind(c)
enumerator cusparse_status_success
enumerator cusparse_status_not_initialized
enumerator cusparse_status_alloc_failed
enumerator cusparse_status_invalid_value
enumerator cusparse_status_arch_mismatch
enumerator cusparse_status_mapping_error
enumerator cusparse_status_execution_failed
enumerator cusparse_status_internal_error
enumerator cusparse_status_matrix_type_not_supported
end enum
enum, bind(c)
enumerator cusparse_matrix_type_general
enumerator cusparse_matrix_type_symmetric
enumerator cusparse_matrix_type_hermitian
enumerator cusparse_matrix_type_triangular
end enum
enum, bind(c)
enumerator cusparse_fill_mode_lower
enumerator cusparse_fill_mode_upper
end enum
enum, bind(c)
enumerator cusparse_diag_type_non_unit
enumerator cusparse_diag_type_unit
end enum
enum, bind(c)
enumerator cusparse_index_base_zero
enumerator cusparse_index_base_one
end enum
enum, bind(c)
enumerator cusparse_operation_non_transpose
enumerator cusparse_operation_transpose
enumerator cusparse_operation_conjugate_transpose
end enum
enum, bind(c)
enumerator cusparse_direction_row
enumerator cusparse_direction_column
end enum
interface
function FcusparseCreate() &
& bind(c,name="FcusparseCreate") result(res)
use iso_c_binding
integer(c_int) :: res
end function FcusparseCreate
end interface
interface
function FcusparseDestroy() &
& bind(c,name="FcusparseDestroy") result(res)
use iso_c_binding
integer(c_int) :: res
end function FcusparseDestroy
end interface
contains
function initFcusparse() result(res)
implicit none
integer(c_int) :: res
res = FcusparseCreate()
end function initFcusparse
function closeFcusparse() result(res)
implicit none
integer(c_int) :: res
res = FcusparseDestroy()
end function closeFcusparse
end module base_cusparse_mod

@ -0,0 +1,312 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
module c_cusparse_mod
use base_cusparse_mod
type, bind(c) :: c_Cmat
type(c_ptr) :: Mat = c_null_ptr
end type c_Cmat
#if CUDA_SHORT_VERSION <= 10
type, bind(c) :: c_Hmat
type(c_ptr) :: Mat = c_null_ptr
end type c_Hmat
#endif
interface CSRGDeviceFree
function c_CSRGDeviceFree(Mat) &
& bind(c,name="c_CSRGDeviceFree") result(res)
use iso_c_binding
import c_Cmat
type(c_Cmat) :: Mat
integer(c_int) :: res
end function c_CSRGDeviceFree
end interface
interface CSRGDeviceSetMatType
function c_CSRGDeviceSetMatType(Mat,type) &
& bind(c,name="c_CSRGDeviceSetMatType") result(res)
use iso_c_binding
import c_Cmat
type(c_Cmat) :: Mat
integer(c_int),value :: type
integer(c_int) :: res
end function c_CSRGDeviceSetMatType
end interface
interface CSRGDeviceSetMatFillMode
function c_CSRGDeviceSetMatFillMode(Mat,type) &
& bind(c,name="c_CSRGDeviceSetMatFillMode") result(res)
use iso_c_binding
import c_Cmat
type(c_Cmat) :: Mat
integer(c_int),value :: type
integer(c_int) :: res
end function c_CSRGDeviceSetMatFillMode
end interface
interface CSRGDeviceSetMatDiagType
function c_CSRGDeviceSetMatDiagType(Mat,type) &
& bind(c,name="c_CSRGDeviceSetMatDiagType") result(res)
use iso_c_binding
import c_Cmat
type(c_Cmat) :: Mat
integer(c_int),value :: type
integer(c_int) :: res
end function c_CSRGDeviceSetMatDiagType
end interface
interface CSRGDeviceSetMatIndexBase
function c_CSRGDeviceSetMatIndexBase(Mat,type) &
& bind(c,name="c_CSRGDeviceSetMatIndexBase") result(res)
use iso_c_binding
import c_Cmat
type(c_Cmat) :: Mat
integer(c_int),value :: type
integer(c_int) :: res
end function c_CSRGDeviceSetMatIndexBase
end interface
#if CUDA_SHORT_VERSION <= 10
interface CSRGDeviceCsrsmAnalysis
function c_CSRGDeviceCsrsmAnalysis(Mat) &
& bind(c,name="c_CSRGDeviceCsrsmAnalysis") result(res)
use iso_c_binding
import c_Cmat
type(c_Cmat) :: Mat
integer(c_int) :: res
end function c_CSRGDeviceCsrsmAnalysis
end interface
#else
interface CSRGIsNullSvBuffer
function c_CSRGIsNullSvBuffer(Mat) &
& bind(c,name="c_CSRGIsNullSvBuffer") result(res)
use iso_c_binding
import c_Cmat
type(c_Cmat) :: Mat
integer(c_int) :: res
end function c_CSRGIsNullSvBuffer
end interface
#endif
interface CSRGDeviceAlloc
function c_CSRGDeviceAlloc(Mat,nr,nc,nz) &
& bind(c,name="c_CSRGDeviceAlloc") result(res)
use iso_c_binding
import c_Cmat
type(c_Cmat) :: Mat
integer(c_int), value :: nr, nc, nz
integer(c_int) :: res
end function c_CSRGDeviceAlloc
end interface
interface CSRGDeviceGetParms
function c_CSRGDeviceGetParms(Mat,nr,nc,nz) &
& bind(c,name="c_CSRGDeviceGetParms") result(res)
use iso_c_binding
import c_Cmat
type(c_Cmat) :: Mat
integer(c_int) :: nr, nc, nz
integer(c_int) :: res
end function c_CSRGDeviceGetParms
end interface
interface spsvCSRGDevice
function c_spsvCSRGDevice(Mat,alpha,x,beta,y) &
& bind(c,name="c_spsvCSRGDevice") result(res)
use iso_c_binding
import c_Cmat
type(c_Cmat) :: Mat
type(c_ptr), value :: x
type(c_ptr), value :: y
complex(c_float_complex), value :: alpha,beta
integer(c_int) :: res
end function c_spsvCSRGDevice
end interface
interface spmvCSRGDevice
function c_spmvCSRGDevice(Mat,alpha,x,beta,y) &
& bind(c,name="c_spmvCSRGDevice") result(res)
use iso_c_binding
import c_Cmat
type(c_Cmat) :: Mat
type(c_ptr), value :: x
type(c_ptr), value :: y
complex(c_float_complex), value :: alpha,beta
integer(c_int) :: res
end function c_spmvCSRGDevice
end interface
interface CSRGHost2Device
function c_CSRGHost2Device(Mat,m,n,nz,irp,ja,val) &
& bind(c,name="c_CSRGHost2Device") result(res)
use iso_c_binding
import c_Cmat
type(c_Cmat) :: Mat
integer(c_int), value :: m,n,nz
integer(c_int) :: irp(*), ja(*)
complex(c_float_complex) :: val(*)
integer(c_int) :: res
end function c_CSRGHost2Device
end interface
interface CSRGDevice2Host
function c_CSRGDevice2Host(Mat,m,n,nz,irp,ja,val) &
& bind(c,name="c_CSRGDevice2Host") result(res)
use iso_c_binding
import c_Cmat
type(c_Cmat) :: Mat
integer(c_int), value :: m,n,nz
integer(c_int) :: irp(*), ja(*)
complex(c_float_complex) :: val(*)
integer(c_int) :: res
end function c_CSRGDevice2Host
end interface
#if CUDA_SHORT_VERSION <=10
interface HYBGDeviceAlloc
function c_HYBGDeviceAlloc(Mat,nr,nc,nz) &
& bind(c,name="c_HYBGDeviceAlloc") result(res)
use iso_c_binding
import c_hmat
type(c_Hmat) :: Mat
integer(c_int), value :: nr, nc, nz
integer(c_int) :: res
end function c_HYBGDeviceAlloc
end interface
interface HYBGDeviceFree
function c_HYBGDeviceFree(Mat) &
& bind(c,name="c_HYBGDeviceFree") result(res)
use iso_c_binding
import c_Hmat
type(c_Hmat) :: Mat
integer(c_int) :: res
end function c_HYBGDeviceFree
end interface
interface HYBGDeviceSetMatType
function c_HYBGDeviceSetMatType(Mat,type) &
& bind(c,name="c_HYBGDeviceSetMatType") result(res)
use iso_c_binding
import c_Hmat
type(c_Hmat) :: Mat
integer(c_int),value :: type
integer(c_int) :: res
end function c_HYBGDeviceSetMatType
end interface
interface HYBGDeviceSetMatFillMode
function c_HYBGDeviceSetMatFillMode(Mat,type) &
& bind(c,name="c_HYBGDeviceSetMatFillMode") result(res)
use iso_c_binding
import c_Hmat
type(c_Hmat) :: Mat
integer(c_int),value :: type
integer(c_int) :: res
end function c_HYBGDeviceSetMatFillMode
end interface
interface HYBGDeviceSetMatDiagType
function c_HYBGDeviceSetMatDiagType(Mat,type) &
& bind(c,name="c_HYBGDeviceSetMatDiagType") result(res)
use iso_c_binding
import c_Hmat
type(c_Hmat) :: Mat
integer(c_int),value :: type
integer(c_int) :: res
end function c_HYBGDeviceSetMatDiagType
end interface
interface HYBGDeviceSetMatIndexBase
function c_HYBGDeviceSetMatIndexBase(Mat,type) &
& bind(c,name="c_HYBGDeviceSetMatIndexBase") result(res)
use iso_c_binding
import c_Hmat
type(c_Hmat) :: Mat
integer(c_int),value :: type
integer(c_int) :: res
end function c_HYBGDeviceSetMatIndexBase
end interface
interface HYBGDeviceHybsmAnalysis
function c_HYBGDeviceHybsmAnalysis(Mat) &
& bind(c,name="c_HYBGDeviceHybsmAnalysis") result(res)
use iso_c_binding
import c_Hmat
type(c_Hmat) :: Mat
integer(c_int) :: res
end function c_HYBGDeviceHybsmAnalysis
end interface
interface spsvHYBGDevice
function c_spsvHYBGDevice(Mat,alpha,x,beta,y) &
& bind(c,name="c_spsvHYBGDevice") result(res)
use iso_c_binding
import c_Hmat
type(c_Hmat) :: Mat
type(c_ptr), value :: x
type(c_ptr), value :: y
complex(c_float_complex), value :: alpha,beta
integer(c_int) :: res
end function c_spsvHYBGDevice
end interface
interface spmvHYBGDevice
function c_spmvHYBGDevice(Mat,alpha,x,beta,y) &
& bind(c,name="c_spmvHYBGDevice") result(res)
use iso_c_binding
import c_Hmat
type(c_Hmat) :: Mat
type(c_ptr), value :: x
type(c_ptr), value :: y
complex(c_float_complex), value :: alpha,beta
integer(c_int) :: res
end function c_spmvHYBGDevice
end interface
interface HYBGHost2Device
function c_HYBGHost2Device(Mat,m,n,nz,irp,ja,val) &
& bind(c,name="c_HYBGHost2Device") result(res)
use iso_c_binding
import c_Hmat
type(c_Hmat) :: Mat
integer(c_int), value :: m,n,nz
integer(c_int) :: irp(*), ja(*)
complex(c_float_complex) :: val(*)
integer(c_int) :: res
end function c_HYBGHost2Device
end interface
#endif
end module c_cusparse_mod

@ -0,0 +1,99 @@
/* Parallel Sparse BLAS GPU plugin */
/* (C) Copyright 2013 */
/* Salvatore Filippone */
/* Alessandro Fanfarillo */
/* Redistribution and use in source and binary forms, with or without */
/* modification, are permitted provided that the following conditions */
/* are met: */
/* 1. Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* 2. Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions, and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution. */
/* 3. The name of the PSBLAS group or the names of its contributors may */
/* not be used to endorse or promote products derived from this */
/* software without specific written permission. */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include <cusparse_v2.h>
#include "cintrf.h"
#include "fcusparse.h"
/* Double precision real */
#define TYPE float complex
#define CUSPARSE_BASE_TYPE CUDA_C_32F
#define T_CSRGDeviceMat c_CSRGDeviceMat
#define T_Cmat c_Cmat
#define T_spmvCSRGDevice c_spmvCSRGDevice
#define T_spsvCSRGDevice c_spsvCSRGDevice
#define T_CSRGDeviceAlloc c_CSRGDeviceAlloc
#define T_CSRGDeviceFree c_CSRGDeviceFree
#define T_CSRGHost2Device c_CSRGHost2Device
#define T_CSRGDevice2Host c_CSRGDevice2Host
#define T_CSRGDeviceSetMatFillMode c_CSRGDeviceSetMatFillMode
#define T_CSRGDeviceSetMatDiagType c_CSRGDeviceSetMatDiagType
#define T_CSRGDeviceGetParms c_CSRGDeviceGetParms
#if CUDA_SHORT_VERSION <= 10
#define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType
#define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase
#define T_CSRGDeviceCsrsmAnalysis c_CSRGDeviceCsrsmAnalysis
#define cusparseTcsrmv cusparseCcsrmv
#define cusparseTcsrsv_solve cusparseCcsrsv_solve
#define cusparseTcsrsv_analysis cusparseCcsrsv_analysis
#define T_HYBGDeviceMat c_HYBGDeviceMat
#define T_Hmat c_Hmat
#define T_HYBGDeviceFree c_HYBGDeviceFree
#define T_spmvHYBGDevice c_spmvHYBGDevice
#define T_HYBGDeviceAlloc c_HYBGDeviceAlloc
#define T_HYBGDeviceSetMatDiagType c_HYBGDeviceSetMatDiagType
#define T_HYBGDeviceSetMatIndexBase c_HYBGDeviceSetMatIndexBase
#define T_HYBGDeviceSetMatType c_HYBGDeviceSetMatType
#define T_HYBGDeviceSetMatFillMode c_HYBGDeviceSetMatFillMode
#define T_HYBGDeviceHybsmAnalysis c_HYBGDeviceHybsmAnalysis
#define T_spsvHYBGDevice c_spsvHYBGDevice
#define T_HYBGHost2Device c_HYBGHost2Device
#define cusparseThybmv cusparseChybmv
#define cusparseThybsv_solve cusparseChybsv_solve
#define cusparseThybsv_analysis cusparseChybsv_analysis
#define cusparseTcsr2hyb cusparseCcsr2hyb
#elif CUDA_VERSION < 11030
#define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType
#define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase
#define T_CSRGDeviceCsrsv2Analysis c_CSRGDeviceCsrsv2Analysis
#define cusparseTcsrsv2_bufferSize cusparseCcsrsv2_bufferSize
#define cusparseTcsrsv2_analysis cusparseCcsrsv2_analysis
#define cusparseTcsrsv2_solve cusparseCcsrsv2_solve
#else
#define T_CSRGIsNullSvBuffer c_CSRGIsNullSvBuffer
#define T_CSRGIsNullSvDescr c_CSRGIsNullSvDescr
#define T_CSRGIsNullMvDescr c_CSRGIsNullMvDescr
#define T_CSRGCreateSpMVDescr c_CSRGCreateSpMVDescr
#endif
#include "fcusparse_fct.h"

@ -0,0 +1,47 @@
/* Parallel Sparse BLAS SPGPU plugin */
/* (C) Copyright 2013 */
/* Salvatore Filippone */
/* Alessandro Fanfarillo */
/* Redistribution and use in source and binary forms, with or without */
/* modification, are permitted provided that the following conditions */
/* are met: */
/* 1. Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* 2. Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions, and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution. */
/* 3. The name of the PSBLAS group or the names of its contributors may */
/* not be used to endorse or promote products derived from this */
/* software without specific written permission. */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
#ifndef _CINTRF_H_
#define _CINTRF_H_
#include <stdlib.h>
#include <stdio.h>
#include "core.h"
#include "cuda_util.h"
#include "vector.h"
#include "vectordev.h"
#define ELL_PITCH_ALIGN_S 32
#define ELL_PITCH_ALIGN_D 16
#endif

@ -0,0 +1,53 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
module core_mod
use iso_c_binding
integer(c_int), parameter :: spgpu_type_int = 0
integer(c_int), parameter :: spgpu_type_float = 1
integer(c_int), parameter :: spgpu_type_double = 2
integer(c_int), parameter :: spgpu_type_complex_float = 3
integer(c_int), parameter :: spgpu_type_complex_double = 4
integer(c_int), parameter :: spgpu_success = 0
integer(c_int), parameter :: spgpu_unsupported = 1
integer(c_int), parameter :: spgpu_unspecified = 2
integer(c_int), parameter :: spgpu_outofmem = 3
interface
subroutine psb_cudaSync() &
& bind(c,name='cudaSync')
use iso_c_binding
end subroutine psb_cudaSync
end interface
end module core_mod

@ -0,0 +1,799 @@
/* Parallel Sparse BLAS GPU plugin */
/* (C) Copyright 2013 */
/* Salvatore Filippone */
/* Alessandro Fanfarillo */
/* Redistribution and use in source and binary forms, with or without */
/* modification, are permitted provided that the following conditions */
/* are met: */
/* 1. Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* 2. Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions, and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution. */
/* 3. The name of the PSBLAS group or the names of its contributors may */
/* not be used to endorse or promote products derived from this */
/* software without specific written permission. */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
#include "cuda_util.h"
static int hasUVA=-1;
static struct cudaDeviceProp *prop=NULL;
static spgpuHandle_t psb_cuda_handle = NULL;
static cublasHandle_t psb_cublas_handle = NULL;
int allocRemoteBuffer(void** buffer, int count)
{
cudaError_t err = cudaMalloc(buffer, count);
if (err == cudaSuccess)
{
return SPGPU_SUCCESS;
}
else
{
fprintf(stderr,"CUDA allocRemoteBuffer for %d bytes Error: %s \n",
count, cudaGetErrorString(err));
if(err == cudaErrorMemoryAllocation)
return SPGPU_OUTOFMEMORY;
else
return SPGPU_UNSPECIFIED;
}
}
int hostRegisterMapped(void *pointer, long size)
{
cudaError_t err = cudaHostRegister(pointer, size, cudaHostRegisterMapped);
if (err == cudaSuccess)
{
return SPGPU_SUCCESS;
}
else
{
fprintf(stderr,"CUDA hostRegisterMapped Error: %s\n", cudaGetErrorString(err));
if(err == cudaErrorMemoryAllocation)
return SPGPU_OUTOFMEMORY;
else
return SPGPU_UNSPECIFIED;
}
}
int getDevicePointer(void **d_p, void * h_p)
{
cudaError_t err = cudaHostGetDevicePointer(d_p,h_p,0);
if (err == cudaSuccess)
{
return SPGPU_SUCCESS;
}
else
{
fprintf(stderr,"CUDA getDevicePointer Error: %s\n", cudaGetErrorString(err));
if(err == cudaErrorMemoryAllocation)
return SPGPU_OUTOFMEMORY;
else
return SPGPU_UNSPECIFIED;
}
}
int registerMappedMemory(void *buffer, void **dp, int size)
{
//cudaError_t err = cudaHostAlloc(buffer,size,cudaHostAllocMapped);
cudaError_t err = cudaHostRegister(buffer, size, cudaHostRegisterMapped);
if (err == cudaSuccess) err = cudaHostGetDevicePointer(dp,buffer,0);
if (err == cudaSuccess)
{
err = cudaHostGetDevicePointer(dp,buffer,0);
if (err == cudaSuccess)
{
return SPGPU_SUCCESS;
}
else
{
fprintf(stderr,"CUDA registerMappedMemory Error: %s\n", cudaGetErrorString(err));
return SPGPU_UNSPECIFIED;
}
}
else
{
fprintf(stderr,"CUDA registerMappedMemory Error: %s\n", cudaGetErrorString(err));
if(err == cudaErrorMemoryAllocation)
return SPGPU_OUTOFMEMORY;
else
return SPGPU_UNSPECIFIED;
}
}
int allocMappedMemory(void **buffer, void **dp, int size)
{
cudaError_t err = cudaHostAlloc(buffer,size,cudaHostAllocMapped);
if (err == 0) err = cudaHostGetDevicePointer(dp,*buffer,0);
if (err == cudaSuccess)
{
return SPGPU_SUCCESS;
}
else
{
fprintf(stderr,"CUDA allocMappedMemory Error: %s\n", cudaGetErrorString(err));
if(err == cudaErrorMemoryAllocation)
return SPGPU_OUTOFMEMORY;
else
return SPGPU_UNSPECIFIED;
}
}
int unregisterMappedMemory(void *buffer)
{
//cudaError_t err = cudaHostAlloc(buffer,size,cudaHostAllocMapped);
cudaError_t err = cudaHostUnregister(buffer);
if (err == cudaSuccess)
{
return SPGPU_SUCCESS;
}
else
{
fprintf(stderr,"CUDA unregisterMappedMemory Error: %s\n", cudaGetErrorString(err));
if(err == cudaErrorMemoryAllocation)
return SPGPU_OUTOFMEMORY;
else
return SPGPU_UNSPECIFIED;
}
}
int writeRemoteBuffer(void* hostSrc, void* buffer, int count)
{
cudaError_t err = cudaMemcpy(buffer, hostSrc, count, cudaMemcpyHostToDevice);
if (err == cudaSuccess)
return SPGPU_SUCCESS;
else {
fprintf(stderr,"CUDA Error writeRemoteBuffer: %s %p %p %d\n",
cudaGetErrorString(err),buffer, hostSrc, count);
return SPGPU_UNSPECIFIED;
}
}
int readRemoteBuffer(void* hostDest, void* buffer, int count)
{
cudaError_t err1;
cudaError_t err;
#if 0
{
err1 =cudaGetLastError();
fprintf(stderr,"CUDA Error prior to readRemoteBuffer: %s %d\n",
cudaGetErrorString(err1),err1);
}
#endif
err = cudaMemcpy(hostDest, buffer, count, cudaMemcpyDeviceToHost);
if (err == cudaSuccess)
return SPGPU_SUCCESS;
else {
fprintf(stderr,"CUDA Error readRemoteBuffer: %s %p %p %d %d\n",
cudaGetErrorString(err),hostDest,buffer,count,err);
return SPGPU_UNSPECIFIED;
}
}
int freeRemoteBuffer(void* buffer)
{
cudaError_t err = cudaFree(buffer);
if (err == cudaSuccess)
return SPGPU_SUCCESS;
else {
fprintf(stderr,"CUDA Error freeRemoteBuffer: %s %p\n", cudaGetErrorString(err),buffer);
return SPGPU_UNSPECIFIED;
}
}
int gpuInit(int dev)
{
int count,err;
if ((err=cudaSetDeviceFlags(cudaDeviceMapHost))!=cudaSuccess)
fprintf(stderr,"Error On SetDeviceFlags: %d '%s'\n",err,cudaGetErrorString(err));
if ((prop=(struct cudaDeviceProp *) malloc(sizeof(struct cudaDeviceProp)))==NULL) {
fprintf(stderr,"CUDA Error gpuInit3: not malloced prop\n");
return SPGPU_UNSPECIFIED;
}
err = setDevice(dev);
if (err != cudaSuccess) {
fprintf(stderr,"CUDA Error gpuInit2: %s\n", cudaGetErrorString(err));
return SPGPU_UNSPECIFIED;
}
if (!psb_cublas_handle)
psb_cudaCreateCublasHandle();
hasUVA=getDeviceHasUVA();
FcusparseCreate();
return err;
}
void gpuClose()
{
cudaStream_t st1, st2;
if (! psb_cuda_handle)
st1=spgpuGetStream(psb_cuda_handle);
if (! psb_cublas_handle)
cublasGetStream(psb_cublas_handle,&st2);
FcusparseDestroy();
psb_cudaDestroyHandle();
if (st1 != st2)
psb_cudaDestroyCublasHandle();
free(prop);
prop=NULL;
hasUVA=-1;
}
int setDevice(int dev)
{
int count,err,idev;
err = cudaGetDeviceCount(&count);
if (err != cudaSuccess) {
fprintf(stderr,"CUDA Error setDevice: %s\n", cudaGetErrorString(err));
return SPGPU_UNSPECIFIED;
}
if ((0<=dev)&&(dev<count))
idev = dev;
else
idev = 0;
err = cudaSetDevice(idev);
if (err != cudaSuccess) {
fprintf(stderr,"CUDA Error gpuInit2: %s\n", cudaGetErrorString(err));
return SPGPU_UNSPECIFIED;
}
err = cudaGetDeviceProperties(prop,idev);
if (err != cudaSuccess) {
fprintf(stderr,"CUDA Error gpuInit4: %s\n", cudaGetErrorString(err));
return SPGPU_UNSPECIFIED;
}
return SPGPU_SUCCESS;
}
int getDevice()
{ int count;
cudaGetDevice(&count);
return(count);
}
int getDeviceHasUVA()
{ int count=0;
if (prop!=NULL)
count = prop->unifiedAddressing;
return(count);
}
int getGPUMultiProcessors()
{ int count=0;
if (prop!=NULL)
count = prop->multiProcessorCount;
return(count);
}
int getGPUMemoryBusWidth()
{ int count=0;
#if CUDART_VERSION >= 5000
if (prop!=NULL)
count = prop->memoryBusWidth;
#endif
return(count);
}
int getGPUMemoryClockRate()
{ int count=0;
#if CUDART_VERSION >= 5000
if (prop!=NULL)
count = prop->memoryClockRate;
#endif
return(count);
}
int getGPUWarpSize()
{ int count=0;
if (prop!=NULL)
count = prop->warpSize;
return(count);
}
int getGPUMaxThreadsPerBlock()
{ int count=0;
if (prop!=NULL)
count = prop->maxThreadsPerBlock;
return(count);
}
int getGPUMaxThreadsPerMP()
{ int count=0;
if (prop!=NULL)
count = prop->maxThreadsPerMultiProcessor;
return(count);
}
int getGPUMaxRegistersPerBlock()
{ int count=0;
if (prop!=NULL)
count = prop->regsPerBlock;
return(count);
}
void cpyGPUNameString(char *cstring)
{
*cstring='\0';
if (prop!=NULL)
strcpy(cstring,prop->name);
}
int DeviceHasUVA()
{
return(hasUVA == 1);
}
int getDeviceCount()
{ int count;
cudaError_t err;
err = cudaGetDeviceCount(&count);
if (err != cudaSuccess) {
fprintf(stderr,"CUDA Error getDeviceCount: %s\n", cudaGetErrorString(err));
return SPGPU_UNSPECIFIED;
}
return(count);
}
void cudaSync()
{
cudaError_t err;
err = cudaDeviceSynchronize();
if (err == cudaSuccess)
return SPGPU_SUCCESS;
else {
fprintf(stderr,"CUDA Error cudaSync: %s\n", cudaGetErrorString(err));
return SPGPU_UNSPECIFIED;
}
}
void cudaReset()
{
cudaError_t err;
err = cudaDeviceReset();
if (err != cudaSuccess) {
fprintf(stderr,"CUDA Error Reset: %s\n", cudaGetErrorString(err));
return SPGPU_UNSPECIFIED;
}
}
spgpuHandle_t psb_cudaGetHandle()
{
return psb_cuda_handle;
}
void psb_cudaCreateHandle()
{
if (!psb_cuda_handle)
spgpuCreate(&psb_cuda_handle, getDevice());
}
void psb_cudaDestroyHandle()
{
if (!psb_cuda_handle)
spgpuDestroy(psb_cuda_handle);
psb_cuda_handle = NULL;
}
cudaStream_t psb_cudaGetStream()
{
return spgpuGetStream(psb_cuda_handle);
}
void psb_cudaSetStream(cudaStream_t stream)
{
spgpuSetStream(psb_cuda_handle, stream);
return ;
}
cublasHandle_t psb_cudaGetCublasHandle()
{
if (!psb_cublas_handle)
psb_cudaCreateCublasHandle();
return psb_cublas_handle;
}
void psb_cudaCreateCublasHandle()
{ if (!psb_cublas_handle)
cublasCreate(&psb_cublas_handle);
}
void psb_cudaDestroyCublasHandle()
{
if (!psb_cublas_handle)
cublasDestroy(psb_cublas_handle);
psb_cublas_handle=NULL;
}
/* Simple memory tools */
int allocateInt(void **d_int, int n)
{
return allocRemoteBuffer((void **)(d_int), n*sizeof(int));
}
int writeInt(void *d_int, int* h_int, int n)
{
int i,j;
int *di;
i = writeRemoteBuffer((void*)h_int, (void*)d_int, n*sizeof(int));
return i;
}
int readInt(void* d_int, int* h_int, int n)
{ int i;
i = readRemoteBuffer((void *) h_int, (void *) d_int, n*sizeof(int));
//cudaSync();
return(i);
}
int writeIntFirst(int first, void *d_int, int* h_int, int n, int IndexBase)
{
int i,j;
int *di=(int *) d_int;
di = &(di[first-IndexBase]);
i = writeRemoteBuffer((void*)h_int, (void*)di, n*sizeof(int));
return i;
}
int readIntFirst(int first,void* d_int, int* h_int, int n, int IndexBase)
{ int i;
int *di=(int *) d_int;
di = &(di[first-IndexBase]);
i = readRemoteBuffer((void *) h_int, (void *) di, n*sizeof(int));
//cudaSync();
return(i);
}
int allocateMultiInt(void **d_int, int m, int n)
{
return allocRemoteBuffer((void **)(d_int), m*n*sizeof(int));
}
int writeMultiInt(void *d_int, int* h_int, int m, int n)
{
int i,j;
int *di;
i = writeRemoteBuffer((void*)h_int, (void*)d_int, m*n*sizeof(int));
return i;
}
int readMultiInt(void* d_int, int* h_int, int m, int n)
{ int i;
i = readRemoteBuffer((void *) h_int, (void *) d_int, m*n*sizeof(int));
//cudaSync();
return(i);
}
void freeInt(void *d_int)
{
//printf("Before freeInt\n");
freeRemoteBuffer(d_int);
}
int allocateFloat(void **d_float, int n)
{
return allocRemoteBuffer((void **)(d_float), n*sizeof(float));
}
int writeFloat(void *d_float, float* h_float, int n)
{
int i;
i = writeRemoteBuffer((void*)h_float, (void*)d_float, n*sizeof(float));
return i;
}
int readFloat(void* d_float, float* h_float, int n)
{ int i;
i = readRemoteBuffer((void *) h_float, (void *) d_float, n*sizeof(float));
return(i);
}
int writeFloatFirst(int df, void *d_float, float* h_float, int n, int IndexBase)
{
int i;
float *dv=(float *) d_float;
dv = &dv[df-IndexBase];
i = writeRemoteBuffer((void*)h_float, (void*)dv, n*sizeof(float));
return i;
}
int readFloatFirst(int df, void* d_float, float* h_float, int n, int IndexBase)
{ int i;
float *dv=(float *) d_float;
dv = &dv[df-IndexBase];
//fprintf(stderr,"readFloatFirst: %d %p %p %p %d \n",df,d_float,dv,h_float,n);
i = readRemoteBuffer((void *) h_float, (void *) dv, n*sizeof(float));
return(i);
}
int allocateMultiFloat(void **d_float, int m, int n)
{
return allocRemoteBuffer((void **)(d_float), m*n*sizeof(float));
}
int writeMultiFloat(void *d_float, float* h_float, int m, int n)
{
int i,j;
i = writeRemoteBuffer((void*)h_float, (void*)d_float, m*n*sizeof(float));
return i;
}
int readMultiFloat(void* d_float, float* h_float, int m, int n)
{ int i;
i = readRemoteBuffer((void *) h_float, (void *) d_float, m*n*sizeof(float));
//cudaSync();
return(i);
}
void freeFloat(void *d_float)
{
freeRemoteBuffer(d_float);
}
int allocateDouble(void **d_double, int n)
{
return allocRemoteBuffer((void **)(d_double), n*sizeof(double));
}
int writeDouble(void *d_double, double* h_double, int n)
{
int i;
i = writeRemoteBuffer((void*)h_double, (void*)d_double, n*sizeof(double));
return i;
}
int readDouble(void* d_double, double* h_double, int n)
{ int i;
i = readRemoteBuffer((void *) h_double, (void *) d_double, n*sizeof(double));
return(i);
}
int writeDoubleFirst(int df, void *d_double, double* h_double, int n, int IndexBase)
{
int i;
double *dv=(double *) d_double;
dv = &dv[df-IndexBase];
i = writeRemoteBuffer((void*)h_double, (void*)dv, n*sizeof(double));
return i;
}
int readDoubleFirst(int df, void* d_double, double* h_double, int n, int IndexBase)
{ int i;
double *dv=(double *) d_double;
dv = &dv[df-IndexBase];
//fprintf(stderr,"readDoubleFirst: %d %p %p %p %d \n",df,d_double,dv,h_double,n);
i = readRemoteBuffer((void *) h_double, (void *) dv, n*sizeof(double));
return(i);
}
int allocateMultiDouble(void **d_double, int m, int n)
{
return allocRemoteBuffer((void **)(d_double), m*n*sizeof(double));
}
int writeMultiDouble(void *d_double, double* h_double, int m, int n)
{
int i,j;
i = writeRemoteBuffer((void*)h_double, (void*)d_double, m*n*sizeof(double));
return i;
}
int readMultiDouble(void* d_double, double* h_double, int m, int n)
{ int i;
i = readRemoteBuffer((void *) h_double, (void *) d_double, m*n*sizeof(double));
//cudaSync();
return(i);
}
void freeDouble(void *d_double)
{
freeRemoteBuffer(d_double);
}
int allocateFloatComplex(void **d_FloatComplex, int n)
{
return allocRemoteBuffer((void **)(d_FloatComplex), n*sizeof(cuFloatComplex));
}
int writeFloatComplex(void *d_FloatComplex, cuFloatComplex* h_FloatComplex, int n)
{
int i;
i = writeRemoteBuffer((void*)h_FloatComplex, (void*)d_FloatComplex, n*sizeof(cuFloatComplex));
return i;
}
int readFloatComplex(void* d_FloatComplex, cuFloatComplex* h_FloatComplex, int n)
{ int i;
i = readRemoteBuffer((void *) h_FloatComplex, (void *) d_FloatComplex, n*sizeof(cuFloatComplex));
return(i);
}
int allocateMultiFloatComplex(void **d_FloatComplex, int m, int n)
{
return allocRemoteBuffer((void **)(d_FloatComplex), m*n*sizeof(cuFloatComplex));
}
int writeMultiFloatComplex(void *d_FloatComplex, cuFloatComplex* h_FloatComplex, int m, int n)
{
int i,j;
i = writeRemoteBuffer((void*)h_FloatComplex, (void*)d_FloatComplex, m*n*sizeof(cuFloatComplex));
return i;
}
int readMultiFloatComplex(void* d_FloatComplex, cuFloatComplex* h_FloatComplex, int m, int n)
{ int i;
i = readRemoteBuffer((void *) h_FloatComplex, (void *) d_FloatComplex, m*n*sizeof(cuFloatComplex));
//cudaSync();
return(i);
}
int writeFloatComplexFirst(int df, void *d_floatComplex,
cuFloatComplex* h_floatComplex, int n, int IndexBase)
{
int i;
cuFloatComplex *dv=(cuFloatComplex *) d_floatComplex;
dv = &dv[df-IndexBase];
i = writeRemoteBuffer((void*)h_floatComplex, (void*)dv, n*sizeof(cuFloatComplex));
return i;
}
int readFloatComplexFirst(int df, void* d_floatComplex, cuFloatComplex* h_floatComplex,
int n, int IndexBase)
{ int i;
cuFloatComplex *dv=(cuFloatComplex *) d_floatComplex;
dv = &dv[df-IndexBase];
i = readRemoteBuffer((void *) h_floatComplex, (void *) dv, n*sizeof(cuFloatComplex));
return(i);
}
void freeFloatComplex(void *d_FloatComplex)
{
freeRemoteBuffer(d_FloatComplex);
}
int allocateDoubleComplex(void **d_DoubleComplex, int n)
{
return allocRemoteBuffer((void **)(d_DoubleComplex), n*sizeof(cuDoubleComplex));
}
int writeDoubleComplex(void *d_DoubleComplex, cuDoubleComplex* h_DoubleComplex, int n)
{
int i;
i = writeRemoteBuffer((void*)h_DoubleComplex, (void*)d_DoubleComplex, n*sizeof(cuDoubleComplex));
return i;
}
int readDoubleComplex(void* d_DoubleComplex, cuDoubleComplex* h_DoubleComplex, int n)
{ int i;
i = readRemoteBuffer((void *) h_DoubleComplex, (void *) d_DoubleComplex, n*sizeof(cuDoubleComplex));
return(i);
}
int writeDoubleComplexFirst(int df, void *d_doubleComplex,
cuDoubleComplex* h_doubleComplex, int n, int IndexBase)
{
int i;
cuDoubleComplex *dv=(cuDoubleComplex *) d_doubleComplex;
dv = &dv[df-IndexBase];
i = writeRemoteBuffer((void*)h_doubleComplex, (void*)dv, n*sizeof(cuDoubleComplex));
return i;
}
int readDoubleComplexFirst(int df, void* d_doubleComplex, cuDoubleComplex* h_doubleComplex,
int n, int IndexBase)
{ int i;
cuDoubleComplex *dv=(cuDoubleComplex *) d_doubleComplex;
dv = &dv[df-IndexBase];
i = readRemoteBuffer((void *) h_doubleComplex, (void *) dv, n*sizeof(cuDoubleComplex));
return(i);
}
int allocateMultiDoubleComplex(void **d_DoubleComplex, int m, int n)
{
return allocRemoteBuffer((void **)(d_DoubleComplex), m*n*sizeof(cuDoubleComplex));
}
int writeMultiDoubleComplex(void *d_DoubleComplex, cuDoubleComplex* h_DoubleComplex, int m, int n)
{
int i,j;
i = writeRemoteBuffer((void*)h_DoubleComplex, (void*)d_DoubleComplex, m*n*sizeof(cuDoubleComplex));
return i;
}
int readMultiDoubleComplex(void* d_DoubleComplex, cuDoubleComplex* h_DoubleComplex, int m, int n)
{ int i;
i = readRemoteBuffer((void *) h_DoubleComplex, (void *) d_DoubleComplex, m*n*sizeof(cuDoubleComplex));
//cudaSync();
return(i);
}
void freeDoubleComplex(void *d_DoubleComplex)
{
freeRemoteBuffer(d_DoubleComplex);
}
double etime()
{
struct timeval tt;
struct timezone tz;
double temp;
if (gettimeofday(&tt,&tz) != 0) {
fprintf(stderr,"Fatal error for gettimeofday ??? \n");
exit(-1);
}
temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6;
return(temp);
}

@ -0,0 +1,137 @@
/* Parallel Sparse BLAS GPU plugin */
/* (C) Copyright 2013 */
/* Salvatore Filippone */
/* Alessandro Fanfarillo */
/* Redistribution and use in source and binary forms, with or without */
/* modification, are permitted provided that the following conditions */
/* are met: */
/* 1. Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* 2. Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions, and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution. */
/* 3. The name of the PSBLAS group or the names of its contributors may */
/* not be used to endorse or promote products derived from this */
/* software without specific written permission. */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
#ifndef _CUDA_UTIL_H_
#define _CUDA_UTIL_H_
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <string.h>
#include "cuda_runtime.h"
#include "core.h"
#include "cuComplex.h"
#include "fcusparse.h"
#include "cublas_v2.h"
int allocRemoteBuffer(void** buffer, int count);
int allocMappedMemory(void **buffer, void **dp, int size);
int registerMappedMemory(void *buffer, void **dp, int size);
int unregisterMappedMemory(void *buffer);
int writeRemoteBuffer(void* hostSrc, void* buffer, int count);
int readRemoteBuffer(void* hostDest, void* buffer, int count);
int freeRemoteBuffer(void* buffer);
int gpuInit(int dev);
int getDeviceCount();
int getDevice();
int setDevice(int dev);
int getGPUMultiProcessors();
int getGPUMemoryBusWidth();
int getGPUMemoryClockRate();
int getGPUWarpSize();
int getGPUMaxThreadsPerBlock();
int getGPUMaxThreadsPerMP();
int getGPUMaxRegistersPerBlock();
void cpyGPUNameString(char *cstring);
void cudaSync();
void cudaReset();
void gpuClose();
spgpuHandle_t psb_cudaGetHandle();
void psb_cudaCreateHandle();
void psb_cudaDestroyHandle();
cudaStream_t psb_cudaGetStream();
void psb_cudaSetStream(cudaStream_t stream);
cublasHandle_t psb_cudaGetCublasHandle();
void psb_cudaCreateCublasHandle();
void psb_cudaDestroyCublasHandle();
int allocateInt(void **, int);
int allocateMultiInt(void **, int, int);
int writeInt(void *, int *, int);
int writeMultiInt(void *, int* , int , int );
int readInt(void *, int *, int);
int readMultiInt(void*, int*, int, int );
int writeIntFirst(int,void *, int *, int,int);
int readIntFirst(int,void *, int *, int,int);
void freeInt(void *);
int allocateFloat(void **, int);
int allocateMultiFloat(void **, int, int);
int writeFloat(void *, float *, int);
int writeMultiFloat(void *, float* , int , int );
int readFloat(void *, float*, int);
int readMultiFloat(void*, float*, int, int );
int writeFloatFirst(int, void *, float*, int, int);
int readFloatFirst(int, void *, float*, int, int);
void freeFloat(void *);
int allocateDouble(void **, int);
int allocateMultiDouble(void **, int, int);
int writeDouble(void *, double*, int);
int writeMultiDouble(void *, double* , int , int );
int readDouble(void *, double*, int);
int readMultiDouble(void*, double*, int, int );
int writeDoubleFirst(int, void *, double*, int, int);
int readDoubleFirst(int, void *, double*, int, int);
void freeDouble(void *);
int allocateFloatComplex(void **, int);
int allocateMultiFloatComplex(void **, int, int);
int writeFloatComplex(void *, cuFloatComplex*, int);
int writeMultiFloatComplex(void *, cuFloatComplex* , int , int );
int readFloatComplex(void *, cuFloatComplex*, int);
int readMultiFloatComplex(void*, cuFloatComplex*, int, int );
int writeFloatComplexFirst(int, void *, cuFloatComplex*, int, int);
int readFloatComplexFirst(int, void *, cuFloatComplex*, int, int);
void freeFloatComplex(void *);
int allocateDoubleComplex(void **, int);
int allocateMultiDoubleComplex(void **, int, int);
int writeDoubleComplex(void *, cuDoubleComplex*, int);
int writeMultiDoubleComplex(void *, cuDoubleComplex* , int , int );
int readDoubleComplex(void *, cuDoubleComplex*, int);
int readMultiDoubleComplex(void*, cuDoubleComplex*, int, int );
int writeDoubleComplexFirst(int, void *, cuDoubleComplex*, int, int);
int readDoubleComplexFirst(int, void *, cuDoubleComplex*, int, int);
void freeDoubleComplex(void *);
double etime();
#endif

@ -0,0 +1,38 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
module cusparse_mod
use base_cusparse_mod
use s_cusparse_mod
use d_cusparse_mod
use c_cusparse_mod
use z_cusparse_mod
end module cusparse_mod

@ -0,0 +1,323 @@
/* Parallel Sparse BLAS GPU plugin */
/* (C) Copyright 2013 */
/* Salvatore Filippone */
/* Alessandro Fanfarillo */
/* Redistribution and use in source and binary forms, with or without */
/* modification, are permitted provided that the following conditions */
/* are met: */
/* 1. Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* 2. Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions, and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution. */
/* 3. The name of the PSBLAS group or the names of its contributors may */
/* not be used to endorse or promote products derived from this */
/* software without specific written permission. */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
#include <stdio.h>
#include <complex.h>
//#include "utils.h"
//#include "common.h"
#include "cvectordev.h"
int registerMappedFloatComplex(void *buff, void **d_p, int n, cuFloatComplex dummy)
{
return registerMappedMemory(buff,d_p,n*sizeof(cuFloatComplex));
}
int writeMultiVecDeviceFloatComplex(void* deviceVec, cuFloatComplex* hostVec)
{ int i;
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
// Ex updateFromHost vector function
i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_,
devVec->pitch_*devVec->count_*sizeof(cuFloatComplex));
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","FallocMultiVecDevice",i);
}
return(i);
}
int writeMultiVecDeviceFloatComplexR2(void* deviceVec, cuFloatComplex* hostVec, int ld)
{ int i;
i = writeMultiVecDeviceFloatComplex(deviceVec, (void *) hostVec);
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","writeMultiVecDeviceFloatComplexR2",i);
}
return(i);
}
int readMultiVecDeviceFloatComplex(void* deviceVec, cuFloatComplex* hostVec)
{ int i,j;
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_,
devVec->pitch_*devVec->count_*sizeof(cuFloatComplex));
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceFloat",i);
}
return(i);
}
int readMultiVecDeviceFloatComplexR2(void* deviceVec, cuFloatComplex* hostVec, int ld)
{ int i;
i = readMultiVecDeviceFloatComplex(deviceVec, hostVec);
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceFloatComplexR2",i);
}
return(i);
}
int setscalMultiVecDeviceFloatComplex(cuFloatComplex val, int first, int last,
int indexBase, void* devMultiVecX)
{ int i=0;
int pitch = 0;
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
spgpuHandle_t handle=psb_cudaGetHandle();
spgpuCsetscal(handle, first, last, indexBase, val, (cuFloatComplex *) devVecX->v_);
return(i);
}
int geinsMultiVecDeviceFloatComplex(int n, void* devMultiVecIrl, void* devMultiVecVal,
int dupl, int indexBase, void* devMultiVecX)
{ int j=0, i=0,nmin=0,nmax=0;
int pitch = 0;
cuFloatComplex beta;
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl;
struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal;
spgpuHandle_t handle=psb_cudaGetHandle();
pitch = devVecIrl->pitch_;
if ((n > devVecIrl->size_) || (n>devVecVal->size_ ))
return SPGPU_UNSUPPORTED;
//fprintf(stderr,"geins: %d %d %p %p %p\n",dupl,n,devVecIrl->v_,devVecVal->v_,devVecX->v_);
if (dupl == INS_OVERWRITE)
beta = make_cuFloatComplex(0.0, 0.0);
else if (dupl == INS_ADD)
beta = make_cuFloatComplex(1.0, 0.0);
else
beta = make_cuFloatComplex(0.0, 0.0);
spgpuCscat(handle, (cuFloatComplex *) devVecX->v_, n, (cuFloatComplex*)devVecVal->v_,
(int*)devVecIrl->v_, indexBase, beta);
return(i);
}
int igathMultiVecDeviceFloatComplexVecIdx(void* deviceVec, int vectorId, int n,
int first, void* deviceIdx, int hfirst,
void* host_values, int indexBase)
{
int i, *idx;
struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx;
i= igathMultiVecDeviceFloatComplex(deviceVec, vectorId, n,
first, (void*) devIdx->v_, hfirst, host_values, indexBase);
return(i);
}
int igathMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n,
int first, void* indexes, int hfirst,
void* host_values, int indexBase)
{
int i, *idx =(int *) indexes;;
cuFloatComplex *hv = (cuFloatComplex *) host_values;;
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
spgpuHandle_t handle=psb_cudaGetHandle();
i=0;
hv = &(hv[hfirst-indexBase]);
idx = &(idx[first-indexBase]);
spgpuCgath(handle,hv, n, idx,indexBase,
(cuFloatComplex *) devVec->v_+vectorId*devVec->pitch_);
return(i);
}
int iscatMultiVecDeviceFloatComplexVecIdx(void* deviceVec, int vectorId, int n,
int first, void *deviceIdx,
int hfirst, void* host_values,
int indexBase, cuFloatComplex beta)
{
int i, *idx;
struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx;
i= iscatMultiVecDeviceFloatComplex(deviceVec, vectorId, n, first,
(void*) devIdx->v_, hfirst,host_values,
indexBase, beta);
return(i);
}
int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n,
int first, void *indexes,
int hfirst, void* host_values,
int indexBase, cuFloatComplex beta)
{ int i=0;
cuFloatComplex *hv = (cuFloatComplex *) host_values;
int *idx=(int *) indexes;
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
spgpuHandle_t handle=psb_cudaGetHandle();
idx = &(idx[first-indexBase]);
hv = &(hv[hfirst-indexBase]);
spgpuCscat(handle, (cuFloatComplex *) devVec->v_, n, hv, idx, indexBase, beta);
return SPGPU_SUCCESS;
}
int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
{ int i=0;
spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
spgpuCmnrm2(handle, y_res, n,(cuFloatComplex *)devVecA->v_,
devVecA->count_, devVecA->pitch_);
return(i);
}
int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
{ int i=0;
spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
spgpuCmamax(handle, y_res, n,(cuFloatComplex *)devVecA->v_,
devVecA->count_, devVecA->pitch_);
return(i);
}
int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
{ int i=0;
spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
spgpuCmasum(handle, y_res, n,(cuFloatComplex *)devVecA->v_,
devVecA->count_, devVecA->pitch_);
return(i);
}
int scalMultiVecDeviceFloatComplex(cuFloatComplex alpha, void* devMultiVecA)
{ int i=0;
spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
// Note: inner kernel can handle aliased input/output
spgpuCscal(handle, (cuFloatComplex *)devVecA->v_, devVecA->pitch_,
alpha, (cuFloatComplex *)devVecA->v_);
return(i);
}
int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n,
void* devMultiVecA, void* devMultiVecB)
{int i=0;
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
spgpuHandle_t handle=psb_cudaGetHandle();
spgpuCmdot(handle, y_res, n, (cuFloatComplex*)devVecA->v_,
(cuFloatComplex*)devVecB->v_,devVecA->count_,devVecB->pitch_);
return(i);
}
int axpbyMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha, void* devMultiVecX,
cuFloatComplex beta, void* devMultiVecY)
{ int j=0, i=0;
int pitch = 0;
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY;
spgpuHandle_t handle=psb_cudaGetHandle();
pitch = devVecY->pitch_;
if ((n > devVecY->size_) || (n>devVecX->size_ ))
return SPGPU_UNSUPPORTED;
for(j=0;j<devVecY->count_;j++)
spgpuCaxpby(handle,(cuFloatComplex*)devVecY->v_+pitch*j, n, beta,
(cuFloatComplex*)devVecY->v_+pitch*j, alpha,
(cuFloatComplex*) devVecX->v_+pitch*j);
return(i);
}
int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha,
void *deviceVecA, void *deviceVecB)
{ int i = 0;
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
spgpuHandle_t handle=psb_cudaGetHandle();
if ((n > devVecA->size_) || (n>devVecB->size_ ))
return SPGPU_UNSUPPORTED;
spgpuCmaxy(handle, (cuFloatComplex*)devVecB->v_, n, alpha,
(cuFloatComplex*)devVecA->v_,
(cuFloatComplex*)devVecB->v_, devVecA->count_, devVecA->pitch_);
return(i);
}
int axybzMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA,
void *deviceVecB, cuFloatComplex beta,
void *deviceVecZ)
{ int i=0;
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ;
spgpuHandle_t handle=psb_cudaGetHandle();
if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ ))
return SPGPU_UNSUPPORTED;
spgpuCmaxypbz(handle, (cuFloatComplex*)devVecZ->v_, n, beta,
(cuFloatComplex*)devVecZ->v_,
alpha, (cuFloatComplex*) devVecA->v_, (cuFloatComplex*) devVecB->v_,
devVecB->count_, devVecB->pitch_);
return(i);
}
int absMultiVecDeviceFloatComplex2(int n, cuFloatComplex alpha, void *deviceVecA,
void *deviceVecB)
{ int i=0;
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
spgpuHandle_t handle=psb_cudaGetHandle();
if ((n > devVecA->size_) || (n>devVecB->size_ ))
return SPGPU_UNSUPPORTED;
spgpuCabs(handle, (cuFloatComplex*)devVecB->v_, n,
alpha, (cuFloatComplex*)devVecA->v_);
return(i);
}
int absMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA)
{ int i = 0;
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
spgpuHandle_t handle=psb_cudaGetHandle();
if (n > devVecA->size_)
return SPGPU_UNSUPPORTED;
spgpuCabs(handle, (cuFloatComplex*)devVecA->v_, n,
alpha, (cuFloatComplex*)devVecA->v_);
return(i);
}

@ -0,0 +1,78 @@
/* Parallel Sparse BLAS GPU plugin */
/* (C) Copyright 2013 */
/* Salvatore Filippone */
/* Alessandro Fanfarillo */
/* Redistribution and use in source and binary forms, with or without */
/* modification, are permitted provided that the following conditions */
/* are met: */
/* 1. Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* 2. Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions, and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution. */
/* 3. The name of the PSBLAS group or the names of its contributors may */
/* not be used to endorse or promote products derived from this */
/* software without specific written permission. */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
#pragma once
//#include "utils.h"
#include <complex.h>
#include "cuComplex.h"
#include "vectordev.h"
#include "cuda_runtime.h"
#include "core.h"
int registerMappedFloatComplex(void *, void **, int, cuFloatComplex);
int writeMultiVecDeviceFloatComplex(void* deviceMultiVec, cuFloatComplex* hostMultiVec);
int writeMultiVecDeviceFloatComplexR2(void* deviceMultiVec, cuFloatComplex* hostMultiVec, int ld);
int readMultiVecDeviceFloatComplex(void* deviceMultiVec, cuFloatComplex* hostMultiVec);
int readMultiVecDeviceFloatComplexR2(void* deviceMultiVec, cuFloatComplex* hostMultiVec, int ld);
int setscalMultiVecDeviceFloatComplex(cuFloatComplex val, int first, int last,
int indexBase, void* devVecX);
int geinsMultiVecDeviceFloatComplex(int n, void* devVecIrl, void* devVecVal,
int dupl, int indexBase, void* devVecX);
int igathMultiVecDeviceFloatComplexVecIdx(void* deviceVec, int vectorId, int n,
int first, void* deviceIdx, int hfirst,
void* host_values, int indexBase);
int igathMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n,
int first, void* indexes, int hfirst, void* host_values,
int indexBase);
int iscatMultiVecDeviceFloatComplexVecIdx(void* deviceVec, int vectorId, int n, int first,
void *deviceIdx, int hfirst, void* host_values,
int indexBase, cuFloatComplex beta);
int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n, int first, void *indexes,
int hfirst, void* host_values, int indexBase, cuFloatComplex beta);
int scalMultiVecDeviceFloatComplex(cuFloatComplex alpha, void* devMultiVecA);
int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA);
int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA);
int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA);
int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA, void* devVecB);
int axpbyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void* devVecX, cuFloatComplex beta, void* devVecY);
int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA, void *deviceVecB);
int axybzMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA,
void *deviceVecB, cuFloatComplex beta, void *deviceVecZ);
int absMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA);
int absMultiVecDeviceFloatComplex2(int n, cuFloatComplex alpha,
void *deviceVecA, void *deviceVecB);

@ -0,0 +1,313 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
module d_cusparse_mod
use base_cusparse_mod
type, bind(c) :: d_Cmat
type(c_ptr) :: Mat = c_null_ptr
end type d_Cmat
#if CUDA_SHORT_VERSION <= 10
type, bind(c) :: d_Hmat
type(c_ptr) :: Mat = c_null_ptr
end type d_Hmat
#endif
interface CSRGDeviceFree
function d_CSRGDeviceFree(Mat) &
& bind(c,name="d_CSRGDeviceFree") result(res)
use iso_c_binding
import d_Cmat
type(d_Cmat) :: Mat
integer(c_int) :: res
end function d_CSRGDeviceFree
end interface
interface CSRGDeviceSetMatType
function d_CSRGDeviceSetMatType(Mat,type) &
& bind(c,name="d_CSRGDeviceSetMatType") result(res)
use iso_c_binding
import d_Cmat
type(d_Cmat) :: Mat
integer(c_int),value :: type
integer(c_int) :: res
end function d_CSRGDeviceSetMatType
end interface
interface CSRGDeviceSetMatFillMode
function d_CSRGDeviceSetMatFillMode(Mat,type) &
& bind(c,name="d_CSRGDeviceSetMatFillMode") result(res)
use iso_c_binding
import d_Cmat
type(d_Cmat) :: Mat
integer(c_int),value :: type
integer(c_int) :: res
end function d_CSRGDeviceSetMatFillMode
end interface
interface CSRGDeviceSetMatDiagType
function d_CSRGDeviceSetMatDiagType(Mat,type) &
& bind(c,name="d_CSRGDeviceSetMatDiagType") result(res)
use iso_c_binding
import d_Cmat
type(d_Cmat) :: Mat
integer(c_int),value :: type
integer(c_int) :: res
end function d_CSRGDeviceSetMatDiagType
end interface
interface CSRGDeviceSetMatIndexBase
function d_CSRGDeviceSetMatIndexBase(Mat,type) &
& bind(c,name="d_CSRGDeviceSetMatIndexBase") result(res)
use iso_c_binding
import d_Cmat
type(d_Cmat) :: Mat
integer(c_int),value :: type
integer(c_int) :: res
end function d_CSRGDeviceSetMatIndexBase
end interface
#if CUDA_SHORT_VERSION <= 10
interface CSRGDeviceCsrsmAnalysis
function d_CSRGDeviceCsrsmAnalysis(Mat) &
& bind(c,name="d_CSRGDeviceCsrsmAnalysis") result(res)
use iso_c_binding
import d_Cmat
type(d_Cmat) :: Mat
integer(c_int) :: res
end function d_CSRGDeviceCsrsmAnalysis
end interface
#else
interface CSRGIsNullSvBuffer
function d_CSRGIsNullSvBuffer(Mat) &
& bind(c,name="d_CSRGIsNullSvBuffer") result(res)
use iso_c_binding
import d_Cmat
type(d_Cmat) :: Mat
integer(c_int) :: res
end function d_CSRGIsNullSvBuffer
end interface
#endif
interface CSRGDeviceAlloc
function d_CSRGDeviceAlloc(Mat,nr,nc,nz) &
& bind(c,name="d_CSRGDeviceAlloc") result(res)
use iso_c_binding
import d_Cmat
type(d_Cmat) :: Mat
integer(c_int), value :: nr, nc, nz
integer(c_int) :: res
end function d_CSRGDeviceAlloc
end interface
interface CSRGDeviceGetParms
function d_CSRGDeviceGetParms(Mat,nr,nc,nz) &
& bind(c,name="d_CSRGDeviceGetParms") result(res)
use iso_c_binding
import d_Cmat
type(d_Cmat) :: Mat
integer(c_int) :: nr, nc, nz
integer(c_int) :: res
end function d_CSRGDeviceGetParms
end interface
interface spsvCSRGDevice
function d_spsvCSRGDevice(Mat,alpha,x,beta,y) &
& bind(c,name="d_spsvCSRGDevice") result(res)
use iso_c_binding
import d_Cmat
type(d_Cmat) :: Mat
type(c_ptr), value :: x
type(c_ptr), value :: y
real(c_double), value :: alpha,beta
integer(c_int) :: res
end function d_spsvCSRGDevice
end interface
interface spmvCSRGDevice
function d_spmvCSRGDevice(Mat,alpha,x,beta,y) &
& bind(c,name="d_spmvCSRGDevice") result(res)
use iso_c_binding
import d_Cmat
type(d_Cmat) :: Mat
type(c_ptr), value :: x
type(c_ptr), value :: y
real(c_double), value :: alpha,beta
integer(c_int) :: res
end function d_spmvCSRGDevice
end interface
interface CSRGHost2Device
function d_CSRGHost2Device(Mat,m,n,nz,irp,ja,val) &
& bind(c,name="d_CSRGHost2Device") result(res)
use iso_c_binding
import d_Cmat
type(d_Cmat) :: Mat
integer(c_int), value :: m,n,nz
integer(c_int) :: irp(*), ja(*)
real(c_double) :: val(*)
integer(c_int) :: res
end function d_CSRGHost2Device
end interface
interface CSRGDevice2Host
function d_CSRGDevice2Host(Mat,m,n,nz,irp,ja,val) &
& bind(c,name="d_CSRGDevice2Host") result(res)
use iso_c_binding
import d_Cmat
type(d_Cmat) :: Mat
integer(c_int), value :: m,n,nz
integer(c_int) :: irp(*), ja(*)
real(c_double) :: val(*)
integer(c_int) :: res
end function d_CSRGDevice2Host
end interface
#if CUDA_SHORT_VERSION <= 10
interface HYBGDeviceAlloc
function d_HYBGDeviceAlloc(Mat,nr,nc,nz) &
& bind(c,name="d_HYBGDeviceAlloc") result(res)
use iso_c_binding
import d_hmat
type(d_Hmat) :: Mat
integer(c_int), value :: nr, nc, nz
integer(c_int) :: res
end function d_HYBGDeviceAlloc
end interface
interface HYBGDeviceFree
function d_HYBGDeviceFree(Mat) &
& bind(c,name="d_HYBGDeviceFree") result(res)
use iso_c_binding
import d_Hmat
type(d_Hmat) :: Mat
integer(c_int) :: res
end function d_HYBGDeviceFree
end interface
interface HYBGDeviceSetMatType
function d_HYBGDeviceSetMatType(Mat,type) &
& bind(c,name="d_HYBGDeviceSetMatType") result(res)
use iso_c_binding
import d_Hmat
type(d_Hmat) :: Mat
integer(c_int),value :: type
integer(c_int) :: res
end function d_HYBGDeviceSetMatType
end interface
interface HYBGDeviceSetMatFillMode
function d_HYBGDeviceSetMatFillMode(Mat,type) &
& bind(c,name="d_HYBGDeviceSetMatFillMode") result(res)
use iso_c_binding
import d_Hmat
type(d_Hmat) :: Mat
integer(c_int),value :: type
integer(c_int) :: res
end function d_HYBGDeviceSetMatFillMode
end interface
interface HYBGDeviceSetMatDiagType
function d_HYBGDeviceSetMatDiagType(Mat,type) &
& bind(c,name="d_HYBGDeviceSetMatDiagType") result(res)
use iso_c_binding
import d_Hmat
type(d_Hmat) :: Mat
integer(c_int),value :: type
integer(c_int) :: res
end function d_HYBGDeviceSetMatDiagType
end interface
interface HYBGDeviceSetMatIndexBase
function d_HYBGDeviceSetMatIndexBase(Mat,type) &
& bind(c,name="d_HYBGDeviceSetMatIndexBase") result(res)
use iso_c_binding
import d_Hmat
type(d_Hmat) :: Mat
integer(c_int),value :: type
integer(c_int) :: res
end function d_HYBGDeviceSetMatIndexBase
end interface
interface HYBGDeviceHybsmAnalysis
function d_HYBGDeviceHybsmAnalysis(Mat) &
& bind(c,name="d_HYBGDeviceHybsmAnalysis") result(res)
use iso_c_binding
import d_Hmat
type(d_Hmat) :: Mat
integer(c_int) :: res
end function d_HYBGDeviceHybsmAnalysis
end interface
interface spsvHYBGDevice
function d_spsvHYBGDevice(Mat,alpha,x,beta,y) &
& bind(c,name="d_spsvHYBGDevice") result(res)
use iso_c_binding
import d_Hmat
type(d_Hmat) :: Mat
type(c_ptr), value :: x
type(c_ptr), value :: y
real(c_double), value :: alpha,beta
integer(c_int) :: res
end function d_spsvHYBGDevice
end interface
interface spmvHYBGDevice
function d_spmvHYBGDevice(Mat,alpha,x,beta,y) &
& bind(c,name="d_spmvHYBGDevice") result(res)
use iso_c_binding
import d_Hmat
type(d_Hmat) :: Mat
type(c_ptr), value :: x
type(c_ptr), value :: y
real(c_double), value :: alpha,beta
integer(c_int) :: res
end function d_spmvHYBGDevice
end interface
interface HYBGHost2Device
function d_HYBGHost2Device(Mat,m,n,nz,irp,ja,val) &
& bind(c,name="d_HYBGHost2Device") result(res)
use iso_c_binding
import d_Hmat
type(d_Hmat) :: Mat
integer(c_int), value :: m,n,nz
integer(c_int) :: irp(*), ja(*)
real(c_double) :: val(*)
integer(c_int) :: res
end function d_HYBGHost2Device
end interface
#endif
end module d_cusparse_mod

@ -0,0 +1,99 @@
/* Parallel Sparse BLAS GPU plugin */
/* (C) Copyright 2013 */
/* Salvatore Filippone */
/* Alessandro Fanfarillo */
/* Redistribution and use in source and binary forms, with or without */
/* modification, are permitted provided that the following conditions */
/* are met: */
/* 1. Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* 2. Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions, and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution. */
/* 3. The name of the PSBLAS group or the names of its contributors may */
/* not be used to endorse or promote products derived from this */
/* software without specific written permission. */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include <cusparse_v2.h>
#include "cintrf.h"
#include "fcusparse.h"
/* Double precision real */
#define TYPE double
#define CUSPARSE_BASE_TYPE CUDA_R_64F
#define T_CSRGDeviceMat d_CSRGDeviceMat
#define T_Cmat d_Cmat
#define T_spmvCSRGDevice d_spmvCSRGDevice
#define T_spsvCSRGDevice d_spsvCSRGDevice
#define T_CSRGDeviceAlloc d_CSRGDeviceAlloc
#define T_CSRGDeviceFree d_CSRGDeviceFree
#define T_CSRGHost2Device d_CSRGHost2Device
#define T_CSRGDevice2Host d_CSRGDevice2Host
#define T_CSRGDeviceSetMatFillMode d_CSRGDeviceSetMatFillMode
#define T_CSRGDeviceSetMatDiagType d_CSRGDeviceSetMatDiagType
#define T_CSRGDeviceGetParms d_CSRGDeviceGetParms
#if CUDA_SHORT_VERSION <= 10
#define T_CSRGDeviceSetMatType d_CSRGDeviceSetMatType
#define T_CSRGDeviceSetMatIndexBase d_CSRGDeviceSetMatIndexBase
#define T_CSRGDeviceCsrsmAnalysis d_CSRGDeviceCsrsmAnalysis
#define cusparseTcsrmv cusparseDcsrmv
#define cusparseTcsrsv_solve cusparseDcsrsv_solve
#define cusparseTcsrsv_analysis cusparseDcsrsv_analysis
#define T_HYBGDeviceMat d_HYBGDeviceMat
#define T_Hmat d_Hmat
#define T_HYBGDeviceFree d_HYBGDeviceFree
#define T_spmvHYBGDevice d_spmvHYBGDevice
#define T_HYBGDeviceAlloc d_HYBGDeviceAlloc
#define T_HYBGDeviceSetMatDiagType d_HYBGDeviceSetMatDiagType
#define T_HYBGDeviceSetMatIndexBase d_HYBGDeviceSetMatIndexBase
#define T_HYBGDeviceSetMatType d_HYBGDeviceSetMatType
#define T_HYBGDeviceSetMatFillMode d_HYBGDeviceSetMatFillMode
#define T_HYBGDeviceHybsmAnalysis d_HYBGDeviceHybsmAnalysis
#define T_spsvHYBGDevice d_spsvHYBGDevice
#define T_HYBGHost2Device d_HYBGHost2Device
#define cusparseThybmv cusparseDhybmv
#define cusparseThybsv_solve cusparseDhybsv_solve
#define cusparseThybsv_analysis cusparseDhybsv_analysis
#define cusparseTcsr2hyb cusparseDcsr2hyb
#elif CUDA_VERSION < 11030
#define T_CSRGDeviceSetMatType d_CSRGDeviceSetMatType
#define T_CSRGDeviceSetMatIndexBase d_CSRGDeviceSetMatIndexBase
#define T_CSRGDeviceCsrsv2Analysis d_CSRGDeviceCsrsv2Analysis
#define cusparseTcsrsv2_bufferSize cusparseDcsrsv2_bufferSize
#define cusparseTcsrsv2_analysis cusparseDcsrsv2_analysis
#define cusparseTcsrsv2_solve cusparseDcsrsv2_solve
#else
#define T_CSRGIsNullSvBuffer d_CSRGIsNullSvBuffer
#define T_CSRGIsNullSvDescr d_CSRGIsNullSvDescr
#define T_CSRGIsNullMvDescr d_CSRGIsNullMvDescr
#define T_CSRGCreateSpMVDescr d_CSRGCreateSpMVDescr
#endif
#include "fcusparse_fct.h"

@ -0,0 +1,261 @@
/* Parallel Sparse BLAS GPU plugin */
/* (C) Copyright 2013 */
/* Salvatore Filippone */
/* Alessandro Fanfarillo */
/* Redistribution and use in source and binary forms, with or without */
/* modification, are permitted provided that the following conditions */
/* are met: */
/* 1. Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* 2. Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions, and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution. */
/* 3. The name of the PSBLAS group or the names of its contributors may */
/* not be used to endorse or promote products derived from this */
/* software without specific written permission. */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
#include "diagdev.h"
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
//new
DiagDeviceParams getDiagDeviceParams(unsigned int rows, unsigned int columns, unsigned int diags, unsigned int elementType)
{
DiagDeviceParams params;
params.elementType = elementType;
//numero di elementi di val
params.rows = rows;
params.columns = columns;
params.diags = diags;
return params;
}
//new
int allocDiagDevice(void ** remoteMatrix, DiagDeviceParams* params)
{
struct DiagDevice *tmp = (struct DiagDevice *)malloc(sizeof(struct DiagDevice));
int ret=SPGPU_SUCCESS;
*remoteMatrix = (void *)tmp;
tmp->rows = params->rows;
tmp->cols = params->columns;
tmp->diags = params->diags;
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->off), tmp->diags*sizeof(int));
/* tmp->baseIndex = params->firstIndex; */
if (params->elementType == SPGPU_TYPE_INT)
{
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->rows*tmp->diags*sizeof(int));
}
else if (params->elementType == SPGPU_TYPE_FLOAT)
{
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->rows*tmp->diags*sizeof(float));
}
else if (params->elementType == SPGPU_TYPE_DOUBLE)
{
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->rows*tmp->diags*sizeof(double));
}
else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT)
{
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->rows*tmp->diags*sizeof(cuFloatComplex));
}
else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE)
{
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->rows*tmp->diags*sizeof(cuDoubleComplex));
}
else
return SPGPU_UNSUPPORTED; // Unsupported params
return ret;
}
void freeDiagDevice(void* remoteMatrix)
{
struct DiagDevice *devMat = (struct DiagDevice *) remoteMatrix;
//fprintf(stderr,"freeHllDevice\n");
if (devMat != NULL) {
freeRemoteBuffer(devMat->off);
freeRemoteBuffer(devMat->cM);
free(remoteMatrix);
}
}
//new
int FallocDiagDevice(void** deviceMat, unsigned int rows, unsigned int columns,unsigned int diags,unsigned int elementType)
{ int i;
DiagDeviceParams p;
p = getDiagDeviceParams(rows, columns, diags,elementType);
i = allocDiagDevice(deviceMat, &p);
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i);
}
return(i);
}
int writeDiagDeviceDouble(void* deviceMat, double* a, int* off, int n)
{ int i,fo,fa;
char buf_a[255], buf_o[255],tmp[255];
struct DiagDevice *devMat = (struct DiagDevice *) deviceMat;
// Ex updateFromHost function
/* memset(buf_a,'\0',255); */
/* memset(buf_o,'\0',255); */
/* memset(tmp,'\0',255); */
/* strcat(buf_a,"mat_"); */
/* strcat(buf_o,"off_"); */
/* sprintf(tmp,"%d_%d.dat",devMat->rows,devMat->cols); */
/* strcat(buf_a,tmp); */
/* memset(tmp,'\0',255); */
/* sprintf(tmp,"%d.dat",devMat->cols); */
/* strcat(buf_o,tmp); */
/* fa = open(buf_a, O_CREAT | O_WRONLY | O_TRUNC, 0664); */
/* fo = open(buf_o, O_CREAT | O_WRONLY | O_TRUNC, 0664); */
/* i = write(fa, a, sizeof(double)*devMat->cols*devMat->rows); */
/* i = write(fo, off, sizeof(int)*devMat->cols); */
/* close(fa); */
/* close(fo); */
i = writeRemoteBuffer((void*) a, (void *)devMat->cM, devMat->rows*devMat->diags*sizeof(double));
i = writeRemoteBuffer((void*) off, (void *)devMat->off, devMat->diags*sizeof(int));
if(i==0)
return SPGPU_SUCCESS;
else
return SPGPU_UNSUPPORTED;
}
int readDiagDeviceDouble(void* deviceMat, double* a, int* off)
{ int i;
struct DiagDevice *devMat = (struct DiagDevice *) deviceMat;
i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(double));
i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int));
/*if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
}*/
return SPGPU_SUCCESS;
}
//new
int spmvDiagDeviceDouble(void *deviceMat, double alpha, void* deviceX,
double beta, void* deviceY)
{
struct DiagDevice *devMat = (struct DiagDevice *) deviceMat;
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
spgpuHandle_t handle=psb_cudaGetHandle();
#ifdef VERBOSE
/*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/
/*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/
/*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/
#endif
/* spgpuDdiagspmv(handle, (double *)y->v_, (double *)y->v_,alpha,(double *)devMat->cM,devMat->off,devMat->rows,devMat->cols,x->v_,beta,devMat->baseIndex); */
spgpuDdiaspmv(handle, (double *)y->v_, (double *)y->v_,alpha,(double *)devMat->cM,devMat->off,devMat->rows,devMat->rows,devMat->cols,devMat->diags,x->v_,beta);
//cudaSync();
return SPGPU_SUCCESS;
}
int writeDiagDeviceFloat(void* deviceMat, float* a, int* off, int n)
{ int i,fo,fa;
char buf_a[255], buf_o[255],tmp[255];
struct DiagDevice *devMat = (struct DiagDevice *) deviceMat;
// Ex updateFromHost function
/* memset(buf_a,'\0',255); */
/* memset(buf_o,'\0',255); */
/* memset(tmp,'\0',255); */
/* strcat(buf_a,"mat_"); */
/* strcat(buf_o,"off_"); */
/* sprintf(tmp,"%d_%d.dat",devMat->rows,devMat->cols); */
/* strcat(buf_a,tmp); */
/* memset(tmp,'\0',255); */
/* sprintf(tmp,"%d.dat",devMat->cols); */
/* strcat(buf_o,tmp); */
/* fa = open(buf_a, O_CREAT | O_WRONLY | O_TRUNC, 0664); */
/* fo = open(buf_o, O_CREAT | O_WRONLY | O_TRUNC, 0664); */
/* i = write(fa, a, sizeof(float)*devMat->cols*devMat->rows); */
/* i = write(fo, off, sizeof(int)*devMat->cols); */
/* close(fa); */
/* close(fo); */
i = writeRemoteBuffer((void*) a, (void *)devMat->cM, devMat->rows*devMat->diags*sizeof(float));
i = writeRemoteBuffer((void*) off, (void *)devMat->off, devMat->diags*sizeof(int));
if(i==0)
return SPGPU_SUCCESS;
else
return SPGPU_UNSUPPORTED;
}
int readDiagDeviceFloat(void* deviceMat, float* a, int* off)
{ int i;
struct DiagDevice *devMat = (struct DiagDevice *) deviceMat;
i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(float));
i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int));
/*if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i);
}*/
return SPGPU_SUCCESS;
}
//new
int spmvDiagDeviceFloat(void *deviceMat, float alpha, void* deviceX,
float beta, void* deviceY)
{
struct DiagDevice *devMat = (struct DiagDevice *) deviceMat;
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
spgpuHandle_t handle=psb_cudaGetHandle();
#ifdef VERBOSE
/*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/
/*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/
/*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/
#endif
/* spgpuDdiagspmv(handle, (float *)y->v_, (float *)y->v_,alpha,(float *)devMat->cM,devMat->off,devMat->rows,devMat->cols,x->v_,beta,devMat->baseIndex); */
spgpuSdiaspmv(handle, (float *)y->v_, (float *)y->v_,alpha,(float *)devMat->cM,devMat->off,devMat->rows,devMat->rows,devMat->cols,devMat->diags,x->v_,beta);
//cudaSync();
return SPGPU_SUCCESS;
}

@ -0,0 +1,90 @@
/* Parallel Sparse BLAS GPU plugin */
/* (C) Copyright 2013 */
/* Salvatore Filippone */
/* Alessandro Fanfarillo */
/* Redistribution and use in source and binary forms, with or without */
/* modification, are permitted provided that the following conditions */
/* are met: */
/* 1. Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* 2. Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions, and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution. */
/* 3. The name of the PSBLAS group or the names of its contributors may */
/* not be used to endorse or promote products derived from this */
/* software without specific written permission. */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
#ifndef _DIAGDEV_H_
#define _DIAGDEV_H_
#include "cintrf.h"
#include "dia.h"
struct DiagDevice
{
// Compressed matrix
void *cM; //it can be float or double
// offset (same size of cM)
int *off;
int rows;
int cols;
int diags;
};
typedef struct DiagDeviceParams
{
unsigned int elementType;
// Number of rows.
// Used to allocate rS array
unsigned int rows;
//unsigned int hackOffsLength;
// Number of columns.
// Used for error-checking
unsigned int columns;
unsigned int diags;
} DiagDeviceParams;
DiagDeviceParams getDiagDeviceParams(unsigned int rows, unsigned int columns,
unsigned int elementType, unsigned int firstIndex);
int FallocDiagDevice(void** deviceMat, unsigned int rows, unsigned int cols,
unsigned int elementType, unsigned int firstIndex);
int allocDiagDevice(void ** remoteMatrix, DiagDeviceParams* params);
void freeDiagDevice(void* remoteMatrix);
int readDiagDeviceDouble(void* deviceMat, double* a, int* off);
int writeDiagDeviceDouble(void* deviceMat, double* a, int* off, int n);
int spmvDiagDeviceDouble(void *deviceMat, double alpha, void* deviceX,
double beta, void* deviceY);
int readDiagDeviceFloat(void* deviceMat, float* a, int* off);
int writeDiagDeviceFloat(void* deviceMat, float* a, int* off, int n);
int spmvDiagDeviceFloat(void *deviceMat, float alpha, void* deviceX,
float beta, void* deviceY);
#endif

@ -0,0 +1,224 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
module diagdev_mod
use iso_c_binding
use core_mod
type, bind(c) :: diagdev_parms
integer(c_int) :: element_type
integer(c_int) :: rows
integer(c_int) :: columns
integer(c_int) :: firstIndex
end type diagdev_parms
interface
function FgetDiagDeviceParams(rows, columns, elementType, firstIndex) &
& result(res) bind(c,name='getDiagDeviceParams')
use iso_c_binding
import :: diagdev_parms
type(diagdev_parms) :: res
integer(c_int), value :: rows,columns,elementType,firstIndex
end function FgetDiagDeviceParams
end interface
interface
function FallocDiagDevice(deviceMat,rows,columns,&
& elementType,firstIndex) &
& result(res) bind(c,name='FallocDiagDevice')
use iso_c_binding
integer(c_int) :: res
integer(c_int), value :: rows,columns,elementType,firstIndex
type(c_ptr) :: deviceMat
end function FallocDiagDevice
end interface
interface writeDiagDevice
function writeDiagDeviceFloat(deviceMat,a,off,n) &
& result(res) bind(c,name='writeDiagDeviceFloat')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
integer(c_int), value :: n
real(c_float) :: a(n,*)
integer(c_int) :: off(*)!,irn(*)
end function writeDiagDeviceFloat
function writeDiagDeviceDouble(deviceMat,a,off,n) &
& result(res) bind(c,name='writeDiagDeviceDouble')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
integer(c_int),value :: n
real(c_double) :: a(n,*)
integer(c_int) :: off(*)
end function writeDiagDeviceDouble
function writeDiagDeviceFloatComplex(deviceMat,a,off,n) &
& result(res) bind(c,name='writeDiagDeviceFloatComplex')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
integer(c_int), value :: n
complex(c_float_complex) :: a(n,*)
integer(c_int) :: off(*)!,irn(*)
end function writeDiagDeviceFloatComplex
function writeDiagDeviceDoubleComplex(deviceMat,a,off,n) &
& result(res) bind(c,name='writeDiagDeviceDoubleComplex')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
integer(c_int), value :: n
complex(c_double_complex) :: a(n,*)
integer(c_int) :: off(*)!,irn(*)
end function writeDiagDeviceDoubleComplex
end interface
interface readDiagDevice
function readDiagDeviceFloat(deviceMat,a,off,n) &
& result(res) bind(c,name='readDiagDeviceFloat')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
real(c_float) :: a(n,*)
integer(c_int) :: off(*)!,irn(*)
end function readDiagDeviceFloat
function readDiagDeviceDouble(deviceMat,a,off,n) &
& result(res) bind(c,name='readDiagDeviceDouble')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
integer(c_int),value :: n
real(c_double) :: a(n,*)
integer(c_int) :: off(*)
end function readDiagDeviceDouble
function readDiagDeviceFloatComplex(deviceMat,a,off,n) &
& result(res) bind(c,name='readDiagDeviceFloatComplex')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
integer(c_int), value :: n
complex(c_float_complex) :: a(n,*)
integer(c_int) :: off(*)!,irn(*)
end function readDiagDeviceFloatComplex
function readDiagDeviceDoubleComplex(deviceMat,a,off,n) &
& result(res) bind(c,name='readDiagDeviceDoubleComplex')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
integer(c_int), value :: n
complex(c_double_complex) :: a(n,*)
integer(c_int) :: off(*)!,irn(*)
end function readDiagDeviceDoubleComplex
end interface
interface
subroutine freeDiagDevice(deviceMat) &
& bind(c,name='freeDiagDevice')
use iso_c_binding
type(c_ptr), value :: deviceMat
end subroutine freeDiagDevice
end interface
interface
subroutine resetDiagTimer() bind(c,name='resetDiagTimer')
use iso_c_binding
end subroutine resetDiagTimer
end interface
interface
function getDiagTimer() &
& bind(c,name='getDiagTimer') result(res)
use iso_c_binding
real(c_double) :: res
end function getDiagTimer
end interface
interface
function getDiagDevicePitch(deviceMat) &
& bind(c,name='getDiagDevicePitch') result(res)
use iso_c_binding
type(c_ptr), value :: deviceMat
integer(c_int) :: res
end function getDiagDevicePitch
end interface
interface
function getDiagDeviceMaxRowSize(deviceMat) &
& bind(c,name='getDiagDeviceMaxRowSize') result(res)
use iso_c_binding
type(c_ptr), value :: deviceMat
integer(c_int) :: res
end function getDiagDeviceMaxRowSize
end interface
interface spmvDiagDevice
function spmvDiagDeviceFloat(deviceMat,alpha,x,beta,y) &
& result(res) bind(c,name='spmvDiagDeviceFloat')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat, x, y
real(c_float),value :: alpha, beta
end function spmvDiagDeviceFloat
function spmvDiagDeviceDouble(deviceMat,alpha,x,beta,y) &
& result(res) bind(c,name='spmvDiagDeviceDouble')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat, x, y
real(c_double),value :: alpha, beta
end function spmvDiagDeviceDouble
function spmvDiagDeviceFloatComplex(deviceMat,alpha,x,beta,y) &
& result(res) bind(c,name='spmvDiagDeviceFloatComplex')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat, x, y
complex(c_float_complex),value :: alpha, beta
end function spmvDiagDeviceFloatComplex
function spmvDiagDeviceDoubleComplex(deviceMat,alpha,x,beta,y) &
& result(res) bind(c,name='spmvDiagDeviceDoubleComplex')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat, x, y
complex(c_double_complex),value :: alpha, beta
end function spmvDiagDeviceDoubleComplex
end interface spmvDiagDevice
end module diagdev_mod

@ -0,0 +1,321 @@
/* Parallel Sparse BLAS GPU plugin */
/* (C) Copyright 2013 */
/* Salvatore Filippone */
/* Alessandro Fanfarillo */
/* Redistribution and use in source and binary forms, with or without */
/* modification, are permitted provided that the following conditions */
/* are met: */
/* 1. Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* 2. Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions, and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution. */
/* 3. The name of the PSBLAS group or the names of its contributors may */
/* not be used to endorse or promote products derived from this */
/* software without specific written permission. */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
#include <sys/time.h>
#include "dnsdev.h"
#define PASS_RS 0
#define IMIN(a,b) ((a)<(b) ? (a) : (b))
DnsDeviceParams getDnsDeviceParams(unsigned int rows, unsigned int columns,
unsigned int elementType, unsigned int firstIndex)
{
DnsDeviceParams params;
if (elementType == SPGPU_TYPE_DOUBLE)
{
params.pitch = ((rows + ELL_PITCH_ALIGN_D - 1)/ELL_PITCH_ALIGN_D)*ELL_PITCH_ALIGN_D;
}
else
{
params.pitch = ((rows + ELL_PITCH_ALIGN_S - 1)/ELL_PITCH_ALIGN_S)*ELL_PITCH_ALIGN_S;
}
//For complex?
params.elementType = elementType;
params.rows = rows;
params.columns = columns;
params.firstIndex = firstIndex;
return params;
}
//new
int allocDnsDevice(void ** remoteMatrix, DnsDeviceParams* params)
{
struct DnsDevice *tmp = (struct DnsDevice *)malloc(sizeof(struct DnsDevice));
*remoteMatrix = (void *)tmp;
tmp->rows = params->rows;
tmp->columns = params->columns;
tmp->cMPitch = params->pitch;
tmp->pitch= tmp->cMPitch;
tmp->allocsize = (int)tmp->columns * tmp->pitch;
tmp->baseIndex = params->firstIndex;
//fprintf(stderr,"allocDnsDevice: %d %d %d \n",tmp->pitch, params->maxRowSize, params->avgRowSize);
if (params->elementType == SPGPU_TYPE_FLOAT)
allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(float));
else if (params->elementType == SPGPU_TYPE_DOUBLE)
allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(double));
else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT)
allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuFloatComplex));
else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE)
allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuDoubleComplex));
else
return SPGPU_UNSUPPORTED; // Unsupported params
//fprintf(stderr,"From allocDnsDevice: %d %d %d %p %p %p\n",tmp->maxRowSize,
// tmp->avgRowSize,tmp->allocsize,tmp->rS,tmp->rP,tmp->cM);
return SPGPU_SUCCESS;
}
void freeDnsDevice(void* remoteMatrix)
{
struct DnsDevice *devMat = (struct DnsDevice *) remoteMatrix;
//fprintf(stderr,"freeDnsDevice\n");
if (devMat != NULL) {
freeRemoteBuffer(devMat->cM);
free(remoteMatrix);
}
}
//new
int FallocDnsDevice(void** deviceMat, unsigned int rows,
unsigned int columns, unsigned int elementType,
unsigned int firstIndex)
{ int i;
DnsDeviceParams p;
p = getDnsDeviceParams(rows, columns, elementType, firstIndex);
i = allocDnsDevice(deviceMat, &p);
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","FallocDnsDevice",i);
}
return(i);
}
int spmvDnsDeviceFloat(char transa, int m, int n, int k, float *alpha,
void *deviceMat, void* deviceX, float *beta, void* deviceY)
{
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
int status;
cublasHandle_t handle=psb_cudaGetCublasHandle();
cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C));
/* Note: the M,N,K choices according to TRANS have already been handled in the caller */
if (n == 1) {
status = cublasSgemv(handle, trans, m,k,
alpha, devMat->cM,devMat->pitch, x->v_,1,
beta, y->v_,1);
} else {
status = cublasSgemm(handle, trans, CUBLAS_OP_N, m,n,k,
alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
beta, y->v_,y->pitch_);
}
if (status == CUBLAS_STATUS_SUCCESS)
return SPGPU_SUCCESS;
else
return SPGPU_UNSUPPORTED;
}
int spmvDnsDeviceDouble(char transa, int m, int n, int k, double *alpha,
void *deviceMat, void* deviceX, double *beta, void* deviceY)
{
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
int status;
cublasHandle_t handle=psb_cudaGetCublasHandle();
cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C));
/* Note: the M,N,K choices according to TRANS have already been handled in the caller */
if (n == 1) {
status = cublasDgemv(handle, trans, m,k,
alpha, devMat->cM,devMat->pitch, x->v_,1,
beta, y->v_,1);
} else {
status = cublasDgemm(handle, trans, CUBLAS_OP_N, m,n,k,
alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
beta, y->v_,y->pitch_);
}
if (status == CUBLAS_STATUS_SUCCESS)
return SPGPU_SUCCESS;
else
return SPGPU_UNSUPPORTED;
}
int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k, float complex *alpha,
void *deviceMat, void* deviceX, float complex *beta, void* deviceY)
{
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
int status;
cublasHandle_t handle=psb_cudaGetCublasHandle();
cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C));
/* Note: the M,N,K choices according to TRANS have already been handled in the caller */
if (n == 1) {
status = cublasCgemv(handle, trans, m,k,
alpha, devMat->cM,devMat->pitch, x->v_,1,
beta, y->v_,1);
} else {
status = cublasCgemm(handle, trans, CUBLAS_OP_N, m,n,k,
alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
beta, y->v_,y->pitch_);
}
if (status == CUBLAS_STATUS_SUCCESS)
return SPGPU_SUCCESS;
else
return SPGPU_UNSUPPORTED;
}
int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k, double complex *alpha,
void *deviceMat, void* deviceX, double complex *beta, void* deviceY)
{
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
int status;
cublasHandle_t handle=psb_cudaGetCublasHandle();
cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C));
/* Note: the M,N,K choices according to TRANS have already been handled in the caller */
if (n == 1) {
status = cublasZgemv(handle, trans, m,k,
alpha, devMat->cM,devMat->pitch, x->v_,1,
beta, y->v_,1);
} else {
status = cublasZgemm(handle, trans, CUBLAS_OP_N, m,n,k,
alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
beta, y->v_,y->pitch_);
}
if (status == CUBLAS_STATUS_SUCCESS)
return SPGPU_SUCCESS;
else
return SPGPU_UNSUPPORTED;
}
int writeDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc)
{ int i;
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
int pitch=devMat->pitch;
i = cublasSetMatrix(lda,nc,sizeof(float), (void*) val,lda, (void *)devMat->cM, pitch);
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceFloat",i);
}
return SPGPU_SUCCESS;
}
int writeDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc)
{ int i;
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
int pitch=devMat->pitch;
i = cublasSetMatrix(lda,nc,sizeof(double), (void*) val,lda, (void *)devMat->cM, pitch);
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceDouble",i);
}
return SPGPU_SUCCESS;
}
int writeDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int nc)
{ int i;
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
int pitch=devMat->pitch;
i = cublasSetMatrix(lda,nc,sizeof(cuFloatComplex), (void*) val,lda, (void *)devMat->cM, pitch);
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceFloatComplex",i);
}
return SPGPU_SUCCESS;
}
int writeDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, int nc)
{ int i;
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
int pitch=devMat->pitch;
i = cublasSetMatrix(lda,nc,sizeof(cuDoubleComplex), (void*) val,lda, (void *)devMat->cM, pitch);
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceDoubleComplex",i);
}
return SPGPU_SUCCESS;
}
int readDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc)
{ int i;
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
int pitch=devMat->pitch;
i = cublasGetMatrix(lda,nc,sizeof(float), (void*) val,lda, (void *)devMat->cM, pitch);
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceFloat",i);
}
return SPGPU_SUCCESS;
}
int readDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc)
{ int i;
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
int pitch=devMat->pitch;
i = cublasGetMatrix(lda,nc,sizeof(double), (void*) val,lda, (void *)devMat->cM, pitch);
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceDouble",i);
}
return SPGPU_SUCCESS;
}
int readDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int nc)
{ int i;
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
int pitch=devMat->pitch;
i = cublasGetMatrix(lda,nc,sizeof(cuFloatComplex), (void*) val,lda, (void *)devMat->cM, pitch);
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceFloatComplex",i);
}
return SPGPU_SUCCESS;
}
int readDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, int nc)
{ int i;
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
int pitch=devMat->pitch;
i = cublasGetMatrix(lda,nc,sizeof(cuDoubleComplex), (void*) val,lda, (void *)devMat->cM, pitch);
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceDoubleComplex",i);
}
return SPGPU_SUCCESS;
}
int getDnsDevicePitch(void* deviceMat)
{ int i;
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
i = devMat->pitch;
return(i);
}

@ -0,0 +1,117 @@
/* Parallel Sparse BLAS GPU plugin */
/* (C) Copyright 2013 */
/* Salvatore Filippone */
/* Alessandro Fanfarillo */
/* Redistribution and use in source and binary forms, with or without */
/* modification, are permitted provided that the following conditions */
/* are met: */
/* 1. Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* 2. Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions, and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution. */
/* 3. The name of the PSBLAS group or the names of its contributors may */
/* not be used to endorse or promote products derived from this */
/* software without specific written permission. */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
#ifndef _DNSDEV_H_
#define _DNSDEV_H_
#include "cintrf.h"
#include "cuComplex.h"
#include "cublas_v2.h"
struct DnsDevice
{
// Compressed matrix
void *cM; //it can be float or double
//matrix size (uncompressed)
int rows;
int columns;
int pitch; //old
int cMPitch;
//allocation size (in elements)
int allocsize;
/*(i.e. 0 for C, 1 for Fortran)*/
int baseIndex;
};
typedef struct DnsDeviceParams
{
// The resulting allocation for cM and rP will be pitch*maxRowSize*(size of the elementType)
unsigned int elementType;
// Pitch (in number of elements)
unsigned int pitch;
// Number of rows.
// Used to allocate rS array
unsigned int rows;
// Number of columns.
// Used for error-checking
unsigned int columns;
// First index (e.g 0 or 1)
unsigned int firstIndex;
} DnsDeviceParams;
int FallocDnsDevice(void** deviceMat, unsigned int rows,
unsigned int columns, unsigned int elementType,
unsigned int firstIndex);
int allocDnsDevice(void ** remoteMatrix, DnsDeviceParams* params);
void freeDnsDevice(void* remoteMatrix);
int writeDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc);
int writeDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc);
int writeDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int nc);
int writeDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, int nc);
int readDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc);
int readDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc);
int readDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int nc);
int readDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, int nc);
int spmvDnsDeviceFloat(char transa, int m, int n, int k,
float *alpha, void *deviceMat, void* deviceX,
float *beta, void* deviceY);
int spmvDnsDeviceDouble(char transa, int m, int n, int k,
double *alpha, void *deviceMat, void* deviceX,
double *beta, void* deviceY);
int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k,
float complex *alpha, void *deviceMat, void* deviceX,
float complex *beta, void* deviceY);
int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k,
double complex *alpha, void *deviceMat, void* deviceX,
double complex *beta, void* deviceY);
int getDnsDevicePitch(void* deviceMat);
// sparse Dns matrix-vector product
//int spmvDnsDeviceFloat(void *deviceMat, float* alpha, void* deviceX, float* beta, void* deviceY);
//int spmvDnsDeviceDouble(void *deviceMat, double* alpha, void* deviceX, double* beta, void* deviceY);
#endif

@ -0,0 +1,270 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
module dnsdev_mod
use iso_c_binding
use core_mod
type, bind(c) :: dnsdev_parms
integer(c_int) :: element_type
integer(c_int) :: pitch
integer(c_int) :: rows
integer(c_int) :: columns
integer(c_int) :: maxRowSize
integer(c_int) :: avgRowSize
integer(c_int) :: firstIndex
end type dnsdev_parms
interface
function FgetDnsDeviceParams(rows, columns, elementType, firstIndex) &
& result(res) bind(c,name='getDnsDeviceParams')
use iso_c_binding
import :: dnsdev_parms
type(dnsdev_parms) :: res
integer(c_int), value :: rows,columns,elementType,firstIndex
end function FgetDnsDeviceParams
end interface
interface
function FallocDnsDevice(deviceMat,rows,columns,&
& elementType,firstIndex) &
& result(res) bind(c,name='FallocDnsDevice')
use iso_c_binding
integer(c_int) :: res
integer(c_int), value :: rows,columns,elementType,firstIndex
type(c_ptr) :: deviceMat
end function FallocDnsDevice
end interface
interface writeDnsDevice
function writeDnsDeviceFloat(deviceMat,val,lda,nc) &
& result(res) bind(c,name='writeDnsDeviceFloat')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
integer(c_int), value :: lda,nc
real(c_float) :: val(lda,*)
end function writeDnsDeviceFloat
function writeDnsDeviceDouble(deviceMat,val,lda,nc) &
& result(res) bind(c,name='writeDnsDeviceDouble')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
integer(c_int), value :: lda,nc
real(c_double) :: val(lda,*)
end function writeDnsDeviceDouble
function writeDnsDeviceFloatComplex(deviceMat,val,lda,nc) &
& result(res) bind(c,name='writeDnsDeviceFloatComple')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
integer(c_int), value :: lda,nc
complex(c_float_complex) :: val(lda,*)
end function writeDnsDeviceFloatComplex
function writeDnsDeviceDoubleComplex(deviceMat,val,lda,nc) &
& result(res) bind(c,name='writeDnsDeviceDoubleComplex')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
integer(c_int), value :: lda,nc
complex(c_double_complex) :: val(lda,*)
end function writeDnsDeviceDoubleComplex
end interface
interface readDnsDevice
function readDnsDeviceFloat(deviceMat,val,lda,nc) &
& result(res) bind(c,name='readDnsDeviceFloat')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
integer(c_int), value :: lda,nc
real(c_float) :: val(lda,*)
end function readDnsDeviceFloat
function readDnsDeviceDouble(deviceMat,val,lda,nc) &
& result(res) bind(c,name='readDnsDeviceDouble')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
integer(c_int), value :: lda,nc
real(c_double) :: val(lda,*)
end function readDnsDeviceDouble
function readDnsDeviceFloatComplex(deviceMat,val,lda,nc) &
& result(res) bind(c,name='readDnsDeviceFloatComple')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
integer(c_int), value :: lda,nc
complex(c_float_complex) :: val(lda,*)
end function readDnsDeviceFloatComplex
function readDnsDeviceDoubleComplex(deviceMat,val,lda,nc) &
& result(res) bind(c,name='readDnsDeviceDoubleComplex')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
integer(c_int), value :: lda,nc
complex(c_double_complex) :: val(lda,*)
end function readDnsDeviceDoubleComplex
end interface
interface
subroutine freeDnsDevice(deviceMat) &
& bind(c,name='freeDnsDevice')
use iso_c_binding
type(c_ptr), value :: deviceMat
end subroutine freeDnsDevice
end interface
interface
subroutine resetDnsTimer() bind(c,name='resetDnsTimer')
use iso_c_binding
end subroutine resetDnsTimer
end interface
interface
function getDnsTimer() &
& bind(c,name='getDnsTimer') result(res)
use iso_c_binding
real(c_double) :: res
end function getDnsTimer
end interface
interface
function getDnsDevicePitch(deviceMat) &
& bind(c,name='getDnsDevicePitch') result(res)
use iso_c_binding
type(c_ptr), value :: deviceMat
integer(c_int) :: res
end function getDnsDevicePitch
end interface
!!$ interface csputDnsDeviceFloat
!!$ function dev_csputDnsDeviceFloat(deviceMat, nnz, ia, ja, val) &
!!$ & result(res) bind(c,name='dev_csputDnsDeviceFloat')
!!$ use iso_c_binding
!!$ integer(c_int) :: res
!!$ type(c_ptr), value :: deviceMat , ia, ja, val
!!$ integer(c_int), value :: nnz
!!$ end function dev_csputDnsDeviceFloat
!!$ end interface
!!$
!!$ interface csputDnsDeviceDouble
!!$ function dev_csputDnsDeviceDouble(deviceMat, nnz, ia, ja, val) &
!!$ & result(res) bind(c,name='dev_csputDnsDeviceDouble')
!!$ use iso_c_binding
!!$ integer(c_int) :: res
!!$ type(c_ptr), value :: deviceMat , ia, ja, val
!!$ integer(c_int), value :: nnz
!!$ end function dev_csputDnsDeviceDouble
!!$ end interface
!!$
!!$ interface csputDnsDeviceFloatComplex
!!$ function dev_csputDnsDeviceFloatComplex(deviceMat, nnz, ia, ja, val) &
!!$ & result(res) bind(c,name='dev_csputDnsDeviceFloatComplex')
!!$ use iso_c_binding
!!$ integer(c_int) :: res
!!$ type(c_ptr), value :: deviceMat , ia, ja, val
!!$ integer(c_int), value :: nnz
!!$ end function dev_csputDnsDeviceFloatComplex
!!$ end interface
!!$
!!$ interface csputDnsDeviceDoubleComplex
!!$ function dev_csputDnsDeviceDoubleComplex(deviceMat, nnz, ia, ja, val) &
!!$ & result(res) bind(c,name='dev_csputDnsDeviceDoubleComplex')
!!$ use iso_c_binding
!!$ integer(c_int) :: res
!!$ type(c_ptr), value :: deviceMat , ia, ja, val
!!$ integer(c_int), value :: nnz
!!$ end function dev_csputDnsDeviceDoubleComplex
!!$ end interface
interface spmvDnsDevice
function spmvDnsDeviceFloat(transa,m,n,k,alpha,deviceMat,x,beta,y) &
& result(res) bind(c,name='spmvDnsDeviceFloat')
use iso_c_binding
character(c_char), value :: transa
integer(c_int), value :: m, n, k
integer(c_int) :: res
type(c_ptr), value :: deviceMat, x, y
real(c_float) :: alpha, beta
end function spmvDnsDeviceFloat
function spmvDnsDeviceDouble(transa,m,n,k,alpha,deviceMat,x,beta,y) &
& result(res) bind(c,name='spmvDnsDeviceDouble')
use iso_c_binding
character(c_char), value :: transa
integer(c_int), value :: m, n, k
integer(c_int) :: res
type(c_ptr), value :: deviceMat, x, y
real(c_double) :: alpha, beta
end function spmvDnsDeviceDouble
function spmvDnsDeviceFloatComplex(transa,m,n,k,alpha,deviceMat,x,beta,y) &
& result(res) bind(c,name='spmvDnsDeviceFloatComplex')
use iso_c_binding
character(c_char), value :: transa
integer(c_int), value :: m, n, k
integer(c_int) :: res
type(c_ptr), value :: deviceMat, x, y
complex(c_float_complex) :: alpha, beta
end function spmvDnsDeviceFloatComplex
function spmvDnsDeviceDoubleComplex(transa,m,n,k,alpha,deviceMat,x,beta,y) &
& result(res) bind(c,name='spmvDnsDeviceDoubleComplex')
use iso_c_binding
character(c_char), value :: transa
integer(c_int), value :: m, n, k
integer(c_int) :: res
type(c_ptr), value :: deviceMat, x, y
complex(c_double_complex) :: alpha, beta
end function spmvDnsDeviceDoubleComplex
end interface
end module dnsdev_mod

@ -0,0 +1,301 @@
/* Parallel Sparse BLAS GPU plugin */
/* (C) Copyright 2013 */
/* Salvatore Filippone */
/* Alessandro Fanfarillo */
/* Redistribution and use in source and binary forms, with or without */
/* modification, are permitted provided that the following conditions */
/* are met: */
/* 1. Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* 2. Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions, and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution. */
/* 3. The name of the PSBLAS group or the names of its contributors may */
/* not be used to endorse or promote products derived from this */
/* software without specific written permission. */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
#include <stdio.h>
#include <complex.h>
//#include "utils.h"
//#include "common.h"
#include "dvectordev.h"
int registerMappedDouble(void *buff, void **d_p, int n, double dummy)
{
return registerMappedMemory(buff,d_p,n*sizeof(double));
}
int writeMultiVecDeviceDouble(void* deviceVec, double* hostVec)
{ int i;
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
// Ex updateFromHost vector function
i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_, devVec->pitch_*devVec->count_*sizeof(double));
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","FallocMultiVecDevice",i);
}
return(i);
}
int writeMultiVecDeviceDoubleR2(void* deviceVec, double* hostVec, int ld)
{ int i;
i = writeMultiVecDeviceDouble(deviceVec, (void *) hostVec);
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","writeMultiVecDeviceDoubleR2",i);
}
return(i);
}
int readMultiVecDeviceDouble(void* deviceVec, double* hostVec)
{ int i,j;
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_,
devVec->pitch_*devVec->count_*sizeof(double));
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceDouble",i);
}
return(i);
}
int readMultiVecDeviceDoubleR2(void* deviceVec, double* hostVec, int ld)
{ int i;
i = readMultiVecDeviceDouble(deviceVec, hostVec);
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceDoubleR2",i);
}
return(i);
}
int setscalMultiVecDeviceDouble(double val, int first, int last,
int indexBase, void* devMultiVecX)
{ int i=0;
int pitch = 0;
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
spgpuHandle_t handle=psb_cudaGetHandle();
spgpuDsetscal(handle, first, last, indexBase, val, (double *) devVecX->v_);
return(i);
}
int geinsMultiVecDeviceDouble(int n, void* devMultiVecIrl, void* devMultiVecVal,
int dupl, int indexBase, void* devMultiVecX)
{ int j=0, i=0,nmin=0,nmax=0;
int pitch = 0;
double beta;
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl;
struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal;
spgpuHandle_t handle=psb_cudaGetHandle();
pitch = devVecIrl->pitch_;
if ((n > devVecIrl->size_) || (n>devVecVal->size_ ))
return SPGPU_UNSUPPORTED;
//fprintf(stderr,"geins: %d %d %p %p %p\n",dupl,n,devVecIrl->v_,devVecVal->v_,devVecX->v_);
if (dupl == INS_OVERWRITE)
beta = 0.0;
else if (dupl == INS_ADD)
beta = 1.0;
else
beta = 0.0;
spgpuDscat(handle, (double *) devVecX->v_, n, (double*)devVecVal->v_,
(int*)devVecIrl->v_, indexBase, beta);
return(i);
}
int igathMultiVecDeviceDoubleVecIdx(void* deviceVec, int vectorId, int n,
int first, void* deviceIdx, int hfirst,
void* host_values, int indexBase)
{
int i, *idx;
struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx;
i= igathMultiVecDeviceDouble(deviceVec, vectorId, n,
first, (void*) devIdx->v_, hfirst, host_values, indexBase);
return(i);
}
int igathMultiVecDeviceDouble(void* deviceVec, int vectorId, int n,
int first, void* indexes, int hfirst, void* host_values, int indexBase)
{
int i, *idx =(int *) indexes;;
double *hv = (double *) host_values;;
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
spgpuHandle_t handle=psb_cudaGetHandle();
i=0;
hv = &(hv[hfirst-indexBase]);
idx = &(idx[first-indexBase]);
spgpuDgath(handle,hv, n, idx,indexBase, (double *) devVec->v_+vectorId*devVec->pitch_);
return(i);
}
int iscatMultiVecDeviceDoubleVecIdx(void* deviceVec, int vectorId, int n, int first, void *deviceIdx,
int hfirst, void* host_values, int indexBase, double beta)
{
int i, *idx;
struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx;
i= iscatMultiVecDeviceDouble(deviceVec, vectorId, n, first,
(void*) devIdx->v_, hfirst,host_values, indexBase, beta);
return(i);
}
int iscatMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, int first, void *indexes,
int hfirst, void* host_values, int indexBase, double beta)
{ int i=0;
double *hv = (double *) host_values;
int *idx=(int *) indexes;
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
spgpuHandle_t handle=psb_cudaGetHandle();
idx = &(idx[first-indexBase]);
hv = &(hv[hfirst-indexBase]);
spgpuDscat(handle, (double *) devVec->v_, n, hv, idx, indexBase, beta);
return SPGPU_SUCCESS;
}
int scalMultiVecDeviceDouble(double alpha, void* devMultiVecA)
{ int i=0;
spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
// Note: inner kernel can handle aliased input/output
spgpuDscal(handle, (double *)devVecA->v_, devVecA->pitch_,
alpha, (double *)devVecA->v_);
return(i);
}
int nrm2MultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA)
{ int i=0;
spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
spgpuDmnrm2(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_);
return(i);
}
int amaxMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA)
{ int i=0;
spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
spgpuDmamax(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_);
return(i);
}
int asumMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA)
{ int i=0;
spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
spgpuDmasum(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_);
return(i);
}
int dotMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA, void* devMultiVecB)
{int i=0;
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
spgpuHandle_t handle=psb_cudaGetHandle();
spgpuDmdot(handle, y_res, n, (double*)devVecA->v_, (double*)devVecB->v_,devVecA->count_,devVecB->pitch_);
return(i);
}
int axpbyMultiVecDeviceDouble(int n,double alpha, void* devMultiVecX,
double beta, void* devMultiVecY)
{ int j=0, i=0;
int pitch = 0;
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY;
spgpuHandle_t handle=psb_cudaGetHandle();
pitch = devVecY->pitch_;
if ((n > devVecY->size_) || (n>devVecX->size_ ))
return SPGPU_UNSUPPORTED;
for(j=0;j<devVecY->count_;j++)
spgpuDaxpby(handle,(double*)devVecY->v_+pitch*j, n, beta,
(double*)devVecY->v_+pitch*j, alpha,(double*) devVecX->v_+pitch*j);
return(i);
}
int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceVecB)
{ int i = 0;
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
spgpuHandle_t handle=psb_cudaGetHandle();
if ((n > devVecA->size_) || (n>devVecB->size_ ))
return SPGPU_UNSUPPORTED;
spgpuDmaxy(handle, (double*)devVecB->v_, n, alpha, (double*)devVecA->v_,
(double*)devVecB->v_, devVecA->count_, devVecA->pitch_);
return(i);
}
int axybzMultiVecDeviceDouble(int n, double alpha, void *deviceVecA,
void *deviceVecB, double beta, void *deviceVecZ)
{ int i=0;
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ;
spgpuHandle_t handle=psb_cudaGetHandle();
if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ ))
return SPGPU_UNSUPPORTED;
spgpuDmaxypbz(handle, (double*)devVecZ->v_, n, beta, (double*)devVecZ->v_,
alpha, (double*) devVecA->v_, (double*) devVecB->v_,
devVecB->count_, devVecB->pitch_);
return(i);
}
int absMultiVecDeviceDouble2(int n, double alpha, void *deviceVecA,
void *deviceVecB)
{ int i=0;
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
spgpuHandle_t handle=psb_cudaGetHandle();
if ((n > devVecA->size_) || (n>devVecB->size_ ))
return SPGPU_UNSUPPORTED;
spgpuDabs(handle, (double*)devVecB->v_, n, alpha, (double*)devVecA->v_);
return(i);
}
int absMultiVecDeviceDouble(int n, double alpha, void *deviceVecA)
{ int i = 0;
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
spgpuHandle_t handle=psb_cudaGetHandle();
if (n > devVecA->size_)
return SPGPU_UNSUPPORTED;
spgpuDabs(handle, (double*)devVecA->v_, n, alpha, (double*)devVecA->v_);
return(i);
}

@ -0,0 +1,75 @@
/* Parallel Sparse BLAS GPU plugin */
/* (C) Copyright 2013 */
/* Salvatore Filippone */
/* Alessandro Fanfarillo */
/* Redistribution and use in source and binary forms, with or without */
/* modification, are permitted provided that the following conditions */
/* are met: */
/* 1. Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* 2. Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions, and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution. */
/* 3. The name of the PSBLAS group or the names of its contributors may */
/* not be used to endorse or promote products derived from this */
/* software without specific written permission. */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
#pragma once
//#include "utils.h"
#include "vectordev.h"
#include "cuda_runtime.h"
#include "core.h"
int registerMappedDouble(void *, void **, int, double);
int writeMultiVecDeviceDouble(void* deviceMultiVec, double* hostMultiVec);
int writeMultiVecDeviceDoubleR2(void* deviceMultiVec, double* hostMultiVec, int ld);
int readMultiVecDeviceDouble(void* deviceMultiVec, double* hostMultiVec);
int readMultiVecDeviceDoubleR2(void* deviceMultiVec, double* hostMultiVec, int ld);
int setscalMultiVecDeviceDouble(double val, int first, int last,
int indexBase, void* devVecX);
int geinsMultiVecDeviceDouble(int n, void* devVecIrl, void* devVecVal,
int dupl, int indexBase, void* devVecX);
int igathMultiVecDeviceDoubleVecIdx(void* deviceVec, int vectorId, int n,
int first, void* deviceIdx, int hfirst,
void* host_values, int indexBase);
int igathMultiVecDeviceDouble(void* deviceVec, int vectorId, int n,
int first, void* indexes, int hfirst, void* host_values,
int indexBase);
int iscatMultiVecDeviceDoubleVecIdx(void* deviceVec, int vectorId, int n, int first,
void *deviceIdx, int hfirst, void* host_values,
int indexBase, double beta);
int iscatMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, int first, void *indexes,
int hfirst, void* host_values, int indexBase, double beta);
int scalMultiVecDeviceDouble(double alpha, void* devMultiVecA);
int nrm2MultiVecDeviceDouble(double* y_res, int n, void* devVecA);
int amaxMultiVecDeviceDouble(double* y_res, int n, void* devVecA);
int asumMultiVecDeviceDouble(double* y_res, int n, void* devVecA);
int dotMultiVecDeviceDouble(double* y_res, int n, void* devVecA, void* devVecB);
int axpbyMultiVecDeviceDouble(int n, double alpha, void* devVecX, double beta, void* devVecY);
int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceVecB);
int axybzMultiVecDeviceDouble(int n, double alpha, void *deviceVecA,
void *deviceVecB, double beta, void *deviceVecZ);
int absMultiVecDeviceDouble(int n, double alpha, void *deviceVecA);
int absMultiVecDeviceDouble2(int n, double alpha, void *deviceVecA, void *deviceVecB);

@ -0,0 +1,686 @@
/* Parallel Sparse BLAS GPU plugin */
/* (C) Copyright 2013 */
/* Salvatore Filippone */
/* Alessandro Fanfarillo */
/* Redistribution and use in source and binary forms, with or without */
/* modification, are permitted provided that the following conditions */
/* are met: */
/* 1. Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* 2. Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions, and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution. */
/* 3. The name of the PSBLAS group or the names of its contributors may */
/* not be used to endorse or promote products derived from this */
/* software without specific written permission. */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
#include <sys/time.h>
#include "elldev.h"
#define PASS_RS 0
EllDeviceParams getEllDeviceParams(unsigned int rows, unsigned int maxRowSize,
unsigned int nnzeros,
unsigned int columns, unsigned int elementType,
unsigned int firstIndex)
{
EllDeviceParams params;
if (elementType == SPGPU_TYPE_DOUBLE)
{
params.pitch = ((rows + ELL_PITCH_ALIGN_D - 1)/ELL_PITCH_ALIGN_D)*ELL_PITCH_ALIGN_D;
}
else
{
params.pitch = ((rows + ELL_PITCH_ALIGN_S - 1)/ELL_PITCH_ALIGN_S)*ELL_PITCH_ALIGN_S;
}
//For complex?
params.elementType = elementType;
params.rows = rows;
params.maxRowSize = maxRowSize;
params.avgRowSize = (nnzeros+rows-1)/rows;
params.columns = columns;
params.firstIndex = firstIndex;
//params.pitch = computeEllAllocPitch(rows);
return params;
}
//new
int allocEllDevice(void ** remoteMatrix, EllDeviceParams* params)
{
struct EllDevice *tmp = (struct EllDevice *)malloc(sizeof(struct EllDevice));
*remoteMatrix = (void *)tmp;
tmp->rows = params->rows;
tmp->cMPitch = computeEllAllocPitch(tmp->rows);
tmp->rPPitch = tmp->cMPitch;
tmp->pitch= tmp->cMPitch;
tmp->maxRowSize = params->maxRowSize;
tmp->avgRowSize = params->avgRowSize;
tmp->allocsize = (int)tmp->maxRowSize * tmp->pitch;
//tmp->allocsize = (int)params->maxRowSize * tmp->cMPitch;
allocRemoteBuffer((void **)&(tmp->rS), tmp->rows*sizeof(int));
allocRemoteBuffer((void **)&(tmp->diag), tmp->rows*sizeof(int));
allocRemoteBuffer((void **)&(tmp->rP), tmp->allocsize*sizeof(int));
tmp->columns = params->columns;
tmp->baseIndex = params->firstIndex;
tmp->dataType = params->elementType;
//fprintf(stderr,"allocEllDevice: %d %d %d \n",tmp->pitch, params->maxRowSize, params->avgRowSize);
if (params->elementType == SPGPU_TYPE_FLOAT)
allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(float));
else if (params->elementType == SPGPU_TYPE_DOUBLE)
allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(double));
else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT)
allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuFloatComplex));
else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE)
allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuDoubleComplex));
else
return SPGPU_UNSUPPORTED; // Unsupported params
//fprintf(stderr,"From allocEllDevice: %d %d %d %p %p %p\n",tmp->maxRowSize,
// tmp->avgRowSize,tmp->allocsize,tmp->rS,tmp->rP,tmp->cM);
return SPGPU_SUCCESS;
}
//new
void zeroEllDevice(void *remoteMatrix)
{
struct EllDevice *tmp = (struct EllDevice *) remoteMatrix;
if (tmp->dataType == SPGPU_TYPE_FLOAT)
cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(float));
else if (tmp->dataType == SPGPU_TYPE_DOUBLE)
cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(double));
else if (tmp->dataType == SPGPU_TYPE_COMPLEX_FLOAT)
cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(cuFloatComplex));
else if (tmp->dataType == SPGPU_TYPE_COMPLEX_DOUBLE)
cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(cuDoubleComplex));
else
return SPGPU_UNSUPPORTED; // Unsupported params
//fprintf(stderr,"From allocEllDevice: %d %d %d %p %p %p\n",tmp->maxRowSize,
// tmp->avgRowSize,tmp->allocsize,tmp->rS,tmp->rP,tmp->cM);
return;
}
void freeEllDevice(void* remoteMatrix)
{
struct EllDevice *devMat = (struct EllDevice *) remoteMatrix;
//fprintf(stderr,"freeEllDevice\n");
if (devMat != NULL) {
freeRemoteBuffer(devMat->rS);
freeRemoteBuffer(devMat->rP);
freeRemoteBuffer(devMat->cM);
free(remoteMatrix);
}
}
//new
int FallocEllDevice(void** deviceMat,unsigned int rows, unsigned int maxRowSize,
unsigned int nnzeros,
unsigned int columns, unsigned int elementType,
unsigned int firstIndex)
{ int i;
EllDeviceParams p;
p = getEllDeviceParams(rows, maxRowSize, nnzeros, columns, elementType, firstIndex);
i = allocEllDevice(deviceMat, &p);
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i);
}
return(i);
}
void sspmdmm_gpu(float *z,int s, int vPitch, float *y, float alpha, float* cM, int* rP, int* rS,
int avgRowSize, int maxRowSize, int rows, int pitch, float *x, float beta, int firstIndex)
{
int i=0;
spgpuHandle_t handle=psb_cudaGetHandle();
for (i=0; i<s; i++)
{
if (PASS_RS) {
spgpuSellspmv (handle, (float*) z, (float*)y, alpha, (float*) cM, rP, pitch, pitch, rS,
NULL, avgRowSize, maxRowSize, rows, (float*)x, beta, firstIndex);
} else {
spgpuSellspmv (handle, (float*) z, (float*)y, alpha, (float*) cM, rP, pitch, pitch, NULL,
NULL, avgRowSize, maxRowSize, rows, (float*)x, beta, firstIndex);
}
z += vPitch;
y += vPitch;
x += vPitch;
}
}
//new
int spmvEllDeviceFloat(void *deviceMat, float alpha, void* deviceX,
float beta, void* deviceY)
{ int i=SPGPU_SUCCESS;
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
#ifdef VERBOSE
__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");
__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");
__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");
#endif
/*spgpuSellspmv (handle, (float*) y->v_, (float*)y->v_, alpha,
(float*) devMat->cM, devMat->rP, devMat->cMPitch,
devMat->rPPitch, devMat->rS, devMat->rows,
(float*)x->v_, beta, devMat->baseIndex);*/
sspmdmm_gpu ( (float *)y->v_,y->count_, y->pitch_, (float *)y->v_, alpha, (float *)devMat->cM, devMat->rP, devMat->rS,
devMat->avgRowSize, devMat->maxRowSize, devMat->rows, devMat->pitch,
(float *)x->v_, beta, devMat->baseIndex);
return(i);
}
void
dspmdmm_gpu (double *z,int s, int vPitch, double *y, double alpha, double* cM, int* rP,
int* rS, int avgRowSize, int maxRowSize, int rows, int pitch,
double *x, double beta, int firstIndex)
{
int i=0;
spgpuHandle_t handle=psb_cudaGetHandle();
for (i=0; i<s; i++)
{
if (PASS_RS) {
spgpuDellspmv (handle, (double*) z, (double*)y, alpha, (double*) cM, rP,
pitch, pitch, rS,
NULL, avgRowSize, maxRowSize, rows, (double*)x, beta, firstIndex);
} else {
spgpuDellspmv (handle, (double*) z, (double*)y, alpha, (double*) cM, rP,
pitch, pitch, NULL,
NULL, avgRowSize, maxRowSize, rows, (double*)x, beta, firstIndex);
}
z += vPitch;
y += vPitch;
x += vPitch;
}
}
//new
int spmvEllDeviceDouble(void *deviceMat, double alpha, void* deviceX,
double beta, void* deviceY)
{
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
/*spgpuDellspmv (handle, (double*) y->v_, (double*)y->v_, alpha, (double*) devMat->cM, devMat->rP, devMat->cMPitch, devMat->rPPitch, devMat->rS, devMat->rows, (double*)x->v_, beta, devMat->baseIndex);*/
/* fprintf(stderr,"From spmvEllDouble: mat %d %d %d %d y %d %d \n", */
/* devMat->avgRowSize, devMat->maxRowSize, devMat->rows, */
/* devMat->pitch, y->count_, y->pitch_); */
dspmdmm_gpu ((double *)y->v_, y->count_, y->pitch_, (double *)y->v_,
alpha, (double *)devMat->cM,
devMat->rP, devMat->rS, devMat->avgRowSize,
devMat->maxRowSize, devMat->rows, devMat->pitch,
(double *)x->v_, beta, devMat->baseIndex);
return SPGPU_SUCCESS;
}
void
cspmdmm_gpu (cuFloatComplex *z, int s, int vPitch, cuFloatComplex *y,
cuFloatComplex alpha, cuFloatComplex* cM,
int* rP, int* rS, int avgRowSize, int maxRowSize, int rows, int pitch,
cuFloatComplex *x, cuFloatComplex beta, int firstIndex)
{
int i=0;
spgpuHandle_t handle=psb_cudaGetHandle();
for (i=0; i<s; i++)
{
if (PASS_RS) {
spgpuCellspmv (handle, (cuFloatComplex *) z, (cuFloatComplex *)y, alpha, (cuFloatComplex *) cM, rP,
pitch, pitch, rS, NULL, avgRowSize, maxRowSize, rows, (cuFloatComplex *) x, beta, firstIndex);
} else {
spgpuCellspmv (handle, (cuFloatComplex *) z, (cuFloatComplex *)y, alpha, (cuFloatComplex *) cM, rP,
pitch, pitch, NULL, NULL, avgRowSize, maxRowSize, rows, (cuFloatComplex *) x, beta, firstIndex);
}
z += vPitch;
y += vPitch;
x += vPitch;
}
}
int spmvEllDeviceFloatComplex(void *deviceMat, float complex alpha, void* deviceX,
float complex beta, void* deviceY)
{
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
cuFloatComplex a = make_cuFloatComplex(crealf(alpha),cimagf(alpha));
cuFloatComplex b = make_cuFloatComplex(crealf(beta),cimagf(beta));
cspmdmm_gpu ((cuFloatComplex *)y->v_, y->count_, y->pitch_, (cuFloatComplex *)y->v_, a, (cuFloatComplex *)devMat->cM,
devMat->rP, devMat->rS, devMat->avgRowSize, devMat->maxRowSize, devMat->rows, devMat->pitch,
(cuFloatComplex *)x->v_, b, devMat->baseIndex);
return SPGPU_SUCCESS;
}
void
zspmdmm_gpu (cuDoubleComplex *z, int s, int vPitch, cuDoubleComplex *y, cuDoubleComplex alpha, cuDoubleComplex* cM,
int* rP, int* rS, int avgRowSize, int maxRowSize, int rows, int pitch,
cuDoubleComplex *x, cuDoubleComplex beta, int firstIndex)
{
int i=0;
spgpuHandle_t handle=psb_cudaGetHandle();
for (i=0; i<s; i++)
{
if (PASS_RS) {
spgpuZellspmv (handle, (cuDoubleComplex *) z, (cuDoubleComplex *)y, alpha, (cuDoubleComplex *) cM, rP,
pitch, pitch, rS, NULL, avgRowSize, maxRowSize, rows, (cuDoubleComplex *) x, beta, firstIndex);
} else {
spgpuZellspmv (handle, (cuDoubleComplex *) z, (cuDoubleComplex *)y, alpha, (cuDoubleComplex *) cM, rP,
pitch, pitch, NULL, NULL, avgRowSize, maxRowSize, rows, (cuDoubleComplex *) x, beta, firstIndex);
}
z += vPitch;
y += vPitch;
x += vPitch;
}
}
int spmvEllDeviceDoubleComplex(void *deviceMat, double complex alpha, void* deviceX,
double complex beta, void* deviceY)
{
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
cuDoubleComplex a = make_cuDoubleComplex(creal(alpha),cimag(alpha));
cuDoubleComplex b = make_cuDoubleComplex(creal(beta),cimag(beta));
zspmdmm_gpu ((cuDoubleComplex *)y->v_, y->count_, y->pitch_, (cuDoubleComplex *)y->v_, a, (cuDoubleComplex *)devMat->cM,
devMat->rP, devMat->rS, devMat->avgRowSize, devMat->maxRowSize, devMat->rows,
devMat->pitch, (cuDoubleComplex *)x->v_, b, devMat->baseIndex);
return SPGPU_SUCCESS;
}
int writeEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag)
{ int i;
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
// Ex updateFromHost function
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float));
if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
//i = writeEllDevice(deviceMat, (void *) val, ja, irn);
/*if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i);
}*/
return SPGPU_SUCCESS;
}
int writeEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag)
{ int i;
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
// Ex updateFromHost function
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double));
if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i);
}
return SPGPU_SUCCESS;
}
int writeEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag)
{ int i;
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
// Ex updateFromHost function
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex));
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i);
}*/
return SPGPU_SUCCESS;
}
int writeEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag)
{ int i;
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
// Ex updateFromHost function
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex));
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i);
}*/
return SPGPU_SUCCESS;
}
int readEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag)
{ int i;
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(float));
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
/*i = readEllDevice(deviceMat, (void *) val, ja, irn);
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i);
}*/
return SPGPU_SUCCESS;
}
int readEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag)
{ int i;
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(double));
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
/*if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
}*/
return SPGPU_SUCCESS;
}
int readEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag)
{ int i;
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex));
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
/*if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
}*/
return SPGPU_SUCCESS;
}
int readEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag)
{ int i;
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex));
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
/*if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
}*/
return SPGPU_SUCCESS;
}
int getEllDevicePitch(void* deviceMat)
{ int i;
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
i = devMat->pitch; //old
//i = getPitchEllDevice(deviceMat);
return(i);
}
int getEllDeviceMaxRowSize(void* deviceMat)
{ int i;
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
i = devMat->maxRowSize;
return(i);
}
// New copying interface
int psiCopyCooToElgFloat(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn,
int *idisp, int *ja, float *val, void *deviceMat)
{ int i;
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
float *devVal;
int *devIdisp, *devJa;
spgpuHandle_t handle;
handle = psb_cudaGetHandle();
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(float));
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(float));
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
if (i==0) psi_cuda_s_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm,
(int *) devMat->rS,devIdisp,devJa,devVal,
(int *) devMat->diag, (int *) devMat->rP, (float *)devMat->cM);
// Ex updateFromHost function
//i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float));
//if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
//if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
freeRemoteBuffer(devIdisp);
freeRemoteBuffer(devJa);
freeRemoteBuffer(devVal);
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i);
}
return SPGPU_SUCCESS;
}
int psiCopyCooToElgDouble(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn,
int *idisp, int *ja, double *val, void *deviceMat)
{ int i;
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
double *devVal;
int *devIdisp, *devJa;
spgpuHandle_t handle;
handle = psb_cudaGetHandle();
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(double));
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(double));
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
if (i==0) psi_cuda_d_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm,
(int *) devMat->rS,devIdisp,devJa,devVal,
(int *) devMat->diag, (int *) devMat->rP, (double *)devMat->cM);
// Ex updateFromHost function
//i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double));
//if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
//if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
freeRemoteBuffer(devIdisp);
freeRemoteBuffer(devJa);
freeRemoteBuffer(devVal);
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i);
}
return SPGPU_SUCCESS;
}
int psiCopyCooToElgFloatComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn,
int *idisp, int *ja, float complex *val, void *deviceMat)
{ int i;
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
float complex *devVal;
int *devIdisp, *devJa;
spgpuHandle_t handle;
handle = psb_cudaGetHandle();
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuFloatComplex));
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuFloatComplex));
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
if (i==0) psi_cuda_c_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm,
(int *) devMat->rS,devIdisp,devJa,devVal,
(int *) devMat->diag,(int *) devMat->rP, (float complex *)devMat->cM);
// Ex updateFromHost function
//i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float complex));
//if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
//if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
freeRemoteBuffer(devIdisp);
freeRemoteBuffer(devJa);
freeRemoteBuffer(devVal);
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloatComplex",i);
}
return SPGPU_SUCCESS;
}
int psiCopyCooToElgDoubleComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn,
int *idisp, int *ja, double complex *val, void *deviceMat)
{ int i;
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
double complex *devVal;
int *devIdisp, *devJa;
spgpuHandle_t handle;
handle = psb_cudaGetHandle();
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuDoubleComplex));
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuDoubleComplex));
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
if (i==0) psi_cuda_z_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm,
(int *) devMat->rS,devIdisp,devJa,devVal,
(int *) devMat->diag,(int *) devMat->rP, (double complex *)devMat->cM);
// Ex updateFromHost function
//i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double complex));
//if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
//if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
freeRemoteBuffer(devIdisp);
freeRemoteBuffer(devJa);
freeRemoteBuffer(devVal);
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDoubleComplex",i);
}
return SPGPU_SUCCESS;
}
int dev_csputEllDeviceFloat(void* deviceMat, int nnz, void *ia, void *ja, void *val)
{ int i;
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
struct MultiVectDevice *devVal = (struct MultiVectDevice *) val;
struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia;
struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja;
float alpha=1.0;
spgpuHandle_t handle=psb_cudaGetHandle();
if (nnz <=0) return SPGPU_SUCCESS;
//fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt);
spgpuSellcsput(handle,alpha,(float *) devMat->cM,
devMat->rP,devMat->pitch, devMat->pitch, devMat->rS,
nnz, devIa->v_, devJa->v_, (float *) devVal->v_, 1);
return SPGPU_SUCCESS;
}
int dev_csputEllDeviceDouble(void* deviceMat, int nnz, void *ia, void *ja, void *val)
{ int i;
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
struct MultiVectDevice *devVal = (struct MultiVectDevice *) val;
struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia;
struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja;
double alpha=1.0;
spgpuHandle_t handle=psb_cudaGetHandle();
if (nnz <=0) return SPGPU_SUCCESS;
//fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt);
spgpuDellcsput(handle,alpha,(double *) devMat->cM,
devMat->rP,devMat->pitch, devMat->pitch, devMat->rS,
nnz, devIa->v_, devJa->v_, (double *) devVal->v_, 1);
return SPGPU_SUCCESS;
}
int dev_csputEllDeviceFloatComplex(void* deviceMat, int nnz,
void *ia, void *ja, void *val)
{ int i;
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
struct MultiVectDevice *devVal = (struct MultiVectDevice *) val;
struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia;
struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja;
cuFloatComplex alpha = make_cuFloatComplex(1.0, 0.0);
spgpuHandle_t handle=psb_cudaGetHandle();
if (nnz <=0) return SPGPU_SUCCESS;
//fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt);
spgpuCellcsput(handle,alpha,(cuFloatComplex *) devMat->cM,
devMat->rP,devMat->pitch, devMat->pitch, devMat->rS,
nnz, devIa->v_, devJa->v_, (cuFloatComplex *) devVal->v_, 1);
return SPGPU_SUCCESS;
}
int dev_csputEllDeviceDoubleComplex(void* deviceMat, int nnz,
void *ia, void *ja, void *val)
{ int i;
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
struct MultiVectDevice *devVal = (struct MultiVectDevice *) val;
struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia;
struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja;
cuDoubleComplex alpha = make_cuDoubleComplex(1.0, 0.0);
spgpuHandle_t handle=psb_cudaGetHandle();
if (nnz <=0) return SPGPU_SUCCESS;
//fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt);
spgpuZellcsput(handle,alpha,(cuDoubleComplex *) devMat->cM,
devMat->rP,devMat->pitch, devMat->pitch, devMat->rS,
nnz, devIa->v_, devJa->v_, (cuDoubleComplex *) devVal->v_, 1);
return SPGPU_SUCCESS;
}

@ -0,0 +1,177 @@
/* Parallel Sparse BLAS GPU plugin */
/* (C) Copyright 2013 */
/* Salvatore Filippone */
/* Alessandro Fanfarillo */
/* Redistribution and use in source and binary forms, with or without */
/* modification, are permitted provided that the following conditions */
/* are met: */
/* 1. Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* 2. Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions, and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution. */
/* 3. The name of the PSBLAS group or the names of its contributors may */
/* not be used to endorse or promote products derived from this */
/* software without specific written permission. */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
#ifndef _ELLDEV_H_
#define _ELLDEV_H_
#include "cintrf.h"
#include "cuComplex.h"
#include "ell.h"
struct EllDevice
{
// Compressed matrix
void *cM; //it can be float or double
// row pointers (same size of cM)
int *rP;
int *diag;
// row size
int *rS;
//matrix size (uncompressed)
int rows;
int columns;
int pitch; //old
int cMPitch;
int rPPitch;
int maxRowSize;
int avgRowSize;
//allocation size (in elements)
int allocsize;
/*(i.e. 0 for C, 1 for Fortran)*/
int baseIndex;
/* real/complex, single/double */
int dataType;
};
typedef struct EllDeviceParams
{
// The resulting allocation for cM and rP will be pitch*maxRowSize*(size of the elementType)
unsigned int elementType;
// Pitch (in number of elements)
unsigned int pitch;
// Number of rows.
// Used to allocate rS array
unsigned int rows;
// Number of columns.
// Used for error-checking
unsigned int columns;
// Largest row size
unsigned int maxRowSize;
unsigned int avgRowSize;
// First index (e.g 0 or 1)
unsigned int firstIndex;
} EllDeviceParams;
int FallocEllDevice(void** deviceMat, unsigned int rows, unsigned int maxRowSize,
unsigned int nnzeros,
unsigned int columns, unsigned int elementType,
unsigned int firstIndex);
int allocEllDevice(void ** remoteMatrix, EllDeviceParams* params);
void freeEllDevice(void* remoteMatrix);
int writeEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag);
int writeEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag);
int writeEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag);
int writeEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag);
int readEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag);
int readEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag);
int readEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag);
int readEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag);
int spmvEllDeviceFloat(void *deviceMat, float alpha, void* deviceX,
float beta, void* deviceY);
int spmvEllDeviceDouble(void *deviceMat, double alpha, void* deviceX,
double beta, void* deviceY);
int spmvEllDeviceFloatComplex(void *deviceMat, float complex alpha, void* deviceX,
float complex beta, void* deviceY);
int spmvEllDeviceDoubleComplex(void *deviceMat, double complex alpha, void* deviceX,
double complex beta, void* deviceY);
int psiCopyCooToElgFloat(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn,
int *idisp, int *ja, float *val, void *deviceMat);
int psiCopyCooToElgDouble(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn,
int *idisp, int *ja, double *val, void *deviceMat);
int psiCopyCooToElgFloatComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn,
int *idisp, int *ja, float complex *val, void *deviceMat);
int psiCopyCooToElgDoubleComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn,
int *idisp, int *ja, double complex *val, void *deviceMat);
void psi_cuda_s_CopyCooToElg(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx,
int hacksz, int ldv, int nzm,
int *rS,int *devIdisp, int *devJa, float *devVal,
int *idiag, int *rP, float *cM);
void psi_cuda_d_CopyCooToElg(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx,
int hacksz, int ldv, int nzm,
int *rS,int *devIdisp, int *devJa, double *devVal,
int *idiag, int *rP, double *cM);
void psi_cuda_c_CopyCooToElg(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx,
int hacksz, int ldv, int nzm,
int *rS,int *devIdisp, int *devJa, float complex *devVal,
int *idiag, int *rP, float complex *cM);
void psi_cuda_z_CopyCooToElg(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx,
int hacksz, int ldv, int nzm,
int *rS,int *devIdisp, int *devJa, double complex *devVal,
int *idiag, int *rP, double complex *cM);
int dev_csputEllDeviceFloat(void* deviceMat, int nnz,
void *ia, void *ja, void *val);
int dev_csputEllDeviceDouble(void* deviceMat, int nnz,
void *ia, void *ja, void *val);
int dev_csputEllDeviceFloatComplex(void* deviceMat, int nnz,
void *ia, void *ja, void *val);
int dev_csputEllDeviceDoubleComplex(void* deviceMat, int nnz,
void *ia, void *ja, void *val);
void zeroEllDevice(void* deviceMat);
int getEllDevicePitch(void* deviceMat);
// sparse Ell matrix-vector product
//int spmvEllDeviceFloat(void *deviceMat, float* alpha, void* deviceX, float* beta, void* deviceY);
//int spmvEllDeviceDouble(void *deviceMat, double* alpha, void* deviceX, double* beta, void* deviceY);
#endif

@ -0,0 +1,321 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
module elldev_mod
use iso_c_binding
use core_mod
type, bind(c) :: elldev_parms
integer(c_int) :: element_type
integer(c_int) :: pitch
integer(c_int) :: rows
integer(c_int) :: columns
integer(c_int) :: maxRowSize
integer(c_int) :: avgRowSize
integer(c_int) :: firstIndex
end type elldev_parms
interface
function FgetEllDeviceParams(rows, maxRowSize, nnzeros, columns, elementType, firstIndex) &
& result(res) bind(c,name='getEllDeviceParams')
use iso_c_binding
import :: elldev_parms
type(elldev_parms) :: res
integer(c_int), value :: rows,maxRowSize,nnzeros,columns,elementType,firstIndex
end function FgetEllDeviceParams
end interface
interface
function FallocEllDevice(deviceMat,rows,maxRowSize,nnzeros,columns,&
& elementType,firstIndex) &
& result(res) bind(c,name='FallocEllDevice')
use iso_c_binding
integer(c_int) :: res
integer(c_int), value :: rows,maxRowSize,nnzeros,columns,elementType,firstIndex
type(c_ptr) :: deviceMat
end function FallocEllDevice
end interface
interface writeEllDevice
function writeEllDeviceFloat(deviceMat,val,ja,ldj,irn,idiag) &
& result(res) bind(c,name='writeEllDeviceFloat')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
integer(c_int), value :: ldj
real(c_float) :: val(ldj,*)
integer(c_int) :: ja(ldj,*),irn(*),idiag(*)
end function writeEllDeviceFloat
function writeEllDeviceDouble(deviceMat,val,ja,ldj,irn,idiag) &
& result(res) bind(c,name='writeEllDeviceDouble')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
integer(c_int), value :: ldj
real(c_double) :: val(ldj,*)
integer(c_int) :: ja(ldj,*),irn(*),idiag(*)
end function writeEllDeviceDouble
function writeEllDeviceFloatComplex(deviceMat,val,ja,ldj,irn,idiag) &
& result(res) bind(c,name='writeEllDeviceFloatComplex')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
integer(c_int), value :: ldj
complex(c_float_complex) :: val(ldj,*)
integer(c_int) :: ja(ldj,*),irn(*),idiag(*)
end function writeEllDeviceFloatComplex
function writeEllDeviceDoubleComplex(deviceMat,val,ja,ldj,irn,idiag) &
& result(res) bind(c,name='writeEllDeviceDoubleComplex')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
integer(c_int), value :: ldj
complex(c_double_complex) :: val(ldj,*)
integer(c_int) :: ja(ldj,*),irn(*),idiag(*)
end function writeEllDeviceDoubleComplex
end interface
interface readEllDevice
function readEllDeviceFloat(deviceMat,val,ja,ldj,irn,idiag) &
& result(res) bind(c,name='readEllDeviceFloat')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
integer(c_int), value :: ldj
real(c_float) :: val(ldj,*)
integer(c_int) :: ja(ldj,*),irn(*),idiag(*)
end function readEllDeviceFloat
function readEllDeviceDouble(deviceMat,val,ja,ldj,irn,idiag) &
& result(res) bind(c,name='readEllDeviceDouble')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
integer(c_int), value :: ldj
real(c_double) :: val(ldj,*)
integer(c_int) :: ja(ldj,*),irn(*),idiag(*)
end function readEllDeviceDouble
function readEllDeviceFloatComplex(deviceMat,val,ja,ldj,irn,idiag) &
& result(res) bind(c,name='readEllDeviceFloatComplex')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
integer(c_int), value :: ldj
complex(c_float_complex) :: val(ldj,*)
integer(c_int) :: ja(ldj,*),irn(*),idiag(*)
end function readEllDeviceFloatComplex
function readEllDeviceDoubleComplex(deviceMat,val,ja,ldj,irn,idiag) &
& result(res) bind(c,name='readEllDeviceDoubleComplex')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
integer(c_int), value :: ldj
complex(c_double_complex) :: val(ldj,*)
integer(c_int) :: ja(ldj,*),irn(*),idiag(*)
end function readEllDeviceDoubleComplex
end interface
interface
subroutine freeEllDevice(deviceMat) &
& bind(c,name='freeEllDevice')
use iso_c_binding
type(c_ptr), value :: deviceMat
end subroutine freeEllDevice
end interface
interface
subroutine zeroEllDevice(deviceMat) &
& bind(c,name='zeroEllDevice')
use iso_c_binding
type(c_ptr), value :: deviceMat
end subroutine zeroEllDevice
end interface
interface
subroutine resetEllTimer() bind(c,name='resetEllTimer')
use iso_c_binding
end subroutine resetEllTimer
end interface
interface
function getEllTimer() &
& bind(c,name='getEllTimer') result(res)
use iso_c_binding
real(c_double) :: res
end function getEllTimer
end interface
interface
function getEllDevicePitch(deviceMat) &
& bind(c,name='getEllDevicePitch') result(res)
use iso_c_binding
type(c_ptr), value :: deviceMat
integer(c_int) :: res
end function getEllDevicePitch
end interface
interface
function getEllDeviceMaxRowSize(deviceMat) &
& bind(c,name='getEllDeviceMaxRowSize') result(res)
use iso_c_binding
type(c_ptr), value :: deviceMat
integer(c_int) :: res
end function getEllDeviceMaxRowSize
end interface
interface psi_CopyCooToElg
function psiCopyCooToElgFloat(nr, nc, nza, hacksz, ldv, nzm, irn, &
& idisp, ja, val, deviceMat) &
& result(res) bind(c,name='psiCopyCooToElgFloat')
use iso_c_binding
integer(c_int) :: res
integer(c_int), value :: nr,nc,nza,hacksz,ldv,nzm
type(c_ptr), value :: deviceMat
real(c_float) :: val(*)
integer(c_int) :: irn(*),idisp(*),ja(*)
end function psiCopyCooToElgFloat
function psiCopyCooToElgDouble(nr, nc, nza, hacksz, ldv, nzm, irn, &
& idisp, ja, val, deviceMat) &
& result(res) bind(c,name='psiCopyCooToElgDouble')
use iso_c_binding
integer(c_int) :: res
integer(c_int), value :: nr,nc,nza,hacksz,ldv,nzm
type(c_ptr), value :: deviceMat
real(c_double) :: val(*)
integer(c_int) :: irn(*),idisp(*),ja(*)
end function psiCopyCooToElgDouble
function psiCopyCooToElgFloatComplex(nr, nc, nza, hacksz, ldv, nzm, irn, &
& idisp, ja, val, deviceMat) &
& result(res) bind(c,name='psiCopyCooToElgFloatComplex')
use iso_c_binding
integer(c_int) :: res
integer(c_int), value :: nr,nc,nza,hacksz,ldv,nzm
type(c_ptr), value :: deviceMat
complex(c_float_complex) :: val(*)
integer(c_int) :: irn(*),idisp(*),ja(*)
end function psiCopyCooToElgFloatComplex
function psiCopyCooToElgDoubleComplex(nr, nc, nza, hacksz, ldv, nzm, irn, &
& idisp, ja, val, deviceMat) &
& result(res) bind(c,name='psiCopyCooToElgDoubleComplex')
use iso_c_binding
integer(c_int) :: res
integer(c_int), value :: nr,nc,nza,hacksz,ldv,nzm
type(c_ptr), value :: deviceMat
complex(c_double_complex) :: val(*)
integer(c_int) :: irn(*),idisp(*),ja(*)
end function psiCopyCooToElgDoubleComplex
end interface
interface csputEllDeviceFloat
function dev_csputEllDeviceFloat(deviceMat, nnz, ia, ja, val) &
& result(res) bind(c,name='dev_csputEllDeviceFloat')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat , ia, ja, val
integer(c_int), value :: nnz
end function dev_csputEllDeviceFloat
end interface
interface csputEllDeviceDouble
function dev_csputEllDeviceDouble(deviceMat, nnz, ia, ja, val) &
& result(res) bind(c,name='dev_csputEllDeviceDouble')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat , ia, ja, val
integer(c_int), value :: nnz
end function dev_csputEllDeviceDouble
end interface
interface csputEllDeviceFloatComplex
function dev_csputEllDeviceFloatComplex(deviceMat, nnz, ia, ja, val) &
& result(res) bind(c,name='dev_csputEllDeviceFloatComplex')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat , ia, ja, val
integer(c_int), value :: nnz
end function dev_csputEllDeviceFloatComplex
end interface
interface csputEllDeviceDoubleComplex
function dev_csputEllDeviceDoubleComplex(deviceMat, nnz, ia, ja, val) &
& result(res) bind(c,name='dev_csputEllDeviceDoubleComplex')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat , ia, ja, val
integer(c_int), value :: nnz
end function dev_csputEllDeviceDoubleComplex
end interface
interface spmvEllDevice
function spmvEllDeviceFloat(deviceMat,alpha,x,beta,y) &
& result(res) bind(c,name='spmvEllDeviceFloat')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat, x, y
real(c_float),value :: alpha, beta
end function spmvEllDeviceFloat
function spmvEllDeviceDouble(deviceMat,alpha,x,beta,y) &
& result(res) bind(c,name='spmvEllDeviceDouble')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat, x, y
real(c_double),value :: alpha, beta
end function spmvEllDeviceDouble
function spmvEllDeviceFloatComplex(deviceMat,alpha,x,beta,y) &
& result(res) bind(c,name='spmvEllDeviceFloatComplex')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat, x, y
complex(c_float_complex),value :: alpha, beta
end function spmvEllDeviceFloatComplex
function spmvEllDeviceDoubleComplex(deviceMat,alpha,x,beta,y) &
& result(res) bind(c,name='spmvEllDeviceDoubleComplex')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat, x, y
complex(c_double_complex),value :: alpha, beta
end function spmvEllDeviceDoubleComplex
end interface
end module elldev_mod

@ -0,0 +1,76 @@
/* Parallel Sparse BLAS GPU plugin */
/* (C) Copyright 2013 */
/* Salvatore Filippone */
/* Alessandro Fanfarillo */
/* Redistribution and use in source and binary forms, with or without */
/* modification, are permitted provided that the following conditions */
/* are met: */
/* 1. Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* 2. Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions, and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution. */
/* 3. The name of the PSBLAS group or the names of its contributors may */
/* not be used to endorse or promote products derived from this */
/* software without specific written permission. */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include "cintrf.h"
#include "fcusparse.h"
static cusparseHandle_t *cusparse_handle=NULL;
void setHandle(cusparseHandle_t);
int FcusparseCreate()
{
int ret=CUSPARSE_STATUS_SUCCESS;
cusparseHandle_t *handle;
if (cusparse_handle == NULL) {
if ((handle = (cusparseHandle_t *)malloc(sizeof(cusparseHandle_t)))==NULL)
return((int) CUSPARSE_STATUS_ALLOC_FAILED);
ret = (int)cusparseCreate(handle);
if (ret == CUSPARSE_STATUS_SUCCESS)
cusparse_handle = handle;
}
fprintf(stderr,"Created cusparses_handle\n");
return (ret);
}
int FcusparseDestroy()
{
int val;
if (cusparse_handle!=NULL){
val = (int) cusparseDestroy(*cusparse_handle);
free(cusparse_handle);
}
cusparse_handle=NULL;
return(val);
}
cusparseHandle_t *getHandle()
{
if (cusparse_handle == NULL)
FcusparseCreate();
return(cusparse_handle);
}

@ -0,0 +1,68 @@
/* Parallel Sparse BLAS GPU plugin */
/* (C) Copyright 2013 */
/* Salvatore Filippone */
/* Alessandro Fanfarillo */
/* Redistribution and use in source and binary forms, with or without */
/* modification, are permitted provided that the following conditions */
/* are met: */
/* 1. Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* 2. Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions, and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution. */
/* 3. The name of the PSBLAS group or the names of its contributors may */
/* not be used to endorse or promote products derived from this */
/* software without specific written permission. */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
#ifndef FCUSPARSE_
#define FCUSPARSE_
#include <cuda_runtime.h>
#if CUDA_SHORT_VERSION <= 10
#include <cusparse_v2.h>
#else
#include <cusparse.h>
#endif
#include "cintrf.h"
int FcusparseCreate();
int FcusparseDestroy();
cusparseHandle_t *getHandle();
#define CHECK_CUDA(func) \
{ \
cudaError_t status = (func); \
if (status != cudaSuccess) { \
printf("CUDA API failed at line %d with error: %s (%d)\n", \
__LINE__, cudaGetErrorString(status), status); \
return EXIT_FAILURE; \
} \
}
#define CHECK_CUSPARSE(func) \
{ \
cusparseStatus_t status = (func); \
if (status != CUSPARSE_STATUS_SUCCESS) { \
printf("CUSPARSE API failed at line %d with error: %s (%d)\n", \
__LINE__, cusparseGetErrorString(status), status); \
return EXIT_FAILURE; \
} \
}
#endif

@ -0,0 +1,824 @@
/* Parallel Sparse BLAS GPU plugin */
/* (C) Copyright 2013 */
/* Salvatore Filippone */
/* Alessandro Fanfarillo */
/* Redistribution and use in source and binary forms, with or without */
/* modification, are permitted provided that the following conditions */
/* are met: */
/* 1. Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* 2. Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions, and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution. */
/* 3. The name of the PSBLAS group or the names of its contributors may */
/* not be used to endorse or promote products derived from this */
/* software without specific written permission. */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
typedef struct T_CSRGDeviceMat
{
#if CUDA_SHORT_VERSION <= 10
cusparseMatDescr_t descr;
cusparseSolveAnalysisInfo_t triang;
#elif CUDA_VERSION < 11030
cusparseMatDescr_t descr;
csrsv2Info_t triang;
size_t mvbsize, svbsize;
void *mvbuffer, *svbuffer;
#else
cusparseSpMatDescr_t *spmvDescr;
cusparseSpSVDescr_t *spsvDescr;
size_t mvbsize, svbsize;
void *mvbuffer, *svbuffer;
#endif
int m, n, nz;
TYPE *val;
int *irp;
int *ja;
} T_CSRGDeviceMat;
/* Interoperability: type coming from Fortran side to distinguish D/S/C/Z. */
typedef struct T_Cmat
{
T_CSRGDeviceMat *mat;
} T_Cmat;
#if CUDA_SHORT_VERSION <= 10
typedef struct T_HYBGDeviceMat
{
cusparseMatDescr_t descr;
cusparseSolveAnalysisInfo_t triang;
cusparseHybMat_t hybA;
int m, n, nz;
TYPE *val;
int *irp;
int *ja;
} T_HYBGDeviceMat;
/* Interoperability: type coming from Fortran side to distinguish D/S/C/Z. */
typedef struct T_Hmat
{
T_HYBGDeviceMat *mat;
} T_Hmat;
#endif
int T_spmvCSRGDevice(T_Cmat *Mat, TYPE alpha, void *deviceX,
TYPE beta, void *deviceY);
int T_spsvCSRGDevice(T_Cmat *Mat, TYPE alpha, void *deviceX,
TYPE beta, void *deviceY);
int T_CSRGDeviceAlloc(T_Cmat *Mat,int nr, int nc, int nz);
int T_CSRGDeviceFree(T_Cmat *Mat);
int T_CSRGHost2Device(T_Cmat *Mat, int m, int n, int nz,
int *irp, int *ja, TYPE *val);
int T_CSRGDevice2Host(T_Cmat *Mat, int m, int n, int nz,
int *irp, int *ja, TYPE *val);
int T_CSRGDeviceGetParms(T_Cmat *Mat,int *nr, int *nc, int *nz);
#if CUDA_SHORT_VERSION <= 10
int T_CSRGDeviceSetMatType(T_Cmat *Mat, int type);
int T_CSRGDeviceSetMatFillMode(T_Cmat *Mat, int type);
int T_CSRGDeviceSetMatDiagType(T_Cmat *Mat, int type);
int T_CSRGDeviceSetMatIndexBase(T_Cmat *Mat, int type);
int T_CSRGDeviceCsrsmAnalysis(T_Cmat *Mat);
#elif CUDA_VERSION < 11030
int T_CSRGDeviceSetMatType(T_Cmat *Mat, int type);
int T_CSRGDeviceSetMatFillMode(T_Cmat *Mat, int type);
int T_CSRGDeviceSetMatDiagType(T_Cmat *Mat, int type);
int T_CSRGDeviceSetMatIndexBase(T_Cmat *Mat, int type);
#else
int T_CSRGCreateSpMVDescr(T_CSRGDeviceMat *cMat);
int T_CSRGIsNullSvBuffer(T_CSRGDeviceMat *cMat);
int T_CSRGIsNullSvDescr(T_CSRGDeviceMat *cMat);
int T_CSRGIsNullMvDescr(T_CSRGDeviceMat *cMat);
#endif
#if CUDA_SHORT_VERSION <= 10
int T_HYBGDeviceFree(T_Hmat *Matrix);
int T_spmvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX,
TYPE beta, void *deviceY);
int T_HYBGDeviceAlloc(T_Hmat *Matrix,int nr, int nc, int nz);
int T_HYBGDeviceSetMatDiagType(T_Hmat *Matrix, int type);
int T_HYBGDeviceSetMatIndexBase(T_Hmat *Matrix, int type);
int T_HYBGDeviceSetMatType(T_Hmat *Matrix, int type);
int T_HYBGDeviceSetMatFillMode(T_Hmat *Matrix, int type);
int T_HYBGDeviceHybsmAnalysis(T_Hmat *Matrix);
int T_spsvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX,
TYPE beta, void *deviceY);
int T_HYBGHost2Device(T_Hmat *Matrix, int m, int n, int nz,
int *irp, int *ja, TYPE *val);
#endif
int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX,
TYPE beta, void *deviceY)
{
T_CSRGDeviceMat *cMat=Matrix->mat;
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
void *vX, *vY;
int r,n;
cusparseHandle_t *my_handle=getHandle();
TYPE ealpha=alpha, ebeta=beta;
#if CUDA_SHORT_VERSION <= 10
/* getAddrMultiVecDevice(deviceX, &vX); */
/* getAddrMultiVecDevice(deviceY, &vY); */
vX=x->v_;
vY=y->v_;
CHECK_CUSPARSE(cusparseTcsrmv(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
cMat->m,cMat->n,cMat->nz,(const TYPE *) &alpha,cMat->descr,
cMat->val, cMat->irp, cMat->ja,
(const TYPE *) vX, (const TYPE *) &beta, (TYPE *) vY));
#elif CUDA_VERSION < 11030
size_t bfsz;
vX=x->v_;
vY=y->v_;
#if 1
CHECK_CUSPARSE(cusparseCsrmvEx_bufferSize(*my_handle,CUSPARSE_ALG_MERGE_PATH,
CUSPARSE_OPERATION_NON_TRANSPOSE,
cMat->m,cMat->n,cMat->nz,
(const void *) &ealpha,CUSPARSE_BASE_TYPE,
cMat->descr,
(const void *) cMat->val,
CUSPARSE_BASE_TYPE,
(const int *) cMat->irp,
(const int *) cMat->ja,
(const void *) vX, CUSPARSE_BASE_TYPE,
(const void *) &ebeta, CUSPARSE_BASE_TYPE,
(void *) vY, CUSPARSE_BASE_TYPE,
CUSPARSE_BASE_TYPE, &bfsz));
#else
bfsz=cMat->nz;
#endif
if (bfsz > cMat->mvbsize) {
if (cMat->mvbuffer != NULL) {
CHECK_CUDA(cudaFree(cMat->mvbuffer));
cMat->mvbuffer = NULL;
}
CHECK_CUDA(cudaMalloc((void **) &(cMat->mvbuffer), bfsz));
cMat->mvbsize = bfsz;
}
CHECK_CUSPARSE(cusparseCsrmvEx(*my_handle,
CUSPARSE_ALG_MERGE_PATH,
CUSPARSE_OPERATION_NON_TRANSPOSE,
cMat->m,cMat->n,cMat->nz,
(const void *) &ealpha,CUSPARSE_BASE_TYPE,
cMat->descr,
(const void *) cMat->val, CUSPARSE_BASE_TYPE,
(const int *) cMat->irp, (const int *) cMat->ja,
(const void *) vX, CUSPARSE_BASE_TYPE,
(const void *) &ebeta, CUSPARSE_BASE_TYPE,
(void *) vY, CUSPARSE_BASE_TYPE,
CUSPARSE_BASE_TYPE, (void *) cMat->mvbuffer));
#else
cusparseDnVecDescr_t vecX, vecY;
size_t bfsz;
if (T_CSRGIsNullMvDescr(cMat)) {
cMat->spmvDescr = (cusparseSpMatDescr_t *) malloc(sizeof(cusparseSpMatDescr_t *));
}
T_CSRGCreateSpMVDescr(cMat);
vX=x->v_;
vY=y->v_;
CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, cMat->m, vY, CUSPARSE_BASE_TYPE) );
CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, cMat->n, vX, CUSPARSE_BASE_TYPE) );
CHECK_CUSPARSE(cusparseSpMV_bufferSize(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha,(*(cMat->spmvDescr)),vecX,&beta,vecY,
CUSPARSE_BASE_TYPE,CUSPARSE_SPMV_ALG_DEFAULT,
&bfsz));
if (bfsz > cMat->mvbsize) {
if (cMat->mvbuffer != NULL) {
CHECK_CUDA(cudaFree(cMat->mvbuffer));
cMat->mvbuffer = NULL;
}
CHECK_CUDA(cudaMalloc((void **) &(cMat->mvbuffer), bfsz));
cMat->mvbsize = bfsz;
}
CHECK_CUSPARSE(cusparseSpMV(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha,(*(cMat->spmvDescr)),vecX,&beta,vecY,
CUSPARSE_BASE_TYPE,CUSPARSE_SPMV_ALG_DEFAULT,
cMat->mvbuffer));
CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) );
CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) );
CHECK_CUSPARSE(cusparseDestroySpMat(*(cMat->spmvDescr)));
#endif
}
int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX,
TYPE beta, void *deviceY)
{
T_CSRGDeviceMat *cMat=Matrix->mat;
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
void *vX, *vY;
int r,n;
cusparseHandle_t *my_handle=getHandle();
#if CUDA_SHORT_VERSION <= 10
vX=x->v_;
vY=y->v_;
return cusparseTcsrsv_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
cMat->m,(const TYPE *) &alpha,cMat->descr,
cMat->val, cMat->irp, cMat->ja, cMat->triang,
(const TYPE *) vX, (TYPE *) vY);
#elif CUDA_VERSION < 11030
vX=x->v_;
vY=y->v_;
CHECK_CUSPARSE(cusparseTcsrsv2_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
cMat->m,cMat->nz,
(const TYPE *) &alpha,
cMat->descr,
cMat->val, cMat->irp, cMat->ja,
cMat->triang,
(const TYPE *) vX, (TYPE *) vY,
CUSPARSE_SOLVE_POLICY_USE_LEVEL,
(void *) cMat->svbuffer));
#else
cusparseDnVecDescr_t vecX, vecY;
size_t bfsz;
vX=x->v_;
vY=y->v_;
CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, cMat->m, vY, CUSPARSE_BASE_TYPE) );
CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, cMat->n, vX, CUSPARSE_BASE_TYPE) );
if (T_CSRGIsNullMvDescr(cMat)) {
cMat->spmvDescr = (cusparseSpMatDescr_t *) malloc(sizeof(cusparseSpMatDescr_t *));
}
T_CSRGCreateSpMVDescr(cMat);
// fprintf(stderr,"Entry to SpSVDevice: %d %p\n",
// T_CSRGIsNullSvDescr(cMat),cMat->spsvDescr);
if (T_CSRGIsNullSvDescr(cMat)) {
cMat->spsvDescr=(cusparseSpSVDescr_t *) malloc(sizeof(cusparseSpSVDescr_t *));
cMat->svbsize=0;
CHECK_CUSPARSE( cusparseSpSV_createDescr(cMat->spsvDescr) );
//fprintf(stderr,"Entry to SpSVDevice: %d %p %d\n",
// T_CSRGIsNullSvDescr(cMat),cMat->spsvDescr,cMat->svbsize);
CHECK_CUSPARSE(cusparseSpSV_bufferSize(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha,*(cMat->spmvDescr),vecX,vecY,
CUSPARSE_BASE_TYPE,
CUSPARSE_SPSV_ALG_DEFAULT,
*(cMat->spsvDescr),
&bfsz));
if (bfsz > cMat->svbsize) {
if (cMat->svbuffer != NULL) {
CHECK_CUDA(cudaFree(cMat->svbuffer));
cMat->svbuffer = NULL;
}
CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz));
cMat->svbsize=bfsz;
CHECK_CUSPARSE(cusparseSpSV_analysis(*my_handle,
CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha,
*(cMat->spmvDescr),
vecX, vecY,
CUSPARSE_BASE_TYPE,
CUSPARSE_SPSV_ALG_DEFAULT,
*(cMat->spsvDescr),
cMat->svbuffer));
}
if (T_CSRGIsNullSvBuffer(cMat)) {
fprintf(stderr,"SpSV_SOLVE NULL spsv-buffer\n");
}
}
CHECK_CUSPARSE(cusparseSpSV_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha,*(cMat->spmvDescr),vecX,vecY,
CUSPARSE_BASE_TYPE,
CUSPARSE_SPSV_ALG_DEFAULT,
*(cMat->spsvDescr)));
CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) );
CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) );
CHECK_CUSPARSE(cusparseDestroySpMat(*(cMat->spmvDescr)));
#endif
}
#if CUDA_VERSION >= 11030
T_CSRGCreateSpMVDescr(T_CSRGDeviceMat *cMat)
{
int64_t tr,tc,tz;
tr = cMat->m;
tc = cMat->n;
tz = cMat->nz;
CHECK_CUSPARSE(cusparseCreateCsr(cMat->spmvDescr,
tr,tc,tz,
(void *) cMat->irp,
(void *) cMat->ja,
(void *) cMat->val,
CUSPARSE_INDEX_32I,
CUSPARSE_INDEX_32I,
CUSPARSE_INDEX_BASE_ONE,
CUSPARSE_BASE_TYPE) );
}
#endif
int T_CSRGDeviceAlloc(T_Cmat *Matrix,int nr, int nc, int nz)
{
T_CSRGDeviceMat *cMat;
int nr1=nr, nz1=nz, rc;
cusparseHandle_t *my_handle=getHandle();
int bfsz;
if ((nr<0)||(nc<0)||(nz<0))
return((int) CUSPARSE_STATUS_INVALID_VALUE);
if ((cMat=(T_CSRGDeviceMat *) malloc(sizeof(T_CSRGDeviceMat)))==NULL)
return((int) CUSPARSE_STATUS_ALLOC_FAILED);
cMat->m = nr;
cMat->n = nc;
cMat->nz = nz;
if (nr1 == 0) nr1 = 1;
if (nz1 == 0) nz1 = 1;
if ((rc= allocRemoteBuffer(((void **) &(cMat->irp)), ((nr1+1)*sizeof(int)))) != 0)
return(rc);
if ((rc= allocRemoteBuffer(((void **) &(cMat->ja)), ((nz1)*sizeof(int)))) != 0)
return(rc);
if ((rc= allocRemoteBuffer(((void **) &(cMat->val)), ((nz1)*sizeof(TYPE)))) != 0)
return(rc);
#if CUDA_SHORT_VERSION <= 10
if ((rc= cusparseCreateMatDescr(&(cMat->descr))) !=0)
return(rc);
if ((rc= cusparseCreateSolveAnalysisInfo(&(cMat->triang))) !=0)
return(rc);
#elif CUDA_VERSION < 11030
if ((rc= cusparseCreateMatDescr(&(cMat->descr))) !=0)
return(rc);
CHECK_CUSPARSE(cusparseSetMatType(cMat->descr,CUSPARSE_MATRIX_TYPE_GENERAL));
CHECK_CUSPARSE(cusparseSetMatDiagType(cMat->descr,CUSPARSE_DIAG_TYPE_NON_UNIT));
CHECK_CUSPARSE(cusparseSetMatIndexBase(cMat->descr,CUSPARSE_INDEX_BASE_ONE));
CHECK_CUSPARSE(cusparseCreateCsrsv2Info(&(cMat->triang)));
if (cMat->nz > 0) {
CHECK_CUSPARSE(cusparseTcsrsv2_bufferSize(*my_handle,
CUSPARSE_OPERATION_NON_TRANSPOSE,
cMat->m,cMat->nz, cMat->descr,
cMat->val, cMat->irp, cMat->ja,
cMat->triang, &bfsz));
} else {
bfsz = 0;
}
/* if (cMat->svbuffer != NULL) { */
/* fprintf(stderr,"Calling cudaFree\n"); */
/* CHECK_CUDA(cudaFree(cMat->svbuffer)); */
/* cMat->svbuffer = NULL; */
/* } */
if (bfsz > 0) {
CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz));
} else {
cMat->svbuffer=NULL;
}
cMat->svbsize=bfsz;
cMat->mvbuffer=NULL;
cMat->mvbsize = 0;
#else
cMat->spmvDescr=NULL;
cMat->spsvDescr=NULL;
cMat->mvbuffer=NULL;
cMat->svbuffer=NULL;
cMat->mvbsize=0;
cMat->svbsize=0;
#endif
Matrix->mat = cMat;
return(CUSPARSE_STATUS_SUCCESS);
}
int T_CSRGDeviceFree(T_Cmat *Matrix)
{
T_CSRGDeviceMat *cMat= Matrix->mat;
if (cMat!=NULL) {
freeRemoteBuffer(cMat->irp);
freeRemoteBuffer(cMat->ja);
freeRemoteBuffer(cMat->val);
#if CUDA_SHORT_VERSION <= 10
cusparseDestroyMatDescr(cMat->descr);
cusparseDestroySolveAnalysisInfo(cMat->triang);
#elif CUDA_VERSION < 11030
cusparseDestroyMatDescr(cMat->descr);
cusparseDestroyCsrsv2Info(cMat->triang);
#else
if (!T_CSRGIsNullMvDescr(cMat)) {
// already destroyed spmvDescr, just free the pointer
free(cMat->spmvDescr);
cMat->spmvDescr=NULL;
}
if (cMat->mvbuffer!=NULL)
CHECK_CUDA( cudaFree(cMat->mvbuffer));
cMat->mvbuffer=NULL;
cMat->mvbsize=0;
if (!T_CSRGIsNullSvDescr(cMat)) {
CHECK_CUSPARSE(cusparseSpSV_destroyDescr(*(cMat->spsvDescr)));
free(cMat->spsvDescr);
cMat->spsvDescr=NULL;
}
if (cMat->svbuffer!=NULL)
CHECK_CUDA( cudaFree(cMat->svbuffer));
cMat->svbuffer=NULL;
cMat->svbsize=0;
#endif
free(cMat);
Matrix->mat = NULL;
}
return(CUSPARSE_STATUS_SUCCESS);
}
int T_CSRGDeviceGetParms(T_Cmat *Matrix,int *nr, int *nc, int *nz)
{
T_CSRGDeviceMat *cMat= Matrix->mat;
if (cMat!=NULL) {
*nr = cMat->m ;
*nc = cMat->n ;
*nz = cMat->nz ;
return(CUSPARSE_STATUS_SUCCESS);
} else {
return((int) CUSPARSE_STATUS_ALLOC_FAILED);
}
}
#if CUDA_SHORT_VERSION <= 10
int T_CSRGDeviceSetMatType(T_Cmat *Matrix, int type)
{
T_CSRGDeviceMat *cMat= Matrix->mat;
return ((int) cusparseSetMatType(cMat->descr,type));
}
int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type)
{
T_CSRGDeviceMat *cMat= Matrix->mat;
return ((int) cusparseSetMatFillMode(cMat->descr,type));
}
int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type)
{
T_CSRGDeviceMat *cMat= Matrix->mat;
return ((int) cusparseSetMatDiagType(cMat->descr,type));
}
int T_CSRGDeviceSetMatIndexBase(T_Cmat *Matrix, int type)
{
T_CSRGDeviceMat *cMat= Matrix->mat;
return ((int) cusparseSetMatIndexBase(cMat->descr,type));
}
int T_CSRGDeviceCsrsmAnalysis(T_Cmat *Matrix)
{
T_CSRGDeviceMat *cMat= Matrix->mat;
int rc, buffersize;
cusparseHandle_t *my_handle=getHandle();
cusparseSolveAnalysisInfo_t info;
rc= (int) cusparseTcsrsv_analysis(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
cMat->m,cMat->nz,cMat->descr,
cMat->val, cMat->irp, cMat->ja,
cMat->triang);
if (rc !=0) {
fprintf(stderr,"From csrsv_analysis: %d\n",rc);
}
return(rc);
}
#elif CUDA_VERSION < 11030
int T_CSRGDeviceSetMatType(T_Cmat *Matrix, int type)
{
T_CSRGDeviceMat *cMat= Matrix->mat;
return ((int) cusparseSetMatType(cMat->descr,type));
}
int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type)
{
T_CSRGDeviceMat *cMat= Matrix->mat;
return ((int) cusparseSetMatFillMode(cMat->descr,type));
}
int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type)
{
T_CSRGDeviceMat *cMat= Matrix->mat;
return ((int) cusparseSetMatDiagType(cMat->descr,type));
}
int T_CSRGDeviceSetMatIndexBase(T_Cmat *Matrix, int type)
{
T_CSRGDeviceMat *cMat= Matrix->mat;
return ((int) cusparseSetMatIndexBase(cMat->descr,type));
}
#else
int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type)
{
T_CSRGDeviceMat *cMat= Matrix->mat;
cusparseFillMode_t mode=type;
CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->spmvDescr,
CUSPARSE_SPMAT_FILL_MODE,
(const void*) &mode,
sizeof(cusparseFillMode_t)));
return(0);
}
int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type)
{
T_CSRGDeviceMat *cMat= Matrix->mat;
cusparseDiagType_t cutype=type;
CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->spmvDescr,
CUSPARSE_SPMAT_DIAG_TYPE,
(const void*) &cutype,
sizeof(cusparseDiagType_t)));
return(0);
}
int T_CSRGIsNullMvDescr(T_CSRGDeviceMat *cMat)
{
return(cMat->spmvDescr == NULL);
}
int T_CSRGIsNullSvBuffer(T_CSRGDeviceMat *cMat)
{
return(cMat->svbuffer == NULL);
}
int T_CSRGIsNullSvDescr(T_CSRGDeviceMat *cMat)
{
return(cMat->spsvDescr == NULL);
}
#endif
int T_CSRGHost2Device(T_Cmat *Matrix, int m, int n, int nz,
int *irp, int *ja, TYPE *val)
{
int rc;
T_CSRGDeviceMat *cMat= Matrix->mat;
cusparseHandle_t *my_handle=getHandle();
if ((rc=writeRemoteBuffer((void *) irp, (void *) cMat->irp,
(m+1)*sizeof(int)))
!= SPGPU_SUCCESS)
return(rc);
if ((rc=writeRemoteBuffer((void *) ja,(void *) cMat->ja,
(nz)*sizeof(int)))
!= SPGPU_SUCCESS)
return(rc);
if ((rc=writeRemoteBuffer((void *) val, (void *) cMat->val,
(nz)*sizeof(TYPE)))
!= SPGPU_SUCCESS)
return(rc);
#if (CUDA_SHORT_VERSION > 10 ) && (CUDA_VERSION < 11030)
if (cusparseGetMatType(cMat->descr)== CUSPARSE_MATRIX_TYPE_TRIANGULAR) {
// Why do we need to set TYPE_GENERAL??? cuSPARSE can be misterious sometimes.
cusparseSetMatType(cMat->descr,CUSPARSE_MATRIX_TYPE_GENERAL);
CHECK_CUSPARSE(cusparseTcsrsv2_analysis(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
cMat->m,cMat->nz, cMat->descr,
cMat->val, cMat->irp, cMat->ja,
cMat->triang, CUSPARSE_SOLVE_POLICY_USE_LEVEL,
cMat->svbuffer));
}
#else
//cusparseSetMatType(*(cMat->spmvDescr),CUSPARSE_MATRIX_TYPE_GENERAL);
#endif
return(CUSPARSE_STATUS_SUCCESS);
}
int T_CSRGDevice2Host(T_Cmat *Matrix, int m, int n, int nz,
int *irp, int *ja, TYPE *val)
{
int rc;
T_CSRGDeviceMat *cMat = Matrix->mat;
if ((rc=readRemoteBuffer((void *) irp, (void *) cMat->irp, (m+1)*sizeof(int)))
!= SPGPU_SUCCESS)
return(rc);
if ((rc=readRemoteBuffer((void *) ja, (void *) cMat->ja, (nz)*sizeof(int)))
!= SPGPU_SUCCESS)
return(rc);
if ((rc=readRemoteBuffer((void *) val, (void *) cMat->val, (nz)*sizeof(TYPE)))
!= SPGPU_SUCCESS)
return(rc);
return(CUSPARSE_STATUS_SUCCESS);
}
#if CUDA_SHORT_VERSION <= 10
int T_HYBGDeviceFree(T_Hmat *Matrix)
{
T_HYBGDeviceMat *hMat= Matrix->mat;
if (hMat != NULL) {
cusparseDestroyMatDescr(hMat->descr);
cusparseDestroySolveAnalysisInfo(hMat->triang);
cusparseDestroyHybMat(hMat->hybA);
free(hMat);
}
Matrix->mat = NULL;
return(CUSPARSE_STATUS_SUCCESS);
}
int T_spmvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX,
TYPE beta, void *deviceY)
{
T_HYBGDeviceMat *hMat=Matrix->mat;
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
void *vX, *vY;
int r,n,rc;
cusparseMatrixType_t type;
cusparseHandle_t *my_handle=getHandle();
/*getAddrMultiVecDevice(deviceX, &vX);
getAddrMultiVecDevice(deviceY, &vY); */
vX=x->v_;
vY=y->v_;
/* rc = (int) cusparseGetMatType(hMat->descr); */
/* fprintf(stderr,"Spmv MatType: %d\n",rc); */
/* rc = (int) cusparseGetMatDiagType(hMat->descr); */
/* fprintf(stderr,"Spmv DiagType: %d\n",rc); */
/* rc = (int) cusparseGetMatFillMode(hMat->descr); */
/* fprintf(stderr,"Spmv FillMode: %d\n",rc); */
/* Dirty trick: apparently hybmv does not accept a triangular
matrix even though it should not make a difference. So
we claim it's general anyway */
type = cusparseGetMatType(hMat->descr);
rc = cusparseSetMatType(hMat->descr,CUSPARSE_MATRIX_TYPE_GENERAL);
if (rc == 0)
rc = (int) cusparseThybmv(*my_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
(const TYPE *) &alpha, hMat->descr, hMat->hybA,
(const TYPE *) vX, (const TYPE *) &beta,
(TYPE *) vY);
if (rc == 0)
rc = cusparseSetMatType(hMat->descr,type);
return(rc);
}
int T_HYBGDeviceAlloc(T_Hmat *Matrix,int nr, int nc, int nz)
{
T_HYBGDeviceMat *hMat;
int nr1=nr, nz1=nz, rc;
if ((nr<0)||(nc<0)||(nz<0))
return((int) CUSPARSE_STATUS_INVALID_VALUE);
if ((hMat=(T_HYBGDeviceMat *) malloc(sizeof(T_HYBGDeviceMat)))==NULL)
return((int) CUSPARSE_STATUS_ALLOC_FAILED);
hMat->m = nr;
hMat->n = nc;
hMat->nz = nz;
if ((rc= cusparseCreateMatDescr(&(hMat->descr))) !=0)
return(rc);
if ((rc= cusparseCreateSolveAnalysisInfo(&(hMat->triang))) !=0)
return(rc);
if((rc = cusparseCreateHybMat(&(hMat->hybA))) != 0)
return(rc);
Matrix->mat = hMat;
return(CUSPARSE_STATUS_SUCCESS);
}
int T_HYBGDeviceSetMatDiagType(T_Hmat *Matrix, int type)
{
T_HYBGDeviceMat *hMat= Matrix->mat;
return ((int) cusparseSetMatDiagType(hMat->descr,type));
}
int T_HYBGDeviceSetMatIndexBase(T_Hmat *Matrix, int type)
{
T_HYBGDeviceMat *hMat= Matrix->mat;
return ((int) cusparseSetMatIndexBase(hMat->descr,type));
}
int T_HYBGDeviceSetMatType(T_Hmat *Matrix, int type)
{
T_HYBGDeviceMat *hMat= Matrix->mat;
return ((int) cusparseSetMatType(hMat->descr,type));
}
int T_HYBGDeviceSetMatFillMode(T_Hmat *Matrix, int type)
{
T_HYBGDeviceMat *hMat= Matrix->mat;
return ((int) cusparseSetMatFillMode(hMat->descr,type));
}
int T_spsvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX,
TYPE beta, void *deviceY)
{
//beta??
T_HYBGDeviceMat *hMat=Matrix->mat;
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
void *vX, *vY;
int r,n;
cusparseHandle_t *my_handle=getHandle();
/*getAddrMultiVecDevice(deviceX, &vX);
getAddrMultiVecDevice(deviceY, &vY); */
vX=x->v_;
vY=y->v_;
return cusparseThybsv_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
(const TYPE *) &alpha, hMat->descr,
hMat->hybA, hMat->triang,
(const TYPE *) vX, (TYPE *) vY);
}
int T_HYBGDeviceHybsmAnalysis(T_Hmat *Matrix)
{
T_HYBGDeviceMat *hMat= Matrix->mat;
cusparseSolveAnalysisInfo_t info;
int rc;
cusparseHandle_t *my_handle=getHandle();
/* rc = (int) cusparseGetMatType(hMat->descr); */
/* fprintf(stderr,"Analysis MatType: %d\n",rc); */
/* rc = (int) cusparseGetMatDiagType(hMat->descr); */
/* fprintf(stderr,"Analysis DiagType: %d\n",rc); */
/* rc = (int) cusparseGetMatFillMode(hMat->descr); */
/* fprintf(stderr,"Analysis FillMode: %d\n",rc); */
rc = (int) cusparseThybsv_analysis(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
hMat->descr, hMat->hybA, hMat->triang);
if (rc !=0) {
fprintf(stderr,"From csrsv_analysis: %d\n",rc);
}
return(rc);
}
int T_HYBGHost2Device(T_Hmat *Matrix, int m, int n, int nz,
int *irp, int *ja, TYPE *val)
{
int rc; double t1,t2;
int nr1=m, nz1=nz;
T_HYBGDeviceMat *hMat= Matrix->mat;
cusparseHandle_t *my_handle=getHandle();
if (nr1 == 0) nr1 = 1;
if (nz1 == 0) nz1 = 1;
if ((rc= allocRemoteBuffer(((void **) &(hMat->irp)), ((nr1+1)*sizeof(int)))) != 0)
return(rc);
if ((rc= allocRemoteBuffer(((void **) &(hMat->ja)), ((nz1)*sizeof(int)))) != 0)
return(rc);
if ((rc= allocRemoteBuffer(((void **) &(hMat->val)), ((nz1)*sizeof(TYPE)))) != 0)
return(rc);
if ((rc=writeRemoteBuffer((void *) irp, (void *) hMat->irp,
(m+1)*sizeof(int)))
!= SPGPU_SUCCESS)
return(rc);
if ((rc=writeRemoteBuffer((void *) ja,(void *) hMat->ja,
(nz)*sizeof(int)))
!= SPGPU_SUCCESS)
return(rc);
if ((rc=writeRemoteBuffer((void *) val, (void *) hMat->val,
(nz)*sizeof(TYPE)))
!= SPGPU_SUCCESS)
return(rc);
/* rc = (int) cusparseGetMatType(hMat->descr); */
/* fprintf(stderr,"Conversion MatType: %d\n",rc); */
/* rc = (int) cusparseGetMatDiagType(hMat->descr); */
/* fprintf(stderr,"Conversion DiagType: %d\n",rc); */
/* rc = (int) cusparseGetMatFillMode(hMat->descr); */
/* fprintf(stderr,"Conversion FillMode: %d\n",rc); */
//t1=etime();
rc = (int) cusparseTcsr2hyb(*my_handle, m, n,
hMat->descr,
(const TYPE *)hMat->val,
(const int *)hMat->irp, (const int *)hMat->ja,
hMat->hybA,0,
CUSPARSE_HYB_PARTITION_AUTO);
freeRemoteBuffer(hMat->irp); hMat->irp = NULL;
freeRemoteBuffer(hMat->ja); hMat->ja = NULL;
freeRemoteBuffer(hMat->val); hMat->val = NULL;
//cudaSync();
//t2 = etime();
//fprintf(stderr,"Inner call to cusparseTcsr2hyb: %lf\n",(t2-t1));
if (rc != 0) {
fprintf(stderr,"From csr2hyb: %d\n",rc);
}
return(rc);
}
#endif

@ -0,0 +1,386 @@
/* Parallel Sparse BLAS GPU plugin */
/* (C) Copyright 2013 */
/* Salvatore Filippone */
/* Alessandro Fanfarillo */
/* Redistribution and use in source and binary forms, with or without */
/* modification, are permitted provided that the following conditions */
/* are met: */
/* 1. Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* 2. Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions, and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution. */
/* 3. The name of the PSBLAS group or the names of its contributors may */
/* not be used to endorse or promote products derived from this */
/* software without specific written permission. */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
#include "hdiagdev.h"
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#define DEBUG 0
void freeHdiagDevice(void* remoteMatrix)
{
struct HdiagDevice *devMat = (struct HdiagDevice *) remoteMatrix;
//fprintf(stderr,"freeHllDevice\n");
if (devMat != NULL) {
freeRemoteBuffer(devMat->hackOffsets);
freeRemoteBuffer(devMat->cM);
free(remoteMatrix);
}
}
HdiagDeviceParams getHdiagDeviceParams(unsigned int rows, unsigned int columns,
unsigned int allocationHeight, unsigned int hackSize,
unsigned int hackCount, unsigned int elementType)
{
HdiagDeviceParams params;
params.elementType = elementType;
//numero di elementi di val
params.rows = rows;
params.columns = columns;
params.allocationHeight = allocationHeight;
params.hackSize = hackSize;
params.hackCount = hackCount;
return params;
}
int allocHdiagDevice(void **remoteMatrix, HdiagDeviceParams* params)
{
struct HdiagDevice *tmp = (struct HdiagDevice *)malloc(sizeof(struct HdiagDevice));
int ret=SPGPU_SUCCESS;
int *tmpOff = NULL;
*remoteMatrix = (void *) tmp;
#if DEBUG
fprintf(stderr,"From alloc: %p\n",*remoteMatrix);
#endif
tmp->rows = params->rows;
tmp->hackSize = params->hackSize;
tmp->cols = params->columns;
tmp->allocationHeight = params->allocationHeight;
tmp->hackCount = params->hackCount;
#if DEBUG
fprintf(stderr,"hackcount %d allocationHeight %d\n",tmp->hackCount,tmp->allocationHeight);
#endif
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->hackOffsets), (tmp->hackCount+1)*sizeof(int));
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->hdiaOffsets), tmp->allocationHeight*sizeof(int));
/* tmp->baseIndex = params->firstIndex; */
if (params->elementType == SPGPU_TYPE_INT)
{
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(int));
}
else if (params->elementType == SPGPU_TYPE_FLOAT)
{
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(float));
}
else if (params->elementType == SPGPU_TYPE_DOUBLE)
{
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(double));
}
else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT)
{
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(cuFloatComplex));
}
else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE)
{
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(cuDoubleComplex));
}
else
return SPGPU_UNSUPPORTED; // Unsupported params
return ret;
}
int FallocHdiagDevice(void** deviceMat, unsigned int rows, unsigned int cols,
unsigned int allocationHeight, unsigned int hackSize,
unsigned int hackCount, unsigned int elementType)
{ int i=0;
HdiagDeviceParams p;
p = getHdiagDeviceParams(rows, cols, allocationHeight, hackSize, hackCount,elementType);
i = allocHdiagDevice(deviceMat, &p);
#if DEBUG
fprintf(stderr," Falloc %p \n",*deviceMat);
#endif
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i);
}
return(i);
}
int writeHdiagDeviceDouble(void* deviceMat, double* val, int* hdiaOffsets, int *hackOffsets)
{ int i=0,fo,fa,j,k,p;
char buf_a[255], buf_o[255],tmp[255];
struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat;
i=SPGPU_SUCCESS;
#if DEBUG
fprintf(stderr," Write %p \n",devMat);
fprintf(stderr,"HDIAG writing to device memory: allocationHeight %d hackCount %d\n",
devMat->allocationHeight,devMat->hackCount);
fprintf(stderr,"HackOffsets: ");
for (j=0; j<devMat->hackCount+1; j++)
fprintf(stderr," %d",hackOffsets[j]);
fprintf(stderr,"\n");
fprintf(stderr,"diaOffsets: ");
for (j=0; j<devMat->allocationHeight; j++)
fprintf(stderr," %d",hdiaOffsets[j]);
fprintf(stderr,"\n");
#if 1
fprintf(stderr,"values: \n");
p=0;
for (j=0; j<devMat->hackCount; j++){
fprintf(stderr,"Hack no: %d\n",j+1);
for (k=0; k<devMat->hackSize*(devMat->allocationHeight/devMat->hackCount); k++){
fprintf(stderr," %d %lf\n",p+1,val[p]); p++;
}
}
fprintf(stderr,"\n");
#endif
#endif
if(i== SPGPU_SUCCESS)
i = writeRemoteBuffer((void *) hackOffsets,(void *) devMat->hackOffsets,
(devMat->hackCount+1)*sizeof(int));
if(i== SPGPU_SUCCESS)
i = writeRemoteBuffer((void*) hdiaOffsets, (void *)devMat->hdiaOffsets,
devMat->allocationHeight*sizeof(int));
if(i== SPGPU_SUCCESS)
i = writeRemoteBuffer((void*) val, (void *)devMat->cM,
devMat->allocationHeight*devMat->hackSize*sizeof(double));
if (i!=0)
fprintf(stderr,"Error in writeHdiagDeviceDouble %d\n",i);
#if DEBUG
fprintf(stderr," EndWrite %p \n",devMat);
#endif
if(i==0)
return SPGPU_SUCCESS;
else
return SPGPU_UNSUPPORTED;
}
long long int sizeofHdiagDeviceDouble(void* deviceMat)
{ int i=0,fo,fa;
int *hoff=NULL,*hackoff=NULL;
long long int memsize=0;
struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat;
memsize += (devMat->hackCount+1)*sizeof(int);
memsize += devMat->allocationHeight*sizeof(int);
memsize += devMat->allocationHeight*devMat->hackSize*sizeof(double);
return(memsize);
}
int readHdiagDeviceDouble(void* deviceMat, double* a, int* off)
{ int i;
struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat;
/* i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(double)); */
/* i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int)); */
/*if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
}*/
return SPGPU_SUCCESS;
}
int spmvHdiagDeviceDouble(void *deviceMat, double alpha, void* deviceX,
double beta, void* deviceY)
{
struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat;
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
spgpuHandle_t handle=psb_cudaGetHandle();
#ifdef VERBOSE
/*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/
/*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/
/*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/
#endif
#if DEBUG
fprintf(stderr," First %p \n",devMat);
fprintf(stderr,"%d %d %d %p %p %p\n",devMat->rows,devMat->cols, devMat->hackSize,
devMat->hackOffsets, devMat->hdiaOffsets, devMat->cM);
#endif
spgpuDhdiaspmv (handle, (double*)y->v_, (double *)y->v_, alpha,
(double *)devMat->cM,devMat->hdiaOffsets,
devMat->hackSize, devMat->hackOffsets, devMat->rows,devMat->cols,
x->v_, beta);
//cudaSync();
return SPGPU_SUCCESS;
}
int writeHdiagDeviceFloat(void* deviceMat, float* val, int* hdiaOffsets, int *hackOffsets)
{ int i=0,fo,fa,j,k,p;
char buf_a[255], buf_o[255],tmp[255];
struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat;
i=SPGPU_SUCCESS;
#if DEBUG
fprintf(stderr," Write %p \n",devMat);
fprintf(stderr,"HDIAG writing to device memory: allocationHeight %d hackCount %d\n",
devMat->allocationHeight,devMat->hackCount);
fprintf(stderr,"HackOffsets: ");
for (j=0; j<devMat->hackCount+1; j++)
fprintf(stderr," %d",hackOffsets[j]);
fprintf(stderr,"\n");
fprintf(stderr,"diaOffsets: ");
for (j=0; j<devMat->allocationHeight; j++)
fprintf(stderr," %d",hdiaOffsets[j]);
fprintf(stderr,"\n");
#if 1
fprintf(stderr,"values: \n");
p=0;
for (j=0; j<devMat->hackCount; j++){
fprintf(stderr,"Hack no: %d\n",j+1);
for (k=0; k<devMat->hackSize*(devMat->allocationHeight/devMat->hackCount); k++){
fprintf(stderr," %d %lf\n",p+1,val[p]); p++;
}
}
fprintf(stderr,"\n");
#endif
#endif
if(i== SPGPU_SUCCESS)
i = writeRemoteBuffer((void *) hackOffsets,(void *) devMat->hackOffsets,
(devMat->hackCount+1)*sizeof(int));
if(i== SPGPU_SUCCESS)
i = writeRemoteBuffer((void*) hdiaOffsets, (void *)devMat->hdiaOffsets,
devMat->allocationHeight*sizeof(int));
if(i== SPGPU_SUCCESS)
i = writeRemoteBuffer((void*) val, (void *)devMat->cM,
devMat->allocationHeight*devMat->hackSize*sizeof(float));
if (i!=0)
fprintf(stderr,"Error in writeHdiagDeviceFloat %d\n",i);
#if DEBUG
fprintf(stderr," EndWrite %p \n",devMat);
#endif
if(i==0)
return SPGPU_SUCCESS;
else
return SPGPU_UNSUPPORTED;
}
long long int sizeofHdiagDeviceFloat(void* deviceMat)
{ int i=0,fo,fa;
int *hoff=NULL,*hackoff=NULL;
long long int memsize=0;
struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat;
memsize += (devMat->hackCount+1)*sizeof(int);
memsize += devMat->allocationHeight*sizeof(int);
memsize += devMat->allocationHeight*devMat->hackSize*sizeof(float);
return(memsize);
}
int readHdiagDeviceFloat(void* deviceMat, float* a, int* off)
{ int i;
struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat;
/* i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(float)); */
/* i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int)); */
/*if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i);
}*/
return SPGPU_SUCCESS;
}
int spmvHdiagDeviceFloat(void *deviceMat, float alpha, void* deviceX,
float beta, void* deviceY)
{
struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat;
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
spgpuHandle_t handle=psb_cudaGetHandle();
#ifdef VERBOSE
/*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/
/*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/
/*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/
#endif
#if DEBUG
fprintf(stderr," First %p \n",devMat);
fprintf(stderr,"%d %d %d %p %p %p\n",devMat->rows,devMat->cols, devMat->hackSize,
devMat->hackOffsets, devMat->hdiaOffsets, devMat->cM);
#endif
spgpuShdiaspmv (handle, (float*)y->v_, (float *)y->v_, alpha,
(float *)devMat->cM,devMat->hdiaOffsets,
devMat->hackSize, devMat->hackOffsets, devMat->rows,devMat->cols,
x->v_, beta);
//cudaSync();
return SPGPU_SUCCESS;
}

@ -0,0 +1,106 @@
/* Parallel Sparse BLAS GPU plugin */
/* (C) Copyright 2013 */
/* Salvatore Filippone */
/* Alessandro Fanfarillo */
/* Redistribution and use in source and binary forms, with or without */
/* modification, are permitted provided that the following conditions */
/* are met: */
/* 1. Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* 2. Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions, and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution. */
/* 3. The name of the PSBLAS group or the names of its contributors may */
/* not be used to endorse or promote products derived from this */
/* software without specific written permission. */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
#ifndef _HDIAGDEV_H_
#define _HDIAGDEV_H_
#include "cintrf.h"
#include "hdia.h"
struct HdiagDevice
{
// Compressed matrix
void *cM; //it can be float or double
// offset (same size of cM)
int *hdiaOffsets;
int *hackOffsets;
int hackCount;
int rows;
int cols;
int hackSize;
int allocationHeight;
};
typedef struct HdiagDeviceParams
{
unsigned int elementType;
// Number of rows.
// Used to allocate rS array
unsigned int rows;
//unsigned int hackOffsLength;
// Number of columns.
// Used for error-checking
unsigned int columns;
unsigned int hackSize;
unsigned int hackCount;
unsigned int allocationHeight;
} HdiagDeviceParams;
HdiagDeviceParams getHdiagDeviceParams(unsigned int rows, unsigned int columns,
unsigned int allocationHeight, unsigned int hackSize,
unsigned int hackCount, unsigned int elementType);
int FallocHdiagDevice(void** deviceMat, unsigned int rows, unsigned int cols,
unsigned int allocationHeight, unsigned int hackSize,
unsigned int hackCount, unsigned int elementType);
int allocHdiagDevice(void ** remoteMatrix, HdiagDeviceParams* params);
void freeHdiagDevice(void* remoteMatrix);
int writeHdiagDeviceFloat(void* deviceMat, float* val, int* hdiaOffsets, int *hackOffsets);
int spmvHdiagDeviceFloat(void *deviceMat, float alpha, void* deviceX,
float beta, void* deviceY);
int writeHdiagDeviceDouble(void* deviceMat, double* val, int* hdiaOffsets, int *hackOffsets);
int spmvHdiagDeviceDouble(void *deviceMat, double alpha, void* deviceX,
double beta, void* deviceY);
#endif

@ -0,0 +1,199 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
module hdiagdev_mod
use iso_c_binding
use core_mod
type, bind(c) :: hdiagdev_parms
integer(c_int) :: element_type
integer(c_int) :: rows
integer(c_int) :: columns
integer(c_int) :: hackSize
integer(c_int) :: hackCount
integer(c_int) :: allocationHeight
end type hdiagdev_parms
! interface computeHdiaHacksCount
! function computeHdiaHacksCountDouble(allocationHeight,hackOffsets,hackSize, &
! & diaValues,diaValuesPitch,diags,rows)&
! & result(res) bind(c,name='computeHdiaHackOffsetsDouble')
! use iso_c_binding
! integer(c_int) :: res
! integer(c_int), value :: rows,diags,diaValuesPitch,hackSize,elementType
! real(c_double) :: diaValues(rows,:)
! integer(c_int) :: hackOffsets,allocationHeight
! end function computeHdiaHacksCountDouble
! end interface computeHdiaHacksCount
interface
function FgetHdiagDeviceParams(rows, columns, allocationHeight,hackSize, &
& hackCount, elementType) &
& result(res) bind(c,name='getHdiagDeviceParams')
use iso_c_binding
import :: hdiagdev_parms
type(hdiagdev_parms) :: res
integer(c_int), value :: rows,columns,allocationHeight,&
& elementType,hackSize,hackCount
end function FgetHdiagDeviceParams
end interface
interface
function FallocHdiagDevice(deviceMat,rows,columns,allocationHeight,&
& hackSize,hackCount,elementType) &
& result(res) bind(c,name='FallocHdiagDevice')
use iso_c_binding
integer(c_int) :: res
integer(c_int), value :: rows,columns,allocationHeight,hackSize,&
& hackCount,elementType
type(c_ptr) :: deviceMat
end function FallocHdiagDevice
end interface
interface
function sizeofHdiagDeviceDouble(deviceMat) &
& result(res) bind(c,name='sizeofHdiagDeviceDouble')
use iso_c_binding
integer(c_long_long) :: res
type(c_ptr), value :: deviceMat
end function sizeofHdiagDeviceDouble
end interface
interface writeHdiagDevice
function writeHdiagDeviceFloat(deviceMat,val,hdiaOffsets, hackOffsets) &
& result(res) bind(c,name='writeHdiagDeviceFloat')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
real(c_float) :: val(*)
integer(c_int) :: hdiaOffsets(*), hackOffsets(*)
end function writeHdiagDeviceFloat
function writeHdiagDeviceDouble(deviceMat,val,hdiaOffsets, hackOffsets) &
& result(res) bind(c,name='writeHdiagDeviceDouble')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
real(c_double) :: val(*)
integer(c_int) :: hdiaOffsets(*), hackOffsets(*)
end function writeHdiagDeviceDouble
end interface writeHdiagDevice
!!$ interface readHdiagDevice
!!$
!!$ function readHdiagDeviceFloat(deviceMat,val,ja,ldj,irn) &
!!$ & result(res) bind(c,name='readHdiagDeviceFloat')
!!$ use iso_c_binding
!!$ integer(c_int) :: res
!!$ type(c_ptr), value :: deviceMat
!!$ integer(c_int), value :: ldj
!!$ real(c_float) :: val(ldj,*)
!!$ integer(c_int) :: ja(ldj,*),irn(*)
!!$ end function readHdiagDeviceFloat
!!$
!!$ function readHdiagDeviceDouble(deviceMat,a,off,n) &
!!$ & result(res) bind(c,name='readHdiagDeviceDouble')
!!$ use iso_c_binding
!!$ integer(c_int) :: res
!!$ type(c_ptr), value :: deviceMat
!!$ integer(c_int),value :: n
!!$ real(c_double) :: a(n,*)
!!$ integer(c_int) :: off(*)
!!$ end function readHdiagDeviceDouble
!!$
!!$ function readHdiagDeviceFloatComplex(deviceMat,val,ja,ldj,irn) &
!!$ & result(res) bind(c,name='readHdiagDeviceFloatComplex')
!!$ use iso_c_binding
!!$ integer(c_int) :: res
!!$ type(c_ptr), value :: deviceMat
!!$ integer(c_int), value :: ldj
!!$ complex(c_float_complex) :: val(ldj,*)
!!$ integer(c_int) :: ja(ldj,*),irn(*)
!!$ end function readHdiagDeviceFloatComplex
!!$
!!$ function readHdiagDeviceDoubleComplex(deviceMat,val,ja,ldj,irn) &
!!$ & result(res) bind(c,name='readHdiagDeviceDoubleComplex')
!!$ use iso_c_binding
!!$ integer(c_int) :: res
!!$ type(c_ptr), value :: deviceMat
!!$ integer(c_int), value :: ldj
!!$ complex(c_double_complex) :: val(ldj,*)
!!$ integer(c_int) :: ja(ldj,*),irn(*)
!!$ end function readHdiagDeviceDoubleComplex
!!$
!!$ end interface readHdiagDevice
!!$
interface
subroutine freeHdiagDevice(deviceMat) &
& bind(c,name='freeHdiagDevice')
use iso_c_binding
type(c_ptr), value :: deviceMat
end subroutine freeHdiagDevice
end interface
interface spmvHdiagDevice
function spmvHdiagDeviceFloat(deviceMat,alpha,x,beta,y) &
& result(res) bind(c,name='spmvHdiagDeviceFloat')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat, x, y
real(c_float),value :: alpha, beta
end function spmvHdiagDeviceFloat
function spmvHdiagDeviceDouble(deviceMat,alpha,x,beta,y) &
& result(res) bind(c,name='spmvHdiagDeviceDouble')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat, x, y
real(c_double),value :: alpha, beta
end function spmvHdiagDeviceDouble
!!$ function spmvHdiagDeviceFloatComplex(deviceMat,alpha,x,beta,y) &
!!$ & result(res) bind(c,name='spmvHdiagDeviceFloatComplex')
!!$ use iso_c_binding
!!$ integer(c_int) :: res
!!$ type(c_ptr), value :: deviceMat, x, y
!!$ complex(c_float_complex),value :: alpha, beta
!!$ end function spmvHdiagDeviceFloatComplex
!!$ function spmvHdiagDeviceDoubleComplex(deviceMat,alpha,x,beta,y) &
!!$ & result(res) bind(c,name='spmvHdiagDeviceDoubleComplex')
!!$ use iso_c_binding
!!$ integer(c_int) :: res
!!$ type(c_ptr), value :: deviceMat, x, y
!!$ complex(c_double_complex),value :: alpha, beta
!!$ end function spmvHdiagDeviceDoubleComplex
end interface spmvHdiagDevice
end module hdiagdev_mod

@ -0,0 +1,540 @@
/* Parallel Sparse BLAS GPU plugin */
/* (C) Copyright 2013 */
/* Salvatore Filippone */
/* Alessandro Fanfarillo */
/* Redistribution and use in source and binary forms, with or without */
/* modification, are permitted provided that the following conditions */
/* are met: */
/* 1. Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* 2. Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions, and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution. */
/* 3. The name of the PSBLAS group or the names of its contributors may */
/* not be used to endorse or promote products derived from this */
/* software without specific written permission. */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
#include "hlldev.h"
//new
HllDeviceParams bldHllDeviceParams(unsigned int hksize, unsigned int rows, unsigned int nzeros,
unsigned int allocsize, unsigned int elementType, unsigned int firstIndex)
{
HllDeviceParams params;
params.elementType = elementType;
params.hackSize = hksize;
//numero di elementi di val
params.allocsize = allocsize;
params.rows = rows;
params.nzt = nzeros;
params.avgNzr = (nzeros+rows-1)/rows;
params.firstIndex = firstIndex;
return params;
}
int getHllDeviceParams(HllDevice* mat, int *hksize, int *rows, int *nzeros,
int *allocsize, int *hackOffsLength, int *firstIndex, int *avgnzr)
{
if (mat!=NULL) {
*hackOffsLength = mat->hackOffsLength ;
*hksize = mat->hackSize ;
*nzeros = mat->nzt ;
*allocsize = mat->allocsize ;
*rows = mat->rows ;
*avgnzr = mat->avgNzr ;
*firstIndex = mat->baseIndex ;
return SPGPU_SUCCESS;
} else {
return SPGPU_UNSUPPORTED;
}
}
//new
int allocHllDevice(void ** remoteMatrix, HllDeviceParams* params)
{
HllDevice *tmp = (HllDevice *)malloc(sizeof(HllDevice));
int ret=SPGPU_SUCCESS;
*remoteMatrix = (void *)tmp;
tmp->hackSize = params->hackSize;
tmp->allocsize = params->allocsize;
tmp->rows = params->rows;
tmp->avgNzr = params->avgNzr;
tmp->nzt = params->nzt;
tmp->baseIndex = params->firstIndex;
//fprintf(stderr,"Allocating HLG with %d avgNzr\n",params->avgNzr);
tmp->hackOffsLength = (int)(tmp->rows+tmp->hackSize-1)/tmp->hackSize;
//printf("hackOffsLength %d\n",tmp->hackOffsLength);
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->rP), tmp->allocsize*sizeof(int));
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->rS), tmp->rows*sizeof(int));
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->diag), tmp->rows*sizeof(int));
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->hackOffs), ((tmp->hackOffsLength+1)*sizeof(int)));
if (params->elementType == SPGPU_TYPE_INT)
{
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(int));
}
else if (params->elementType == SPGPU_TYPE_FLOAT)
{
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(float));
}
else if (params->elementType == SPGPU_TYPE_DOUBLE)
{
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(double));
}
else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT)
{
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuFloatComplex));
}
else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE)
{
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuDoubleComplex));
}
else
return SPGPU_UNSUPPORTED; // Unsupported params
return ret;
}
void freeHllDevice(void* remoteMatrix)
{
HllDevice *devMat = (HllDevice *) remoteMatrix;
//fprintf(stderr,"freeHllDevice\n");
if (devMat != NULL) {
freeRemoteBuffer(devMat->rS);
freeRemoteBuffer(devMat->diag);
freeRemoteBuffer(devMat->rP);
freeRemoteBuffer(devMat->cM);
free(remoteMatrix);
}
}
//new
int FallocHllDevice(void** deviceMat,unsigned int hksize, unsigned int rows, unsigned int nzeros,
unsigned int allocsize,
unsigned int elementType, unsigned int firstIndex)
{ int i;
HllDeviceParams p;
p = bldHllDeviceParams(hksize, rows, nzeros, allocsize, elementType, firstIndex);
i = allocHllDevice(deviceMat, &p);
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i);
}
return(i);
}
int spmvHllDeviceFloat(void *deviceMat, float alpha, void* deviceX,
float beta, void* deviceY)
{
HllDevice *devMat = (HllDevice *) deviceMat;
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
spgpuHandle_t handle=psb_cudaGetHandle();
#ifdef VERBOSE
/*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/
/*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/
/*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/
#endif
/*dspmdmm_gpu ((double *)z->v_, y->count_, y->pitch_, (double *)y->v_, alpha, (double *)devMat->cM,
devMat->rP, devMat->rS, devMat->rows, devMat->pitch, (double *)x->v_, beta,
devMat->baseIndex);*/
spgpuShellspmv (handle, (float *)y->v_, (float *)y->v_, alpha, (float *)devMat->cM,
devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL,
devMat->avgNzr, devMat->rows, (float *)x->v_, beta, devMat->baseIndex);
return SPGPU_SUCCESS;
}
//new
int spmvHllDeviceDouble(void *deviceMat, double alpha, void* deviceX,
double beta, void* deviceY)
{
HllDevice *devMat = (HllDevice *) deviceMat;
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
spgpuHandle_t handle=psb_cudaGetHandle();
#ifdef VERBOSE
/*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/
/*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/
/*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/
#endif
/*dspmdmm_gpu ((double *)z->v_, y->count_, y->pitch_, (double *)y->v_, alpha, (double *)devMat->cM,
devMat->rP, devMat->rS, devMat->rows, devMat->pitch, (double *)x->v_, beta,
devMat->baseIndex);*/
spgpuDhellspmv (handle, (double *)y->v_, (double *)y->v_, alpha, (double*)devMat->cM,
devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL,
devMat->avgNzr, devMat->rows, (double *)x->v_, beta, devMat->baseIndex);
//cudaSync();
return SPGPU_SUCCESS;
}
int spmvHllDeviceFloatComplex(void *deviceMat, float complex alpha, void* deviceX,
float complex beta, void* deviceY)
{
HllDevice *devMat = (HllDevice *) deviceMat;
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
spgpuHandle_t handle=psb_cudaGetHandle();
cuFloatComplex a = make_cuFloatComplex(crealf(alpha),cimagf(alpha));
cuFloatComplex b = make_cuFloatComplex(crealf(beta),cimagf(beta));
#ifdef VERBOSE
/*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/
/*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/
/*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/
#endif
/*dspmdmm_gpu ((double *)z->v_, y->count_, y->pitch_, (double *)y->v_, alpha, (double *)devMat->cM,
devMat->rP, devMat->rS, devMat->rows, devMat->pitch, (double *)x->v_, beta,
devMat->baseIndex);*/
spgpuChellspmv (handle, (cuFloatComplex *)y->v_, (cuFloatComplex *)y->v_, a, (cuFloatComplex *)devMat->cM,
devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL,
devMat->avgNzr, devMat->rows, (cuFloatComplex *)x->v_, b, devMat->baseIndex);
return SPGPU_SUCCESS;
}
int spmvHllDeviceDoubleComplex(void *deviceMat, double complex alpha, void* deviceX,
double complex beta, void* deviceY)
{
HllDevice *devMat = (HllDevice *) deviceMat;
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
spgpuHandle_t handle=psb_cudaGetHandle();
cuDoubleComplex a = make_cuDoubleComplex(creal(alpha),cimag(alpha));
cuDoubleComplex b = make_cuDoubleComplex(creal(beta),cimag(beta));
#ifdef VERBOSE
/*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/
/*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/
/*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/
#endif
spgpuZhellspmv (handle, (cuDoubleComplex *)y->v_, (cuDoubleComplex *)y->v_, a, (cuDoubleComplex *)devMat->cM,
devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL,
devMat->avgNzr,devMat->rows, (cuDoubleComplex *)x->v_, b, devMat->baseIndex);
return SPGPU_SUCCESS;
}
int writeHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag)
{ int i;
HllDevice *devMat = (HllDevice *) deviceMat;
// Ex updateFromHost function
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float));
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
//i = writeEllDevice(deviceMat, (void *) val, ja, irn);
/*if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i);
}*/
return SPGPU_SUCCESS;
}
int writeHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag)
{ int i;
HllDevice *devMat = (HllDevice *) deviceMat;
// Ex updateFromHost function
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double));
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i);
}*/
return SPGPU_SUCCESS;
}
int writeHllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int *hkoffs, int* irn, int *idiag)
{ int i;
HllDevice *devMat = (HllDevice *) deviceMat;
// Ex updateFromHost function
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex));
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i);
}*/
return SPGPU_SUCCESS;
}
int writeHllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int *hkoffs, int* irn, int *idiag)
{ int i;
HllDevice *devMat = (HllDevice *) deviceMat;
// Ex updateFromHost function
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex));
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i);
}*/
return SPGPU_SUCCESS;
}
int readHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag)
{ int i;
HllDevice *devMat = (HllDevice *) deviceMat;
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(float));
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
i = readRemoteBuffer((void *) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
/*i = readEllDevice(deviceMat, (void *) val, ja, irn);
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i);
}*/
return SPGPU_SUCCESS;
}
int readHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag)
{ int i;
HllDevice *devMat = (HllDevice *) deviceMat;
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(double));
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
i = readRemoteBuffer((void *) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
/*if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
}*/
return SPGPU_SUCCESS;
}
int readHllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int *hkoffs, int* irn, int *idiag)
{ int i;
HllDevice *devMat = (HllDevice *) deviceMat;
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex));
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
i = readRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
i = readRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
/*if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
}*/
return SPGPU_SUCCESS;
}
int readHllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int *hkoffs, int* irn, int *idiag)
{ int i;
HllDevice *devMat = (HllDevice *) deviceMat;
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex));
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
i = readRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
i = readRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
/*if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
}*/
return SPGPU_SUCCESS;
}
// New copy routines.
int psiCopyCooToHlgFloat(int nr, int nc, int nza, int hacksz, int noffs, int isz,
int *irn, int *hoffs, int *idisp, int *ja,
float *val, void *deviceMat)
{ int i,j;
spgpuHandle_t handle;
HllDevice *devMat = (HllDevice *) deviceMat;
float *devVal;
int *devIdisp, *devJa;
int *tja;
//fprintf(stderr,"devMat: %p\n",devMat);
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(float));
// fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength);
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(float));
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
//cudaSync();
handle = psb_cudaGetHandle();
psi_cuda_s_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz,
(int *) devMat->rS, (int *) devMat->hackOffs,
devIdisp,devJa,devVal,
(int *) devMat->diag, (int *) devMat->rP, (float *)devMat->cM);
freeRemoteBuffer(devIdisp);
freeRemoteBuffer(devJa);
freeRemoteBuffer(devVal);
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceFloat",i);
}
return SPGPU_SUCCESS;
}
int psiCopyCooToHlgDouble(int nr, int nc, int nza, int hacksz, int noffs, int isz,
int *irn, int *hoffs, int *idisp, int *ja,
double *val, void *deviceMat)
{ int i,j;
spgpuHandle_t handle;
HllDevice *devMat = (HllDevice *) deviceMat;
double *devVal;
int *devIdisp, *devJa;
int *tja;
//fprintf(stderr,"devMat: %p\n",devMat);
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(double));
// fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength);
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(double));
//fprintf(stderr,"WriteRemoteBuffer val %d\n",i);
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
//fprintf(stderr,"WriteRemoteBuffer ja %d\n",i);
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
//fprintf(stderr,"WriteRemoteBuffer irn %d\n",i);
if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
//fprintf(stderr,"WriteRemoteBuffer hoffs %d\n",i);
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
//fprintf(stderr,"WriteRemoteBuffer idisp %d\n",i);
//cudaSync();
//fprintf(stderr," hacksz: %d \n",hacksz);
handle = psb_cudaGetHandle();
psi_cuda_d_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz,
(int *) devMat->rS, (int *) devMat->hackOffs,
devIdisp,devJa,devVal,
(int *) devMat->diag, (int *) devMat->rP, (double *)devMat->cM);
freeRemoteBuffer(devIdisp);
freeRemoteBuffer(devJa);
freeRemoteBuffer(devVal);
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceDouble",i);
}
return SPGPU_SUCCESS;
}
int psiCopyCooToHlgFloatComplex(int nr, int nc, int nza, int hacksz, int noffs, int isz,
int *irn, int *hoffs, int *idisp, int *ja,
float complex *val, void *deviceMat)
{ int i,j;
spgpuHandle_t handle;
HllDevice *devMat = (HllDevice *) deviceMat;
float complex *devVal;
int *devIdisp, *devJa;
int *tja;
//fprintf(stderr,"devMat: %p\n",devMat);
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuFloatComplex));
// fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength);
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuFloatComplex));
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
//cudaSync();
handle = psb_cudaGetHandle();
psi_cuda_c_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz,
(int *) devMat->rS, (int *) devMat->hackOffs,
devIdisp,devJa,devVal,
(int *) devMat->diag,(int *) devMat->rP, (float complex *)devMat->cM);
freeRemoteBuffer(devIdisp);
freeRemoteBuffer(devJa);
freeRemoteBuffer(devVal);
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceFloatComplex",i);
}
return SPGPU_SUCCESS;
}
int psiCopyCooToHlgDoubleComplex(int nr, int nc, int nza, int hacksz, int noffs, int isz,
int *irn, int *hoffs, int *idisp, int *ja,
double complex *val, void *deviceMat)
{ int i,j;
spgpuHandle_t handle;
HllDevice *devMat = (HllDevice *) deviceMat;
double complex *devVal;
int *devIdisp, *devJa;
int *tja;
//fprintf(stderr,"devMat: %p\n",devMat);
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuDoubleComplex));
// fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength);
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuDoubleComplex));
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
//cudaSync();
handle = psb_cudaGetHandle();
psi_cuda_z_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz,
(int *) devMat->rS, (int *) devMat->hackOffs,
devIdisp,devJa,devVal,
(int *) devMat->diag,(int *) devMat->rP, (double complex *)devMat->cM);
freeRemoteBuffer(devIdisp);
freeRemoteBuffer(devJa);
freeRemoteBuffer(devVal);
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceDoubleComplex",i);
}
return SPGPU_SUCCESS;
}

@ -0,0 +1,156 @@
/* Parallel Sparse BLAS GPU plugin */
/* (C) Copyright 2013 */
/* Salvatore Filippone */
/* Alessandro Fanfarillo */
/* Redistribution and use in source and binary forms, with or without */
/* modification, are permitted provided that the following conditions */
/* are met: */
/* 1. Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* 2. Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions, and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution. */
/* 3. The name of the PSBLAS group or the names of its contributors may */
/* not be used to endorse or promote products derived from this */
/* software without specific written permission. */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
#ifndef _HLLDEV_H_
#define _HLLDEV_H_
#include "cintrf.h"
#include "hell.h"
typedef struct hlldevice
{
// Compressed matrix
void *cM; //it can be float or double
// row pointers (same size of cM)
int *rP;
// row size and diagonal position
int *rS;
int *diag;
int *hackOffs;
int rows;
int avgNzr;
int hackOffsLength;
int nzt;
int hackSize; //must be multiple of 32
//matrix size (uncompressed)
//int rows;
//int columns;
//allocation size
int allocsize;
/*(i.e. 0 for C, 1 for Fortran)*/
int baseIndex;
} HllDevice;
typedef struct hlldeviceparams
{
unsigned int elementType;
unsigned int hackSize;
// Number of rows.
// Used to allocate rS array
unsigned int rows;
unsigned int avgNzr;
unsigned int nzt;
//unsigned int hackOffsLength;
// Number of columns.
// Used for error-checking
// unsigned int columns;
unsigned int allocsize;
// First index (e.g 0 or 1)
unsigned int firstIndex;
} HllDeviceParams;
HllDeviceParams bldHllDeviceParams(unsigned int hksize, unsigned int rows, unsigned int nzeros,
unsigned int allocsize,
unsigned int elementType, unsigned int firstIndex);
int getHllDeviceParams(HllDevice* mat, int *hksize, int *rows, int *nzeros,
int *allocsize, int *hackOffsLength, int *firstIndex, int *avgnzr);
int FallocHllDevice(void** deviceMat,unsigned int hksize, unsigned int rows, unsigned int nzeros,
unsigned int allocsize, unsigned int elementType, unsigned int firstIndex);
int allocHllDevice(void ** remoteMatrix, HllDeviceParams* params);
void freeHllDevice(void* remoteMatrix);
int writeHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag);
int writeHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag);
int writeHllDeviceFloatComplex(void* deviceMat, float complex* val,
int* ja, int *hkoffs, int* irn, int *idiag);
int writeHllDeviceDoubleComplex(void* deviceMat, double complex* val,
int* ja, int *hkoffs, int* irn, int *idiag);
int readHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag);
int readHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag);
int readHllDeviceFloatComplex(void* deviceMat, float complex* val,
int* ja, int *hkoffs, int* irn, int *idiag);
int readHllDeviceDoubleComplex(void* deviceMat, double complex* val,
int* ja, int *hkoffs, int* irn, int *idiag);
int psiCopyCooToHlgFloat(int nr, int nc, int nza, int hacksz, int noffs, int isz,
int *irn, int *hoffs, int *idisp, int *ja,
float *val, void *deviceMat);
int psiCopyCooToHlgDouble(int nr, int nc, int nza, int hacksz, int noffs, int isz,
int *irn, int *hoffs, int *idisp, int *ja,
double *val, void *deviceMat);
int psiCopyCooToHlgFloatComplex(int nr, int nc, int nza, int hacksz,
int noffs, int isz, int *irn,
int *hoffs, int *idisp, int *ja,
float complex *val, void *deviceMat);
int psiCopyCooToHlgDoubleComplex(int nr, int nc, int nza, int hacksz,
int noffs, int isz, int *irn,
int *hoffs, int *idisp, int *ja,
double complex *val, void *deviceMat);
int psi_cuda_s_CopyCooToHlg(spgpuHandle_t handle,int nr, int nc, int nza,
int baseIdx, int hacksz, int noffs, int isz,
int *irn, int *hoffs, int *idisp,
int *ja, float *val,
int *idiag, int *rP, float *cM);
int psi_cuda_d_CopyCooToHlg(spgpuHandle_t handle,int nr, int nc, int nza,
int baseIdx, int hacksz, int noffs, int isz,
int *irn, int *hoffs, int *idisp,
int *ja, double *val,
int *idiag, int *rP, double *cM);
int psi_cuda_c_CopyCooToHlg(spgpuHandle_t handle,int nr, int nc, int nza,
int baseIdx, int hacksz, int noffs, int isz,
int *irn, int *hoffs, int *idisp,
int *ja, float complex *val,
int *idiag, int *rP, float complex *cM);
int psi_cuda_z_CopyCooToHlg(spgpuHandle_t handle,int nr, int nc, int nza,
int baseIdx, int hacksz, int noffs, int isz,
int *irn, int *hoffs, int *idisp,
int *ja, double complex *val,
int *idiag, int *rP, double complex *cM);
#endif

@ -0,0 +1,268 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
module hlldev_mod
use iso_c_binding
use core_mod
type, bind(c) :: hlldev_parms
integer(c_int) :: element_type
integer(c_int) :: hackSize
integer(c_int) :: rows
integer(c_int) :: avgNzr
integer(c_int) :: allocsize
integer(c_int) :: firstIndex
end type hlldev_parms
interface
function bldHllDeviceParams(hksize, rows, nzeros, allocsize, elementType, firstIndex) &
& result(res) bind(c,name='bldHllDeviceParams')
use iso_c_binding
import :: hlldev_parms
type(hlldev_parms) :: res
integer(c_int), value :: hksize,rows,nzeros,allocsize,elementType,firstIndex
end function BldHllDeviceParams
end interface
interface
function getHllDeviceParams(deviceMat,hksize, rows, nzeros, allocsize,&
& hackOffsLength, firstIndex,avgnzr) &
& result(res) bind(c,name='getHllDeviceParams')
use iso_c_binding
import :: hlldev_parms
integer(c_int) :: res
type(c_ptr), value :: deviceMat
integer(c_int) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr
end function GetHllDeviceParams
end interface
interface
function FallocHllDevice(deviceMat,hksize,rows, nzeros,allocsize, &
& elementType,firstIndex) &
& result(res) bind(c,name='FallocHllDevice')
use iso_c_binding
integer(c_int) :: res
integer(c_int), value :: hksize,rows,nzeros,allocsize,elementType,firstIndex
type(c_ptr) :: deviceMat
end function FallocHllDevice
end interface
interface writeHllDevice
function writeHllDeviceFloat(deviceMat,val,ja,hkoffs,irn,idiag) &
& result(res) bind(c,name='writeHllDeviceFloat')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
real(c_float) :: val(*)
integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*)
end function writeHllDeviceFloat
function writeHllDeviceDouble(deviceMat,val,ja,hkoffs,irn,idiag) &
& result(res) bind(c,name='writeHllDeviceDouble')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
real(c_double) :: val(*)
integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*)
end function writeHllDeviceDouble
function writeHllDeviceFloatComplex(deviceMat,val,ja,hkoffs,irn,idiag) &
& result(res) bind(c,name='writeHllDeviceFloatComplex')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
complex(c_float_complex) :: val(*)
integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*)
end function writeHllDeviceFloatComplex
function writeHllDeviceDoubleComplex(deviceMat,val,ja,hkoffs,irn,idiag) &
& result(res) bind(c,name='writeHllDeviceDoubleComplex')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
complex(c_double_complex) :: val(*)
integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*)
end function writeHllDeviceDoubleComplex
end interface
interface readHllDevice
function readHllDeviceFloat(deviceMat,val,ja,hkoffs,irn,idiag) &
& result(res) bind(c,name='readHllDeviceFloat')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
real(c_float) :: val(*)
integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*)
end function readHllDeviceFloat
function readHllDeviceDouble(deviceMat,val,ja,hkoffs,irn,idiag) &
& result(res) bind(c,name='readHllDeviceDouble')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
real(c_double) :: val(*)
integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*)
end function readHllDeviceDouble
function readHllDeviceFloatComplex(deviceMat,val,ja,hkoffs,irn,idiag) &
& result(res) bind(c,name='readHllDeviceFloatComplex')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
complex(c_float_complex) :: val(*)
integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*)
end function readHllDeviceFloatComplex
function readHllDeviceDoubleComplex(deviceMat,val,ja,hkoffs,irn,idiag) &
& result(res) bind(c,name='readHllDeviceDoubleComplex')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat
complex(c_double_complex) :: val(*)
integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*)
end function readHllDeviceDoubleComplex
end interface
interface
subroutine freeHllDevice(deviceMat) &
& bind(c,name='freeHllDevice')
use iso_c_binding
type(c_ptr), value :: deviceMat
end subroutine freeHllDevice
end interface
interface psi_CopyCooToHlg
function psiCopyCooToHlgFloat(nr, nc, nza, hacksz, noffs, isz, irn, &
& hoffs, idisp, ja, val, deviceMat) &
& result(res) bind(c,name='psiCopyCooToHlgFloat')
use iso_c_binding
integer(c_int) :: res
integer(c_int), value :: nr,nc,nza,hacksz,noffs,isz
type(c_ptr), value :: deviceMat
real(c_float) :: val(*)
integer(c_int) :: irn(*), idisp(*), ja(*), hoffs(*)
end function psiCopyCooToHlgFloat
function psiCopyCooToHlgDouble(nr, nc, nza, hacksz, noffs, isz, irn, &
& hoffs, idisp, ja, val, deviceMat) &
& result(res) bind(c,name='psiCopyCooToHlgDouble')
use iso_c_binding
integer(c_int) :: res
integer(c_int), value :: nr,nc,nza,hacksz,noffs,isz
type(c_ptr), value :: deviceMat
real(c_double) :: val(*)
integer(c_int) :: irn(*), idisp(*), ja(*), hoffs(*)
end function psiCopyCooToHlgDouble
function psiCopyCooToHlgFloatComplex(nr, nc, nza, hacksz, noffs, isz, irn, &
& hoffs, idisp, ja, val, deviceMat) &
& result(res) bind(c,name='psiCopyCooToHlgFloatComplex')
use iso_c_binding
integer(c_int) :: res
integer(c_int), value :: nr,nc,nza,hacksz,noffs,isz
type(c_ptr), value :: deviceMat
complex(c_float_complex) :: val(*)
integer(c_int) :: irn(*), idisp(*), ja(*), hoffs(*)
end function psiCopyCooToHlgFloatComplex
function psiCopyCooToHlgDoubleComplex(nr, nc, nza, hacksz, noffs, isz, irn, &
& hoffs, idisp, ja, val, deviceMat) &
& result(res) bind(c,name='psiCopyCooToHlgDoubleComplex')
use iso_c_binding
integer(c_int) :: res
integer(c_int), value :: nr,nc,nza,hacksz,noffs,isz
type(c_ptr), value :: deviceMat
complex(c_double_complex) :: val(*)
integer(c_int) :: irn(*), idisp(*), ja(*), hoffs(*)
end function psiCopyCooToHlgDoubleComplex
end interface
!interface
! function getHllDevicePitch(deviceMat) &
! & bind(c,name='getHllDevicePitch') result(res)
! use iso_c_binding
! type(c_ptr), value :: deviceMat
! integer(c_int) :: res
! end function getHllDevicePitch
!end interface
!interface
! function getHllDeviceMaxRowSize(deviceMat) &
! & bind(c,name='getHllDeviceMaxRowSize') result(res)
! use iso_c_binding
! type(c_ptr), value :: deviceMat
! integer(c_int) :: res
! end function getHllDeviceMaxRowSize
!end interface
interface spmvHllDevice
function spmvHllDeviceFloat(deviceMat,alpha,x,beta,y) &
& result(res) bind(c,name='spmvHllDeviceFloat')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat, x, y
real(c_float),value :: alpha, beta
end function spmvHllDeviceFloat
function spmvHllDeviceDouble(deviceMat,alpha,x,beta,y) &
& result(res) bind(c,name='spmvHllDeviceDouble')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat, x, y
real(c_double),value :: alpha, beta
end function spmvHllDeviceDouble
function spmvHllDeviceFloatComplex(deviceMat,alpha,x,beta,y) &
& result(res) bind(c,name='spmvHllDeviceFloatComplex')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat, x, y
complex(c_float_complex),value :: alpha, beta
end function spmvHllDeviceFloatComplex
function spmvHllDeviceDoubleComplex(deviceMat,alpha,x,beta,y) &
& result(res) bind(c,name='spmvHllDeviceDoubleComplex')
use iso_c_binding
integer(c_int) :: res
type(c_ptr), value :: deviceMat, x, y
complex(c_double_complex),value :: alpha, beta
end function spmvHllDeviceDoubleComplex
end interface
end module hlldev_mod

@ -0,0 +1,297 @@
include ../../Make.inc
LIBDIR=../../lib
INCDIR=../../include
MODDIR=../../modules
PSBLAS_LIB= -L$(PSBLIBDIR) -lpsb_util -lpsb_base
#-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base
LDLIBS=$(PSBLDLIBS)
#
# Compilers and such
#
#CCOPT= -g
FINCLUDES=$(FMFLAG).. $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FIFLAG)..
CINCLUDES=-I$(GPU_INCDIR) -I$(CUDA_INCDIR)
LIBNAME=libpsb_gpu.a
CXXDEFINES=$(PSBCXXDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES)
CDEFINES=$(PSBCDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES)
FDEFINES=$(PSBFDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES)
OBJS= \
psb_d_cuda_cp_csrg_from_coo.o \
psb_d_cuda_cp_csrg_from_fmt.o \
psb_d_cuda_cp_elg_from_coo.o \
psb_d_cuda_cp_elg_from_fmt.o \
psb_s_cuda_cp_csrg_from_coo.o \
psb_s_cuda_cp_csrg_from_fmt.o \
psb_s_cuda_csrg_allocate_mnnz.o \
psb_s_cuda_csrg_csmm.o \
psb_s_cuda_csrg_csmv.o \
psb_s_cuda_csrg_mold.o \
psb_s_cuda_csrg_reallocate_nz.o \
psb_s_cuda_csrg_scal.o \
psb_s_cuda_csrg_scals.o \
psb_s_cuda_csrg_from_gpu.o \
psb_s_cuda_csrg_to_gpu.o \
psb_s_cuda_csrg_vect_mv.o \
psb_s_cuda_csrg_inner_vect_sv.o \
psb_d_cuda_csrg_allocate_mnnz.o \
psb_d_cuda_csrg_csmm.o \
psb_d_cuda_csrg_csmv.o \
psb_d_cuda_csrg_mold.o \
psb_d_cuda_csrg_reallocate_nz.o \
psb_d_cuda_csrg_scal.o \
psb_d_cuda_csrg_scals.o \
psb_d_cuda_csrg_from_gpu.o \
psb_d_cuda_csrg_to_gpu.o \
psb_d_cuda_csrg_vect_mv.o \
psb_d_cuda_csrg_inner_vect_sv.o \
psb_d_cuda_elg_allocate_mnnz.o \
psb_d_cuda_elg_asb.o \
psb_d_cuda_elg_csmm.o \
psb_d_cuda_elg_csmv.o \
psb_d_cuda_elg_csput.o \
psb_d_cuda_elg_from_gpu.o \
psb_d_cuda_elg_inner_vect_sv.o \
psb_d_cuda_elg_mold.o \
psb_d_cuda_elg_reallocate_nz.o \
psb_d_cuda_elg_scal.o \
psb_d_cuda_elg_scals.o \
psb_d_cuda_elg_to_gpu.o \
psb_d_cuda_elg_vect_mv.o \
psb_d_cuda_mv_csrg_from_coo.o \
psb_d_cuda_mv_csrg_from_fmt.o \
psb_d_cuda_mv_elg_from_coo.o \
psb_d_cuda_mv_elg_from_fmt.o \
psb_s_cuda_mv_csrg_from_coo.o \
psb_s_cuda_mv_csrg_from_fmt.o \
psb_s_cuda_cp_elg_from_coo.o \
psb_s_cuda_cp_elg_from_fmt.o \
psb_s_cuda_elg_allocate_mnnz.o \
psb_s_cuda_elg_asb.o \
psb_s_cuda_elg_csmm.o \
psb_s_cuda_elg_csmv.o \
psb_s_cuda_elg_csput.o \
psb_s_cuda_elg_inner_vect_sv.o \
psb_s_cuda_elg_mold.o \
psb_s_cuda_elg_reallocate_nz.o \
psb_s_cuda_elg_scal.o \
psb_s_cuda_elg_scals.o \
psb_s_cuda_elg_to_gpu.o \
psb_s_cuda_elg_from_gpu.o \
psb_s_cuda_elg_vect_mv.o \
psb_s_cuda_mv_elg_from_coo.o \
psb_s_cuda_mv_elg_from_fmt.o \
psb_s_cuda_cp_hlg_from_fmt.o \
psb_s_cuda_cp_hlg_from_coo.o \
psb_d_cuda_cp_hlg_from_fmt.o \
psb_d_cuda_cp_hlg_from_coo.o \
psb_d_cuda_hlg_allocate_mnnz.o \
psb_d_cuda_hlg_csmm.o \
psb_d_cuda_hlg_csmv.o \
psb_d_cuda_hlg_inner_vect_sv.o \
psb_d_cuda_hlg_mold.o \
psb_d_cuda_hlg_reallocate_nz.o \
psb_d_cuda_hlg_scal.o \
psb_d_cuda_hlg_scals.o \
psb_d_cuda_hlg_from_gpu.o \
psb_d_cuda_hlg_to_gpu.o \
psb_d_cuda_hlg_vect_mv.o \
psb_s_cuda_hlg_allocate_mnnz.o \
psb_s_cuda_hlg_csmm.o \
psb_s_cuda_hlg_csmv.o \
psb_s_cuda_hlg_inner_vect_sv.o \
psb_s_cuda_hlg_mold.o \
psb_s_cuda_hlg_reallocate_nz.o \
psb_s_cuda_hlg_scal.o \
psb_s_cuda_hlg_scals.o \
psb_s_cuda_hlg_from_gpu.o \
psb_s_cuda_hlg_to_gpu.o \
psb_s_cuda_hlg_vect_mv.o \
psb_s_cuda_mv_hlg_from_coo.o \
psb_s_cuda_cp_hlg_from_coo.o \
psb_s_cuda_mv_hlg_from_fmt.o \
psb_d_cuda_mv_hlg_from_coo.o \
psb_d_cuda_cp_hlg_from_coo.o \
psb_d_cuda_mv_hlg_from_fmt.o \
psb_s_cuda_hybg_allocate_mnnz.o \
psb_s_cuda_hybg_csmm.o \
psb_s_cuda_hybg_csmv.o \
psb_s_cuda_hybg_reallocate_nz.o \
psb_s_cuda_hybg_scal.o \
psb_s_cuda_hybg_scals.o \
psb_s_cuda_hybg_to_gpu.o \
psb_s_cuda_hybg_vect_mv.o \
psb_s_cuda_hybg_inner_vect_sv.o \
psb_s_cuda_cp_hybg_from_coo.o \
psb_s_cuda_cp_hybg_from_fmt.o \
psb_s_cuda_mv_hybg_from_fmt.o \
psb_s_cuda_mv_hybg_from_coo.o \
psb_s_cuda_hybg_mold.o \
psb_d_cuda_hybg_allocate_mnnz.o \
psb_d_cuda_hybg_csmm.o \
psb_d_cuda_hybg_csmv.o \
psb_d_cuda_hybg_reallocate_nz.o \
psb_d_cuda_hybg_scal.o \
psb_d_cuda_hybg_scals.o \
psb_d_cuda_hybg_to_gpu.o \
psb_d_cuda_hybg_vect_mv.o \
psb_d_cuda_hybg_inner_vect_sv.o \
psb_d_cuda_cp_hybg_from_coo.o \
psb_d_cuda_cp_hybg_from_fmt.o \
psb_d_cuda_mv_hybg_from_fmt.o \
psb_d_cuda_mv_hybg_from_coo.o \
psb_d_cuda_hybg_mold.o \
psb_z_cuda_cp_csrg_from_coo.o \
psb_z_cuda_cp_csrg_from_fmt.o \
psb_z_cuda_cp_elg_from_coo.o \
psb_z_cuda_cp_elg_from_fmt.o \
psb_c_cuda_cp_csrg_from_coo.o \
psb_c_cuda_cp_csrg_from_fmt.o \
psb_c_cuda_csrg_allocate_mnnz.o \
psb_c_cuda_csrg_csmm.o \
psb_c_cuda_csrg_csmv.o \
psb_c_cuda_csrg_mold.o \
psb_c_cuda_csrg_reallocate_nz.o \
psb_c_cuda_csrg_scal.o \
psb_c_cuda_csrg_scals.o \
psb_c_cuda_csrg_from_gpu.o \
psb_c_cuda_csrg_to_gpu.o \
psb_c_cuda_csrg_vect_mv.o \
psb_c_cuda_csrg_inner_vect_sv.o \
psb_z_cuda_csrg_allocate_mnnz.o \
psb_z_cuda_csrg_csmm.o \
psb_z_cuda_csrg_csmv.o \
psb_z_cuda_csrg_mold.o \
psb_z_cuda_csrg_reallocate_nz.o \
psb_z_cuda_csrg_scal.o \
psb_z_cuda_csrg_scals.o \
psb_z_cuda_csrg_from_gpu.o \
psb_z_cuda_csrg_to_gpu.o \
psb_z_cuda_csrg_vect_mv.o \
psb_z_cuda_csrg_inner_vect_sv.o \
psb_z_cuda_elg_allocate_mnnz.o \
psb_z_cuda_elg_asb.o \
psb_z_cuda_elg_csmm.o \
psb_z_cuda_elg_csmv.o \
psb_z_cuda_elg_csput.o \
psb_z_cuda_elg_inner_vect_sv.o \
psb_z_cuda_elg_mold.o \
psb_z_cuda_elg_reallocate_nz.o \
psb_z_cuda_elg_scal.o \
psb_z_cuda_elg_scals.o \
psb_z_cuda_elg_to_gpu.o \
psb_z_cuda_elg_from_gpu.o \
psb_z_cuda_elg_vect_mv.o \
psb_z_cuda_mv_csrg_from_coo.o \
psb_z_cuda_mv_csrg_from_fmt.o \
psb_z_cuda_mv_elg_from_coo.o \
psb_z_cuda_mv_elg_from_fmt.o \
psb_c_cuda_mv_csrg_from_coo.o \
psb_c_cuda_mv_csrg_from_fmt.o \
psb_c_cuda_cp_elg_from_coo.o \
psb_c_cuda_cp_elg_from_fmt.o \
psb_c_cuda_elg_allocate_mnnz.o \
psb_c_cuda_elg_asb.o \
psb_c_cuda_elg_csmm.o \
psb_c_cuda_elg_csmv.o \
psb_c_cuda_elg_csput.o \
psb_c_cuda_elg_inner_vect_sv.o \
psb_c_cuda_elg_mold.o \
psb_c_cuda_elg_reallocate_nz.o \
psb_c_cuda_elg_scal.o \
psb_c_cuda_elg_scals.o \
psb_c_cuda_elg_to_gpu.o \
psb_c_cuda_elg_from_gpu.o \
psb_c_cuda_elg_vect_mv.o \
psb_c_cuda_mv_elg_from_coo.o \
psb_c_cuda_mv_elg_from_fmt.o \
psb_c_cuda_cp_hlg_from_fmt.o \
psb_c_cuda_cp_hlg_from_coo.o \
psb_z_cuda_cp_hlg_from_fmt.o \
psb_z_cuda_cp_hlg_from_coo.o \
psb_z_cuda_hlg_allocate_mnnz.o \
psb_z_cuda_hlg_csmm.o \
psb_z_cuda_hlg_csmv.o \
psb_z_cuda_hlg_inner_vect_sv.o \
psb_z_cuda_hlg_mold.o \
psb_z_cuda_hlg_reallocate_nz.o \
psb_z_cuda_hlg_scal.o \
psb_z_cuda_hlg_scals.o \
psb_z_cuda_hlg_from_gpu.o \
psb_z_cuda_hlg_to_gpu.o \
psb_z_cuda_hlg_vect_mv.o \
psb_c_cuda_hlg_allocate_mnnz.o \
psb_c_cuda_hlg_csmm.o \
psb_c_cuda_hlg_csmv.o \
psb_c_cuda_hlg_inner_vect_sv.o \
psb_c_cuda_hlg_mold.o \
psb_c_cuda_hlg_reallocate_nz.o \
psb_c_cuda_hlg_scal.o \
psb_c_cuda_hlg_scals.o \
psb_c_cuda_hlg_from_gpu.o \
psb_c_cuda_hlg_to_gpu.o \
psb_c_cuda_hlg_vect_mv.o \
psb_c_cuda_mv_hlg_from_coo.o \
psb_c_cuda_cp_hlg_from_coo.o \
psb_c_cuda_mv_hlg_from_fmt.o \
psb_z_cuda_mv_hlg_from_coo.o \
psb_z_cuda_cp_hlg_from_coo.o \
psb_z_cuda_mv_hlg_from_fmt.o \
psb_c_cuda_hybg_allocate_mnnz.o \
psb_c_cuda_hybg_csmm.o \
psb_c_cuda_hybg_csmv.o \
psb_c_cuda_hybg_reallocate_nz.o \
psb_c_cuda_hybg_scal.o \
psb_c_cuda_hybg_scals.o \
psb_c_cuda_hybg_to_gpu.o \
psb_c_cuda_hybg_vect_mv.o \
psb_c_cuda_hybg_inner_vect_sv.o \
psb_c_cuda_cp_hybg_from_coo.o \
psb_c_cuda_cp_hybg_from_fmt.o \
psb_c_cuda_mv_hybg_from_fmt.o \
psb_c_cuda_mv_hybg_from_coo.o \
psb_c_cuda_hybg_mold.o \
psb_z_cuda_hybg_allocate_mnnz.o \
psb_z_cuda_hybg_csmm.o \
psb_z_cuda_hybg_csmv.o \
psb_z_cuda_hybg_reallocate_nz.o \
psb_z_cuda_hybg_scal.o \
psb_z_cuda_hybg_scals.o \
psb_z_cuda_hybg_to_gpu.o \
psb_z_cuda_hybg_vect_mv.o \
psb_z_cuda_hybg_inner_vect_sv.o \
psb_z_cuda_cp_hybg_from_coo.o \
psb_z_cuda_cp_hybg_from_fmt.o \
psb_z_cuda_mv_hybg_from_fmt.o \
psb_z_cuda_mv_hybg_from_coo.o \
psb_z_cuda_hybg_mold.o \
psb_d_cuda_cp_diag_from_coo.o \
psb_d_cuda_mv_diag_from_coo.o \
psb_d_cuda_diag_to_gpu.o \
psb_d_cuda_diag_csmv.o \
psb_d_cuda_diag_mold.o \
psb_d_cuda_diag_vect_mv.o \
psb_d_cuda_cp_hdiag_from_coo.o \
psb_d_cuda_mv_hdiag_from_coo.o \
psb_d_cuda_hdiag_to_gpu.o \
psb_d_cuda_hdiag_csmv.o \
psb_d_cuda_hdiag_mold.o \
psb_d_cuda_hdiag_vect_mv.o \
psb_s_cuda_cp_hdiag_from_coo.o \
psb_s_cuda_mv_hdiag_from_coo.o \
psb_s_cuda_hdiag_to_gpu.o \
psb_s_cuda_hdiag_csmv.o \
psb_s_cuda_hdiag_mold.o \
psb_s_cuda_hdiag_vect_mv.o \
psb_s_cuda_dnsg_mat_impl.o \
psb_d_cuda_dnsg_mat_impl.o \
psb_c_cuda_dnsg_mat_impl.o \
psb_z_cuda_dnsg_mat_impl.o
objs: $(OBJS)
lib: objs
ar cur ../$(LIBNAME) $(OBJS)
clean:
/bin/rm -f $(OBJS)

@ -0,0 +1,56 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_cp_csrg_from_coo(a,b,info)
use psb_base_mod
use cusparse_mod
use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_cp_csrg_from_coo
implicit none
class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
class(psb_c_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_c_csr_sparse_mat%cp_from_coo(b,info)
if (info /= 0) goto 9999
call a%to_gpu(info)
if (info /= 0) goto 9999
return
9999 continue
info = psb_err_alloc_dealloc_
return
end subroutine psb_c_cuda_cp_csrg_from_coo

@ -0,0 +1,55 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_cp_csrg_from_fmt(a,b,info)
use psb_base_mod
use cusparse_mod
use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_cp_csrg_from_fmt
!use iso_c_binding
implicit none
class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
class(psb_c_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
select type(b)
type is (psb_c_coo_sparse_mat)
call a%cp_from_coo(b,info)
class default
call a%psb_c_csr_sparse_mat%cp_from_fmt(b,info)
if (info /= 0) return
call a%to_gpu(info)
end select
end subroutine psb_c_cuda_cp_csrg_from_fmt

@ -0,0 +1,58 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_cp_diag_from_coo(a,b,info)
use psb_base_mod
use diagdev_mod
use psb_vectordev_mod
use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_cp_diag_from_coo
implicit none
class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a
class(psb_c_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
!locals
info = psb_success_
call a%psb_c_dia_sparse_mat%cp_from_coo(b,info)
call a%to_gpu(info)
if (info /= 0) goto 9999
return
9999 continue
info = psb_err_alloc_dealloc_
return
end subroutine psb_c_cuda_cp_diag_from_coo

@ -0,0 +1,161 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_cp_elg_from_coo(a,b,info)
use psb_base_mod
use elldev_mod
use psb_vectordev_mod
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_cp_elg_from_coo
use psi_ext_util_mod
use psb_cuda_env_mod
implicit none
class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
class(psb_c_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
!locals
Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, &
& ir, ic, ld, ldv, hacksize
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name
type(psb_c_coo_sparse_mat) :: tmp
integer(psb_ipk_), allocatable :: idisp(:)
info = psb_success_
hacksize = max(1,psb_cuda_WarpSize())
if (b%is_dev()) call b%sync()
if (b%is_by_rows()) then
call psi_c_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize=hacksize)
if (c_associated(a%deviceMat)) then
call freeEllDevice(a%deviceMat)
endif
nr = b%get_nrows()
nc = b%get_ncols()
nza = b%get_nzeros()
info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_double,1)
if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, &
& a%irn,idisp,b%ja,b%val, a%deviceMat)
call a%set_dev()
else
call b%cp_to_coo(tmp,info)
call psi_c_count_ell_from_coo(a,tmp,idisp,ldv,nzm,info,hacksize=hacksize)
if (c_associated(a%deviceMat)) then
call freeEllDevice(a%deviceMat)
endif
nr = b%get_nrows()
nc = b%get_ncols()
nza = b%get_nzeros()
info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_double,1)
if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, &
& a%irn,idisp,tmp%ja,tmp%val, a%deviceMat)
call a%set_dev()
end if
if (info /= psb_success_) goto 9999
return
9999 continue
info = psb_err_alloc_dealloc_
return
contains
subroutine psi_c_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize)
use psb_base_mod
use psi_ext_util_mod
implicit none
class(psb_c_ell_sparse_mat), intent(inout) :: a
class(psb_c_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), allocatable, intent(out) :: idisp(:)
integer(psb_ipk_), intent(out) :: info, nzm, ldv
integer(psb_ipk_), intent(in), optional :: hacksize
!locals
Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, &
& ir, ic, hsz_
real(psb_dpk_) :: t0,t1
logical, parameter :: timing=.true.
info = psb_success_
nr = b%get_nrows()
nc = b%get_ncols()
nza = b%get_nzeros()
hsz_ = 1
if (present(hacksize)) then
if (hacksize> 1) hsz_ = hacksize
end if
! Make ldv a multiple of hacksize
ldv = ((nr+hsz_-1)/hsz_)*hsz_
! If it is sorted then we can lessen memory impact
a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
! First compute the number of nonzeros in each row.
call psb_realloc(nr,a%irn,info)
if (info == psb_success_) call psb_realloc(nr+1,idisp,info)
if (info /= psb_success_) return
if (timing) t0=psb_wtime()
a%irn = 0
do i=1, nza
ir = b%ia(i)
a%irn(ir) = a%irn(ir) + 1
end do
nzm = 0
a%nzt = 0
idisp(1) = 0
do i=1,nr
nzm = max(nzm,a%irn(i))
a%nzt = a%nzt + a%irn(i)
idisp(i+1) = a%nzt
end do
end subroutine psi_c_count_ell_from_coo
end subroutine psb_c_cuda_cp_elg_from_coo

@ -0,0 +1,89 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_cp_elg_from_fmt(a,b,info)
use psb_base_mod
use elldev_mod
use psb_vectordev_mod
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_cp_elg_from_fmt
implicit none
class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
class(psb_c_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
!locals
type(psb_c_coo_sparse_mat) :: tmp
Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name
type(elldev_parms) :: gpu_parms
info = psb_success_
if (b%is_dev()) call b%sync()
select type (b)
type is (psb_c_coo_sparse_mat)
call a%cp_from_coo(b,info)
class is (psb_c_ell_sparse_mat)
nzm = psb_size(b%ja,2)
m = b%get_nrows()
nc = b%get_ncols()
nza = b%get_nzeros()
gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1)
ld = gpu_parms%pitch
nzm = gpu_parms%maxRowSize
a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info)
if (info == 0) call psb_safe_cpy( b%irn, a%irn , info)
if (info == 0) call psb_safe_cpy( b%ja , a%ja , info)
if (info == 0) call psb_safe_cpy( b%val, a%val , info)
if (info == 0) call psb_realloc(ld,nzm,a%ja,info)
if (info == 0) then
a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm)
end if
if (info == 0) call psb_realloc(ld,nzm,a%val,info)
if (info == 0) then
a%val(1:m,1:nzm) = b%val(1:m,1:nzm)
end if
a%nzt = nza
call a%to_gpu(info)
class default
call b%cp_to_coo(tmp,info)
if (info == psb_success_) call a%mv_from_coo(tmp,info)
end select
end subroutine psb_c_cuda_cp_elg_from_fmt

@ -0,0 +1,63 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_cp_hdiag_from_coo(a,b,info)
use psb_base_mod
use hdiagdev_mod
use psb_vectordev_mod
use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_cp_hdiag_from_coo
use psb_cuda_env_mod
implicit none
class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a
class(psb_c_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
!locals
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name
info = psb_success_
a%hacksize = psb_cuda_WarpSize()
call a%psb_c_hdia_sparse_mat%cp_from_coo(b,info)
call a%to_gpu(info)
if (info /= 0) goto 9999
return
9999 continue
info = psb_err_alloc_dealloc_
return
end subroutine psb_c_cuda_cp_hdiag_from_coo

@ -0,0 +1,190 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_cp_hlg_from_coo(a,b,info)
use psb_base_mod
use hlldev_mod
use psb_vectordev_mod
use psb_cuda_env_mod
use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_cp_hlg_from_coo
implicit none
class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a
class(psb_c_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
!locals
type(psb_c_coo_sparse_mat) :: tmp
integer(psb_ipk_) :: debug_level, debug_unit, hksz
integer(psb_ipk_), allocatable :: idisp(:)
character(len=20) :: name='hll_from_coo'
Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, isz,irs
integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, noffs, kc
integer(psb_ipk_), allocatable :: irn(:), ja(:), hko(:)
real(psb_dpk_), allocatable :: val(:)
logical, parameter :: debug=.false.
info = psb_success_
debug_unit = psb_get_debug_unit()
debug_level = psb_get_debug_level()
hksz = max(1,psb_cuda_WarpSize())
if (b%is_by_rows()) then
nr = b%get_nrows()
nc = b%get_ncols()
nza = b%get_nzeros()
if (debug) write(0,*) 'Copying through GPU',nza
call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info)
if (info /=0) then
write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz
return
end if
if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1))
if (c_associated(a%deviceMat)) then
call freeHllDevice(a%deviceMat)
endif
info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1)
if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,&
& a%irn,a%hkoffs,idisp,b%ja, b%val, a%deviceMat)
call a%set_dev()
else
! This is to guarantee tmp%is_by_rows()
call b%cp_to_coo(tmp,info)
call tmp%fix(info)
nr = tmp%get_nrows()
nc = tmp%get_ncols()
nza = tmp%get_nzeros()
if (debug) write(0,*) 'Copying through GPU'
call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,tmp,info)
if (info /=0) then
write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz
return
end if
if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1))
if (c_associated(a%deviceMat)) then
call freeHllDevice(a%deviceMat)
endif
info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1)
if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,&
& a%irn,a%hkoffs,idisp,tmp%ja, tmp%val, a%deviceMat)
call tmp%free()
call a%set_dev()
end if
if (info /= 0) goto 9999
return
9999 continue
info = psb_err_alloc_dealloc_
return
contains
subroutine psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info)
use psb_base_mod
use psi_ext_util_mod
implicit none
class(psb_c_hll_sparse_mat), intent(inout) :: a
class(psb_c_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), allocatable, intent(out) :: idisp(:)
integer(psb_ipk_), intent(in) :: hksz
integer(psb_ipk_), intent(out) :: info, noffs, isz
!locals
Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, irs
integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, kc
logical, parameter :: debug=.false.
info = 0
nr = b%get_nrows()
nc = b%get_ncols()
nza = b%get_nzeros()
! If it is sorted then we can lessen memory impact
a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
if (debug) write(0,*) 'Start compute hckoff_from_coo',nr,nc,nza
! First compute the number of nonzeros in each row.
call psb_realloc(nr,a%irn,info)
if (info == 0) call psb_realloc(nr+1,idisp,info)
if (info /= 0) return
a%irn = 0
if (debug) then
do i=1, nza
if ((1<=b%ia(i)).and.(b%ia(i)<= nr)) then
a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1
else
write(0,*) 'Out of bouds IA ',i,b%ia(i),nr
end if
end do
else
do i=1, nza
a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1
end do
end if
a%nzt = nza
! Second. Figure out the block offsets.
call a%set_hksz(hksz)
noffs = (nr+hksz-1)/hksz
call psb_realloc(noffs+1,a%hkoffs,info)
if (debug) write(0,*) ' noffsets ',noffs,info
if (info /= 0) return
a%hkoffs(1) = 0
j=1
idisp(1) = 0
do i=1,nr,hksz
ir = min(hksz,nr-i+1)
mxrwl = a%irn(i)
idisp(i+1) = idisp(i) + a%irn(i)
do k=1,ir-1
idisp(i+k+1) = idisp(i+k) + a%irn(i+k)
mxrwl = max(mxrwl,a%irn(i+k))
end do
a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz
j = j + 1
end do
!
! At this point a%hkoffs(noffs+1) contains the allocation
! size a%ja a%val.
!
isz = a%hkoffs(noffs+1)
!!$ write(*,*) 'End of psi_comput_hckoff ',info
end subroutine psi_compute_hckoff_from_coo
end subroutine psb_c_cuda_cp_hlg_from_coo

@ -0,0 +1,62 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_cp_hlg_from_fmt(a,b,info)
use psb_base_mod
use hlldev_mod
use psb_vectordev_mod
use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_cp_hlg_from_fmt
implicit none
class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a
class(psb_c_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
select type(b)
type is (psb_c_coo_sparse_mat)
call a%cp_from_coo(b,info)
class default
call a%psb_c_hll_sparse_mat%cp_from_fmt(b,info)
if (info == 0) call a%to_gpu(info)
end select
if (info /= 0) goto 9999
return
9999 continue
info = psb_err_alloc_dealloc_
return
end subroutine psb_c_cuda_cp_hlg_from_fmt

@ -0,0 +1,58 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
#if CUDA_SHORT_VERSION <= 10
subroutine psb_c_cuda_cp_hybg_from_coo(a,b,info)
use psb_base_mod
use cusparse_mod
use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_cp_hybg_from_coo
implicit none
class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a
class(psb_c_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_c_csr_sparse_mat%cp_from_coo(b,info)
if (info /= 0) goto 9999
call a%to_gpu(info)
if (info /= 0) goto 9999
return
9999 continue
info = psb_err_alloc_dealloc_
return
end subroutine psb_c_cuda_cp_hybg_from_coo
#endif

@ -0,0 +1,56 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
#if CUDA_SHORT_VERSION <= 10
subroutine psb_c_cuda_cp_hybg_from_fmt(a,b,info)
use psb_base_mod
use cusparse_mod
use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_cp_hybg_from_fmt
implicit none
class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a
class(psb_c_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
select type(b)
type is (psb_c_coo_sparse_mat)
call a%cp_from_coo(b,info)
class default
call a%psb_c_csr_sparse_mat%cp_from_fmt(b,info)
if (info /= 0) return
call a%to_gpu(info)
end select
end subroutine psb_c_cuda_cp_hybg_from_fmt
#endif

@ -0,0 +1,62 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_csrg_allocate_mnnz(m,n,a,nz)
use psb_base_mod
use cusparse_mod
use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_allocate_mnnz
implicit none
integer(psb_ipk_), intent(in) :: m,n
class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(in), optional :: nz
Integer(Psb_ipk_) :: err_act, info, nz_,ld
character(len=20) :: name='allocate_mnz'
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
call a%psb_c_csr_sparse_mat%allocate(m,n,nz)
info = initFcusparse()
if (info == 0) call a%to_gpu(info,nzrm=nz)
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_csrg_allocate_mnnz

@ -0,0 +1,126 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans)
use psb_base_mod
use cusparse_mod
use elldev_mod
use psb_vectordev_mod
use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_csmm
implicit none
class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a
complex(psb_spk_), intent(in) :: alpha, beta, x(:,:)
complex(psb_spk_), intent(inout) :: y(:,:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy
complex(psb_spk_), allocatable :: acc(:)
type(c_ptr) :: gpX, gpY
logical :: tra
Integer(Psb_ipk_) :: err_act
character(len=20) :: name='d_csrg_csmm'
logical, parameter :: debug=.false.
info = psb_success_
call psb_erractionsave(err_act)
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
if (tra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1)<n) then
info = 36
call psb_errpush(info,name,i_err=(/3*ione,n,izero,izero,izero/))
goto 9999
end if
if (size(y,1)<m) then
info = 36
call psb_errpush(info,name,i_err=(/5*ione,m,izero,izero,izero/))
goto 9999
end if
if (tra) then
call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans)
else
!
! Just to test, move X/Y to/from the GPU.
!
nxy = min(size(x,2),size(y,2))
if (info == 0) &
& info = FallocMultiVecDevice(gpX,nxy,size(x,1),spgpu_type_complex_float)
if (info == 0) &
& info = writeMultiVecDevice(gpX,x,nxy)
if (info == 0) &
& info = FallocMultiVecDevice(gpY,nxy,size(y,1),spgpu_type_complex_float)
if (info == 0) &
& info = writeMultiVecDevice(gpY,y,nxy)
if (info == 0) &
& info = spmvCSRGDevice(a%deviceMat,alpha,gpX,beta,gpY)
if (info == 0) &
& info = readMultiVecDevice(gpY,y,nxy)
if (info /= 0) goto 9999
call freeMultiVecDevice(gpX)
call freeMultiVecDevice(gpY)
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_csrg_csmm

@ -0,0 +1,131 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans)
use psb_base_mod
use cusparse_mod
use elldev_mod
use psb_vectordev_mod
use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_csmv
implicit none
class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a
complex(psb_spk_), intent(in) :: alpha, beta, x(:)
complex(psb_spk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc
complex(psb_spk_) :: acc
type(c_ptr) :: gpX
type(c_ptr) :: gpY
logical :: tra
Integer(Psb_ipk_) :: err_act
character(len=20) :: name='c_cuda_csrg_csmv'
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
if (tra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1)<n) then
info = 36
call psb_errpush(info,name,i_err=(/3*ione,n,izero,izero,izero/))
goto 9999
end if
if (size(y,1)<m) then
info = 36
call psb_errpush(info,name,i_err=(/5*ione,m,izero,izero,izero/))
goto 9999
end if
if (tra) then
call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans)
else
!
! Just to test, move X/Y to/from the GPU.
!
if (info == 0) &
& info = FallocMultiVecDevice(gpX,1,size(x,1),spgpu_type_complex_float)
if (alpha /= dzero) then
if (info == 0) &
& info = writeMultiVecDevice(gpX,x)
end if
if (info == 0) &
& info = FallocMultiVecDevice(gpY,1,size(y,1),spgpu_type_complex_float)
if (beta /= dzero) then
if (info == 0) &
& info = writeMultiVecDevice(gpY,y)
end if
if (info == 0) &
& info = spmvCSRGDevice(a%deviceMat,alpha,gpX,beta,gpY)
if (info == 0) &
& info = readMultiVecDevice(gpY,y)
if (info /= 0) goto 9999
call freeMultiVecDevice(gpX)
call freeMultiVecDevice(gpY)
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_csrg_csmv

@ -0,0 +1,67 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_csrg_from_gpu(a,info)
use psb_base_mod
use elldev_mod
use psb_vectordev_mod
use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_from_gpu
implicit none
class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: m, n, nz
info = 0
if (.not.(c_associated(a%deviceMat%mat))) then
call a%free()
return
end if
info = CSRGDeviceGetParms(a%deviceMat,m,n,nz)
if (info /= psb_success_) return
if (info == 0) call psb_realloc(m+1,a%irp,info)
if (info == 0) call psb_realloc(nz,a%ja,info)
if (info == 0) call psb_realloc(nz,a%val,info)
if (info == 0) info = &
& CSRGDevice2Host(a%deviceMat,m,n,nz,a%irp,a%ja,a%val)
#if (CUDA_SHORT_VERSION <= 10) || (CUDA_VERSION < 11030)
a%irp(:) = a%irp(:)+1
a%ja(:) = a%ja(:)+1
#endif
call a%set_sync()
end subroutine psb_c_cuda_csrg_from_gpu

@ -0,0 +1,125 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans)
use psb_base_mod
use elldev_mod
use psb_vectordev_mod
use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_inner_vect_sv
use psb_c_cuda_vect_mod
implicit none
class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a
complex(psb_spk_), intent(in) :: alpha, beta
class(psb_c_base_vect_type), intent(inout) :: x, y
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
complex(psb_spk_), allocatable :: rx(:), ry(:)
logical :: tra
character :: trans_
integer(psb_ipk_) :: err_act
character(len=20) :: name='c_cuda_csrg_inner_vect_sv'
logical, parameter :: debug=.false.
call psb_get_erraction(err_act)
! This is the base version. If we get here
! it means the derived class is incomplete,
! so we throw an error.
info = psb_success_
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
if (tra.or.(beta/=dzero)) then
call x%sync()
call y%sync()
call a%psb_c_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans)
call y%set_host()
else
select type (xx => x)
type is (psb_c_vect_cuda)
select type(yy => y)
type is (psb_c_vect_cuda)
if (xx%is_host()) call xx%sync()
if (beta /= dzero) then
if (yy%is_host()) call yy%sync()
end if
info = spsvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,&
& beta,yy%deviceVect)
if (info /= 0) then
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='spsvCSRGDevice',i_err=(/info,izero,izero,izero,izero/))
info = psb_err_from_subroutine_ai_
goto 9999
end if
call yy%set_dev()
class default
rx = xx%get_vect()
ry = y%get_vect()
call a%psb_c_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info)
call y%bld(ry)
end select
class default
rx = x%get_vect()
ry = y%get_vect()
call a%psb_c_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info)
call y%bld(ry)
end select
end if
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name, a_err='csrg_vect_sv')
goto 9999
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_csrg_inner_vect_sv

@ -0,0 +1,65 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_csrg_mold(a,b,info)
use psb_base_mod
use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_mold
implicit none
class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a
class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
Integer(Psb_ipk_) :: err_act
character(len=20) :: name='csrg_mold'
logical, parameter :: debug=.false.
call psb_get_erraction(err_act)
info = 0
if (allocated(b)) then
call b%free()
deallocate(b,stat=info)
end if
if (info == 0) allocate(psb_c_cuda_csrg_sparse_mat :: b, stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
goto 9999
end if
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_csrg_mold

@ -0,0 +1,64 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_csrg_reallocate_nz(nz,a)
use psb_base_mod
use cusparse_mod
use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_reallocate_nz
implicit none
integer(psb_ipk_), intent(in) :: nz
class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: m, nzrm,ld
Integer(Psb_ipk_) :: err_act, info
character(len=20) :: name='c_cuda_csrg_reallocate_nz'
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
!
! What should this really do???
!
call a%psb_c_csr_sparse_mat%reallocate(nz)
call a%to_gpu(info,nzrm=nz)
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_csrg_reallocate_nz

@ -0,0 +1,67 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_csrg_scal(d,a,info,side)
use psb_base_mod
use cusparse_mod
use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_scal
implicit none
class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
complex(psb_spk_), intent(in) :: d(:)
integer(psb_ipk_), intent(out) :: info
character, intent(in), optional :: side
Integer(Psb_ipk_) :: err_act
character(len=20) :: name='scal'
logical, parameter :: debug=.false.
info = psb_success_
call psb_erractionsave(err_act)
if (a%is_dev()) call a%sync()
call a%psb_c_csr_sparse_mat%scal(d,info,side=side)
if (info /= 0) goto 9999
call a%to_gpu(info)
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_csrg_scal

@ -0,0 +1,65 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_csrg_scals(d,a,info)
use psb_base_mod
use cusparse_mod
use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_scals
implicit none
class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
complex(psb_spk_), intent(in) :: d
integer(psb_ipk_), intent(out) :: info
Integer(Psb_ipk_) :: err_act
character(len=20) :: name='scal'
logical, parameter :: debug=.false.
info = psb_success_
call psb_erractionsave(err_act)
if (a%is_dev()) call a%sync()
call a%psb_c_csr_sparse_mat%scal(d,info)
if (info /= 0) goto 9999
call a%to_gpu(info)
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_csrg_scals

@ -0,0 +1,378 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm)
use psb_base_mod
use cusparse_mod
use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_to_gpu
implicit none
class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in), optional :: nzrm
integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize,nz
integer(psb_ipk_) :: nzdi,i,j,k,nrz
integer(psb_ipk_), allocatable :: irpdi(:),jadi(:)
complex(psb_spk_), allocatable :: valdi(:)
info = 0
if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return
m = a%get_nrows()
n = a%get_ncols()
nz = a%get_nzeros()
if (c_associated(a%deviceMat%Mat)) then
info = CSRGDeviceFree(a%deviceMat)
end if
#if (CUDA_SHORT_VERSION <= 10 )
if (a%is_unit()) then
!
! CUSPARSE has the habit of storing the diagonal and then ignoring,
! whereas we do not store it. Hence this adapter code.
!
nzdi = nz + m
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi)
if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one)
if (info == 0) then
if (a%is_unit()) then
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
else
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
end if
end if
!!! We are explicitly adding the diagonal
!! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
if ((info == 0) .and. a%is_triangle()) then
!info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular)
if ((info == 0).and.a%is_upper()) then
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
else
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
end if
end if
if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info)
if (info == 0) then
irpdi(1) = 1
if (a%is_triangle().and.a%is_upper()) then
do i=1,m
j = irpdi(i)
jadi(j) = i
valdi(j) = cone
nrz = a%irp(i+1)-a%irp(i)
jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)
valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1)
irpdi(i+1) = j + nrz + 1
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
end do
else
do i=1,m
j = irpdi(i)
nrz = a%irp(i+1)-a%irp(i)
jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)
valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1)
jadi(j+nrz) = i
valdi(j+nrz) = cone
irpdi(i+1) = j + nrz + 1
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
end do
end if
end if
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi)
else
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz)
if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one)
!!$ if (info == 0) then
!!$ if (a%is_unit()) then
!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
!!$ else
!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
!!$ end if
!!$ end if
if ((info == 0) .and. a%is_triangle()) then
!info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular)
if ((info == 0).and.a%is_upper()) then
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
else
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
end if
end if
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val)
endif
if ((info == 0) .and. a%is_triangle()) then
info = CSRGDeviceCsrsmAnalysis(a%deviceMat)
end if
#elif CUDA_VERSION < 11030
if (a%is_unit()) then
!
! CUSPARSE has the habit of storing the diagonal and then ignoring,
! whereas we do not store it. Hence this adapter code.
!
nzdi = nz + m
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi)
!!$ write(0,*) 'Done deviceAlloc'
if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_zero)
!!$ write(0,*) 'Done SetIndexBase'
if (info == 0) then
if (a%is_unit()) then
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
else
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
end if
end if
!!! We are explicitly adding the diagonal
!! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
if ((info == 0) .and. a%is_triangle()) then
info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular)
if ((info == 0).and.a%is_upper()) then
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
else
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
end if
end if
if (info == 0) allocate(irpdi(m+1),jadi(0:nzdi),valdi(0:nzdi),stat=info)
if (info == 0) then
irpdi(1) = 0
if (a%is_triangle().and.a%is_upper()) then
do i=1,m
j = irpdi(i)
jadi(j) = i
valdi(j) = cone
nrz = a%irp(i+1)-a%irp(i)
jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)-1
valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1)
irpdi(i+1) = j + nrz + 1
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
end do
else
do i=1,m
j = irpdi(i)
nrz = a%irp(i+1)-a%irp(i)
jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)-1
valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1)
jadi(j+nrz) = i
valdi(j+nrz) = cone
irpdi(i+1) = j + nrz + 1
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
end do
end if
end if
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi)
else
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz)
!!$ write(0,*) 'Done deviceAlloc', info
if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,&
& cusparse_index_base_zero)
!!$ write(0,*) 'Done setIndexBase', info
if (info == 0) then
if (a%is_unit()) then
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
else
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
end if
end if
if ((info == 0) .and. a%is_triangle()) then
info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular)
if ((info == 0).and.a%is_upper()) then
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
else
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
end if
end if
nzdi=a%irp(m+1)-1
if (info == 0) allocate(irpdi(m+1),jadi(max(nzdi,1)),stat=info)
if (info == 0) then
irpdi(1:m+1) = a%irp(1:m+1) -1
jadi(1:nzdi) = a%ja(1:nzdi) -1
end if
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,irpdi,jadi,a%val)
!!$ write(0,*) 'Done Host2Device', info
endif
#elif 0
if (a%is_unit()) then
!
! CUSPARSE has the habit of storing the diagonal and then ignoring,
! whereas we do not store it. Hence this adapter code.
!
nzdi = nz + m
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi)
if (info == 0) then
if (a%is_unit()) then
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
else
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
end if
end if
!!! We are explicitly adding the diagonal
!! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
if ((info == 0) .and. a%is_triangle()) then
!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular)
if ((info == 0).and.a%is_upper()) then
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
else
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
end if
end if
if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info)
if (info == 0) then
irpdi(1) = 1
if (a%is_triangle().and.a%is_upper()) then
do i=1,m
j = irpdi(i)
jadi(j) = i
valdi(j) = cone
nrz = a%irp(i+1)-a%irp(i)
jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)
valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1)
irpdi(i+1) = j + nrz + 1
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
end do
else
do i=1,m
j = irpdi(i)
nrz = a%irp(i+1)-a%irp(i)
jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)
valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1)
jadi(j+nrz) = i
valdi(j+nrz) = cone
irpdi(i+1) = j + nrz + 1
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
end do
end if
end if
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi)
else
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz)
!!$ if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one)
if (info == 0) then
if (a%is_unit()) then
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
else
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
end if
end if
if ((info == 0) .and. a%is_triangle()) then
!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular)
if ((info == 0).and.a%is_upper()) then
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
else
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
end if
end if
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val)
endif
!!$ if ((info == 0) .and. a%is_triangle()) then
!!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat)
!!$ end if
#else
if (a%is_unit()) then
!
! CUSPARSE has the habit of storing the diagonal and then ignoring,
! whereas we do not store it. Hence this adapter code.
!
nzdi = nz + m
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi)
if (info == 0) then
if (a%is_unit()) then
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
else
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
end if
end if
!!! We are explicitly adding the diagonal
if ((info == 0) .and. a%is_triangle()) then
if ((info == 0).and.a%is_upper()) then
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
else
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
end if
end if
if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info)
if (info == 0) then
irpdi(1) = 1
if (a%is_triangle().and.a%is_upper()) then
do i=1,m
j = irpdi(i)
jadi(j) = i
valdi(j) = cone
nrz = a%irp(i+1)-a%irp(i)
jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)
valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1)
irpdi(i+1) = j + nrz + 1
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
end do
else
do i=1,m
j = irpdi(i)
nrz = a%irp(i+1)-a%irp(i)
jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)
valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1)
jadi(j+nrz) = i
valdi(j+nrz) = cone
irpdi(i+1) = j + nrz + 1
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
end do
end if
end if
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi)
else
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz)
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val)
endif
#endif
call a%set_sync()
if (info /= 0) then
write(0,*) 'Error in CSRG_TO_GPU ',info
end if
end subroutine psb_c_cuda_csrg_to_gpu

@ -0,0 +1,117 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans)
use psb_base_mod
use cusparse_mod
use elldev_mod
use psb_vectordev_mod
use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_vect_mv
use psb_c_cuda_vect_mod
implicit none
class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a
complex(psb_spk_), intent(in) :: alpha, beta
class(psb_c_base_vect_type), intent(inout) :: x
class(psb_c_base_vect_type), intent(inout) :: y
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
complex(psb_spk_), allocatable :: rx(:), ry(:)
logical :: tra
character :: trans_
Integer(Psb_ipk_) :: err_act
character(len=20) :: name='c_cuda_csrg_vect_mv'
call psb_erractionsave(err_act)
info = psb_success_
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
if (tra) then
if (.not.x%is_host()) call x%sync()
if (beta /= czero) then
if (.not.y%is_host()) call y%sync()
end if
call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans)
call y%set_host()
else
if (a%is_host()) call a%sync()
select type (xx => x)
type is (psb_c_vect_cuda)
select type(yy => y)
type is (psb_c_vect_cuda)
if (xx%is_host()) call xx%sync()
if (beta /= czero) then
if (yy%is_host()) call yy%sync()
end if
info = spmvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,&
& beta,yy%deviceVect)
if (info /= 0) then
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='spmvCSRGDevice',i_err=(/info,izero,izero,izero,izero/))
info = psb_err_from_subroutine_ai_
goto 9999
end if
call yy%set_dev()
class default
rx = xx%get_vect()
ry = y%get_vect()
call a%psb_c_csr_sparse_mat%spmm(alpha,rx,beta,ry,info)
call y%bld(ry)
end select
class default
rx = x%get_vect()
ry = y%get_vect()
call a%psb_c_csr_sparse_mat%spmm(alpha,rx,beta,ry,info)
call y%bld(ry)
end select
end if
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_csrg_vect_mv

@ -0,0 +1,127 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_diag_csmv(alpha,a,x,beta,y,info,trans)
use psb_base_mod
use diagdev_mod
use psb_vectordev_mod
use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_csmv
implicit none
class(psb_c_cuda_diag_sparse_mat), intent(in) :: a
complex(psb_spk_), intent(in) :: alpha, beta, x(:)
complex(psb_spk_), intent(inout) :: y(:)
integer, intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer :: i,j,k,m,n, nnz, ir, jc
complex(psb_spk_) :: acc
type(c_ptr) :: gpX, gpY
logical :: tra
Integer :: err_act
character(len=20) :: name='c_cuda_diag_csmv'
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
if (tra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1)<n) then
info = 36
call psb_errpush(info,name,i_err=(/3*ione,n,izero,izero,izero/))
goto 9999
end if
if (size(y,1)<m) then
info = 36
call psb_errpush(info,name,i_err=(/5*ione,m,izero,izero,izero/))
goto 9999
end if
if (tra) then
call a%psb_c_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans)
else
!
! Just to test, move X/Y to/from the GPU.
!
if (info == 0) &
& info = FallocMultiVecDevice(gpX,1,size(x,1),spgpu_type_double)
if (alpha /= dzero) then
if (info == 0) &
& info = writeMultiVecDevice(gpX,x)
end if
if (info == 0) &
& info = FallocMultiVecDevice(gpY,1,size(y,1),spgpu_type_double)
if (beta /= dzero) then
if (info == 0) &
& info = writeMultiVecDevice(gpY,y)
end if
if (info == 0) &
& info = spmvDiagDevice(a%deviceMat,alpha,gpX,beta,gpY)
if (info == 0) &
& info = readMultiVecDevice(gpY,y)
if (info /= 0) goto 9999
call freeMultiVecDevice(gpX)
call freeMultiVecDevice(gpY)
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_diag_csmv

@ -0,0 +1,65 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_diag_mold(a,b,info)
use psb_base_mod
use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_mold
implicit none
class(psb_c_cuda_diag_sparse_mat), intent(in) :: a
class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
Integer(Psb_ipk_) :: err_act
character(len=20) :: name='diag_mold'
logical, parameter :: debug=.false.
call psb_get_erraction(err_act)
info = 0
if (allocated(b)) then
call b%free()
deallocate(b,stat=info)
end if
if (info == 0) allocate(psb_c_cuda_diag_sparse_mat :: b, stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
goto 9999
end if
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_diag_mold

@ -0,0 +1,66 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_diag_to_gpu(a,info,nzrm)
use psb_base_mod
use diagdev_mod
use psb_vectordev_mod
use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_to_gpu
use iso_c_binding
implicit none
class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in), optional :: nzrm
integer(psb_ipk_) :: m, nzm, n, c,pitch,maxrowsize,d
type(diagdev_parms) :: gpu_parms
info = 0
if ((.not.allocated(a%data)).or.(.not.allocated(a%offset))) return
n = size(a%data,1)
d = size(a%data,2)
c = a%get_ncols()
!allocsize = a%get_size()
!write(*,*) 'Create the DIAG matrix'
gpu_parms = FgetDiagDeviceParams(n,c,d,spgpu_type_complex_float)
if (c_associated(a%deviceMat)) then
call freeDiagDevice(a%deviceMat)
endif
info = FallocDiagDevice(a%deviceMat,n,c,d,spgpu_type_complex_float)
if (info == 0) info = &
& writeDiagDevice(a%deviceMat,a%data,a%offset,n)
! if (info /= 0) goto 9999
end subroutine psb_c_cuda_diag_to_gpu

@ -0,0 +1,116 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans)
use psb_base_mod
use diagdev_mod
use psb_vectordev_mod
use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_vect_mv
use psb_c_cuda_vect_mod
implicit none
class(psb_c_cuda_diag_sparse_mat), intent(in) :: a
complex(psb_spk_), intent(in) :: alpha, beta
class(psb_c_base_vect_type), intent(inout) :: x
class(psb_c_base_vect_type), intent(inout) :: y
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
complex(psb_spk_), allocatable :: rx(:), ry(:)
logical :: tra
character :: trans_
Integer(Psb_ipk_) :: err_act
character(len=20) :: name='c_cuda_diag_vect_mv'
call psb_erractionsave(err_act)
info = psb_success_
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
if (tra) then
if (.not.x%is_host()) call x%sync()
if (beta /= szero) then
if (.not.y%is_host()) call y%sync()
end if
call a%psb_c_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans)
call y%set_host()
else
if (a%is_host()) call a%sync()
select type (xx => x)
type is (psb_c_vect_cuda)
select type(yy => y)
type is (psb_c_vect_cuda)
if (xx%is_host()) call xx%sync()
if (beta /= dzero) then
if (yy%is_host()) call yy%sync()
end if
info = spmvDiagDevice(a%deviceMat,alpha,xx%deviceVect,&
& beta,yy%deviceVect)
if (info /= 0) then
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='spmvDIAGDevice',i_err=(/info,izero,izero,izero,izero/))
info = psb_err_from_subroutine_ai_
goto 9999
end if
call yy%set_dev()
class default
rx = xx%get_vect()
ry = y%get_vect()
call a%spmm(alpha,rx,beta,ry,info)
call y%bld(ry)
end select
class default
rx = x%get_vect()
ry = y%get_vect()
call a%spmm(alpha,rx,beta,ry,info)
call y%bld(ry)
end select
end if
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_diag_vect_mv

@ -0,0 +1,428 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans)
use psb_base_mod
use psb_c_cuda_vect_mod
use dnsdev_mod
use psb_c_vectordev_mod
use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_dnsg_vect_mv
implicit none
class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a
complex(psb_spk_), intent(in) :: alpha, beta
class(psb_c_base_vect_type), intent(inout) :: x
class(psb_c_base_vect_type), intent(inout) :: y
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
logical :: tra
character :: trans_
complex(psb_spk_), allocatable :: rx(:), ry(:)
Integer(Psb_ipk_) :: err_act, m, n, k
character(len=20) :: name='c_cuda_dnsg_vect_mv'
call psb_erractionsave(err_act)
info = psb_success_
if (present(trans)) then
trans_ = psb_toupper(trans)
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
if (trans_ =='N') then
m = a%get_nrows()
n = 1
k = a%get_ncols()
else
m = a%get_ncols()
n = 1
k = a%get_nrows()
end if
select type (xx => x)
type is (psb_c_vect_cuda)
select type(yy => y)
type is (psb_c_vect_cuda)
if (a%is_host()) call a%sync()
if (xx%is_host()) call xx%sync()
if (beta /= czero) then
if (yy%is_host()) call yy%sync()
end if
info = spmvDnsDevice(trans_,m,n,k,alpha,a%deviceMat,&
& xx%deviceVect,beta,yy%deviceVect)
if (info /= 0) then
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='spmvDnsDevice',i_err=(/info,izero,izero,izero,izero/))
info = psb_err_from_subroutine_ai_
goto 9999
end if
call yy%set_dev()
class default
if (a%is_dev()) call a%sync()
rx = xx%get_vect()
ry = y%get_vect()
call a%spmm(alpha,rx,beta,ry,info)
call y%bld(ry)
end select
class default
if (a%is_dev()) call a%sync()
rx = x%get_vect()
ry = y%get_vect()
call a%spmm(alpha,rx,beta,ry,info)
call y%bld(ry)
end select
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_dnsg_vect_mv
subroutine psb_c_cuda_dnsg_mold(a,b,info)
use psb_base_mod
use psb_c_cuda_vect_mod
use dnsdev_mod
use psb_c_vectordev_mod
use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_dnsg_mold
implicit none
class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a
class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
Integer(Psb_ipk_) :: err_act
character(len=20) :: name='dnsg_mold'
logical, parameter :: debug=.false.
call psb_get_erraction(err_act)
info = 0
if (allocated(b)) then
call b%free()
deallocate(b,stat=info)
end if
if (info == 0) allocate(psb_c_cuda_dnsg_sparse_mat :: b, stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
goto 9999
end if
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_dnsg_mold
!!$
!!$ interface
!!$ subroutine psb_c_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans)
!!$ import :: psb_ipk_, psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type
!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a
!!$ complex(psb_spk_), intent(in) :: alpha, beta
!!$ class(psb_c_base_vect_type), intent(inout) :: x, y
!!$ integer(psb_ipk_), intent(out) :: info
!!$ character, optional, intent(in) :: trans
!!$ end subroutine psb_c_cuda_dnsg_inner_vect_sv
!!$ end interface
!!$ interface
!!$ subroutine psb_c_cuda_dnsg_reallocate_nz(nz,a)
!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_ipk_
!!$ integer(psb_ipk_), intent(in) :: nz
!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a
!!$ end subroutine psb_c_cuda_dnsg_reallocate_nz
!!$ end interface
!!$
!!$ interface
!!$ subroutine psb_c_cuda_dnsg_allocate_mnnz(m,n,a,nz)
!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_ipk_
!!$ integer(psb_ipk_), intent(in) :: m,n
!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a
!!$ integer(psb_ipk_), intent(in), optional :: nz
!!$ end subroutine psb_c_cuda_dnsg_allocate_mnnz
!!$ end interface
subroutine psb_c_cuda_dnsg_to_gpu(a,info)
use psb_base_mod
use psb_c_cuda_vect_mod
use dnsdev_mod
use psb_c_vectordev_mod
use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_dnsg_to_gpu
class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
Integer(Psb_ipk_) :: err_act, pitch, lda
logical, parameter :: debug=.false.
character(len=20) :: name='c_cuda_dnsg_to_gpu'
call psb_erractionsave(err_act)
info = psb_success_
if (debug) write(0,*) 'DNS_TO_GPU',size(a%val,1),size(a%val,2)
info = FallocDnsDevice(a%deviceMat,a%get_nrows(),a%get_ncols(),&
& spgpu_type_complex_float,1)
if (info == 0) info = writeDnsDevice(a%deviceMat,a%val,size(a%val,1),size(a%val,2))
if (debug) write(0,*) 'DNS_TO_GPU: From writeDnsDEvice',info
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_dnsg_to_gpu
subroutine psb_c_cuda_cp_dnsg_from_coo(a,b,info)
use psb_base_mod
use psb_c_cuda_vect_mod
use dnsdev_mod
use psb_c_vectordev_mod
use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_cp_dnsg_from_coo
implicit none
class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a
class(psb_c_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
Integer(Psb_ipk_) :: err_act
character(len=20) :: name='c_cuda_dnsg_cp_from_coo'
integer(psb_ipk_) :: debug_level, debug_unit
logical, parameter :: debug=.false.
type(psb_c_coo_sparse_mat) :: tmp
call psb_erractionsave(err_act)
info = psb_success_
if (b%is_dev()) call b%sync()
call a%psb_c_dns_sparse_mat%cp_from_coo(b,info)
if (debug) write(0,*) 'dnsg_cp_from_coo: dns_cp',info
if (info == 0) call a%to_gpu(info)
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_cp_dnsg_from_coo
subroutine psb_c_cuda_cp_dnsg_from_fmt(a,b,info)
use psb_base_mod
use psb_c_cuda_vect_mod
use dnsdev_mod
use psb_c_vectordev_mod
use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_cp_dnsg_from_fmt
implicit none
class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a
class(psb_c_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
type(psb_c_coo_sparse_mat) :: tmp
Integer(Psb_ipk_) :: err_act
character(len=20) :: name='c_cuda_dnsg_cp_from_fmt'
call psb_erractionsave(err_act)
info = psb_success_
if (b%is_dev()) call b%sync()
select type (b)
type is (psb_c_coo_sparse_mat)
call a%cp_from_coo(b,info)
!!$ class is (psb_c_ell_sparse_mat)
!!$ nzm = psb_size(b%ja,2)
!!$ m = b%get_nrows()
!!$ nc = b%get_ncols()
!!$ nza = b%get_nzeros()
!!$#ifdef HAVE_SPGPU
!!$ gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1)
!!$ ld = gpu_parms%pitch
!!$ nzm = gpu_parms%maxRowSize
!!$#else
!!$ ld = m
!!$#endif
!!$ a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
!!$ if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info)
!!$ if (info == 0) call psb_safe_cpy( b%irn, a%irn , info)
!!$ if (info == 0) call psb_safe_cpy( b%ja , a%ja , info)
!!$ if (info == 0) call psb_safe_cpy( b%val, a%val , info)
!!$ if (info == 0) call psb_realloc(ld,nzm,a%ja,info)
!!$ if (info == 0) then
!!$ a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm)
!!$ end if
!!$ if (info == 0) call psb_realloc(ld,nzm,a%val,info)
!!$ if (info == 0) then
!!$ a%val(1:m,1:nzm) = b%val(1:m,1:nzm)
!!$ end if
!!$ a%nzt = nza
!!$#ifdef HAVE_SPGPU
!!$ call a%to_gpu(info)
!!$#endif
class default
call b%cp_to_coo(tmp,info)
if (info == psb_success_) call a%mv_from_coo(tmp,info)
end select
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_cp_dnsg_from_fmt
subroutine psb_c_cuda_mv_dnsg_from_coo(a,b,info)
use psb_base_mod
use psb_c_cuda_vect_mod
use dnsdev_mod
use psb_c_vectordev_mod
use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_mv_dnsg_from_coo
implicit none
class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a
class(psb_c_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
Integer(Psb_ipk_) :: err_act
logical, parameter :: debug=.false.
character(len=20) :: name='c_cuda_dnsg_mv_from_coo'
call psb_erractionsave(err_act)
info = psb_success_
if (.not.b%is_by_rows()) call b%fix(info)
if (info /= psb_success_) return
if (b%is_dev()) call b%sync()
call a%cp_from_coo(b,info)
if (debug) write(0,*) 'dnsg_mv_from_coo: cp_from_coo:',info
call b%free()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_mv_dnsg_from_coo
subroutine psb_c_cuda_mv_dnsg_from_fmt(a,b,info)
use psb_base_mod
use psb_c_cuda_vect_mod
use dnsdev_mod
use psb_c_vectordev_mod
use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_mv_dnsg_from_fmt
implicit none
class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a
class(psb_c_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
type(psb_c_coo_sparse_mat) :: tmp
Integer(Psb_ipk_) :: err_act
character(len=20) :: name='c_cuda_dnsg_cp_from_fmt'
call psb_erractionsave(err_act)
info = psb_success_
if (b%is_dev()) call b%sync()
select type (b)
type is (psb_c_coo_sparse_mat)
call a%mv_from_coo(b,info)
!!$ class is (psb_c_ell_sparse_mat)
!!$ nzm = psb_size(b%ja,2)
!!$ m = b%get_nrows()
!!$ nc = b%get_ncols()
!!$ nza = b%get_nzeros()
!!$#ifdef HAVE_SPGPU
!!$ gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1)
!!$ ld = gpu_parms%pitch
!!$ nzm = gpu_parms%maxRowSize
!!$#else
!!$ ld = m
!!$#endif
!!$ a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
!!$ if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info)
!!$ if (info == 0) call psb_safe_cpy( b%irn, a%irn , info)
!!$ if (info == 0) call psb_safe_cpy( b%ja , a%ja , info)
!!$ if (info == 0) call psb_safe_cpy( b%val, a%val , info)
!!$ if (info == 0) call psb_realloc(ld,nzm,a%ja,info)
!!$ if (info == 0) then
!!$ a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm)
!!$ end if
!!$ if (info == 0) call psb_realloc(ld,nzm,a%val,info)
!!$ if (info == 0) then
!!$ a%val(1:m,1:nzm) = b%val(1:m,1:nzm)
!!$ end if
!!$ a%nzt = nza
!!$#ifdef HAVE_SPGPU
!!$ call a%to_gpu(info)
!!$#endif
class default
call b%mv_to_coo(tmp,info)
if (info == psb_success_) call a%mv_from_coo(tmp,info)
end select
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_mv_dnsg_from_fmt

@ -0,0 +1,99 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_elg_allocate_mnnz(m,n,a,nz)
use psb_base_mod
use elldev_mod
use psb_vectordev_mod
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_allocate_mnnz
implicit none
integer(psb_ipk_), intent(in) :: m,n
class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(in), optional :: nz
Integer(Psb_ipk_) :: err_act, info, nz_,ld
character(len=20) :: name='allocate_mnz'
logical, parameter :: debug=.false.
type(elldev_parms) :: gpu_parms
call psb_erractionsave(err_act)
info = psb_success_
if (m < 0) then
info = psb_err_iarg_neg_
call psb_errpush(info,name,i_err=(/ione,izero,izero,izero,izero/))
goto 9999
endif
if (n < 0) then
info = psb_err_iarg_neg_
call psb_errpush(info,name,i_err=(/2*ione,izero,izero,izero,izero/))
goto 9999
endif
if (present(nz)) then
nz_ = (max(nz,ione) + m -1 )/m
else
nz_ = (max(7*m,7*n,ione)+m-1)/m
end if
if (nz_ < 0) then
info = psb_err_iarg_neg_
call psb_errpush(info,name,i_err=(/3*ione,izero,izero,izero,izero/))
goto 9999
endif
gpu_parms = FgetEllDeviceParams(m,nz_,nz_*m,n,spgpu_type_complex_float,1)
ld = gpu_parms%pitch
nz_ = gpu_parms%maxRowSize
if (info == psb_success_) call psb_realloc(m,a%irn,info)
if (info == psb_success_) call psb_realloc(m,a%idiag,info)
if (info == psb_success_) call psb_realloc(ld,nz_,a%ja,info)
if (info == psb_success_) call psb_realloc(ld,nz_,a%val,info)
if (info == psb_success_) then
a%irn = 0
a%idiag = 0
a%nzt = 0
call a%set_nrows(m)
call a%set_ncols(n)
call a%set_bld()
call a%set_triangle(.false.)
call a%set_unit(.false.)
call a%set_dupl(psb_dupl_def_)
end if
call a%to_gpu(info,nzrm=nz_)
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_elg_allocate_mnnz

@ -0,0 +1,64 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_elg_asb(a)
use psb_base_mod
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_asb
implicit none
class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: err_act, info
character(len=20) :: name='elg_asb'
logical :: clear_
logical, parameter :: debug=.false.
real(psb_dpk_), allocatable :: valt(:,:)
integer(psb_ipk_), allocatable :: jat(:,:)
integer(psb_ipk_) :: nr, nc
call psb_erractionsave(err_act)
info = psb_success_
! Only call sync() if we are on host
if (a%is_host()) then
call a%sync()
end if
call a%set_asb()
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_elg_asb

@ -0,0 +1,124 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_elg_csmm(alpha,a,x,beta,y,info,trans)
use psb_base_mod
use elldev_mod
use psb_vectordev_mod
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csmm
implicit none
class(psb_c_cuda_elg_sparse_mat), intent(in) :: a
complex(psb_spk_), intent(in) :: alpha, beta, x(:,:)
complex(psb_spk_), intent(inout) :: y(:,:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy
complex(psb_spk_), allocatable :: acc(:)
type(c_ptr) :: gpX, gpY
logical :: tra
Integer(Psb_ipk_) :: err_act
character(len=20) :: name='c_cuda_elg_csmm'
logical, parameter :: debug=.false.
info = psb_success_
call psb_erractionsave(err_act)
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
if (tra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1)<n) then
info = 36
call psb_errpush(info,name,i_err=(/3*ione,n,izero,izero,izero/))
goto 9999
end if
if (size(y,1)<m) then
info = 36
call psb_errpush(info,name,i_err=(/5*ione,m,izero,izero,izero/))
goto 9999
end if
if (tra) then
if (a%is_dev()) call a%sync()
call a%psb_c_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans)
else
!
! Just to test, move X/Y to/from the GPU.
!
nxy = min(size(x,2),size(y,2))
if (info == 0) &
& info = FallocMultiVecDevice(gpX,nxy,size(x,1),spgpu_type_complex_float)
if (info == 0) &
& info = writeMultiVecDevice(gpX,x,nxy)
if (info == 0) &
& info = FallocMultiVecDevice(gpY,nxy,size(y,1),spgpu_type_complex_float)
if (info == 0) &
& info = writeMultiVecDevice(gpY,y,nxy)
if (info == 0) &
& info = spmvEllDevice(a%deviceMat,alpha,gpX,beta,gpY)
if (info == 0) &
& info = readMultiVecDevice(gpY,y,nxy)
if (info /= 0) goto 9999
call freeMultiVecDevice(gpX)
call freeMultiVecDevice(gpY)
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_elg_csmm

@ -0,0 +1,127 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_elg_csmv(alpha,a,x,beta,y,info,trans)
use psb_base_mod
use elldev_mod
use psb_vectordev_mod
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csmv
implicit none
class(psb_c_cuda_elg_sparse_mat), intent(in) :: a
complex(psb_spk_), intent(in) :: alpha, beta, x(:)
complex(psb_spk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc
complex(psb_spk_) :: acc
type(c_ptr) :: gpX, gpY
logical :: tra
Integer(Psb_ipk_) :: err_act
character(len=20) :: name='d_elg_csmv'
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
if (tra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1)<n) then
info = 36
call psb_errpush(info,name,i_err=(/3*ione,n,izero,izero,izero/))
goto 9999
end if
if (size(y,1)<m) then
info = 36
call psb_errpush(info,name,i_err=(/5*ione,m,izero,izero,izero/))
goto 9999
end if
if (tra) then
if (a%is_dev()) call a%sync()
call a%psb_c_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans)
else
!
! Just to test, move X/Y to/from the GPU.
!
if (info == 0) &
& info = FallocMultiVecDevice(gpX,1,size(x,1),spgpu_type_complex_float)
if (alpha /= dzero) then
if (info == 0) &
& info = writeMultiVecDevice(gpX,x)
end if
if (info == 0) &
& info = FallocMultiVecDevice(gpY,1,size(y,1),spgpu_type_complex_float)
if (beta /= dzero) then
if (info == 0) &
& info = writeMultiVecDevice(gpY,y)
end if
if (info == 0) &
& info = spmvEllDevice(a%deviceMat,alpha,gpX,beta,gpY)
if (info == 0) &
& info = readMultiVecDevice(gpY,y)
if (info /= 0) goto 9999
call freeMultiVecDevice(gpX)
call freeMultiVecDevice(gpY)
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_elg_csmv

@ -0,0 +1,239 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
use psb_base_mod
use iso_c_binding
use elldev_mod
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csput_a
implicit none
class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
complex(psb_spk_), intent(in) :: val(:)
integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
character(len=20) :: name='c_cuda_elg_csput_a'
logical, parameter :: debug=.false.
integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit
real(psb_dpk_) :: t1,t2,t3
type(c_ptr) :: devIdxUpd
call psb_erractionsave(err_act)
info = psb_success_
debug_unit = psb_get_debug_unit()
debug_level = psb_get_debug_level()
!!$ write(0,*) 'In ELG_csput_a'
if (nz <= 0) then
info = psb_err_iarg_neg_
int_err(1)=1
call psb_errpush(info,name,i_err=int_err)
goto 9999
end if
if (size(ia) < nz) then
info = psb_err_input_asize_invalid_i_
int_err(1)=2
call psb_errpush(info,name,i_err=int_err)
goto 9999
end if
if (size(ja) < nz) then
info = psb_err_input_asize_invalid_i_
int_err(1)=3
call psb_errpush(info,name,i_err=int_err)
goto 9999
end if
if (size(val) < nz) then
info = psb_err_input_asize_invalid_i_
int_err(1)=4
call psb_errpush(info,name,i_err=int_err)
goto 9999
end if
if (nz == 0) return
if (a%is_bld()) then
! Build phase should only ever be in COO
info = psb_err_invalid_mat_state_
else if (a%is_upd()) then
!!$ write(*,*) 'elg_csput_a '
if (a%is_dev()) call a%sync()
call a%psb_c_ell_sparse_mat%csput(nz,ia,ja,val,&
& imin,imax,jmin,jmax,info)
if (info /= psb_success_) then
call psb_errpush(info,name)
goto 9999
end if
call a%set_host()
else
! State is wrong.
info = psb_err_invalid_mat_state_
end if
if (info /= psb_success_) then
call psb_errpush(info,name)
goto 9999
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_elg_csput_a
subroutine psb_c_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
use psb_base_mod
use iso_c_binding
use elldev_mod
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csput_v
use psb_c_cuda_vect_mod
implicit none
class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
class(psb_c_base_vect_type), intent(inout) :: val
class(psb_i_base_vect_type), intent(inout) :: ia, ja
integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
character(len=20) :: name='c_cuda_elg_csput_v'
logical, parameter :: debug=.false.
integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit, nrw
logical :: gpu_invoked
real(psb_dpk_) :: t1,t2,t3
type(c_ptr) :: devIdxUpd
integer(psb_ipk_), allocatable :: idxs(:)
logical, parameter :: debug_idxs=.false., debug_vals=.false.
call psb_erractionsave(err_act)
info = psb_success_
debug_unit = psb_get_debug_unit()
debug_level = psb_get_debug_level()
! write(0,*) 'In ELG_csput_v'
if (nz <= 0) then
info = psb_err_iarg_neg_
int_err(1)=1
call psb_errpush(info,name,i_err=int_err)
goto 9999
end if
if (ia%get_nrows() < nz) then
info = psb_err_input_asize_invalid_i_
int_err(1)=2
call psb_errpush(info,name,i_err=int_err)
goto 9999
end if
if (ja%get_nrows() < nz) then
info = psb_err_input_asize_invalid_i_
int_err(1)=3
call psb_errpush(info,name,i_err=int_err)
goto 9999
end if
if (val%get_nrows() < nz) then
info = psb_err_input_asize_invalid_i_
int_err(1)=4
call psb_errpush(info,name,i_err=int_err)
goto 9999
end if
if (nz == 0) return
if (a%is_bld()) then
! Build phase should only ever be in COO
info = psb_err_invalid_mat_state_
else if (a%is_upd()) then
t1=psb_wtime()
gpu_invoked = .false.
select type (ia)
class is (psb_i_vect_cuda)
select type (ja)
class is (psb_i_vect_cuda)
select type (val)
class is (psb_c_vect_cuda)
if (a%is_host()) call a%sync()
if (val%is_host()) call val%sync()
if (ia%is_host()) call ia%sync()
if (ja%is_host()) call ja%sync()
info = csputEllDeviceFloatComplex(a%deviceMat,nz,&
& ia%deviceVect,ja%deviceVect,val%deviceVect)
call a%set_dev()
gpu_invoked=.true.
end select
end select
end select
if (.not.gpu_invoked) then
!!$ write(0,*)'Not gpu_invoked '
if (a%is_dev()) call a%sync()
call a%psb_c_ell_sparse_mat%csput(nz,ia,ja,val,&
& imin,imax,jmin,jmax,info)
call a%set_host()
end if
if (info /= 0) then
info = psb_err_internal_error_
end if
else
! State is wrong.
info = psb_err_invalid_mat_state_
end if
if (info /= psb_success_) then
call psb_errpush(info,name)
goto 9999
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_elg_csput_v

@ -0,0 +1,67 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_elg_from_gpu(a,info)
use psb_base_mod
use elldev_mod
use psb_vectordev_mod
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_from_gpu
implicit none
class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize
info = 0
if (.not.(c_associated(a%deviceMat))) then
call a%free()
return
end if
m = a%get_nrows()
nzm = psb_size(a%val,2)
n = a%get_ncols()
pitch = getEllDevicePitch(a%deviceMat)
maxrowsize = getEllDeviceMaxRowSize(a%deviceMat)
if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then
call psb_realloc(pitch,maxrowsize,a%val,info)
if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info)
if (info == 0) call psb_realloc(pitch,a%irn,info)
end if
if (info == 0) info = &
& readEllDevice(a%deviceMat,a%val,a%ja,pitch,a%irn,a%idiag)
call a%set_sync()
end subroutine psb_c_cuda_elg_from_gpu

@ -0,0 +1,84 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans)
use psb_base_mod
use elldev_mod
use psb_vectordev_mod
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_inner_vect_sv
use psb_c_cuda_vect_mod
implicit none
class(psb_c_cuda_elg_sparse_mat), intent(in) :: a
complex(psb_spk_), intent(in) :: alpha, beta
class(psb_c_base_vect_type), intent(inout) :: x, y
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
integer(psb_ipk_) :: err_act
character(len=20) :: name='c_cuda_elg_inner_vect_sv'
logical, parameter :: debug=.false.
complex(psb_spk_), allocatable :: rx(:), ry(:)
call psb_get_erraction(err_act)
! This is the base version. If we get here
! it means the derived class is incomplete,
! so we throw an error.
info = psb_success_
if (a%is_dev()) call a%sync()
if (.false.) then
rx = x%get_vect()
ry = y%get_vect()
call a%inner_spsm(alpha,rx,beta,ry,info,trans)
call y%bld(ry)
else
call x%sync()
call y%sync()
call a%psb_c_ell_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans)
call y%set_host()
end if
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name, a_err='inner_cssm')
goto 9999
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_elg_inner_vect_sv

@ -0,0 +1,63 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_elg_mold(a,b,info)
use psb_base_mod
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_mold
implicit none
class(psb_c_cuda_elg_sparse_mat), intent(in) :: a
class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
Integer(Psb_ipk_) :: err_act
character(len=20) :: name='elg_mold'
logical, parameter :: debug=.false.
call psb_get_erraction(err_act)
info = 0
if (allocated(b)) then
call b%free()
deallocate(b,stat=info)
end if
if (info == 0) allocate(psb_c_cuda_elg_sparse_mat :: b, stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
goto 9999
end if
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_elg_mold

@ -0,0 +1,72 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_elg_reallocate_nz(nz,a)
use psb_base_mod
use elldev_mod
use psb_vectordev_mod
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_reallocate_nz
implicit none
integer(psb_ipk_), intent(in) :: nz
class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: m, nzrm,ld
Integer(Psb_ipk_) :: err_act, info
character(len=20) :: name='c_cuda_elg_reallocate_nz'
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
!
! What should this really do???
!
if (a%is_dev()) call a%sync()
m = a%get_nrows()
nzrm = (max(nz,ione)+m-1)/m
ld = size(a%ja,1)
call psb_realloc(ld,nzrm,a%ja,info)
if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_alloc_dealloc_,name)
goto 9999
end if
call a%to_gpu(info,nzrm=nzrm)
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_elg_reallocate_nz

@ -0,0 +1,71 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_elg_scal(d,a,info,side)
use psb_base_mod
use elldev_mod
use psb_vectordev_mod
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_scal
implicit none
class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
complex(psb_spk_), intent(in) :: d(:)
integer(psb_ipk_), intent(out) :: info
character, intent(in), optional :: side
Integer(Psb_ipk_) :: err_act,mnm, i, j, m
character(len=20) :: name='scal'
logical, parameter :: debug=.false.
info = psb_success_
call psb_erractionsave(err_act)
if (a%is_dev()) call a%sync()
if (a%is_unit()) then
call a%make_nonunit()
end if
call a%psb_c_ell_sparse_mat%scal(d,info,side)
if (info /= psb_success_) goto 9999
call a%to_gpu(info)
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_elg_scal

@ -0,0 +1,66 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_elg_scals(d,a,info)
use psb_base_mod
use elldev_mod
use psb_vectordev_mod
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_scals
implicit none
class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
complex(psb_spk_), intent(in) :: d
integer(psb_ipk_), intent(out) :: info
Integer(Psb_ipk_) :: err_act
character(len=20) :: name='scal'
logical, parameter :: debug=.false.
info = psb_success_
call psb_erractionsave(err_act)
if (a%is_dev()) call a%sync()
if (a%is_unit()) then
call a%make_nonunit()
end if
a%val(:,:) = a%val(:,:) * d
call a%to_gpu(info)
if (info /= 0) goto 9999
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_elg_scals

@ -0,0 +1,84 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_elg_to_gpu(a,info,nzrm)
use psb_base_mod
use elldev_mod
use psb_vectordev_mod
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_to_gpu
implicit none
class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in), optional :: nzrm
integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize, nzt
type(elldev_parms) :: gpu_parms
info = 0
if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return
m = a%get_nrows()
nzm = psb_size(a%val,2)
n = a%get_ncols()
nzt = a%get_nzeros()
if (present(nzrm)) nzm = max(nzm,nzrm)
gpu_parms = FgetEllDeviceParams(m,nzm,nzt,n,spgpu_type_complex_float,1)
if (c_associated(a%deviceMat)) then
pitch = getEllDevicePitch(a%deviceMat)
maxrowsize = getEllDeviceMaxRowSize(a%deviceMat)
else
pitch = -1
maxrowsize = -1
end if
if ((pitch /= gpu_parms%pitch).or.(maxrowsize /= gpu_parms%maxRowSize)) then
if (c_associated(a%deviceMat)) then
call freeEllDevice(a%deviceMat)
endif
info = FallocEllDevice(a%deviceMat,m,nzm,nzt,n,spgpu_type_complex_float,1)
pitch = getEllDevicePitch(a%deviceMat)
maxrowsize = getEllDeviceMaxRowSize(a%deviceMat)
end if
if (info == 0) then
if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then
call psb_realloc(pitch,maxrowsize,a%val,info)
if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info)
end if
end if
if (info == 0) info = &
& writeEllDevice(a%deviceMat,a%val,a%ja,size(a%ja,1),a%irn,a%idiag)
call a%set_sync()
end subroutine psb_c_cuda_elg_to_gpu

@ -0,0 +1,61 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_elg_trim(a)
use psb_base_mod
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_trim
implicit none
class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
Integer(psb_ipk_) :: err_act, info, nz, m, nzm,ld
character(len=20) :: name='trim'
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
m = max(1_psb_ipk_,a%get_nrows())
ld = max(1_psb_ipk_,size(a%ja,1))
nzm = max(1_psb_ipk_,maxval(a%irn(1:m)))
call psb_realloc(m,a%irn,info)
if (info == psb_success_) call psb_realloc(m,a%idiag,info)
if (info == psb_success_) call psb_realloc(ld,nzm,a%ja,info)
if (info == psb_success_) call psb_realloc(ld,nzm,a%val,info)
if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_elg_trim

@ -0,0 +1,121 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans)
use psb_base_mod
use elldev_mod
use psb_vectordev_mod
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_vect_mv
use psb_c_cuda_vect_mod
implicit none
class(psb_c_cuda_elg_sparse_mat), intent(in) :: a
complex(psb_spk_), intent(in) :: alpha, beta
class(psb_c_base_vect_type), intent(inout) :: x
class(psb_c_base_vect_type), intent(inout) :: y
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
complex(psb_spk_), allocatable :: rx(:), ry(:)
logical :: tra
character :: trans_
Integer(Psb_ipk_) :: err_act
character(len=20) :: name='c_cuda_elg_vect_mv'
call psb_erractionsave(err_act)
info = psb_success_
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
if (tra) then
if (a%is_dev()) call a%sync()
if (.not.x%is_host()) call x%sync()
if (beta /= czero) then
if (.not.y%is_host()) call y%sync()
end if
call a%psb_c_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans)
call y%set_host()
else
if (a%is_host()) call a%sync()
select type (xx => x)
type is (psb_c_vect_cuda)
select type(yy => y)
type is (psb_c_vect_cuda)
if (a%is_host()) call a%sync()
if (xx%is_host()) call xx%sync()
if (beta /= czero) then
if (yy%is_host()) call yy%sync()
end if
info = spmvEllDevice(a%deviceMat,alpha,xx%deviceVect,&
& beta,yy%deviceVect)
if (info /= 0) then
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='spmvELLDevice',i_err=(/info,izero,izero,izero,izero/))
info = psb_err_from_subroutine_ai_
goto 9999
end if
call yy%set_dev()
class default
if (a%is_dev()) call a%sync()
rx = xx%get_vect()
ry = y%get_vect()
call a%spmm(alpha,rx,beta,ry,info)
call y%bld(ry)
end select
class default
if (a%is_dev()) call a%sync()
rx = x%get_vect()
ry = y%get_vect()
call a%spmm(alpha,rx,beta,ry,info)
call y%bld(ry)
end select
end if
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_elg_vect_mv

@ -0,0 +1,126 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans)
use psb_base_mod
use hdiagdev_mod
use psb_vectordev_mod
use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_csmv
implicit none
class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a
complex(psb_spk_), intent(in) :: alpha, beta, x(:)
complex(psb_spk_), intent(inout) :: y(:)
integer, intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer :: i,j,k,m,n, nnz, ir, jc
complex(psb_spk_) :: acc
type(c_ptr) :: gpX, gpY
logical :: tra
Integer :: err_act
character(len=20) :: name='c_cuda_hdiag_csmv'
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
if (tra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1)<n) then
info = 36
call psb_errpush(info,name,i_err=(/3*ione,n,izero,izero,izero/))
goto 9999
end if
if (size(y,1)<m) then
info = 36
call psb_errpush(info,name,i_err=(/5*ione,m,izero,izero,izero/))
goto 9999
end if
if (tra) then
call a%psb_c_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans)
else
!
! Just to test, move X/Y to/from the GPU.
!
if (info == 0) &
& info = FallocMultiVecDevice(gpX,1,size(x,1),spgpu_type_double)
if (alpha /= dzero) then
if (info == 0) &
& info = writeMultiVecDevice(gpX,x)
end if
if (info == 0) &
& info = FallocMultiVecDevice(gpY,1,size(y,1),spgpu_type_double)
if (beta /= dzero) then
if (info == 0) &
& info = writeMultiVecDevice(gpY,y)
end if
if (info == 0) &
& info = spmvHdiagDevice(a%deviceMat,alpha,gpX,beta,gpY)
if (info == 0) &
& info = readMultiVecDevice(gpY,y)
if (info /= 0) goto 9999
call freeMultiVecDevice(gpX)
call freeMultiVecDevice(gpY)
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_hdiag_csmv

@ -0,0 +1,64 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_hdiag_mold(a,b,info)
use psb_base_mod
use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_mold
implicit none
class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a
class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
character(len=20) :: name='hdiag_mold'
logical, parameter :: debug=.false.
call psb_get_erraction(err_act)
info = 0
if (allocated(b)) then
call b%free()
deallocate(b,stat=info)
end if
if (info == 0) allocate(psb_c_cuda_hdiag_sparse_mat :: b, stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
goto 9999
end if
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_hdiag_mold

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save