Merge branch 'repackage' into non-diag
commit
74cf138a6c
@ -0,0 +1,37 @@
|
||||
#
# Makefile for the CUDA kernels (psi_cuda_*_CopyCooTo{Elg,Hlg}) of the
# PSBLAS GPU plugin.  Compiler settings and pattern rules are expected to
# come from $(TOPDIR)/Make.inc.
#
TOPDIR=../..
include $(TOPDIR)/Make.inc
#
# Libraries used
#
PSBLIBDIR=$(PSBLASDIR)/lib/
PSBINCDIR=$(PSBLASDIR)/include
LIBDIR=$(TOPDIR)/lib
INCDIR=$(TOPDIR)/include
PSBLAS_LIB= -L$(PSBLIBDIR) -lpsb_util -lpsb_base
#-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base
LDLIBS=$(PSBLDLIBS)
#
# Compilers and such
#
#CCOPT= -g
FINCLUDES=$(FMFLAG). $(FMFLAG)$(INCDIR) $(FMFLAG)$(PSBINCDIR) $(FIFLAG).
CINCLUDES=$(SPGPU_INCLUDES) $(CUDA_INCLUDES) -I.. -I$(INCDIR)
# Default archive name; the parent Makefile may override it on the command line.
LIBNAME=libpsb_gpu.a

# One object per (type, format) pair: c/d/s/z x Elg/Hlg.
CUDAOBJS=psi_cuda_c_CopyCooToElg.o psi_cuda_c_CopyCooToHlg.o \
	psi_cuda_d_CopyCooToElg.o psi_cuda_d_CopyCooToHlg.o \
	psi_cuda_s_CopyCooToElg.o psi_cuda_s_CopyCooToHlg.o \
	psi_cuda_z_CopyCooToElg.o psi_cuda_z_CopyCooToHlg.o

# None of these targets produces a file of the same name.
.PHONY: objs lib clean

objs: $(CUDAOBJS)

# Add the kernel objects to the archive living in the parent directory.
lib: objs
	ar cur ../$(LIBNAME) $(CUDAOBJS)

# Every object is generated from the shared .cuh templates.
$(CUDAOBJS): psi_cuda_common.cuh psi_cuda_CopyCooToElg.cuh psi_cuda_CopyCooToHlg.cuh

clean:
	/bin/rm -f $(CUDAOBJS)
|
@ -0,0 +1,104 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "cintrf.h"
|
||||
#include "vectordev.h"
|
||||
#include "psi_cuda_common.cuh"
|
||||
|
||||
|
||||
#undef GEN_PSI_FUNC_NAME
|
||||
#define GEN_PSI_FUNC_NAME(x) CONCAT(CONCAT(psi_cuda_,x),_CopyCooToElg)
|
||||
|
||||
#define THREAD_BLOCK 256
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
void GEN_PSI_FUNC_NAME(TYPE_SYMBOL)(spgpuHandle_t handle, int nr, int nc, int nza,
|
||||
int baseIdx, int hacksz, int ldv, int nzm,
|
||||
int *rS,int *devIdisp, int *devJa, VALUE_TYPE *devVal,
|
||||
int *idiag, int *rP, VALUE_TYPE *cM);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Kernel: copy one row per thread from row-grouped (idisp/ja/val) storage
 * into the strided output arrays rP (column indices) and cM (values).
 *
 * Launch layout: 1-D grid of 1-D blocks of THREAD_BLOCK threads; thread
 * number ki (0 <= ki < nrws) handles row i = ii + ki.
 *
 *   ii, nrws   first row and number of rows covered by this launch
 *   nr         total number of rows; rows >= nr are ignored
 *   baseIdx    index base added when comparing/recording the diagonal
 *   ldv        stride between consecutive entries of the same row in rP/cM
 *   nzm        number of entries written per row (copied + padding)
 *   rS[i]      number of entries of row i
 *   devIdisp[i] offset of the first entry of row i inside devJa/devVal
 *   idiag[i]   output: position (plus baseIdx) of the diagonal entry inside
 *              row i, or 0 when the row has no entry with column i+baseIdx
 *
 * nza and hacksz are not used by this kernel.
 */
__global__ void CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_krn)(int ii, int nrws, int nr, int nza,
							    int baseIdx, int hacksz, int ldv, int nzm,
							    int *rS, int *devIdisp, int *devJa, VALUE_TYPE *devVal,
							    int *idiag, int *rP, VALUE_TYPE *cM)
{
  const int ki = threadIdx.x + blockIdx.x * (THREAD_BLOCK);

  /* Guard first: ii + ki is only formed for threads that own a row, so the
     tail threads of the last block cannot overflow the addition. */
  if (ki >= nrws) return;
  const int i = ii + ki;
  if (i >= nr) return;

  const int rowStart = devIdisp[i];
  const int rsz      = rS[i];
  const int diagCol  = i + baseIdx;
  int idval = 0;
  int ir    = i;
  int k;

  /* Copy the stored entries of the row; remember where the diagonal sits. */
  for (k = 0; k < rsz; k++) {
    const int col = devJa[rowStart + k];
    if (col == diagCol) idval = k + baseIdx;
    rP[ir] = col;
    cM[ir] = devVal[rowStart + k];
    ir += ldv;
  }

  /* Pad the row up to nzm entries with zero values.  The padding column
     index must still be a valid one, so one is borrowed from devJa; this
     assumes devJa[] has at least one valid entry.  The load is skipped
     entirely when the row needs no padding. */
  if (rsz < nzm) {
    const int jc = devJa[devIdisp[1]];
    const VALUE_TYPE zero = CONCAT(zero_,VALUE_TYPE)();
    for (k = rsz; k < nzm; k++) {
      rP[ir] = jc;
      cM[ir] = zero;
      ir += ldv;
    }
  }

  idiag[i] = idval;
}
|
||||
|
||||
|
||||
|
||||
/*
 * Launch the CopyCooToElg kernel for rows [i, i+nrws) on the stream held
 * by the handle.  The launch is asynchronous; no error status is returned.
 *
 * The grid has ceil(nrws / THREAD_BLOCK) blocks of THREAD_BLOCK threads.
 * All remaining arguments are forwarded to the kernel unchanged.
 */
void CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_)(spgpuHandle_t handle, int nrws, int i, int nr, int nza,
					      int baseIdx, int hacksz, int ldv, int nzm,
					      int *rS,int *devIdisp, int *devJa, VALUE_TYPE *devVal,
					      int *idiag, int *rP, VALUE_TYPE *cM)
{
  /* Nothing to do; a zero-sized grid would be an invalid launch configuration. */
  if (nrws <= 0) return;

  dim3 block (THREAD_BLOCK, 1);
  /* Ceiling division written so that it cannot overflow for nrws close to INT_MAX. */
  dim3 grid ((nrws - 1) / THREAD_BLOCK + 1);

  CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_krn)
    <<< grid, block, 0, handle->currentStream >>>(i, nrws, nr, nza, baseIdx, hacksz, ldv, nzm,
						  rS, devIdisp, devJa, devVal, idiag, rP, cM);
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Host entry point: process all nr rows, splitting the work into as many
 * kernel launches as the device grid limit requires.
 *
 * Each launch covers at most THREAD_BLOCK * handle->maxGridSizeX rows.
 * That product is formed in 64-bit arithmetic because it does not fit in
 * an int on devices whose maxGridSizeX is 2^31-1.
 *
 * All array arguments are passed straight through to the kernel launcher.
 */
void
GEN_PSI_FUNC_NAME(TYPE_SYMBOL)
(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, int hacksz, int ldv, int nzm,
 int *rS,int *devIdisp, int *devJa, VALUE_TYPE *devVal,
 int *idiag, int *rP, VALUE_TYPE *cM)
{
  int i, nrws;
  long long maxNForACall = (long long)THREAD_BLOCK * (long long)handle->maxGridSizeX;

  /* A single block is always a legal launch; this also guarantees that the
     loop below makes progress if the handle reports a non-positive limit. */
  if (maxNForACall < THREAD_BLOCK) maxNForACall = THREAD_BLOCK;

  for (i = 0; i < nr; i += nrws) {
    const long long remaining = (long long)nr - (long long)i;
    /* The smaller of the two never exceeds nr - i, so it fits in an int. */
    nrws = (int)((remaining < maxNForACall) ? remaining : maxNForACall);
    CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_)(handle, nrws, i, nr, nza, baseIdx, hacksz, ldv, nzm,
					     rS, devIdisp, devJa, devVal, idiag, rP, cM);
  }
}
|
@ -0,0 +1,108 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "cintrf.h"
|
||||
#include "vectordev.h"
|
||||
#include "psi_cuda_common.cuh"
|
||||
|
||||
|
||||
#undef GEN_PSI_FUNC_NAME
|
||||
#define GEN_PSI_FUNC_NAME(x) CONCAT(CONCAT(psi_cuda_,x),_CopyCooToHlg)
|
||||
|
||||
#define THREAD_BLOCK 256
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void GEN_PSI_FUNC_NAME(TYPE_SYMBOL)(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, int hacksz,
|
||||
int noffs, int isz, int *rS, int *hackOffs, int *devIdisp,
|
||||
int *devJa, VALUE_TYPE *devVal,
|
||||
int *idiag, int *rP, VALUE_TYPE *cM);
|
||||
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/*
 * Kernel: copy one row per thread from row-grouped (idisp/ja/val) storage
 * into the hack-organised output arrays rP (column indices) and cM (values).
 *
 * Launch layout: 1-D grid of 1-D blocks of THREAD_BLOCK threads; thread
 * number ki (0 <= ki < nrws) handles row i = ii + ki.
 *
 *   ii, nrws    first row and number of rows covered by this launch
 *   nr          total number of rows; rows >= nr are ignored
 *   baseIdx     index base added when comparing/recording the diagonal
 *   hacksz      rows per hack; also the stride between consecutive entries
 *               of the same row in rP/cM
 *   hackOffs[h] offset of hack h inside rP/cM; the per-row length of a hack
 *               is (hackOffs[h+1] - hackOffs[h]) / hacksz
 *   rS[i]       number of entries of row i
 *   devIdisp[i] offset of the first entry of row i inside devJa/devVal
 *   idiag[i]    output: position (plus baseIdx) of the diagonal entry inside
 *               row i, or 0 when the row has no entry with column i+baseIdx
 *
 * nza, noffs and isz are not used by this kernel.
 */
__global__ void CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_krn)(int ii, int nrws, int nr, int nza,
							    int baseIdx, int hacksz, int noffs, int isz,
							    int *rS, int *hackOffs, int *devIdisp,
							    int *devJa, VALUE_TYPE *devVal,
							    int *idiag, int *rP, VALUE_TYPE *cM)
{
  const int ki = threadIdx.x + blockIdx.x * (THREAD_BLOCK);

  /* Guard first: ii + ki is only formed for threads that own a row, so the
     tail threads of the last block cannot overflow the addition. */
  if (ki >= nrws) return;
  const int i = ii + ki;
  if (i >= nr) return;

  /* Locate the row inside its hack. */
  const int hackId     = i / hacksz;
  const int hackLaneId = i % hacksz;
  const int hackStart  = hackOffs[hackId];
  const int nzm        = (hackOffs[hackId+1] - hackStart) / hacksz;

  const int rsz      = rS[i];
  const int rowStart = devIdisp[i];
  const int diagCol  = i + baseIdx;
  int idval = 0;
  int ir    = hackStart + hackLaneId;
  int k;

  /* Copy the stored entries of the row; remember where the diagonal sits. */
  for (k = 0; k < rsz; k++) {
    const int col = devJa[rowStart + k];
    cM[ir] = devVal[rowStart + k];
    if (col == diagCol) idval = k + baseIdx;
    rP[ir] = col;
    ir += hacksz;
  }

  /* Pad the row up to the hack's row length with zero values.  The padding
     column index must still be a valid one, so one is borrowed from devJa;
     this assumes devJa[] has at least one valid entry.  The load is skipped
     entirely when the row needs no padding. */
  if (rsz < nzm) {
    const int jc = devJa[devIdisp[1]];
    const VALUE_TYPE zero = CONCAT(zero_,VALUE_TYPE)();
    for (k = rsz; k < nzm; k++) {
      rP[ir] = jc;
      cM[ir] = zero;
      ir += hacksz;
    }
  }

  idiag[i] = idval;
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Launch the CopyCooToHlg kernel for rows [i, i+nrws) on the stream held
 * by the handle.  The launch is asynchronous; no error status is returned.
 *
 * The grid has ceil(nrws / THREAD_BLOCK) blocks of THREAD_BLOCK threads.
 * All remaining arguments are forwarded to the kernel unchanged.
 */
void CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_)(spgpuHandle_t handle, int nrws, int i,
					      int nr, int nza, int baseIdx, int hacksz, int noffs, int isz,
					      int *rS, int *hackOffs, int *devIdisp, int *devJa,
					      VALUE_TYPE *devVal, int *idiag, int *rP, VALUE_TYPE *cM)
{
  /* Nothing to do; a zero-sized grid would be an invalid launch configuration. */
  if (nrws <= 0) return;

  dim3 block (THREAD_BLOCK, 1);
  /* Ceiling division written so that it cannot overflow for nrws close to INT_MAX. */
  dim3 grid ((nrws - 1) / THREAD_BLOCK + 1);

  CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_krn)
    <<< grid, block, 0, handle->currentStream >>>(i, nrws, nr, nza, baseIdx, hacksz, noffs, isz,
						  rS, hackOffs, devIdisp, devJa, devVal, idiag, rP, cM);
}
|
||||
|
||||
|
||||
/*
 * Host entry point: process all nr rows, splitting the work into as many
 * kernel launches as the device grid limit requires.
 *
 * Each launch covers at most THREAD_BLOCK * handle->maxGridSizeX rows.
 * That product is formed in 64-bit arithmetic because it does not fit in
 * an int on devices whose maxGridSizeX is 2^31-1.
 *
 * All array arguments are passed straight through to the kernel launcher.
 */
void GEN_PSI_FUNC_NAME(TYPE_SYMBOL)(spgpuHandle_t handle, int nr, int nc, int nza,
				    int baseIdx, int hacksz, int noffs, int isz,
				    int *rS, int *hackOffs, int *devIdisp,
				    int *devJa, VALUE_TYPE *devVal,
				    int *idiag, int *rP, VALUE_TYPE *cM)
{
  int i, nrws;
  long long maxNForACall = (long long)THREAD_BLOCK * (long long)handle->maxGridSizeX;

  /* A single block is always a legal launch; this also guarantees that the
     loop below makes progress if the handle reports a non-positive limit. */
  if (maxNForACall < THREAD_BLOCK) maxNForACall = THREAD_BLOCK;

  for (i = 0; i < nr; i += nrws) {
    const long long remaining = (long long)nr - (long long)i;
    /* The smaller of the two never exceeds nr - i, so it fits in an int. */
    nrws = (int)((remaining < maxNForACall) ? remaining : maxNForACall);
    CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_)(handle, nrws, i, nr, nza, baseIdx, hacksz, noffs, isz,
					     rS, hackOffs, devIdisp, devJa, devVal, idiag, rP, cM);
  }
}
|
@ -0,0 +1,10 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "cintrf.h"
|
||||
#include "vectordev.h"
|
||||
|
||||
|
||||
#define VALUE_TYPE cuFloatComplex
|
||||
#define TYPE_SYMBOL c
|
||||
#include "psi_cuda_CopyCooToElg.cuh"
|
@ -0,0 +1,10 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "cintrf.h"
|
||||
#include "vectordev.h"
|
||||
|
||||
|
||||
#define VALUE_TYPE cuFloatComplex
|
||||
#define TYPE_SYMBOL c
|
||||
#include "psi_cuda_CopyCooToHlg.cuh"
|
@ -0,0 +1,16 @@
|
||||
#pragma once
|
||||
|
||||
/* Token pasting in two steps so that macro arguments are expanded first. */
#define PRE_CONCAT(A, B) A ## B
#define CONCAT(A, B) PRE_CONCAT(A, B)

/* Smaller of A and B.  Both arguments may be evaluated twice: do not pass
   expressions with side effects. */
#define MIN(A,B) ( (A)<(B) ? (A) : (B) )

/* x*x.  The argument is evaluated twice. */
#define SQUARE(x) ((x)*(x))

/* Element (ix,iy) of array a stored with nc elements per ix.  The array
   argument is parenthesized so that pointer expressions such as p+1 index
   correctly.  GET_ADDR yields an lvalue; GET_VAL wraps it for use as a value. */
#define GET_ADDR(a,ix,iy,nc) (a)[(nc)*(ix)+(iy)]
#define GET_VAL(a,ix,iy,nc) (GET_ADDR(a,ix,iy,nc))
|
||||
|
||||
/*
 * Additive zero for each supported value type.  The function names are
 * built as zero_<VALUE_TYPE> so that generic code can obtain the right
 * constant with CONCAT(zero_,VALUE_TYPE)().
 */
__device__ __host__ static float zero_float() { return 0.0f; }
__device__ __host__ static cuFloatComplex zero_cuFloatComplex() { return make_cuFloatComplex(0.0f, 0.0f); }

/* Double-precision variants: host compilation pass, or device code for
   compute capability 1.3 and above. */
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 130)
__device__ __host__ static double zero_double() { return 0.0; }
__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); }
#endif
|
@ -0,0 +1,10 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "cintrf.h"
|
||||
#include "vectordev.h"
|
||||
|
||||
|
||||
#define VALUE_TYPE double
|
||||
#define TYPE_SYMBOL d
|
||||
#include "psi_cuda_CopyCooToElg.cuh"
|
@ -0,0 +1,10 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "cintrf.h"
|
||||
#include "vectordev.h"
|
||||
|
||||
|
||||
#define VALUE_TYPE double
|
||||
#define TYPE_SYMBOL d
|
||||
#include "psi_cuda_CopyCooToHlg.cuh"
|
@ -0,0 +1,10 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "cintrf.h"
|
||||
#include "vectordev.h"
|
||||
|
||||
|
||||
#define VALUE_TYPE float
|
||||
#define TYPE_SYMBOL s
|
||||
#include "psi_cuda_CopyCooToElg.cuh"
|
@ -0,0 +1,10 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "cintrf.h"
|
||||
#include "vectordev.h"
|
||||
|
||||
|
||||
#define VALUE_TYPE float
|
||||
#define TYPE_SYMBOL s
|
||||
#include "psi_cuda_CopyCooToHlg.cuh"
|
@ -0,0 +1,10 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "cintrf.h"
|
||||
#include "vectordev.h"
|
||||
|
||||
|
||||
#define VALUE_TYPE cuDoubleComplex
|
||||
#define TYPE_SYMBOL z
|
||||
#include "psi_cuda_CopyCooToElg.cuh"
|
@ -0,0 +1,10 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "cintrf.h"
|
||||
#include "vectordev.h"
|
||||
|
||||
|
||||
#define VALUE_TYPE cuDoubleComplex
|
||||
#define TYPE_SYMBOL z
|
||||
#include "psi_cuda_CopyCooToHlg.cuh"
|
@ -0,0 +1,21 @@
|
||||
(c) Copyright 2011-2021 Davide Barbieri, Salvatore Filippone
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
|
||||
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
|
||||
SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
@ -0,0 +1,143 @@
|
||||
include ../Make.inc
|
||||
#
|
||||
# Libraries used
|
||||
#
|
||||
LIBDIR=../lib
|
||||
INCDIR=../include
|
||||
MODDIR=../modules
|
||||
PSBLAS_LIB= -lpsb_util -lpsb_base
|
||||
#-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base
|
||||
LDLIBS=$(PSBLDLIBS)
|
||||
#
|
||||
# Compilers and such
|
||||
#
|
||||
#CCOPT= -g
|
||||
FINCLUDES=$(FMFLAG). $(FMFLAG)$(INCDIR) $(FMFLAG)$(MODDIR) $(FIFLAG).
|
||||
CINCLUDES=$(SPGPU_INCLUDES) $(CUDA_INCLUDES) -I$(INCDIR)
|
||||
LIBNAME=libpsb_cuda.a
|
||||
|
||||
|
||||
FOBJS=cusparse_mod.o base_cusparse_mod.o \
|
||||
s_cusparse_mod.o d_cusparse_mod.o c_cusparse_mod.o z_cusparse_mod.o \
|
||||
psb_vectordev_mod.o core_mod.o \
|
||||
psb_s_vectordev_mod.o psb_d_vectordev_mod.o psb_i_vectordev_mod.o\
|
||||
psb_c_vectordev_mod.o psb_z_vectordev_mod.o psb_base_vectordev_mod.o \
|
||||
elldev_mod.o hlldev_mod.o diagdev_mod.o hdiagdev_mod.o \
|
||||
psb_i_cuda_vect_mod.o \
|
||||
psb_d_cuda_vect_mod.o psb_s_cuda_vect_mod.o\
|
||||
psb_z_cuda_vect_mod.o psb_c_cuda_vect_mod.o\
|
||||
psb_d_cuda_elg_mat_mod.o psb_d_cuda_hlg_mat_mod.o \
|
||||
psb_d_cuda_hybg_mat_mod.o psb_d_cuda_csrg_mat_mod.o\
|
||||
psb_s_cuda_elg_mat_mod.o psb_s_cuda_hlg_mat_mod.o \
|
||||
psb_s_cuda_hybg_mat_mod.o psb_s_cuda_csrg_mat_mod.o\
|
||||
psb_c_cuda_elg_mat_mod.o psb_c_cuda_hlg_mat_mod.o \
|
||||
psb_c_cuda_hybg_mat_mod.o psb_c_cuda_csrg_mat_mod.o\
|
||||
psb_z_cuda_elg_mat_mod.o psb_z_cuda_hlg_mat_mod.o \
|
||||
psb_z_cuda_hybg_mat_mod.o psb_z_cuda_csrg_mat_mod.o\
|
||||
psb_cuda_env_mod.o psb_cuda_mod.o \
|
||||
psb_d_cuda_diag_mat_mod.o\
|
||||
psb_d_cuda_hdiag_mat_mod.o psb_s_cuda_hdiag_mat_mod.o\
|
||||
psb_s_cuda_dnsg_mat_mod.o psb_d_cuda_dnsg_mat_mod.o \
|
||||
psb_c_cuda_dnsg_mat_mod.o psb_z_cuda_dnsg_mat_mod.o \
|
||||
dnsdev_mod.o
|
||||
|
||||
COBJS= elldev.o hlldev.o diagdev.o hdiagdev.o vectordev.o ivectordev.o dnsdev.o\
|
||||
svectordev.o dvectordev.o cvectordev.o zvectordev.o cuda_util.o \
|
||||
fcusparse.o scusparse.o dcusparse.o ccusparse.o zcusparse.o
|
||||
|
||||
OBJS=$(COBJS) $(FOBJS)
|
||||
|
||||
# None of these targets produces a file of the same name.
.PHONY: lib objs spgpuinc spgpuobjs spgpulib

# Default goal: build every object, let the sub-directories add theirs to
# the archive, then add the local objects and install the archive.
lib: objs ilib cudalib spgpulib
	ar cur $(LIBNAME) $(OBJS)
	/bin/cp -p $(LIBNAME) $(LIBDIR)

# Build all objects (local and in sub-directories), then publish the
# Fortran module files and the C headers.
objs: spgpuinc $(OBJS) iobjs cudaobjs spgpuobjs
	/bin/cp -p *$(.mod) $(MODDIR)
	/bin/cp -p *.h $(INCDIR)

# Delegation to the spgpu sub-directory.
spgpuinc:
	$(MAKE) -C spgpu includes
spgpuobjs:
	$(MAKE) -C spgpu objs
spgpulib:
	$(MAKE) -C spgpu lib
|
||||
|
||||
dnsdev_mod.o hlldev_mod.o elldev_mod.o psb_base_vectordev_mod.o: core_mod.o
|
||||
psb_d_cuda_vect_mod.o psb_s_cuda_vect_mod.o psb_z_cuda_vect_mod.o psb_c_cuda_vect_mod.o: psb_i_cuda_vect_mod.o
|
||||
psb_i_cuda_vect_mod.o : psb_vectordev_mod.o psb_cuda_env_mod.o
|
||||
cusparse_mod.o: s_cusparse_mod.o d_cusparse_mod.o c_cusparse_mod.o z_cusparse_mod.o
|
||||
s_cusparse_mod.o d_cusparse_mod.o c_cusparse_mod.o z_cusparse_mod.o : base_cusparse_mod.o
|
||||
psb_d_cuda_hlg_mat_mod.o: hlldev_mod.o psb_d_cuda_vect_mod.o psb_cuda_env_mod.o
|
||||
psb_d_cuda_elg_mat_mod.o: elldev_mod.o psb_d_cuda_vect_mod.o
|
||||
psb_d_cuda_diag_mat_mod.o: diagdev_mod.o psb_d_cuda_vect_mod.o
|
||||
psb_d_cuda_hdiag_mat_mod.o: hdiagdev_mod.o psb_d_cuda_vect_mod.o
|
||||
psb_s_cuda_dnsg_mat_mod.o: dnsdev_mod.o psb_s_cuda_vect_mod.o
|
||||
psb_d_cuda_dnsg_mat_mod.o: dnsdev_mod.o psb_d_cuda_vect_mod.o
|
||||
psb_c_cuda_dnsg_mat_mod.o: dnsdev_mod.o psb_c_cuda_vect_mod.o
|
||||
psb_z_cuda_dnsg_mat_mod.o: dnsdev_mod.o psb_z_cuda_vect_mod.o
|
||||
psb_s_cuda_hlg_mat_mod.o: hlldev_mod.o psb_s_cuda_vect_mod.o psb_cuda_env_mod.o
|
||||
psb_s_cuda_elg_mat_mod.o: elldev_mod.o psb_s_cuda_vect_mod.o
|
||||
psb_s_cuda_diag_mat_mod.o: diagdev_mod.o psb_s_cuda_vect_mod.o
|
||||
psb_s_cuda_hdiag_mat_mod.o: hdiagdev_mod.o psb_s_cuda_vect_mod.o
|
||||
psb_s_cuda_csrg_mat_mod.o psb_s_cuda_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o
|
||||
psb_d_cuda_csrg_mat_mod.o psb_d_cuda_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o
|
||||
psb_z_cuda_hlg_mat_mod.o: hlldev_mod.o psb_z_cuda_vect_mod.o psb_cuda_env_mod.o
|
||||
psb_z_cuda_elg_mat_mod.o: elldev_mod.o psb_z_cuda_vect_mod.o
|
||||
psb_c_cuda_hlg_mat_mod.o: hlldev_mod.o psb_c_cuda_vect_mod.o psb_cuda_env_mod.o
|
||||
psb_c_cuda_elg_mat_mod.o: elldev_mod.o psb_c_cuda_vect_mod.o
|
||||
psb_c_cuda_csrg_mat_mod.o psb_c_cuda_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o
|
||||
psb_z_cuda_csrg_mat_mod.o psb_z_cuda_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o
|
||||
psb_vectordev_mod.o: psb_s_vectordev_mod.o psb_d_vectordev_mod.o psb_c_vectordev_mod.o psb_z_vectordev_mod.o psb_i_vectordev_mod.o
|
||||
psb_i_vectordev_mod.o psb_s_vectordev_mod.o psb_d_vectordev_mod.o psb_c_vectordev_mod.o psb_z_vectordev_mod.o: psb_base_vectordev_mod.o
|
||||
vectordev.o: cuda_util.o vectordev.h
|
||||
elldev.o: elldev.c
|
||||
dnsdev.o: dnsdev.c
|
||||
fcusparse.h elldev.c: elldev.h vectordev.h
|
||||
fcusparse.o scusparse.o dcusparse.o ccusparse.o zcusparse.o : fcusparse.h
|
||||
fcusparse.o scusparse.o dcusparse.o ccusparse.o zcusparse.o : fcusparse_fct.h
|
||||
svectordev.o: svectordev.h vectordev.h
|
||||
dvectordev.o: dvectordev.h vectordev.h
|
||||
cvectordev.o: cvectordev.h vectordev.h
|
||||
zvectordev.o: zvectordev.h vectordev.h
|
||||
psb_cuda_env_mod.o: base_cusparse_mod.o
|
||||
psb_cuda_mod.o: psb_cuda_env_mod.o psb_i_cuda_vect_mod.o\
|
||||
psb_d_cuda_vect_mod.o psb_s_cuda_vect_mod.o\
|
||||
psb_z_cuda_vect_mod.o psb_c_cuda_vect_mod.o\
|
||||
psb_d_cuda_elg_mat_mod.o psb_d_cuda_hlg_mat_mod.o \
|
||||
psb_d_cuda_hybg_mat_mod.o psb_d_cuda_csrg_mat_mod.o\
|
||||
psb_s_cuda_elg_mat_mod.o psb_s_cuda_hlg_mat_mod.o \
|
||||
psb_s_cuda_hybg_mat_mod.o psb_s_cuda_csrg_mat_mod.o\
|
||||
psb_c_cuda_elg_mat_mod.o psb_c_cuda_hlg_mat_mod.o \
|
||||
psb_c_cuda_hybg_mat_mod.o psb_c_cuda_csrg_mat_mod.o\
|
||||
psb_z_cuda_elg_mat_mod.o psb_z_cuda_hlg_mat_mod.o \
|
||||
psb_z_cuda_hybg_mat_mod.o psb_z_cuda_csrg_mat_mod.o\
|
||||
psb_d_cuda_diag_mat_mod.o \
|
||||
psb_d_cuda_hdiag_mat_mod.o psb_s_cuda_hdiag_mat_mod.o\
|
||||
psb_s_cuda_dnsg_mat_mod.o psb_d_cuda_dnsg_mat_mod.o \
|
||||
psb_c_cuda_dnsg_mat_mod.o psb_z_cuda_dnsg_mat_mod.o
|
||||
|
||||
# None of these targets produces a file of the same name.
.PHONY: iobjs cudaobjs ilib cudalib clean cclean iclean cudaclean spgpuclean veryclean

# Objects in the sub-directories need the Fortran modules built first.
iobjs: $(FOBJS)
	$(MAKE) -C impl objs
cudaobjs: $(FOBJS)
	$(MAKE) -C CUDA objs

# Sub-directories add their objects to the archive named by LIBNAME.
ilib: objs
	$(MAKE) -C impl lib LIBNAME=$(LIBNAME)
cudalib: objs ilib
	$(MAKE) -C CUDA lib LIBNAME=$(LIBNAME)

clean: cclean iclean cudaclean spgpuclean
	/bin/rm -f $(FOBJS) *$(.mod) *.a

cclean:
	/bin/rm -f $(COBJS)
iclean:
	$(MAKE) -C impl clean
cudaclean:
	$(MAKE) -C CUDA clean
spgpuclean:
	$(MAKE) -C spgpu clean

veryclean: clean
|
@ -0,0 +1,113 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
|
||||
module base_cusparse_mod
  !
  ! Base layer of the Fortran interface to CUSPARSE: named integer
  ! constants, the bindings to the C routines FcusparseCreate and
  ! FcusparseDestroy, and two thin wrappers around them.
  !
  ! iso_c_binding is used without an ONLY clause, so its entities are also
  ! visible to any module that uses this one.
  !
  use iso_c_binding
  implicit none

  ! Status codes.  Values are assigned in declaration order starting at 0.
  ! NOTE(review): presumably these mirror the C cusparseStatus_t enum - confirm.
  enum, bind(c)
    enumerator :: cusparse_status_success = 0
    enumerator :: cusparse_status_not_initialized
    enumerator :: cusparse_status_alloc_failed
    enumerator :: cusparse_status_invalid_value
    enumerator :: cusparse_status_arch_mismatch
    enumerator :: cusparse_status_mapping_error
    enumerator :: cusparse_status_execution_failed
    enumerator :: cusparse_status_internal_error
    enumerator :: cusparse_status_matrix_type_not_supported
  end enum

  ! Matrix type selectors.
  enum, bind(c)
    enumerator :: cusparse_matrix_type_general = 0
    enumerator :: cusparse_matrix_type_symmetric
    enumerator :: cusparse_matrix_type_hermitian
    enumerator :: cusparse_matrix_type_triangular
  end enum

  ! Fill mode selectors.
  enum, bind(c)
    enumerator :: cusparse_fill_mode_lower = 0
    enumerator :: cusparse_fill_mode_upper
  end enum

  ! Diagonal type selectors.
  enum, bind(c)
    enumerator :: cusparse_diag_type_non_unit = 0
    enumerator :: cusparse_diag_type_unit
  end enum

  ! Index base selectors.
  enum, bind(c)
    enumerator :: cusparse_index_base_zero = 0
    enumerator :: cusparse_index_base_one
  end enum

  ! Operation selectors.
  enum, bind(c)
    enumerator :: cusparse_operation_non_transpose = 0
    enumerator :: cusparse_operation_transpose
    enumerator :: cusparse_operation_conjugate_transpose
  end enum

  ! Direction selectors.
  enum, bind(c)
    enumerator :: cusparse_direction_row = 0
    enumerator :: cusparse_direction_column
  end enum

  ! Bindings to the C side; both take no arguments and return a C int.
  interface
    function FcusparseCreate() bind(c,name="FcusparseCreate") result(res)
      import :: c_int
      integer(c_int) :: res
    end function FcusparseCreate

    function FcusparseDestroy() bind(c,name="FcusparseDestroy") result(res)
      import :: c_int
      integer(c_int) :: res
    end function FcusparseDestroy
  end interface

contains

  ! Calls FcusparseCreate and hands its return value back to the caller.
  function initFcusparse() result(res)
    integer(c_int) :: res

    res = FcusparseCreate()
  end function initFcusparse

  ! Calls FcusparseDestroy and hands its return value back to the caller.
  function closeFcusparse() result(res)
    integer(c_int) :: res

    res = FcusparseDestroy()
  end function closeFcusparse

end module base_cusparse_mod
|
@ -0,0 +1,312 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
|
||||
module c_cusparse_mod
|
||||
use base_cusparse_mod
|
||||
|
||||
type, bind(c) :: c_Cmat
|
||||
type(c_ptr) :: Mat = c_null_ptr
|
||||
end type c_Cmat
|
||||
|
||||
#if CUDA_SHORT_VERSION <= 10
|
||||
type, bind(c) :: c_Hmat
|
||||
type(c_ptr) :: Mat = c_null_ptr
|
||||
end type c_Hmat
|
||||
#endif
|
||||
|
||||
interface CSRGDeviceFree
|
||||
function c_CSRGDeviceFree(Mat) &
|
||||
& bind(c,name="c_CSRGDeviceFree") result(res)
|
||||
use iso_c_binding
|
||||
import c_Cmat
|
||||
type(c_Cmat) :: Mat
|
||||
integer(c_int) :: res
|
||||
end function c_CSRGDeviceFree
|
||||
end interface
|
||||
|
||||
interface CSRGDeviceSetMatType
|
||||
function c_CSRGDeviceSetMatType(Mat,type) &
|
||||
& bind(c,name="c_CSRGDeviceSetMatType") result(res)
|
||||
use iso_c_binding
|
||||
import c_Cmat
|
||||
type(c_Cmat) :: Mat
|
||||
integer(c_int),value :: type
|
||||
integer(c_int) :: res
|
||||
end function c_CSRGDeviceSetMatType
|
||||
end interface
|
||||
|
||||
interface CSRGDeviceSetMatFillMode
|
||||
function c_CSRGDeviceSetMatFillMode(Mat,type) &
|
||||
& bind(c,name="c_CSRGDeviceSetMatFillMode") result(res)
|
||||
use iso_c_binding
|
||||
import c_Cmat
|
||||
type(c_Cmat) :: Mat
|
||||
integer(c_int),value :: type
|
||||
integer(c_int) :: res
|
||||
end function c_CSRGDeviceSetMatFillMode
|
||||
end interface
|
||||
|
||||
interface CSRGDeviceSetMatDiagType
|
||||
function c_CSRGDeviceSetMatDiagType(Mat,type) &
|
||||
& bind(c,name="c_CSRGDeviceSetMatDiagType") result(res)
|
||||
use iso_c_binding
|
||||
import c_Cmat
|
||||
type(c_Cmat) :: Mat
|
||||
integer(c_int),value :: type
|
||||
integer(c_int) :: res
|
||||
end function c_CSRGDeviceSetMatDiagType
|
||||
end interface
|
||||
|
||||
interface CSRGDeviceSetMatIndexBase
|
||||
function c_CSRGDeviceSetMatIndexBase(Mat,type) &
|
||||
& bind(c,name="c_CSRGDeviceSetMatIndexBase") result(res)
|
||||
use iso_c_binding
|
||||
import c_Cmat
|
||||
type(c_Cmat) :: Mat
|
||||
integer(c_int),value :: type
|
||||
integer(c_int) :: res
|
||||
end function c_CSRGDeviceSetMatIndexBase
|
||||
end interface
|
||||
|
||||
#if CUDA_SHORT_VERSION <= 10
|
||||
interface CSRGDeviceCsrsmAnalysis
|
||||
function c_CSRGDeviceCsrsmAnalysis(Mat) &
|
||||
& bind(c,name="c_CSRGDeviceCsrsmAnalysis") result(res)
|
||||
use iso_c_binding
|
||||
import c_Cmat
|
||||
type(c_Cmat) :: Mat
|
||||
integer(c_int) :: res
|
||||
end function c_CSRGDeviceCsrsmAnalysis
|
||||
end interface
|
||||
#else
|
||||
interface CSRGIsNullSvBuffer
|
||||
function c_CSRGIsNullSvBuffer(Mat) &
|
||||
& bind(c,name="c_CSRGIsNullSvBuffer") result(res)
|
||||
use iso_c_binding
|
||||
import c_Cmat
|
||||
type(c_Cmat) :: Mat
|
||||
integer(c_int) :: res
|
||||
end function c_CSRGIsNullSvBuffer
|
||||
end interface
|
||||
#endif
|
||||
|
||||
interface CSRGDeviceAlloc
|
||||
function c_CSRGDeviceAlloc(Mat,nr,nc,nz) &
|
||||
& bind(c,name="c_CSRGDeviceAlloc") result(res)
|
||||
use iso_c_binding
|
||||
import c_Cmat
|
||||
type(c_Cmat) :: Mat
|
||||
integer(c_int), value :: nr, nc, nz
|
||||
integer(c_int) :: res
|
||||
end function c_CSRGDeviceAlloc
|
||||
end interface
|
||||
|
||||
interface CSRGDeviceGetParms
|
||||
function c_CSRGDeviceGetParms(Mat,nr,nc,nz) &
|
||||
& bind(c,name="c_CSRGDeviceGetParms") result(res)
|
||||
use iso_c_binding
|
||||
import c_Cmat
|
||||
type(c_Cmat) :: Mat
|
||||
integer(c_int) :: nr, nc, nz
|
||||
integer(c_int) :: res
|
||||
end function c_CSRGDeviceGetParms
|
||||
end interface
|
||||
|
||||
interface spsvCSRGDevice
|
||||
function c_spsvCSRGDevice(Mat,alpha,x,beta,y) &
|
||||
& bind(c,name="c_spsvCSRGDevice") result(res)
|
||||
use iso_c_binding
|
||||
import c_Cmat
|
||||
type(c_Cmat) :: Mat
|
||||
type(c_ptr), value :: x
|
||||
type(c_ptr), value :: y
|
||||
complex(c_float_complex), value :: alpha,beta
|
||||
integer(c_int) :: res
|
||||
end function c_spsvCSRGDevice
|
||||
end interface
|
||||
|
||||
interface spmvCSRGDevice
|
||||
function c_spmvCSRGDevice(Mat,alpha,x,beta,y) &
|
||||
& bind(c,name="c_spmvCSRGDevice") result(res)
|
||||
use iso_c_binding
|
||||
import c_Cmat
|
||||
type(c_Cmat) :: Mat
|
||||
type(c_ptr), value :: x
|
||||
type(c_ptr), value :: y
|
||||
complex(c_float_complex), value :: alpha,beta
|
||||
integer(c_int) :: res
|
||||
end function c_spmvCSRGDevice
|
||||
end interface
|
||||
|
||||
interface CSRGHost2Device
|
||||
function c_CSRGHost2Device(Mat,m,n,nz,irp,ja,val) &
|
||||
& bind(c,name="c_CSRGHost2Device") result(res)
|
||||
use iso_c_binding
|
||||
import c_Cmat
|
||||
type(c_Cmat) :: Mat
|
||||
integer(c_int), value :: m,n,nz
|
||||
integer(c_int) :: irp(*), ja(*)
|
||||
complex(c_float_complex) :: val(*)
|
||||
integer(c_int) :: res
|
||||
end function c_CSRGHost2Device
|
||||
end interface
|
||||
|
||||
interface CSRGDevice2Host
|
||||
function c_CSRGDevice2Host(Mat,m,n,nz,irp,ja,val) &
|
||||
& bind(c,name="c_CSRGDevice2Host") result(res)
|
||||
use iso_c_binding
|
||||
import c_Cmat
|
||||
type(c_Cmat) :: Mat
|
||||
integer(c_int), value :: m,n,nz
|
||||
integer(c_int) :: irp(*), ja(*)
|
||||
complex(c_float_complex) :: val(*)
|
||||
integer(c_int) :: res
|
||||
end function c_CSRGDevice2Host
|
||||
end interface
|
||||
|
||||
#if CUDA_SHORT_VERSION <=10
|
||||
interface HYBGDeviceAlloc
|
||||
function c_HYBGDeviceAlloc(Mat,nr,nc,nz) &
|
||||
& bind(c,name="c_HYBGDeviceAlloc") result(res)
|
||||
use iso_c_binding
|
||||
import c_hmat
|
||||
type(c_Hmat) :: Mat
|
||||
integer(c_int), value :: nr, nc, nz
|
||||
integer(c_int) :: res
|
||||
end function c_HYBGDeviceAlloc
|
||||
end interface
|
||||
|
||||
interface HYBGDeviceFree
|
||||
function c_HYBGDeviceFree(Mat) &
|
||||
& bind(c,name="c_HYBGDeviceFree") result(res)
|
||||
use iso_c_binding
|
||||
import c_Hmat
|
||||
type(c_Hmat) :: Mat
|
||||
integer(c_int) :: res
|
||||
end function c_HYBGDeviceFree
|
||||
end interface
|
||||
|
||||
interface HYBGDeviceSetMatType
|
||||
function c_HYBGDeviceSetMatType(Mat,type) &
|
||||
& bind(c,name="c_HYBGDeviceSetMatType") result(res)
|
||||
use iso_c_binding
|
||||
import c_Hmat
|
||||
type(c_Hmat) :: Mat
|
||||
integer(c_int),value :: type
|
||||
integer(c_int) :: res
|
||||
end function c_HYBGDeviceSetMatType
|
||||
end interface
|
||||
|
||||
interface HYBGDeviceSetMatFillMode
|
||||
function c_HYBGDeviceSetMatFillMode(Mat,type) &
|
||||
& bind(c,name="c_HYBGDeviceSetMatFillMode") result(res)
|
||||
use iso_c_binding
|
||||
import c_Hmat
|
||||
type(c_Hmat) :: Mat
|
||||
integer(c_int),value :: type
|
||||
integer(c_int) :: res
|
||||
end function c_HYBGDeviceSetMatFillMode
|
||||
end interface
|
||||
|
||||
! Explicit interface to the C routine c_HYBGDeviceSetMatDiagType.
! Mat is passed by reference, the integer selector `type` by value.
! The integer result is the status code returned by the C routine.
interface HYBGDeviceSetMatDiagType
  function c_HYBGDeviceSetMatDiagType(Mat,type) &
       & bind(c,name="c_HYBGDeviceSetMatDiagType") result(res)
    use iso_c_binding
    import :: c_Hmat
    integer(c_int)        :: res
    type(c_Hmat)          :: Mat
    integer(c_int), value :: type
  end function c_HYBGDeviceSetMatDiagType
end interface
|
||||
|
||||
! Explicit interface to the C routine c_HYBGDeviceSetMatIndexBase.
! Mat is passed by reference, the integer selector `type` by value.
! The integer result is the status code returned by the C routine.
interface HYBGDeviceSetMatIndexBase
  function c_HYBGDeviceSetMatIndexBase(Mat,type) &
       & bind(c,name="c_HYBGDeviceSetMatIndexBase") result(res)
    use iso_c_binding
    import :: c_Hmat
    integer(c_int)        :: res
    type(c_Hmat)          :: Mat
    integer(c_int), value :: type
  end function c_HYBGDeviceSetMatIndexBase
end interface
|
||||
|
||||
! Explicit interface to the C routine c_HYBGDeviceHybsmAnalysis.
! Mat is passed by reference; the integer result is the status code
! returned by the C routine.
interface HYBGDeviceHybsmAnalysis
  function c_HYBGDeviceHybsmAnalysis(Mat) &
       & bind(c,name="c_HYBGDeviceHybsmAnalysis") result(res)
    use iso_c_binding
    import :: c_Hmat
    integer(c_int) :: res
    type(c_Hmat)   :: Mat
  end function c_HYBGDeviceHybsmAnalysis
end interface
|
||||
|
||||
! Explicit interface to the C routine c_spsvHYBGDevice (single precision
! complex variant).  Mat is passed by reference; x and y are opaque C
! pointers passed by value; alpha and beta are complex scalars passed by
! value.  The integer result is the status code returned by the C routine.
interface spsvHYBGDevice
  function c_spsvHYBGDevice(Mat,alpha,x,beta,y) &
       & bind(c,name="c_spsvHYBGDevice") result(res)
    use iso_c_binding
    import :: c_Hmat
    integer(c_int)                  :: res
    type(c_Hmat)                    :: Mat
    complex(c_float_complex), value :: alpha,beta
    type(c_ptr), value              :: x
    type(c_ptr), value              :: y
  end function c_spsvHYBGDevice
end interface
|
||||
|
||||
! Explicit interface to the C routine c_spmvHYBGDevice (single precision
! complex variant).  Mat is passed by reference; x and y are opaque C
! pointers passed by value; alpha and beta are complex scalars passed by
! value.  The integer result is the status code returned by the C routine.
interface spmvHYBGDevice
  function c_spmvHYBGDevice(Mat,alpha,x,beta,y) &
       & bind(c,name="c_spmvHYBGDevice") result(res)
    use iso_c_binding
    import :: c_Hmat
    integer(c_int)                  :: res
    type(c_Hmat)                    :: Mat
    complex(c_float_complex), value :: alpha,beta
    type(c_ptr), value              :: x
    type(c_ptr), value              :: y
  end function c_spmvHYBGDevice
end interface
|
||||
|
||||
! Explicit interface to the C routine c_HYBGHost2Device (single precision
! complex variant).  Mat is passed by reference, the three sizes by value,
! and irp/ja/val are assumed-size arrays handed to C as plain pointers.
! The integer result is the status code returned by the C routine.
interface HYBGHost2Device
  function c_HYBGHost2Device(Mat,m,n,nz,irp,ja,val) &
       & bind(c,name="c_HYBGHost2Device") result(res)
    use iso_c_binding
    import :: c_Hmat
    integer(c_int)           :: res
    type(c_Hmat)             :: Mat
    integer(c_int), value    :: m,n,nz
    integer(c_int)           :: irp(*), ja(*)
    complex(c_float_complex) :: val(*)
  end function c_HYBGHost2Device
end interface
|
||||
#endif
|
||||
|
||||
end module c_cusparse_mod
|
@ -0,0 +1,99 @@
|
||||
/* Parallel Sparse BLAS GPU plugin */
|
||||
/* (C) Copyright 2013 */
|
||||
|
||||
/* Salvatore Filippone */
|
||||
/* Alessandro Fanfarillo */
|
||||
|
||||
/* Redistribution and use in source and binary forms, with or without */
|
||||
/* modification, are permitted provided that the following conditions */
|
||||
/* are met: */
|
||||
/* 1. Redistributions of source code must retain the above copyright */
|
||||
/* notice, this list of conditions and the following disclaimer. */
|
||||
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||
/* notice, this list of conditions, and the following disclaimer in the */
|
||||
/* documentation and/or other materials provided with the distribution. */
|
||||
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||
/* not be used to endorse or promote products derived from this */
|
||||
/* software without specific written permission. */
|
||||
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cusparse_v2.h>
|
||||
#include "cintrf.h"
|
||||
#include "fcusparse.h"
|
||||
|
||||
|
||||
/* Single precision complex */
|
||||
/*
 * Instantiation table for the generic cuSPARSE wrapper template.
 * Every T_xxx / cusparseTxxx name used by "fcusparse_fct.h" is mapped here
 * onto its single precision complex ("c_" / cusparseC) counterpart; the
 * template header is included at the bottom, after all names are defined.
 */

/* Element type and matching cuSPARSE data type tag. */
#define TYPE                         float complex
#define CUSPARSE_BASE_TYPE           CUDA_C_32F

/* CSR names defined regardless of the CUDA version. */
#define T_CSRGDeviceMat              c_CSRGDeviceMat
#define T_Cmat                       c_Cmat
#define T_spmvCSRGDevice             c_spmvCSRGDevice
#define T_spsvCSRGDevice             c_spsvCSRGDevice
#define T_CSRGDeviceAlloc            c_CSRGDeviceAlloc
#define T_CSRGDeviceFree             c_CSRGDeviceFree
#define T_CSRGHost2Device            c_CSRGHost2Device
#define T_CSRGDevice2Host            c_CSRGDevice2Host
#define T_CSRGDeviceSetMatFillMode   c_CSRGDeviceSetMatFillMode
#define T_CSRGDeviceSetMatDiagType   c_CSRGDeviceSetMatDiagType
#define T_CSRGDeviceGetParms         c_CSRGDeviceGetParms

#if CUDA_SHORT_VERSION <= 10

/* CSR names selected when CUDA_SHORT_VERSION <= 10. */
#define T_CSRGDeviceSetMatType       c_CSRGDeviceSetMatType
#define T_CSRGDeviceSetMatIndexBase  c_CSRGDeviceSetMatIndexBase
#define T_CSRGDeviceCsrsmAnalysis    c_CSRGDeviceCsrsmAnalysis
#define cusparseTcsrmv               cusparseCcsrmv
#define cusparseTcsrsv_solve         cusparseCcsrsv_solve
#define cusparseTcsrsv_analysis      cusparseCcsrsv_analysis

/* HYB names, only mapped in this branch. */
#define T_HYBGDeviceMat              c_HYBGDeviceMat
#define T_Hmat                       c_Hmat
#define T_HYBGDeviceFree             c_HYBGDeviceFree
#define T_spmvHYBGDevice             c_spmvHYBGDevice
#define T_HYBGDeviceAlloc            c_HYBGDeviceAlloc
#define T_HYBGDeviceSetMatDiagType   c_HYBGDeviceSetMatDiagType
#define T_HYBGDeviceSetMatIndexBase  c_HYBGDeviceSetMatIndexBase
#define T_HYBGDeviceSetMatType       c_HYBGDeviceSetMatType
#define T_HYBGDeviceSetMatFillMode   c_HYBGDeviceSetMatFillMode
#define T_HYBGDeviceHybsmAnalysis    c_HYBGDeviceHybsmAnalysis
#define T_spsvHYBGDevice             c_spsvHYBGDevice
#define T_HYBGHost2Device            c_HYBGHost2Device
#define cusparseThybmv               cusparseChybmv
#define cusparseThybsv_solve         cusparseChybsv_solve
#define cusparseThybsv_analysis      cusparseChybsv_analysis
#define cusparseTcsr2hyb             cusparseCcsr2hyb

#elif CUDA_VERSION < 11030

/* csrsv2 names selected when CUDA_VERSION < 11030. */
#define T_CSRGDeviceSetMatType       c_CSRGDeviceSetMatType
#define T_CSRGDeviceSetMatIndexBase  c_CSRGDeviceSetMatIndexBase
#define T_CSRGDeviceCsrsv2Analysis   c_CSRGDeviceCsrsv2Analysis
#define cusparseTcsrsv2_bufferSize   cusparseCcsrsv2_bufferSize
#define cusparseTcsrsv2_analysis     cusparseCcsrsv2_analysis
#define cusparseTcsrsv2_solve        cusparseCcsrsv2_solve

#else

/* Descriptor helper names selected for all remaining CUDA versions. */
#define T_CSRGIsNullSvBuffer         c_CSRGIsNullSvBuffer
#define T_CSRGIsNullSvDescr          c_CSRGIsNullSvDescr
#define T_CSRGIsNullMvDescr          c_CSRGIsNullMvDescr
#define T_CSRGCreateSpMVDescr        c_CSRGCreateSpMVDescr

#endif

/* Expand the generic implementation with the names defined above. */
#include "fcusparse_fct.h"
|
||||
|
@ -0,0 +1,47 @@
|
||||
/* Parallel Sparse BLAS SPGPU plugin */
|
||||
/* (C) Copyright 2013 */
|
||||
|
||||
/* Salvatore Filippone */
|
||||
/* Alessandro Fanfarillo */
|
||||
|
||||
/* Redistribution and use in source and binary forms, with or without */
|
||||
/* modification, are permitted provided that the following conditions */
|
||||
/* are met: */
|
||||
/* 1. Redistributions of source code must retain the above copyright */
|
||||
/* notice, this list of conditions and the following disclaimer. */
|
||||
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||
/* notice, this list of conditions, and the following disclaimer in the */
|
||||
/* documentation and/or other materials provided with the distribution. */
|
||||
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||
/* not be used to endorse or promote products derived from this */
|
||||
/* software without specific written permission. */
|
||||
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
|
||||
|
||||
#ifndef _CINTRF_H_
#define _CINTRF_H_

/* Standard C headers. */
#include <stdlib.h>
#include <stdio.h>

/* Project headers re-exported to every file that includes cintrf.h. */
#include "core.h"
#include "cuda_util.h"
#include "vector.h"
#include "vectordev.h"

/*
 * ELL pitch alignment constants.
 * NOTE(review): the _S / _D suffixes presumably stand for single and double
 * precision and the values for an element count -- confirm against the users
 * of these macros.
 */
#define ELL_PITCH_ALIGN_S 32
#define ELL_PITCH_ALIGN_D 16

#endif /* _CINTRF_H_ */
|
@ -0,0 +1,53 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
|
||||
! core_mod: integer constants shared with the C side of the GPU plugin and
! the binding to the C device synchronisation routine.
module core_mod
  use iso_c_binding

  ! Element type identifiers.
  integer(c_int), parameter :: spgpu_type_int            = 0
  integer(c_int), parameter :: spgpu_type_float          = 1
  integer(c_int), parameter :: spgpu_type_double         = 2
  integer(c_int), parameter :: spgpu_type_complex_float  = 3
  integer(c_int), parameter :: spgpu_type_complex_double = 4

  ! Status codes.
  integer(c_int), parameter :: spgpu_success     = 0
  integer(c_int), parameter :: spgpu_unsupported = 1
  integer(c_int), parameter :: spgpu_unspecified = 2
  integer(c_int), parameter :: spgpu_outofmem    = 3

  ! psb_cudaSync is bound to the C symbol 'cudaSync'; it takes no arguments
  ! and returns nothing.
  interface
    subroutine psb_cudaSync() &
         & bind(c,name='cudaSync')
      use iso_c_binding
    end subroutine psb_cudaSync
  end interface

end module core_mod
|
@ -0,0 +1,799 @@
|
||||
/* Parallel Sparse BLAS GPU plugin */
|
||||
/* (C) Copyright 2013 */
|
||||
|
||||
/* Salvatore Filippone */
|
||||
/* Alessandro Fanfarillo */
|
||||
|
||||
/* Redistribution and use in source and binary forms, with or without */
|
||||
/* modification, are permitted provided that the following conditions */
|
||||
/* are met: */
|
||||
/* 1. Redistributions of source code must retain the above copyright */
|
||||
/* notice, this list of conditions and the following disclaimer. */
|
||||
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||
/* notice, this list of conditions, and the following disclaimer in the */
|
||||
/* documentation and/or other materials provided with the distribution. */
|
||||
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||
/* not be used to endorse or promote products derived from this */
|
||||
/* software without specific written permission. */
|
||||
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
|
||||
|
||||
#include "cuda_util.h"
|
||||
|
||||
|
||||
static int hasUVA=-1;
|
||||
static struct cudaDeviceProp *prop=NULL;
|
||||
static spgpuHandle_t psb_cuda_handle = NULL;
|
||||
static cublasHandle_t psb_cublas_handle = NULL;
|
||||
|
||||
|
||||
int allocRemoteBuffer(void** buffer, int count)
|
||||
{
|
||||
cudaError_t err = cudaMalloc(buffer, count);
|
||||
if (err == cudaSuccess)
|
||||
{
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr,"CUDA allocRemoteBuffer for %d bytes Error: %s \n",
|
||||
count, cudaGetErrorString(err));
|
||||
if(err == cudaErrorMemoryAllocation)
|
||||
return SPGPU_OUTOFMEMORY;
|
||||
else
|
||||
return SPGPU_UNSPECIFIED;
|
||||
}
|
||||
}
|
||||
|
||||
int hostRegisterMapped(void *pointer, long size)
|
||||
{
|
||||
cudaError_t err = cudaHostRegister(pointer, size, cudaHostRegisterMapped);
|
||||
|
||||
if (err == cudaSuccess)
|
||||
{
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr,"CUDA hostRegisterMapped Error: %s\n", cudaGetErrorString(err));
|
||||
if(err == cudaErrorMemoryAllocation)
|
||||
return SPGPU_OUTOFMEMORY;
|
||||
else
|
||||
return SPGPU_UNSPECIFIED;
|
||||
}
|
||||
}
|
||||
|
||||
int getDevicePointer(void **d_p, void * h_p)
|
||||
{
|
||||
cudaError_t err = cudaHostGetDevicePointer(d_p,h_p,0);
|
||||
|
||||
if (err == cudaSuccess)
|
||||
{
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr,"CUDA getDevicePointer Error: %s\n", cudaGetErrorString(err));
|
||||
if(err == cudaErrorMemoryAllocation)
|
||||
return SPGPU_OUTOFMEMORY;
|
||||
else
|
||||
return SPGPU_UNSPECIFIED;
|
||||
}
|
||||
}
|
||||
|
||||
int registerMappedMemory(void *buffer, void **dp, int size)
|
||||
{
|
||||
//cudaError_t err = cudaHostAlloc(buffer,size,cudaHostAllocMapped);
|
||||
cudaError_t err = cudaHostRegister(buffer, size, cudaHostRegisterMapped);
|
||||
if (err == cudaSuccess) err = cudaHostGetDevicePointer(dp,buffer,0);
|
||||
|
||||
if (err == cudaSuccess)
|
||||
{
|
||||
err = cudaHostGetDevicePointer(dp,buffer,0);
|
||||
if (err == cudaSuccess)
|
||||
{
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr,"CUDA registerMappedMemory Error: %s\n", cudaGetErrorString(err));
|
||||
return SPGPU_UNSPECIFIED;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr,"CUDA registerMappedMemory Error: %s\n", cudaGetErrorString(err));
|
||||
if(err == cudaErrorMemoryAllocation)
|
||||
return SPGPU_OUTOFMEMORY;
|
||||
else
|
||||
return SPGPU_UNSPECIFIED;
|
||||
}
|
||||
}
|
||||
|
||||
int allocMappedMemory(void **buffer, void **dp, int size)
|
||||
{
|
||||
cudaError_t err = cudaHostAlloc(buffer,size,cudaHostAllocMapped);
|
||||
if (err == 0) err = cudaHostGetDevicePointer(dp,*buffer,0);
|
||||
|
||||
if (err == cudaSuccess)
|
||||
{
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr,"CUDA allocMappedMemory Error: %s\n", cudaGetErrorString(err));
|
||||
if(err == cudaErrorMemoryAllocation)
|
||||
return SPGPU_OUTOFMEMORY;
|
||||
else
|
||||
return SPGPU_UNSPECIFIED;
|
||||
}
|
||||
}
|
||||
|
||||
int unregisterMappedMemory(void *buffer)
|
||||
{
|
||||
//cudaError_t err = cudaHostAlloc(buffer,size,cudaHostAllocMapped);
|
||||
cudaError_t err = cudaHostUnregister(buffer);
|
||||
|
||||
if (err == cudaSuccess)
|
||||
{
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr,"CUDA unregisterMappedMemory Error: %s\n", cudaGetErrorString(err));
|
||||
if(err == cudaErrorMemoryAllocation)
|
||||
return SPGPU_OUTOFMEMORY;
|
||||
else
|
||||
return SPGPU_UNSPECIFIED;
|
||||
}
|
||||
}
|
||||
|
||||
int writeRemoteBuffer(void* hostSrc, void* buffer, int count)
|
||||
{
|
||||
cudaError_t err = cudaMemcpy(buffer, hostSrc, count, cudaMemcpyHostToDevice);
|
||||
|
||||
if (err == cudaSuccess)
|
||||
return SPGPU_SUCCESS;
|
||||
else {
|
||||
fprintf(stderr,"CUDA Error writeRemoteBuffer: %s %p %p %d\n",
|
||||
cudaGetErrorString(err),buffer, hostSrc, count);
|
||||
return SPGPU_UNSPECIFIED;
|
||||
}
|
||||
}
|
||||
|
||||
int readRemoteBuffer(void* hostDest, void* buffer, int count)
|
||||
{
|
||||
|
||||
|
||||
cudaError_t err1;
|
||||
cudaError_t err;
|
||||
#if 0
|
||||
{
|
||||
err1 =cudaGetLastError();
|
||||
fprintf(stderr,"CUDA Error prior to readRemoteBuffer: %s %d\n",
|
||||
cudaGetErrorString(err1),err1);
|
||||
}
|
||||
|
||||
#endif
|
||||
err = cudaMemcpy(hostDest, buffer, count, cudaMemcpyDeviceToHost);
|
||||
|
||||
if (err == cudaSuccess)
|
||||
return SPGPU_SUCCESS;
|
||||
else {
|
||||
fprintf(stderr,"CUDA Error readRemoteBuffer: %s %p %p %d %d\n",
|
||||
cudaGetErrorString(err),hostDest,buffer,count,err);
|
||||
return SPGPU_UNSPECIFIED;
|
||||
}
|
||||
}
|
||||
|
||||
int freeRemoteBuffer(void* buffer)
|
||||
{
|
||||
cudaError_t err = cudaFree(buffer);
|
||||
if (err == cudaSuccess)
|
||||
return SPGPU_SUCCESS;
|
||||
else {
|
||||
fprintf(stderr,"CUDA Error freeRemoteBuffer: %s %p\n", cudaGetErrorString(err),buffer);
|
||||
return SPGPU_UNSPECIFIED;
|
||||
}
|
||||
}
|
||||
|
||||
int gpuInit(int dev)
|
||||
{
|
||||
|
||||
int count,err;
|
||||
|
||||
if ((err=cudaSetDeviceFlags(cudaDeviceMapHost))!=cudaSuccess)
|
||||
fprintf(stderr,"Error On SetDeviceFlags: %d '%s'\n",err,cudaGetErrorString(err));
|
||||
if ((prop=(struct cudaDeviceProp *) malloc(sizeof(struct cudaDeviceProp)))==NULL) {
|
||||
fprintf(stderr,"CUDA Error gpuInit3: not malloced prop\n");
|
||||
return SPGPU_UNSPECIFIED;
|
||||
}
|
||||
err = setDevice(dev);
|
||||
if (err != cudaSuccess) {
|
||||
fprintf(stderr,"CUDA Error gpuInit2: %s\n", cudaGetErrorString(err));
|
||||
return SPGPU_UNSPECIFIED;
|
||||
}
|
||||
if (!psb_cublas_handle)
|
||||
psb_cudaCreateCublasHandle();
|
||||
hasUVA=getDeviceHasUVA();
|
||||
FcusparseCreate();
|
||||
return err;
|
||||
|
||||
}
|
||||
|
||||
void gpuClose()
|
||||
{
|
||||
cudaStream_t st1, st2;
|
||||
if (! psb_cuda_handle)
|
||||
st1=spgpuGetStream(psb_cuda_handle);
|
||||
if (! psb_cublas_handle)
|
||||
cublasGetStream(psb_cublas_handle,&st2);
|
||||
FcusparseDestroy();
|
||||
psb_cudaDestroyHandle();
|
||||
if (st1 != st2)
|
||||
psb_cudaDestroyCublasHandle();
|
||||
free(prop);
|
||||
prop=NULL;
|
||||
hasUVA=-1;
|
||||
}
|
||||
|
||||
|
||||
int setDevice(int dev)
|
||||
{
|
||||
int count,err,idev;
|
||||
|
||||
err = cudaGetDeviceCount(&count);
|
||||
if (err != cudaSuccess) {
|
||||
fprintf(stderr,"CUDA Error setDevice: %s\n", cudaGetErrorString(err));
|
||||
return SPGPU_UNSPECIFIED;
|
||||
}
|
||||
|
||||
if ((0<=dev)&&(dev<count))
|
||||
idev = dev;
|
||||
else
|
||||
idev = 0;
|
||||
err = cudaSetDevice(idev);
|
||||
if (err != cudaSuccess) {
|
||||
fprintf(stderr,"CUDA Error gpuInit2: %s\n", cudaGetErrorString(err));
|
||||
return SPGPU_UNSPECIFIED;
|
||||
}
|
||||
err = cudaGetDeviceProperties(prop,idev);
|
||||
if (err != cudaSuccess) {
|
||||
fprintf(stderr,"CUDA Error gpuInit4: %s\n", cudaGetErrorString(err));
|
||||
return SPGPU_UNSPECIFIED;
|
||||
}
|
||||
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
int getDevice()
|
||||
{ int count;
|
||||
|
||||
cudaGetDevice(&count);
|
||||
return(count);
|
||||
}
|
||||
|
||||
int getDeviceHasUVA()
|
||||
{ int count=0;
|
||||
if (prop!=NULL)
|
||||
count = prop->unifiedAddressing;
|
||||
return(count);
|
||||
}
|
||||
|
||||
int getGPUMultiProcessors()
|
||||
{ int count=0;
|
||||
if (prop!=NULL)
|
||||
count = prop->multiProcessorCount;
|
||||
return(count);
|
||||
}
|
||||
|
||||
|
||||
/*
 * Return the memoryBusWidth field of the cached device properties.
 * Yields 0 when no properties are cached or when the code is built with
 * CUDART_VERSION < 5000, where the field is not read.
 */
int getGPUMemoryBusWidth()
{
  int width = 0;
#if CUDART_VERSION >= 5000
  if (prop != NULL) {
    width = prop->memoryBusWidth;
  }
#endif
  return width;
}
|
||||
/*
 * Return the memoryClockRate field of the cached device properties.
 * Yields 0 when no properties are cached or when the code is built with
 * CUDART_VERSION < 5000, where the field is not read.
 */
int getGPUMemoryClockRate()
{
  int rate = 0;
#if CUDART_VERSION >= 5000
  if (prop != NULL) {
    rate = prop->memoryClockRate;
  }
#endif
  return rate;
}
|
||||
int getGPUWarpSize()
|
||||
{ int count=0;
|
||||
if (prop!=NULL)
|
||||
count = prop->warpSize;
|
||||
return(count);
|
||||
}
|
||||
int getGPUMaxThreadsPerBlock()
|
||||
{ int count=0;
|
||||
if (prop!=NULL)
|
||||
count = prop->maxThreadsPerBlock;
|
||||
return(count);
|
||||
}
|
||||
int getGPUMaxThreadsPerMP()
|
||||
{ int count=0;
|
||||
if (prop!=NULL)
|
||||
count = prop->maxThreadsPerMultiProcessor;
|
||||
return(count);
|
||||
}
|
||||
int getGPUMaxRegistersPerBlock()
|
||||
{ int count=0;
|
||||
if (prop!=NULL)
|
||||
count = prop->regsPerBlock;
|
||||
return(count);
|
||||
}
|
||||
|
||||
void cpyGPUNameString(char *cstring)
|
||||
{
|
||||
*cstring='\0';
|
||||
if (prop!=NULL)
|
||||
strcpy(cstring,prop->name);
|
||||
|
||||
}
|
||||
|
||||
int DeviceHasUVA()
|
||||
{
|
||||
return(hasUVA == 1);
|
||||
}
|
||||
|
||||
|
||||
int getDeviceCount()
|
||||
{ int count;
|
||||
cudaError_t err;
|
||||
err = cudaGetDeviceCount(&count);
|
||||
if (err != cudaSuccess) {
|
||||
fprintf(stderr,"CUDA Error getDeviceCount: %s\n", cudaGetErrorString(err));
|
||||
return SPGPU_UNSPECIFIED;
|
||||
}
|
||||
return(count);
|
||||
}
|
||||
|
||||
void cudaSync()
|
||||
{
|
||||
cudaError_t err;
|
||||
err = cudaDeviceSynchronize();
|
||||
if (err == cudaSuccess)
|
||||
return SPGPU_SUCCESS;
|
||||
else {
|
||||
fprintf(stderr,"CUDA Error cudaSync: %s\n", cudaGetErrorString(err));
|
||||
return SPGPU_UNSPECIFIED;
|
||||
}
|
||||
}
|
||||
|
||||
void cudaReset()
|
||||
{
|
||||
cudaError_t err;
|
||||
err = cudaDeviceReset();
|
||||
if (err != cudaSuccess) {
|
||||
fprintf(stderr,"CUDA Error Reset: %s\n", cudaGetErrorString(err));
|
||||
return SPGPU_UNSPECIFIED;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
spgpuHandle_t psb_cudaGetHandle()
|
||||
{
|
||||
return psb_cuda_handle;
|
||||
}
|
||||
|
||||
void psb_cudaCreateHandle()
|
||||
{
|
||||
if (!psb_cuda_handle)
|
||||
spgpuCreate(&psb_cuda_handle, getDevice());
|
||||
|
||||
}
|
||||
|
||||
void psb_cudaDestroyHandle()
|
||||
{
|
||||
if (!psb_cuda_handle)
|
||||
spgpuDestroy(psb_cuda_handle);
|
||||
psb_cuda_handle = NULL;
|
||||
}
|
||||
|
||||
cudaStream_t psb_cudaGetStream()
|
||||
{
|
||||
return spgpuGetStream(psb_cuda_handle);
|
||||
}
|
||||
|
||||
/*
 * Attach `stream` to the file-level spgpu handle through spgpuSetStream.
 * The handle is passed on without a NULL check.
 */
void psb_cudaSetStream(cudaStream_t stream)
{
  spgpuSetStream(psb_cuda_handle, stream);
}
|
||||
|
||||
|
||||
|
||||
cublasHandle_t psb_cudaGetCublasHandle()
|
||||
{
|
||||
if (!psb_cublas_handle)
|
||||
psb_cudaCreateCublasHandle();
|
||||
return psb_cublas_handle;
|
||||
}
|
||||
void psb_cudaCreateCublasHandle()
|
||||
{ if (!psb_cublas_handle)
|
||||
cublasCreate(&psb_cublas_handle);
|
||||
}
|
||||
void psb_cudaDestroyCublasHandle()
|
||||
{
|
||||
if (!psb_cublas_handle)
|
||||
cublasDestroy(psb_cublas_handle);
|
||||
psb_cublas_handle=NULL;
|
||||
}
|
||||
|
||||
/* Simple memory tools */
|
||||
|
||||
/*
 * Allocate room for n ints on the device through allocRemoteBuffer and
 * store the address in *d_int.  Returns allocRemoteBuffer's status.
 */
int allocateInt(void **d_int, int n)
{
  int status = allocRemoteBuffer((void **)(d_int), n*sizeof(int));
  return status;
}
|
||||
|
||||
/*
 * Copy n ints from the host array h_int to the device buffer d_int.
 * Returns writeRemoteBuffer's status.
 */
int writeInt(void *d_int, int* h_int, int n)
{
  return writeRemoteBuffer((void*)h_int, (void*)d_int, n*sizeof(int));
}
|
||||
|
||||
/*
 * Copy n ints from the device buffer d_int to the host array h_int.
 * Returns readRemoteBuffer's status.
 */
int readInt(void* d_int, int* h_int, int n)
{
  return readRemoteBuffer((void *) h_int, (void *) d_int, n*sizeof(int));
}
|
||||
|
||||
/*
 * Copy n ints from the host array h_int into the device buffer d_int,
 * starting at element offset (first - IndexBase) of the device buffer.
 * Returns writeRemoteBuffer's status.
 */
int writeIntFirst(int first, void *d_int, int* h_int, int n, int IndexBase)
{
  int *dst = (int *) d_int + (first-IndexBase);

  return writeRemoteBuffer((void*)h_int, (void*)dst, n*sizeof(int));
}
|
||||
|
||||
/*
 * Copy n ints from the device buffer d_int, starting at element offset
 * (first - IndexBase), into the host array h_int.
 * Returns readRemoteBuffer's status.
 */
int readIntFirst(int first,void* d_int, int* h_int, int n, int IndexBase)
{
  int *src = (int *) d_int + (first-IndexBase);

  return readRemoteBuffer((void *) h_int, (void *) src, n*sizeof(int));
}
|
||||
|
||||
/*
 * Allocate room for m*n ints on the device through allocRemoteBuffer and
 * store the address in *d_int.  Returns allocRemoteBuffer's status.
 */
int allocateMultiInt(void **d_int, int m, int n)
{
  int status = allocRemoteBuffer((void **)(d_int), m*n*sizeof(int));
  return status;
}
|
||||
|
||||
/*
 * Copy m*n ints from the host array h_int to the device buffer d_int.
 * Returns writeRemoteBuffer's status.
 */
int writeMultiInt(void *d_int, int* h_int, int m, int n)
{
  return writeRemoteBuffer((void*)h_int, (void*)d_int, m*n*sizeof(int));
}
|
||||
|
||||
/*
 * Copy m*n ints from the device buffer d_int to the host array h_int.
 * Returns readRemoteBuffer's status.
 */
int readMultiInt(void* d_int, int* h_int, int m, int n)
{
  return readRemoteBuffer((void *) h_int, (void *) d_int, m*n*sizeof(int));
}
|
||||
|
||||
/*
 * Release the device buffer d_int through freeRemoteBuffer; the status
 * returned by freeRemoteBuffer is discarded.
 */
void freeInt(void *d_int)
{
  (void) freeRemoteBuffer(d_int);
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Allocate room for n floats on the device through allocRemoteBuffer and
 * store the address in *d_float.  Returns allocRemoteBuffer's status.
 */
int allocateFloat(void **d_float, int n)
{
  int status = allocRemoteBuffer((void **)(d_float), n*sizeof(float));
  return status;
}
|
||||
|
||||
/*
 * Copy n floats from the host array h_float to the device buffer d_float.
 * Returns writeRemoteBuffer's status.
 */
int writeFloat(void *d_float, float* h_float, int n)
{
  return writeRemoteBuffer((void*)h_float, (void*)d_float, n*sizeof(float));
}
|
||||
|
||||
/*
 * Copy n floats from the device buffer d_float to the host array h_float.
 * Returns readRemoteBuffer's status.
 */
int readFloat(void* d_float, float* h_float, int n)
{
  return readRemoteBuffer((void *) h_float, (void *) d_float, n*sizeof(float));
}
|
||||
|
||||
/*
 * Copy n floats from the host array h_float into the device buffer d_float,
 * starting at element offset (df - IndexBase) of the device buffer.
 * Returns writeRemoteBuffer's status.
 */
int writeFloatFirst(int df, void *d_float, float* h_float, int n, int IndexBase)
{
  float *dst = (float *) d_float + (df-IndexBase);

  return writeRemoteBuffer((void*)h_float, (void*)dst, n*sizeof(float));
}
|
||||
|
||||
/*
 * Copy n floats from the device buffer d_float, starting at element offset
 * (df - IndexBase), into the host array h_float.
 * Returns readRemoteBuffer's status.
 */
int readFloatFirst(int df, void* d_float, float* h_float, int n, int IndexBase)
{
  float *src = (float *) d_float + (df-IndexBase);

  return readRemoteBuffer((void *) h_float, (void *) src, n*sizeof(float));
}
|
||||
|
||||
|
||||
/*
 * Allocate room for m*n floats on the device through allocRemoteBuffer and
 * store the address in *d_float.  Returns allocRemoteBuffer's status.
 */
int allocateMultiFloat(void **d_float, int m, int n)
{
  int status = allocRemoteBuffer((void **)(d_float), m*n*sizeof(float));
  return status;
}
|
||||
|
||||
/*
 * Copy m*n floats from the host array h_float to the device buffer d_float.
 * Returns writeRemoteBuffer's status.
 */
int writeMultiFloat(void *d_float, float* h_float, int m, int n)
{
  return writeRemoteBuffer((void*)h_float, (void*)d_float, m*n*sizeof(float));
}
|
||||
|
||||
/*
 * Copy m*n floats from the device buffer d_float to the host array h_float.
 * Returns readRemoteBuffer's status.
 */
int readMultiFloat(void* d_float, float* h_float, int m, int n)
{
  return readRemoteBuffer((void *) h_float, (void *) d_float, m*n*sizeof(float));
}
|
||||
|
||||
/*
 * Release the device buffer d_float through freeRemoteBuffer; the status
 * returned by freeRemoteBuffer is discarded.
 */
void freeFloat(void *d_float)
{
  (void) freeRemoteBuffer(d_float);
}
|
||||
|
||||
|
||||
|
||||
/*
 * Allocate room for n doubles on the device through allocRemoteBuffer and
 * store the address in *d_double.  Returns allocRemoteBuffer's status.
 */
int allocateDouble(void **d_double, int n)
{
  int status = allocRemoteBuffer((void **)(d_double), n*sizeof(double));
  return status;
}
|
||||
|
||||
/*
 * Copy n doubles from the host array h_double to the device buffer
 * d_double.  Returns writeRemoteBuffer's status.
 */
int writeDouble(void *d_double, double* h_double, int n)
{
  return writeRemoteBuffer((void*)h_double, (void*)d_double, n*sizeof(double));
}
|
||||
|
||||
/*
 * Copy n doubles from the device buffer d_double to the host array
 * h_double.  Returns readRemoteBuffer's status.
 */
int readDouble(void* d_double, double* h_double, int n)
{
  return readRemoteBuffer((void *) h_double, (void *) d_double, n*sizeof(double));
}
|
||||
|
||||
/*
 * Copy n doubles from the host array h_double into the device buffer
 * d_double, starting at element offset (df - IndexBase) of the device
 * buffer.  Returns writeRemoteBuffer's status.
 */
int writeDoubleFirst(int df, void *d_double, double* h_double, int n, int IndexBase)
{
  double *dst = (double *) d_double + (df-IndexBase);

  return writeRemoteBuffer((void*)h_double, (void*)dst, n*sizeof(double));
}
|
||||
|
||||
/*
 * Copy n doubles from the device buffer d_double, starting at element
 * offset (df - IndexBase), into the host array h_double.
 * Returns readRemoteBuffer's status.
 */
int readDoubleFirst(int df, void* d_double, double* h_double, int n, int IndexBase)
{
  double *src = (double *) d_double + (df-IndexBase);

  return readRemoteBuffer((void *) h_double, (void *) src, n*sizeof(double));
}
|
||||
|
||||
/*
 * Allocate room for m*n doubles on the device through allocRemoteBuffer and
 * store the address in *d_double.  Returns allocRemoteBuffer's status.
 */
int allocateMultiDouble(void **d_double, int m, int n)
{
  int status = allocRemoteBuffer((void **)(d_double), m*n*sizeof(double));
  return status;
}
|
||||
|
||||
/*
 * Copy m*n doubles from the host array h_double to the device buffer
 * d_double.  Returns writeRemoteBuffer's status.
 */
int writeMultiDouble(void *d_double, double* h_double, int m, int n)
{
  return writeRemoteBuffer((void*)h_double, (void*)d_double, m*n*sizeof(double));
}
|
||||
|
||||
/*
 * Copy m*n doubles from the device buffer d_double to the host array
 * h_double.  Returns readRemoteBuffer's status.
 */
int readMultiDouble(void* d_double, double* h_double, int m, int n)
{
  return readRemoteBuffer((void *) h_double, (void *) d_double, m*n*sizeof(double));
}
|
||||
|
||||
/*
 * Release the device buffer d_double through freeRemoteBuffer; the status
 * returned by freeRemoteBuffer is discarded.
 */
void freeDouble(void *d_double)
{
  (void) freeRemoteBuffer(d_double);
}
|
||||
|
||||
|
||||
|
||||
int allocateFloatComplex(void **d_FloatComplex, int n)
|
||||
{
|
||||
return allocRemoteBuffer((void **)(d_FloatComplex), n*sizeof(cuFloatComplex));
|
||||
}
|
||||
|
||||
/*
 * Copy n cuFloatComplex values from the host array h_FloatComplex to the
 * device buffer d_FloatComplex.  Returns writeRemoteBuffer's status.
 */
int writeFloatComplex(void *d_FloatComplex, cuFloatComplex* h_FloatComplex, int n)
{
  return writeRemoteBuffer((void*)h_FloatComplex, (void*)d_FloatComplex,
                           n*sizeof(cuFloatComplex));
}
|
||||
|
||||
/*
 * Copy n cuFloatComplex values from the device buffer d_FloatComplex to the
 * host array h_FloatComplex.  Returns readRemoteBuffer's status.
 */
int readFloatComplex(void* d_FloatComplex, cuFloatComplex* h_FloatComplex, int n)
{
  return readRemoteBuffer((void *) h_FloatComplex, (void *) d_FloatComplex,
                          n*sizeof(cuFloatComplex));
}
|
||||
|
||||
int allocateMultiFloatComplex(void **d_FloatComplex, int m, int n)
|
||||
{
|
||||
return allocRemoteBuffer((void **)(d_FloatComplex), m*n*sizeof(cuFloatComplex));
|
||||
}
|
||||
|
||||
/*
 * Copy m*n cuFloatComplex values from the host array h_FloatComplex to the
 * device buffer d_FloatComplex.  Returns writeRemoteBuffer's status.
 */
int writeMultiFloatComplex(void *d_FloatComplex, cuFloatComplex* h_FloatComplex, int m, int n)
{
  return writeRemoteBuffer((void*)h_FloatComplex, (void*)d_FloatComplex,
                           m*n*sizeof(cuFloatComplex));
}
|
||||
|
||||
/*
 * Copy m*n cuFloatComplex values from the device buffer d_FloatComplex to
 * the host array h_FloatComplex.  Returns readRemoteBuffer's status.
 */
int readMultiFloatComplex(void* d_FloatComplex, cuFloatComplex* h_FloatComplex, int m, int n)
{
  return readRemoteBuffer((void *) h_FloatComplex, (void *) d_FloatComplex,
                          m*n*sizeof(cuFloatComplex));
}
|
||||
|
||||
int writeFloatComplexFirst(int df, void *d_floatComplex,
|
||||
cuFloatComplex* h_floatComplex, int n, int IndexBase)
|
||||
{
|
||||
int i;
|
||||
|
||||
cuFloatComplex *dv=(cuFloatComplex *) d_floatComplex;
|
||||
dv = &dv[df-IndexBase];
|
||||
i = writeRemoteBuffer((void*)h_floatComplex, (void*)dv, n*sizeof(cuFloatComplex));
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
/*
 * Copy n cuFloatComplex elements into h_floatComplex from the remote buffer
 * d_floatComplex, starting at element (df - IndexBase) of that buffer.
 * Returns the status of readRemoteBuffer.
 */
int readFloatComplexFirst(int df, void* d_floatComplex, cuFloatComplex* h_floatComplex,
                          int n, int IndexBase)
{
  /* Shift the source so that index df (in base IndexBase) comes first. */
  cuFloatComplex *src = (cuFloatComplex *) d_floatComplex + (df-IndexBase);

  return readRemoteBuffer((void *) h_floatComplex, (void *) src,
                          n*sizeof(cuFloatComplex));
}
|
||||
|
||||
/* Release the remote buffer d_FloatComplex; the status of freeRemoteBuffer is discarded. */
void freeFloatComplex(void *d_FloatComplex)
{
  (void) freeRemoteBuffer(d_FloatComplex);
}
|
||||
|
||||
|
||||
|
||||
|
||||
int allocateDoubleComplex(void **d_DoubleComplex, int n)
|
||||
{
|
||||
return allocRemoteBuffer((void **)(d_DoubleComplex), n*sizeof(cuDoubleComplex));
|
||||
}
|
||||
|
||||
/*
 * Copy n cuDoubleComplex elements from the host array h_DoubleComplex to the
 * remote buffer d_DoubleComplex. Returns the status of writeRemoteBuffer.
 */
int writeDoubleComplex(void *d_DoubleComplex, cuDoubleComplex* h_DoubleComplex, int n)
{
  return writeRemoteBuffer((void*)h_DoubleComplex, (void*)d_DoubleComplex,
                           n*sizeof(cuDoubleComplex));
}
|
||||
|
||||
/*
 * Copy n cuDoubleComplex elements from the remote buffer d_DoubleComplex into
 * the host array h_DoubleComplex. Returns the status of readRemoteBuffer.
 */
int readDoubleComplex(void* d_DoubleComplex, cuDoubleComplex* h_DoubleComplex, int n)
{
  return readRemoteBuffer((void *) h_DoubleComplex, (void *) d_DoubleComplex,
                          n*sizeof(cuDoubleComplex));
}
|
||||
|
||||
int writeDoubleComplexFirst(int df, void *d_doubleComplex,
|
||||
cuDoubleComplex* h_doubleComplex, int n, int IndexBase)
|
||||
{
|
||||
int i;
|
||||
|
||||
cuDoubleComplex *dv=(cuDoubleComplex *) d_doubleComplex;
|
||||
dv = &dv[df-IndexBase];
|
||||
i = writeRemoteBuffer((void*)h_doubleComplex, (void*)dv, n*sizeof(cuDoubleComplex));
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
/*
 * Copy n cuDoubleComplex elements into h_doubleComplex from the remote buffer
 * d_doubleComplex, starting at element (df - IndexBase) of that buffer.
 * Returns the status of readRemoteBuffer.
 */
int readDoubleComplexFirst(int df, void* d_doubleComplex, cuDoubleComplex* h_doubleComplex,
                           int n, int IndexBase)
{
  /* Shift the source so that index df (in base IndexBase) comes first. */
  cuDoubleComplex *src = (cuDoubleComplex *) d_doubleComplex + (df-IndexBase);

  return readRemoteBuffer((void *) h_doubleComplex, (void *) src,
                          n*sizeof(cuDoubleComplex));
}
|
||||
|
||||
int allocateMultiDoubleComplex(void **d_DoubleComplex, int m, int n)
|
||||
{
|
||||
return allocRemoteBuffer((void **)(d_DoubleComplex), m*n*sizeof(cuDoubleComplex));
|
||||
}
|
||||
|
||||
/*
 * Copy m*n cuDoubleComplex elements from the host array h_DoubleComplex to
 * the remote buffer d_DoubleComplex. Returns the status of writeRemoteBuffer.
 */
int writeMultiDoubleComplex(void *d_DoubleComplex, cuDoubleComplex* h_DoubleComplex, int m, int n)
{
  return writeRemoteBuffer((void*)h_DoubleComplex, (void*)d_DoubleComplex,
                           m*n*sizeof(cuDoubleComplex));
}
|
||||
|
||||
/*
 * Copy m*n cuDoubleComplex elements from the remote buffer d_DoubleComplex
 * into the host array h_DoubleComplex. Returns the status of readRemoteBuffer.
 */
int readMultiDoubleComplex(void* d_DoubleComplex, cuDoubleComplex* h_DoubleComplex, int m, int n)
{
  /* No explicit cudaSync() is issued after the transfer. */
  return readRemoteBuffer((void *) h_DoubleComplex, (void *) d_DoubleComplex,
                          m*n*sizeof(cuDoubleComplex));
}
|
||||
|
||||
/* Release the remote buffer d_DoubleComplex; the status of freeRemoteBuffer is discarded. */
void freeDoubleComplex(void *d_DoubleComplex)
{
  (void) freeRemoteBuffer(d_DoubleComplex);
}
|
||||
|
||||
|
||||
|
||||
/*
 * Return the current wall-clock time in seconds (with microsecond
 * resolution) as reported by gettimeofday.
 *
 * On failure of gettimeofday a message is written to stderr and the process
 * terminates with exit(-1).
 */
double etime()
{
  struct timeval tt;

  /* The timezone argument is obsolete; POSIX leaves the behaviour
     unspecified when it is non-NULL, so always pass NULL. */
  if (gettimeofday(&tt, NULL) != 0) {
    fprintf(stderr,"Fatal error for gettimeofday ??? \n");
    exit(-1);
  }
  return ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6;
}
|
||||
|
||||
|
@ -0,0 +1,137 @@
|
||||
/* Parallel Sparse BLAS GPU plugin */
|
||||
/* (C) Copyright 2013 */
|
||||
|
||||
/* Salvatore Filippone */
|
||||
/* Alessandro Fanfarillo */
|
||||
|
||||
/* Redistribution and use in source and binary forms, with or without */
|
||||
/* modification, are permitted provided that the following conditions */
|
||||
/* are met: */
|
||||
/* 1. Redistributions of source code must retain the above copyright */
|
||||
/* notice, this list of conditions and the following disclaimer. */
|
||||
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||
/* notice, this list of conditions, and the following disclaimer in the */
|
||||
/* documentation and/or other materials provided with the distribution. */
|
||||
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||
/* not be used to endorse or promote products derived from this */
|
||||
/* software without specific written permission. */
|
||||
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
|
||||
|
||||
#ifndef _CUDA_UTIL_H_
|
||||
#define _CUDA_UTIL_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/time.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "cuda_runtime.h"
|
||||
#include "core.h"
|
||||
#include "cuComplex.h"
|
||||
#include "fcusparse.h"
|
||||
#include "cublas_v2.h"
|
||||
|
||||
int allocRemoteBuffer(void** buffer, int count);
|
||||
int allocMappedMemory(void **buffer, void **dp, int size);
|
||||
int registerMappedMemory(void *buffer, void **dp, int size);
|
||||
int unregisterMappedMemory(void *buffer);
|
||||
int writeRemoteBuffer(void* hostSrc, void* buffer, int count);
|
||||
int readRemoteBuffer(void* hostDest, void* buffer, int count);
|
||||
int freeRemoteBuffer(void* buffer);
|
||||
int gpuInit(int dev);
|
||||
int getDeviceCount();
|
||||
int getDevice();
|
||||
int setDevice(int dev);
|
||||
int getGPUMultiProcessors();
|
||||
int getGPUMemoryBusWidth();
|
||||
int getGPUMemoryClockRate();
|
||||
int getGPUWarpSize();
|
||||
int getGPUMaxThreadsPerBlock();
|
||||
int getGPUMaxThreadsPerMP();
|
||||
int getGPUMaxRegistersPerBlock();
|
||||
void cpyGPUNameString(char *cstring);
|
||||
|
||||
|
||||
void cudaSync();
|
||||
void cudaReset();
|
||||
void gpuClose();
|
||||
|
||||
|
||||
spgpuHandle_t psb_cudaGetHandle();
|
||||
void psb_cudaCreateHandle();
|
||||
void psb_cudaDestroyHandle();
|
||||
cudaStream_t psb_cudaGetStream();
|
||||
void psb_cudaSetStream(cudaStream_t stream);
|
||||
|
||||
cublasHandle_t psb_cudaGetCublasHandle();
|
||||
void psb_cudaCreateCublasHandle();
|
||||
void psb_cudaDestroyCublasHandle();
|
||||
|
||||
|
||||
int allocateInt(void **, int);
|
||||
int allocateMultiInt(void **, int, int);
|
||||
int writeInt(void *, int *, int);
|
||||
int writeMultiInt(void *, int* , int , int );
|
||||
int readInt(void *, int *, int);
|
||||
int readMultiInt(void*, int*, int, int );
|
||||
int writeIntFirst(int,void *, int *, int,int);
|
||||
int readIntFirst(int,void *, int *, int,int);
|
||||
void freeInt(void *);
|
||||
|
||||
int allocateFloat(void **, int);
|
||||
int allocateMultiFloat(void **, int, int);
|
||||
int writeFloat(void *, float *, int);
|
||||
int writeMultiFloat(void *, float* , int , int );
|
||||
int readFloat(void *, float*, int);
|
||||
int readMultiFloat(void*, float*, int, int );
|
||||
int writeFloatFirst(int, void *, float*, int, int);
|
||||
int readFloatFirst(int, void *, float*, int, int);
|
||||
void freeFloat(void *);
|
||||
|
||||
int allocateDouble(void **, int);
|
||||
int allocateMultiDouble(void **, int, int);
|
||||
int writeDouble(void *, double*, int);
|
||||
int writeMultiDouble(void *, double* , int , int );
|
||||
int readDouble(void *, double*, int);
|
||||
int readMultiDouble(void*, double*, int, int );
|
||||
int writeDoubleFirst(int, void *, double*, int, int);
|
||||
int readDoubleFirst(int, void *, double*, int, int);
|
||||
void freeDouble(void *);
|
||||
|
||||
int allocateFloatComplex(void **, int);
|
||||
int allocateMultiFloatComplex(void **, int, int);
|
||||
int writeFloatComplex(void *, cuFloatComplex*, int);
|
||||
int writeMultiFloatComplex(void *, cuFloatComplex* , int , int );
|
||||
int readFloatComplex(void *, cuFloatComplex*, int);
|
||||
int readMultiFloatComplex(void*, cuFloatComplex*, int, int );
|
||||
int writeFloatComplexFirst(int, void *, cuFloatComplex*, int, int);
|
||||
int readFloatComplexFirst(int, void *, cuFloatComplex*, int, int);
|
||||
void freeFloatComplex(void *);
|
||||
|
||||
int allocateDoubleComplex(void **, int);
|
||||
int allocateMultiDoubleComplex(void **, int, int);
|
||||
int writeDoubleComplex(void *, cuDoubleComplex*, int);
|
||||
int writeMultiDoubleComplex(void *, cuDoubleComplex* , int , int );
|
||||
int readDoubleComplex(void *, cuDoubleComplex*, int);
|
||||
int readMultiDoubleComplex(void*, cuDoubleComplex*, int, int );
|
||||
int writeDoubleComplexFirst(int, void *, cuDoubleComplex*, int, int);
|
||||
int readDoubleComplexFirst(int, void *, cuDoubleComplex*, int, int);
|
||||
void freeDoubleComplex(void *);
|
||||
|
||||
/* Wall-clock time in seconds; declared with (void) so it is a true prototype in C. */
double etime(void);
|
||||
|
||||
|
||||
#endif
|
@ -0,0 +1,38 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
!
! Umbrella module: a single "use cusparse_mod" gives access to the base
! cuSPARSE module and to the s, d, c and z variants listed below.
!
module cusparse_mod
  use base_cusparse_mod
  use s_cusparse_mod
  use d_cusparse_mod
  use c_cusparse_mod
  use z_cusparse_mod
end module cusparse_mod
|
@ -0,0 +1,323 @@
|
||||
/* Parallel Sparse BLAS GPU plugin */
|
||||
/* (C) Copyright 2013 */
|
||||
|
||||
/* Salvatore Filippone */
|
||||
/* Alessandro Fanfarillo */
|
||||
|
||||
/* Redistribution and use in source and binary forms, with or without */
|
||||
/* modification, are permitted provided that the following conditions */
|
||||
/* are met: */
|
||||
/* 1. Redistributions of source code must retain the above copyright */
|
||||
/* notice, this list of conditions and the following disclaimer. */
|
||||
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||
/* notice, this list of conditions, and the following disclaimer in the */
|
||||
/* documentation and/or other materials provided with the distribution. */
|
||||
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||
/* not be used to endorse or promote products derived from this */
|
||||
/* software without specific written permission. */
|
||||
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
#include <complex.h>
|
||||
//#include "utils.h"
|
||||
//#include "common.h"
|
||||
#include "cvectordev.h"
|
||||
|
||||
|
||||
/*
 * Register the host buffer buff (n cuFloatComplex elements) through
 * registerMappedMemory, which receives d_p as its second argument.
 * The dummy argument is not used. Returns the status of registerMappedMemory.
 */
int registerMappedFloatComplex(void *buff, void **d_p, int n, cuFloatComplex dummy)
{
  (void) dummy;
  return registerMappedMemory(buff, d_p, n*sizeof(cuFloatComplex));
}
|
||||
|
||||
/*
 * Copy a whole multivector from host to device.
 *
 * deviceVec : pointer to a struct MultiVectDevice describing the device data
 * hostVec   : host array holding pitch_*count_ cuFloatComplex elements
 *
 * Returns the status of writeRemoteBuffer; a non-zero status is also
 * reported on stderr.
 */
int writeMultiVecDeviceFloatComplex(void* deviceVec, cuFloatComplex* hostVec)
{
  struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
  int i;

  // Ex updateFromHost vector function
  i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_,
                        devVec->pitch_*devVec->count_*sizeof(cuFloatComplex));
  if (i != 0) {
    /* Report this routine's own name (the message used to name a different routine). */
    fprintf(stderr,"From routine : %s : %d \n","writeMultiVecDeviceFloatComplex",i);
  }
  return(i);
}
|
||||
|
||||
/*
 * Rank-2 entry point: forwards to writeMultiVecDeviceFloatComplex.
 * The ld argument is not used. A non-zero status is reported on stderr
 * and returned to the caller.
 */
int writeMultiVecDeviceFloatComplexR2(void* deviceVec, cuFloatComplex* hostVec, int ld)
{
  const int status = writeMultiVecDeviceFloatComplex(deviceVec, (void *) hostVec);

  if (status == 0)
    return(status);

  fprintf(stderr,"From routine : %s : %d \n","writeMultiVecDeviceFloatComplexR2",status);
  return(status);
}
|
||||
|
||||
/*
 * Copy a whole multivector from device to host.
 *
 * deviceVec : pointer to a struct MultiVectDevice describing the device data
 * hostVec   : host array receiving pitch_*count_ cuFloatComplex elements
 *
 * Returns the status of readRemoteBuffer; a non-zero status is also
 * reported on stderr.
 */
int readMultiVecDeviceFloatComplex(void* deviceVec, cuFloatComplex* hostVec)
{
  struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
  int i;

  i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_,
                       devVec->pitch_*devVec->count_*sizeof(cuFloatComplex));
  if (i != 0) {
    /* Report this routine's own name (the message used to name the float variant). */
    fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceFloatComplex",i);
  }
  return(i);
}
|
||||
|
||||
/*
 * Rank-2 entry point: forwards to readMultiVecDeviceFloatComplex.
 * The ld argument is not used. A non-zero status is reported on stderr
 * and returned to the caller.
 */
int readMultiVecDeviceFloatComplexR2(void* deviceVec, cuFloatComplex* hostVec, int ld)
{
  const int status = readMultiVecDeviceFloatComplex(deviceVec, hostVec);

  if (status == 0)
    return(status);

  fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceFloatComplexR2",status);
  return(status);
}
|
||||
|
||||
/*
 * Invoke spgpuCsetscal on the device data of devMultiVecX with the given
 * value and the index range first..last (expressed in base indexBase).
 * Always returns 0.
 */
int setscalMultiVecDeviceFloatComplex(cuFloatComplex val, int first, int last,
                                      int indexBase, void* devMultiVecX)
{
  struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
  spgpuHandle_t handle = psb_cudaGetHandle();

  spgpuCsetscal(handle, first, last, indexBase, val, (cuFloatComplex *) devVecX->v_);

  return 0;
}
|
||||
|
||||
int geinsMultiVecDeviceFloatComplex(int n, void* devMultiVecIrl, void* devMultiVecVal,
|
||||
int dupl, int indexBase, void* devMultiVecX)
|
||||
{ int j=0, i=0,nmin=0,nmax=0;
|
||||
int pitch = 0;
|
||||
cuFloatComplex beta;
|
||||
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
|
||||
struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl;
|
||||
struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal;
|
||||
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||
pitch = devVecIrl->pitch_;
|
||||
if ((n > devVecIrl->size_) || (n>devVecVal->size_ ))
|
||||
return SPGPU_UNSUPPORTED;
|
||||
|
||||
//fprintf(stderr,"geins: %d %d %p %p %p\n",dupl,n,devVecIrl->v_,devVecVal->v_,devVecX->v_);
|
||||
|
||||
if (dupl == INS_OVERWRITE)
|
||||
beta = make_cuFloatComplex(0.0, 0.0);
|
||||
else if (dupl == INS_ADD)
|
||||
beta = make_cuFloatComplex(1.0, 0.0);
|
||||
else
|
||||
beta = make_cuFloatComplex(0.0, 0.0);
|
||||
|
||||
spgpuCscat(handle, (cuFloatComplex *) devVecX->v_, n, (cuFloatComplex*)devVecVal->v_,
|
||||
(int*)devVecIrl->v_, indexBase, beta);
|
||||
|
||||
return(i);
|
||||
}
|
||||
|
||||
|
||||
int igathMultiVecDeviceFloatComplexVecIdx(void* deviceVec, int vectorId, int n,
|
||||
int first, void* deviceIdx, int hfirst,
|
||||
void* host_values, int indexBase)
|
||||
{
|
||||
int i, *idx;
|
||||
struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx;
|
||||
|
||||
i= igathMultiVecDeviceFloatComplex(deviceVec, vectorId, n,
|
||||
first, (void*) devIdx->v_, hfirst, host_values, indexBase);
|
||||
return(i);
|
||||
}
|
||||
|
||||
int igathMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n,
|
||||
int first, void* indexes, int hfirst,
|
||||
void* host_values, int indexBase)
|
||||
{
|
||||
int i, *idx =(int *) indexes;;
|
||||
cuFloatComplex *hv = (cuFloatComplex *) host_values;;
|
||||
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
|
||||
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||
|
||||
i=0;
|
||||
hv = &(hv[hfirst-indexBase]);
|
||||
idx = &(idx[first-indexBase]);
|
||||
spgpuCgath(handle,hv, n, idx,indexBase,
|
||||
(cuFloatComplex *) devVec->v_+vectorId*devVec->pitch_);
|
||||
return(i);
|
||||
}
|
||||
|
||||
/*
 * Variant of iscatMultiVecDeviceFloatComplex whose index list is given as a
 * struct MultiVectDevice: its v_ member is forwarded as the index array.
 * Returns the status of iscatMultiVecDeviceFloatComplex.
 */
int iscatMultiVecDeviceFloatComplexVecIdx(void* deviceVec, int vectorId, int n,
                                          int first, void *deviceIdx,
                                          int hfirst, void* host_values,
                                          int indexBase, cuFloatComplex beta)
{
  struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx;

  return iscatMultiVecDeviceFloatComplex(deviceVec, vectorId, n, first,
                                         (void*) devIdx->v_, hfirst, host_values,
                                         indexBase, beta);
}
|
||||
|
||||
/*
 * Scatter n values into deviceVec through spgpuCscat.
 *
 * The index array is advanced by (first - indexBase) and the value array by
 * (hfirst - indexBase) before the call. Always returns SPGPU_SUCCESS.
 *
 * NOTE(review): vectorId is not applied to the destination pointer here,
 * whereas igathMultiVecDeviceFloatComplex offsets by vectorId*pitch_ --
 * confirm whether this is intentional.
 */
int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n,
                                    int first, void *indexes,
                                    int hfirst, void* host_values,
                                    int indexBase, cuFloatComplex beta)
{
  struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
  spgpuHandle_t handle = psb_cudaGetHandle();
  int *idx = (int *) indexes + (first-indexBase);
  cuFloatComplex *hv = (cuFloatComplex *) host_values + (hfirst-indexBase);

  spgpuCscat(handle, (cuFloatComplex *) devVec->v_, n, hv, idx, indexBase, beta);
  return SPGPU_SUCCESS;
}
|
||||
|
||||
|
||||
/*
 * Call spgpuCmnrm2 on the first n rows of devMultiVecA, passing its count_
 * and pitch_; results are written to y_res. Always returns 0.
 */
int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
{
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
  spgpuHandle_t handle = psb_cudaGetHandle();

  spgpuCmnrm2(handle, y_res, n, (cuFloatComplex *)devVecA->v_,
              devVecA->count_, devVecA->pitch_);
  return 0;
}
|
||||
|
||||
/*
 * Call spgpuCmamax on the first n rows of devMultiVecA, passing its count_
 * and pitch_; results are written to y_res. Always returns 0.
 */
int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
{
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
  spgpuHandle_t handle = psb_cudaGetHandle();

  spgpuCmamax(handle, y_res, n, (cuFloatComplex *)devVecA->v_,
              devVecA->count_, devVecA->pitch_);
  return 0;
}
|
||||
|
||||
/*
 * Call spgpuCmasum on the first n rows of devMultiVecA, passing its count_
 * and pitch_; results are written to y_res. Always returns 0.
 */
int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
{
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
  spgpuHandle_t handle = psb_cudaGetHandle();

  spgpuCmasum(handle, y_res, n, (cuFloatComplex *)devVecA->v_,
              devVecA->count_, devVecA->pitch_);
  return 0;
}
|
||||
|
||||
/*
 * Scale devMultiVecA in place by alpha through spgpuCscal, using pitch_ as
 * the element count. Always returns 0.
 */
int scalMultiVecDeviceFloatComplex(cuFloatComplex alpha, void* devMultiVecA)
{
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
  spgpuHandle_t handle = psb_cudaGetHandle();
  cuFloatComplex *a = (cuFloatComplex *)devVecA->v_;

  // Note: inner kernel can handle aliased input/output
  spgpuCscal(handle, a, devVecA->pitch_, alpha, a);
  return 0;
}
|
||||
|
||||
/*
 * Call spgpuCmdot on the first n rows of devMultiVecA and devMultiVecB;
 * results are written to y_res. The column count is taken from A and the
 * pitch from B. Always returns 0.
 */
int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n,
                                  void* devMultiVecA, void* devMultiVecB)
{
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
  struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
  spgpuHandle_t handle = psb_cudaGetHandle();
  cuFloatComplex *a = (cuFloatComplex*)devVecA->v_;
  cuFloatComplex *b = (cuFloatComplex*)devVecB->v_;

  spgpuCmdot(handle, y_res, n, a, b, devVecA->count_, devVecB->pitch_);
  return 0;
}
|
||||
|
||||
/*
 * Apply spgpuCaxpby column by column, with Y as both output and beta-scaled
 * input and X as the alpha-scaled input (presumably Y <- alpha*X + beta*Y;
 * confirm against the spGPU documentation).
 *
 * Both X and Y columns are addressed with the pitch_ of Y, and the loop runs
 * over the count_ of Y. Returns SPGPU_UNSUPPORTED when n exceeds the size_
 * of either vector, 0 otherwise.
 */
int axpbyMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha, void* devMultiVecX,
                                    cuFloatComplex beta, void* devMultiVecY)
{
  struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
  struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY;
  spgpuHandle_t handle = psb_cudaGetHandle();
  int pitch = devVecY->pitch_;
  int col;

  if ((n > devVecY->size_) || (n > devVecX->size_))
    return SPGPU_UNSUPPORTED;

  for (col = 0; col < devVecY->count_; col++) {
    cuFloatComplex *y = (cuFloatComplex*)devVecY->v_+pitch*col;
    cuFloatComplex *x = (cuFloatComplex*)devVecX->v_+pitch*col;

    spgpuCaxpby(handle, y, n, beta, y, alpha, x);
  }
  return 0;
}
|
||||
|
||||
/*
 * Call spgpuCmaxy with B as output, alpha as scalar and A, B as inputs,
 * using the count_ and pitch_ of A. Returns SPGPU_UNSUPPORTED when n exceeds
 * the size_ of either vector, 0 otherwise.
 */
int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha,
                                  void *deviceVecA, void *deviceVecB)
{
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
  struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
  spgpuHandle_t handle = psb_cudaGetHandle();
  cuFloatComplex *a, *b;

  if ((n > devVecA->size_) || (n > devVecB->size_))
    return SPGPU_UNSUPPORTED;

  a = (cuFloatComplex*)devVecA->v_;
  b = (cuFloatComplex*)devVecB->v_;
  spgpuCmaxy(handle, b, n, alpha, a, b, devVecA->count_, devVecA->pitch_);

  return 0;
}
|
||||
|
||||
/*
 * Call spgpuCmaxypbz with Z as output and beta-scaled input, and A, B as the
 * alpha-scaled inputs, using the count_ and pitch_ of B. Returns
 * SPGPU_UNSUPPORTED when n exceeds the size_ of A, B or Z, 0 otherwise.
 */
int axybzMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA,
                                    void *deviceVecB, cuFloatComplex beta,
                                    void *deviceVecZ)
{
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
  struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
  struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ;
  spgpuHandle_t handle = psb_cudaGetHandle();
  cuFloatComplex *z;

  if ((n > devVecA->size_) || (n > devVecB->size_) || (n > devVecZ->size_))
    return SPGPU_UNSUPPORTED;

  z = (cuFloatComplex*)devVecZ->v_;
  spgpuCmaxypbz(handle, z, n, beta, z,
                alpha, (cuFloatComplex*) devVecA->v_, (cuFloatComplex*) devVecB->v_,
                devVecB->count_, devVecB->pitch_);
  return 0;
}
|
||||
|
||||
|
||||
/*
 * Out-of-place variant: call spgpuCabs with B as output, alpha as scalar and
 * A as input, over n elements. Returns SPGPU_UNSUPPORTED when n exceeds the
 * size_ of either vector, 0 otherwise.
 */
int absMultiVecDeviceFloatComplex2(int n, cuFloatComplex alpha, void *deviceVecA,
                                   void *deviceVecB)
{
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
  struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
  spgpuHandle_t handle = psb_cudaGetHandle();

  if ((n > devVecA->size_) || (n > devVecB->size_))
    return SPGPU_UNSUPPORTED;

  spgpuCabs(handle, (cuFloatComplex*)devVecB->v_, n,
            alpha, (cuFloatComplex*)devVecA->v_);

  return 0;
}
|
||||
|
||||
/*
 * In-place variant: call spgpuCabs with A as both output and input, over n
 * elements. Returns SPGPU_UNSUPPORTED when n exceeds the size_ of A,
 * 0 otherwise.
 */
int absMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA)
{
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
  spgpuHandle_t handle = psb_cudaGetHandle();
  cuFloatComplex *a;

  if (n > devVecA->size_)
    return SPGPU_UNSUPPORTED;

  a = (cuFloatComplex*)devVecA->v_;
  spgpuCabs(handle, a, n, alpha, a);

  return 0;
}
|
||||
|
||||
|
@ -0,0 +1,78 @@
|
||||
/* Parallel Sparse BLAS GPU plugin */
|
||||
/* (C) Copyright 2013 */
|
||||
|
||||
/* Salvatore Filippone */
|
||||
/* Alessandro Fanfarillo */
|
||||
|
||||
/* Redistribution and use in source and binary forms, with or without */
|
||||
/* modification, are permitted provided that the following conditions */
|
||||
/* are met: */
|
||||
/* 1. Redistributions of source code must retain the above copyright */
|
||||
/* notice, this list of conditions and the following disclaimer. */
|
||||
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||
/* notice, this list of conditions, and the following disclaimer in the */
|
||||
/* documentation and/or other materials provided with the distribution. */
|
||||
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||
/* not be used to endorse or promote products derived from this */
|
||||
/* software without specific written permission. */
|
||||
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
|
||||
|
||||
#pragma once
|
||||
//#include "utils.h"
|
||||
#include <complex.h>
|
||||
#include "cuComplex.h"
|
||||
#include "vectordev.h"
|
||||
#include "cuda_runtime.h"
|
||||
#include "core.h"
|
||||
|
||||
/* Host-memory registration. */
int registerMappedFloatComplex(void *, void **, int, cuFloatComplex);

/* Whole-multivector transfers between host and device. */
int writeMultiVecDeviceFloatComplex(void* deviceMultiVec,
                                    cuFloatComplex* hostMultiVec);
int writeMultiVecDeviceFloatComplexR2(void* deviceMultiVec,
                                      cuFloatComplex* hostMultiVec, int ld);
int readMultiVecDeviceFloatComplex(void* deviceMultiVec,
                                   cuFloatComplex* hostMultiVec);
int readMultiVecDeviceFloatComplexR2(void* deviceMultiVec,
                                     cuFloatComplex* hostMultiVec, int ld);

/* Element-wise set and insert. */
int setscalMultiVecDeviceFloatComplex(cuFloatComplex val, int first, int last,
                                      int indexBase, void* devVecX);

int geinsMultiVecDeviceFloatComplex(int n, void* devVecIrl, void* devVecVal,
                                    int dupl, int indexBase, void* devVecX);

/* Indexed gather / scatter. */
int igathMultiVecDeviceFloatComplexVecIdx(void* deviceVec, int vectorId, int n,
                                          int first, void* deviceIdx, int hfirst,
                                          void* host_values, int indexBase);
int igathMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n,
                                    int first, void* indexes, int hfirst,
                                    void* host_values, int indexBase);
int iscatMultiVecDeviceFloatComplexVecIdx(void* deviceVec, int vectorId, int n,
                                          int first, void *deviceIdx, int hfirst,
                                          void* host_values, int indexBase,
                                          cuFloatComplex beta);
int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n,
                                    int first, void *indexes, int hfirst,
                                    void* host_values, int indexBase,
                                    cuFloatComplex beta);

/* Scaling, norms and dot product. */
int scalMultiVecDeviceFloatComplex(cuFloatComplex alpha, void* devMultiVecA);
int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA);
int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA);
int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA);
int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n,
                                  void* devVecA, void* devVecB);

/* Vector updates and element-wise operations. */
int axpbyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void* devVecX,
                                    cuFloatComplex beta, void* devVecY);
int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha,
                                  void *deviceVecA, void *deviceVecB);
int axybzMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA,
                                    void *deviceVecB, cuFloatComplex beta,
                                    void *deviceVecZ);
int absMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA);
int absMultiVecDeviceFloatComplex2(int n, cuFloatComplex alpha,
                                   void *deviceVecA, void *deviceVecB);
|
||||
|
@ -0,0 +1,313 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
|
||||
module d_cusparse_mod
|
||||
use base_cusparse_mod
|
||||
|
||||
! C-interoperable wrapper around a single C pointer, initialised to
! c_null_ptr. Used as the matrix argument of the d_CSRG* interfaces.
type, bind(c) :: d_Cmat
  type(c_ptr) :: Mat = c_null_ptr
end type d_Cmat
|
||||
|
||||
#if CUDA_SHORT_VERSION <= 10
! C-interoperable wrapper around a single C pointer, initialised to
! c_null_ptr. Only compiled when CUDA_SHORT_VERSION <= 10.
type, bind(c) :: d_Hmat
  type(c_ptr) :: Mat = c_null_ptr
end type d_Hmat
#endif
|
||||
|
||||
! Generic CSRGDeviceFree; the d_ specific binds to the C symbol
! "d_CSRGDeviceFree". Mat is passed by reference; res is the C int result.
interface CSRGDeviceFree
  function d_CSRGDeviceFree(Mat) bind(c,name="d_CSRGDeviceFree") result(res)
    use iso_c_binding
    import d_Cmat
    type(d_Cmat)   :: Mat
    integer(c_int) :: res
  end function d_CSRGDeviceFree
end interface CSRGDeviceFree
|
||||
|
||||
! Generic CSRGDeviceSetMatType; the d_ specific binds to the C symbol
! "d_CSRGDeviceSetMatType". Mat is passed by reference, type by value;
! res is the C int result.
interface CSRGDeviceSetMatType
  function d_CSRGDeviceSetMatType(Mat,type) &
       & bind(c,name="d_CSRGDeviceSetMatType") result(res)
    use iso_c_binding
    import d_Cmat
    type(d_Cmat)          :: Mat
    integer(c_int), value :: type
    integer(c_int)        :: res
  end function d_CSRGDeviceSetMatType
end interface CSRGDeviceSetMatType
|
||||
|
||||
interface CSRGDeviceSetMatFillMode
|
||||
function d_CSRGDeviceSetMatFillMode(Mat,type) &
|
||||
& bind(c,name="d_CSRGDeviceSetMatFillMode") result(res)
|
||||
use iso_c_binding
|
||||
import d_Cmat
|
||||
type(d_Cmat) :: Mat
|
||||
integer(c_int),value :: type
|
||||
integer(c_int) :: res
|
||||
end function d_CSRGDeviceSetMatFillMode
|
||||
end interface
|
||||
|
||||
interface CSRGDeviceSetMatDiagType
|
||||
function d_CSRGDeviceSetMatDiagType(Mat,type) &
|
||||
& bind(c,name="d_CSRGDeviceSetMatDiagType") result(res)
|
||||
use iso_c_binding
|
||||
import d_Cmat
|
||||
type(d_Cmat) :: Mat
|
||||
integer(c_int),value :: type
|
||||
integer(c_int) :: res
|
||||
end function d_CSRGDeviceSetMatDiagType
|
||||
end interface
|
||||
|
||||
interface CSRGDeviceSetMatIndexBase
|
||||
function d_CSRGDeviceSetMatIndexBase(Mat,type) &
|
||||
& bind(c,name="d_CSRGDeviceSetMatIndexBase") result(res)
|
||||
use iso_c_binding
|
||||
import d_Cmat
|
||||
type(d_Cmat) :: Mat
|
||||
integer(c_int),value :: type
|
||||
integer(c_int) :: res
|
||||
end function d_CSRGDeviceSetMatIndexBase
|
||||
end interface
|
||||
|
||||
#if CUDA_SHORT_VERSION <= 10
|
||||
interface CSRGDeviceCsrsmAnalysis
|
||||
function d_CSRGDeviceCsrsmAnalysis(Mat) &
|
||||
& bind(c,name="d_CSRGDeviceCsrsmAnalysis") result(res)
|
||||
use iso_c_binding
|
||||
import d_Cmat
|
||||
type(d_Cmat) :: Mat
|
||||
integer(c_int) :: res
|
||||
end function d_CSRGDeviceCsrsmAnalysis
|
||||
end interface
|
||||
#else
|
||||
interface CSRGIsNullSvBuffer
|
||||
function d_CSRGIsNullSvBuffer(Mat) &
|
||||
& bind(c,name="d_CSRGIsNullSvBuffer") result(res)
|
||||
use iso_c_binding
|
||||
import d_Cmat
|
||||
type(d_Cmat) :: Mat
|
||||
integer(c_int) :: res
|
||||
end function d_CSRGIsNullSvBuffer
|
||||
end interface
|
||||
#endif
|
||||
|
||||
interface CSRGDeviceAlloc
|
||||
function d_CSRGDeviceAlloc(Mat,nr,nc,nz) &
|
||||
& bind(c,name="d_CSRGDeviceAlloc") result(res)
|
||||
use iso_c_binding
|
||||
import d_Cmat
|
||||
type(d_Cmat) :: Mat
|
||||
integer(c_int), value :: nr, nc, nz
|
||||
integer(c_int) :: res
|
||||
end function d_CSRGDeviceAlloc
|
||||
end interface
|
||||
|
||||
interface CSRGDeviceGetParms
|
||||
function d_CSRGDeviceGetParms(Mat,nr,nc,nz) &
|
||||
& bind(c,name="d_CSRGDeviceGetParms") result(res)
|
||||
use iso_c_binding
|
||||
import d_Cmat
|
||||
type(d_Cmat) :: Mat
|
||||
integer(c_int) :: nr, nc, nz
|
||||
integer(c_int) :: res
|
||||
end function d_CSRGDeviceGetParms
|
||||
end interface
|
||||
|
||||
interface spsvCSRGDevice
|
||||
function d_spsvCSRGDevice(Mat,alpha,x,beta,y) &
|
||||
& bind(c,name="d_spsvCSRGDevice") result(res)
|
||||
use iso_c_binding
|
||||
import d_Cmat
|
||||
type(d_Cmat) :: Mat
|
||||
type(c_ptr), value :: x
|
||||
type(c_ptr), value :: y
|
||||
real(c_double), value :: alpha,beta
|
||||
integer(c_int) :: res
|
||||
end function d_spsvCSRGDevice
|
||||
end interface
|
||||
|
||||
interface spmvCSRGDevice
|
||||
function d_spmvCSRGDevice(Mat,alpha,x,beta,y) &
|
||||
& bind(c,name="d_spmvCSRGDevice") result(res)
|
||||
use iso_c_binding
|
||||
import d_Cmat
|
||||
type(d_Cmat) :: Mat
|
||||
type(c_ptr), value :: x
|
||||
type(c_ptr), value :: y
|
||||
real(c_double), value :: alpha,beta
|
||||
integer(c_int) :: res
|
||||
end function d_spmvCSRGDevice
|
||||
end interface
|
||||
|
||||
interface CSRGHost2Device
|
||||
function d_CSRGHost2Device(Mat,m,n,nz,irp,ja,val) &
|
||||
& bind(c,name="d_CSRGHost2Device") result(res)
|
||||
use iso_c_binding
|
||||
import d_Cmat
|
||||
type(d_Cmat) :: Mat
|
||||
integer(c_int), value :: m,n,nz
|
||||
integer(c_int) :: irp(*), ja(*)
|
||||
real(c_double) :: val(*)
|
||||
integer(c_int) :: res
|
||||
end function d_CSRGHost2Device
|
||||
end interface
|
||||
|
||||
interface CSRGDevice2Host
|
||||
function d_CSRGDevice2Host(Mat,m,n,nz,irp,ja,val) &
|
||||
& bind(c,name="d_CSRGDevice2Host") result(res)
|
||||
use iso_c_binding
|
||||
import d_Cmat
|
||||
type(d_Cmat) :: Mat
|
||||
integer(c_int), value :: m,n,nz
|
||||
integer(c_int) :: irp(*), ja(*)
|
||||
real(c_double) :: val(*)
|
||||
integer(c_int) :: res
|
||||
end function d_CSRGDevice2Host
|
||||
end interface
|
||||
|
||||
#if CUDA_SHORT_VERSION <= 10
|
||||
interface HYBGDeviceAlloc
|
||||
function d_HYBGDeviceAlloc(Mat,nr,nc,nz) &
|
||||
& bind(c,name="d_HYBGDeviceAlloc") result(res)
|
||||
use iso_c_binding
|
||||
import d_hmat
|
||||
type(d_Hmat) :: Mat
|
||||
integer(c_int), value :: nr, nc, nz
|
||||
integer(c_int) :: res
|
||||
end function d_HYBGDeviceAlloc
|
||||
end interface
|
||||
|
||||
interface HYBGDeviceFree
|
||||
function d_HYBGDeviceFree(Mat) &
|
||||
& bind(c,name="d_HYBGDeviceFree") result(res)
|
||||
use iso_c_binding
|
||||
import d_Hmat
|
||||
type(d_Hmat) :: Mat
|
||||
integer(c_int) :: res
|
||||
end function d_HYBGDeviceFree
|
||||
end interface
|
||||
|
||||
interface HYBGDeviceSetMatType
|
||||
function d_HYBGDeviceSetMatType(Mat,type) &
|
||||
& bind(c,name="d_HYBGDeviceSetMatType") result(res)
|
||||
use iso_c_binding
|
||||
import d_Hmat
|
||||
type(d_Hmat) :: Mat
|
||||
integer(c_int),value :: type
|
||||
integer(c_int) :: res
|
||||
end function d_HYBGDeviceSetMatType
|
||||
end interface
|
||||
|
||||
interface HYBGDeviceSetMatFillMode
|
||||
function d_HYBGDeviceSetMatFillMode(Mat,type) &
|
||||
& bind(c,name="d_HYBGDeviceSetMatFillMode") result(res)
|
||||
use iso_c_binding
|
||||
import d_Hmat
|
||||
type(d_Hmat) :: Mat
|
||||
integer(c_int),value :: type
|
||||
integer(c_int) :: res
|
||||
end function d_HYBGDeviceSetMatFillMode
|
||||
end interface
|
||||
|
||||
interface HYBGDeviceSetMatDiagType
|
||||
function d_HYBGDeviceSetMatDiagType(Mat,type) &
|
||||
& bind(c,name="d_HYBGDeviceSetMatDiagType") result(res)
|
||||
use iso_c_binding
|
||||
import d_Hmat
|
||||
type(d_Hmat) :: Mat
|
||||
integer(c_int),value :: type
|
||||
integer(c_int) :: res
|
||||
end function d_HYBGDeviceSetMatDiagType
|
||||
end interface
|
||||
|
||||
interface HYBGDeviceSetMatIndexBase
|
||||
function d_HYBGDeviceSetMatIndexBase(Mat,type) &
|
||||
& bind(c,name="d_HYBGDeviceSetMatIndexBase") result(res)
|
||||
use iso_c_binding
|
||||
import d_Hmat
|
||||
type(d_Hmat) :: Mat
|
||||
integer(c_int),value :: type
|
||||
integer(c_int) :: res
|
||||
end function d_HYBGDeviceSetMatIndexBase
|
||||
end interface
|
||||
|
||||
interface HYBGDeviceHybsmAnalysis
|
||||
function d_HYBGDeviceHybsmAnalysis(Mat) &
|
||||
& bind(c,name="d_HYBGDeviceHybsmAnalysis") result(res)
|
||||
use iso_c_binding
|
||||
import d_Hmat
|
||||
type(d_Hmat) :: Mat
|
||||
integer(c_int) :: res
|
||||
end function d_HYBGDeviceHybsmAnalysis
|
||||
end interface
|
||||
|
||||
interface spsvHYBGDevice
|
||||
function d_spsvHYBGDevice(Mat,alpha,x,beta,y) &
|
||||
& bind(c,name="d_spsvHYBGDevice") result(res)
|
||||
use iso_c_binding
|
||||
import d_Hmat
|
||||
type(d_Hmat) :: Mat
|
||||
type(c_ptr), value :: x
|
||||
type(c_ptr), value :: y
|
||||
real(c_double), value :: alpha,beta
|
||||
integer(c_int) :: res
|
||||
end function d_spsvHYBGDevice
|
||||
end interface
|
||||
|
||||
interface spmvHYBGDevice
|
||||
function d_spmvHYBGDevice(Mat,alpha,x,beta,y) &
|
||||
& bind(c,name="d_spmvHYBGDevice") result(res)
|
||||
use iso_c_binding
|
||||
import d_Hmat
|
||||
type(d_Hmat) :: Mat
|
||||
type(c_ptr), value :: x
|
||||
type(c_ptr), value :: y
|
||||
real(c_double), value :: alpha,beta
|
||||
integer(c_int) :: res
|
||||
end function d_spmvHYBGDevice
|
||||
end interface
|
||||
|
||||
interface HYBGHost2Device
|
||||
function d_HYBGHost2Device(Mat,m,n,nz,irp,ja,val) &
|
||||
& bind(c,name="d_HYBGHost2Device") result(res)
|
||||
use iso_c_binding
|
||||
import d_Hmat
|
||||
type(d_Hmat) :: Mat
|
||||
integer(c_int), value :: m,n,nz
|
||||
integer(c_int) :: irp(*), ja(*)
|
||||
real(c_double) :: val(*)
|
||||
integer(c_int) :: res
|
||||
end function d_HYBGHost2Device
|
||||
end interface
|
||||
|
||||
#endif
|
||||
|
||||
end module d_cusparse_mod
|
@ -0,0 +1,99 @@
|
||||
/* Parallel Sparse BLAS GPU plugin */
|
||||
/* (C) Copyright 2013 */
|
||||
|
||||
/* Salvatore Filippone */
|
||||
/* Alessandro Fanfarillo */
|
||||
|
||||
/* Redistribution and use in source and binary forms, with or without */
|
||||
/* modification, are permitted provided that the following conditions */
|
||||
/* are met: */
|
||||
/* 1. Redistributions of source code must retain the above copyright */
|
||||
/* notice, this list of conditions and the following disclaimer. */
|
||||
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||
/* notice, this list of conditions, and the following disclaimer in the */
|
||||
/* documentation and/or other materials provided with the distribution. */
|
||||
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||
/* not be used to endorse or promote products derived from this */
|
||||
/* software without specific written permission. */
|
||||
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cusparse_v2.h>
|
||||
#include "cintrf.h"
|
||||
#include "fcusparse.h"
|
||||
|
||||
|
||||
/* Double precision real */
|
||||
#define TYPE double
|
||||
#define CUSPARSE_BASE_TYPE CUDA_R_64F
|
||||
#define T_CSRGDeviceMat d_CSRGDeviceMat
|
||||
#define T_Cmat d_Cmat
|
||||
#define T_spmvCSRGDevice d_spmvCSRGDevice
|
||||
#define T_spsvCSRGDevice d_spsvCSRGDevice
|
||||
#define T_CSRGDeviceAlloc d_CSRGDeviceAlloc
|
||||
#define T_CSRGDeviceFree d_CSRGDeviceFree
|
||||
#define T_CSRGHost2Device d_CSRGHost2Device
|
||||
#define T_CSRGDevice2Host d_CSRGDevice2Host
|
||||
#define T_CSRGDeviceSetMatFillMode d_CSRGDeviceSetMatFillMode
|
||||
#define T_CSRGDeviceSetMatDiagType d_CSRGDeviceSetMatDiagType
|
||||
#define T_CSRGDeviceGetParms d_CSRGDeviceGetParms
|
||||
|
||||
#if CUDA_SHORT_VERSION <= 10
|
||||
#define T_CSRGDeviceSetMatType d_CSRGDeviceSetMatType
|
||||
#define T_CSRGDeviceSetMatIndexBase d_CSRGDeviceSetMatIndexBase
|
||||
#define T_CSRGDeviceCsrsmAnalysis d_CSRGDeviceCsrsmAnalysis
|
||||
#define cusparseTcsrmv cusparseDcsrmv
|
||||
#define cusparseTcsrsv_solve cusparseDcsrsv_solve
|
||||
#define cusparseTcsrsv_analysis cusparseDcsrsv_analysis
|
||||
#define T_HYBGDeviceMat d_HYBGDeviceMat
|
||||
#define T_Hmat d_Hmat
|
||||
#define T_HYBGDeviceFree d_HYBGDeviceFree
|
||||
#define T_spmvHYBGDevice d_spmvHYBGDevice
|
||||
#define T_HYBGDeviceAlloc d_HYBGDeviceAlloc
|
||||
#define T_HYBGDeviceSetMatDiagType d_HYBGDeviceSetMatDiagType
|
||||
#define T_HYBGDeviceSetMatIndexBase d_HYBGDeviceSetMatIndexBase
|
||||
#define T_HYBGDeviceSetMatType d_HYBGDeviceSetMatType
|
||||
#define T_HYBGDeviceSetMatFillMode d_HYBGDeviceSetMatFillMode
|
||||
#define T_HYBGDeviceHybsmAnalysis d_HYBGDeviceHybsmAnalysis
|
||||
#define T_spsvHYBGDevice d_spsvHYBGDevice
|
||||
#define T_HYBGHost2Device d_HYBGHost2Device
|
||||
#define cusparseThybmv cusparseDhybmv
|
||||
#define cusparseThybsv_solve cusparseDhybsv_solve
|
||||
#define cusparseThybsv_analysis cusparseDhybsv_analysis
|
||||
#define cusparseTcsr2hyb cusparseDcsr2hyb
|
||||
|
||||
#elif CUDA_VERSION < 11030
|
||||
|
||||
#define T_CSRGDeviceSetMatType d_CSRGDeviceSetMatType
|
||||
#define T_CSRGDeviceSetMatIndexBase d_CSRGDeviceSetMatIndexBase
|
||||
#define T_CSRGDeviceCsrsv2Analysis d_CSRGDeviceCsrsv2Analysis
|
||||
#define cusparseTcsrsv2_bufferSize cusparseDcsrsv2_bufferSize
|
||||
#define cusparseTcsrsv2_analysis cusparseDcsrsv2_analysis
|
||||
#define cusparseTcsrsv2_solve cusparseDcsrsv2_solve
|
||||
#else
|
||||
|
||||
#define T_CSRGIsNullSvBuffer d_CSRGIsNullSvBuffer
|
||||
#define T_CSRGIsNullSvDescr d_CSRGIsNullSvDescr
|
||||
#define T_CSRGIsNullMvDescr d_CSRGIsNullMvDescr
|
||||
#define T_CSRGCreateSpMVDescr d_CSRGCreateSpMVDescr
|
||||
|
||||
#endif
|
||||
|
||||
#include "fcusparse_fct.h"
|
||||
|
@ -0,0 +1,261 @@
|
||||
/* Parallel Sparse BLAS GPU plugin */
|
||||
/* (C) Copyright 2013 */
|
||||
|
||||
/* Salvatore Filippone */
|
||||
/* Alessandro Fanfarillo */
|
||||
|
||||
/* Redistribution and use in source and binary forms, with or without */
|
||||
/* modification, are permitted provided that the following conditions */
|
||||
/* are met: */
|
||||
/* 1. Redistributions of source code must retain the above copyright */
|
||||
/* notice, this list of conditions and the following disclaimer. */
|
||||
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||
/* notice, this list of conditions, and the following disclaimer in the */
|
||||
/* documentation and/or other materials provided with the distribution. */
|
||||
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||
/* not be used to endorse or promote products derived from this */
|
||||
/* software without specific written permission. */
|
||||
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
|
||||
#include "diagdev.h"
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
//new
|
||||
DiagDeviceParams getDiagDeviceParams(unsigned int rows, unsigned int columns, unsigned int diags, unsigned int elementType)
|
||||
{
|
||||
DiagDeviceParams params;
|
||||
|
||||
params.elementType = elementType;
|
||||
//numero di elementi di val
|
||||
params.rows = rows;
|
||||
params.columns = columns;
|
||||
params.diags = diags;
|
||||
|
||||
return params;
|
||||
|
||||
}
|
||||
//new
|
||||
/*
 * Allocate a host-side DiagDevice descriptor and its two remote buffers:
 *   off : params->diags ints
 *   cM  : params->rows * params->diags elements of the requested type
 *
 * remoteMatrix : out; receives the new descriptor on success and is set to
 *                NULL on every failure path, so a later freeDiagDevice() on
 *                it is harmless.
 * params       : in; dimensions and element type.
 *
 * Returns SPGPU_SUCCESS on success. Returns SPGPU_UNSUPPORTED for a NULL
 * argument, an unknown element type, or a failed host allocation (no more
 * specific code is visible in this file). Otherwise returns whatever error
 * code allocRemoteBuffer reported.
 */
int allocDiagDevice(void ** remoteMatrix, DiagDeviceParams* params)
{
  struct DiagDevice *tmp;
  size_t elemSize;
  int ret;

  if (remoteMatrix == NULL || params == NULL)
    return SPGPU_UNSUPPORTED;
  *remoteMatrix = NULL;

  /* Resolve the element size first so that an unsupported type is rejected
     before anything has been allocated. */
  if (params->elementType == SPGPU_TYPE_INT)
    elemSize = sizeof(int);
  else if (params->elementType == SPGPU_TYPE_FLOAT)
    elemSize = sizeof(float);
  else if (params->elementType == SPGPU_TYPE_DOUBLE)
    elemSize = sizeof(double);
  else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT)
    elemSize = sizeof(cuFloatComplex);
  else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE)
    elemSize = sizeof(cuDoubleComplex);
  else
    return SPGPU_UNSUPPORTED; /* Unsupported params */

  tmp = (struct DiagDevice *)malloc(sizeof(struct DiagDevice));
  if (tmp == NULL)
    return SPGPU_UNSUPPORTED;

  tmp->rows  = params->rows;
  tmp->cols  = params->columns;
  tmp->diags = params->diags;
  tmp->off   = NULL;
  tmp->cM    = NULL;

  ret = allocRemoteBuffer((void **)&(tmp->off), (size_t)tmp->diags*sizeof(int));
  if (ret != SPGPU_SUCCESS) {
    free(tmp);
    return ret;
  }

  /* Widen before multiplying: rows*diags can exceed the range of int. */
  ret = allocRemoteBuffer((void **)&(tmp->cM),
                          (size_t)tmp->rows*(size_t)tmp->diags*elemSize);
  if (ret != SPGPU_SUCCESS) {
    /* Only the buffer that was successfully allocated is released. */
    freeRemoteBuffer(tmp->off);
    free(tmp);
    return ret;
  }

  *remoteMatrix = (void *)tmp;
  return SPGPU_SUCCESS;
}
|
||||
|
||||
void freeDiagDevice(void* remoteMatrix)
|
||||
{
|
||||
struct DiagDevice *devMat = (struct DiagDevice *) remoteMatrix;
|
||||
//fprintf(stderr,"freeHllDevice\n");
|
||||
if (devMat != NULL) {
|
||||
freeRemoteBuffer(devMat->off);
|
||||
freeRemoteBuffer(devMat->cM);
|
||||
free(remoteMatrix);
|
||||
}
|
||||
}
|
||||
|
||||
//new
|
||||
int FallocDiagDevice(void** deviceMat, unsigned int rows, unsigned int columns,unsigned int diags,unsigned int elementType)
|
||||
{ int i;
|
||||
DiagDeviceParams p;
|
||||
|
||||
p = getDiagDeviceParams(rows, columns, diags,elementType);
|
||||
i = allocDiagDevice(deviceMat, &p);
|
||||
if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i);
|
||||
}
|
||||
return(i);
|
||||
}
|
||||
|
||||
int writeDiagDeviceDouble(void* deviceMat, double* a, int* off, int n)
|
||||
{ int i,fo,fa;
|
||||
char buf_a[255], buf_o[255],tmp[255];
|
||||
struct DiagDevice *devMat = (struct DiagDevice *) deviceMat;
|
||||
// Ex updateFromHost function
|
||||
/* memset(buf_a,'\0',255); */
|
||||
/* memset(buf_o,'\0',255); */
|
||||
/* memset(tmp,'\0',255); */
|
||||
|
||||
/* strcat(buf_a,"mat_"); */
|
||||
/* strcat(buf_o,"off_"); */
|
||||
/* sprintf(tmp,"%d_%d.dat",devMat->rows,devMat->cols); */
|
||||
/* strcat(buf_a,tmp); */
|
||||
/* memset(tmp,'\0',255); */
|
||||
/* sprintf(tmp,"%d.dat",devMat->cols); */
|
||||
/* strcat(buf_o,tmp); */
|
||||
|
||||
/* fa = open(buf_a, O_CREAT | O_WRONLY | O_TRUNC, 0664); */
|
||||
/* fo = open(buf_o, O_CREAT | O_WRONLY | O_TRUNC, 0664); */
|
||||
|
||||
/* i = write(fa, a, sizeof(double)*devMat->cols*devMat->rows); */
|
||||
/* i = write(fo, off, sizeof(int)*devMat->cols); */
|
||||
|
||||
/* close(fa); */
|
||||
/* close(fo); */
|
||||
|
||||
i = writeRemoteBuffer((void*) a, (void *)devMat->cM, devMat->rows*devMat->diags*sizeof(double));
|
||||
i = writeRemoteBuffer((void*) off, (void *)devMat->off, devMat->diags*sizeof(int));
|
||||
|
||||
if(i==0)
|
||||
return SPGPU_SUCCESS;
|
||||
else
|
||||
return SPGPU_UNSUPPORTED;
|
||||
}
|
||||
|
||||
int readDiagDeviceDouble(void* deviceMat, double* a, int* off)
|
||||
{ int i;
|
||||
struct DiagDevice *devMat = (struct DiagDevice *) deviceMat;
|
||||
i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(double));
|
||||
i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int));
|
||||
/*if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
|
||||
}*/
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
//new
|
||||
int spmvDiagDeviceDouble(void *deviceMat, double alpha, void* deviceX,
|
||||
double beta, void* deviceY)
|
||||
{
|
||||
struct DiagDevice *devMat = (struct DiagDevice *) deviceMat;
|
||||
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||
|
||||
#ifdef VERBOSE
|
||||
/*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/
|
||||
/*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/
|
||||
/*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/
|
||||
#endif
|
||||
/* spgpuDdiagspmv(handle, (double *)y->v_, (double *)y->v_,alpha,(double *)devMat->cM,devMat->off,devMat->rows,devMat->cols,x->v_,beta,devMat->baseIndex); */
|
||||
|
||||
spgpuDdiaspmv(handle, (double *)y->v_, (double *)y->v_,alpha,(double *)devMat->cM,devMat->off,devMat->rows,devMat->rows,devMat->cols,devMat->diags,x->v_,beta);
|
||||
|
||||
//cudaSync();
|
||||
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int writeDiagDeviceFloat(void* deviceMat, float* a, int* off, int n)
|
||||
{ int i,fo,fa;
|
||||
char buf_a[255], buf_o[255],tmp[255];
|
||||
struct DiagDevice *devMat = (struct DiagDevice *) deviceMat;
|
||||
// Ex updateFromHost function
|
||||
/* memset(buf_a,'\0',255); */
|
||||
/* memset(buf_o,'\0',255); */
|
||||
/* memset(tmp,'\0',255); */
|
||||
|
||||
/* strcat(buf_a,"mat_"); */
|
||||
/* strcat(buf_o,"off_"); */
|
||||
/* sprintf(tmp,"%d_%d.dat",devMat->rows,devMat->cols); */
|
||||
/* strcat(buf_a,tmp); */
|
||||
/* memset(tmp,'\0',255); */
|
||||
/* sprintf(tmp,"%d.dat",devMat->cols); */
|
||||
/* strcat(buf_o,tmp); */
|
||||
|
||||
/* fa = open(buf_a, O_CREAT | O_WRONLY | O_TRUNC, 0664); */
|
||||
/* fo = open(buf_o, O_CREAT | O_WRONLY | O_TRUNC, 0664); */
|
||||
|
||||
/* i = write(fa, a, sizeof(float)*devMat->cols*devMat->rows); */
|
||||
/* i = write(fo, off, sizeof(int)*devMat->cols); */
|
||||
|
||||
/* close(fa); */
|
||||
/* close(fo); */
|
||||
|
||||
i = writeRemoteBuffer((void*) a, (void *)devMat->cM, devMat->rows*devMat->diags*sizeof(float));
|
||||
i = writeRemoteBuffer((void*) off, (void *)devMat->off, devMat->diags*sizeof(int));
|
||||
|
||||
if(i==0)
|
||||
return SPGPU_SUCCESS;
|
||||
else
|
||||
return SPGPU_UNSUPPORTED;
|
||||
}
|
||||
|
||||
int readDiagDeviceFloat(void* deviceMat, float* a, int* off)
|
||||
{ int i;
|
||||
struct DiagDevice *devMat = (struct DiagDevice *) deviceMat;
|
||||
i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(float));
|
||||
i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int));
|
||||
/*if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i);
|
||||
}*/
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
//new
|
||||
int spmvDiagDeviceFloat(void *deviceMat, float alpha, void* deviceX,
|
||||
float beta, void* deviceY)
|
||||
{
|
||||
struct DiagDevice *devMat = (struct DiagDevice *) deviceMat;
|
||||
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||
|
||||
#ifdef VERBOSE
|
||||
/*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/
|
||||
/*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/
|
||||
/*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/
|
||||
#endif
|
||||
/* spgpuDdiagspmv(handle, (float *)y->v_, (float *)y->v_,alpha,(float *)devMat->cM,devMat->off,devMat->rows,devMat->cols,x->v_,beta,devMat->baseIndex); */
|
||||
|
||||
spgpuSdiaspmv(handle, (float *)y->v_, (float *)y->v_,alpha,(float *)devMat->cM,devMat->off,devMat->rows,devMat->rows,devMat->cols,devMat->diags,x->v_,beta);
|
||||
|
||||
//cudaSync();
|
||||
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
@ -0,0 +1,90 @@
|
||||
/* Parallel Sparse BLAS GPU plugin */
|
||||
/* (C) Copyright 2013 */
|
||||
|
||||
/* Salvatore Filippone */
|
||||
/* Alessandro Fanfarillo */
|
||||
|
||||
/* Redistribution and use in source and binary forms, with or without */
|
||||
/* modification, are permitted provided that the following conditions */
|
||||
/* are met: */
|
||||
/* 1. Redistributions of source code must retain the above copyright */
|
||||
/* notice, this list of conditions and the following disclaimer. */
|
||||
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||
/* notice, this list of conditions, and the following disclaimer in the */
|
||||
/* documentation and/or other materials provided with the distribution. */
|
||||
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||
/* not be used to endorse or promote products derived from this */
|
||||
/* software without specific written permission. */
|
||||
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
|
||||
#ifndef _DIAGDEV_H_
|
||||
#define _DIAGDEV_H_
|
||||
|
||||
#include "cintrf.h"
|
||||
#include "dia.h"
|
||||
|
||||
// Host-side descriptor of a diagonal-format matrix whose data live in
// remote (device) buffers. Created by allocDiagDevice, released by
// freeDiagDevice.
struct DiagDevice
|
||||
{
|
||||
// Coefficient buffer: rows*diags elements.
|
||||
void *cM; // element type is chosen at allocation time (int, float, double, cuFloatComplex or cuDoubleComplex)
|
||||
|
||||
// Diagonal offsets: diags ints (one per stored diagonal, not one per element of cM).
|
||||
int *off;
|
||||
|
||||
// Number of rows, copied from DiagDeviceParams.rows.
int rows;
|
||||
|
||||
// Number of columns, copied from DiagDeviceParams.columns.
int cols;
|
||||
|
||||
// Number of stored diagonals, copied from DiagDeviceParams.diags.
int diags;
|
||||
|
||||
};
|
||||
|
||||
// Allocation parameters for a DiagDevice matrix; filled by
// getDiagDeviceParams and consumed by allocDiagDevice.
typedef struct DiagDeviceParams
|
||||
{
|
||||
|
||||
// One of the SPGPU_TYPE_* codes; selects the element size of the cM buffer.
unsigned int elementType;
|
||||
|
||||
// Number of rows.
|
||||
// Together with diags it sizes the cM buffer (rows*diags elements).
|
||||
unsigned int rows;
|
||||
//unsigned int hackOffsLength;
|
||||
|
||||
// Number of columns.
|
||||
// Copied into DiagDevice.cols by allocDiagDevice; it does not affect any allocation size.
|
||||
unsigned int columns;
|
||||
|
||||
// Number of stored diagonals; sizes the off buffer (diags ints).
unsigned int diags;
|
||||
|
||||
} DiagDeviceParams;
|
||||
/*
 * Parameter names below follow the definitions in diagdev.c. The previous
 * names (elementType, firstIndex) described a different argument order than
 * the one the functions actually implement: the third scalar is the number
 * of stored diagonals and the fourth is the SPGPU_TYPE_* element type.
 */
DiagDeviceParams getDiagDeviceParams(unsigned int rows, unsigned int columns,
				     unsigned int diags, unsigned int elementType);
int FallocDiagDevice(void** deviceMat, unsigned int rows, unsigned int columns,
		     unsigned int diags, unsigned int elementType);
|
||||
int allocDiagDevice(void ** remoteMatrix, DiagDeviceParams* params);
|
||||
void freeDiagDevice(void* remoteMatrix);
|
||||
|
||||
int readDiagDeviceDouble(void* deviceMat, double* a, int* off);
|
||||
int writeDiagDeviceDouble(void* deviceMat, double* a, int* off, int n);
|
||||
int spmvDiagDeviceDouble(void *deviceMat, double alpha, void* deviceX,
|
||||
double beta, void* deviceY);
|
||||
|
||||
int readDiagDeviceFloat(void* deviceMat, float* a, int* off);
|
||||
int writeDiagDeviceFloat(void* deviceMat, float* a, int* off, int n);
|
||||
int spmvDiagDeviceFloat(void *deviceMat, float alpha, void* deviceX,
|
||||
float beta, void* deviceY);
|
||||
|
||||
|
||||
|
||||
#endif
|
@ -0,0 +1,224 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
|
||||
module diagdev_mod
|
||||
use iso_c_binding
|
||||
use core_mod
|
||||
|
||||
! Interoperable result type of FgetDiagDeviceParams, which binds to the
! C function getDiagDeviceParams (four consecutive C ints).
! NOTE(review): the C struct DiagDeviceParams names its fourth member
! "diags"; the component called firstIndex here occupies that slot.
! Confirm the intended meaning before relying on the name.
type, bind(c) :: diagdev_parms
|
||||
integer(c_int) :: element_type
|
||||
integer(c_int) :: rows
|
||||
integer(c_int) :: columns
|
||||
integer(c_int) :: firstIndex
|
||||
end type diagdev_parms
|
||||
|
||||
interface
|
||||
function FgetDiagDeviceParams(rows, columns, elementType, firstIndex) &
|
||||
& result(res) bind(c,name='getDiagDeviceParams')
|
||||
use iso_c_binding
|
||||
import :: diagdev_parms
|
||||
type(diagdev_parms) :: res
|
||||
integer(c_int), value :: rows,columns,elementType,firstIndex
|
||||
end function FgetDiagDeviceParams
|
||||
end interface
|
||||
|
||||
|
||||
interface
|
||||
function FallocDiagDevice(deviceMat,rows,columns,&
|
||||
& elementType,firstIndex) &
|
||||
& result(res) bind(c,name='FallocDiagDevice')
|
||||
use iso_c_binding
|
||||
integer(c_int) :: res
|
||||
integer(c_int), value :: rows,columns,elementType,firstIndex
|
||||
type(c_ptr) :: deviceMat
|
||||
end function FallocDiagDevice
|
||||
end interface
|
||||
|
||||
interface writeDiagDevice
|
||||
|
||||
function writeDiagDeviceFloat(deviceMat,a,off,n) &
|
||||
& result(res) bind(c,name='writeDiagDeviceFloat')
|
||||
use iso_c_binding
|
||||
integer(c_int) :: res
|
||||
type(c_ptr), value :: deviceMat
|
||||
integer(c_int), value :: n
|
||||
real(c_float) :: a(n,*)
|
||||
integer(c_int) :: off(*)!,irn(*)
|
||||
end function writeDiagDeviceFloat
|
||||
|
||||
function writeDiagDeviceDouble(deviceMat,a,off,n) &
|
||||
& result(res) bind(c,name='writeDiagDeviceDouble')
|
||||
use iso_c_binding
|
||||
integer(c_int) :: res
|
||||
type(c_ptr), value :: deviceMat
|
||||
integer(c_int),value :: n
|
||||
real(c_double) :: a(n,*)
|
||||
integer(c_int) :: off(*)
|
||||
end function writeDiagDeviceDouble
|
||||
|
||||
function writeDiagDeviceFloatComplex(deviceMat,a,off,n) &
|
||||
& result(res) bind(c,name='writeDiagDeviceFloatComplex')
|
||||
use iso_c_binding
|
||||
integer(c_int) :: res
|
||||
type(c_ptr), value :: deviceMat
|
||||
integer(c_int), value :: n
|
||||
complex(c_float_complex) :: a(n,*)
|
||||
integer(c_int) :: off(*)!,irn(*)
|
||||
end function writeDiagDeviceFloatComplex
|
||||
|
||||
function writeDiagDeviceDoubleComplex(deviceMat,a,off,n) &
|
||||
& result(res) bind(c,name='writeDiagDeviceDoubleComplex')
|
||||
use iso_c_binding
|
||||
integer(c_int) :: res
|
||||
type(c_ptr), value :: deviceMat
|
||||
integer(c_int), value :: n
|
||||
complex(c_double_complex) :: a(n,*)
|
||||
integer(c_int) :: off(*)!,irn(*)
|
||||
end function writeDiagDeviceDoubleComplex
|
||||
|
||||
end interface
|
||||
|
||||
interface readDiagDevice
|
||||
|
||||
! Binding to the C routine readDiagDeviceFloat: copies the single-precision
! coefficients and the offsets of a device matrix back to the host.
!   deviceMat : C pointer to the device matrix descriptor
!   a         : destination for the coefficients, leading dimension n
!   off       : destination for the offsets
!   n         : leading dimension of a; declared explicitly (by value) like
!               in the other specifics of this generic, instead of being
!               left to implicit typing
! NOTE(review): the C prototype in diagdev.h takes only (deviceMat, a, off);
! the trailing n is used here only to dimension a.
function readDiagDeviceFloat(deviceMat,a,off,n) &
     & result(res) bind(c,name='readDiagDeviceFloat')
  use iso_c_binding
  integer(c_int)        :: res
  type(c_ptr), value    :: deviceMat
  integer(c_int), value :: n
  real(c_float)         :: a(n,*)
  integer(c_int)        :: off(*)!,irn(*)
end function readDiagDeviceFloat
|
||||
|
||||
function readDiagDeviceDouble(deviceMat,a,off,n) &
|
||||
& result(res) bind(c,name='readDiagDeviceDouble')
|
||||
use iso_c_binding
|
||||
integer(c_int) :: res
|
||||
type(c_ptr), value :: deviceMat
|
||||
integer(c_int),value :: n
|
||||
real(c_double) :: a(n,*)
|
||||
integer(c_int) :: off(*)
|
||||
end function readDiagDeviceDouble
|
||||
|
||||
function readDiagDeviceFloatComplex(deviceMat,a,off,n) &
|
||||
& result(res) bind(c,name='readDiagDeviceFloatComplex')
|
||||
use iso_c_binding
|
||||
integer(c_int) :: res
|
||||
type(c_ptr), value :: deviceMat
|
||||
integer(c_int), value :: n
|
||||
complex(c_float_complex) :: a(n,*)
|
||||
integer(c_int) :: off(*)!,irn(*)
|
||||
end function readDiagDeviceFloatComplex
|
||||
|
||||
function readDiagDeviceDoubleComplex(deviceMat,a,off,n) &
|
||||
& result(res) bind(c,name='readDiagDeviceDoubleComplex')
|
||||
use iso_c_binding
|
||||
integer(c_int) :: res
|
||||
type(c_ptr), value :: deviceMat
|
||||
integer(c_int), value :: n
|
||||
complex(c_double_complex) :: a(n,*)
|
||||
integer(c_int) :: off(*)!,irn(*)
|
||||
end function readDiagDeviceDoubleComplex
|
||||
|
||||
end interface
|
||||
|
||||
! Binding to the C routine freeDiagDevice; the device matrix handle is
! passed by value.
interface
  subroutine freeDiagDevice(deviceMat) bind(c,name='freeDiagDevice')
    use iso_c_binding, only : c_ptr
    type(c_ptr), value :: deviceMat
  end subroutine freeDiagDevice
end interface
|
||||
|
||||
! Binding to the argument-less C routine resetDiagTimer.
interface
  subroutine resetDiagTimer() &
       & bind(c,name='resetDiagTimer')
    use iso_c_binding
  end subroutine resetDiagTimer
end interface
|
||||
! Binding to the C routine getDiagTimer, which returns a C double.
interface
  function getDiagTimer() result(res) bind(c,name='getDiagTimer')
    use iso_c_binding, only : c_double
    real(c_double) :: res
  end function getDiagTimer
end interface
|
||||
|
||||
! Binding to the C routine getDiagDevicePitch: takes the device matrix
! handle by value and returns a C int.
interface
  function getDiagDevicePitch(deviceMat) result(res) &
       & bind(c,name='getDiagDevicePitch')
    use iso_c_binding, only : c_int, c_ptr
    integer(c_int)     :: res
    type(c_ptr), value :: deviceMat
  end function getDiagDevicePitch
end interface
|
||||
|
||||
! Binding to the C routine getDiagDeviceMaxRowSize: takes the device
! matrix handle by value and returns a C int.
interface
  function getDiagDeviceMaxRowSize(deviceMat) result(res) &
       & bind(c,name='getDiagDeviceMaxRowSize')
    use iso_c_binding, only : c_int, c_ptr
    integer(c_int)     :: res
    type(c_ptr), value :: deviceMat
  end function getDiagDeviceMaxRowSize
end interface
|
||||
|
||||
|
||||
! Generic name resolving to the four type-specific C entry points
! spmvDiagDevice{Float,Double,FloatComplex,DoubleComplex}.
! The matrix handle, the X and Y handles and the two scalars ALPHA and
! BETA are all passed by value; the C routine returns an integer status.
interface spmvDiagDevice

  ! Single precision real scalars.
  function spmvDiagDeviceFloat(deviceMat,alpha,x,beta,y) &
       & bind(c,name='spmvDiagDeviceFloat') result(res)
    use iso_c_binding, only : c_int, c_ptr, c_float
    type(c_ptr), value   :: deviceMat, x, y
    real(c_float), value :: alpha, beta
    integer(c_int)       :: res
  end function spmvDiagDeviceFloat

  ! Double precision real scalars.
  function spmvDiagDeviceDouble(deviceMat,alpha,x,beta,y) &
       & bind(c,name='spmvDiagDeviceDouble') result(res)
    use iso_c_binding, only : c_int, c_ptr, c_double
    type(c_ptr), value    :: deviceMat, x, y
    real(c_double), value :: alpha, beta
    integer(c_int)        :: res
  end function spmvDiagDeviceDouble

  ! Single precision complex scalars.
  function spmvDiagDeviceFloatComplex(deviceMat,alpha,x,beta,y) &
       & bind(c,name='spmvDiagDeviceFloatComplex') result(res)
    use iso_c_binding, only : c_int, c_ptr, c_float_complex
    type(c_ptr), value              :: deviceMat, x, y
    complex(c_float_complex), value :: alpha, beta
    integer(c_int)                  :: res
  end function spmvDiagDeviceFloatComplex

  ! Double precision complex scalars.
  function spmvDiagDeviceDoubleComplex(deviceMat,alpha,x,beta,y) &
       & bind(c,name='spmvDiagDeviceDoubleComplex') result(res)
    use iso_c_binding, only : c_int, c_ptr, c_double_complex
    type(c_ptr), value               :: deviceMat, x, y
    complex(c_double_complex), value :: alpha, beta
    integer(c_int)                   :: res
  end function spmvDiagDeviceDoubleComplex

end interface spmvDiagDevice
|
||||
|
||||
end module diagdev_mod
|
@ -0,0 +1,321 @@
|
||||
/* Parallel Sparse BLAS GPU plugin */
|
||||
/* (C) Copyright 2013 */
|
||||
|
||||
/* Salvatore Filippone */
|
||||
/* Alessandro Fanfarillo */
|
||||
|
||||
/* Redistribution and use in source and binary forms, with or without */
|
||||
/* modification, are permitted provided that the following conditions */
|
||||
/* are met: */
|
||||
/* 1. Redistributions of source code must retain the above copyright */
|
||||
/* notice, this list of conditions and the following disclaimer. */
|
||||
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||
/* notice, this list of conditions, and the following disclaimer in the */
|
||||
/* documentation and/or other materials provided with the distribution. */
|
||||
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||
/* not be used to endorse or promote products derived from this */
|
||||
/* software without specific written permission. */
|
||||
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
#include <sys/time.h>
|
||||
#include "dnsdev.h"
|
||||
|
||||
#define PASS_RS 0
|
||||
|
||||
#define IMIN(a,b) ((a)<(b) ? (a) : (b))
|
||||
|
||||
DnsDeviceParams getDnsDeviceParams(unsigned int rows, unsigned int columns,
|
||||
unsigned int elementType, unsigned int firstIndex)
|
||||
{
|
||||
DnsDeviceParams params;
|
||||
|
||||
if (elementType == SPGPU_TYPE_DOUBLE)
|
||||
{
|
||||
params.pitch = ((rows + ELL_PITCH_ALIGN_D - 1)/ELL_PITCH_ALIGN_D)*ELL_PITCH_ALIGN_D;
|
||||
}
|
||||
else
|
||||
{
|
||||
params.pitch = ((rows + ELL_PITCH_ALIGN_S - 1)/ELL_PITCH_ALIGN_S)*ELL_PITCH_ALIGN_S;
|
||||
}
|
||||
//For complex?
|
||||
params.elementType = elementType;
|
||||
params.rows = rows;
|
||||
params.columns = columns;
|
||||
params.firstIndex = firstIndex;
|
||||
|
||||
return params;
|
||||
|
||||
}
|
||||
//new
|
||||
/*
 * Allocate a DnsDevice descriptor on the host and its element buffer on
 * the device.
 *
 *   remoteMatrix  receives the new descriptor; set to NULL on any failure,
 *                 so that a later freeDnsDevice() on it is harmless.
 *   params        size, pitch, element type and first index to use.
 *
 * Returns SPGPU_SUCCESS, or SPGPU_UNSUPPORTED when the element type is not
 * one of the four supported SPGPU_TYPE_* values or the host descriptor
 * cannot be allocated (SPGPU_UNSUPPORTED is the only failure code this
 * file uses).
 */
int allocDnsDevice(void ** remoteMatrix, DnsDeviceParams* params)
{
  struct DnsDevice *tmp;
  size_t elemSize;

  *remoteMatrix = NULL;

  /* Validate the element type before allocating anything, so that an
     unsupported type neither leaks the descriptor nor hands back one
     whose cM pointer was never initialised. */
  if (params->elementType == SPGPU_TYPE_FLOAT)
    elemSize = sizeof(float);
  else if (params->elementType == SPGPU_TYPE_DOUBLE)
    elemSize = sizeof(double);
  else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT)
    elemSize = sizeof(cuFloatComplex);
  else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE)
    elemSize = sizeof(cuDoubleComplex);
  else
    return SPGPU_UNSUPPORTED; // Unsupported params

  tmp = (struct DnsDevice *)malloc(sizeof(struct DnsDevice));
  if (tmp == NULL)
    return SPGPU_UNSUPPORTED;

  tmp->cM        = NULL;
  tmp->rows      = params->rows;
  tmp->columns   = params->columns;
  tmp->cMPitch   = params->pitch;
  tmp->pitch     = tmp->cMPitch;
  /* One column of the buffer spans `pitch` elements. */
  tmp->allocsize = (int)tmp->columns * tmp->pitch;
  tmp->baseIndex = params->firstIndex;

  /* NOTE(review): the result of allocRemoteBuffer is not checked, as in
     the original code; confirm its return type and propagate failures. */
  allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*elemSize);

  *remoteMatrix = (void *)tmp;
  return SPGPU_SUCCESS;
}
|
||||
|
||||
void freeDnsDevice(void* remoteMatrix)
|
||||
{
|
||||
struct DnsDevice *devMat = (struct DnsDevice *) remoteMatrix;
|
||||
//fprintf(stderr,"freeDnsDevice\n");
|
||||
if (devMat != NULL) {
|
||||
freeRemoteBuffer(devMat->cM);
|
||||
free(remoteMatrix);
|
||||
}
|
||||
}
|
||||
|
||||
//new
|
||||
int FallocDnsDevice(void** deviceMat, unsigned int rows,
|
||||
unsigned int columns, unsigned int elementType,
|
||||
unsigned int firstIndex)
|
||||
{ int i;
|
||||
DnsDeviceParams p;
|
||||
|
||||
p = getDnsDeviceParams(rows, columns, elementType, firstIndex);
|
||||
i = allocDnsDevice(deviceMat, &p);
|
||||
if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","FallocDnsDevice",i);
|
||||
}
|
||||
return(i);
|
||||
}
|
||||
|
||||
|
||||
int spmvDnsDeviceFloat(char transa, int m, int n, int k, float *alpha,
|
||||
void *deviceMat, void* deviceX, float *beta, void* deviceY)
|
||||
{
|
||||
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
|
||||
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||
int status;
|
||||
|
||||
cublasHandle_t handle=psb_cudaGetCublasHandle();
|
||||
cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C));
|
||||
/* Note: the M,N,K choices according to TRANS have already been handled in the caller */
|
||||
if (n == 1) {
|
||||
status = cublasSgemv(handle, trans, m,k,
|
||||
alpha, devMat->cM,devMat->pitch, x->v_,1,
|
||||
beta, y->v_,1);
|
||||
} else {
|
||||
status = cublasSgemm(handle, trans, CUBLAS_OP_N, m,n,k,
|
||||
alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
|
||||
beta, y->v_,y->pitch_);
|
||||
}
|
||||
|
||||
if (status == CUBLAS_STATUS_SUCCESS)
|
||||
return SPGPU_SUCCESS;
|
||||
else
|
||||
return SPGPU_UNSUPPORTED;
|
||||
}
|
||||
|
||||
int spmvDnsDeviceDouble(char transa, int m, int n, int k, double *alpha,
|
||||
void *deviceMat, void* deviceX, double *beta, void* deviceY)
|
||||
{
|
||||
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
|
||||
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||
int status;
|
||||
|
||||
cublasHandle_t handle=psb_cudaGetCublasHandle();
|
||||
cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C));
|
||||
/* Note: the M,N,K choices according to TRANS have already been handled in the caller */
|
||||
if (n == 1) {
|
||||
status = cublasDgemv(handle, trans, m,k,
|
||||
alpha, devMat->cM,devMat->pitch, x->v_,1,
|
||||
beta, y->v_,1);
|
||||
} else {
|
||||
status = cublasDgemm(handle, trans, CUBLAS_OP_N, m,n,k,
|
||||
alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
|
||||
beta, y->v_,y->pitch_);
|
||||
}
|
||||
|
||||
if (status == CUBLAS_STATUS_SUCCESS)
|
||||
return SPGPU_SUCCESS;
|
||||
else
|
||||
return SPGPU_UNSUPPORTED;
|
||||
}
|
||||
|
||||
int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k, float complex *alpha,
|
||||
void *deviceMat, void* deviceX, float complex *beta, void* deviceY)
|
||||
{
|
||||
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
|
||||
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||
int status;
|
||||
|
||||
cublasHandle_t handle=psb_cudaGetCublasHandle();
|
||||
cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C));
|
||||
/* Note: the M,N,K choices according to TRANS have already been handled in the caller */
|
||||
if (n == 1) {
|
||||
status = cublasCgemv(handle, trans, m,k,
|
||||
alpha, devMat->cM,devMat->pitch, x->v_,1,
|
||||
beta, y->v_,1);
|
||||
} else {
|
||||
status = cublasCgemm(handle, trans, CUBLAS_OP_N, m,n,k,
|
||||
alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
|
||||
beta, y->v_,y->pitch_);
|
||||
}
|
||||
|
||||
if (status == CUBLAS_STATUS_SUCCESS)
|
||||
return SPGPU_SUCCESS;
|
||||
else
|
||||
return SPGPU_UNSUPPORTED;
|
||||
}
|
||||
|
||||
int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k, double complex *alpha,
|
||||
void *deviceMat, void* deviceX, double complex *beta, void* deviceY)
|
||||
{
|
||||
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
|
||||
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||
int status;
|
||||
|
||||
cublasHandle_t handle=psb_cudaGetCublasHandle();
|
||||
cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C));
|
||||
/* Note: the M,N,K choices according to TRANS have already been handled in the caller */
|
||||
if (n == 1) {
|
||||
status = cublasZgemv(handle, trans, m,k,
|
||||
alpha, devMat->cM,devMat->pitch, x->v_,1,
|
||||
beta, y->v_,1);
|
||||
} else {
|
||||
status = cublasZgemm(handle, trans, CUBLAS_OP_N, m,n,k,
|
||||
alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
|
||||
beta, y->v_,y->pitch_);
|
||||
}
|
||||
|
||||
if (status == CUBLAS_STATUS_SUCCESS)
|
||||
return SPGPU_SUCCESS;
|
||||
else
|
||||
return SPGPU_UNSUPPORTED;
|
||||
}
|
||||
|
||||
|
||||
int writeDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc)
|
||||
{ int i;
|
||||
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
|
||||
int pitch=devMat->pitch;
|
||||
i = cublasSetMatrix(lda,nc,sizeof(float), (void*) val,lda, (void *)devMat->cM, pitch);
|
||||
if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceFloat",i);
|
||||
}
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
int writeDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc)
|
||||
{ int i;
|
||||
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
|
||||
int pitch=devMat->pitch;
|
||||
i = cublasSetMatrix(lda,nc,sizeof(double), (void*) val,lda, (void *)devMat->cM, pitch);
|
||||
if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceDouble",i);
|
||||
}
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int writeDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int nc)
|
||||
{ int i;
|
||||
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
|
||||
int pitch=devMat->pitch;
|
||||
i = cublasSetMatrix(lda,nc,sizeof(cuFloatComplex), (void*) val,lda, (void *)devMat->cM, pitch);
|
||||
if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceFloatComplex",i);
|
||||
}
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
int writeDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, int nc)
|
||||
{ int i;
|
||||
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
|
||||
int pitch=devMat->pitch;
|
||||
i = cublasSetMatrix(lda,nc,sizeof(cuDoubleComplex), (void*) val,lda, (void *)devMat->cM, pitch);
|
||||
if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceDoubleComplex",i);
|
||||
}
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int readDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc)
|
||||
{ int i;
|
||||
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
|
||||
int pitch=devMat->pitch;
|
||||
i = cublasGetMatrix(lda,nc,sizeof(float), (void*) val,lda, (void *)devMat->cM, pitch);
|
||||
if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceFloat",i);
|
||||
}
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
int readDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc)
|
||||
{ int i;
|
||||
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
|
||||
int pitch=devMat->pitch;
|
||||
i = cublasGetMatrix(lda,nc,sizeof(double), (void*) val,lda, (void *)devMat->cM, pitch);
|
||||
if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceDouble",i);
|
||||
}
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int readDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int nc)
|
||||
{ int i;
|
||||
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
|
||||
int pitch=devMat->pitch;
|
||||
i = cublasGetMatrix(lda,nc,sizeof(cuFloatComplex), (void*) val,lda, (void *)devMat->cM, pitch);
|
||||
if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceFloatComplex",i);
|
||||
}
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
int readDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, int nc)
|
||||
{ int i;
|
||||
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
|
||||
int pitch=devMat->pitch;
|
||||
i = cublasGetMatrix(lda,nc,sizeof(cuDoubleComplex), (void*) val,lda, (void *)devMat->cM, pitch);
|
||||
if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceDoubleComplex",i);
|
||||
}
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int getDnsDevicePitch(void* deviceMat)
|
||||
{ int i;
|
||||
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
|
||||
i = devMat->pitch;
|
||||
return(i);
|
||||
}
|
||||
|
@ -0,0 +1,117 @@
|
||||
/* Parallel Sparse BLAS GPU plugin */
|
||||
/* (C) Copyright 2013 */
|
||||
|
||||
/* Salvatore Filippone */
|
||||
/* Alessandro Fanfarillo */
|
||||
|
||||
/* Redistribution and use in source and binary forms, with or without */
|
||||
/* modification, are permitted provided that the following conditions */
|
||||
/* are met: */
|
||||
/* 1. Redistributions of source code must retain the above copyright */
|
||||
/* notice, this list of conditions and the following disclaimer. */
|
||||
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||
/* notice, this list of conditions, and the following disclaimer in the */
|
||||
/* documentation and/or other materials provided with the distribution. */
|
||||
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||
/* not be used to endorse or promote products derived from this */
|
||||
/* software without specific written permission. */
|
||||
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
|
||||
|
||||
#ifndef _DNSDEV_H_
|
||||
#define _DNSDEV_H_
|
||||
|
||||
#include "cintrf.h"
|
||||
#include "cuComplex.h"
|
||||
#include "cublas_v2.h"
|
||||
|
||||
|
||||
struct DnsDevice
|
||||
{
|
||||
// Device buffer holding the dense matrix (allocated by allocDnsDevice)
|
||||
void *cM; // element type is float, double, cuFloatComplex or cuDoubleComplex
|
||||
|
||||
|
||||
// Matrix size, copied from the allocation parameters
|
||||
int rows;
|
||||
|
||||
int columns;
|
||||
|
||||
|
||||
int pitch; // set equal to cMPitch; passed to cuBLAS as the leading dimension of cM
|
||||
|
||||
|
||||
int cMPitch;
|
||||
|
||||
|
||||
// Allocation size in elements (columns * pitch)
|
||||
int allocsize;
|
||||
|
||||
|
||||
/* First index, copied from the allocation parameters (i.e. 0 for C, 1 for Fortran) */
|
||||
int baseIndex;
|
||||
};
|
||||
|
||||
typedef struct DnsDeviceParams
|
||||
{
|
||||
// Element type code (SPGPU_TYPE_*); selects the element size of the pitch*columns device buffer
|
||||
unsigned int elementType;
|
||||
|
||||
|
||||
// Pitch (in number of elements)
|
||||
unsigned int pitch;
|
||||
|
||||
|
||||
// Number of rows.
|
||||
// Rounded up to the alignment to obtain the pitch
|
||||
unsigned int rows;
|
||||
|
||||
|
||||
// Number of columns.
|
||||
// Multiplied by the pitch to size the device buffer
|
||||
unsigned int columns;
|
||||
|
||||
|
||||
// First index (e.g. 0 or 1); stored as baseIndex in the device matrix
|
||||
unsigned int firstIndex;
|
||||
} DnsDeviceParams;
|
||||
|
||||
int FallocDnsDevice(void** deviceMat, unsigned int rows,
|
||||
unsigned int columns, unsigned int elementType,
|
||||
unsigned int firstIndex);
|
||||
int allocDnsDevice(void ** remoteMatrix, DnsDeviceParams* params);
|
||||
void freeDnsDevice(void* remoteMatrix);
|
||||
|
||||
int writeDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc);
|
||||
int writeDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc);
|
||||
int writeDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int nc);
|
||||
int writeDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, int nc);
|
||||
|
||||
int readDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc);
|
||||
int readDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc);
|
||||
int readDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int nc);
|
||||
int readDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, int nc);
|
||||
|
||||
int spmvDnsDeviceFloat(char transa, int m, int n, int k,
|
||||
float *alpha, void *deviceMat, void* deviceX,
|
||||
float *beta, void* deviceY);
|
||||
int spmvDnsDeviceDouble(char transa, int m, int n, int k,
|
||||
double *alpha, void *deviceMat, void* deviceX,
|
||||
double *beta, void* deviceY);
|
||||
int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k,
|
||||
float complex *alpha, void *deviceMat, void* deviceX,
|
||||
float complex *beta, void* deviceY);
|
||||
int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k,
|
||||
double complex *alpha, void *deviceMat, void* deviceX,
|
||||
double complex *beta, void* deviceY);
|
||||
|
||||
int getDnsDevicePitch(void* deviceMat);
|
||||
|
||||
// Superseded prototypes of the dense (Dns) matrix-vector product, kept for reference
|
||||
//int spmvDnsDeviceFloat(void *deviceMat, float* alpha, void* deviceX, float* beta, void* deviceY);
|
||||
//int spmvDnsDeviceDouble(void *deviceMat, double* alpha, void* deviceX, double* beta, void* deviceY);
|
||||
|
||||
#endif
|
@ -0,0 +1,270 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
|
||||
module dnsdev_mod
|
||||
use iso_c_binding
|
||||
use core_mod
|
||||
|
||||
! Interoperable mirror of the C struct DnsDeviceParams, which is what
! getDnsDeviceParams returns by value.  The component list must match
! the C declaration exactly (five unsigned ints: elementType, pitch,
! rows, columns, firstIndex).  The former maxRowSize/avgRowSize
! components have no C counterpart and shifted firstIndex out of place,
! so they are removed.
type, bind(c) :: dnsdev_parms
  integer(c_int) :: element_type
  integer(c_int) :: pitch
  integer(c_int) :: rows
  integer(c_int) :: columns
  integer(c_int) :: firstIndex
end type dnsdev_parms
|
||||
|
||||
! Binding to the C routine getDnsDeviceParams: the four integers are
! passed by value and a dnsdev_parms record is returned by value.
interface
  function FgetDnsDeviceParams(rows, columns, elementType, firstIndex) &
       & bind(c,name='getDnsDeviceParams') result(res)
    use iso_c_binding, only : c_int
    import :: dnsdev_parms
    integer(c_int), value :: rows
    integer(c_int), value :: columns
    integer(c_int), value :: elementType
    integer(c_int), value :: firstIndex
    type(dnsdev_parms)    :: res
  end function FgetDnsDeviceParams
end interface
|
||||
|
||||
|
||||
! Binding to the C routine FallocDnsDevice.  deviceMat is passed by
! reference (the C side takes a void**) and receives the new handle;
! the four integers are passed by value.  The result is the C status.
interface
  function FallocDnsDevice(deviceMat,rows,columns,elementType,firstIndex) &
       & bind(c,name='FallocDnsDevice') result(res)
    use iso_c_binding, only : c_int, c_ptr
    type(c_ptr)           :: deviceMat
    integer(c_int), value :: rows
    integer(c_int), value :: columns
    integer(c_int), value :: elementType
    integer(c_int), value :: firstIndex
    integer(c_int)        :: res
  end function FallocDnsDevice
end interface
|
||||
|
||||
|
||||
! Generic name resolving to the four type-specific C entry points
! writeDnsDevice{Float,Double,FloatComplex,DoubleComplex}, which copy
! the host array VAL(LDA,*) to the device matrix.  The handle, LDA and
! NC are passed by value, VAL by reference; the result is the C status.
interface writeDnsDevice

  function writeDnsDeviceFloat(deviceMat,val,lda,nc) &
       & result(res) bind(c,name='writeDnsDeviceFloat')
    use iso_c_binding
    integer(c_int)        :: res
    type(c_ptr), value    :: deviceMat
    integer(c_int), value :: lda,nc
    real(c_float)         :: val(lda,*)
  end function writeDnsDeviceFloat

  function writeDnsDeviceDouble(deviceMat,val,lda,nc) &
       & result(res) bind(c,name='writeDnsDeviceDouble')
    use iso_c_binding
    integer(c_int)        :: res
    type(c_ptr), value    :: deviceMat
    integer(c_int), value :: lda,nc
    real(c_double)        :: val(lda,*)
  end function writeDnsDeviceDouble

  ! The binding name previously lacked its final 'x'
  ! ('writeDnsDeviceFloatComple'), which matches no C function.
  function writeDnsDeviceFloatComplex(deviceMat,val,lda,nc) &
       & result(res) bind(c,name='writeDnsDeviceFloatComplex')
    use iso_c_binding
    integer(c_int)           :: res
    type(c_ptr), value       :: deviceMat
    integer(c_int), value    :: lda,nc
    complex(c_float_complex) :: val(lda,*)
  end function writeDnsDeviceFloatComplex

  function writeDnsDeviceDoubleComplex(deviceMat,val,lda,nc) &
       & result(res) bind(c,name='writeDnsDeviceDoubleComplex')
    use iso_c_binding
    integer(c_int)            :: res
    type(c_ptr), value        :: deviceMat
    integer(c_int), value     :: lda,nc
    complex(c_double_complex) :: val(lda,*)
  end function writeDnsDeviceDoubleComplex

end interface writeDnsDevice
|
||||
|
||||
! Generic name resolving to the four type-specific C entry points
! readDnsDevice{Float,Double,FloatComplex,DoubleComplex}, which copy
! the device matrix into the host array VAL(LDA,*).  The handle, LDA
! and NC are passed by value, VAL by reference; the result is the C
! status.
interface readDnsDevice

  function readDnsDeviceFloat(deviceMat,val,lda,nc) &
       & result(res) bind(c,name='readDnsDeviceFloat')
    use iso_c_binding
    integer(c_int)        :: res
    type(c_ptr), value    :: deviceMat
    integer(c_int), value :: lda,nc
    real(c_float)         :: val(lda,*)
  end function readDnsDeviceFloat

  function readDnsDeviceDouble(deviceMat,val,lda,nc) &
       & result(res) bind(c,name='readDnsDeviceDouble')
    use iso_c_binding
    integer(c_int)        :: res
    type(c_ptr), value    :: deviceMat
    integer(c_int), value :: lda,nc
    real(c_double)        :: val(lda,*)
  end function readDnsDeviceDouble

  ! The binding name previously lacked its final 'x'
  ! ('readDnsDeviceFloatComple'), which matches no C function.
  function readDnsDeviceFloatComplex(deviceMat,val,lda,nc) &
       & result(res) bind(c,name='readDnsDeviceFloatComplex')
    use iso_c_binding
    integer(c_int)           :: res
    type(c_ptr), value       :: deviceMat
    integer(c_int), value    :: lda,nc
    complex(c_float_complex) :: val(lda,*)
  end function readDnsDeviceFloatComplex

  function readDnsDeviceDoubleComplex(deviceMat,val,lda,nc) &
       & result(res) bind(c,name='readDnsDeviceDoubleComplex')
    use iso_c_binding
    integer(c_int)            :: res
    type(c_ptr), value        :: deviceMat
    integer(c_int), value     :: lda,nc
    complex(c_double_complex) :: val(lda,*)
  end function readDnsDeviceDoubleComplex

end interface readDnsDevice
|
||||
|
||||
! Binding to the C routine freeDnsDevice; the device matrix handle is
! passed by value.
interface
  subroutine freeDnsDevice(deviceMat) bind(c,name='freeDnsDevice')
    use iso_c_binding, only : c_ptr
    type(c_ptr), value :: deviceMat
  end subroutine freeDnsDevice
end interface
|
||||
|
||||
! Binding to an argument-less C routine named resetDnsTimer.
! NOTE(review): no C function of that name appears alongside the other
! Dns device routines; confirm it is defined before calling it.
interface
  subroutine resetDnsTimer() &
       & bind(c,name='resetDnsTimer')
    use iso_c_binding
  end subroutine resetDnsTimer
end interface
|
||||
! Binding to a C routine named getDnsTimer returning a C double.
! NOTE(review): no C function of that name appears alongside the other
! Dns device routines; confirm it is defined before calling it.
interface
  function getDnsTimer() result(res) bind(c,name='getDnsTimer')
    use iso_c_binding, only : c_double
    real(c_double) :: res
  end function getDnsTimer
end interface
|
||||
|
||||
|
||||
! Binding to the C routine getDnsDevicePitch: takes the device matrix
! handle by value and returns its pitch as a C int.
interface
  function getDnsDevicePitch(deviceMat) result(res) &
       & bind(c,name='getDnsDevicePitch')
    use iso_c_binding, only : c_int, c_ptr
    integer(c_int)     :: res
    type(c_ptr), value :: deviceMat
  end function getDnsDevicePitch
end interface
|
||||
|
||||
!!$ interface csputDnsDeviceFloat
|
||||
!!$ function dev_csputDnsDeviceFloat(deviceMat, nnz, ia, ja, val) &
|
||||
!!$ & result(res) bind(c,name='dev_csputDnsDeviceFloat')
|
||||
!!$ use iso_c_binding
|
||||
!!$ integer(c_int) :: res
|
||||
!!$ type(c_ptr), value :: deviceMat , ia, ja, val
|
||||
!!$ integer(c_int), value :: nnz
|
||||
!!$ end function dev_csputDnsDeviceFloat
|
||||
!!$ end interface
|
||||
!!$
|
||||
!!$ interface csputDnsDeviceDouble
|
||||
!!$ function dev_csputDnsDeviceDouble(deviceMat, nnz, ia, ja, val) &
|
||||
!!$ & result(res) bind(c,name='dev_csputDnsDeviceDouble')
|
||||
!!$ use iso_c_binding
|
||||
!!$ integer(c_int) :: res
|
||||
!!$ type(c_ptr), value :: deviceMat , ia, ja, val
|
||||
!!$ integer(c_int), value :: nnz
|
||||
!!$ end function dev_csputDnsDeviceDouble
|
||||
!!$ end interface
|
||||
!!$
|
||||
!!$ interface csputDnsDeviceFloatComplex
|
||||
!!$ function dev_csputDnsDeviceFloatComplex(deviceMat, nnz, ia, ja, val) &
|
||||
!!$ & result(res) bind(c,name='dev_csputDnsDeviceFloatComplex')
|
||||
!!$ use iso_c_binding
|
||||
!!$ integer(c_int) :: res
|
||||
!!$ type(c_ptr), value :: deviceMat , ia, ja, val
|
||||
!!$ integer(c_int), value :: nnz
|
||||
!!$ end function dev_csputDnsDeviceFloatComplex
|
||||
!!$ end interface
|
||||
!!$
|
||||
!!$ interface csputDnsDeviceDoubleComplex
|
||||
!!$ function dev_csputDnsDeviceDoubleComplex(deviceMat, nnz, ia, ja, val) &
|
||||
!!$ & result(res) bind(c,name='dev_csputDnsDeviceDoubleComplex')
|
||||
!!$ use iso_c_binding
|
||||
!!$ integer(c_int) :: res
|
||||
!!$ type(c_ptr), value :: deviceMat , ia, ja, val
|
||||
!!$ integer(c_int), value :: nnz
|
||||
!!$ end function dev_csputDnsDeviceDoubleComplex
|
||||
!!$ end interface
|
||||
|
||||
! Generic name resolving to the four type-specific C entry points
! spmvDnsDevice{Float,Double,FloatComplex,DoubleComplex}.
! TRANSA, the dimensions M, N, K and the three handles are passed by
! value; ALPHA and BETA are passed by reference (the C side takes
! pointers).  The result is the C status.
interface spmvDnsDevice

  ! Single precision real scalars.
  function spmvDnsDeviceFloat(transa,m,n,k,alpha,deviceMat,x,beta,y) &
       & bind(c,name='spmvDnsDeviceFloat') result(res)
    use iso_c_binding, only : c_char, c_int, c_ptr, c_float
    character(c_char), value :: transa
    integer(c_int), value    :: m, n, k
    real(c_float)            :: alpha, beta
    type(c_ptr), value       :: deviceMat, x, y
    integer(c_int)           :: res
  end function spmvDnsDeviceFloat

  ! Double precision real scalars.
  function spmvDnsDeviceDouble(transa,m,n,k,alpha,deviceMat,x,beta,y) &
       & bind(c,name='spmvDnsDeviceDouble') result(res)
    use iso_c_binding, only : c_char, c_int, c_ptr, c_double
    character(c_char), value :: transa
    integer(c_int), value    :: m, n, k
    real(c_double)           :: alpha, beta
    type(c_ptr), value       :: deviceMat, x, y
    integer(c_int)           :: res
  end function spmvDnsDeviceDouble

  ! Single precision complex scalars.
  function spmvDnsDeviceFloatComplex(transa,m,n,k,alpha,deviceMat,x,beta,y) &
       & bind(c,name='spmvDnsDeviceFloatComplex') result(res)
    use iso_c_binding, only : c_char, c_int, c_ptr, c_float_complex
    character(c_char), value :: transa
    integer(c_int), value    :: m, n, k
    complex(c_float_complex) :: alpha, beta
    type(c_ptr), value       :: deviceMat, x, y
    integer(c_int)           :: res
  end function spmvDnsDeviceFloatComplex

  ! Double precision complex scalars.
  function spmvDnsDeviceDoubleComplex(transa,m,n,k,alpha,deviceMat,x,beta,y) &
       & bind(c,name='spmvDnsDeviceDoubleComplex') result(res)
    use iso_c_binding, only : c_char, c_int, c_ptr, c_double_complex
    character(c_char), value  :: transa
    integer(c_int), value     :: m, n, k
    complex(c_double_complex) :: alpha, beta
    type(c_ptr), value        :: deviceMat, x, y
    integer(c_int)            :: res
  end function spmvDnsDeviceDoubleComplex

end interface spmvDnsDevice
|
||||
|
||||
end module dnsdev_mod
|
@ -0,0 +1,301 @@
|
||||
/* Parallel Sparse BLAS GPU plugin */
|
||||
/* (C) Copyright 2013 */
|
||||
|
||||
/* Salvatore Filippone */
|
||||
/* Alessandro Fanfarillo */
|
||||
|
||||
/* Redistribution and use in source and binary forms, with or without */
|
||||
/* modification, are permitted provided that the following conditions */
|
||||
/* are met: */
|
||||
/* 1. Redistributions of source code must retain the above copyright */
|
||||
/* notice, this list of conditions and the following disclaimer. */
|
||||
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||
/* notice, this list of conditions, and the following disclaimer in the */
|
||||
/* documentation and/or other materials provided with the distribution. */
|
||||
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||
/* not be used to endorse or promote products derived from this */
|
||||
/* software without specific written permission. */
|
||||
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
#include <complex.h>
|
||||
//#include "utils.h"
|
||||
//#include "common.h"
|
||||
#include "dvectordev.h"
|
||||
|
||||
|
||||
/* Register a host buffer holding n doubles through registerMappedMemory.
 *   buff  : host buffer, forwarded unchanged
 *   d_p   : output pointer, forwarded unchanged
 *   n     : element count; converted to a byte length here
 *   dummy : never read (presumably only tags the element type for callers
 *           -- TODO confirm)
 * Returns the status of registerMappedMemory. */
int registerMappedDouble(void *buff, void **d_p, int n, double dummy)
{
  return registerMappedMemory(buff, d_p, n * sizeof(double));
}
|
||||
|
||||
int writeMultiVecDeviceDouble(void* deviceVec, double* hostVec)
|
||||
{ int i;
|
||||
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
|
||||
// Ex updateFromHost vector function
|
||||
i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_, devVec->pitch_*devVec->count_*sizeof(double));
|
||||
if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","FallocMultiVecDevice",i);
|
||||
}
|
||||
return(i);
|
||||
}
|
||||
|
||||
/* Rank-2 flavour of writeMultiVecDeviceDouble.
 * Delegates to writeMultiVecDeviceDouble; the leading dimension `ld` is
 * accepted but not used. Returns the delegate's status and logs it on
 * stderr when it is non-zero. */
int writeMultiVecDeviceDoubleR2(void* deviceVec, double* hostVec, int ld)
{
  int info = writeMultiVecDeviceDouble(deviceVec, hostVec);

  if (info == 0)
    return info;

  fprintf(stderr,"From routine : %s : %d \n","writeMultiVecDeviceDoubleR2",info);
  return info;
}
|
||||
|
||||
int readMultiVecDeviceDouble(void* deviceVec, double* hostVec)
|
||||
{ int i,j;
|
||||
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
|
||||
i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_,
|
||||
devVec->pitch_*devVec->count_*sizeof(double));
|
||||
if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceDouble",i);
|
||||
}
|
||||
return(i);
|
||||
}
|
||||
|
||||
/* Rank-2 flavour of readMultiVecDeviceDouble.
 * Delegates to readMultiVecDeviceDouble; the leading dimension `ld` is
 * accepted but not used. Returns the delegate's status and logs it on
 * stderr when it is non-zero. */
int readMultiVecDeviceDoubleR2(void* deviceVec, double* hostVec, int ld)
{
  int info = readMultiVecDeviceDouble(deviceVec, hostVec);

  if (info == 0)
    return info;

  fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceDoubleR2",info);
  return info;
}
|
||||
|
||||
int setscalMultiVecDeviceDouble(double val, int first, int last,
|
||||
int indexBase, void* devMultiVecX)
|
||||
{ int i=0;
|
||||
int pitch = 0;
|
||||
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
|
||||
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||
|
||||
spgpuDsetscal(handle, first, last, indexBase, val, (double *) devVecX->v_);
|
||||
|
||||
return(i);
|
||||
}
|
||||
|
||||
|
||||
int geinsMultiVecDeviceDouble(int n, void* devMultiVecIrl, void* devMultiVecVal,
|
||||
int dupl, int indexBase, void* devMultiVecX)
|
||||
{ int j=0, i=0,nmin=0,nmax=0;
|
||||
int pitch = 0;
|
||||
double beta;
|
||||
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
|
||||
struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl;
|
||||
struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal;
|
||||
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||
pitch = devVecIrl->pitch_;
|
||||
if ((n > devVecIrl->size_) || (n>devVecVal->size_ ))
|
||||
return SPGPU_UNSUPPORTED;
|
||||
|
||||
//fprintf(stderr,"geins: %d %d %p %p %p\n",dupl,n,devVecIrl->v_,devVecVal->v_,devVecX->v_);
|
||||
|
||||
if (dupl == INS_OVERWRITE)
|
||||
beta = 0.0;
|
||||
else if (dupl == INS_ADD)
|
||||
beta = 1.0;
|
||||
else
|
||||
beta = 0.0;
|
||||
|
||||
spgpuDscat(handle, (double *) devVecX->v_, n, (double*)devVecVal->v_,
|
||||
(int*)devVecIrl->v_, indexBase, beta);
|
||||
|
||||
return(i);
|
||||
}
|
||||
|
||||
|
||||
int igathMultiVecDeviceDoubleVecIdx(void* deviceVec, int vectorId, int n,
|
||||
int first, void* deviceIdx, int hfirst,
|
||||
void* host_values, int indexBase)
|
||||
{
|
||||
int i, *idx;
|
||||
struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx;
|
||||
|
||||
i= igathMultiVecDeviceDouble(deviceVec, vectorId, n,
|
||||
first, (void*) devIdx->v_, hfirst, host_values, indexBase);
|
||||
return(i);
|
||||
}
|
||||
|
||||
int igathMultiVecDeviceDouble(void* deviceVec, int vectorId, int n,
|
||||
int first, void* indexes, int hfirst, void* host_values, int indexBase)
|
||||
{
|
||||
int i, *idx =(int *) indexes;;
|
||||
double *hv = (double *) host_values;;
|
||||
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
|
||||
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||
|
||||
i=0;
|
||||
hv = &(hv[hfirst-indexBase]);
|
||||
idx = &(idx[first-indexBase]);
|
||||
spgpuDgath(handle,hv, n, idx,indexBase, (double *) devVec->v_+vectorId*devVec->pitch_);
|
||||
return(i);
|
||||
}
|
||||
|
||||
int iscatMultiVecDeviceDoubleVecIdx(void* deviceVec, int vectorId, int n, int first, void *deviceIdx,
|
||||
int hfirst, void* host_values, int indexBase, double beta)
|
||||
{
|
||||
int i, *idx;
|
||||
struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx;
|
||||
i= iscatMultiVecDeviceDouble(deviceVec, vectorId, n, first,
|
||||
(void*) devIdx->v_, hfirst,host_values, indexBase, beta);
|
||||
return(i);
|
||||
}
|
||||
|
||||
int iscatMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, int first, void *indexes,
|
||||
int hfirst, void* host_values, int indexBase, double beta)
|
||||
{ int i=0;
|
||||
double *hv = (double *) host_values;
|
||||
int *idx=(int *) indexes;
|
||||
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
|
||||
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||
|
||||
idx = &(idx[first-indexBase]);
|
||||
hv = &(hv[hfirst-indexBase]);
|
||||
spgpuDscat(handle, (double *) devVec->v_, n, hv, idx, indexBase, beta);
|
||||
return SPGPU_SUCCESS;
|
||||
|
||||
}
|
||||
|
||||
int scalMultiVecDeviceDouble(double alpha, void* devMultiVecA)
|
||||
{ int i=0;
|
||||
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
|
||||
// Note: inner kernel can handle aliased input/output
|
||||
spgpuDscal(handle, (double *)devVecA->v_, devVecA->pitch_,
|
||||
alpha, (double *)devVecA->v_);
|
||||
return(i);
|
||||
}
|
||||
|
||||
int nrm2MultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA)
|
||||
{ int i=0;
|
||||
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
|
||||
|
||||
spgpuDmnrm2(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_);
|
||||
return(i);
|
||||
}
|
||||
|
||||
int amaxMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA)
|
||||
{ int i=0;
|
||||
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
|
||||
|
||||
spgpuDmamax(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_);
|
||||
return(i);
|
||||
}
|
||||
|
||||
int asumMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA)
|
||||
{ int i=0;
|
||||
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
|
||||
|
||||
spgpuDmasum(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_);
|
||||
|
||||
return(i);
|
||||
}
|
||||
|
||||
int dotMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA, void* devMultiVecB)
|
||||
{int i=0;
|
||||
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
|
||||
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
|
||||
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||
|
||||
spgpuDmdot(handle, y_res, n, (double*)devVecA->v_, (double*)devVecB->v_,devVecA->count_,devVecB->pitch_);
|
||||
return(i);
|
||||
}
|
||||
|
||||
int axpbyMultiVecDeviceDouble(int n,double alpha, void* devMultiVecX,
|
||||
double beta, void* devMultiVecY)
|
||||
{ int j=0, i=0;
|
||||
int pitch = 0;
|
||||
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
|
||||
struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY;
|
||||
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||
pitch = devVecY->pitch_;
|
||||
if ((n > devVecY->size_) || (n>devVecX->size_ ))
|
||||
return SPGPU_UNSUPPORTED;
|
||||
|
||||
for(j=0;j<devVecY->count_;j++)
|
||||
spgpuDaxpby(handle,(double*)devVecY->v_+pitch*j, n, beta,
|
||||
(double*)devVecY->v_+pitch*j, alpha,(double*) devVecX->v_+pitch*j);
|
||||
return(i);
|
||||
}
|
||||
|
||||
int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceVecB)
|
||||
{ int i = 0;
|
||||
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
|
||||
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
|
||||
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||
if ((n > devVecA->size_) || (n>devVecB->size_ ))
|
||||
return SPGPU_UNSUPPORTED;
|
||||
|
||||
spgpuDmaxy(handle, (double*)devVecB->v_, n, alpha, (double*)devVecA->v_,
|
||||
(double*)devVecB->v_, devVecA->count_, devVecA->pitch_);
|
||||
|
||||
return(i);
|
||||
}
|
||||
|
||||
int axybzMultiVecDeviceDouble(int n, double alpha, void *deviceVecA,
|
||||
void *deviceVecB, double beta, void *deviceVecZ)
|
||||
{ int i=0;
|
||||
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
|
||||
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
|
||||
struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ;
|
||||
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||
|
||||
if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ ))
|
||||
return SPGPU_UNSUPPORTED;
|
||||
spgpuDmaxypbz(handle, (double*)devVecZ->v_, n, beta, (double*)devVecZ->v_,
|
||||
alpha, (double*) devVecA->v_, (double*) devVecB->v_,
|
||||
devVecB->count_, devVecB->pitch_);
|
||||
return(i);
|
||||
}
|
||||
|
||||
int absMultiVecDeviceDouble2(int n, double alpha, void *deviceVecA,
|
||||
void *deviceVecB)
|
||||
{ int i=0;
|
||||
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
|
||||
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
|
||||
|
||||
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||
|
||||
if ((n > devVecA->size_) || (n>devVecB->size_ ))
|
||||
return SPGPU_UNSUPPORTED;
|
||||
|
||||
spgpuDabs(handle, (double*)devVecB->v_, n, alpha, (double*)devVecA->v_);
|
||||
|
||||
return(i);
|
||||
}
|
||||
|
||||
int absMultiVecDeviceDouble(int n, double alpha, void *deviceVecA)
|
||||
{ int i = 0;
|
||||
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
|
||||
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||
if (n > devVecA->size_)
|
||||
return SPGPU_UNSUPPORTED;
|
||||
|
||||
spgpuDabs(handle, (double*)devVecA->v_, n, alpha, (double*)devVecA->v_);
|
||||
|
||||
return(i);
|
||||
}
|
||||
|
@ -0,0 +1,75 @@
|
||||
/* Parallel Sparse BLAS GPU plugin */
|
||||
/* (C) Copyright 2013 */
|
||||
|
||||
/* Salvatore Filippone */
|
||||
/* Alessandro Fanfarillo */
|
||||
|
||||
/* Redistribution and use in source and binary forms, with or without */
|
||||
/* modification, are permitted provided that the following conditions */
|
||||
/* are met: */
|
||||
/* 1. Redistributions of source code must retain the above copyright */
|
||||
/* notice, this list of conditions and the following disclaimer. */
|
||||
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||
/* notice, this list of conditions, and the following disclaimer in the */
|
||||
/* documentation and/or other materials provided with the distribution. */
|
||||
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||
/* not be used to endorse or promote products derived from this */
|
||||
/* software without specific written permission. */
|
||||
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
|
||||
|
||||
#pragma once
|
||||
//#include "utils.h"
|
||||
#include "vectordev.h"
|
||||
#include "cuda_runtime.h"
|
||||
#include "core.h"
|
||||
|
||||
int registerMappedDouble(void *, void **, int, double);
|
||||
int writeMultiVecDeviceDouble(void* deviceMultiVec, double* hostMultiVec);
|
||||
int writeMultiVecDeviceDoubleR2(void* deviceMultiVec, double* hostMultiVec, int ld);
|
||||
int readMultiVecDeviceDouble(void* deviceMultiVec, double* hostMultiVec);
|
||||
int readMultiVecDeviceDoubleR2(void* deviceMultiVec, double* hostMultiVec, int ld);
|
||||
|
||||
int setscalMultiVecDeviceDouble(double val, int first, int last,
|
||||
int indexBase, void* devVecX);
|
||||
|
||||
int geinsMultiVecDeviceDouble(int n, void* devVecIrl, void* devVecVal,
|
||||
int dupl, int indexBase, void* devVecX);
|
||||
|
||||
int igathMultiVecDeviceDoubleVecIdx(void* deviceVec, int vectorId, int n,
|
||||
int first, void* deviceIdx, int hfirst,
|
||||
void* host_values, int indexBase);
|
||||
int igathMultiVecDeviceDouble(void* deviceVec, int vectorId, int n,
|
||||
int first, void* indexes, int hfirst, void* host_values,
|
||||
int indexBase);
|
||||
int iscatMultiVecDeviceDoubleVecIdx(void* deviceVec, int vectorId, int n, int first,
|
||||
void *deviceIdx, int hfirst, void* host_values,
|
||||
int indexBase, double beta);
|
||||
int iscatMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, int first, void *indexes,
|
||||
int hfirst, void* host_values, int indexBase, double beta);
|
||||
|
||||
int scalMultiVecDeviceDouble(double alpha, void* devMultiVecA);
|
||||
int nrm2MultiVecDeviceDouble(double* y_res, int n, void* devVecA);
|
||||
int amaxMultiVecDeviceDouble(double* y_res, int n, void* devVecA);
|
||||
int asumMultiVecDeviceDouble(double* y_res, int n, void* devVecA);
|
||||
int dotMultiVecDeviceDouble(double* y_res, int n, void* devVecA, void* devVecB);
|
||||
|
||||
int axpbyMultiVecDeviceDouble(int n, double alpha, void* devVecX, double beta, void* devVecY);
|
||||
int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceVecB);
|
||||
int axybzMultiVecDeviceDouble(int n, double alpha, void *deviceVecA,
|
||||
void *deviceVecB, double beta, void *deviceVecZ);
|
||||
int absMultiVecDeviceDouble(int n, double alpha, void *deviceVecA);
|
||||
int absMultiVecDeviceDouble2(int n, double alpha, void *deviceVecA, void *deviceVecB);
|
||||
|
@ -0,0 +1,686 @@
|
||||
/* Parallel Sparse BLAS GPU plugin */
|
||||
/* (C) Copyright 2013 */
|
||||
|
||||
/* Salvatore Filippone */
|
||||
/* Alessandro Fanfarillo */
|
||||
|
||||
/* Redistribution and use in source and binary forms, with or without */
|
||||
/* modification, are permitted provided that the following conditions */
|
||||
/* are met: */
|
||||
/* 1. Redistributions of source code must retain the above copyright */
|
||||
/* notice, this list of conditions and the following disclaimer. */
|
||||
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||
/* notice, this list of conditions, and the following disclaimer in the */
|
||||
/* documentation and/or other materials provided with the distribution. */
|
||||
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||
/* not be used to endorse or promote products derived from this */
|
||||
/* software without specific written permission. */
|
||||
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
#include <sys/time.h>
|
||||
#include "elldev.h"
|
||||
|
||||
#define PASS_RS 0
|
||||
|
||||
EllDeviceParams getEllDeviceParams(unsigned int rows, unsigned int maxRowSize,
|
||||
unsigned int nnzeros,
|
||||
unsigned int columns, unsigned int elementType,
|
||||
unsigned int firstIndex)
|
||||
{
|
||||
EllDeviceParams params;
|
||||
|
||||
if (elementType == SPGPU_TYPE_DOUBLE)
|
||||
{
|
||||
params.pitch = ((rows + ELL_PITCH_ALIGN_D - 1)/ELL_PITCH_ALIGN_D)*ELL_PITCH_ALIGN_D;
|
||||
}
|
||||
else
|
||||
{
|
||||
params.pitch = ((rows + ELL_PITCH_ALIGN_S - 1)/ELL_PITCH_ALIGN_S)*ELL_PITCH_ALIGN_S;
|
||||
}
|
||||
//For complex?
|
||||
params.elementType = elementType;
|
||||
|
||||
params.rows = rows;
|
||||
params.maxRowSize = maxRowSize;
|
||||
params.avgRowSize = (nnzeros+rows-1)/rows;
|
||||
params.columns = columns;
|
||||
params.firstIndex = firstIndex;
|
||||
|
||||
//params.pitch = computeEllAllocPitch(rows);
|
||||
|
||||
return params;
|
||||
|
||||
}
|
||||
//new
|
||||
/* Allocate the host descriptor and the device buffers of an ELL matrix.
 *   remoteMatrix : out; receives the new struct EllDevice * on success and
 *                  NULL on failure
 *   params       : dimensions and element type (see getEllDeviceParams)
 * Device buffers: rS and diag hold `rows` ints; rP holds allocsize ints;
 * cM holds allocsize elements of the requested type, where
 * allocsize = maxRowSize * pitch and pitch = computeEllAllocPitch(rows).
 * Returns SPGPU_SUCCESS, or SPGPU_UNSUPPORTED when the element type is not
 * one of the four SPGPU_TYPE_* codes or the descriptor cannot be allocated.
 * The element type is validated before anything is allocated, so a failure
 * leaves nothing behind to release. */
int allocEllDevice(void ** remoteMatrix, EllDeviceParams* params)
{
  struct EllDevice *tmp;
  size_t elemSize;

  *remoteMatrix = NULL;

  if (params->elementType == SPGPU_TYPE_FLOAT)
    elemSize = sizeof(float);
  else if (params->elementType == SPGPU_TYPE_DOUBLE)
    elemSize = sizeof(double);
  else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT)
    elemSize = sizeof(cuFloatComplex);
  else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE)
    elemSize = sizeof(cuDoubleComplex);
  else
    return SPGPU_UNSUPPORTED; // Unsupported params

  tmp = (struct EllDevice *)malloc(sizeof(struct EllDevice));
  if (tmp == NULL)
    return SPGPU_UNSUPPORTED; // Host allocation failed; report a non-zero status

  tmp->rows = params->rows;
  tmp->cMPitch = computeEllAllocPitch(tmp->rows);
  tmp->rPPitch = tmp->cMPitch;
  tmp->pitch= tmp->cMPitch;
  tmp->maxRowSize = params->maxRowSize;
  tmp->avgRowSize = params->avgRowSize;
  tmp->allocsize = (int)tmp->maxRowSize * tmp->pitch;
  tmp->columns = params->columns;
  tmp->baseIndex = params->firstIndex;
  tmp->dataType = params->elementType;

  allocRemoteBuffer((void **)&(tmp->rS), tmp->rows*sizeof(int));
  allocRemoteBuffer((void **)&(tmp->diag), tmp->rows*sizeof(int));
  allocRemoteBuffer((void **)&(tmp->rP), tmp->allocsize*sizeof(int));
  allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*elemSize);

  *remoteMatrix = (void *)tmp;
  return SPGPU_SUCCESS;
}
|
||||
|
||||
//new
|
||||
void zeroEllDevice(void *remoteMatrix)
|
||||
{
|
||||
struct EllDevice *tmp = (struct EllDevice *) remoteMatrix;
|
||||
|
||||
if (tmp->dataType == SPGPU_TYPE_FLOAT)
|
||||
cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(float));
|
||||
else if (tmp->dataType == SPGPU_TYPE_DOUBLE)
|
||||
cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(double));
|
||||
else if (tmp->dataType == SPGPU_TYPE_COMPLEX_FLOAT)
|
||||
cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(cuFloatComplex));
|
||||
else if (tmp->dataType == SPGPU_TYPE_COMPLEX_DOUBLE)
|
||||
cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(cuDoubleComplex));
|
||||
else
|
||||
return SPGPU_UNSUPPORTED; // Unsupported params
|
||||
//fprintf(stderr,"From allocEllDevice: %d %d %d %p %p %p\n",tmp->maxRowSize,
|
||||
// tmp->avgRowSize,tmp->allocsize,tmp->rS,tmp->rP,tmp->cM);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
void freeEllDevice(void* remoteMatrix)
|
||||
{
|
||||
struct EllDevice *devMat = (struct EllDevice *) remoteMatrix;
|
||||
//fprintf(stderr,"freeEllDevice\n");
|
||||
if (devMat != NULL) {
|
||||
freeRemoteBuffer(devMat->rS);
|
||||
freeRemoteBuffer(devMat->rP);
|
||||
freeRemoteBuffer(devMat->cM);
|
||||
free(remoteMatrix);
|
||||
}
|
||||
}
|
||||
|
||||
//new
|
||||
int FallocEllDevice(void** deviceMat,unsigned int rows, unsigned int maxRowSize,
|
||||
unsigned int nnzeros,
|
||||
unsigned int columns, unsigned int elementType,
|
||||
unsigned int firstIndex)
|
||||
{ int i;
|
||||
EllDeviceParams p;
|
||||
|
||||
p = getEllDeviceParams(rows, maxRowSize, nnzeros, columns, elementType, firstIndex);
|
||||
i = allocEllDevice(deviceMat, &p);
|
||||
if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i);
|
||||
}
|
||||
return(i);
|
||||
}
|
||||
|
||||
void sspmdmm_gpu(float *z,int s, int vPitch, float *y, float alpha, float* cM, int* rP, int* rS,
|
||||
int avgRowSize, int maxRowSize, int rows, int pitch, float *x, float beta, int firstIndex)
|
||||
{
|
||||
int i=0;
|
||||
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||
|
||||
for (i=0; i<s; i++)
|
||||
{
|
||||
if (PASS_RS) {
|
||||
spgpuSellspmv (handle, (float*) z, (float*)y, alpha, (float*) cM, rP, pitch, pitch, rS,
|
||||
NULL, avgRowSize, maxRowSize, rows, (float*)x, beta, firstIndex);
|
||||
} else {
|
||||
spgpuSellspmv (handle, (float*) z, (float*)y, alpha, (float*) cM, rP, pitch, pitch, NULL,
|
||||
NULL, avgRowSize, maxRowSize, rows, (float*)x, beta, firstIndex);
|
||||
}
|
||||
z += vPitch;
|
||||
y += vPitch;
|
||||
x += vPitch;
|
||||
}
|
||||
}
|
||||
//new
|
||||
int spmvEllDeviceFloat(void *deviceMat, float alpha, void* deviceX,
|
||||
float beta, void* deviceY)
|
||||
{ int i=SPGPU_SUCCESS;
|
||||
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||
|
||||
#ifdef VERBOSE
|
||||
__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");
|
||||
__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");
|
||||
__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");
|
||||
#endif
|
||||
/*spgpuSellspmv (handle, (float*) y->v_, (float*)y->v_, alpha,
|
||||
(float*) devMat->cM, devMat->rP, devMat->cMPitch,
|
||||
devMat->rPPitch, devMat->rS, devMat->rows,
|
||||
(float*)x->v_, beta, devMat->baseIndex);*/
|
||||
sspmdmm_gpu ( (float *)y->v_,y->count_, y->pitch_, (float *)y->v_, alpha, (float *)devMat->cM, devMat->rP, devMat->rS,
|
||||
devMat->avgRowSize, devMat->maxRowSize, devMat->rows, devMat->pitch,
|
||||
(float *)x->v_, beta, devMat->baseIndex);
|
||||
return(i);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
dspmdmm_gpu (double *z,int s, int vPitch, double *y, double alpha, double* cM, int* rP,
|
||||
int* rS, int avgRowSize, int maxRowSize, int rows, int pitch,
|
||||
double *x, double beta, int firstIndex)
|
||||
{
|
||||
int i=0;
|
||||
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||
for (i=0; i<s; i++)
|
||||
{
|
||||
if (PASS_RS) {
|
||||
spgpuDellspmv (handle, (double*) z, (double*)y, alpha, (double*) cM, rP,
|
||||
pitch, pitch, rS,
|
||||
NULL, avgRowSize, maxRowSize, rows, (double*)x, beta, firstIndex);
|
||||
} else {
|
||||
spgpuDellspmv (handle, (double*) z, (double*)y, alpha, (double*) cM, rP,
|
||||
pitch, pitch, NULL,
|
||||
NULL, avgRowSize, maxRowSize, rows, (double*)x, beta, firstIndex);
|
||||
}
|
||||
z += vPitch;
|
||||
y += vPitch;
|
||||
x += vPitch;
|
||||
}
|
||||
}
|
||||
|
||||
//new
|
||||
int spmvEllDeviceDouble(void *deviceMat, double alpha, void* deviceX,
|
||||
double beta, void* deviceY)
|
||||
{
|
||||
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||
|
||||
/*spgpuDellspmv (handle, (double*) y->v_, (double*)y->v_, alpha, (double*) devMat->cM, devMat->rP, devMat->cMPitch, devMat->rPPitch, devMat->rS, devMat->rows, (double*)x->v_, beta, devMat->baseIndex);*/
|
||||
/* fprintf(stderr,"From spmvEllDouble: mat %d %d %d %d y %d %d \n", */
|
||||
/* devMat->avgRowSize, devMat->maxRowSize, devMat->rows, */
|
||||
/* devMat->pitch, y->count_, y->pitch_); */
|
||||
dspmdmm_gpu ((double *)y->v_, y->count_, y->pitch_, (double *)y->v_,
|
||||
alpha, (double *)devMat->cM,
|
||||
devMat->rP, devMat->rS, devMat->avgRowSize,
|
||||
devMat->maxRowSize, devMat->rows, devMat->pitch,
|
||||
(double *)x->v_, beta, devMat->baseIndex);
|
||||
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
void
|
||||
cspmdmm_gpu (cuFloatComplex *z, int s, int vPitch, cuFloatComplex *y,
|
||||
cuFloatComplex alpha, cuFloatComplex* cM,
|
||||
int* rP, int* rS, int avgRowSize, int maxRowSize, int rows, int pitch,
|
||||
cuFloatComplex *x, cuFloatComplex beta, int firstIndex)
|
||||
{
|
||||
int i=0;
|
||||
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||
for (i=0; i<s; i++)
|
||||
{
|
||||
if (PASS_RS) {
|
||||
spgpuCellspmv (handle, (cuFloatComplex *) z, (cuFloatComplex *)y, alpha, (cuFloatComplex *) cM, rP,
|
||||
pitch, pitch, rS, NULL, avgRowSize, maxRowSize, rows, (cuFloatComplex *) x, beta, firstIndex);
|
||||
} else {
|
||||
spgpuCellspmv (handle, (cuFloatComplex *) z, (cuFloatComplex *)y, alpha, (cuFloatComplex *) cM, rP,
|
||||
pitch, pitch, NULL, NULL, avgRowSize, maxRowSize, rows, (cuFloatComplex *) x, beta, firstIndex);
|
||||
}
|
||||
z += vPitch;
|
||||
y += vPitch;
|
||||
x += vPitch;
|
||||
}
|
||||
}
|
||||
|
||||
int spmvEllDeviceFloatComplex(void *deviceMat, float complex alpha, void* deviceX,
|
||||
float complex beta, void* deviceY)
|
||||
{
|
||||
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||
|
||||
cuFloatComplex a = make_cuFloatComplex(crealf(alpha),cimagf(alpha));
|
||||
cuFloatComplex b = make_cuFloatComplex(crealf(beta),cimagf(beta));
|
||||
cspmdmm_gpu ((cuFloatComplex *)y->v_, y->count_, y->pitch_, (cuFloatComplex *)y->v_, a, (cuFloatComplex *)devMat->cM,
|
||||
devMat->rP, devMat->rS, devMat->avgRowSize, devMat->maxRowSize, devMat->rows, devMat->pitch,
|
||||
(cuFloatComplex *)x->v_, b, devMat->baseIndex);
|
||||
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
void
|
||||
zspmdmm_gpu (cuDoubleComplex *z, int s, int vPitch, cuDoubleComplex *y, cuDoubleComplex alpha, cuDoubleComplex* cM,
|
||||
int* rP, int* rS, int avgRowSize, int maxRowSize, int rows, int pitch,
|
||||
cuDoubleComplex *x, cuDoubleComplex beta, int firstIndex)
|
||||
{
|
||||
int i=0;
|
||||
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||
for (i=0; i<s; i++)
|
||||
{
|
||||
if (PASS_RS) {
|
||||
spgpuZellspmv (handle, (cuDoubleComplex *) z, (cuDoubleComplex *)y, alpha, (cuDoubleComplex *) cM, rP,
|
||||
pitch, pitch, rS, NULL, avgRowSize, maxRowSize, rows, (cuDoubleComplex *) x, beta, firstIndex);
|
||||
} else {
|
||||
spgpuZellspmv (handle, (cuDoubleComplex *) z, (cuDoubleComplex *)y, alpha, (cuDoubleComplex *) cM, rP,
|
||||
pitch, pitch, NULL, NULL, avgRowSize, maxRowSize, rows, (cuDoubleComplex *) x, beta, firstIndex);
|
||||
}
|
||||
z += vPitch;
|
||||
y += vPitch;
|
||||
x += vPitch;
|
||||
}
|
||||
}
|
||||
|
||||
int spmvEllDeviceDoubleComplex(void *deviceMat, double complex alpha, void* deviceX,
|
||||
double complex beta, void* deviceY)
|
||||
{
|
||||
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||
|
||||
cuDoubleComplex a = make_cuDoubleComplex(creal(alpha),cimag(alpha));
|
||||
cuDoubleComplex b = make_cuDoubleComplex(creal(beta),cimag(beta));
|
||||
zspmdmm_gpu ((cuDoubleComplex *)y->v_, y->count_, y->pitch_, (cuDoubleComplex *)y->v_, a, (cuDoubleComplex *)devMat->cM,
|
||||
devMat->rP, devMat->rS, devMat->avgRowSize, devMat->maxRowSize, devMat->rows,
|
||||
devMat->pitch, (cuDoubleComplex *)x->v_, b, devMat->baseIndex);
|
||||
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
int writeEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag)
|
||||
{ int i;
|
||||
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||
// Ex updateFromHost function
|
||||
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float));
|
||||
if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||
if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||
if (i==0) i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||||
//i = writeEllDevice(deviceMat, (void *) val, ja, irn);
|
||||
/*if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i);
|
||||
}*/
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
int writeEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag)
|
||||
{ int i;
|
||||
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||
// Ex updateFromHost function
|
||||
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double));
|
||||
if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||
if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||
if (i==0) i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||||
|
||||
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
|
||||
if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i);
|
||||
}
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
int writeEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag)
|
||||
{ int i;
|
||||
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||
// Ex updateFromHost function
|
||||
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex));
|
||||
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||||
|
||||
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);
|
||||
if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i);
|
||||
}*/
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
int writeEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag)
|
||||
{ int i;
|
||||
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||
// Ex updateFromHost function
|
||||
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex));
|
||||
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||||
|
||||
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);
|
||||
if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i);
|
||||
}*/
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
int readEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag)
|
||||
{ int i;
|
||||
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(float));
|
||||
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||||
/*i = readEllDevice(deviceMat, (void *) val, ja, irn);
|
||||
if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i);
|
||||
}*/
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
int readEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag)
|
||||
{ int i;
|
||||
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(double));
|
||||
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||||
/*if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
|
||||
}*/
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
int readEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag)
|
||||
{ int i;
|
||||
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex));
|
||||
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||||
/*if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
|
||||
}*/
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
int readEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag)
|
||||
{ int i;
|
||||
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex));
|
||||
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||||
/*if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
|
||||
}*/
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
int getEllDevicePitch(void* deviceMat)
|
||||
{ int i;
|
||||
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||
i = devMat->pitch; //old
|
||||
//i = getPitchEllDevice(deviceMat);
|
||||
return(i);
|
||||
}
|
||||
|
||||
int getEllDeviceMaxRowSize(void* deviceMat)
|
||||
{ int i;
|
||||
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||
i = devMat->maxRowSize;
|
||||
return(i);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// New copying interface
|
||||
|
||||
int psiCopyCooToElgFloat(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn,
|
||||
int *idisp, int *ja, float *val, void *deviceMat)
|
||||
{ int i;
|
||||
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||
float *devVal;
|
||||
int *devIdisp, *devJa;
|
||||
spgpuHandle_t handle;
|
||||
handle = psb_cudaGetHandle();
|
||||
|
||||
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
|
||||
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
|
||||
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(float));
|
||||
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(float));
|
||||
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
|
||||
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
|
||||
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
|
||||
|
||||
if (i==0) psi_cuda_s_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm,
|
||||
(int *) devMat->rS,devIdisp,devJa,devVal,
|
||||
(int *) devMat->diag, (int *) devMat->rP, (float *)devMat->cM);
|
||||
// Ex updateFromHost function
|
||||
//i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float));
|
||||
//if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||
//if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||
|
||||
|
||||
freeRemoteBuffer(devIdisp);
|
||||
freeRemoteBuffer(devJa);
|
||||
freeRemoteBuffer(devVal);
|
||||
|
||||
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
|
||||
if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i);
|
||||
}
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int psiCopyCooToElgDouble(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn,
|
||||
int *idisp, int *ja, double *val, void *deviceMat)
|
||||
{ int i;
|
||||
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||
double *devVal;
|
||||
int *devIdisp, *devJa;
|
||||
spgpuHandle_t handle;
|
||||
handle = psb_cudaGetHandle();
|
||||
|
||||
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
|
||||
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
|
||||
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(double));
|
||||
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(double));
|
||||
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
|
||||
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
|
||||
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
|
||||
|
||||
if (i==0) psi_cuda_d_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm,
|
||||
(int *) devMat->rS,devIdisp,devJa,devVal,
|
||||
(int *) devMat->diag, (int *) devMat->rP, (double *)devMat->cM);
|
||||
// Ex updateFromHost function
|
||||
//i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double));
|
||||
//if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||
//if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||
|
||||
|
||||
freeRemoteBuffer(devIdisp);
|
||||
freeRemoteBuffer(devJa);
|
||||
freeRemoteBuffer(devVal);
|
||||
|
||||
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
|
||||
if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i);
|
||||
}
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int psiCopyCooToElgFloatComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn,
|
||||
int *idisp, int *ja, float complex *val, void *deviceMat)
|
||||
{ int i;
|
||||
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||
float complex *devVal;
|
||||
int *devIdisp, *devJa;
|
||||
spgpuHandle_t handle;
|
||||
handle = psb_cudaGetHandle();
|
||||
|
||||
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
|
||||
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
|
||||
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuFloatComplex));
|
||||
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuFloatComplex));
|
||||
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
|
||||
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
|
||||
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
|
||||
|
||||
if (i==0) psi_cuda_c_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm,
|
||||
(int *) devMat->rS,devIdisp,devJa,devVal,
|
||||
(int *) devMat->diag,(int *) devMat->rP, (float complex *)devMat->cM);
|
||||
// Ex updateFromHost function
|
||||
//i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float complex));
|
||||
//if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||
//if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||
|
||||
|
||||
freeRemoteBuffer(devIdisp);
|
||||
freeRemoteBuffer(devJa);
|
||||
freeRemoteBuffer(devVal);
|
||||
|
||||
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
|
||||
if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloatComplex",i);
|
||||
}
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int psiCopyCooToElgDoubleComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn,
|
||||
int *idisp, int *ja, double complex *val, void *deviceMat)
|
||||
{ int i;
|
||||
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||
double complex *devVal;
|
||||
int *devIdisp, *devJa;
|
||||
spgpuHandle_t handle;
|
||||
handle = psb_cudaGetHandle();
|
||||
|
||||
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
|
||||
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
|
||||
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuDoubleComplex));
|
||||
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuDoubleComplex));
|
||||
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
|
||||
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
|
||||
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
|
||||
|
||||
if (i==0) psi_cuda_z_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm,
|
||||
(int *) devMat->rS,devIdisp,devJa,devVal,
|
||||
(int *) devMat->diag,(int *) devMat->rP, (double complex *)devMat->cM);
|
||||
// Ex updateFromHost function
|
||||
//i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double complex));
|
||||
//if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||
//if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||
|
||||
|
||||
freeRemoteBuffer(devIdisp);
|
||||
freeRemoteBuffer(devJa);
|
||||
freeRemoteBuffer(devVal);
|
||||
|
||||
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
|
||||
if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDoubleComplex",i);
|
||||
}
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int dev_csputEllDeviceFloat(void* deviceMat, int nnz, void *ia, void *ja, void *val)
|
||||
{ int i;
|
||||
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||
struct MultiVectDevice *devVal = (struct MultiVectDevice *) val;
|
||||
struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia;
|
||||
struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja;
|
||||
float alpha=1.0;
|
||||
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||
|
||||
if (nnz <=0) return SPGPU_SUCCESS;
|
||||
//fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt);
|
||||
|
||||
spgpuSellcsput(handle,alpha,(float *) devMat->cM,
|
||||
devMat->rP,devMat->pitch, devMat->pitch, devMat->rS,
|
||||
nnz, devIa->v_, devJa->v_, (float *) devVal->v_, 1);
|
||||
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
int dev_csputEllDeviceDouble(void* deviceMat, int nnz, void *ia, void *ja, void *val)
|
||||
{ int i;
|
||||
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||
struct MultiVectDevice *devVal = (struct MultiVectDevice *) val;
|
||||
struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia;
|
||||
struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja;
|
||||
double alpha=1.0;
|
||||
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||
|
||||
if (nnz <=0) return SPGPU_SUCCESS;
|
||||
//fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt);
|
||||
|
||||
spgpuDellcsput(handle,alpha,(double *) devMat->cM,
|
||||
devMat->rP,devMat->pitch, devMat->pitch, devMat->rS,
|
||||
nnz, devIa->v_, devJa->v_, (double *) devVal->v_, 1);
|
||||
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int dev_csputEllDeviceFloatComplex(void* deviceMat, int nnz,
|
||||
void *ia, void *ja, void *val)
|
||||
{ int i;
|
||||
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||
struct MultiVectDevice *devVal = (struct MultiVectDevice *) val;
|
||||
struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia;
|
||||
struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja;
|
||||
cuFloatComplex alpha = make_cuFloatComplex(1.0, 0.0);
|
||||
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||
|
||||
if (nnz <=0) return SPGPU_SUCCESS;
|
||||
//fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt);
|
||||
|
||||
spgpuCellcsput(handle,alpha,(cuFloatComplex *) devMat->cM,
|
||||
devMat->rP,devMat->pitch, devMat->pitch, devMat->rS,
|
||||
nnz, devIa->v_, devJa->v_, (cuFloatComplex *) devVal->v_, 1);
|
||||
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
int dev_csputEllDeviceDoubleComplex(void* deviceMat, int nnz,
|
||||
void *ia, void *ja, void *val)
|
||||
{ int i;
|
||||
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||
struct MultiVectDevice *devVal = (struct MultiVectDevice *) val;
|
||||
struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia;
|
||||
struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja;
|
||||
cuDoubleComplex alpha = make_cuDoubleComplex(1.0, 0.0);
|
||||
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||
|
||||
if (nnz <=0) return SPGPU_SUCCESS;
|
||||
//fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt);
|
||||
|
||||
spgpuZellcsput(handle,alpha,(cuDoubleComplex *) devMat->cM,
|
||||
devMat->rP,devMat->pitch, devMat->pitch, devMat->rS,
|
||||
nnz, devIa->v_, devJa->v_, (cuDoubleComplex *) devVal->v_, 1);
|
||||
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,177 @@
|
||||
/* Parallel Sparse BLAS GPU plugin */
|
||||
/* (C) Copyright 2013 */
|
||||
|
||||
/* Salvatore Filippone */
|
||||
/* Alessandro Fanfarillo */
|
||||
|
||||
/* Redistribution and use in source and binary forms, with or without */
|
||||
/* modification, are permitted provided that the following conditions */
|
||||
/* are met: */
|
||||
/* 1. Redistributions of source code must retain the above copyright */
|
||||
/* notice, this list of conditions and the following disclaimer. */
|
||||
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||
/* notice, this list of conditions, and the following disclaimer in the */
|
||||
/* documentation and/or other materials provided with the distribution. */
|
||||
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||
/* not be used to endorse or promote products derived from this */
|
||||
/* software without specific written permission. */
|
||||
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
|
||||
|
||||
#ifndef _ELLDEV_H_
|
||||
#define _ELLDEV_H_
|
||||
|
||||
#include "cintrf.h"
|
||||
#include "cuComplex.h"
|
||||
#include "ell.h"
|
||||
|
||||
/*
 * Descriptor of an ELL-format sparse matrix; the member comments below are
 * carried over from the original declaration.
 */
struct EllDevice
{
  /* Compressed matrix coefficients; the element type is not fixed here
     (it can be float or double). */
  void *cM;

  /* Row pointers (same size as cM). */
  int *rP;
  int *diag;
  /* Row sizes. */
  int *rS;

  /* Matrix size (uncompressed). */
  int rows;
  int columns;

  int pitch;      /* old */

  int cMPitch;

  int rPPitch;

  int maxRowSize;
  int avgRowSize;

  /* Allocation size (in elements). */
  int allocsize;

  /* Index base (i.e. 0 for C, 1 for Fortran). */
  int baseIndex;
  /* real/complex, single/double */
  int dataType;
};
|
||||
|
||||
/*
 * Parameter bundle describing an ELL matrix allocation.
 * The resulting allocation for cM and rP will be
 * pitch * maxRowSize * (size of the elementType).
 */
typedef struct EllDeviceParams
{
  unsigned int elementType;

  unsigned int pitch;        /* pitch, in number of elements */

  unsigned int rows;         /* number of rows; used to allocate the rS array */

  unsigned int columns;      /* number of columns; used for error-checking */

  unsigned int maxRowSize;   /* largest row size */
  unsigned int avgRowSize;

  unsigned int firstIndex;   /* first index (e.g. 0 or 1) */
} EllDeviceParams;
|
||||
|
||||
int FallocEllDevice(void** deviceMat, unsigned int rows, unsigned int maxRowSize,
|
||||
unsigned int nnzeros,
|
||||
unsigned int columns, unsigned int elementType,
|
||||
unsigned int firstIndex);
|
||||
int allocEllDevice(void ** remoteMatrix, EllDeviceParams* params);
|
||||
void freeEllDevice(void* remoteMatrix);
|
||||
|
||||
int writeEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag);
|
||||
int writeEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag);
|
||||
int writeEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag);
|
||||
int writeEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag);
|
||||
|
||||
int readEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag);
|
||||
int readEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag);
|
||||
int readEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag);
|
||||
int readEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag);
|
||||
|
||||
int spmvEllDeviceFloat(void *deviceMat, float alpha, void* deviceX,
|
||||
float beta, void* deviceY);
|
||||
int spmvEllDeviceDouble(void *deviceMat, double alpha, void* deviceX,
|
||||
double beta, void* deviceY);
|
||||
int spmvEllDeviceFloatComplex(void *deviceMat, float complex alpha, void* deviceX,
|
||||
float complex beta, void* deviceY);
|
||||
int spmvEllDeviceDoubleComplex(void *deviceMat, double complex alpha, void* deviceX,
|
||||
double complex beta, void* deviceY);
|
||||
|
||||
|
||||
|
||||
int psiCopyCooToElgFloat(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn,
|
||||
int *idisp, int *ja, float *val, void *deviceMat);
|
||||
|
||||
int psiCopyCooToElgDouble(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn,
|
||||
int *idisp, int *ja, double *val, void *deviceMat);
|
||||
|
||||
int psiCopyCooToElgFloatComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn,
|
||||
int *idisp, int *ja, float complex *val, void *deviceMat);
|
||||
|
||||
int psiCopyCooToElgDoubleComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn,
|
||||
int *idisp, int *ja, double complex *val, void *deviceMat);
|
||||
|
||||
|
||||
void psi_cuda_s_CopyCooToElg(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx,
|
||||
int hacksz, int ldv, int nzm,
|
||||
int *rS,int *devIdisp, int *devJa, float *devVal,
|
||||
int *idiag, int *rP, float *cM);
|
||||
|
||||
void psi_cuda_d_CopyCooToElg(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx,
|
||||
int hacksz, int ldv, int nzm,
|
||||
int *rS,int *devIdisp, int *devJa, double *devVal,
|
||||
int *idiag, int *rP, double *cM);
|
||||
|
||||
void psi_cuda_c_CopyCooToElg(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx,
|
||||
int hacksz, int ldv, int nzm,
|
||||
int *rS,int *devIdisp, int *devJa, float complex *devVal,
|
||||
int *idiag, int *rP, float complex *cM);
|
||||
|
||||
void psi_cuda_z_CopyCooToElg(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx,
|
||||
int hacksz, int ldv, int nzm,
|
||||
int *rS,int *devIdisp, int *devJa, double complex *devVal,
|
||||
int *idiag, int *rP, double complex *cM);
|
||||
|
||||
|
||||
int dev_csputEllDeviceFloat(void* deviceMat, int nnz,
|
||||
void *ia, void *ja, void *val);
|
||||
int dev_csputEllDeviceDouble(void* deviceMat, int nnz,
|
||||
void *ia, void *ja, void *val);
|
||||
int dev_csputEllDeviceFloatComplex(void* deviceMat, int nnz,
|
||||
void *ia, void *ja, void *val);
|
||||
int dev_csputEllDeviceDoubleComplex(void* deviceMat, int nnz,
|
||||
void *ia, void *ja, void *val);
|
||||
|
||||
void zeroEllDevice(void* deviceMat);
|
||||
|
||||
int getEllDevicePitch(void* deviceMat);
|
||||
|
||||
// sparse Ell matrix-vector product
|
||||
//int spmvEllDeviceFloat(void *deviceMat, float* alpha, void* deviceX, float* beta, void* deviceY);
|
||||
//int spmvEllDeviceDouble(void *deviceMat, double* alpha, void* deviceX, double* beta, void* deviceY);
|
||||
|
||||
#endif
|
@ -0,0 +1,321 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
|
||||
module elldev_mod
|
||||
use iso_c_binding
|
||||
use core_mod
|
||||
|
||||
type, bind(c) :: elldev_parms
|
||||
integer(c_int) :: element_type
|
||||
integer(c_int) :: pitch
|
||||
integer(c_int) :: rows
|
||||
integer(c_int) :: columns
|
||||
integer(c_int) :: maxRowSize
|
||||
integer(c_int) :: avgRowSize
|
||||
integer(c_int) :: firstIndex
|
||||
end type elldev_parms
|
||||
|
||||
interface
|
||||
function FgetEllDeviceParams(rows, maxRowSize, nnzeros, columns, elementType, firstIndex) &
|
||||
& result(res) bind(c,name='getEllDeviceParams')
|
||||
use iso_c_binding
|
||||
import :: elldev_parms
|
||||
type(elldev_parms) :: res
|
||||
integer(c_int), value :: rows,maxRowSize,nnzeros,columns,elementType,firstIndex
|
||||
end function FgetEllDeviceParams
|
||||
end interface
|
||||
|
||||
|
||||
interface
|
||||
function FallocEllDevice(deviceMat,rows,maxRowSize,nnzeros,columns,&
|
||||
& elementType,firstIndex) &
|
||||
& result(res) bind(c,name='FallocEllDevice')
|
||||
use iso_c_binding
|
||||
integer(c_int) :: res
|
||||
integer(c_int), value :: rows,maxRowSize,nnzeros,columns,elementType,firstIndex
|
||||
type(c_ptr) :: deviceMat
|
||||
end function FallocEllDevice
|
||||
end interface
|
||||
|
||||
|
||||
interface writeEllDevice
|
||||
|
||||
function writeEllDeviceFloat(deviceMat,val,ja,ldj,irn,idiag) &
|
||||
& result(res) bind(c,name='writeEllDeviceFloat')
|
||||
use iso_c_binding
|
||||
integer(c_int) :: res
|
||||
type(c_ptr), value :: deviceMat
|
||||
integer(c_int), value :: ldj
|
||||
real(c_float) :: val(ldj,*)
|
||||
integer(c_int) :: ja(ldj,*),irn(*),idiag(*)
|
||||
end function writeEllDeviceFloat
|
||||
|
||||
function writeEllDeviceDouble(deviceMat,val,ja,ldj,irn,idiag) &
|
||||
& result(res) bind(c,name='writeEllDeviceDouble')
|
||||
use iso_c_binding
|
||||
integer(c_int) :: res
|
||||
type(c_ptr), value :: deviceMat
|
||||
integer(c_int), value :: ldj
|
||||
real(c_double) :: val(ldj,*)
|
||||
integer(c_int) :: ja(ldj,*),irn(*),idiag(*)
|
||||
end function writeEllDeviceDouble
|
||||
|
||||
function writeEllDeviceFloatComplex(deviceMat,val,ja,ldj,irn,idiag) &
|
||||
& result(res) bind(c,name='writeEllDeviceFloatComplex')
|
||||
use iso_c_binding
|
||||
integer(c_int) :: res
|
||||
type(c_ptr), value :: deviceMat
|
||||
integer(c_int), value :: ldj
|
||||
complex(c_float_complex) :: val(ldj,*)
|
||||
integer(c_int) :: ja(ldj,*),irn(*),idiag(*)
|
||||
end function writeEllDeviceFloatComplex
|
||||
|
||||
function writeEllDeviceDoubleComplex(deviceMat,val,ja,ldj,irn,idiag) &
|
||||
& result(res) bind(c,name='writeEllDeviceDoubleComplex')
|
||||
use iso_c_binding
|
||||
integer(c_int) :: res
|
||||
type(c_ptr), value :: deviceMat
|
||||
integer(c_int), value :: ldj
|
||||
complex(c_double_complex) :: val(ldj,*)
|
||||
integer(c_int) :: ja(ldj,*),irn(*),idiag(*)
|
||||
end function writeEllDeviceDoubleComplex
|
||||
|
||||
end interface
|
||||
|
||||
interface readEllDevice
|
||||
|
||||
function readEllDeviceFloat(deviceMat,val,ja,ldj,irn,idiag) &
|
||||
& result(res) bind(c,name='readEllDeviceFloat')
|
||||
use iso_c_binding
|
||||
integer(c_int) :: res
|
||||
type(c_ptr), value :: deviceMat
|
||||
integer(c_int), value :: ldj
|
||||
real(c_float) :: val(ldj,*)
|
||||
integer(c_int) :: ja(ldj,*),irn(*),idiag(*)
|
||||
end function readEllDeviceFloat
|
||||
|
||||
function readEllDeviceDouble(deviceMat,val,ja,ldj,irn,idiag) &
|
||||
& result(res) bind(c,name='readEllDeviceDouble')
|
||||
use iso_c_binding
|
||||
integer(c_int) :: res
|
||||
type(c_ptr), value :: deviceMat
|
||||
integer(c_int), value :: ldj
|
||||
real(c_double) :: val(ldj,*)
|
||||
integer(c_int) :: ja(ldj,*),irn(*),idiag(*)
|
||||
end function readEllDeviceDouble
|
||||
|
||||
function readEllDeviceFloatComplex(deviceMat,val,ja,ldj,irn,idiag) &
|
||||
& result(res) bind(c,name='readEllDeviceFloatComplex')
|
||||
use iso_c_binding
|
||||
integer(c_int) :: res
|
||||
type(c_ptr), value :: deviceMat
|
||||
integer(c_int), value :: ldj
|
||||
complex(c_float_complex) :: val(ldj,*)
|
||||
integer(c_int) :: ja(ldj,*),irn(*),idiag(*)
|
||||
end function readEllDeviceFloatComplex
|
||||
|
||||
function readEllDeviceDoubleComplex(deviceMat,val,ja,ldj,irn,idiag) &
|
||||
& result(res) bind(c,name='readEllDeviceDoubleComplex')
|
||||
use iso_c_binding
|
||||
integer(c_int) :: res
|
||||
type(c_ptr), value :: deviceMat
|
||||
integer(c_int), value :: ldj
|
||||
complex(c_double_complex) :: val(ldj,*)
|
||||
integer(c_int) :: ja(ldj,*),irn(*),idiag(*)
|
||||
end function readEllDeviceDoubleComplex
|
||||
|
||||
end interface
|
||||
|
||||
interface
|
||||
subroutine freeEllDevice(deviceMat) &
|
||||
& bind(c,name='freeEllDevice')
|
||||
use iso_c_binding
|
||||
type(c_ptr), value :: deviceMat
|
||||
end subroutine freeEllDevice
|
||||
end interface
|
||||
|
||||
interface
|
||||
subroutine zeroEllDevice(deviceMat) &
|
||||
& bind(c,name='zeroEllDevice')
|
||||
use iso_c_binding
|
||||
type(c_ptr), value :: deviceMat
|
||||
end subroutine zeroEllDevice
|
||||
end interface
|
||||
|
||||
interface
|
||||
subroutine resetEllTimer() bind(c,name='resetEllTimer')
|
||||
use iso_c_binding
|
||||
end subroutine resetEllTimer
|
||||
end interface
|
||||
interface
|
||||
function getEllTimer() &
|
||||
& bind(c,name='getEllTimer') result(res)
|
||||
use iso_c_binding
|
||||
real(c_double) :: res
|
||||
end function getEllTimer
|
||||
end interface
|
||||
|
||||
|
||||
interface
  ! Binding to the C routine 'getEllDevicePitch'; returns a C int for the
  ! device matrix passed by value.
  function getEllDevicePitch(deviceMat) &
       & bind(c,name='getEllDevicePitch') result(res)
    use iso_c_binding, only : c_ptr, c_int
    integer(c_int)     :: res
    type(c_ptr), value :: deviceMat
  end function getEllDevicePitch
end interface
|
||||
|
||||
interface
  ! Binding to the C routine 'getEllDeviceMaxRowSize'; returns a C int for
  ! the device matrix passed by value.
  function getEllDeviceMaxRowSize(deviceMat) &
       & bind(c,name='getEllDeviceMaxRowSize') result(res)
    use iso_c_binding, only : c_ptr, c_int
    integer(c_int)     :: res
    type(c_ptr), value :: deviceMat
  end function getEllDeviceMaxRowSize
end interface
|
||||
|
||||
|
||||
! Generic interface over the four C routines psiCopyCooToElg{Float,Double,
! FloatComplex,DoubleComplex}.  The specifics differ only in the type of
! val; the scalar sizes are passed by value, the arrays by reference.
interface psi_CopyCooToElg
  function psiCopyCooToElgFloat(nr, nc, nza, hacksz, ldv, nzm, irn, &
       & idisp, ja, val, deviceMat) &
       & result(res) bind(c,name='psiCopyCooToElgFloat')
    use iso_c_binding, only : c_ptr, c_int, c_float
    integer(c_int), value        :: nr, nc, nza, hacksz, ldv, nzm
    integer(c_int), dimension(*) :: irn, idisp, ja
    real(c_float), dimension(*)  :: val
    type(c_ptr), value           :: deviceMat
    integer(c_int)               :: res
  end function psiCopyCooToElgFloat

  function psiCopyCooToElgDouble(nr, nc, nza, hacksz, ldv, nzm, irn, &
       & idisp, ja, val, deviceMat) &
       & result(res) bind(c,name='psiCopyCooToElgDouble')
    use iso_c_binding, only : c_ptr, c_int, c_double
    integer(c_int), value        :: nr, nc, nza, hacksz, ldv, nzm
    integer(c_int), dimension(*) :: irn, idisp, ja
    real(c_double), dimension(*) :: val
    type(c_ptr), value           :: deviceMat
    integer(c_int)               :: res
  end function psiCopyCooToElgDouble

  function psiCopyCooToElgFloatComplex(nr, nc, nza, hacksz, ldv, nzm, irn, &
       & idisp, ja, val, deviceMat) &
       & result(res) bind(c,name='psiCopyCooToElgFloatComplex')
    use iso_c_binding, only : c_ptr, c_int, c_float_complex
    integer(c_int), value                  :: nr, nc, nza, hacksz, ldv, nzm
    integer(c_int), dimension(*)           :: irn, idisp, ja
    complex(c_float_complex), dimension(*) :: val
    type(c_ptr), value                     :: deviceMat
    integer(c_int)                         :: res
  end function psiCopyCooToElgFloatComplex

  function psiCopyCooToElgDoubleComplex(nr, nc, nza, hacksz, ldv, nzm, irn, &
       & idisp, ja, val, deviceMat) &
       & result(res) bind(c,name='psiCopyCooToElgDoubleComplex')
    use iso_c_binding, only : c_ptr, c_int, c_double_complex
    integer(c_int), value                   :: nr, nc, nza, hacksz, ldv, nzm
    integer(c_int), dimension(*)            :: irn, idisp, ja
    complex(c_double_complex), dimension(*) :: val
    type(c_ptr), value                      :: deviceMat
    integer(c_int)                          :: res
  end function psiCopyCooToElgDoubleComplex
end interface
|
||||
|
||||
! Generic name csputEllDeviceFloat resolving to the C routine
! 'dev_csputEllDeviceFloat'.  All pointer arguments are opaque C pointers
! passed by value; nnz is a C int passed by value.
interface csputEllDeviceFloat
  function dev_csputEllDeviceFloat(deviceMat, nnz, ia, ja, val) &
       & result(res) bind(c,name='dev_csputEllDeviceFloat')
    use iso_c_binding, only : c_ptr, c_int
    type(c_ptr), value    :: deviceMat
    integer(c_int), value :: nnz
    type(c_ptr), value    :: ia, ja, val
    integer(c_int)        :: res
  end function dev_csputEllDeviceFloat
end interface
|
||||
|
||||
! Generic name csputEllDeviceDouble resolving to the C routine
! 'dev_csputEllDeviceDouble'.  All pointer arguments are opaque C pointers
! passed by value; nnz is a C int passed by value.
interface csputEllDeviceDouble
  function dev_csputEllDeviceDouble(deviceMat, nnz, ia, ja, val) &
       & result(res) bind(c,name='dev_csputEllDeviceDouble')
    use iso_c_binding, only : c_ptr, c_int
    type(c_ptr), value    :: deviceMat
    integer(c_int), value :: nnz
    type(c_ptr), value    :: ia, ja, val
    integer(c_int)        :: res
  end function dev_csputEllDeviceDouble
end interface
|
||||
|
||||
! Generic name csputEllDeviceFloatComplex resolving to the C routine
! 'dev_csputEllDeviceFloatComplex'.  All pointer arguments are opaque C
! pointers passed by value; nnz is a C int passed by value.
interface csputEllDeviceFloatComplex
  function dev_csputEllDeviceFloatComplex(deviceMat, nnz, ia, ja, val) &
       & result(res) bind(c,name='dev_csputEllDeviceFloatComplex')
    use iso_c_binding, only : c_ptr, c_int
    type(c_ptr), value    :: deviceMat
    integer(c_int), value :: nnz
    type(c_ptr), value    :: ia, ja, val
    integer(c_int)        :: res
  end function dev_csputEllDeviceFloatComplex
end interface
|
||||
|
||||
! Generic name csputEllDeviceDoubleComplex resolving to the C routine
! 'dev_csputEllDeviceDoubleComplex'.  All pointer arguments are opaque C
! pointers passed by value; nnz is a C int passed by value.
interface csputEllDeviceDoubleComplex
  function dev_csputEllDeviceDoubleComplex(deviceMat, nnz, ia, ja, val) &
       & result(res) bind(c,name='dev_csputEllDeviceDoubleComplex')
    use iso_c_binding, only : c_ptr, c_int
    type(c_ptr), value    :: deviceMat
    integer(c_int), value :: nnz
    type(c_ptr), value    :: ia, ja, val
    integer(c_int)        :: res
  end function dev_csputEllDeviceDoubleComplex
end interface
|
||||
|
||||
! Generic interface over the four C routines spmvEllDevice{Float,Double,
! FloatComplex,DoubleComplex}.  The matrix and both vectors are opaque C
! pointers passed by value; alpha and beta are scalars passed by value and
! are the only arguments whose type differs between the specifics.
interface spmvEllDevice
  function spmvEllDeviceFloat(deviceMat, alpha, x, beta, y) &
       & result(res) bind(c,name='spmvEllDeviceFloat')
    use iso_c_binding, only : c_ptr, c_int, c_float
    type(c_ptr), value   :: deviceMat, x, y
    real(c_float), value :: alpha, beta
    integer(c_int)       :: res
  end function spmvEllDeviceFloat

  function spmvEllDeviceDouble(deviceMat, alpha, x, beta, y) &
       & result(res) bind(c,name='spmvEllDeviceDouble')
    use iso_c_binding, only : c_ptr, c_int, c_double
    type(c_ptr), value    :: deviceMat, x, y
    real(c_double), value :: alpha, beta
    integer(c_int)        :: res
  end function spmvEllDeviceDouble

  function spmvEllDeviceFloatComplex(deviceMat, alpha, x, beta, y) &
       & result(res) bind(c,name='spmvEllDeviceFloatComplex')
    use iso_c_binding, only : c_ptr, c_int, c_float_complex
    type(c_ptr), value              :: deviceMat, x, y
    complex(c_float_complex), value :: alpha, beta
    integer(c_int)                  :: res
  end function spmvEllDeviceFloatComplex

  function spmvEllDeviceDoubleComplex(deviceMat, alpha, x, beta, y) &
       & result(res) bind(c,name='spmvEllDeviceDoubleComplex')
    use iso_c_binding, only : c_ptr, c_int, c_double_complex
    type(c_ptr), value               :: deviceMat, x, y
    complex(c_double_complex), value :: alpha, beta
    integer(c_int)                   :: res
  end function spmvEllDeviceDoubleComplex
end interface
|
||||
|
||||
end module elldev_mod
|
@ -0,0 +1,76 @@
|
||||
/* Parallel Sparse BLAS GPU plugin */
|
||||
/* (C) Copyright 2013 */
|
||||
|
||||
/* Salvatore Filippone */
|
||||
/* Alessandro Fanfarillo */
|
||||
|
||||
/* Redistribution and use in source and binary forms, with or without */
|
||||
/* modification, are permitted provided that the following conditions */
|
||||
/* are met: */
|
||||
/* 1. Redistributions of source code must retain the above copyright */
|
||||
/* notice, this list of conditions and the following disclaimer. */
|
||||
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||
/* notice, this list of conditions, and the following disclaimer in the */
|
||||
/* documentation and/or other materials provided with the distribution. */
|
||||
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||
/* not be used to endorse or promote products derived from this */
|
||||
/* software without specific written permission. */
|
||||
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "cintrf.h"
|
||||
#include "fcusparse.h"
|
||||
|
||||
static cusparseHandle_t *cusparse_handle=NULL;
|
||||
|
||||
|
||||
void setHandle(cusparseHandle_t);
|
||||
|
||||
int FcusparseCreate()
|
||||
{
|
||||
int ret=CUSPARSE_STATUS_SUCCESS;
|
||||
cusparseHandle_t *handle;
|
||||
if (cusparse_handle == NULL) {
|
||||
if ((handle = (cusparseHandle_t *)malloc(sizeof(cusparseHandle_t)))==NULL)
|
||||
return((int) CUSPARSE_STATUS_ALLOC_FAILED);
|
||||
ret = (int)cusparseCreate(handle);
|
||||
if (ret == CUSPARSE_STATUS_SUCCESS)
|
||||
cusparse_handle = handle;
|
||||
}
|
||||
fprintf(stderr,"Created cusparses_handle\n");
|
||||
return (ret);
|
||||
}
|
||||
|
||||
int FcusparseDestroy()
|
||||
{
|
||||
int val;
|
||||
if (cusparse_handle!=NULL){
|
||||
val = (int) cusparseDestroy(*cusparse_handle);
|
||||
free(cusparse_handle);
|
||||
}
|
||||
cusparse_handle=NULL;
|
||||
return(val);
|
||||
}
|
||||
cusparseHandle_t *getHandle()
|
||||
{
|
||||
if (cusparse_handle == NULL)
|
||||
FcusparseCreate();
|
||||
return(cusparse_handle);
|
||||
}
|
||||
|
@ -0,0 +1,68 @@
|
||||
/* Parallel Sparse BLAS GPU plugin */
|
||||
/* (C) Copyright 2013 */
|
||||
|
||||
/* Salvatore Filippone */
|
||||
/* Alessandro Fanfarillo */
|
||||
|
||||
/* Redistribution and use in source and binary forms, with or without */
|
||||
/* modification, are permitted provided that the following conditions */
|
||||
/* are met: */
|
||||
/* 1. Redistributions of source code must retain the above copyright */
|
||||
/* notice, this list of conditions and the following disclaimer. */
|
||||
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||
/* notice, this list of conditions, and the following disclaimer in the */
|
||||
/* documentation and/or other materials provided with the distribution. */
|
||||
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||
/* not be used to endorse or promote products derived from this */
|
||||
/* software without specific written permission. */
|
||||
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
|
||||
|
||||
#ifndef FCUSPARSE_
|
||||
#define FCUSPARSE_
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#if CUDA_SHORT_VERSION <= 10
|
||||
#include <cusparse_v2.h>
|
||||
#else
|
||||
#include <cusparse.h>
|
||||
#endif
|
||||
#include "cintrf.h"
|
||||
|
||||
int FcusparseCreate();
|
||||
int FcusparseDestroy();
|
||||
cusparseHandle_t *getHandle();
|
||||
|
||||
/*
 * Evaluate a CUDA runtime call once.  If it does not yield cudaSuccess, print
 * a diagnostic with the source line and error string, then make the ENCLOSING
 * function return EXIT_FAILURE.
 *
 * The macro-local variable carries a reserved-looking name so that an
 * argument expression mentioning a caller variable called `status` is not
 * captured by the macro's own declaration.
 *
 * The expansion is a plain brace block (not do { } while (0)), so it must not
 * be used as the unbraced body of an if that has an else branch.
 */
#define CHECK_CUDA(func)                                                       \
{                                                                              \
    cudaError_t check_cuda_status_ = (func);                                   \
    if (check_cuda_status_ != cudaSuccess) {                                   \
        printf("CUDA API failed at line %d with error: %s (%d)\n",             \
               __LINE__, cudaGetErrorString(check_cuda_status_),               \
               check_cuda_status_);                                            \
        return EXIT_FAILURE;                                                   \
    }                                                                          \
}
|
||||
|
||||
/*
 * Evaluate a cuSPARSE call once.  If it does not yield
 * CUSPARSE_STATUS_SUCCESS, print a diagnostic with the source line and error
 * string, then make the ENCLOSING function return EXIT_FAILURE.
 *
 * The macro-local variable carries a reserved-looking name so that an
 * argument expression mentioning a caller variable called `status` is not
 * captured by the macro's own declaration.
 *
 * The expansion is a plain brace block (not do { } while (0)), so it must not
 * be used as the unbraced body of an if that has an else branch.
 */
#define CHECK_CUSPARSE(func)                                                   \
{                                                                              \
    cusparseStatus_t check_cusparse_status_ = (func);                          \
    if (check_cusparse_status_ != CUSPARSE_STATUS_SUCCESS) {                   \
        printf("CUSPARSE API failed at line %d with error: %s (%d)\n",         \
               __LINE__, cusparseGetErrorString(check_cusparse_status_),       \
               check_cusparse_status_);                                        \
        return EXIT_FAILURE;                                                   \
    }                                                                          \
}
|
||||
|
||||
#endif
|
@ -0,0 +1,824 @@
|
||||
/* Parallel Sparse BLAS GPU plugin */
|
||||
/* (C) Copyright 2013 */
|
||||
|
||||
/* Salvatore Filippone */
|
||||
/* Alessandro Fanfarillo */
|
||||
|
||||
/* Redistribution and use in source and binary forms, with or without */
|
||||
/* modification, are permitted provided that the following conditions */
|
||||
/* are met: */
|
||||
/* 1. Redistributions of source code must retain the above copyright */
|
||||
/* notice, this list of conditions and the following disclaimer. */
|
||||
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||
/* notice, this list of conditions, and the following disclaimer in the */
|
||||
/* documentation and/or other materials provided with the distribution. */
|
||||
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||
/* not be used to endorse or promote products derived from this */
|
||||
/* software without specific written permission. */
|
||||
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
typedef struct T_CSRGDeviceMat
|
||||
{
|
||||
#if CUDA_SHORT_VERSION <= 10
|
||||
cusparseMatDescr_t descr;
|
||||
cusparseSolveAnalysisInfo_t triang;
|
||||
#elif CUDA_VERSION < 11030
|
||||
cusparseMatDescr_t descr;
|
||||
csrsv2Info_t triang;
|
||||
size_t mvbsize, svbsize;
|
||||
void *mvbuffer, *svbuffer;
|
||||
#else
|
||||
cusparseSpMatDescr_t *spmvDescr;
|
||||
cusparseSpSVDescr_t *spsvDescr;
|
||||
size_t mvbsize, svbsize;
|
||||
void *mvbuffer, *svbuffer;
|
||||
#endif
|
||||
int m, n, nz;
|
||||
TYPE *val;
|
||||
int *irp;
|
||||
int *ja;
|
||||
} T_CSRGDeviceMat;
|
||||
|
||||
/* Interoperability: type coming from Fortran side to distinguish D/S/C/Z. */
|
||||
typedef struct T_Cmat
|
||||
{
|
||||
T_CSRGDeviceMat *mat;
|
||||
} T_Cmat;
|
||||
|
||||
#if CUDA_SHORT_VERSION <= 10
|
||||
typedef struct T_HYBGDeviceMat
|
||||
{
|
||||
cusparseMatDescr_t descr;
|
||||
cusparseSolveAnalysisInfo_t triang;
|
||||
cusparseHybMat_t hybA;
|
||||
int m, n, nz;
|
||||
TYPE *val;
|
||||
int *irp;
|
||||
int *ja;
|
||||
} T_HYBGDeviceMat;
|
||||
|
||||
|
||||
/* Interoperability: type coming from Fortran side to distinguish D/S/C/Z. */
|
||||
typedef struct T_Hmat
|
||||
{
|
||||
T_HYBGDeviceMat *mat;
|
||||
} T_Hmat;
|
||||
#endif
|
||||
|
||||
int T_spmvCSRGDevice(T_Cmat *Mat, TYPE alpha, void *deviceX,
|
||||
TYPE beta, void *deviceY);
|
||||
int T_spsvCSRGDevice(T_Cmat *Mat, TYPE alpha, void *deviceX,
|
||||
TYPE beta, void *deviceY);
|
||||
int T_CSRGDeviceAlloc(T_Cmat *Mat,int nr, int nc, int nz);
|
||||
int T_CSRGDeviceFree(T_Cmat *Mat);
|
||||
|
||||
|
||||
int T_CSRGHost2Device(T_Cmat *Mat, int m, int n, int nz,
|
||||
int *irp, int *ja, TYPE *val);
|
||||
int T_CSRGDevice2Host(T_Cmat *Mat, int m, int n, int nz,
|
||||
int *irp, int *ja, TYPE *val);
|
||||
|
||||
int T_CSRGDeviceGetParms(T_Cmat *Mat,int *nr, int *nc, int *nz);
|
||||
|
||||
#if CUDA_SHORT_VERSION <= 10
|
||||
int T_CSRGDeviceSetMatType(T_Cmat *Mat, int type);
|
||||
int T_CSRGDeviceSetMatFillMode(T_Cmat *Mat, int type);
|
||||
int T_CSRGDeviceSetMatDiagType(T_Cmat *Mat, int type);
|
||||
int T_CSRGDeviceSetMatIndexBase(T_Cmat *Mat, int type);
|
||||
int T_CSRGDeviceCsrsmAnalysis(T_Cmat *Mat);
|
||||
#elif CUDA_VERSION < 11030
|
||||
int T_CSRGDeviceSetMatType(T_Cmat *Mat, int type);
|
||||
int T_CSRGDeviceSetMatFillMode(T_Cmat *Mat, int type);
|
||||
int T_CSRGDeviceSetMatDiagType(T_Cmat *Mat, int type);
|
||||
int T_CSRGDeviceSetMatIndexBase(T_Cmat *Mat, int type);
|
||||
#else
|
||||
|
||||
int T_CSRGCreateSpMVDescr(T_CSRGDeviceMat *cMat);
|
||||
int T_CSRGIsNullSvBuffer(T_CSRGDeviceMat *cMat);
|
||||
int T_CSRGIsNullSvDescr(T_CSRGDeviceMat *cMat);
|
||||
int T_CSRGIsNullMvDescr(T_CSRGDeviceMat *cMat);
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#if CUDA_SHORT_VERSION <= 10
|
||||
|
||||
|
||||
int T_HYBGDeviceFree(T_Hmat *Matrix);
|
||||
int T_spmvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX,
|
||||
TYPE beta, void *deviceY);
|
||||
int T_HYBGDeviceAlloc(T_Hmat *Matrix,int nr, int nc, int nz);
|
||||
int T_HYBGDeviceSetMatDiagType(T_Hmat *Matrix, int type);
|
||||
int T_HYBGDeviceSetMatIndexBase(T_Hmat *Matrix, int type);
|
||||
int T_HYBGDeviceSetMatType(T_Hmat *Matrix, int type);
|
||||
int T_HYBGDeviceSetMatFillMode(T_Hmat *Matrix, int type);
|
||||
int T_HYBGDeviceHybsmAnalysis(T_Hmat *Matrix);
|
||||
int T_spsvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX,
|
||||
TYPE beta, void *deviceY);
|
||||
int T_HYBGHost2Device(T_Hmat *Matrix, int m, int n, int nz,
|
||||
int *irp, int *ja, TYPE *val);
|
||||
#endif
|
||||
|
||||
/*
 * Sparse matrix-vector product  y = alpha*A*x + beta*y  on the device, with A
 * held in CSR format inside Matrix->mat.
 *
 *   Matrix   wrapper holding the device CSR record
 *   alpha    scalar applied to A*x
 *   deviceX  pointer to a struct MultiVectDevice; its v_ member is the input
 *   beta     scalar applied to y
 *   deviceY  pointer to a struct MultiVectDevice; its v_ member is the output
 *
 * Returns CUSPARSE_STATUS_SUCCESS (as int) on success.  Failures detected by
 * CHECK_CUSPARSE / CHECK_CUDA print a message and return EXIT_FAILURE; a
 * failed host allocation returns CUSPARSE_STATUS_ALLOC_FAILED.
 *
 * The implementation is selected at compile time from the CUDA version:
 * legacy csrmv, csrmvEx with a cached work buffer, or the generic SpMV API.
 */
int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX,
		     TYPE beta, void *deviceY)
{
  T_CSRGDeviceMat *cMat=Matrix->mat;
  struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
  struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
  void *vX, *vY;
  cusparseHandle_t *my_handle=getHandle();
  TYPE ealpha=alpha, ebeta=beta;
#if CUDA_SHORT_VERSION <= 10
  vX=x->v_;
  vY=y->v_;

  CHECK_CUSPARSE(cusparseTcsrmv(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
				cMat->m,cMat->n,cMat->nz,(const TYPE *) &alpha,cMat->descr,
				cMat->val, cMat->irp, cMat->ja,
				(const TYPE *) vX, (const TYPE *) &beta, (TYPE *) vY));

#elif CUDA_VERSION < 11030
  size_t bfsz;
  vX=x->v_;
  vY=y->v_;
#if 1
  /* Ask cuSPARSE how much scratch space the merge-path algorithm needs. */
  CHECK_CUSPARSE(cusparseCsrmvEx_bufferSize(*my_handle,CUSPARSE_ALG_MERGE_PATH,
					    CUSPARSE_OPERATION_NON_TRANSPOSE,
					    cMat->m,cMat->n,cMat->nz,
					    (const void *) &ealpha,CUSPARSE_BASE_TYPE,
					    cMat->descr,
					    (const void *) cMat->val,
					    CUSPARSE_BASE_TYPE,
					    (const int *) cMat->irp,
					    (const int *) cMat->ja,
					    (const void *) vX, CUSPARSE_BASE_TYPE,
					    (const void *) &ebeta, CUSPARSE_BASE_TYPE,
					    (void *) vY, CUSPARSE_BASE_TYPE,
					    CUSPARSE_BASE_TYPE, &bfsz));
#else
  bfsz=cMat->nz;
#endif

  /* Grow the cached work buffer only when the current one is too small. */
  if (bfsz > cMat->mvbsize) {
    if (cMat->mvbuffer != NULL) {
      CHECK_CUDA(cudaFree(cMat->mvbuffer));
      cMat->mvbuffer = NULL;
    }
    CHECK_CUDA(cudaMalloc((void **) &(cMat->mvbuffer), bfsz));
    cMat->mvbsize = bfsz;
  }
  CHECK_CUSPARSE(cusparseCsrmvEx(*my_handle,
				 CUSPARSE_ALG_MERGE_PATH,
				 CUSPARSE_OPERATION_NON_TRANSPOSE,
				 cMat->m,cMat->n,cMat->nz,
				 (const void *) &ealpha,CUSPARSE_BASE_TYPE,
				 cMat->descr,
				 (const void *) cMat->val, CUSPARSE_BASE_TYPE,
				 (const int *) cMat->irp, (const int *) cMat->ja,
				 (const void *) vX, CUSPARSE_BASE_TYPE,
				 (const void *) &ebeta, CUSPARSE_BASE_TYPE,
				 (void *) vY, CUSPARSE_BASE_TYPE,
				 CUSPARSE_BASE_TYPE, (void *) cMat->mvbuffer));

#else
  cusparseDnVecDescr_t vecX, vecY;
  size_t bfsz;

  /* Host storage for the descriptor handle is allocated once and reused. */
  if (T_CSRGIsNullMvDescr(cMat)) {
    cMat->spmvDescr = (cusparseSpMatDescr_t *) malloc(sizeof(cusparseSpMatDescr_t));
    if (cMat->spmvDescr == NULL)
      return((int) CUSPARSE_STATUS_ALLOC_FAILED);
  }
  /* The descriptor itself is rebuilt on every call and destroyed below;
     the result of this call is not inspected here. */
  T_CSRGCreateSpMVDescr(cMat);
  vX=x->v_;
  vY=y->v_;
  CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, cMat->m, vY, CUSPARSE_BASE_TYPE) );
  CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, cMat->n, vX, CUSPARSE_BASE_TYPE) );
  CHECK_CUSPARSE(cusparseSpMV_bufferSize(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
					 &alpha,(*(cMat->spmvDescr)),vecX,&beta,vecY,
					 CUSPARSE_BASE_TYPE,CUSPARSE_SPMV_ALG_DEFAULT,
					 &bfsz));
  /* Grow the cached work buffer only when the current one is too small. */
  if (bfsz > cMat->mvbsize) {
    if (cMat->mvbuffer != NULL) {
      CHECK_CUDA(cudaFree(cMat->mvbuffer));
      cMat->mvbuffer = NULL;
    }
    CHECK_CUDA(cudaMalloc((void **) &(cMat->mvbuffer), bfsz));
    cMat->mvbsize = bfsz;
  }
  CHECK_CUSPARSE(cusparseSpMV(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
			      &alpha,(*(cMat->spmvDescr)),vecX,&beta,vecY,
			      CUSPARSE_BASE_TYPE,CUSPARSE_SPMV_ALG_DEFAULT,
			      cMat->mvbuffer));
  CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) );
  CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) );
  CHECK_CUSPARSE(cusparseDestroySpMat(*(cMat->spmvDescr)));
#endif
  /* Every branch that reaches this point completed without error. */
  return((int) CUSPARSE_STATUS_SUCCESS);
}
|
||||
|
||||
/*
 * Sparse triangular solve on the device with the CSR matrix in Matrix->mat.
 *
 *   Matrix   wrapper holding the device CSR record
 *   alpha    scalar passed to the cuSPARSE solve
 *   deviceX  pointer to a struct MultiVectDevice; its v_ member is the
 *            right-hand side
 *   beta     accepted for interface symmetry; not used by this routine
 *   deviceY  pointer to a struct MultiVectDevice; its v_ member receives the
 *            solution
 *
 * Returns CUSPARSE_STATUS_SUCCESS (as int) on success.  For
 * CUDA_SHORT_VERSION <= 10 the cuSPARSE status is returned directly; in the
 * other branches failures detected by CHECK_CUSPARSE / CHECK_CUDA print a
 * message and return EXIT_FAILURE, and a failed host allocation returns
 * CUSPARSE_STATUS_ALLOC_FAILED.
 */
int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX,
		     TYPE beta, void *deviceY)
{
  T_CSRGDeviceMat *cMat=Matrix->mat;
  struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
  struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
  void *vX, *vY;
  cusparseHandle_t *my_handle=getHandle();
#if CUDA_SHORT_VERSION <= 10
  vX=x->v_;
  vY=y->v_;

  return cusparseTcsrsv_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
			      cMat->m,(const TYPE *) &alpha,cMat->descr,
			      cMat->val, cMat->irp, cMat->ja, cMat->triang,
			      (const TYPE *) vX, (TYPE *) vY);
#elif CUDA_VERSION < 11030
  vX=x->v_;
  vY=y->v_;
  CHECK_CUSPARSE(cusparseTcsrsv2_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
				       cMat->m,cMat->nz,
				       (const TYPE *) &alpha,
				       cMat->descr,
				       cMat->val, cMat->irp, cMat->ja,
				       cMat->triang,
				       (const TYPE *) vX, (TYPE *) vY,
				       CUSPARSE_SOLVE_POLICY_USE_LEVEL,
				       (void *) cMat->svbuffer));
#else
  cusparseDnVecDescr_t vecX, vecY;
  size_t bfsz;

  /* Host storage for the matrix descriptor handle is allocated once; do it
     before any cuSPARSE object exists so a failure leaves nothing behind. */
  if (T_CSRGIsNullMvDescr(cMat)) {
    cMat->spmvDescr = (cusparseSpMatDescr_t *) malloc(sizeof(cusparseSpMatDescr_t));
    if (cMat->spmvDescr == NULL)
      return((int) CUSPARSE_STATUS_ALLOC_FAILED);
  }
  vX=x->v_;
  vY=y->v_;
  CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, cMat->m, vY, CUSPARSE_BASE_TYPE) );
  CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, cMat->n, vX, CUSPARSE_BASE_TYPE) );
  /* The matrix descriptor is rebuilt on every call and destroyed below;
     the result of this call is not inspected here. */
  T_CSRGCreateSpMVDescr(cMat);
  // fprintf(stderr,"Entry to SpSVDevice: %d %p\n",
  // 	  T_CSRGIsNullSvDescr(cMat),cMat->spsvDescr);
  if (T_CSRGIsNullSvDescr(cMat)) {
    /* First solve on this matrix: create the solve descriptor, size the
       work buffer and run the analysis phase. */
    cMat->spsvDescr=(cusparseSpSVDescr_t *) malloc(sizeof(cusparseSpSVDescr_t));
    if (cMat->spsvDescr == NULL) {
      cusparseDestroyDnVec(vecX);
      cusparseDestroyDnVec(vecY);
      cusparseDestroySpMat(*(cMat->spmvDescr));
      return((int) CUSPARSE_STATUS_ALLOC_FAILED);
    }
    cMat->svbsize=0;
    CHECK_CUSPARSE( cusparseSpSV_createDescr(cMat->spsvDescr) );
    //fprintf(stderr,"Entry to SpSVDevice: %d %p %d\n",
    //	  T_CSRGIsNullSvDescr(cMat),cMat->spsvDescr,cMat->svbsize);
    CHECK_CUSPARSE(cusparseSpSV_bufferSize(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
					   &alpha,*(cMat->spmvDescr),vecX,vecY,
					   CUSPARSE_BASE_TYPE,
					   CUSPARSE_SPSV_ALG_DEFAULT,
					   *(cMat->spsvDescr),
					   &bfsz));
    if (bfsz > cMat->svbsize) {
      if (cMat->svbuffer != NULL) {
	CHECK_CUDA(cudaFree(cMat->svbuffer));
	cMat->svbuffer = NULL;
      }
      CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz));
      cMat->svbsize=bfsz;
      CHECK_CUSPARSE(cusparseSpSV_analysis(*my_handle,
					   CUSPARSE_OPERATION_NON_TRANSPOSE,
					   &alpha,
					   *(cMat->spmvDescr),
					   vecX, vecY,
					   CUSPARSE_BASE_TYPE,
					   CUSPARSE_SPSV_ALG_DEFAULT,
					   *(cMat->spsvDescr),
					   cMat->svbuffer));
    }
    if (T_CSRGIsNullSvBuffer(cMat)) {
      fprintf(stderr,"SpSV_SOLVE NULL spsv-buffer\n");
    }
  }
  CHECK_CUSPARSE(cusparseSpSV_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
				    &alpha,*(cMat->spmvDescr),vecX,vecY,
				    CUSPARSE_BASE_TYPE,
				    CUSPARSE_SPSV_ALG_DEFAULT,
				    *(cMat->spsvDescr)));
  CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) );
  CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) );
  CHECK_CUSPARSE(cusparseDestroySpMat(*(cMat->spmvDescr)));
#endif
  /* Every branch that reaches this point completed without error. */
  return((int) CUSPARSE_STATUS_SUCCESS);
}
|
||||
|
||||
#if CUDA_VERSION >= 11030
|
||||
/*
 * Build a cuSPARSE CSR descriptor over the device arrays of cMat and store
 * it in *cMat->spmvDescr (the caller must have allocated that storage).
 * Indices are 32-bit and one-based; the value type is CUSPARSE_BASE_TYPE.
 *
 * Returns CUSPARSE_STATUS_SUCCESS (as int) on success; on failure
 * CHECK_CUSPARSE prints a message and returns EXIT_FAILURE.
 */
int T_CSRGCreateSpMVDescr(T_CSRGDeviceMat *cMat)
{
  /* cusparseCreateCsr takes 64-bit sizes. */
  int64_t tr,tc,tz;
  tr = cMat->m;
  tc = cMat->n;
  tz = cMat->nz;
  CHECK_CUSPARSE(cusparseCreateCsr(cMat->spmvDescr,
				   tr,tc,tz,
				   (void *) cMat->irp,
				   (void *) cMat->ja,
				   (void *) cMat->val,
				   CUSPARSE_INDEX_32I,
				   CUSPARSE_INDEX_32I,
				   CUSPARSE_INDEX_BASE_ONE,
				   CUSPARSE_BASE_TYPE) );
  return((int) CUSPARSE_STATUS_SUCCESS);
}
|
||||
#endif
|
||||
/*
 * Allocate a device CSR record with room for nr rows, nc columns and nz
 * nonzeros, and attach it to Matrix->mat.
 *
 * A zero nr or nz still allocates space for one row / one entry.  The row
 * pointer array holds (rows + 1) ints, ja holds one int per nonzero and val
 * one TYPE per nonzero.
 *
 * Returns CUSPARSE_STATUS_SUCCESS (as int) on success,
 * CUSPARSE_STATUS_INVALID_VALUE for a negative size,
 * CUSPARSE_STATUS_ALLOC_FAILED when the host record cannot be allocated, or
 * the code of the failing allocation / descriptor call.  On those explicit
 * failure paths everything allocated so far is released and Matrix->mat is
 * left untouched.  Failures reported through CHECK_CUSPARSE / CHECK_CUDA
 * still return EXIT_FAILURE immediately.
 */
int T_CSRGDeviceAlloc(T_Cmat *Matrix,int nr, int nc, int nz)
{
  T_CSRGDeviceMat *cMat;
  int nr1=nr, nz1=nz, rc;
  cusparseHandle_t *my_handle=getHandle();
  int bfsz;

  if ((nr<0)||(nc<0)||(nz<0))
    return((int) CUSPARSE_STATUS_INVALID_VALUE);
  if ((cMat=(T_CSRGDeviceMat *) malloc(sizeof(T_CSRGDeviceMat)))==NULL)
    return((int) CUSPARSE_STATUS_ALLOC_FAILED);
  cMat->m  = nr;
  cMat->n  = nc;
  cMat->nz = nz;
  /* Never request a zero-sized device buffer. */
  if (nr1 == 0) nr1 = 1;
  if (nz1 == 0) nz1 = 1;
  if ((rc= allocRemoteBuffer(((void **) &(cMat->irp)), ((nr1+1)*sizeof(int)))) != 0) {
    free(cMat);
    return(rc);
  }
  if ((rc= allocRemoteBuffer(((void **) &(cMat->ja)), ((nz1)*sizeof(int)))) != 0) {
    freeRemoteBuffer(cMat->irp);
    free(cMat);
    return(rc);
  }
  if ((rc= allocRemoteBuffer(((void **) &(cMat->val)), ((nz1)*sizeof(TYPE)))) != 0) {
    freeRemoteBuffer(cMat->ja);
    freeRemoteBuffer(cMat->irp);
    free(cMat);
    return(rc);
  }
#if CUDA_SHORT_VERSION <= 10
  if ((rc= cusparseCreateMatDescr(&(cMat->descr))) !=0) {
    freeRemoteBuffer(cMat->val);
    freeRemoteBuffer(cMat->ja);
    freeRemoteBuffer(cMat->irp);
    free(cMat);
    return(rc);
  }
  if ((rc= cusparseCreateSolveAnalysisInfo(&(cMat->triang))) !=0) {
    cusparseDestroyMatDescr(cMat->descr);
    freeRemoteBuffer(cMat->val);
    freeRemoteBuffer(cMat->ja);
    freeRemoteBuffer(cMat->irp);
    free(cMat);
    return(rc);
  }
#elif CUDA_VERSION < 11030
  if ((rc= cusparseCreateMatDescr(&(cMat->descr))) !=0) {
    freeRemoteBuffer(cMat->val);
    freeRemoteBuffer(cMat->ja);
    freeRemoteBuffer(cMat->irp);
    free(cMat);
    return(rc);
  }
  CHECK_CUSPARSE(cusparseSetMatType(cMat->descr,CUSPARSE_MATRIX_TYPE_GENERAL));
  CHECK_CUSPARSE(cusparseSetMatDiagType(cMat->descr,CUSPARSE_DIAG_TYPE_NON_UNIT));
  CHECK_CUSPARSE(cusparseSetMatIndexBase(cMat->descr,CUSPARSE_INDEX_BASE_ONE));
  CHECK_CUSPARSE(cusparseCreateCsrsv2Info(&(cMat->triang)));
  /* Size the triangular-solve work buffer up front. */
  if (cMat->nz > 0) {
    CHECK_CUSPARSE(cusparseTcsrsv2_bufferSize(*my_handle,
					      CUSPARSE_OPERATION_NON_TRANSPOSE,
					      cMat->m,cMat->nz, cMat->descr,
					      cMat->val, cMat->irp, cMat->ja,
					      cMat->triang, &bfsz));
  } else {
    bfsz = 0;
  }

  if (bfsz > 0) {
    CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz));
  } else {
    cMat->svbuffer=NULL;
  }
  cMat->svbsize=bfsz;

  /* The product work buffer is created on demand by the multiply routine. */
  cMat->mvbuffer=NULL;
  cMat->mvbsize = 0;

#else

  /* Descriptors and work buffers are created on demand by the compute
     routines. */
  cMat->spmvDescr=NULL;
  cMat->spsvDescr=NULL;
  cMat->mvbuffer=NULL;
  cMat->svbuffer=NULL;
  cMat->mvbsize=0;
  cMat->svbsize=0;
#endif
  Matrix->mat = cMat;
  return(CUSPARSE_STATUS_SUCCESS);
}
|
||||
|
||||
/*
 * Release the device CSR record attached to Matrix (if any): the three
 * device arrays, the version-specific cuSPARSE objects and work buffers,
 * and finally the host record itself.  Matrix->mat is NULL on a successful
 * return.
 *
 * Returns CUSPARSE_STATUS_SUCCESS (as int); a failure detected by
 * CHECK_CUDA / CHECK_CUSPARSE prints a message and returns EXIT_FAILURE
 * before the remaining resources are released.
 */
int T_CSRGDeviceFree(T_Cmat *Matrix)
{
  T_CSRGDeviceMat *cMat= Matrix->mat;

  if (cMat!=NULL) {
    freeRemoteBuffer(cMat->irp);
    freeRemoteBuffer(cMat->ja);
    freeRemoteBuffer(cMat->val);
#if CUDA_SHORT_VERSION <= 10
    cusparseDestroyMatDescr(cMat->descr);
    cusparseDestroySolveAnalysisInfo(cMat->triang);
#elif CUDA_VERSION < 11030
    cusparseDestroyMatDescr(cMat->descr);
    cusparseDestroyCsrsv2Info(cMat->triang);
    /* The work buffers are obtained with cudaMalloc by the allocation and
       multiply routines of this file; release them here as well. */
    if (cMat->mvbuffer!=NULL) {
      CHECK_CUDA( cudaFree(cMat->mvbuffer));
    }
    cMat->mvbuffer=NULL;
    cMat->mvbsize=0;
    if (cMat->svbuffer!=NULL) {
      CHECK_CUDA( cudaFree(cMat->svbuffer));
    }
    cMat->svbuffer=NULL;
    cMat->svbsize=0;
#else
    if (!T_CSRGIsNullMvDescr(cMat)) {
      // already destroyed spmvDescr, just free the pointer
      free(cMat->spmvDescr);
      cMat->spmvDescr=NULL;
    }
    if (cMat->mvbuffer!=NULL) {
      CHECK_CUDA( cudaFree(cMat->mvbuffer));
    }
    cMat->mvbuffer=NULL;
    cMat->mvbsize=0;
    if (!T_CSRGIsNullSvDescr(cMat)) {
      CHECK_CUSPARSE(cusparseSpSV_destroyDescr(*(cMat->spsvDescr)));
      free(cMat->spsvDescr);
      cMat->spsvDescr=NULL;
    }
    if (cMat->svbuffer!=NULL) {
      CHECK_CUDA( cudaFree(cMat->svbuffer));
    }
    cMat->svbuffer=NULL;
    cMat->svbsize=0;
#endif
    free(cMat);
    Matrix->mat = NULL;
  }
  return(CUSPARSE_STATUS_SUCCESS);
}
|
||||
|
||||
/*
 * Report the dimensions stored in the device CSR record.
 *
 * Writes rows, columns and nonzero count to *nr, *nc and *nz and returns
 * CUSPARSE_STATUS_SUCCESS.  When no record is attached the outputs are left
 * untouched and CUSPARSE_STATUS_ALLOC_FAILED is returned.
 */
int T_CSRGDeviceGetParms(T_Cmat *Matrix,int *nr, int *nc, int *nz)
{
  const T_CSRGDeviceMat *dev = Matrix->mat;

  if (dev == NULL)
    return((int) CUSPARSE_STATUS_ALLOC_FAILED);

  *nr = dev->m;
  *nc = dev->n;
  *nz = dev->nz;
  return(CUSPARSE_STATUS_SUCCESS);
}
|
||||
|
||||
#if CUDA_SHORT_VERSION <= 10
|
||||
|
||||
/* Forward `type` to cusparseSetMatType() for the descriptor of the attached
   device matrix and return the cuSPARSE status as int. */
int T_CSRGDeviceSetMatType(T_Cmat *Matrix, int type)
{
  return ((int) cusparseSetMatType(Matrix->mat->descr, type));
}
|
||||
|
||||
/* Forward `type` to cusparseSetMatFillMode() for the descriptor of the
   attached device matrix and return the cuSPARSE status as int. */
int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type)
{
  return ((int) cusparseSetMatFillMode(Matrix->mat->descr, type));
}
|
||||
|
||||
/* Forward `type` to cusparseSetMatDiagType() for the descriptor of the
   attached device matrix and return the cuSPARSE status as int. */
int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type)
{
  return ((int) cusparseSetMatDiagType(Matrix->mat->descr, type));
}
|
||||
|
||||
/* Forward `type` to cusparseSetMatIndexBase() for the descriptor of the
   attached device matrix and return the cuSPARSE status as int. */
int T_CSRGDeviceSetMatIndexBase(T_Cmat *Matrix, int type)
{
  return ((int) cusparseSetMatIndexBase(Matrix->mat->descr, type));
}
|
||||
|
||||
/*
 * Run the legacy csrsv analysis phase (non-transposed operation) on the
 * attached device matrix, storing the result in its `triang` member.
 * A nonzero status is reported on stderr and returned to the caller.
 */
int T_CSRGDeviceCsrsmAnalysis(T_Cmat *Matrix)
{
  T_CSRGDeviceMat *dev = Matrix->mat;
  cusparseHandle_t *handle = getHandle();
  int status;

  status = (int) cusparseTcsrsv_analysis(*handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
					 dev->m, dev->nz, dev->descr,
					 dev->val, dev->irp, dev->ja,
					 dev->triang);
  if (status != 0)
    fprintf(stderr,"From csrsv_analysis: %d\n",status);
  return status;
}
|
||||
|
||||
#elif CUDA_VERSION < 11030
|
||||
/* Forward `type` to cusparseSetMatType() for the descriptor of the attached
   device matrix and return the cuSPARSE status as int. */
int T_CSRGDeviceSetMatType(T_Cmat *Matrix, int type)
{
  return ((int) cusparseSetMatType(Matrix->mat->descr, type));
}
|
||||
|
||||
/* Forward `type` to cusparseSetMatFillMode() for the descriptor of the
   attached device matrix and return the cuSPARSE status as int. */
int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type)
{
  return ((int) cusparseSetMatFillMode(Matrix->mat->descr, type));
}
|
||||
|
||||
/* Forward `type` to cusparseSetMatDiagType() for the descriptor of the
   attached device matrix and return the cuSPARSE status as int. */
int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type)
{
  return ((int) cusparseSetMatDiagType(Matrix->mat->descr, type));
}
|
||||
|
||||
/* Forward `type` to cusparseSetMatIndexBase() for the descriptor of the
   attached device matrix and return the cuSPARSE status as int. */
int T_CSRGDeviceSetMatIndexBase(T_Cmat *Matrix, int type)
{
  return ((int) cusparseSetMatIndexBase(Matrix->mat->descr, type));
}
|
||||
|
||||
#else
|
||||
|
||||
/*
 * Set the fill-mode attribute on the generic-API matrix descriptor.
 *
 * cMat->spmvDescr is a pointer to the descriptor handle, so it has to be
 * dereferenced before being handed to cusparseSpMatSetAttribute().
 *
 * Returns 0 on success, CUSPARSE_STATUS_INVALID_VALUE (as int) when no matrix
 * or no descriptor storage is attached, or EXIT_FAILURE via CHECK_CUSPARSE.
 *
 * NOTE(review): the multiply and solve routines in this file destroy the
 * descriptor after every call, so an attribute set here may not be seen by
 * later operations -- confirm the intended descriptor lifetime.
 */
int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type)
{
  T_CSRGDeviceMat *cMat= Matrix->mat;
  cusparseFillMode_t mode=type;

  if ((cMat == NULL) || (cMat->spmvDescr == NULL))
    return((int) CUSPARSE_STATUS_INVALID_VALUE);

  CHECK_CUSPARSE(cusparseSpMatSetAttribute(*(cMat->spmvDescr),
					   CUSPARSE_SPMAT_FILL_MODE,
					   (const void*) &mode,
					   sizeof(cusparseFillMode_t)));
  return(0);
}
|
||||
|
||||
/*
 * Set the diagonal-type attribute on the generic-API matrix descriptor.
 *
 * cMat->spmvDescr is a pointer to the descriptor handle, so it has to be
 * dereferenced before being handed to cusparseSpMatSetAttribute().
 *
 * Returns 0 on success, CUSPARSE_STATUS_INVALID_VALUE (as int) when no matrix
 * or no descriptor storage is attached, or EXIT_FAILURE via CHECK_CUSPARSE.
 *
 * NOTE(review): the multiply and solve routines in this file destroy the
 * descriptor after every call, so an attribute set here may not be seen by
 * later operations -- confirm the intended descriptor lifetime.
 */
int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type)
{
  T_CSRGDeviceMat *cMat= Matrix->mat;
  cusparseDiagType_t cutype=type;

  if ((cMat == NULL) || (cMat->spmvDescr == NULL))
    return((int) CUSPARSE_STATUS_INVALID_VALUE);

  CHECK_CUSPARSE(cusparseSpMatSetAttribute(*(cMat->spmvDescr),
					   CUSPARSE_SPMAT_DIAG_TYPE,
					   (const void*) &cutype,
					   sizeof(cusparseDiagType_t)));
  return(0);
}
|
||||
|
||||
/* Return 1 when the record has no storage for the SpMV matrix descriptor
   (spmvDescr is NULL), 0 otherwise. */
int T_CSRGIsNullMvDescr(T_CSRGDeviceMat *cMat)
{
  return (cMat->spmvDescr != NULL) ? 0 : 1;
}
|
||||
|
||||
/* Return 1 when the record has no triangular-solve work buffer (svbuffer is
   NULL), 0 otherwise. */
int T_CSRGIsNullSvBuffer(T_CSRGDeviceMat *cMat)
{
  return (cMat->svbuffer != NULL) ? 0 : 1;
}
|
||||
/* Return 1 when the record has no storage for the SpSV descriptor (spsvDescr
   is NULL), 0 otherwise. */
int T_CSRGIsNullSvDescr(T_CSRGDeviceMat *cMat)
{
  return (cMat->spsvDescr != NULL) ? 0 : 1;
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
 * Copy a host CSR matrix into the device arrays of Matrix->mat.
 *
 *   m, nz         number of rows / nonzeros to copy; (m+1) row pointers,
 *                 nz column indices and nz values are transferred
 *   n             accepted for interface symmetry; not used
 *   irp, ja, val  host arrays to read from
 *
 * Returns CUSPARSE_STATUS_SUCCESS (as int) on success,
 * CUSPARSE_STATUS_INVALID_VALUE when no device matrix is attached or when
 * m / nz are negative or exceed the sizes recorded at allocation (copying
 * more would overrun the device buffers), or the code of the failing
 * writeRemoteBuffer() call.
 *
 * For CUDA 11.x before 11.3 a matrix whose descriptor is marked triangular
 * also gets its csrsv2 analysis run here.
 */
int T_CSRGHost2Device(T_Cmat *Matrix, int m, int n, int nz,
		      int *irp, int *ja, TYPE *val)
{
  int rc;
  T_CSRGDeviceMat *cMat= Matrix->mat;
#if (CUDA_SHORT_VERSION > 10 ) && (CUDA_VERSION < 11030)
  cusparseHandle_t *my_handle=getHandle();
#endif

  if (cMat == NULL)
    return((int) CUSPARSE_STATUS_INVALID_VALUE);
  if ((m < 0) || (nz < 0) || (m > cMat->m) || (nz > cMat->nz))
    return((int) CUSPARSE_STATUS_INVALID_VALUE);

  if ((rc=writeRemoteBuffer((void *) irp, (void *) cMat->irp,
			    (m+1)*sizeof(int)))
      != SPGPU_SUCCESS)
    return(rc);

  if ((rc=writeRemoteBuffer((void *) ja,(void *) cMat->ja,
			    (nz)*sizeof(int)))
      != SPGPU_SUCCESS)
    return(rc);
  if ((rc=writeRemoteBuffer((void *) val, (void *) cMat->val,
			    (nz)*sizeof(TYPE)))
      != SPGPU_SUCCESS)
    return(rc);
#if (CUDA_SHORT_VERSION > 10 ) && (CUDA_VERSION < 11030)
  if (cusparseGetMatType(cMat->descr)== CUSPARSE_MATRIX_TYPE_TRIANGULAR) {
    // Why do we need to set TYPE_GENERAL??? cuSPARSE can be mysterious sometimes.
    cusparseSetMatType(cMat->descr,CUSPARSE_MATRIX_TYPE_GENERAL);
    CHECK_CUSPARSE(cusparseTcsrsv2_analysis(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
					    cMat->m,cMat->nz, cMat->descr,
					    cMat->val, cMat->irp, cMat->ja,
					    cMat->triang, CUSPARSE_SOLVE_POLICY_USE_LEVEL,
					    cMat->svbuffer));
  }
#else
  //cusparseSetMatType(*(cMat->spmvDescr),CUSPARSE_MATRIX_TYPE_GENERAL);
#endif
  return(CUSPARSE_STATUS_SUCCESS);
}
|
||||
|
||||
/* Copy the device CSR arrays attached to Matrix back into host buffers:
 * m+1 row pointers into irp, nz column indices into ja, nz values into val.
 * `n` is not used.  Returns the first non-success readRemoteBuffer code,
 * otherwise CUSPARSE_STATUS_SUCCESS. */
int T_CSRGDevice2Host(T_Cmat *Matrix, int m, int n, int nz,
                      int *irp, int *ja, TYPE *val)
{
  T_CSRGDeviceMat *csrMat = Matrix->mat;
  int status;

  status = readRemoteBuffer((void *) irp, (void *) csrMat->irp, (m+1)*sizeof(int));
  if (status != SPGPU_SUCCESS)
    return(status);

  status = readRemoteBuffer((void *) ja, (void *) csrMat->ja, (nz)*sizeof(int));
  if (status != SPGPU_SUCCESS)
    return(status);

  status = readRemoteBuffer((void *) val, (void *) csrMat->val, (nz)*sizeof(TYPE));
  if (status != SPGPU_SUCCESS)
    return(status);

  return(CUSPARSE_STATUS_SUCCESS);
}
|
||||
|
||||
#if CUDA_SHORT_VERSION <= 10
|
||||
/* Destroy the cuSPARSE objects owned by the HYB matrix (descriptor, solve
 * analysis info, HYB storage), free the host-side struct, and reset
 * Matrix->mat to NULL.  A NULL Matrix->mat is accepted.  Always returns
 * CUSPARSE_STATUS_SUCCESS. */
int T_HYBGDeviceFree(T_Hmat *Matrix)
{
  T_HYBGDeviceMat *hybMat = Matrix->mat;

  if (hybMat != NULL) {
    cusparseDestroyMatDescr(hybMat->descr);
    cusparseDestroySolveAnalysisInfo(hybMat->triang);
    cusparseDestroyHybMat(hybMat->hybA);
    free(hybMat);
  }

  Matrix->mat = NULL;
  return(CUSPARSE_STATUS_SUCCESS);
}
|
||||
|
||||
/* Sparse matrix-vector product on a HYB matrix via cusparseThybmv, using
 * alpha, beta and the device vectors wrapped by deviceX / deviceY.
 *
 * hybmv apparently rejects a descriptor marked triangular, so the
 * descriptor is temporarily declared GENERAL for the duration of the call.
 *
 * Fix with respect to the previous version: the original matrix type is now
 * restored even when hybmv fails.  Before, a failing product left the
 * descriptor permanently marked GENERAL.
 *
 * Returns 0 on success, otherwise the first failing cuSPARSE status.
 */
int T_spmvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX,
                     TYPE beta, void *deviceY)
{
  T_HYBGDeviceMat *hMat = Matrix->mat;
  struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
  struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
  void *vX = x->v_;
  void *vY = y->v_;
  int rc, rcRestore;
  cusparseMatrixType_t type;
  cusparseHandle_t *my_handle = getHandle();

  /* Remember the declared type so that it can be put back afterwards. */
  type = cusparseGetMatType(hMat->descr);
  rc = (int) cusparseSetMatType(hMat->descr, CUSPARSE_MATRIX_TYPE_GENERAL);
  if (rc != 0)
    return(rc);

  rc = (int) cusparseThybmv(*my_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
                            (const TYPE *) &alpha, hMat->descr, hMat->hybA,
                            (const TYPE *) vX, (const TYPE *) &beta,
                            (TYPE *) vY);

  /* Always restore; report the product error first if there was one. */
  rcRestore = (int) cusparseSetMatType(hMat->descr, type);
  if (rc == 0)
    rc = rcRestore;
  return(rc);
}
|
||||
|
||||
/* Allocate the host-side HYB matrix record and create its cuSPARSE objects
 * (matrix descriptor, solve analysis info, HYB storage handle).
 *
 *   nr, nc, nz : rows, columns and nonzeros; each must be >= 0
 *
 * On success Matrix->mat points to the new record and
 * CUSPARSE_STATUS_SUCCESS is returned.  On failure Matrix->mat is left
 * untouched, everything created so far is released (the previous version
 * leaked it), and the failing status is returned.
 */
int T_HYBGDeviceAlloc(T_Hmat *Matrix,int nr, int nc, int nz)
{
  T_HYBGDeviceMat *hMat;
  int rc;

  if ((nr<0)||(nc<0)||(nz<0))
    return((int) CUSPARSE_STATUS_INVALID_VALUE);
  if ((hMat=(T_HYBGDeviceMat *) malloc(sizeof(T_HYBGDeviceMat)))==NULL)
    return((int) CUSPARSE_STATUS_ALLOC_FAILED);

  hMat->m  = nr;
  hMat->n  = nc;
  hMat->nz = nz;
  /* Staging buffers are only allocated during the host-to-device copy. */
  hMat->irp = NULL;
  hMat->ja  = NULL;
  hMat->val = NULL;

  if ((rc = (int) cusparseCreateMatDescr(&(hMat->descr))) != 0) {
    free(hMat);
    return(rc);
  }
  if ((rc = (int) cusparseCreateSolveAnalysisInfo(&(hMat->triang))) != 0) {
    cusparseDestroyMatDescr(hMat->descr);
    free(hMat);
    return(rc);
  }
  if ((rc = (int) cusparseCreateHybMat(&(hMat->hybA))) != 0) {
    cusparseDestroySolveAnalysisInfo(hMat->triang);
    cusparseDestroyMatDescr(hMat->descr);
    free(hMat);
    return(rc);
  }

  Matrix->mat = hMat;
  return(CUSPARSE_STATUS_SUCCESS);
}
|
||||
|
||||
/* Forward `type` to cusparseSetMatDiagType on the HYB matrix descriptor and
 * return the cuSPARSE status converted to int. */
int T_HYBGDeviceSetMatDiagType(T_Hmat *Matrix, int type)
{
  T_HYBGDeviceMat *hybMat = Matrix->mat;
  int status = (int) cusparseSetMatDiagType(hybMat->descr, type);

  return status;
}
|
||||
|
||||
/* Forward `type` to cusparseSetMatIndexBase on the HYB matrix descriptor and
 * return the cuSPARSE status converted to int. */
int T_HYBGDeviceSetMatIndexBase(T_Hmat *Matrix, int type)
{
  T_HYBGDeviceMat *hybMat = Matrix->mat;
  int status = (int) cusparseSetMatIndexBase(hybMat->descr, type);

  return status;
}
|
||||
|
||||
/* Forward `type` to cusparseSetMatType on the HYB matrix descriptor and
 * return the cuSPARSE status converted to int. */
int T_HYBGDeviceSetMatType(T_Hmat *Matrix, int type)
{
  T_HYBGDeviceMat *hybMat = Matrix->mat;
  int status = (int) cusparseSetMatType(hybMat->descr, type);

  return status;
}
|
||||
|
||||
/* Forward `type` to cusparseSetMatFillMode on the HYB matrix descriptor and
 * return the cuSPARSE status converted to int. */
int T_HYBGDeviceSetMatFillMode(T_Hmat *Matrix, int type)
{
  T_HYBGDeviceMat *hybMat = Matrix->mat;
  int status = (int) cusparseSetMatFillMode(hybMat->descr, type);

  return status;
}
|
||||
|
||||
/* Triangular solve on a HYB matrix through cusparseThybsv_solve, reading the
 * device vector wrapped by deviceX and writing the one wrapped by deviceY.
 * `beta` is accepted for interface symmetry but is not passed to the solve.
 * Returns the cuSPARSE status. */
int T_spsvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX,
                     TYPE beta, void *deviceY)
{
  T_HYBGDeviceMat *hybMat = Matrix->mat;
  struct MultiVectDevice *xVec = (struct MultiVectDevice *) deviceX;
  struct MultiVectDevice *yVec = (struct MultiVectDevice *) deviceY;
  cusparseHandle_t *handle = getHandle();
  void *rhs = xVec->v_;
  void *sol = yVec->v_;

  return cusparseThybsv_solve(*handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
                              (const TYPE *) &alpha, hybMat->descr,
                              hybMat->hybA, hybMat->triang,
                              (const TYPE *) rhs, (TYPE *) sol);
}
|
||||
|
||||
/* Run the cuSPARSE hybsv analysis phase for the HYB matrix, filling the
 * solve-analysis info (hMat->triang) that the triangular solve passes to
 * cusparseThybsv_solve.
 *
 * Returns the cuSPARSE status as int; a non-zero status is also reported on
 * stderr.  The diagnostic now names the routine actually called (it used to
 * say csrsv_analysis).
 */
int T_HYBGDeviceHybsmAnalysis(T_Hmat *Matrix)
{
  T_HYBGDeviceMat *hMat = Matrix->mat;
  int rc;
  cusparseHandle_t *my_handle = getHandle();

  rc = (int) cusparseThybsv_analysis(*my_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
                                     hMat->descr, hMat->hybA, hMat->triang);

  if (rc != 0) {
    fprintf(stderr,"From hybsv_analysis: %d\n",rc);
  }
  return(rc);
}
|
||||
|
||||
/* Build the device HYB representation of a host CSR matrix.
 *
 * The CSR arrays are staged in temporary device buffers, converted with
 * cusparseTcsr2hyb, and the staging buffers are then released.
 *
 *   m, n, nz     : rows, columns, nonzeros of the CSR matrix
 *   irp, ja, val : host CSR row pointers, column indices, coefficients
 *
 * Returns 0 on success, otherwise the first failing status.
 *
 * Fix with respect to the previous version: every exit path now releases
 * the staging buffers and resets the pointers to NULL.  Before, a failed
 * allocation or copy returned early and leaked whatever had been allocated.
 */
int T_HYBGHost2Device(T_Hmat *Matrix, int m, int n, int nz,
                      int *irp, int *ja, TYPE *val)
{
  int rc;
  /* Allocate at least one element so that empty matrices get valid buffers. */
  int nr1 = (m  == 0) ? 1 : m;
  int nz1 = (nz == 0) ? 1 : nz;
  T_HYBGDeviceMat *hMat = Matrix->mat;
  cusparseHandle_t *my_handle = getHandle();

  hMat->irp = NULL;
  hMat->ja  = NULL;
  hMat->val = NULL;

  if ((rc = allocRemoteBuffer(((void **) &(hMat->irp)), ((nr1+1)*sizeof(int)))) != 0) {
    hMat->irp = NULL;
    goto cleanup;
  }
  if ((rc = allocRemoteBuffer(((void **) &(hMat->ja)), ((nz1)*sizeof(int)))) != 0) {
    hMat->ja = NULL;
    goto cleanup;
  }
  if ((rc = allocRemoteBuffer(((void **) &(hMat->val)), ((nz1)*sizeof(TYPE)))) != 0) {
    hMat->val = NULL;
    goto cleanup;
  }

  if ((rc = writeRemoteBuffer((void *) irp, (void *) hMat->irp,
                              (m+1)*sizeof(int))) != SPGPU_SUCCESS)
    goto cleanup;
  if ((rc = writeRemoteBuffer((void *) ja, (void *) hMat->ja,
                              (nz)*sizeof(int))) != SPGPU_SUCCESS)
    goto cleanup;
  if ((rc = writeRemoteBuffer((void *) val, (void *) hMat->val,
                              (nz)*sizeof(TYPE))) != SPGPU_SUCCESS)
    goto cleanup;

  rc = (int) cusparseTcsr2hyb(*my_handle, m, n,
                              hMat->descr,
                              (const TYPE *)hMat->val,
                              (const int *)hMat->irp, (const int *)hMat->ja,
                              hMat->hybA,0,
                              CUSPARSE_HYB_PARTITION_AUTO);
  if (rc != 0) {
    fprintf(stderr,"From csr2hyb: %d\n",rc);
  }

 cleanup:
  /* The CSR staging copies are not needed once the conversion is done. */
  if (hMat->irp != NULL) { freeRemoteBuffer(hMat->irp); hMat->irp = NULL; }
  if (hMat->ja  != NULL) { freeRemoteBuffer(hMat->ja);  hMat->ja  = NULL; }
  if (hMat->val != NULL) { freeRemoteBuffer(hMat->val); hMat->val = NULL; }
  return(rc);
}
|
||||
#endif
|
||||
|
@ -0,0 +1,386 @@
|
||||
/* Parallel Sparse BLAS GPU plugin */
|
||||
/* (C) Copyright 2013 */
|
||||
|
||||
/* Salvatore Filippone */
|
||||
/* Alessandro Fanfarillo */
|
||||
|
||||
/* Redistribution and use in source and binary forms, with or without */
|
||||
/* modification, are permitted provided that the following conditions */
|
||||
/* are met: */
|
||||
/* 1. Redistributions of source code must retain the above copyright */
|
||||
/* notice, this list of conditions and the following disclaimer. */
|
||||
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||
/* notice, this list of conditions, and the following disclaimer in the */
|
||||
/* documentation and/or other materials provided with the distribution. */
|
||||
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||
/* not be used to endorse or promote products derived from this */
|
||||
/* software without specific written permission. */
|
||||
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
|
||||
#include "hdiagdev.h"
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#define DEBUG 0
|
||||
void freeHdiagDevice(void* remoteMatrix)
|
||||
{
|
||||
struct HdiagDevice *devMat = (struct HdiagDevice *) remoteMatrix;
|
||||
//fprintf(stderr,"freeHllDevice\n");
|
||||
if (devMat != NULL) {
|
||||
freeRemoteBuffer(devMat->hackOffsets);
|
||||
freeRemoteBuffer(devMat->cM);
|
||||
free(remoteMatrix);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
HdiagDeviceParams getHdiagDeviceParams(unsigned int rows, unsigned int columns,
|
||||
unsigned int allocationHeight, unsigned int hackSize,
|
||||
unsigned int hackCount, unsigned int elementType)
|
||||
{
|
||||
HdiagDeviceParams params;
|
||||
|
||||
params.elementType = elementType;
|
||||
//numero di elementi di val
|
||||
params.rows = rows;
|
||||
params.columns = columns;
|
||||
params.allocationHeight = allocationHeight;
|
||||
params.hackSize = hackSize;
|
||||
params.hackCount = hackCount;
|
||||
|
||||
return params;
|
||||
|
||||
}
|
||||
|
||||
/* Allocate an HDIA matrix: the host-side descriptor plus its device buffers.
 *
 *   remoteMatrix : receives the new descriptor (NULL if nothing was created)
 *   params       : dimensions and element type of the matrix
 *
 * Device buffers allocated:
 *   hackOffsets  : hackCount+1 ints
 *   hdiaOffsets  : allocationHeight ints
 *   cM           : hackSize*allocationHeight elements of the requested type
 *
 * Returns SPGPU_SUCCESS, SPGPU_UNSUPPORTED for an unknown element type or a
 * failed host allocation (the only failure code used in this file), or the
 * status of the failing allocRemoteBuffer call.
 *
 * Changes with respect to the previous version:
 *   - the element type is validated before anything is allocated (an
 *     unsupported type used to leak the descriptor and two device buffers);
 *   - the malloc result is checked;
 *   - device pointers start as NULL, so after a partial failure the
 *     descriptor no longer holds garbage pointers;
 *   - sizes are computed in size_t rather than int.
 */
int allocHdiagDevice(void **remoteMatrix, HdiagDeviceParams* params)
{
  struct HdiagDevice *tmp;
  size_t elemSize;
  int ret=SPGPU_SUCCESS;

  *remoteMatrix = NULL;

  if (params->elementType == SPGPU_TYPE_INT)
    elemSize = sizeof(int);
  else if (params->elementType == SPGPU_TYPE_FLOAT)
    elemSize = sizeof(float);
  else if (params->elementType == SPGPU_TYPE_DOUBLE)
    elemSize = sizeof(double);
  else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT)
    elemSize = sizeof(cuFloatComplex);
  else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE)
    elemSize = sizeof(cuDoubleComplex);
  else
    return SPGPU_UNSUPPORTED; // Unsupported params

  tmp = (struct HdiagDevice *)malloc(sizeof(struct HdiagDevice));
  if (tmp == NULL)
    return SPGPU_UNSUPPORTED;

  *remoteMatrix = (void *) tmp;
#if DEBUG
  fprintf(stderr,"From alloc: %p\n",*remoteMatrix);
#endif

  tmp->rows             = params->rows;
  tmp->hackSize         = params->hackSize;
  tmp->cols             = params->columns;
  tmp->allocationHeight = params->allocationHeight;
  tmp->hackCount        = params->hackCount;
  tmp->hackOffsets      = NULL;
  tmp->hdiaOffsets      = NULL;
  tmp->cM               = NULL;

#if DEBUG
  fprintf(stderr,"hackcount %d allocationHeight %d\n",tmp->hackCount,tmp->allocationHeight);
#endif

  if (ret == SPGPU_SUCCESS)
    ret=allocRemoteBuffer((void **)&(tmp->hackOffsets),
                          ((size_t) tmp->hackCount + 1)*sizeof(int));

  if (ret == SPGPU_SUCCESS)
    ret=allocRemoteBuffer((void **)&(tmp->hdiaOffsets),
                          (size_t) tmp->allocationHeight*sizeof(int));

  if (ret == SPGPU_SUCCESS)
    ret=allocRemoteBuffer((void **)&(tmp->cM),
                          (size_t) tmp->hackSize*(size_t) tmp->allocationHeight*elemSize);

  return ret;
}
|
||||
|
||||
int FallocHdiagDevice(void** deviceMat, unsigned int rows, unsigned int cols,
|
||||
unsigned int allocationHeight, unsigned int hackSize,
|
||||
unsigned int hackCount, unsigned int elementType)
|
||||
{ int i=0;
|
||||
HdiagDeviceParams p;
|
||||
|
||||
p = getHdiagDeviceParams(rows, cols, allocationHeight, hackSize, hackCount,elementType);
|
||||
|
||||
i = allocHdiagDevice(deviceMat, &p);
|
||||
#if DEBUG
|
||||
fprintf(stderr," Falloc %p \n",*deviceMat);
|
||||
#endif
|
||||
|
||||
if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i);
|
||||
}
|
||||
return(i);
|
||||
|
||||
}
|
||||
|
||||
int writeHdiagDeviceDouble(void* deviceMat, double* val, int* hdiaOffsets, int *hackOffsets)
|
||||
{ int i=0,fo,fa,j,k,p;
|
||||
char buf_a[255], buf_o[255],tmp[255];
|
||||
struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat;
|
||||
|
||||
i=SPGPU_SUCCESS;
|
||||
|
||||
|
||||
#if DEBUG
|
||||
fprintf(stderr," Write %p \n",devMat);
|
||||
|
||||
fprintf(stderr,"HDIAG writing to device memory: allocationHeight %d hackCount %d\n",
|
||||
devMat->allocationHeight,devMat->hackCount);
|
||||
fprintf(stderr,"HackOffsets: ");
|
||||
for (j=0; j<devMat->hackCount+1; j++)
|
||||
fprintf(stderr," %d",hackOffsets[j]);
|
||||
fprintf(stderr,"\n");
|
||||
fprintf(stderr,"diaOffsets: ");
|
||||
for (j=0; j<devMat->allocationHeight; j++)
|
||||
fprintf(stderr," %d",hdiaOffsets[j]);
|
||||
fprintf(stderr,"\n");
|
||||
#if 1
|
||||
fprintf(stderr,"values: \n");
|
||||
p=0;
|
||||
for (j=0; j<devMat->hackCount; j++){
|
||||
fprintf(stderr,"Hack no: %d\n",j+1);
|
||||
for (k=0; k<devMat->hackSize*(devMat->allocationHeight/devMat->hackCount); k++){
|
||||
fprintf(stderr," %d %lf\n",p+1,val[p]); p++;
|
||||
}
|
||||
}
|
||||
fprintf(stderr,"\n");
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
if(i== SPGPU_SUCCESS)
|
||||
i = writeRemoteBuffer((void *) hackOffsets,(void *) devMat->hackOffsets,
|
||||
(devMat->hackCount+1)*sizeof(int));
|
||||
|
||||
if(i== SPGPU_SUCCESS)
|
||||
i = writeRemoteBuffer((void*) hdiaOffsets, (void *)devMat->hdiaOffsets,
|
||||
devMat->allocationHeight*sizeof(int));
|
||||
if(i== SPGPU_SUCCESS)
|
||||
i = writeRemoteBuffer((void*) val, (void *)devMat->cM,
|
||||
devMat->allocationHeight*devMat->hackSize*sizeof(double));
|
||||
if (i!=0)
|
||||
fprintf(stderr,"Error in writeHdiagDeviceDouble %d\n",i);
|
||||
|
||||
#if DEBUG
|
||||
fprintf(stderr," EndWrite %p \n",devMat);
|
||||
#endif
|
||||
|
||||
if(i==0)
|
||||
return SPGPU_SUCCESS;
|
||||
else
|
||||
return SPGPU_UNSUPPORTED;
|
||||
}
|
||||
|
||||
|
||||
|
||||
long long int sizeofHdiagDeviceDouble(void* deviceMat)
|
||||
{ int i=0,fo,fa;
|
||||
int *hoff=NULL,*hackoff=NULL;
|
||||
long long int memsize=0;
|
||||
struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat;
|
||||
|
||||
|
||||
memsize += (devMat->hackCount+1)*sizeof(int);
|
||||
memsize += devMat->allocationHeight*sizeof(int);
|
||||
memsize += devMat->allocationHeight*devMat->hackSize*sizeof(double);
|
||||
return(memsize);
|
||||
}
|
||||
|
||||
|
||||
|
||||
int readHdiagDeviceDouble(void* deviceMat, double* a, int* off)
|
||||
{ int i;
|
||||
struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat;
|
||||
/* i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(double)); */
|
||||
/* i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int)); */
|
||||
|
||||
|
||||
/*if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
|
||||
}*/
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
int spmvHdiagDeviceDouble(void *deviceMat, double alpha, void* deviceX,
|
||||
double beta, void* deviceY)
|
||||
{
|
||||
struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat;
|
||||
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||
|
||||
#ifdef VERBOSE
|
||||
/*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/
|
||||
/*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/
|
||||
/*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/
|
||||
#endif
|
||||
#if DEBUG
|
||||
fprintf(stderr," First %p \n",devMat);
|
||||
fprintf(stderr,"%d %d %d %p %p %p\n",devMat->rows,devMat->cols, devMat->hackSize,
|
||||
devMat->hackOffsets, devMat->hdiaOffsets, devMat->cM);
|
||||
#endif
|
||||
spgpuDhdiaspmv (handle, (double*)y->v_, (double *)y->v_, alpha,
|
||||
(double *)devMat->cM,devMat->hdiaOffsets,
|
||||
devMat->hackSize, devMat->hackOffsets, devMat->rows,devMat->cols,
|
||||
x->v_, beta);
|
||||
|
||||
//cudaSync();
|
||||
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
int writeHdiagDeviceFloat(void* deviceMat, float* val, int* hdiaOffsets, int *hackOffsets)
|
||||
{ int i=0,fo,fa,j,k,p;
|
||||
char buf_a[255], buf_o[255],tmp[255];
|
||||
struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat;
|
||||
|
||||
i=SPGPU_SUCCESS;
|
||||
|
||||
|
||||
#if DEBUG
|
||||
fprintf(stderr," Write %p \n",devMat);
|
||||
|
||||
fprintf(stderr,"HDIAG writing to device memory: allocationHeight %d hackCount %d\n",
|
||||
devMat->allocationHeight,devMat->hackCount);
|
||||
fprintf(stderr,"HackOffsets: ");
|
||||
for (j=0; j<devMat->hackCount+1; j++)
|
||||
fprintf(stderr," %d",hackOffsets[j]);
|
||||
fprintf(stderr,"\n");
|
||||
fprintf(stderr,"diaOffsets: ");
|
||||
for (j=0; j<devMat->allocationHeight; j++)
|
||||
fprintf(stderr," %d",hdiaOffsets[j]);
|
||||
fprintf(stderr,"\n");
|
||||
#if 1
|
||||
fprintf(stderr,"values: \n");
|
||||
p=0;
|
||||
for (j=0; j<devMat->hackCount; j++){
|
||||
fprintf(stderr,"Hack no: %d\n",j+1);
|
||||
for (k=0; k<devMat->hackSize*(devMat->allocationHeight/devMat->hackCount); k++){
|
||||
fprintf(stderr," %d %lf\n",p+1,val[p]); p++;
|
||||
}
|
||||
}
|
||||
fprintf(stderr,"\n");
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
if(i== SPGPU_SUCCESS)
|
||||
i = writeRemoteBuffer((void *) hackOffsets,(void *) devMat->hackOffsets,
|
||||
(devMat->hackCount+1)*sizeof(int));
|
||||
|
||||
if(i== SPGPU_SUCCESS)
|
||||
i = writeRemoteBuffer((void*) hdiaOffsets, (void *)devMat->hdiaOffsets,
|
||||
devMat->allocationHeight*sizeof(int));
|
||||
if(i== SPGPU_SUCCESS)
|
||||
i = writeRemoteBuffer((void*) val, (void *)devMat->cM,
|
||||
devMat->allocationHeight*devMat->hackSize*sizeof(float));
|
||||
if (i!=0)
|
||||
fprintf(stderr,"Error in writeHdiagDeviceFloat %d\n",i);
|
||||
|
||||
#if DEBUG
|
||||
fprintf(stderr," EndWrite %p \n",devMat);
|
||||
#endif
|
||||
|
||||
if(i==0)
|
||||
return SPGPU_SUCCESS;
|
||||
else
|
||||
return SPGPU_UNSUPPORTED;
|
||||
}
|
||||
|
||||
|
||||
|
||||
long long int sizeofHdiagDeviceFloat(void* deviceMat)
|
||||
{ int i=0,fo,fa;
|
||||
int *hoff=NULL,*hackoff=NULL;
|
||||
long long int memsize=0;
|
||||
struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat;
|
||||
|
||||
|
||||
memsize += (devMat->hackCount+1)*sizeof(int);
|
||||
memsize += devMat->allocationHeight*sizeof(int);
|
||||
memsize += devMat->allocationHeight*devMat->hackSize*sizeof(float);
|
||||
|
||||
return(memsize);
|
||||
}
|
||||
|
||||
|
||||
|
||||
int readHdiagDeviceFloat(void* deviceMat, float* a, int* off)
|
||||
{ int i;
|
||||
struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat;
|
||||
/* i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(float)); */
|
||||
/* i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int)); */
|
||||
|
||||
|
||||
/*if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i);
|
||||
}*/
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
int spmvHdiagDeviceFloat(void *deviceMat, float alpha, void* deviceX,
|
||||
float beta, void* deviceY)
|
||||
{
|
||||
struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat;
|
||||
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||
|
||||
#ifdef VERBOSE
|
||||
/*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/
|
||||
/*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/
|
||||
/*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/
|
||||
#endif
|
||||
#if DEBUG
|
||||
fprintf(stderr," First %p \n",devMat);
|
||||
fprintf(stderr,"%d %d %d %p %p %p\n",devMat->rows,devMat->cols, devMat->hackSize,
|
||||
devMat->hackOffsets, devMat->hdiaOffsets, devMat->cM);
|
||||
#endif
|
||||
spgpuShdiaspmv (handle, (float*)y->v_, (float *)y->v_, alpha,
|
||||
(float *)devMat->cM,devMat->hdiaOffsets,
|
||||
devMat->hackSize, devMat->hackOffsets, devMat->rows,devMat->cols,
|
||||
x->v_, beta);
|
||||
|
||||
//cudaSync();
|
||||
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
@ -0,0 +1,106 @@
|
||||
/* Parallel Sparse BLAS GPU plugin */
|
||||
/* (C) Copyright 2013 */
|
||||
|
||||
/* Salvatore Filippone */
|
||||
/* Alessandro Fanfarillo */
|
||||
|
||||
/* Redistribution and use in source and binary forms, with or without */
|
||||
/* modification, are permitted provided that the following conditions */
|
||||
/* are met: */
|
||||
/* 1. Redistributions of source code must retain the above copyright */
|
||||
/* notice, this list of conditions and the following disclaimer. */
|
||||
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||
/* notice, this list of conditions, and the following disclaimer in the */
|
||||
/* documentation and/or other materials provided with the distribution. */
|
||||
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||
/* not be used to endorse or promote products derived from this */
|
||||
/* software without specific written permission. */
|
||||
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
|
||||
#ifndef _HDIAGDEV_H_
|
||||
#define _HDIAGDEV_H_
|
||||
|
||||
#include "cintrf.h"
|
||||
#include "hdia.h"
|
||||
|
||||
/* Host-side descriptor of an HDIA matrix whose data lives in device buffers. */
struct HdiagDevice
{
  // Coefficient buffer (hackSize*allocationHeight elements); the element
  // type depends on the type requested at allocation time
  void *cM;

  // Diagonal offsets buffer (allocationHeight ints)
  int *hdiaOffsets;

  // Hack offsets buffer (hackCount+1 ints)
  int *hackOffsets;

  // Number of hacks
  int hackCount;

  // Matrix dimensions
  int rows;
  int cols;

  // Hack size, as passed to the spmv kernel
  int hackSize;

  // Number of entries in hdiaOffsets; also scales the size of cM
  int allocationHeight;
};
|
||||
|
||||
/* Allocation parameters for an HDIA device matrix.  The field order is
 * significant: the Fortran type hdiagdev_parms mirrors it with bind(c). */
typedef struct HdiagDeviceParams
{
  // Element type selector (one of the SPGPU_TYPE_* values)
  unsigned int elementType;

  // Number of rows
  unsigned int rows;
  //unsigned int hackOffsLength;

  // Number of columns
  unsigned int columns;

  // Hack size
  unsigned int hackSize;

  // Number of hacks; hackCount+1 offsets are stored
  unsigned int hackCount;

  // Number of diagonal offsets; scales the coefficient buffer
  unsigned int allocationHeight;
} HdiagDeviceParams;
|
||||
|
||||
|
||||
|
||||
/* Pack the allocation parameters into an HdiagDeviceParams value. */
HdiagDeviceParams getHdiagDeviceParams(unsigned int rows, unsigned int columns,
                                       unsigned int allocationHeight, unsigned int hackSize,
                                       unsigned int hackCount, unsigned int elementType);

/* Allocation entry point taking individual arguments (bound from Fortran). */
int FallocHdiagDevice(void** deviceMat, unsigned int rows, unsigned int cols,
                      unsigned int allocationHeight, unsigned int hackSize,
                      unsigned int hackCount, unsigned int elementType);

/* Allocate the descriptor and its device buffers. */
int allocHdiagDevice(void ** remoteMatrix, HdiagDeviceParams* params);

/* Release a matrix created by allocHdiagDevice. */
void freeHdiagDevice(void* remoteMatrix);

/* Single precision: upload, device memory footprint, read-back, product. */
int writeHdiagDeviceFloat(void* deviceMat, float* val, int* hdiaOffsets, int *hackOffsets);
long long int sizeofHdiagDeviceFloat(void* deviceMat);
int readHdiagDeviceFloat(void* deviceMat, float* a, int* off);
int spmvHdiagDeviceFloat(void *deviceMat, float alpha, void* deviceX,
                         float beta, void* deviceY);

/* Double precision: upload, device memory footprint, read-back, product. */
int writeHdiagDeviceDouble(void* deviceMat, double* val, int* hdiaOffsets, int *hackOffsets);
long long int sizeofHdiagDeviceDouble(void* deviceMat);
int readHdiagDeviceDouble(void* deviceMat, double* a, int* off);
int spmvHdiagDeviceDouble(void *deviceMat, double alpha, void* deviceX,
                          double beta, void* deviceY);
|
||||
|
||||
|
||||
#endif
|
@ -0,0 +1,199 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
|
||||
module hdiagdev_mod
|
||||
use iso_c_binding
|
||||
use core_mod
|
||||
|
||||
type, bind(c) :: hdiagdev_parms
|
||||
integer(c_int) :: element_type
|
||||
integer(c_int) :: rows
|
||||
integer(c_int) :: columns
|
||||
integer(c_int) :: hackSize
|
||||
integer(c_int) :: hackCount
|
||||
integer(c_int) :: allocationHeight
|
||||
end type hdiagdev_parms
|
||||
|
||||
! interface computeHdiaHacksCount
|
||||
! function computeHdiaHacksCountDouble(allocationHeight,hackOffsets,hackSize, &
|
||||
! & diaValues,diaValuesPitch,diags,rows)&
|
||||
! & result(res) bind(c,name='computeHdiaHackOffsetsDouble')
|
||||
! use iso_c_binding
|
||||
! integer(c_int) :: res
|
||||
! integer(c_int), value :: rows,diags,diaValuesPitch,hackSize,elementType
|
||||
! real(c_double) :: diaValues(rows,:)
|
||||
! integer(c_int) :: hackOffsets,allocationHeight
|
||||
! end function computeHdiaHacksCountDouble
|
||||
! end interface computeHdiaHacksCount
|
||||
|
||||
interface
|
||||
function FgetHdiagDeviceParams(rows, columns, allocationHeight,hackSize, &
|
||||
& hackCount, elementType) &
|
||||
& result(res) bind(c,name='getHdiagDeviceParams')
|
||||
use iso_c_binding
|
||||
import :: hdiagdev_parms
|
||||
type(hdiagdev_parms) :: res
|
||||
integer(c_int), value :: rows,columns,allocationHeight,&
|
||||
& elementType,hackSize,hackCount
|
||||
end function FgetHdiagDeviceParams
|
||||
end interface
|
||||
|
||||
|
||||
interface
|
||||
function FallocHdiagDevice(deviceMat,rows,columns,allocationHeight,&
|
||||
& hackSize,hackCount,elementType) &
|
||||
& result(res) bind(c,name='FallocHdiagDevice')
|
||||
use iso_c_binding
|
||||
integer(c_int) :: res
|
||||
integer(c_int), value :: rows,columns,allocationHeight,hackSize,&
|
||||
& hackCount,elementType
|
||||
type(c_ptr) :: deviceMat
|
||||
end function FallocHdiagDevice
|
||||
end interface
|
||||
|
||||
|
||||
interface
|
||||
function sizeofHdiagDeviceDouble(deviceMat) &
|
||||
& result(res) bind(c,name='sizeofHdiagDeviceDouble')
|
||||
use iso_c_binding
|
||||
integer(c_long_long) :: res
|
||||
type(c_ptr), value :: deviceMat
|
||||
end function sizeofHdiagDeviceDouble
|
||||
end interface
|
||||
|
||||
interface writeHdiagDevice

  ! Single-precision specific: binds to C writeHdiagDeviceFloat.
  ! The handle is passed by value; val, hdiaOffsets and hackOffsets are
  ! assumed-size arrays passed by reference.  Returns a C int status.
  function writeHdiagDeviceFloat(deviceMat, val, hdiaOffsets, hackOffsets) &
       & result(res) bind(c, name='writeHdiagDeviceFloat')
    use iso_c_binding
    type(c_ptr), value           :: deviceMat
    real(c_float), dimension(*)  :: val
    integer(c_int), dimension(*) :: hdiaOffsets, hackOffsets
    integer(c_int)               :: res
  end function writeHdiagDeviceFloat

  ! Double-precision specific: binds to C writeHdiagDeviceDouble.
  ! Same argument layout as the single-precision variant.
  function writeHdiagDeviceDouble(deviceMat, val, hdiaOffsets, hackOffsets) &
       & result(res) bind(c, name='writeHdiagDeviceDouble')
    use iso_c_binding
    type(c_ptr), value           :: deviceMat
    real(c_double), dimension(*) :: val
    integer(c_int), dimension(*) :: hdiaOffsets, hackOffsets
    integer(c_int)               :: res
  end function writeHdiagDeviceDouble

end interface writeHdiagDevice
|
||||
|
||||
!!$ interface readHdiagDevice
|
||||
!!$
|
||||
!!$ function readHdiagDeviceFloat(deviceMat,val,ja,ldj,irn) &
|
||||
!!$ & result(res) bind(c,name='readHdiagDeviceFloat')
|
||||
!!$ use iso_c_binding
|
||||
!!$ integer(c_int) :: res
|
||||
!!$ type(c_ptr), value :: deviceMat
|
||||
!!$ integer(c_int), value :: ldj
|
||||
!!$ real(c_float) :: val(ldj,*)
|
||||
!!$ integer(c_int) :: ja(ldj,*),irn(*)
|
||||
!!$ end function readHdiagDeviceFloat
|
||||
!!$
|
||||
!!$ function readHdiagDeviceDouble(deviceMat,a,off,n) &
|
||||
!!$ & result(res) bind(c,name='readHdiagDeviceDouble')
|
||||
!!$ use iso_c_binding
|
||||
!!$ integer(c_int) :: res
|
||||
!!$ type(c_ptr), value :: deviceMat
|
||||
!!$ integer(c_int),value :: n
|
||||
!!$ real(c_double) :: a(n,*)
|
||||
!!$ integer(c_int) :: off(*)
|
||||
!!$ end function readHdiagDeviceDouble
|
||||
!!$
|
||||
!!$ function readHdiagDeviceFloatComplex(deviceMat,val,ja,ldj,irn) &
|
||||
!!$ & result(res) bind(c,name='readHdiagDeviceFloatComplex')
|
||||
!!$ use iso_c_binding
|
||||
!!$ integer(c_int) :: res
|
||||
!!$ type(c_ptr), value :: deviceMat
|
||||
!!$ integer(c_int), value :: ldj
|
||||
!!$ complex(c_float_complex) :: val(ldj,*)
|
||||
!!$ integer(c_int) :: ja(ldj,*),irn(*)
|
||||
!!$ end function readHdiagDeviceFloatComplex
|
||||
!!$
|
||||
!!$ function readHdiagDeviceDoubleComplex(deviceMat,val,ja,ldj,irn) &
|
||||
!!$ & result(res) bind(c,name='readHdiagDeviceDoubleComplex')
|
||||
!!$ use iso_c_binding
|
||||
!!$ integer(c_int) :: res
|
||||
!!$ type(c_ptr), value :: deviceMat
|
||||
!!$ integer(c_int), value :: ldj
|
||||
!!$ complex(c_double_complex) :: val(ldj,*)
|
||||
!!$ integer(c_int) :: ja(ldj,*),irn(*)
|
||||
!!$ end function readHdiagDeviceDoubleComplex
|
||||
!!$
|
||||
!!$ end interface readHdiagDevice
|
||||
!!$
|
||||
interface
  ! Fortran binding for the C routine freeHdiagDevice.
  ! Takes the device matrix handle by value; no result is returned.
  subroutine freeHdiagDevice(deviceMat) bind(c, name='freeHdiagDevice')
    use iso_c_binding
    type(c_ptr), value :: deviceMat
  end subroutine freeHdiagDevice
end interface
|
||||
|
||||
|
||||
interface spmvHdiagDevice

  ! Single-precision specific: binds to C spmvHdiagDeviceFloat.
  ! Matrix and vector handles as well as the two scalars are passed by
  ! value.  Returns a C int status.
  function spmvHdiagDeviceFloat(deviceMat, alpha, x, beta, y) &
       & result(res) bind(c, name='spmvHdiagDeviceFloat')
    use iso_c_binding
    type(c_ptr), value   :: deviceMat, x, y
    real(c_float), value :: alpha, beta
    integer(c_int)       :: res
  end function spmvHdiagDeviceFloat

  ! Double-precision specific: binds to C spmvHdiagDeviceDouble.
  function spmvHdiagDeviceDouble(deviceMat, alpha, x, beta, y) &
       & result(res) bind(c, name='spmvHdiagDeviceDouble')
    use iso_c_binding
    type(c_ptr), value    :: deviceMat, x, y
    real(c_double), value :: alpha, beta
    integer(c_int)        :: res
  end function spmvHdiagDeviceDouble

  ! The complex variants below are disabled in this module.
!!$    function spmvHdiagDeviceFloatComplex(deviceMat,alpha,x,beta,y) &
!!$         & result(res) bind(c,name='spmvHdiagDeviceFloatComplex')
!!$      use iso_c_binding
!!$      integer(c_int)             :: res
!!$      type(c_ptr), value         :: deviceMat, x, y
!!$      complex(c_float_complex),value :: alpha, beta
!!$    end function spmvHdiagDeviceFloatComplex
!!$    function spmvHdiagDeviceDoubleComplex(deviceMat,alpha,x,beta,y) &
!!$         & result(res) bind(c,name='spmvHdiagDeviceDoubleComplex')
!!$      use iso_c_binding
!!$      integer(c_int)             :: res
!!$      type(c_ptr), value         :: deviceMat, x, y
!!$      complex(c_double_complex),value :: alpha, beta
!!$    end function spmvHdiagDeviceDoubleComplex

end interface spmvHdiagDevice
|
||||
|
||||
end module hdiagdev_mod
|
@ -0,0 +1,540 @@
|
||||
/* Parallel Sparse BLAS GPU plugin */
|
||||
/* (C) Copyright 2013 */
|
||||
|
||||
/* Salvatore Filippone */
|
||||
/* Alessandro Fanfarillo */
|
||||
|
||||
/* Redistribution and use in source and binary forms, with or without */
|
||||
/* modification, are permitted provided that the following conditions */
|
||||
/* are met: */
|
||||
/* 1. Redistributions of source code must retain the above copyright */
|
||||
/* notice, this list of conditions and the following disclaimer. */
|
||||
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||
/* notice, this list of conditions, and the following disclaimer in the */
|
||||
/* documentation and/or other materials provided with the distribution. */
|
||||
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||
/* not be used to endorse or promote products derived from this */
|
||||
/* software without specific written permission. */
|
||||
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
|
||||
#include "hlldev.h"
|
||||
//new
|
||||
HllDeviceParams bldHllDeviceParams(unsigned int hksize, unsigned int rows, unsigned int nzeros,
|
||||
unsigned int allocsize, unsigned int elementType, unsigned int firstIndex)
|
||||
{
|
||||
HllDeviceParams params;
|
||||
|
||||
params.elementType = elementType;
|
||||
params.hackSize = hksize;
|
||||
//numero di elementi di val
|
||||
params.allocsize = allocsize;
|
||||
params.rows = rows;
|
||||
params.nzt = nzeros;
|
||||
params.avgNzr = (nzeros+rows-1)/rows;
|
||||
params.firstIndex = firstIndex;
|
||||
return params;
|
||||
|
||||
}
|
||||
|
||||
/*
 * Copy the scalar descriptors of a device HLL matrix into the caller's
 * output arguments.
 *
 * Returns SPGPU_SUCCESS when mat is non-NULL; returns SPGPU_UNSUPPORTED
 * and leaves every output untouched when mat is NULL.
 */
int getHllDeviceParams(HllDevice* mat, int *hksize, int *rows, int *nzeros,
                       int *allocsize, int *hackOffsLength, int *firstIndex, int *avgnzr)
{
  if (mat == NULL)
    return SPGPU_UNSUPPORTED;

  *hackOffsLength = mat->hackOffsLength;
  *hksize         = mat->hackSize;
  *nzeros         = mat->nzt;
  *allocsize      = mat->allocsize;
  *rows           = mat->rows;
  *avgnzr         = mat->avgNzr;
  *firstIndex     = mat->baseIndex;

  return SPGPU_SUCCESS;
}
|
||||
//new
|
||||
/*
 * Allocate the host-side descriptor and the device buffers of an HLL
 * matrix described by params, and return the descriptor through
 * *remoteMatrix.
 *
 * The request is validated before anything is allocated:
 *   - remoteMatrix and params must be non-NULL;
 *   - hackSize must be non-zero (it is used as a divisor);
 *   - elementType must be one of the SPGPU_TYPE_* codes handled below.
 * On a rejected request, or when the host descriptor cannot be
 * allocated, SPGPU_UNSUPPORTED is returned and *remoteMatrix (when
 * available) is set to NULL, so nothing is leaked.
 *
 * If a device allocation fails, its status is returned and the
 * descriptor is still handed back through *remoteMatrix, as before; all
 * buffer pointers that were not allocated are NULL rather than
 * indeterminate.
 */
int allocHllDevice(void ** remoteMatrix, HllDeviceParams* params)
{
  HllDevice *tmp;
  size_t elemSize;
  int ret = SPGPU_SUCCESS;

  if (remoteMatrix == NULL)
    return SPGPU_UNSUPPORTED;
  *remoteMatrix = NULL;

  if (params == NULL || params->hackSize == 0)
    return SPGPU_UNSUPPORTED; // Unsupported params

  /* Resolve the element size first: an unknown type must not allocate. */
  if (params->elementType == SPGPU_TYPE_INT)
    elemSize = sizeof(int);
  else if (params->elementType == SPGPU_TYPE_FLOAT)
    elemSize = sizeof(float);
  else if (params->elementType == SPGPU_TYPE_DOUBLE)
    elemSize = sizeof(double);
  else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT)
    elemSize = sizeof(cuFloatComplex);
  else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE)
    elemSize = sizeof(cuDoubleComplex);
  else
    return SPGPU_UNSUPPORTED; // Unsupported params

  tmp = (HllDevice *)malloc(sizeof(HllDevice));
  if (tmp == NULL)
    return SPGPU_UNSUPPORTED;
  *remoteMatrix = (void *)tmp;

  /* Start from NULL so a partially built matrix never holds wild pointers. */
  tmp->cM       = NULL;
  tmp->rP       = NULL;
  tmp->rS       = NULL;
  tmp->diag     = NULL;
  tmp->hackOffs = NULL;

  tmp->hackSize  = params->hackSize;
  tmp->allocsize = params->allocsize;
  tmp->rows      = params->rows;
  tmp->avgNzr    = params->avgNzr;
  tmp->nzt       = params->nzt;
  tmp->baseIndex = params->firstIndex;
  /* Number of hacks: ceil(rows / hackSize). */
  tmp->hackOffsLength = (int)(tmp->rows+tmp->hackSize-1)/tmp->hackSize;

  if (ret == SPGPU_SUCCESS)
    ret=allocRemoteBuffer((void **)&(tmp->rP), tmp->allocsize*sizeof(int));

  if (ret == SPGPU_SUCCESS)
    ret=allocRemoteBuffer((void **)&(tmp->rS), tmp->rows*sizeof(int));

  if (ret == SPGPU_SUCCESS)
    ret=allocRemoteBuffer((void **)&(tmp->diag), tmp->rows*sizeof(int));

  /* One extra entry: hackOffs holds hackOffsLength+1 offsets. */
  if (ret == SPGPU_SUCCESS)
    ret=allocRemoteBuffer((void **)&(tmp->hackOffs), ((tmp->hackOffsLength+1)*sizeof(int)));

  if (ret == SPGPU_SUCCESS)
    ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*elemSize);

  return ret;
}
|
||||
|
||||
void freeHllDevice(void* remoteMatrix)
|
||||
{
|
||||
HllDevice *devMat = (HllDevice *) remoteMatrix;
|
||||
//fprintf(stderr,"freeHllDevice\n");
|
||||
if (devMat != NULL) {
|
||||
freeRemoteBuffer(devMat->rS);
|
||||
freeRemoteBuffer(devMat->diag);
|
||||
freeRemoteBuffer(devMat->rP);
|
||||
freeRemoteBuffer(devMat->cM);
|
||||
free(remoteMatrix);
|
||||
}
|
||||
}
|
||||
|
||||
//new
|
||||
int FallocHllDevice(void** deviceMat,unsigned int hksize, unsigned int rows, unsigned int nzeros,
|
||||
unsigned int allocsize,
|
||||
unsigned int elementType, unsigned int firstIndex)
|
||||
{ int i;
|
||||
HllDeviceParams p;
|
||||
|
||||
p = bldHllDeviceParams(hksize, rows, nzeros, allocsize, elementType, firstIndex);
|
||||
i = allocHllDevice(deviceMat, &p);
|
||||
if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i);
|
||||
}
|
||||
return(i);
|
||||
}
|
||||
|
||||
|
||||
int spmvHllDeviceFloat(void *deviceMat, float alpha, void* deviceX,
|
||||
float beta, void* deviceY)
|
||||
{
|
||||
HllDevice *devMat = (HllDevice *) deviceMat;
|
||||
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||
|
||||
#ifdef VERBOSE
|
||||
/*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/
|
||||
/*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/
|
||||
/*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/
|
||||
#endif
|
||||
/*dspmdmm_gpu ((double *)z->v_, y->count_, y->pitch_, (double *)y->v_, alpha, (double *)devMat->cM,
|
||||
devMat->rP, devMat->rS, devMat->rows, devMat->pitch, (double *)x->v_, beta,
|
||||
devMat->baseIndex);*/
|
||||
|
||||
spgpuShellspmv (handle, (float *)y->v_, (float *)y->v_, alpha, (float *)devMat->cM,
|
||||
devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL,
|
||||
devMat->avgNzr, devMat->rows, (float *)x->v_, beta, devMat->baseIndex);
|
||||
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
//new
|
||||
int spmvHllDeviceDouble(void *deviceMat, double alpha, void* deviceX,
|
||||
double beta, void* deviceY)
|
||||
{
|
||||
HllDevice *devMat = (HllDevice *) deviceMat;
|
||||
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||
|
||||
#ifdef VERBOSE
|
||||
/*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/
|
||||
/*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/
|
||||
/*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/
|
||||
#endif
|
||||
/*dspmdmm_gpu ((double *)z->v_, y->count_, y->pitch_, (double *)y->v_, alpha, (double *)devMat->cM,
|
||||
devMat->rP, devMat->rS, devMat->rows, devMat->pitch, (double *)x->v_, beta,
|
||||
devMat->baseIndex);*/
|
||||
|
||||
spgpuDhellspmv (handle, (double *)y->v_, (double *)y->v_, alpha, (double*)devMat->cM,
|
||||
devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL,
|
||||
devMat->avgNzr, devMat->rows, (double *)x->v_, beta, devMat->baseIndex);
|
||||
//cudaSync();
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
int spmvHllDeviceFloatComplex(void *deviceMat, float complex alpha, void* deviceX,
|
||||
float complex beta, void* deviceY)
|
||||
{
|
||||
HllDevice *devMat = (HllDevice *) deviceMat;
|
||||
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||
|
||||
cuFloatComplex a = make_cuFloatComplex(crealf(alpha),cimagf(alpha));
|
||||
cuFloatComplex b = make_cuFloatComplex(crealf(beta),cimagf(beta));
|
||||
#ifdef VERBOSE
|
||||
/*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/
|
||||
/*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/
|
||||
/*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/
|
||||
#endif
|
||||
/*dspmdmm_gpu ((double *)z->v_, y->count_, y->pitch_, (double *)y->v_, alpha, (double *)devMat->cM,
|
||||
devMat->rP, devMat->rS, devMat->rows, devMat->pitch, (double *)x->v_, beta,
|
||||
devMat->baseIndex);*/
|
||||
|
||||
spgpuChellspmv (handle, (cuFloatComplex *)y->v_, (cuFloatComplex *)y->v_, a, (cuFloatComplex *)devMat->cM,
|
||||
devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL,
|
||||
devMat->avgNzr, devMat->rows, (cuFloatComplex *)x->v_, b, devMat->baseIndex);
|
||||
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
int spmvHllDeviceDoubleComplex(void *deviceMat, double complex alpha, void* deviceX,
|
||||
double complex beta, void* deviceY)
|
||||
{
|
||||
HllDevice *devMat = (HllDevice *) deviceMat;
|
||||
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||
|
||||
cuDoubleComplex a = make_cuDoubleComplex(creal(alpha),cimag(alpha));
|
||||
cuDoubleComplex b = make_cuDoubleComplex(creal(beta),cimag(beta));
|
||||
#ifdef VERBOSE
|
||||
/*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/
|
||||
/*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/
|
||||
/*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/
|
||||
#endif
|
||||
|
||||
spgpuZhellspmv (handle, (cuDoubleComplex *)y->v_, (cuDoubleComplex *)y->v_, a, (cuDoubleComplex *)devMat->cM,
|
||||
devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL,
|
||||
devMat->avgNzr,devMat->rows, (cuDoubleComplex *)x->v_, b, devMat->baseIndex);
|
||||
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
int writeHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag)
|
||||
{ int i;
|
||||
HllDevice *devMat = (HllDevice *) deviceMat;
|
||||
// Ex updateFromHost function
|
||||
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float));
|
||||
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||||
i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||||
//i = writeEllDevice(deviceMat, (void *) val, ja, irn);
|
||||
/*if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i);
|
||||
}*/
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
int writeHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag)
|
||||
{ int i;
|
||||
HllDevice *devMat = (HllDevice *) deviceMat;
|
||||
// Ex updateFromHost function
|
||||
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double));
|
||||
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||||
i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||||
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);
|
||||
if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i);
|
||||
}*/
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
int writeHllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int *hkoffs, int* irn, int *idiag)
|
||||
{ int i;
|
||||
HllDevice *devMat = (HllDevice *) deviceMat;
|
||||
// Ex updateFromHost function
|
||||
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex));
|
||||
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||||
i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||||
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);
|
||||
if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i);
|
||||
}*/
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
int writeHllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int *hkoffs, int* irn, int *idiag)
|
||||
{ int i;
|
||||
HllDevice *devMat = (HllDevice *) deviceMat;
|
||||
// Ex updateFromHost function
|
||||
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex));
|
||||
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||||
i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||||
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);
|
||||
if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i);
|
||||
}*/
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
int readHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag)
|
||||
{ int i;
|
||||
HllDevice *devMat = (HllDevice *) deviceMat;
|
||||
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(float));
|
||||
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||||
i = readRemoteBuffer((void *) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||||
/*i = readEllDevice(deviceMat, (void *) val, ja, irn);
|
||||
if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i);
|
||||
}*/
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
int readHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag)
|
||||
{ int i;
|
||||
HllDevice *devMat = (HllDevice *) deviceMat;
|
||||
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(double));
|
||||
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||||
i = readRemoteBuffer((void *) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||||
/*if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
|
||||
}*/
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
int readHllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int *hkoffs, int* irn, int *idiag)
|
||||
{ int i;
|
||||
HllDevice *devMat = (HllDevice *) deviceMat;
|
||||
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex));
|
||||
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||
i = readRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||||
i = readRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||||
/*if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
|
||||
}*/
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
int readHllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int *hkoffs, int* irn, int *idiag)
|
||||
{ int i;
|
||||
HllDevice *devMat = (HllDevice *) deviceMat;
|
||||
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex));
|
||||
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||
i = readRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||||
i = readRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||||
/*if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
|
||||
}*/
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
// New copy routines.
|
||||
|
||||
int psiCopyCooToHlgFloat(int nr, int nc, int nza, int hacksz, int noffs, int isz,
|
||||
int *irn, int *hoffs, int *idisp, int *ja,
|
||||
float *val, void *deviceMat)
|
||||
{ int i,j;
|
||||
spgpuHandle_t handle;
|
||||
HllDevice *devMat = (HllDevice *) deviceMat;
|
||||
float *devVal;
|
||||
int *devIdisp, *devJa;
|
||||
int *tja;
|
||||
//fprintf(stderr,"devMat: %p\n",devMat);
|
||||
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
|
||||
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
|
||||
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(float));
|
||||
|
||||
// fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength);
|
||||
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(float));
|
||||
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
|
||||
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
|
||||
if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||||
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
|
||||
//cudaSync();
|
||||
|
||||
handle = psb_cudaGetHandle();
|
||||
psi_cuda_s_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz,
|
||||
(int *) devMat->rS, (int *) devMat->hackOffs,
|
||||
devIdisp,devJa,devVal,
|
||||
(int *) devMat->diag, (int *) devMat->rP, (float *)devMat->cM);
|
||||
|
||||
freeRemoteBuffer(devIdisp);
|
||||
freeRemoteBuffer(devJa);
|
||||
freeRemoteBuffer(devVal);
|
||||
|
||||
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
|
||||
if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceFloat",i);
|
||||
}
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
int psiCopyCooToHlgDouble(int nr, int nc, int nza, int hacksz, int noffs, int isz,
|
||||
int *irn, int *hoffs, int *idisp, int *ja,
|
||||
double *val, void *deviceMat)
|
||||
{ int i,j;
|
||||
spgpuHandle_t handle;
|
||||
HllDevice *devMat = (HllDevice *) deviceMat;
|
||||
double *devVal;
|
||||
int *devIdisp, *devJa;
|
||||
int *tja;
|
||||
//fprintf(stderr,"devMat: %p\n",devMat);
|
||||
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
|
||||
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
|
||||
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(double));
|
||||
|
||||
// fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength);
|
||||
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(double));
|
||||
//fprintf(stderr,"WriteRemoteBuffer val %d\n",i);
|
||||
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
|
||||
//fprintf(stderr,"WriteRemoteBuffer ja %d\n",i);
|
||||
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
|
||||
//fprintf(stderr,"WriteRemoteBuffer irn %d\n",i);
|
||||
if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||||
//fprintf(stderr,"WriteRemoteBuffer hoffs %d\n",i);
|
||||
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
|
||||
//fprintf(stderr,"WriteRemoteBuffer idisp %d\n",i);
|
||||
//cudaSync();
|
||||
//fprintf(stderr," hacksz: %d \n",hacksz);
|
||||
handle = psb_cudaGetHandle();
|
||||
psi_cuda_d_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz,
|
||||
(int *) devMat->rS, (int *) devMat->hackOffs,
|
||||
devIdisp,devJa,devVal,
|
||||
(int *) devMat->diag, (int *) devMat->rP, (double *)devMat->cM);
|
||||
|
||||
freeRemoteBuffer(devIdisp);
|
||||
freeRemoteBuffer(devJa);
|
||||
freeRemoteBuffer(devVal);
|
||||
|
||||
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
|
||||
if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceDouble",i);
|
||||
}
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
int psiCopyCooToHlgFloatComplex(int nr, int nc, int nza, int hacksz, int noffs, int isz,
|
||||
int *irn, int *hoffs, int *idisp, int *ja,
|
||||
float complex *val, void *deviceMat)
|
||||
{ int i,j;
|
||||
spgpuHandle_t handle;
|
||||
HllDevice *devMat = (HllDevice *) deviceMat;
|
||||
float complex *devVal;
|
||||
int *devIdisp, *devJa;
|
||||
int *tja;
|
||||
//fprintf(stderr,"devMat: %p\n",devMat);
|
||||
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
|
||||
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
|
||||
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuFloatComplex));
|
||||
|
||||
// fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength);
|
||||
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuFloatComplex));
|
||||
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
|
||||
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
|
||||
if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||||
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
|
||||
//cudaSync();
|
||||
|
||||
handle = psb_cudaGetHandle();
|
||||
psi_cuda_c_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz,
|
||||
(int *) devMat->rS, (int *) devMat->hackOffs,
|
||||
devIdisp,devJa,devVal,
|
||||
(int *) devMat->diag,(int *) devMat->rP, (float complex *)devMat->cM);
|
||||
|
||||
freeRemoteBuffer(devIdisp);
|
||||
freeRemoteBuffer(devJa);
|
||||
freeRemoteBuffer(devVal);
|
||||
|
||||
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
|
||||
if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceFloatComplex",i);
|
||||
}
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
||||
|
||||
int psiCopyCooToHlgDoubleComplex(int nr, int nc, int nza, int hacksz, int noffs, int isz,
|
||||
int *irn, int *hoffs, int *idisp, int *ja,
|
||||
double complex *val, void *deviceMat)
|
||||
{ int i,j;
|
||||
spgpuHandle_t handle;
|
||||
HllDevice *devMat = (HllDevice *) deviceMat;
|
||||
double complex *devVal;
|
||||
int *devIdisp, *devJa;
|
||||
int *tja;
|
||||
//fprintf(stderr,"devMat: %p\n",devMat);
|
||||
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
|
||||
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
|
||||
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuDoubleComplex));
|
||||
|
||||
// fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength);
|
||||
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuDoubleComplex));
|
||||
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
|
||||
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
|
||||
if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||||
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
|
||||
//cudaSync();
|
||||
|
||||
handle = psb_cudaGetHandle();
|
||||
psi_cuda_z_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz,
|
||||
(int *) devMat->rS, (int *) devMat->hackOffs,
|
||||
devIdisp,devJa,devVal,
|
||||
(int *) devMat->diag,(int *) devMat->rP, (double complex *)devMat->cM);
|
||||
|
||||
freeRemoteBuffer(devIdisp);
|
||||
freeRemoteBuffer(devJa);
|
||||
freeRemoteBuffer(devVal);
|
||||
|
||||
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
|
||||
if (i != 0) {
|
||||
fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceDoubleComplex",i);
|
||||
}
|
||||
return SPGPU_SUCCESS;
|
||||
}
|
@ -0,0 +1,156 @@
|
||||
/* Parallel Sparse BLAS GPU plugin */
|
||||
/* (C) Copyright 2013 */
|
||||
|
||||
/* Salvatore Filippone */
|
||||
/* Alessandro Fanfarillo */
|
||||
|
||||
/* Redistribution and use in source and binary forms, with or without */
|
||||
/* modification, are permitted provided that the following conditions */
|
||||
/* are met: */
|
||||
/* 1. Redistributions of source code must retain the above copyright */
|
||||
/* notice, this list of conditions and the following disclaimer. */
|
||||
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||
/* notice, this list of conditions, and the following disclaimer in the */
|
||||
/* documentation and/or other materials provided with the distribution. */
|
||||
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||
/* not be used to endorse or promote products derived from this */
|
||||
/* software without specific written permission. */
|
||||
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
|
||||
#ifndef _HLLDEV_H_
|
||||
#define _HLLDEV_H_
|
||||
|
||||
#include "cintrf.h"
|
||||
#include "hell.h"
|
||||
|
||||
|
||||
/*
 * Descriptor of a matrix stored in HLL layout.
 * The members and their order are part of the binary layout shared with
 * the routines declared in this header; do not reorder them.
 */
typedef struct hlldevice {
  /* Compressed matrix coefficients.  Kept untyped because the element
     type varies (float or double according to the original note; this
     header also declares complex read/write routines). */
  void *cM;

  /* Row pointers (same size as cM). */
  int *rP;

  /* Row sizes and diagonal positions. */
  int *rS;
  int *diag;

  int *hackOffs;

  int rows;
  int avgNzr;
  int hackOffsLength;
  int nzt;
  int hackSize;          /* must be a multiple of 32 */

  /* The size of the uncompressed matrix is not stored here: a second
     "rows" member and a "columns" member are disabled in the original. */

  /* Allocation size. */
  int allocsize;

  /* Index base: 0 for C, 1 for Fortran. */
  int baseIndex;
} HllDevice;
|
||||
|
||||
/*
 * Parameter bundle returned by bldHllDeviceParams() and consumed by
 * allocHllDevice().  It is exchanged by value across the C/Fortran
 * boundary, so the member list and order must stay as they are.
 */
typedef struct hlldeviceparams {
  unsigned int elementType;

  unsigned int hackSize;

  /* Number of rows; used to allocate the rS array. */
  unsigned int rows;
  unsigned int avgNzr;
  unsigned int nzt;
  /* A hackOffsLength member is disabled in the original. */

  /* A "columns" member (number of columns, for error checking) is
     disabled in the original. */

  unsigned int allocsize;

  /* First index (e.g. 0 or 1). */
  unsigned int firstIndex;
} HllDeviceParams;
|
||||
|
||||
|
||||
HllDeviceParams bldHllDeviceParams(unsigned int hksize, unsigned int rows, unsigned int nzeros,
|
||||
unsigned int allocsize,
|
||||
unsigned int elementType, unsigned int firstIndex);
|
||||
int getHllDeviceParams(HllDevice* mat, int *hksize, int *rows, int *nzeros,
|
||||
int *allocsize, int *hackOffsLength, int *firstIndex, int *avgnzr);
|
||||
int FallocHllDevice(void** deviceMat,unsigned int hksize, unsigned int rows, unsigned int nzeros,
|
||||
unsigned int allocsize, unsigned int elementType, unsigned int firstIndex);
|
||||
int allocHllDevice(void ** remoteMatrix, HllDeviceParams* params);
|
||||
void freeHllDevice(void* remoteMatrix);
|
||||
int writeHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag);
|
||||
int writeHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag);
|
||||
int writeHllDeviceFloatComplex(void* deviceMat, float complex* val,
|
||||
int* ja, int *hkoffs, int* irn, int *idiag);
|
||||
int writeHllDeviceDoubleComplex(void* deviceMat, double complex* val,
|
||||
int* ja, int *hkoffs, int* irn, int *idiag);
|
||||
int readHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag);
|
||||
int readHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag);
|
||||
int readHllDeviceFloatComplex(void* deviceMat, float complex* val,
|
||||
int* ja, int *hkoffs, int* irn, int *idiag);
|
||||
int readHllDeviceDoubleComplex(void* deviceMat, double complex* val,
|
||||
int* ja, int *hkoffs, int* irn, int *idiag);
|
||||
|
||||
|
||||
int psiCopyCooToHlgFloat(int nr, int nc, int nza, int hacksz, int noffs, int isz,
|
||||
int *irn, int *hoffs, int *idisp, int *ja,
|
||||
float *val, void *deviceMat);
|
||||
int psiCopyCooToHlgDouble(int nr, int nc, int nza, int hacksz, int noffs, int isz,
|
||||
int *irn, int *hoffs, int *idisp, int *ja,
|
||||
double *val, void *deviceMat);
|
||||
int psiCopyCooToHlgFloatComplex(int nr, int nc, int nza, int hacksz,
|
||||
int noffs, int isz, int *irn,
|
||||
int *hoffs, int *idisp, int *ja,
|
||||
float complex *val, void *deviceMat);
|
||||
int psiCopyCooToHlgDoubleComplex(int nr, int nc, int nza, int hacksz,
|
||||
int noffs, int isz, int *irn,
|
||||
int *hoffs, int *idisp, int *ja,
|
||||
double complex *val, void *deviceMat);
|
||||
|
||||
int psi_cuda_s_CopyCooToHlg(spgpuHandle_t handle,int nr, int nc, int nza,
|
||||
int baseIdx, int hacksz, int noffs, int isz,
|
||||
int *irn, int *hoffs, int *idisp,
|
||||
int *ja, float *val,
|
||||
int *idiag, int *rP, float *cM);
|
||||
int psi_cuda_d_CopyCooToHlg(spgpuHandle_t handle,int nr, int nc, int nza,
|
||||
int baseIdx, int hacksz, int noffs, int isz,
|
||||
int *irn, int *hoffs, int *idisp,
|
||||
int *ja, double *val,
|
||||
int *idiag, int *rP, double *cM);
|
||||
int psi_cuda_c_CopyCooToHlg(spgpuHandle_t handle,int nr, int nc, int nza,
|
||||
int baseIdx, int hacksz, int noffs, int isz,
|
||||
int *irn, int *hoffs, int *idisp,
|
||||
int *ja, float complex *val,
|
||||
int *idiag, int *rP, float complex *cM);
|
||||
int psi_cuda_z_CopyCooToHlg(spgpuHandle_t handle,int nr, int nc, int nza,
|
||||
int baseIdx, int hacksz, int noffs, int isz,
|
||||
int *irn, int *hoffs, int *idisp,
|
||||
int *ja, double complex *val,
|
||||
int *idiag, int *rP, double complex *cM);
|
||||
|
||||
|
||||
#endif
|
@ -0,0 +1,268 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
|
||||
module hlldev_mod
|
||||
use iso_c_binding
|
||||
use core_mod
|
||||
|
||||
! Fortran mirror of the C struct HllDeviceParams (hlldev.h).
! bldHllDeviceParams returns this type by value, so the components must
! match the C members in number, order and kind:
!   elementType, hackSize, rows, avgNzr, nzt, allocsize, firstIndex
! (C "unsigned int" is mapped onto integer(c_int)).
type, bind(c) :: hlldev_parms
  integer(c_int) :: element_type
  integer(c_int) :: hackSize
  integer(c_int) :: rows
  integer(c_int) :: avgNzr
  ! nzt was missing here although the C struct carries it between avgNzr
  ! and allocsize; without it the type is one word short and allocsize /
  ! firstIndex land on the wrong C members.
  ! NOTE: positional structure constructors for this type now need seven
  ! values.
  integer(c_int) :: nzt
  integer(c_int) :: allocsize
  integer(c_int) :: firstIndex
end type hlldev_parms
|
||||
|
||||
! Binding for the C function
!   HllDeviceParams bldHllDeviceParams(unsigned int hksize, unsigned int rows,
!                                      unsigned int nzeros, unsigned int allocsize,
!                                      unsigned int elementType, unsigned int firstIndex);
! All six arguments travel by value and the structure is returned by value.
interface
  function bldHllDeviceParams(hksize, rows, nzeros, allocsize, &
       & elementType, firstIndex) &
       & bind(c, name='bldHllDeviceParams') result(res)
    use iso_c_binding, only : c_int
    import :: hlldev_parms
    integer(c_int), value :: hksize
    integer(c_int), value :: rows
    integer(c_int), value :: nzeros
    integer(c_int), value :: allocsize
    integer(c_int), value :: elementType
    integer(c_int), value :: firstIndex
    type(hlldev_parms)    :: res
  end function bldHllDeviceParams
end interface
|
||||
|
||||
! Binding for the C function
!   int getHllDeviceParams(HllDevice* mat, int *hksize, int *rows, int *nzeros,
!                          int *allocsize, int *hackOffsLength,
!                          int *firstIndex, int *avgnzr);
! deviceMat is the matrix handle passed by value; the seven integers are
! passed by reference (C "int *").  They look like outputs filled in by the
! C side -- confirm against the implementation.
! The former "import :: hlldev_parms" was dropped: nothing in this
! interface body uses that type.
interface
  function getHllDeviceParams(deviceMat, hksize, rows, nzeros, allocsize, &
       & hackOffsLength, firstIndex, avgnzr) &
       & result(res) bind(c, name='getHllDeviceParams')
    use iso_c_binding, only : c_int, c_ptr
    integer(c_int)     :: res
    type(c_ptr), value :: deviceMat
    integer(c_int)     :: hksize, rows, nzeros, allocsize
    integer(c_int)     :: hackOffsLength, firstIndex, avgnzr
  end function getHllDeviceParams
end interface
|
||||
|
||||
|
||||
! Binding for the C function
!   int FallocHllDevice(void** deviceMat, unsigned int hksize, unsigned int rows,
!                       unsigned int nzeros, unsigned int allocsize,
!                       unsigned int elementType, unsigned int firstIndex);
interface
  function FallocHllDevice(deviceMat, hksize, rows, nzeros, allocsize, &
       & elementType, firstIndex) &
       & bind(c, name='FallocHllDevice') result(res)
    use iso_c_binding, only : c_int, c_ptr
    ! No VALUE attribute: the handle is passed by reference (C "void **").
    type(c_ptr)           :: deviceMat
    integer(c_int), value :: hksize, rows, nzeros
    integer(c_int), value :: allocsize, elementType, firstIndex
    integer(c_int)        :: res
  end function FallocHllDevice
end interface
|
||||
|
||||
|
||||
! Generic writeHllDevice: one specific C binding per element type of val
! (float, double, float complex, double complex).  Every specific takes the
! matrix handle by value followed by the val, ja, hkoffs, irn and idiag
! arrays, and returns the integer status of the C routine.
! Presumably these transfer the host arrays into the device matrix --
! confirm against the C implementation.
interface writeHllDevice

  function writeHllDeviceFloat(deviceMat, val, ja, hkoffs, irn, idiag) &
       & bind(c, name='writeHllDeviceFloat') result(res)
    use iso_c_binding, only : c_int, c_ptr, c_float
    type(c_ptr), value           :: deviceMat
    real(c_float), dimension(*)  :: val
    integer(c_int), dimension(*) :: ja, irn, hkoffs, idiag
    integer(c_int)               :: res
  end function writeHllDeviceFloat

  function writeHllDeviceDouble(deviceMat, val, ja, hkoffs, irn, idiag) &
       & bind(c, name='writeHllDeviceDouble') result(res)
    use iso_c_binding, only : c_int, c_ptr, c_double
    type(c_ptr), value           :: deviceMat
    real(c_double), dimension(*) :: val
    integer(c_int), dimension(*) :: ja, irn, hkoffs, idiag
    integer(c_int)               :: res
  end function writeHllDeviceDouble

  function writeHllDeviceFloatComplex(deviceMat, val, ja, hkoffs, irn, idiag) &
       & bind(c, name='writeHllDeviceFloatComplex') result(res)
    use iso_c_binding, only : c_int, c_ptr, c_float_complex
    type(c_ptr), value                     :: deviceMat
    complex(c_float_complex), dimension(*) :: val
    integer(c_int), dimension(*)           :: ja, irn, hkoffs, idiag
    integer(c_int)                         :: res
  end function writeHllDeviceFloatComplex

  function writeHllDeviceDoubleComplex(deviceMat, val, ja, hkoffs, irn, idiag) &
       & bind(c, name='writeHllDeviceDoubleComplex') result(res)
    use iso_c_binding, only : c_int, c_ptr, c_double_complex
    type(c_ptr), value                      :: deviceMat
    complex(c_double_complex), dimension(*) :: val
    integer(c_int), dimension(*)            :: ja, irn, hkoffs, idiag
    integer(c_int)                          :: res
  end function writeHllDeviceDoubleComplex

end interface writeHllDevice
|
||||
|
||||
! Generic readHllDevice: one specific C binding per element type of val
! (float, double, float complex, double complex).  The argument lists are
! the same as for writeHllDevice: handle by value, then the val, ja, hkoffs,
! irn and idiag arrays; the result is the integer status of the C routine.
! Presumably these fetch the device matrix contents into the host arrays --
! confirm against the C implementation.
interface readHllDevice

  function readHllDeviceFloat(deviceMat, val, ja, hkoffs, irn, idiag) &
       & bind(c, name='readHllDeviceFloat') result(res)
    use iso_c_binding, only : c_int, c_ptr, c_float
    type(c_ptr), value           :: deviceMat
    real(c_float), dimension(*)  :: val
    integer(c_int), dimension(*) :: ja, irn, hkoffs, idiag
    integer(c_int)               :: res
  end function readHllDeviceFloat

  function readHllDeviceDouble(deviceMat, val, ja, hkoffs, irn, idiag) &
       & bind(c, name='readHllDeviceDouble') result(res)
    use iso_c_binding, only : c_int, c_ptr, c_double
    type(c_ptr), value           :: deviceMat
    real(c_double), dimension(*) :: val
    integer(c_int), dimension(*) :: ja, irn, hkoffs, idiag
    integer(c_int)               :: res
  end function readHllDeviceDouble

  function readHllDeviceFloatComplex(deviceMat, val, ja, hkoffs, irn, idiag) &
       & bind(c, name='readHllDeviceFloatComplex') result(res)
    use iso_c_binding, only : c_int, c_ptr, c_float_complex
    type(c_ptr), value                     :: deviceMat
    complex(c_float_complex), dimension(*) :: val
    integer(c_int), dimension(*)           :: ja, irn, hkoffs, idiag
    integer(c_int)                         :: res
  end function readHllDeviceFloatComplex

  function readHllDeviceDoubleComplex(deviceMat, val, ja, hkoffs, irn, idiag) &
       & bind(c, name='readHllDeviceDoubleComplex') result(res)
    use iso_c_binding, only : c_int, c_ptr, c_double_complex
    type(c_ptr), value                      :: deviceMat
    complex(c_double_complex), dimension(*) :: val
    integer(c_int), dimension(*)            :: ja, irn, hkoffs, idiag
    integer(c_int)                          :: res
  end function readHllDeviceDoubleComplex

end interface readHllDevice
|
||||
|
||||
! Binding for the C routine  void freeHllDevice(void* remoteMatrix);
! the matrix handle is passed by value.
interface
  subroutine freeHllDevice(deviceMat) bind(c, name='freeHllDevice')
    use iso_c_binding, only : c_ptr
    type(c_ptr), value :: deviceMat
  end subroutine freeHllDevice
end interface
|
||||
|
||||
|
||||
! Generic psi_CopyCooToHlg: bindings for the four C entry points
! psiCopyCooToHlg{Float,Double,FloatComplex,DoubleComplex}; the specific is
! selected by the type of val.  The six scalar sizes are passed by value,
! the index/value arrays by reference and the matrix handle by value, in
! the same order as the C prototypes in hlldev.h.
interface psi_CopyCooToHlg

  function psiCopyCooToHlgFloat(nr, nc, nza, hacksz, noffs, isz, irn, &
       & hoffs, idisp, ja, val, deviceMat) &
       & bind(c, name='psiCopyCooToHlgFloat') result(res)
    use iso_c_binding, only : c_int, c_ptr, c_float
    integer(c_int), value        :: nr, nc, nza, hacksz, noffs, isz
    integer(c_int), dimension(*) :: irn, idisp, ja, hoffs
    real(c_float), dimension(*)  :: val
    type(c_ptr), value           :: deviceMat
    integer(c_int)               :: res
  end function psiCopyCooToHlgFloat

  function psiCopyCooToHlgDouble(nr, nc, nza, hacksz, noffs, isz, irn, &
       & hoffs, idisp, ja, val, deviceMat) &
       & bind(c, name='psiCopyCooToHlgDouble') result(res)
    use iso_c_binding, only : c_int, c_ptr, c_double
    integer(c_int), value        :: nr, nc, nza, hacksz, noffs, isz
    integer(c_int), dimension(*) :: irn, idisp, ja, hoffs
    real(c_double), dimension(*) :: val
    type(c_ptr), value           :: deviceMat
    integer(c_int)               :: res
  end function psiCopyCooToHlgDouble

  function psiCopyCooToHlgFloatComplex(nr, nc, nza, hacksz, noffs, isz, irn, &
       & hoffs, idisp, ja, val, deviceMat) &
       & bind(c, name='psiCopyCooToHlgFloatComplex') result(res)
    use iso_c_binding, only : c_int, c_ptr, c_float_complex
    integer(c_int), value                  :: nr, nc, nza, hacksz, noffs, isz
    integer(c_int), dimension(*)           :: irn, idisp, ja, hoffs
    complex(c_float_complex), dimension(*) :: val
    type(c_ptr), value                     :: deviceMat
    integer(c_int)                         :: res
  end function psiCopyCooToHlgFloatComplex

  function psiCopyCooToHlgDoubleComplex(nr, nc, nza, hacksz, noffs, isz, irn, &
       & hoffs, idisp, ja, val, deviceMat) &
       & bind(c, name='psiCopyCooToHlgDoubleComplex') result(res)
    use iso_c_binding, only : c_int, c_ptr, c_double_complex
    integer(c_int), value                   :: nr, nc, nza, hacksz, noffs, isz
    integer(c_int), dimension(*)            :: irn, idisp, ja, hoffs
    complex(c_double_complex), dimension(*) :: val
    type(c_ptr), value                      :: deviceMat
    integer(c_int)                          :: res
  end function psiCopyCooToHlgDoubleComplex

end interface psi_CopyCooToHlg
|
||||
|
||||
|
||||
!interface
|
||||
! function getHllDevicePitch(deviceMat) &
|
||||
! & bind(c,name='getHllDevicePitch') result(res)
|
||||
! use iso_c_binding
|
||||
! type(c_ptr), value :: deviceMat
|
||||
! integer(c_int) :: res
|
||||
! end function getHllDevicePitch
|
||||
!end interface
|
||||
|
||||
!interface
|
||||
! function getHllDeviceMaxRowSize(deviceMat) &
|
||||
! & bind(c,name='getHllDeviceMaxRowSize') result(res)
|
||||
! use iso_c_binding
|
||||
! type(c_ptr), value :: deviceMat
|
||||
! integer(c_int) :: res
|
||||
! end function getHllDeviceMaxRowSize
|
||||
!end interface
|
||||
|
||||
! Generic spmvHllDevice: bindings for the C entry points
! spmvHllDevice{Float,Double,FloatComplex,DoubleComplex}; the specific is
! selected by the type of the scalars alpha and beta.  The matrix and the
! two vectors are opaque C pointers passed by value, and the scalars are
! passed by value too.
! NOTE(review): by its name and arguments this is presumably
! y := alpha*A*x + beta*y -- confirm against the C implementation.
interface spmvHllDevice

  function spmvHllDeviceFloat(deviceMat, alpha, x, beta, y) &
       & bind(c, name='spmvHllDeviceFloat') result(res)
    use iso_c_binding, only : c_int, c_ptr, c_float
    type(c_ptr), value   :: deviceMat, x, y
    real(c_float), value :: alpha, beta
    integer(c_int)       :: res
  end function spmvHllDeviceFloat

  function spmvHllDeviceDouble(deviceMat, alpha, x, beta, y) &
       & bind(c, name='spmvHllDeviceDouble') result(res)
    use iso_c_binding, only : c_int, c_ptr, c_double
    type(c_ptr), value    :: deviceMat, x, y
    real(c_double), value :: alpha, beta
    integer(c_int)        :: res
  end function spmvHllDeviceDouble

  function spmvHllDeviceFloatComplex(deviceMat, alpha, x, beta, y) &
       & bind(c, name='spmvHllDeviceFloatComplex') result(res)
    use iso_c_binding, only : c_int, c_ptr, c_float_complex
    type(c_ptr), value              :: deviceMat, x, y
    complex(c_float_complex), value :: alpha, beta
    integer(c_int)                  :: res
  end function spmvHllDeviceFloatComplex

  function spmvHllDeviceDoubleComplex(deviceMat, alpha, x, beta, y) &
       & bind(c, name='spmvHllDeviceDoubleComplex') result(res)
    use iso_c_binding, only : c_int, c_ptr, c_double_complex
    type(c_ptr), value               :: deviceMat, x, y
    complex(c_double_complex), value :: alpha, beta
    integer(c_int)                   :: res
  end function spmvHllDeviceDoubleComplex

end interface spmvHllDevice
|
||||
|
||||
end module hlldev_mod
|
@ -0,0 +1,297 @@
|
||||
include ../../Make.inc
|
||||
LIBDIR=../../lib
|
||||
INCDIR=../../include
|
||||
MODDIR=../../modules
|
||||
PSBLAS_LIB= -L$(PSBLIBDIR) -lpsb_util -lpsb_base
|
||||
#-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base
|
||||
LDLIBS=$(PSBLDLIBS)
|
||||
#
|
||||
# Compilers and such
|
||||
#
|
||||
#CCOPT= -g
|
||||
FINCLUDES=$(FMFLAG).. $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FIFLAG)..
|
||||
CINCLUDES=-I$(GPU_INCDIR) -I$(CUDA_INCDIR)
|
||||
LIBNAME=libpsb_gpu.a
|
||||
CXXDEFINES=$(PSBCXXDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES)
|
||||
CDEFINES=$(PSBCDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES)
|
||||
FDEFINES=$(PSBFDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES)
|
||||
|
||||
OBJS= \
|
||||
psb_d_cuda_cp_csrg_from_coo.o \
|
||||
psb_d_cuda_cp_csrg_from_fmt.o \
|
||||
psb_d_cuda_cp_elg_from_coo.o \
|
||||
psb_d_cuda_cp_elg_from_fmt.o \
|
||||
psb_s_cuda_cp_csrg_from_coo.o \
|
||||
psb_s_cuda_cp_csrg_from_fmt.o \
|
||||
psb_s_cuda_csrg_allocate_mnnz.o \
|
||||
psb_s_cuda_csrg_csmm.o \
|
||||
psb_s_cuda_csrg_csmv.o \
|
||||
psb_s_cuda_csrg_mold.o \
|
||||
psb_s_cuda_csrg_reallocate_nz.o \
|
||||
psb_s_cuda_csrg_scal.o \
|
||||
psb_s_cuda_csrg_scals.o \
|
||||
psb_s_cuda_csrg_from_gpu.o \
|
||||
psb_s_cuda_csrg_to_gpu.o \
|
||||
psb_s_cuda_csrg_vect_mv.o \
|
||||
psb_s_cuda_csrg_inner_vect_sv.o \
|
||||
psb_d_cuda_csrg_allocate_mnnz.o \
|
||||
psb_d_cuda_csrg_csmm.o \
|
||||
psb_d_cuda_csrg_csmv.o \
|
||||
psb_d_cuda_csrg_mold.o \
|
||||
psb_d_cuda_csrg_reallocate_nz.o \
|
||||
psb_d_cuda_csrg_scal.o \
|
||||
psb_d_cuda_csrg_scals.o \
|
||||
psb_d_cuda_csrg_from_gpu.o \
|
||||
psb_d_cuda_csrg_to_gpu.o \
|
||||
psb_d_cuda_csrg_vect_mv.o \
|
||||
psb_d_cuda_csrg_inner_vect_sv.o \
|
||||
psb_d_cuda_elg_allocate_mnnz.o \
|
||||
psb_d_cuda_elg_asb.o \
|
||||
psb_d_cuda_elg_csmm.o \
|
||||
psb_d_cuda_elg_csmv.o \
|
||||
psb_d_cuda_elg_csput.o \
|
||||
psb_d_cuda_elg_from_gpu.o \
|
||||
psb_d_cuda_elg_inner_vect_sv.o \
|
||||
psb_d_cuda_elg_mold.o \
|
||||
psb_d_cuda_elg_reallocate_nz.o \
|
||||
psb_d_cuda_elg_scal.o \
|
||||
psb_d_cuda_elg_scals.o \
|
||||
psb_d_cuda_elg_to_gpu.o \
|
||||
psb_d_cuda_elg_vect_mv.o \
|
||||
psb_d_cuda_mv_csrg_from_coo.o \
|
||||
psb_d_cuda_mv_csrg_from_fmt.o \
|
||||
psb_d_cuda_mv_elg_from_coo.o \
|
||||
psb_d_cuda_mv_elg_from_fmt.o \
|
||||
psb_s_cuda_mv_csrg_from_coo.o \
|
||||
psb_s_cuda_mv_csrg_from_fmt.o \
|
||||
psb_s_cuda_cp_elg_from_coo.o \
|
||||
psb_s_cuda_cp_elg_from_fmt.o \
|
||||
psb_s_cuda_elg_allocate_mnnz.o \
|
||||
psb_s_cuda_elg_asb.o \
|
||||
psb_s_cuda_elg_csmm.o \
|
||||
psb_s_cuda_elg_csmv.o \
|
||||
psb_s_cuda_elg_csput.o \
|
||||
psb_s_cuda_elg_inner_vect_sv.o \
|
||||
psb_s_cuda_elg_mold.o \
|
||||
psb_s_cuda_elg_reallocate_nz.o \
|
||||
psb_s_cuda_elg_scal.o \
|
||||
psb_s_cuda_elg_scals.o \
|
||||
psb_s_cuda_elg_to_gpu.o \
|
||||
psb_s_cuda_elg_from_gpu.o \
|
||||
psb_s_cuda_elg_vect_mv.o \
|
||||
psb_s_cuda_mv_elg_from_coo.o \
|
||||
psb_s_cuda_mv_elg_from_fmt.o \
|
||||
psb_s_cuda_cp_hlg_from_fmt.o \
|
||||
psb_s_cuda_cp_hlg_from_coo.o \
|
||||
psb_d_cuda_cp_hlg_from_fmt.o \
|
||||
psb_d_cuda_cp_hlg_from_coo.o \
|
||||
psb_d_cuda_hlg_allocate_mnnz.o \
|
||||
psb_d_cuda_hlg_csmm.o \
|
||||
psb_d_cuda_hlg_csmv.o \
|
||||
psb_d_cuda_hlg_inner_vect_sv.o \
|
||||
psb_d_cuda_hlg_mold.o \
|
||||
psb_d_cuda_hlg_reallocate_nz.o \
|
||||
psb_d_cuda_hlg_scal.o \
|
||||
psb_d_cuda_hlg_scals.o \
|
||||
psb_d_cuda_hlg_from_gpu.o \
|
||||
psb_d_cuda_hlg_to_gpu.o \
|
||||
psb_d_cuda_hlg_vect_mv.o \
|
||||
psb_s_cuda_hlg_allocate_mnnz.o \
|
||||
psb_s_cuda_hlg_csmm.o \
|
||||
psb_s_cuda_hlg_csmv.o \
|
||||
psb_s_cuda_hlg_inner_vect_sv.o \
|
||||
psb_s_cuda_hlg_mold.o \
|
||||
psb_s_cuda_hlg_reallocate_nz.o \
|
||||
psb_s_cuda_hlg_scal.o \
|
||||
psb_s_cuda_hlg_scals.o \
|
||||
psb_s_cuda_hlg_from_gpu.o \
|
||||
psb_s_cuda_hlg_to_gpu.o \
|
||||
psb_s_cuda_hlg_vect_mv.o \
|
||||
psb_s_cuda_mv_hlg_from_coo.o \
|
||||
psb_s_cuda_cp_hlg_from_coo.o \
|
||||
psb_s_cuda_mv_hlg_from_fmt.o \
|
||||
psb_d_cuda_mv_hlg_from_coo.o \
|
||||
psb_d_cuda_cp_hlg_from_coo.o \
|
||||
psb_d_cuda_mv_hlg_from_fmt.o \
|
||||
psb_s_cuda_hybg_allocate_mnnz.o \
|
||||
psb_s_cuda_hybg_csmm.o \
|
||||
psb_s_cuda_hybg_csmv.o \
|
||||
psb_s_cuda_hybg_reallocate_nz.o \
|
||||
psb_s_cuda_hybg_scal.o \
|
||||
psb_s_cuda_hybg_scals.o \
|
||||
psb_s_cuda_hybg_to_gpu.o \
|
||||
psb_s_cuda_hybg_vect_mv.o \
|
||||
psb_s_cuda_hybg_inner_vect_sv.o \
|
||||
psb_s_cuda_cp_hybg_from_coo.o \
|
||||
psb_s_cuda_cp_hybg_from_fmt.o \
|
||||
psb_s_cuda_mv_hybg_from_fmt.o \
|
||||
psb_s_cuda_mv_hybg_from_coo.o \
|
||||
psb_s_cuda_hybg_mold.o \
|
||||
psb_d_cuda_hybg_allocate_mnnz.o \
|
||||
psb_d_cuda_hybg_csmm.o \
|
||||
psb_d_cuda_hybg_csmv.o \
|
||||
psb_d_cuda_hybg_reallocate_nz.o \
|
||||
psb_d_cuda_hybg_scal.o \
|
||||
psb_d_cuda_hybg_scals.o \
|
||||
psb_d_cuda_hybg_to_gpu.o \
|
||||
psb_d_cuda_hybg_vect_mv.o \
|
||||
psb_d_cuda_hybg_inner_vect_sv.o \
|
||||
psb_d_cuda_cp_hybg_from_coo.o \
|
||||
psb_d_cuda_cp_hybg_from_fmt.o \
|
||||
psb_d_cuda_mv_hybg_from_fmt.o \
|
||||
psb_d_cuda_mv_hybg_from_coo.o \
|
||||
psb_d_cuda_hybg_mold.o \
|
||||
psb_z_cuda_cp_csrg_from_coo.o \
|
||||
psb_z_cuda_cp_csrg_from_fmt.o \
|
||||
psb_z_cuda_cp_elg_from_coo.o \
|
||||
psb_z_cuda_cp_elg_from_fmt.o \
|
||||
psb_c_cuda_cp_csrg_from_coo.o \
|
||||
psb_c_cuda_cp_csrg_from_fmt.o \
|
||||
psb_c_cuda_csrg_allocate_mnnz.o \
|
||||
psb_c_cuda_csrg_csmm.o \
|
||||
psb_c_cuda_csrg_csmv.o \
|
||||
psb_c_cuda_csrg_mold.o \
|
||||
psb_c_cuda_csrg_reallocate_nz.o \
|
||||
psb_c_cuda_csrg_scal.o \
|
||||
psb_c_cuda_csrg_scals.o \
|
||||
psb_c_cuda_csrg_from_gpu.o \
|
||||
psb_c_cuda_csrg_to_gpu.o \
|
||||
psb_c_cuda_csrg_vect_mv.o \
|
||||
psb_c_cuda_csrg_inner_vect_sv.o \
|
||||
psb_z_cuda_csrg_allocate_mnnz.o \
|
||||
psb_z_cuda_csrg_csmm.o \
|
||||
psb_z_cuda_csrg_csmv.o \
|
||||
psb_z_cuda_csrg_mold.o \
|
||||
psb_z_cuda_csrg_reallocate_nz.o \
|
||||
psb_z_cuda_csrg_scal.o \
|
||||
psb_z_cuda_csrg_scals.o \
|
||||
psb_z_cuda_csrg_from_gpu.o \
|
||||
psb_z_cuda_csrg_to_gpu.o \
|
||||
psb_z_cuda_csrg_vect_mv.o \
|
||||
psb_z_cuda_csrg_inner_vect_sv.o \
|
||||
psb_z_cuda_elg_allocate_mnnz.o \
|
||||
psb_z_cuda_elg_asb.o \
|
||||
psb_z_cuda_elg_csmm.o \
|
||||
psb_z_cuda_elg_csmv.o \
|
||||
psb_z_cuda_elg_csput.o \
|
||||
psb_z_cuda_elg_inner_vect_sv.o \
|
||||
psb_z_cuda_elg_mold.o \
|
||||
psb_z_cuda_elg_reallocate_nz.o \
|
||||
psb_z_cuda_elg_scal.o \
|
||||
psb_z_cuda_elg_scals.o \
|
||||
psb_z_cuda_elg_to_gpu.o \
|
||||
psb_z_cuda_elg_from_gpu.o \
|
||||
psb_z_cuda_elg_vect_mv.o \
|
||||
psb_z_cuda_mv_csrg_from_coo.o \
|
||||
psb_z_cuda_mv_csrg_from_fmt.o \
|
||||
psb_z_cuda_mv_elg_from_coo.o \
|
||||
psb_z_cuda_mv_elg_from_fmt.o \
|
||||
psb_c_cuda_mv_csrg_from_coo.o \
|
||||
psb_c_cuda_mv_csrg_from_fmt.o \
|
||||
psb_c_cuda_cp_elg_from_coo.o \
|
||||
psb_c_cuda_cp_elg_from_fmt.o \
|
||||
psb_c_cuda_elg_allocate_mnnz.o \
|
||||
psb_c_cuda_elg_asb.o \
|
||||
psb_c_cuda_elg_csmm.o \
|
||||
psb_c_cuda_elg_csmv.o \
|
||||
psb_c_cuda_elg_csput.o \
|
||||
psb_c_cuda_elg_inner_vect_sv.o \
|
||||
psb_c_cuda_elg_mold.o \
|
||||
psb_c_cuda_elg_reallocate_nz.o \
|
||||
psb_c_cuda_elg_scal.o \
|
||||
psb_c_cuda_elg_scals.o \
|
||||
psb_c_cuda_elg_to_gpu.o \
|
||||
psb_c_cuda_elg_from_gpu.o \
|
||||
psb_c_cuda_elg_vect_mv.o \
|
||||
psb_c_cuda_mv_elg_from_coo.o \
|
||||
psb_c_cuda_mv_elg_from_fmt.o \
|
||||
psb_c_cuda_cp_hlg_from_fmt.o \
|
||||
psb_c_cuda_cp_hlg_from_coo.o \
|
||||
psb_z_cuda_cp_hlg_from_fmt.o \
|
||||
psb_z_cuda_cp_hlg_from_coo.o \
|
||||
psb_z_cuda_hlg_allocate_mnnz.o \
|
||||
psb_z_cuda_hlg_csmm.o \
|
||||
psb_z_cuda_hlg_csmv.o \
|
||||
psb_z_cuda_hlg_inner_vect_sv.o \
|
||||
psb_z_cuda_hlg_mold.o \
|
||||
psb_z_cuda_hlg_reallocate_nz.o \
|
||||
psb_z_cuda_hlg_scal.o \
|
||||
psb_z_cuda_hlg_scals.o \
|
||||
psb_z_cuda_hlg_from_gpu.o \
|
||||
psb_z_cuda_hlg_to_gpu.o \
|
||||
psb_z_cuda_hlg_vect_mv.o \
|
||||
psb_c_cuda_hlg_allocate_mnnz.o \
|
||||
psb_c_cuda_hlg_csmm.o \
|
||||
psb_c_cuda_hlg_csmv.o \
|
||||
psb_c_cuda_hlg_inner_vect_sv.o \
|
||||
psb_c_cuda_hlg_mold.o \
|
||||
psb_c_cuda_hlg_reallocate_nz.o \
|
||||
psb_c_cuda_hlg_scal.o \
|
||||
psb_c_cuda_hlg_scals.o \
|
||||
psb_c_cuda_hlg_from_gpu.o \
|
||||
psb_c_cuda_hlg_to_gpu.o \
|
||||
psb_c_cuda_hlg_vect_mv.o \
|
||||
psb_c_cuda_mv_hlg_from_coo.o \
|
||||
psb_c_cuda_cp_hlg_from_coo.o \
|
||||
psb_c_cuda_mv_hlg_from_fmt.o \
|
||||
psb_z_cuda_mv_hlg_from_coo.o \
|
||||
psb_z_cuda_cp_hlg_from_coo.o \
|
||||
psb_z_cuda_mv_hlg_from_fmt.o \
|
||||
psb_c_cuda_hybg_allocate_mnnz.o \
|
||||
psb_c_cuda_hybg_csmm.o \
|
||||
psb_c_cuda_hybg_csmv.o \
|
||||
psb_c_cuda_hybg_reallocate_nz.o \
|
||||
psb_c_cuda_hybg_scal.o \
|
||||
psb_c_cuda_hybg_scals.o \
|
||||
psb_c_cuda_hybg_to_gpu.o \
|
||||
psb_c_cuda_hybg_vect_mv.o \
|
||||
psb_c_cuda_hybg_inner_vect_sv.o \
|
||||
psb_c_cuda_cp_hybg_from_coo.o \
|
||||
psb_c_cuda_cp_hybg_from_fmt.o \
|
||||
psb_c_cuda_mv_hybg_from_fmt.o \
|
||||
psb_c_cuda_mv_hybg_from_coo.o \
|
||||
psb_c_cuda_hybg_mold.o \
|
||||
psb_z_cuda_hybg_allocate_mnnz.o \
|
||||
psb_z_cuda_hybg_csmm.o \
|
||||
psb_z_cuda_hybg_csmv.o \
|
||||
psb_z_cuda_hybg_reallocate_nz.o \
|
||||
psb_z_cuda_hybg_scal.o \
|
||||
psb_z_cuda_hybg_scals.o \
|
||||
psb_z_cuda_hybg_to_gpu.o \
|
||||
psb_z_cuda_hybg_vect_mv.o \
|
||||
psb_z_cuda_hybg_inner_vect_sv.o \
|
||||
psb_z_cuda_cp_hybg_from_coo.o \
|
||||
psb_z_cuda_cp_hybg_from_fmt.o \
|
||||
psb_z_cuda_mv_hybg_from_fmt.o \
|
||||
psb_z_cuda_mv_hybg_from_coo.o \
|
||||
psb_z_cuda_hybg_mold.o \
|
||||
psb_d_cuda_cp_diag_from_coo.o \
|
||||
psb_d_cuda_mv_diag_from_coo.o \
|
||||
psb_d_cuda_diag_to_gpu.o \
|
||||
psb_d_cuda_diag_csmv.o \
|
||||
psb_d_cuda_diag_mold.o \
|
||||
psb_d_cuda_diag_vect_mv.o \
|
||||
psb_d_cuda_cp_hdiag_from_coo.o \
|
||||
psb_d_cuda_mv_hdiag_from_coo.o \
|
||||
psb_d_cuda_hdiag_to_gpu.o \
|
||||
psb_d_cuda_hdiag_csmv.o \
|
||||
psb_d_cuda_hdiag_mold.o \
|
||||
psb_d_cuda_hdiag_vect_mv.o \
|
||||
psb_s_cuda_cp_hdiag_from_coo.o \
|
||||
psb_s_cuda_mv_hdiag_from_coo.o \
|
||||
psb_s_cuda_hdiag_to_gpu.o \
|
||||
psb_s_cuda_hdiag_csmv.o \
|
||||
psb_s_cuda_hdiag_mold.o \
|
||||
psb_s_cuda_hdiag_vect_mv.o \
|
||||
psb_s_cuda_dnsg_mat_impl.o \
|
||||
psb_d_cuda_dnsg_mat_impl.o \
|
||||
psb_c_cuda_dnsg_mat_impl.o \
|
||||
psb_z_cuda_dnsg_mat_impl.o
|
||||
|
||||
|
||||
# objs, lib and clean name actions, not files.  Declaring them phony keeps
# the rules running even if a file or directory with one of these names
# appears in this directory.
.PHONY: objs lib clean

# Build every object listed in OBJS.
objs: $(OBJS)

# Insert the objects into the archive one directory up
# (ar: c = create if missing, u = only newer members, r = replace).
lib: objs
	ar cur ../$(LIBNAME) $(OBJS)

# Remove the objects built by this Makefile.
clean:
	/bin/rm -f $(OBJS)
|
@ -0,0 +1,56 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
subroutine psb_c_cuda_cp_csrg_from_coo(a,b,info)
  !
  ! Copy the COO matrix b into a: first fill the parent CSR component
  ! through its own cp_from_coo, then call a%to_gpu.
  !
  ! On exit info is psb_success_ when both steps returned 0; a nonzero
  ! status from either step is reported as psb_err_alloc_dealloc_, and
  ! to_gpu is not attempted when the first step fails.
  !
  use psb_base_mod
  use cusparse_mod
  use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_cp_csrg_from_coo
  implicit none

  class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
  class(psb_c_coo_sparse_mat), intent(in)          :: b
  integer(psb_ipk_), intent(out)                   :: info

  info = psb_success_

  ! Host-side conversion, then the GPU step only if that worked.
  call a%psb_c_csr_sparse_mat%cp_from_coo(b,info)
  if (info == 0) call a%to_gpu(info)

  ! Every failure is mapped onto the same error code.
  if (info /= 0) info = psb_err_alloc_dealloc_

end subroutine psb_c_cuda_cp_csrg_from_coo
|
@ -0,0 +1,55 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
subroutine psb_c_cuda_cp_csrg_from_fmt(a,b,info)
  !
  ! Copy a matrix b of any storage format into a.
  !   - b is COO: delegate to a%cp_from_coo.
  !   - otherwise: convert through the parent CSR cp_from_fmt and, if that
  !     returned 0, call a%to_gpu.
  ! info carries the status of the last step executed.
  !
  use psb_base_mod
  use cusparse_mod
  use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_cp_csrg_from_fmt
  implicit none

  class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
  class(psb_c_base_sparse_mat), intent(in)         :: b
  integer(psb_ipk_), intent(out)                   :: info

  info = psb_success_

  select type (src => b)
  type is (psb_c_coo_sparse_mat)
    call a%cp_from_coo(src,info)

  class default
    call a%psb_c_csr_sparse_mat%cp_from_fmt(src,info)
    ! Skip the GPU step when the host-side conversion failed.
    if (info == 0) call a%to_gpu(info)
  end select

end subroutine psb_c_cuda_cp_csrg_from_fmt
|
@ -0,0 +1,58 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
|
||||
subroutine psb_c_cuda_cp_diag_from_coo(a,b,info)
  !
  ! Copy the COO matrix b into a: fill the parent DIA component through
  ! its own cp_from_coo, then call a%to_gpu.
  !
  ! Arguments:
  !   a    (inout)  destination matrix
  !   b    (in)     source matrix in COO format
  !   info (out)    psb_success_ on success; psb_err_alloc_dealloc_ when
  !                 either step returns a nonzero status
  !
  use psb_base_mod
  use diagdev_mod
  use psb_vectordev_mod
  use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_cp_diag_from_coo
  implicit none

  class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a
  class(psb_c_coo_sparse_mat), intent(in)          :: b
  integer(psb_ipk_), intent(out)                   :: info

  info = psb_success_

  call a%psb_c_dia_sparse_mat%cp_from_coo(b,info)
  ! Fix: this status used to be ignored and then overwritten by to_gpu, so
  ! a failed conversion could be reported as success.  Stop here instead,
  ! as the csrg counterpart of this routine does.
  if (info /= 0) goto 9999

  call a%to_gpu(info)
  if (info /= 0) goto 9999

  return

9999 continue
  ! Every failure is reported with the same error code.
  info = psb_err_alloc_dealloc_
  return

end subroutine psb_c_cuda_cp_diag_from_coo
|
@ -0,0 +1,161 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
subroutine psb_c_cuda_cp_elg_from_coo(a,b,info)
  !
  ! Build the device ELL storage of A starting from the COO matrix B.
  !
  ! The per-row entry counts (a%irn) and the row displacements (idisp) are
  ! computed on the host; column indices and coefficients are then handed
  ! to psi_CopyCooToElg together with a freshly allocated device matrix.
  !
  ! Arguments:
  !   a    - destination matrix; existing device storage is released
  !   b    - source matrix in COO format
  !   info - psb_success_ on success, psb_err_alloc_dealloc_ on any failure
  !
  use psb_base_mod
  use elldev_mod
  use psb_vectordev_mod
  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_cp_elg_from_coo
  use psi_ext_util_mod
  use psb_cuda_env_mod
  implicit none

  class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
  class(psb_c_coo_sparse_mat), intent(in)         :: b
  integer(psb_ipk_), intent(out)                  :: info

  !locals
  integer(psb_ipk_) :: nza, nr, nc, nzm, ldv, hacksize
  type(psb_c_coo_sparse_mat)     :: tmp
  integer(psb_ipk_), allocatable :: idisp(:)

  info     = psb_success_
  ! Rows are padded to a multiple of the warp size (never less than 1).
  hacksize = max(1,psb_cuda_WarpSize())
  if (b%is_dev()) call b%sync()

  if (b%is_by_rows()) then
    call build_on_device(b)
  else
    ! The row displacements only describe the entries correctly when the
    ! COO data is ordered by rows: work on a sorted private copy.
    call b%cp_to_coo(tmp,info)
    if (info == psb_success_) call tmp%fix(info)
    if (info == psb_success_) call build_on_device(tmp)
  end if

  if (info /= psb_success_) goto 9999

  ! The up-to-date copy of the matrix now lives on the device.
  call a%set_dev()

  return

9999 continue
  info = psb_err_alloc_dealloc_
  return

contains

  !
  ! Count the rows of SRC (assumed sorted by rows), (re)allocate the
  ! device matrix of A and fill it. Works on the host variables
  ! a, idisp, ldv, nzm, nr, nc, nza, hacksize and info.
  !
  subroutine build_on_device(src)
    class(psb_c_coo_sparse_mat), intent(in) :: src

    call psi_c_count_ell_from_coo(a,src,idisp,ldv,nzm,info,hacksize=hacksize)
    ! Do not let the allocation below overwrite a counting failure.
    if (info /= psb_success_) return

    if (c_associated(a%deviceMat)) then
      call freeEllDevice(a%deviceMat)
    endif

    nr  = src%get_nrows()
    nc  = src%get_ncols()
    nza = src%get_nzeros()
    ! The psb_c_ matrices hold single precision complex coefficients,
    ! hence the complex_float element tag.
    info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_complex_float,1)

    if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, &
         & a%irn,idisp,src%ja,src%val, a%deviceMat)

  end subroutine build_on_device

  !
  ! Compute, for the COO matrix B:
  !   a%irn(i)  - number of entries in row i
  !   idisp(:)  - running sum of the row counts, idisp(1) = 0
  !   nzm       - largest row count
  !   ldv       - number of rows rounded up to a multiple of hacksize
  ! The base-class data of B (sizes, flags) is copied into A as well.
  !
  subroutine psi_c_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize)

    use psb_base_mod
    use psi_ext_util_mod
    implicit none

    class(psb_c_ell_sparse_mat), intent(inout)  :: a
    class(psb_c_coo_sparse_mat), intent(in)     :: b
    integer(psb_ipk_), allocatable, intent(out) :: idisp(:)
    integer(psb_ipk_), intent(out)              :: info, nzm, ldv
    integer(psb_ipk_), intent(in), optional     :: hacksize

    !locals
    integer(psb_ipk_) :: nza, nr, i, ir, hsz_

    info = psb_success_
    ! Give the remaining outputs a defined value on every exit path.
    nzm  = 0
    ldv  = 0

    nr  = b%get_nrows()
    nza = b%get_nzeros()

    hsz_ = 1
    if (present(hacksize)) then
      if (hacksize> 1) hsz_ = hacksize
    end if
    ! Make ldv a multiple of hacksize
    ldv = ((nr+hsz_-1)/hsz_)*hsz_

    a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat

    ! First compute the number of nonzeros in each row.
    call psb_realloc(nr,a%irn,info)
    if (info == psb_success_) call psb_realloc(nr+1,idisp,info)
    if (info /= psb_success_) return

    a%irn = 0
    do i=1, nza
      ir = b%ia(i)
      a%irn(ir) = a%irn(ir) + 1
    end do

    ! Then accumulate displacements and track the longest row.
    a%nzt = 0
    idisp(1) = 0
    do i=1,nr
      nzm = max(nzm,a%irn(i))
      a%nzt = a%nzt + a%irn(i)
      idisp(i+1) = a%nzt
    end do

  end subroutine psi_c_count_ell_from_coo

end subroutine psb_c_cuda_cp_elg_from_coo
|
@ -0,0 +1,89 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
|
||||
subroutine psb_c_cuda_cp_elg_from_fmt(a,b,info)
  !
  ! Copy a matrix B of arbitrary storage format into the CUDA ELL matrix A.
  !
  !   - COO input is delegated to a%cp_from_coo;
  !   - ELL input (or an extension of it) is copied array by array, the
  !     2D arrays being resized to the pitch/row size reported by
  !     FgetEllDeviceParams, and then moved to the device;
  !   - any other format goes through a temporary COO copy.
  !
  ! Arguments:
  !   a    - destination matrix
  !   b    - source matrix
  !   info - zero on success, the code of the first failing step otherwise
  !
  use psb_base_mod
  use elldev_mod
  use psb_vectordev_mod
  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_cp_elg_from_fmt
  implicit none

  class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
  class(psb_c_base_sparse_mat), intent(in)        :: b
  integer(psb_ipk_), intent(out)                  :: info

  !locals
  type(psb_c_coo_sparse_mat) :: tmp
  integer(psb_ipk_)  :: nza, nc, ld, nzm, m, ncp
  type(elldev_parms) :: gpu_parms

  info = psb_success_
  if (b%is_dev()) call b%sync()

  select type (b)
  type is (psb_c_coo_sparse_mat)
    call a%cp_from_coo(b,info)

  class is (psb_c_ell_sparse_mat)
    nzm = psb_size(b%ja,2)
    m   = b%get_nrows()
    nc  = b%get_ncols()
    nza = b%get_nzeros()
    ! The psb_c_ matrices hold single precision complex coefficients,
    ! hence the complex_float element tag.
    gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_complex_float,1)
    ld  = gpu_parms%pitch
    ! Never read more columns than the source arrays actually have.
    ncp = min(nzm,gpu_parms%maxRowSize)
    nzm = gpu_parms%maxRowSize
    a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
    if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info)
    if (info == 0) call psb_safe_cpy( b%irn,   a%irn   , info)
    if (info == 0) call psb_safe_cpy( b%ja ,   a%ja    , info)
    if (info == 0) call psb_safe_cpy( b%val,   a%val   , info)
    if (info == 0) call psb_realloc(ld,nzm,a%ja,info)
    if (info == 0) then
      a%ja(1:m,1:ncp) = b%ja(1:m,1:ncp)
    end if
    if (info == 0) call psb_realloc(ld,nzm,a%val,info)
    if (info == 0) then
      a%val(1:m,1:ncp) = b%val(1:m,1:ncp)
    end if
    a%nzt = nza
    ! Skip the transfer when a host-side step failed, so that to_gpu
    ! cannot overwrite the failure code.
    if (info == 0) call a%to_gpu(info)

  class default
    call b%cp_to_coo(tmp,info)
    if (info == psb_success_) call a%mv_from_coo(tmp,info)
  end select

end subroutine psb_c_cuda_cp_elg_from_fmt
|
@ -0,0 +1,63 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
subroutine psb_c_cuda_cp_hdiag_from_coo(a,b,info)
  !
  ! Copy the COO matrix B into the CUDA HDIAG matrix A.
  !
  ! The hack size of A is set to the value returned by psb_cuda_WarpSize,
  ! the conversion is carried out on the host by the parent HDIA type and
  ! the result is then pushed to the device with a%to_gpu.
  !
  ! Arguments:
  !   a    - destination matrix
  !   b    - source matrix in COO format
  !   info - psb_success_ on success, psb_err_alloc_dealloc_ on any failure
  !
  use psb_base_mod
  use hdiagdev_mod
  use psb_vectordev_mod
  use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_cp_hdiag_from_coo
  use psb_cuda_env_mod
  implicit none

  class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a
  class(psb_c_coo_sparse_mat), intent(in)           :: b
  integer(psb_ipk_), intent(out)                    :: info

  info = psb_success_

  a%hacksize = psb_cuda_WarpSize()

  call a%psb_c_hdia_sparse_mat%cp_from_coo(b,info)
  ! Only move data to the device if the host conversion succeeded;
  ! otherwise to_gpu would overwrite (and hide) the failure code.
  if (info == psb_success_) call a%to_gpu(info)

  if (info /= psb_success_) info = psb_err_alloc_dealloc_

  return

end subroutine psb_c_cuda_cp_hdiag_from_coo
|
@ -0,0 +1,190 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
|
||||
subroutine psb_c_cuda_cp_hlg_from_coo(a,b,info)
  !
  ! Build the device HLL storage of A starting from the COO matrix B.
  !
  ! Row counts (a%irn), hack offsets (a%hkoffs) and row displacements
  ! (idisp) are computed on the host; column indices and coefficients are
  ! then handed to psi_CopyCooToHlg together with a freshly allocated
  ! device matrix.
  !
  ! Arguments:
  !   a    - destination matrix; existing device storage is released
  !   b    - source matrix in COO format
  !   info - psb_success_ on success. A failure while computing the hack
  !          offsets is reported with the code returned by that step; any
  !          other failure is reported as psb_err_alloc_dealloc_.
  !
  use psb_base_mod
  use hlldev_mod
  use psb_vectordev_mod
  use psb_cuda_env_mod
  use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_cp_hlg_from_coo
  implicit none

  class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a
  class(psb_c_coo_sparse_mat), intent(in)         :: b
  integer(psb_ipk_), intent(out)                  :: info

  !locals
  type(psb_c_coo_sparse_mat)     :: tmp
  integer(psb_ipk_), allocatable :: idisp(:)
  integer(psb_ipk_)  :: hksz, nza, nr, nc, isz, noffs
  logical            :: hckoff_failed
  logical, parameter :: debug=.false.

  info          = psb_success_
  hckoff_failed = .false.
  ! Hack size: the warp size, never less than 1.
  hksz = max(1,psb_cuda_WarpSize())

  if (b%is_by_rows()) then
    call build_on_device(b)
  else
    ! This is to guarantee tmp%is_by_rows()
    call b%cp_to_coo(tmp,info)
    if (info == 0) call tmp%fix(info)
    if (info == 0) call build_on_device(tmp)
    call tmp%free()
  end if

  ! A failure in the offsets computation has already been reported on
  ! unit 0 and is returned with its own code.
  if (hckoff_failed) return
  if (info /= 0) goto 9999

  ! The up-to-date copy of the matrix now lives on the device.
  call a%set_dev()

  return

9999 continue
  info = psb_err_alloc_dealloc_
  return

contains

  !
  ! Compute the hack layout of SRC (assumed sorted by rows), (re)allocate
  ! the device matrix of A and fill it. Works on the host variables
  ! a, idisp, nr, nc, nza, noffs, isz, hksz, info and hckoff_failed.
  !
  subroutine build_on_device(src)
    class(psb_c_coo_sparse_mat), intent(in) :: src

    nr  = src%get_nrows()
    nc  = src%get_ncols()
    nza = src%get_nzeros()
    if (debug) write(0,*) 'Copying through GPU',nza
    call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,src,info)
    if (info /=0) then
      write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz
      hckoff_failed = .true.
      return
    end if
    if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1))

    if (c_associated(a%deviceMat)) then
      call freeHllDevice(a%deviceMat)
    endif
    ! The psb_c_ matrices hold single precision complex coefficients,
    ! hence the complex_float element tag.
    info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_complex_float,1)
    if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,&
         & a%irn,a%hkoffs,idisp,src%ja, src%val, a%deviceMat)

  end subroutine build_on_device

  !
  ! Compute, for the COO matrix B and hack size HKSZ:
  !   a%irn(i)    - number of entries in row i
  !   idisp(:)    - running sum of the row counts, idisp(1) = 0
  !   a%hkoffs(:) - offset of each hack; a hack of hksz rows takes
  !                 (longest row in the hack)*hksz slots
  !   noffs       - number of hacks
  !   isz         - total number of slots, i.e. a%hkoffs(noffs+1)
  ! The base-class data of B (sizes, flags) is copied into A as well.
  !
  subroutine psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info)
    use psb_base_mod
    use psi_ext_util_mod
    implicit none
    class(psb_c_hll_sparse_mat), intent(inout)  :: a
    class(psb_c_coo_sparse_mat), intent(in)     :: b
    integer(psb_ipk_), allocatable, intent(out) :: idisp(:)
    integer(psb_ipk_), intent(in)               :: hksz
    integer(psb_ipk_), intent(out)              :: info, noffs, isz

    !locals
    integer(psb_ipk_)  :: nza, nr, nc, i, j, k, ir, mxrwl
    logical, parameter :: debug=.false.

    info  = 0
    ! Give the outputs a defined value on every exit path: the caller
    ! prints them when reporting a failure.
    noffs = 0
    isz   = 0
    nr  = b%get_nrows()
    nc  = b%get_ncols()
    nza = b%get_nzeros()

    a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
    if (debug) write(0,*) 'Start compute hckoff_from_coo',nr,nc,nza
    ! First compute the number of nonzeros in each row.
    call psb_realloc(nr,a%irn,info)
    if (info == 0) call psb_realloc(nr+1,idisp,info)
    if (info /= 0) return
    a%irn = 0
    if (debug) then
      ! Debug builds validate each row index before counting it.
      do i=1, nza
        if ((1<=b%ia(i)).and.(b%ia(i)<= nr)) then
          a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1
        else
          write(0,*) 'Out of bouds IA ',i,b%ia(i),nr
        end if
      end do
    else
      do i=1, nza
        a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1
      end do
    end if
    a%nzt = nza

    ! Second. Figure out the block offsets.
    call a%set_hksz(hksz)
    noffs = (nr+hksz-1)/hksz
    call psb_realloc(noffs+1,a%hkoffs,info)
    if (debug) write(0,*) ' noffsets ',noffs,info
    if (info /= 0) return
    a%hkoffs(1) = 0
    j=1
    idisp(1) = 0
    do i=1,nr,hksz
      ! ir: number of rows in this hack (the last one may be shorter)
      ir    = min(hksz,nr-i+1)
      mxrwl = a%irn(i)
      idisp(i+1) = idisp(i) + a%irn(i)
      do k=1,ir-1
        idisp(i+k+1) = idisp(i+k) + a%irn(i+k)
        mxrwl = max(mxrwl,a%irn(i+k))
      end do
      a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz
      j = j + 1
    end do

    !
    ! At this point a%hkoffs(noffs+1) contains the allocation
    ! size a%ja a%val.
    !
    isz = a%hkoffs(noffs+1)
  end subroutine psi_compute_hckoff_from_coo

end subroutine psb_c_cuda_cp_hlg_from_coo
|
@ -0,0 +1,62 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
|
||||
subroutine psb_c_cuda_cp_hlg_from_fmt(a,b,info)
  !
  ! Copy a matrix B of arbitrary storage format into the CUDA HLL matrix A.
  !
  ! COO input is delegated to a%cp_from_coo; every other format is
  ! converted on the host by the parent HLL type and then moved to the
  ! device with a%to_gpu.
  !
  ! Arguments:
  !   a    - destination matrix
  !   b    - source matrix
  !   info - psb_success_ on success, psb_err_alloc_dealloc_ on any failure
  !
  use psb_base_mod
  use hlldev_mod
  use psb_vectordev_mod
  use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_cp_hlg_from_fmt
  implicit none

  class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a
  class(psb_c_base_sparse_mat), intent(in)        :: b
  integer(psb_ipk_), intent(out)                  :: info

  info = psb_success_

  select type(b)
  type is (psb_c_coo_sparse_mat)
    call a%cp_from_coo(b,info)

  class default
    call a%psb_c_hll_sparse_mat%cp_from_fmt(b,info)
    ! Move to the device only after a successful host conversion.
    if (info == 0) then
      call a%to_gpu(info)
    end if
  end select

  ! Every failure is reported to the caller with the same code.
  if (info /= 0) then
    info = psb_err_alloc_dealloc_
  end if

  return

end subroutine psb_c_cuda_cp_hlg_from_fmt
|
@ -0,0 +1,58 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
#if CUDA_SHORT_VERSION <= 10
|
||||
|
||||
subroutine psb_c_cuda_cp_hybg_from_coo(a,b,info)
  !
  ! Copy the COO matrix B into the CUDA HYB matrix A.
  !
  ! The conversion is carried out on the host by the parent CSR type and
  ! the result is then pushed to the device with a%to_gpu.
  !
  ! Arguments:
  !   a    - destination matrix
  !   b    - source matrix in COO format
  !   info - psb_success_ on success, psb_err_alloc_dealloc_ on any failure
  !
  use psb_base_mod
  use cusparse_mod
  use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_cp_hybg_from_coo
  implicit none

  class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a
  class(psb_c_coo_sparse_mat), intent(in)          :: b
  integer(psb_ipk_), intent(out)                   :: info

  info = psb_success_

  call a%psb_c_csr_sparse_mat%cp_from_coo(b,info)
  ! Move to the device only after a successful host conversion.
  if (info == 0) then
    call a%to_gpu(info)
  end if

  ! Every failure is reported to the caller with the same code.
  if (info /= 0) then
    info = psb_err_alloc_dealloc_
  end if

  return

end subroutine psb_c_cuda_cp_hybg_from_coo
|
||||
#endif
|
@ -0,0 +1,56 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
#if CUDA_SHORT_VERSION <= 10
|
||||
|
||||
subroutine psb_c_cuda_cp_hybg_from_fmt(a,b,info)
  !
  ! Copy a matrix B of arbitrary storage format into the CUDA HYB matrix A.
  !
  ! COO input is delegated to a%cp_from_coo; every other format is
  ! converted on the host by the parent CSR type and then moved to the
  ! device with a%to_gpu.
  !
  ! Arguments:
  !   a    - destination matrix
  !   b    - source matrix
  !   info - zero on success, the code of the failing step otherwise
  !
  use psb_base_mod
  use cusparse_mod
  use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_cp_hybg_from_fmt
  implicit none

  class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a
  class(psb_c_base_sparse_mat), intent(inout)      :: b
  integer(psb_ipk_), intent(out)                   :: info

  info = psb_success_

  select type(b)
  type is (psb_c_coo_sparse_mat)
    call a%cp_from_coo(b,info)

  class default
    call a%psb_c_csr_sparse_mat%cp_from_fmt(b,info)
    ! Move to the device only after a successful host conversion.
    if (info == 0) then
      call a%to_gpu(info)
    end if
  end select

end subroutine psb_c_cuda_cp_hybg_from_fmt
|
||||
#endif
|
@ -0,0 +1,62 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
|
||||
subroutine psb_c_cuda_csrg_allocate_mnnz(m,n,a,nz)
  !
  ! Allocate the CUDA CSR matrix A with M rows and N columns.
  !
  ! Host storage is allocated by the parent CSR type; initFcusparse is
  ! then called and the (empty) matrix is pushed to the device with
  ! a%to_gpu.
  !
  ! Arguments:
  !   m, n - number of rows and columns
  !   a    - matrix to be allocated
  !   nz   - optional size hint, forwarded unchanged to the host
  !          allocation and to a%to_gpu (as nzrm)
  !
  ! Failures are reported through the PSBLAS error stack.
  !
  use psb_base_mod
  use cusparse_mod
  use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_allocate_mnnz
  implicit none
  integer(psb_ipk_), intent(in)                    :: m,n
  class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
  integer(psb_ipk_), intent(in), optional          :: nz

  integer(psb_ipk_) :: err_act, info
  character(len=20) :: name='allocate_mnz'

  call psb_erractionsave(err_act)
  info = psb_success_

  call a%psb_c_csr_sparse_mat%allocate(m,n,nz)

  info = initFcusparse()
  if (info == 0) call a%to_gpu(info,nzrm=nz)
  if (info /= 0) then
    ! Record the failure before invoking the handler, so that the error
    ! stack it processes is not empty.
    info = psb_err_alloc_dealloc_
    call psb_errpush(info,name)
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_cuda_csrg_allocate_mnnz
|
@ -0,0 +1,126 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
|
||||
subroutine psb_c_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans)
  !
  ! Sparse matrix times dense multivector for the CSRG storage format,
  ! implementing the csmm method of the parent CSR class on rank-2 X and Y.
  !
  ! Arguments:
  !   alpha, beta - complex scalars forwarded to the product routine
  !   a           - the matrix; must be in the assembled state
  !   x(:,:)      - input multivector; needs at least as many rows as
  !                 op(A) has columns
  !   y(:,:)      - input/output multivector; needs at least as many rows
  !                 as op(A) has rows
  !   info        - psb_success_ on success, nonzero otherwise
  !   trans       - optional; 'N' (default), 'T' or 'C' (case-insensitive)
  !
  ! Transposed products are delegated to the parent CSR implementation.
  ! Otherwise X and Y are copied into temporary device buffers, the product
  ! is delegated to spmvCSRGDevice, and Y is copied back. Only the first
  ! min(size(x,2),size(y,2)) columns are processed on that path.
  !
  use iso_c_binding, only : c_ptr, c_null_ptr, c_associated
  use psb_base_mod
  use cusparse_mod
  use elldev_mod
  use psb_vectordev_mod
  use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_csmm
  implicit none
  class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a
  complex(psb_spk_), intent(in)          :: alpha, beta, x(:,:)
  complex(psb_spk_), intent(inout)       :: y(:,:)
  integer(psb_ipk_), intent(out)         :: info
  character, optional, intent(in)        :: trans

  character          :: trans_
  integer(psb_ipk_)  :: m, n, nxy
  type(c_ptr)        :: gpX, gpY
  logical            :: tra
  integer(psb_ipk_)  :: err_act
  character(len=20)  :: name='c_cuda_csrg_csmm'
  logical, parameter :: debug=.false.

  info = psb_success_
  call psb_erractionsave(err_act)

  ! Start from null handles so cleanup can tell which buffers exist.
  gpX = c_null_ptr
  gpY = c_null_ptr

  if (present(trans)) then
    trans_ = trans
  else
    trans_ = 'N'
  end if

  if (.not.a%is_asb()) then
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  endif
  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')

  ! m/n are the row/column counts of op(A).
  if (tra) then
    m = a%get_ncols()
    n = a%get_nrows()
  else
    n = a%get_ncols()
    m = a%get_nrows()
  end if

  if (size(x,1)<n) then
    info = 36
    call psb_errpush(info,name,i_err=(/3*ione,n,izero,izero,izero/))
    goto 9999
  end if

  if (size(y,1)<m) then
    info = 36
    call psb_errpush(info,name,i_err=(/5*ione,m,izero,izero,izero/))
    goto 9999
  end if

  if (tra) then
    call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans)
  else
    !
    ! Just to test, move X/Y to/from the GPU.
    !
    nxy = min(size(x,2),size(y,2))
    if (info == 0) &
         & info = FallocMultiVecDevice(gpX,nxy,size(x,1),spgpu_type_complex_float)
    if (info == 0) &
         & info = writeMultiVecDevice(gpX,x,nxy)
    if (info == 0) &
         & info = FallocMultiVecDevice(gpY,nxy,size(y,1),spgpu_type_complex_float)
    if (info == 0) &
         & info = writeMultiVecDevice(gpY,y,nxy)

    if (info == 0) &
         & info = spmvCSRGDevice(a%deviceMat,alpha,gpX,beta,gpY)
    if (info == 0) &
         & info = readMultiVecDevice(gpY,y,nxy)

    ! Release the temporaries on every path, so a failure in any of the
    ! steps above does not leak device memory.
    if (c_associated(gpX)) call freeMultiVecDevice(gpX)
    if (c_associated(gpY)) call freeMultiVecDevice(gpY)

    if (info /= 0) then
      ! Record the failure so the error handler has something to report;
      ! info keeps the code returned by the failing device call.
      call psb_errpush(psb_err_from_subroutine_ai_,name,&
           & a_err='csrg device spmm',i_err=(/info,izero,izero,izero,izero/))
      goto 9999
    end if
  endif

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_cuda_csrg_csmm
|
@ -0,0 +1,131 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
|
||||
subroutine psb_c_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans)
  !
  ! Sparse matrix times dense vector for the CSRG storage format,
  ! implementing the csmv method of the parent CSR class on rank-1 X and Y.
  !
  ! Arguments:
  !   alpha, beta - complex scalars forwarded to the product routine
  !   a           - the matrix; must be in the assembled state
  !   x(:)        - input vector; needs at least as many entries as op(A)
  !                 has columns
  !   y(:)        - input/output vector; needs at least as many entries as
  !                 op(A) has rows
  !   info        - psb_success_ on success, nonzero otherwise
  !   trans       - optional; 'N' (default), 'T' or 'C' (case-insensitive)
  !
  ! Transposed products are delegated to the parent CSR implementation.
  ! Otherwise X and Y are staged in temporary device buffers, the product
  ! is delegated to spmvCSRGDevice, and Y is copied back.
  !
  use iso_c_binding, only : c_ptr, c_null_ptr, c_associated
  use psb_base_mod
  use cusparse_mod
  use elldev_mod
  use psb_vectordev_mod
  use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_csmv
  implicit none
  class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a
  complex(psb_spk_), intent(in)          :: alpha, beta, x(:)
  complex(psb_spk_), intent(inout)       :: y(:)
  integer(psb_ipk_), intent(out)         :: info
  character, optional, intent(in)        :: trans

  character          :: trans_
  integer(psb_ipk_)  :: m, n
  type(c_ptr)        :: gpX
  type(c_ptr)        :: gpY
  logical            :: tra
  integer(psb_ipk_)  :: err_act
  character(len=20)  :: name='c_cuda_csrg_csmv'
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  info = psb_success_

  ! Start from null handles so cleanup can tell which buffers exist.
  gpX = c_null_ptr
  gpY = c_null_ptr

  if (present(trans)) then
    trans_ = trans
  else
    trans_ = 'N'
  end if

  if (.not.a%is_asb()) then
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  endif

  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')

  ! m/n are the row/column counts of op(A).
  if (tra) then
    m = a%get_ncols()
    n = a%get_nrows()
  else
    n = a%get_ncols()
    m = a%get_nrows()
  end if

  if (size(x,1)<n) then
    info = 36
    call psb_errpush(info,name,i_err=(/3*ione,n,izero,izero,izero/))
    goto 9999
  end if

  if (size(y,1)<m) then
    info = 36
    call psb_errpush(info,name,i_err=(/5*ione,m,izero,izero,izero/))
    goto 9999
  end if

  if (tra) then
    call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans)
  else
    !
    ! Just to test, move X/Y to/from the GPU.
    !
    if (info == 0) &
         & info = FallocMultiVecDevice(gpX,1,size(x,1),spgpu_type_complex_float)
    ! X is uploaded only when alpha is nonzero.
    if (alpha /= dzero) then
      if (info == 0) &
           & info = writeMultiVecDevice(gpX,x)
    end if
    if (info == 0) &
         & info = FallocMultiVecDevice(gpY,1,size(y,1),spgpu_type_complex_float)
    ! Y is uploaded only when beta is nonzero.
    if (beta /= dzero) then
      if (info == 0) &
           & info = writeMultiVecDevice(gpY,y)
    end if

    if (info == 0) &
         & info = spmvCSRGDevice(a%deviceMat,alpha,gpX,beta,gpY)
    if (info == 0) &
         & info = readMultiVecDevice(gpY,y)

    ! Release the temporaries on every path, so a failure in any of the
    ! steps above does not leak device memory.
    if (c_associated(gpX)) call freeMultiVecDevice(gpX)
    if (c_associated(gpY)) call freeMultiVecDevice(gpY)

    if (info /= 0) then
      ! Record the failure so the error handler has something to report;
      ! info keeps the code returned by the failing device call.
      call psb_errpush(psb_err_from_subroutine_ai_,name,&
           & a_err='csrg device spmv',i_err=(/info,izero,izero,izero,izero/))
      goto 9999
    end if
  endif

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_cuda_csrg_csmv
|
@ -0,0 +1,67 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
|
||||
subroutine psb_c_cuda_csrg_from_gpu(a,info)
  !
  ! Refresh the host-side CSR arrays (irp, ja, val) of A from its device
  ! copy and mark the matrix as synchronized.
  !
  ! Arguments:
  !   a    - the matrix; if it has no device copy it is freed and the
  !          routine returns with info = 0
  !   info - 0 on success; otherwise the code returned by the failing
  !          query, reallocation or transfer step
  !
  ! On any failure the routine returns without touching the index arrays
  ! any further and without flagging the matrix as synchronized.
  !
  use psb_base_mod
  use elldev_mod
  use psb_vectordev_mod
  use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_from_gpu
  implicit none
  class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
  integer(psb_ipk_), intent(out)                  :: info

  integer(psb_ipk_) :: m, n, nz

  info = 0

  if (.not.(c_associated(a%deviceMat%mat))) then
    call a%free()
    return
  end if

  ! Ask the device copy for its dimensions to size the host arrays.
  info = CSRGDeviceGetParms(a%deviceMat,m,n,nz)
  if (info /= psb_success_) return

  if (info == 0) call psb_realloc(m+1,a%irp,info)
  if (info == 0) call psb_realloc(nz,a%ja,info)
  if (info == 0) call psb_realloc(nz,a%val,info)
  if (info == 0) info = &
       & CSRGDevice2Host(a%deviceMat,m,n,nz,a%irp,a%ja,a%val)

  ! Stop here on failure: the host arrays may be unallocated or hold
  ! garbage, so they must be neither shifted nor declared in sync.
  if (info /= 0) return

#if (CUDA_SHORT_VERSION <= 10) || (CUDA_VERSION < 11030)
  ! Shift indices by one for these CUDA versions
  ! (presumably zero-based on the device, one-based on the host -- confirm).
  a%irp(:) = a%irp(:)+1
  a%ja(:)  = a%ja(:)+1
#endif

  call a%set_sync()

end subroutine psb_c_cuda_csrg_from_gpu
|
@ -0,0 +1,125 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
subroutine psb_c_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans)
  !
  ! Triangular-solve kernel for the CSRG storage format operating on
  ! encapsulated vectors.
  !
  ! Arguments:
  !   alpha, beta - complex scalars forwarded to the solve routine
  !   a           - the matrix; must be in the assembled state
  !   x, y        - input and input/output vectors
  !   info        - psb_success_ on success, an error code otherwise
  !   trans       - optional; 'N' (default), 'T' or 'C' (case-insensitive)
  !
  ! Dispatch:
  !   * transposed solve or nonzero beta: both vectors are synchronized and
  !     the parent CSR implementation is used; Y is flagged as host-updated;
  !   * otherwise, when both X and Y are psb_c_vect_cuda, the solve is
  !     delegated to spsvCSRGDevice and Y is flagged as device-updated;
  !   * otherwise plain array copies of X and Y go through the parent CSR
  !     implementation and Y is rebuilt from the result.
  !
  use psb_base_mod
  use elldev_mod
  use psb_vectordev_mod
  use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_inner_vect_sv
  use psb_c_cuda_vect_mod
  implicit none
  class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a
  complex(psb_spk_), intent(in)                :: alpha, beta
  class(psb_c_base_vect_type), intent(inout)   :: x, y
  integer(psb_ipk_), intent(out)               :: info
  character, optional, intent(in)              :: trans

  complex(psb_spk_), allocatable :: rx(:), ry(:)
  logical            :: tra
  character          :: trans_
  integer(psb_ipk_)  :: err_act
  ! Long enough for the full routine name (25 characters).
  character(len=32)  :: name='c_cuda_csrg_inner_vect_sv'
  logical, parameter :: debug=.false.

  call psb_get_erraction(err_act)
  info = psb_success_

  if (present(trans)) then
    trans_ = trans
  else
    trans_ = 'N'
  end if

  if (.not.a%is_asb()) then
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  endif

  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')

  if (tra.or.(beta/=dzero)) then
    call x%sync()
    call y%sync()
    call a%psb_c_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans)
    call y%set_host()
  else
    ! From here on beta is zero and the solve is not transposed.
    select type (xx => x)
    type is (psb_c_vect_cuda)
      select type(yy => y)
      type is (psb_c_vect_cuda)
        if (xx%is_host()) call xx%sync()
        ! beta is zero on this branch, so Y is not synchronized first.
        info = spsvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,&
             & beta,yy%deviceVect)
        if (info /= 0) then
          call psb_errpush(psb_err_from_subroutine_ai_,name,&
               & a_err='spsvCSRGDevice',i_err=(/info,izero,izero,izero,izero/))
          info = psb_err_from_subroutine_ai_
          goto 9999
        end if
        call yy%set_dev()
      class default
        ! Y is not a CUDA vector: work on plain array copies.
        rx = xx%get_vect()
        ry = y%get_vect()
        call a%psb_c_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info)
        call y%bld(ry)
      end select
    class default
      ! X is not a CUDA vector: work on plain array copies.
      rx = x%get_vect()
      ry = y%get_vect()
      call a%psb_c_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info)
      call y%bld(ry)
    end select
  end if
  if (info /= psb_success_) then
    info = psb_err_from_subroutine_
    call psb_errpush(info,name, a_err='csrg_vect_sv')
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_cuda_csrg_inner_vect_sv
|
@ -0,0 +1,65 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
|
||||
subroutine psb_c_cuda_csrg_mold(a,b,info)
  !
  ! Make B a freshly allocated, empty matrix with the same dynamic type
  ! as A (psb_c_cuda_csrg_sparse_mat).
  !
  ! Arguments:
  !   a    - the matrix whose type is to be reproduced
  !   b    - on entry possibly allocated, in which case it is freed and
  !          deallocated first; on exit allocated as a CSRG matrix
  !   info - 0 on success, psb_err_alloc_dealloc_ when the deallocation
  !          or the allocation fails
  !
  use psb_base_mod
  use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_mold
  implicit none
  class(psb_c_cuda_csrg_sparse_mat), intent(in)            :: a
  class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
  integer(psb_ipk_), intent(out)                           :: info

  logical, parameter :: debug=.false.
  character(len=20)  :: name='csrg_mold'
  integer(psb_ipk_)  :: err_act

  call psb_get_erraction(err_act)

  info = 0
  ! Dispose of whatever B currently holds.
  if (allocated(b)) then
    call b%free()
    deallocate(b,stat=info)
  end if
  ! Allocate the new object only if the cleanup went through.
  if (info == 0) allocate(psb_c_cuda_csrg_sparse_mat :: b, stat=info)

  ! Success: nothing else to do.
  if (info == psb_success_) return

  ! Failure: report a generic allocation error through the error stack.
  info = psb_err_alloc_dealloc_
  call psb_errpush(info, name)
  call psb_error_handler(err_act)

  return

end subroutine psb_c_cuda_csrg_mold
|
@ -0,0 +1,64 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
|
||||
subroutine psb_c_cuda_csrg_reallocate_nz(nz,a)
  !
  ! Resize the storage of A to hold NZ entries: the host-side arrays are
  ! reallocated through the parent CSR class and the device copy is then
  ! rebuilt with to_gpu.
  !
  ! Arguments:
  !   nz - requested number of entries
  !   a  - the matrix to resize
  !
  ! The interface has no info argument, so a failure of to_gpu is reported
  ! by pushing an entry on the error stack before the handler is invoked.
  !
  use psb_base_mod
  use cusparse_mod
  use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_reallocate_nz
  implicit none
  integer(psb_ipk_), intent(in)                    :: nz
  class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a

  integer(psb_ipk_)  :: err_act, info
  ! Long enough for the full routine name (25 characters).
  character(len=32)  :: name='c_cuda_csrg_reallocate_nz'
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  info = psb_success_

  !
  ! What should this really do???
  !
  call a%psb_c_csr_sparse_mat%reallocate(nz)

  call a%to_gpu(info,nzrm=nz)
  if (info /= 0) then
    ! Without this entry the failure would leave no trace for the caller.
    call psb_errpush(psb_err_from_subroutine_ai_,name,&
         & a_err='to_gpu',i_err=(/info,izero,izero,izero,izero/))
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_cuda_csrg_reallocate_nz
|
@ -0,0 +1,67 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
|
||||
subroutine psb_c_cuda_csrg_scal(d,a,info,side)
  !
  ! Scale the matrix by the entries of the vector D.
  !
  ! Arguments:
  !   d(:) - scaling factors
  !   a    - the matrix, updated in place
  !   info - psb_success_ on success, nonzero otherwise
  !   side - optional, forwarded unchanged to the parent CSR scal method
  !
  ! The scaling is done by the parent CSR class after pulling the data
  ! back from the device if the device copy is the current one; the
  ! device copy is then rebuilt with to_gpu.
  !
  use psb_base_mod
  use cusparse_mod
  use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_scal
  implicit none
  class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
  complex(psb_spk_), intent(in)                    :: d(:)
  integer(psb_ipk_), intent(out)                   :: info
  character, intent(in), optional                  :: side

  logical, parameter :: debug=.false.
  character(len=20)  :: name='scal'
  integer(psb_ipk_)  :: err_act

  info = psb_success_
  call psb_erractionsave(err_act)

  ! Make sure the host copy is current before modifying it.
  if (a%is_dev()) call a%sync()

  call a%psb_c_csr_sparse_mat%scal(d,info,side=side)
  ! Push the scaled matrix back to the device only if scaling succeeded.
  if (info == 0) call a%to_gpu(info)

  if (info == 0) then
    call psb_erractionrestore(err_act)
  else
    call psb_error_handler(err_act)
  end if

  return

end subroutine psb_c_cuda_csrg_scal
|
@ -0,0 +1,65 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
|
||||
subroutine psb_c_cuda_csrg_scals(d,a,info)
  !
  ! Scale the matrix by the scalar D.
  !
  ! Arguments:
  !   d    - scaling factor
  !   a    - the matrix, updated in place
  !   info - psb_success_ on success, nonzero otherwise
  !
  ! The scaling is done by the parent CSR class after pulling the data
  ! back from the device if the device copy is the current one; the
  ! device copy is then rebuilt with to_gpu.
  !
  use psb_base_mod
  use cusparse_mod
  use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_scals
  implicit none
  class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
  complex(psb_spk_), intent(in)                    :: d
  integer(psb_ipk_), intent(out)                   :: info

  logical, parameter :: debug=.false.
  character(len=20)  :: name='scal'
  integer(psb_ipk_)  :: err_act

  info = psb_success_
  call psb_erractionsave(err_act)

  ! Make sure the host copy is current before modifying it.
  if (a%is_dev()) call a%sync()

  call a%psb_c_csr_sparse_mat%scal(d,info)
  ! Push the scaled matrix back to the device only if scaling succeeded.
  if (info == 0) call a%to_gpu(info)

  if (info == 0) then
    call psb_erractionrestore(err_act)
  else
    call psb_error_handler(err_act)
  end if

  return

end subroutine psb_c_cuda_csrg_scals
|
@ -0,0 +1,378 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
|
||||
subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm)
|
||||
|
||||
use psb_base_mod
|
||||
use cusparse_mod
|
||||
use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_to_gpu
|
||||
implicit none
|
||||
class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
integer(psb_ipk_), intent(in), optional :: nzrm
|
||||
|
||||
integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize,nz
|
||||
integer(psb_ipk_) :: nzdi,i,j,k,nrz
|
||||
integer(psb_ipk_), allocatable :: irpdi(:),jadi(:)
|
||||
complex(psb_spk_), allocatable :: valdi(:)
|
||||
|
||||
info = 0
|
||||
|
||||
if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return
|
||||
|
||||
m = a%get_nrows()
|
||||
n = a%get_ncols()
|
||||
nz = a%get_nzeros()
|
||||
if (c_associated(a%deviceMat%Mat)) then
|
||||
info = CSRGDeviceFree(a%deviceMat)
|
||||
end if
|
||||
#if (CUDA_SHORT_VERSION <= 10 )
|
||||
if (a%is_unit()) then
|
||||
!
|
||||
! CUSPARSE has the habit of storing the diagonal and then ignoring,
|
||||
! whereas we do not store it. Hence this adapter code.
|
||||
!
|
||||
nzdi = nz + m
|
||||
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi)
|
||||
if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one)
|
||||
if (info == 0) then
|
||||
if (a%is_unit()) then
|
||||
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
|
||||
else
|
||||
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
|
||||
end if
|
||||
end if
|
||||
!!! We are explicitly adding the diagonal
|
||||
!! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
|
||||
if ((info == 0) .and. a%is_triangle()) then
|
||||
!info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular)
|
||||
if ((info == 0).and.a%is_upper()) then
|
||||
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
|
||||
else
|
||||
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
|
||||
end if
|
||||
end if
|
||||
if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info)
|
||||
if (info == 0) then
|
||||
irpdi(1) = 1
|
||||
if (a%is_triangle().and.a%is_upper()) then
|
||||
do i=1,m
|
||||
j = irpdi(i)
|
||||
jadi(j) = i
|
||||
valdi(j) = cone
|
||||
nrz = a%irp(i+1)-a%irp(i)
|
||||
jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)
|
||||
valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1)
|
||||
irpdi(i+1) = j + nrz + 1
|
||||
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
|
||||
end do
|
||||
else
|
||||
do i=1,m
|
||||
j = irpdi(i)
|
||||
nrz = a%irp(i+1)-a%irp(i)
|
||||
jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)
|
||||
valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1)
|
||||
jadi(j+nrz) = i
|
||||
valdi(j+nrz) = cone
|
||||
irpdi(i+1) = j + nrz + 1
|
||||
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
|
||||
end do
|
||||
end if
|
||||
end if
|
||||
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi)
|
||||
|
||||
else
|
||||
|
||||
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz)
|
||||
if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one)
|
||||
!!$ if (info == 0) then
|
||||
!!$ if (a%is_unit()) then
|
||||
!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
|
||||
!!$ else
|
||||
!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
|
||||
!!$ end if
|
||||
!!$ end if
|
||||
if ((info == 0) .and. a%is_triangle()) then
|
||||
!info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular)
|
||||
if ((info == 0).and.a%is_upper()) then
|
||||
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
|
||||
else
|
||||
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
|
||||
end if
|
||||
end if
|
||||
|
||||
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val)
|
||||
endif
|
||||
|
||||
if ((info == 0) .and. a%is_triangle()) then
|
||||
info = CSRGDeviceCsrsmAnalysis(a%deviceMat)
|
||||
end if
|
||||
|
||||
#elif CUDA_VERSION < 11030
|
||||
if (a%is_unit()) then
|
||||
!
|
||||
! CUSPARSE has the habit of storing the diagonal and then ignoring,
|
||||
! whereas we do not store it. Hence this adapter code.
|
||||
!
|
||||
nzdi = nz + m
|
||||
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi)
|
||||
!!$ write(0,*) 'Done deviceAlloc'
|
||||
if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_zero)
|
||||
!!$ write(0,*) 'Done SetIndexBase'
|
||||
if (info == 0) then
|
||||
if (a%is_unit()) then
|
||||
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
|
||||
else
|
||||
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
|
||||
end if
|
||||
end if
|
||||
!!! We are explicitly adding the diagonal
|
||||
!! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
|
||||
if ((info == 0) .and. a%is_triangle()) then
|
||||
info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular)
|
||||
if ((info == 0).and.a%is_upper()) then
|
||||
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
|
||||
else
|
||||
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
|
||||
end if
|
||||
end if
|
||||
if (info == 0) allocate(irpdi(m+1),jadi(0:nzdi),valdi(0:nzdi),stat=info)
|
||||
if (info == 0) then
|
||||
irpdi(1) = 0
|
||||
if (a%is_triangle().and.a%is_upper()) then
|
||||
do i=1,m
|
||||
j = irpdi(i)
|
||||
jadi(j) = i
|
||||
valdi(j) = cone
|
||||
nrz = a%irp(i+1)-a%irp(i)
|
||||
jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)-1
|
||||
valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1)
|
||||
irpdi(i+1) = j + nrz + 1
|
||||
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
|
||||
end do
|
||||
else
|
||||
do i=1,m
|
||||
j = irpdi(i)
|
||||
nrz = a%irp(i+1)-a%irp(i)
|
||||
jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)-1
|
||||
valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1)
|
||||
jadi(j+nrz) = i
|
||||
valdi(j+nrz) = cone
|
||||
irpdi(i+1) = j + nrz + 1
|
||||
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
|
||||
end do
|
||||
end if
|
||||
end if
|
||||
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi)
|
||||
|
||||
else
|
||||
|
||||
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz)
|
||||
!!$ write(0,*) 'Done deviceAlloc', info
|
||||
if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,&
|
||||
& cusparse_index_base_zero)
|
||||
!!$ write(0,*) 'Done setIndexBase', info
|
||||
if (info == 0) then
|
||||
if (a%is_unit()) then
|
||||
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
|
||||
else
|
||||
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
|
||||
end if
|
||||
end if
|
||||
if ((info == 0) .and. a%is_triangle()) then
|
||||
info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular)
|
||||
if ((info == 0).and.a%is_upper()) then
|
||||
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
|
||||
else
|
||||
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
|
||||
end if
|
||||
end if
|
||||
nzdi=a%irp(m+1)-1
|
||||
if (info == 0) allocate(irpdi(m+1),jadi(max(nzdi,1)),stat=info)
|
||||
if (info == 0) then
|
||||
irpdi(1:m+1) = a%irp(1:m+1) -1
|
||||
jadi(1:nzdi) = a%ja(1:nzdi) -1
|
||||
end if
|
||||
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,irpdi,jadi,a%val)
|
||||
!!$ write(0,*) 'Done Host2Device', info
|
||||
endif
|
||||
|
||||
|
||||
#elif 0
|
||||
|
||||
if (a%is_unit()) then
|
||||
!
|
||||
! CUSPARSE has the habit of storing the diagonal and then ignoring,
|
||||
! whereas we do not store it. Hence this adapter code.
|
||||
!
|
||||
nzdi = nz + m
|
||||
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi)
|
||||
if (info == 0) then
|
||||
if (a%is_unit()) then
|
||||
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
|
||||
else
|
||||
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
|
||||
end if
|
||||
end if
|
||||
!!! We are explicitly adding the diagonal
|
||||
!! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
|
||||
if ((info == 0) .and. a%is_triangle()) then
|
||||
!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular)
|
||||
if ((info == 0).and.a%is_upper()) then
|
||||
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
|
||||
else
|
||||
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
|
||||
end if
|
||||
end if
|
||||
if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info)
|
||||
if (info == 0) then
|
||||
irpdi(1) = 1
|
||||
if (a%is_triangle().and.a%is_upper()) then
|
||||
do i=1,m
|
||||
j = irpdi(i)
|
||||
jadi(j) = i
|
||||
valdi(j) = cone
|
||||
nrz = a%irp(i+1)-a%irp(i)
|
||||
jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)
|
||||
valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1)
|
||||
irpdi(i+1) = j + nrz + 1
|
||||
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
|
||||
end do
|
||||
else
|
||||
do i=1,m
|
||||
j = irpdi(i)
|
||||
nrz = a%irp(i+1)-a%irp(i)
|
||||
jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)
|
||||
valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1)
|
||||
jadi(j+nrz) = i
|
||||
valdi(j+nrz) = cone
|
||||
irpdi(i+1) = j + nrz + 1
|
||||
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
|
||||
end do
|
||||
end if
|
||||
end if
|
||||
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi)
|
||||
|
||||
else
|
||||
|
||||
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz)
|
||||
!!$ if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one)
|
||||
if (info == 0) then
|
||||
if (a%is_unit()) then
|
||||
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
|
||||
else
|
||||
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
|
||||
end if
|
||||
end if
|
||||
if ((info == 0) .and. a%is_triangle()) then
|
||||
!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular)
|
||||
if ((info == 0).and.a%is_upper()) then
|
||||
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
|
||||
else
|
||||
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
|
||||
end if
|
||||
end if
|
||||
|
||||
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val)
|
||||
endif
|
||||
|
||||
!!$ if ((info == 0) .and. a%is_triangle()) then
|
||||
!!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat)
|
||||
!!$ end if
|
||||
|
||||
#else
|
||||
|
||||
if (a%is_unit()) then
|
||||
!
|
||||
! CUSPARSE has the habit of storing the diagonal and then ignoring,
|
||||
! whereas we do not store it. Hence this adapter code.
|
||||
!
|
||||
nzdi = nz + m
|
||||
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi)
|
||||
if (info == 0) then
|
||||
if (a%is_unit()) then
|
||||
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
|
||||
else
|
||||
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
|
||||
end if
|
||||
end if
|
||||
!!! We are explicitly adding the diagonal
|
||||
if ((info == 0) .and. a%is_triangle()) then
|
||||
if ((info == 0).and.a%is_upper()) then
|
||||
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
|
||||
else
|
||||
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
|
||||
end if
|
||||
end if
|
||||
if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info)
|
||||
if (info == 0) then
|
||||
irpdi(1) = 1
|
||||
if (a%is_triangle().and.a%is_upper()) then
|
||||
do i=1,m
|
||||
j = irpdi(i)
|
||||
jadi(j) = i
|
||||
valdi(j) = cone
|
||||
nrz = a%irp(i+1)-a%irp(i)
|
||||
jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)
|
||||
valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1)
|
||||
irpdi(i+1) = j + nrz + 1
|
||||
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
|
||||
end do
|
||||
else
|
||||
do i=1,m
|
||||
j = irpdi(i)
|
||||
nrz = a%irp(i+1)-a%irp(i)
|
||||
jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)
|
||||
valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1)
|
||||
jadi(j+nrz) = i
|
||||
valdi(j+nrz) = cone
|
||||
irpdi(i+1) = j + nrz + 1
|
||||
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
|
||||
end do
|
||||
end if
|
||||
end if
|
||||
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi)
|
||||
|
||||
else
|
||||
|
||||
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz)
|
||||
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val)
|
||||
endif
|
||||
|
||||
#endif
|
||||
call a%set_sync()
|
||||
|
||||
if (info /= 0) then
|
||||
write(0,*) 'Error in CSRG_TO_GPU ',info
|
||||
end if
|
||||
|
||||
end subroutine psb_c_cuda_csrg_to_gpu
|
@ -0,0 +1,117 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
|
||||
subroutine psb_c_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans)
  !
  ! Sparse matrix-vector product y = alpha*op(A)*x + beta*y for a CSRG
  ! matrix acting on encapsulated vectors.
  !
  !   alpha, beta : complex scalars of the update
  !   a           : the matrix; must be in the assembled state
  !   x, y        : input / input-output vectors
  !   info        : psb_success_ on success, an error code otherwise
  !   trans       : optional 'N' (default), 'T' or 'C'
  !
  ! 'T'/'C' requests are served by the parent CSR host implementation.
  ! The plain product calls spmvCSRGDevice when both vectors are of type
  ! psb_c_vect_cuda; otherwise it is computed on host copies of x and y.
  !
  use psb_base_mod
  use cusparse_mod
  use elldev_mod
  use psb_vectordev_mod
  use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_vect_mv
  use psb_c_cuda_vect_mod
  implicit none
  class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a
  complex(psb_spk_), intent(in)              :: alpha, beta
  class(psb_c_base_vect_type), intent(inout) :: x
  class(psb_c_base_vect_type), intent(inout) :: y
  integer(psb_ipk_), intent(out)             :: info
  character, optional, intent(in)            :: trans

  complex(psb_spk_), allocatable :: xhost(:), yhost(:)
  logical            :: transposed, on_device
  character          :: opcode, upcode
  integer(psb_ipk_)  :: err_act
  character(len=20)  :: name='c_cuda_csrg_vect_mv'

  call psb_erractionsave(err_act)
  info = psb_success_

  ! Default to the non-transposed product when trans is absent.
  opcode = 'N'
  if (present(trans)) opcode = trans

  if (.not.a%is_asb()) then
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  endif

  upcode     = psb_toupper(opcode)
  transposed = (upcode == 'T').or.(upcode == 'C')

  if (transposed) then
    ! Host path: bring the operands up to date on the host first.
    ! trans is necessarily present here, so it can be forwarded as is.
    if (.not.x%is_host()) call x%sync()
    if (beta /= czero) then
      if (.not.y%is_host()) call y%sync()
    end if
    call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans)
    call y%set_host()
  else
    if (a%is_host()) call a%sync()

    on_device = .false.
    select type (xx => x)
    type is (psb_c_vect_cuda)
      select type (yy => y)
      type is (psb_c_vect_cuda)
        on_device = .true.
        if (xx%is_host()) call xx%sync()
        ! y only needs to be current when it contributes to the result.
        if (beta /= czero) then
          if (yy%is_host()) call yy%sync()
        end if
        info = spmvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,&
             & beta,yy%deviceVect)
        if (info /= 0) then
          call psb_errpush(psb_err_from_subroutine_ai_,name,&
               & a_err='spmvCSRGDevice',i_err=(/info,izero,izero,izero,izero/))
          info = psb_err_from_subroutine_ai_
          goto 9999
        end if
        call yy%set_dev()
      end select
    end select

    if (.not.on_device) then
      ! At least one operand is not a CUDA vector: work on host arrays
      ! and rebuild y from the result.
      xhost = x%get_vect()
      yhost = y%get_vect()
      call a%psb_c_csr_sparse_mat%spmm(alpha,xhost,beta,yhost,info)
      call y%bld(yhost)
    end if
  end if

  if (info /= 0) goto 9999
  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return
end subroutine psb_c_cuda_csrg_vect_mv
|
@ -0,0 +1,127 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
|
||||
subroutine psb_c_cuda_diag_csmv(alpha,a,x,beta,y,info,trans)
  !
  ! Sparse matrix-vector product y = alpha*op(A)*x + beta*y for a DIAG
  ! matrix acting on plain host arrays.
  !
  !   alpha, beta : complex scalars of the update
  !   a           : the matrix; must be in the assembled state
  !   x           : input array,  at least as long as the columns of op(A)
  !   y           : in/out array, at least as long as the rows of op(A)
  !   info        : psb_success_ on success, an error code otherwise
  !   trans       : optional 'N' (default), 'T' or 'C'
  !
  ! 'T'/'C' requests are served by the parent DIA host implementation.
  ! The plain product copies x and y into temporary device vectors, calls
  ! spmvDiagDevice and copies y back.
  !
  use psb_base_mod
  use diagdev_mod
  use psb_vectordev_mod
  use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_csmv
  implicit none
  class(psb_c_cuda_diag_sparse_mat), intent(in) :: a
  complex(psb_spk_), intent(in)       :: alpha, beta, x(:)
  complex(psb_spk_), intent(inout)    :: y(:)
  integer, intent(out)                :: info
  character, optional, intent(in)     :: trans

  character          :: trans_
  integer            :: m, n
  type(c_ptr)        :: gpX, gpY
  logical            :: tra, have_gpx, have_gpy
  Integer            :: err_act
  character(len=20)  :: name='c_cuda_diag_csmv'

  call psb_erractionsave(err_act)
  info = psb_success_

  if (present(trans)) then
    trans_ = trans
  else
    trans_ = 'N'
  end if

  if (.not.a%is_asb()) then
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  endif

  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')

  ! n is the required length of x, m the required length of y.
  if (tra) then
    m = a%get_ncols()
    n = a%get_nrows()
  else
    n = a%get_ncols()
    m = a%get_nrows()
  end if

  if (size(x,1)<n) then
    info = 36
    call psb_errpush(info,name,i_err=(/3*ione,n,izero,izero,izero/))
    goto 9999
  end if

  if (size(y,1)<m) then
    info = 36
    call psb_errpush(info,name,i_err=(/5*ione,m,izero,izero,izero/))
    goto 9999
  end if

  if (tra) then
    call a%psb_c_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans)
  else
    !
    ! Just to test, move X/Y to/from the GPU.
    !
    ! The temporaries hold single-precision complex data, so they are
    ! allocated with the complex-float element type (the same tag that
    ! psb_c_cuda_diag_to_gpu uses for the matrix itself).
    !
    have_gpx = .false.
    have_gpy = .false.
    if (info == 0) then
      info = FallocMultiVecDevice(gpX,1,size(x,1),spgpu_type_complex_float)
      have_gpx = (info == 0)
    end if
    if (alpha /= czero) then
      if (info == 0) &
           & info = writeMultiVecDevice(gpX,x)
    end if
    if (info == 0) then
      info = FallocMultiVecDevice(gpY,1,size(y,1),spgpu_type_complex_float)
      have_gpy = (info == 0)
    end if
    if (beta /= czero) then
      if (info == 0) &
           & info = writeMultiVecDevice(gpY,y)
    end if
    if (info == 0) &
         & info = spmvDiagDevice(a%deviceMat,alpha,gpX,beta,gpY)
    if (info == 0) &
         & info = readMultiVecDevice(gpY,y)
    ! Release whatever was successfully allocated before acting on the
    ! error code, so a failing step does not leak device memory.
    if (have_gpx) call freeMultiVecDevice(gpX)
    if (have_gpy) call freeMultiVecDevice(gpY)
    if (info /= 0) goto 9999
  endif

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_cuda_diag_csmv
|
@ -0,0 +1,65 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
|
||||
subroutine psb_c_cuda_diag_mold(a,b,info)
  !
  ! Allocates b with dynamic type psb_c_cuda_diag_sparse_mat.
  ! If b is already allocated it is freed and deallocated first.
  !
  !   a    : the matrix the call is dispatched on; its contents are not read
  !   b    : on exit, allocated as psb_c_cuda_diag_sparse_mat
  !   info : 0 on success, psb_err_alloc_dealloc_ otherwise
  !
  use psb_base_mod
  use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_mold
  implicit none
  class(psb_c_cuda_diag_sparse_mat), intent(in)            :: a
  class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
  integer(psb_ipk_), intent(out)                           :: info
  integer(psb_ipk_)  :: err_act
  character(len=20)  :: name='diag_mold'

  call psb_get_erraction(err_act)

  info = 0
  if (allocated(b)) then
    call b%free()
    deallocate(b,stat=info)
  end if
  ! Only attempt the allocation when the clean-up above succeeded.
  if (info == 0) then
    allocate(psb_c_cuda_diag_sparse_mat :: b, stat=info)
  end if

  if (info == psb_success_) return

  ! Either the deallocation or the allocation failed.
  info = psb_err_alloc_dealloc_
  call psb_errpush(info, name)
  call psb_error_handler(err_act)

  return

end subroutine psb_c_cuda_diag_mold
|
@ -0,0 +1,66 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
|
||||
subroutine psb_c_cuda_diag_to_gpu(a,info,nzrm)
  !
  ! Copies the host DIA storage of a (a%data, a%offset) into a freshly
  ! allocated device matrix a%deviceMat.
  !
  !   a    : the matrix; nothing is done when a%data or a%offset is
  !          not allocated
  !   info : 0 on success, otherwise the code returned by
  !          FallocDiagDevice or writeDiagDevice
  !   nzrm : optional; not used by this routine
  !
  use psb_base_mod
  use diagdev_mod
  use psb_vectordev_mod
  use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_to_gpu
  use iso_c_binding
  implicit none
  class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a
  integer(psb_ipk_), intent(out)                   :: info
  integer(psb_ipk_), intent(in), optional          :: nzrm

  integer(psb_ipk_)   :: nrows, ndiags, ncols
  type(diagdev_parms) :: gpu_parms

  info = 0

  if (.not.(allocated(a%data).and.allocated(a%offset))) return

  ! Shape of the host storage and column count of the matrix.
  nrows  = size(a%data,1)
  ndiags = size(a%data,2)
  ncols  = a%get_ncols()

  gpu_parms = FgetDiagDeviceParams(nrows,ncols,ndiags,spgpu_type_complex_float)

  ! Drop any previous device copy before allocating a new one.
  if (c_associated(a%deviceMat)) call freeDiagDevice(a%deviceMat)

  info = FallocDiagDevice(a%deviceMat,nrows,ncols,ndiags,spgpu_type_complex_float)
  if (info == 0) then
    info = writeDiagDevice(a%deviceMat,a%data,a%offset,nrows)
  end if

end subroutine psb_c_cuda_diag_to_gpu
|
@ -0,0 +1,116 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
subroutine psb_c_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans)
  !
  ! Sparse matrix-vector product y = alpha*op(A)*x + beta*y for a DIAG
  ! matrix acting on encapsulated vectors.
  !
  !   alpha, beta : complex scalars of the update
  !   a           : the matrix; must be in the assembled state
  !   x, y        : input / input-output vectors
  !   info        : psb_success_ on success, an error code otherwise
  !   trans       : optional 'N' (default), 'T' or 'C'
  !
  ! 'T'/'C' requests are served by the parent DIA host implementation.
  ! The plain product calls spmvDiagDevice when both vectors are of type
  ! psb_c_vect_cuda; otherwise it goes through a%spmm on host copies of
  ! x and y.
  !
  use psb_base_mod
  use diagdev_mod
  use psb_vectordev_mod
  use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_vect_mv
  use psb_c_cuda_vect_mod
  implicit none
  class(psb_c_cuda_diag_sparse_mat), intent(in) :: a
  complex(psb_spk_), intent(in)              :: alpha, beta
  class(psb_c_base_vect_type), intent(inout) :: x
  class(psb_c_base_vect_type), intent(inout) :: y
  integer(psb_ipk_), intent(out)             :: info
  character, optional, intent(in)            :: trans
  complex(psb_spk_), allocatable :: rx(:), ry(:)
  logical            :: tra
  character          :: trans_
  Integer(Psb_ipk_)  :: err_act
  character(len=20)  :: name='c_cuda_diag_vect_mv'

  call psb_erractionsave(err_act)
  info = psb_success_

  if (present(trans)) then
    trans_ = trans
  else
    trans_ = 'N'
  end if

  if (.not.a%is_asb()) then
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  endif

  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')

  if (tra) then
    ! Host path: bring the operands up to date on the host first.
    if (.not.x%is_host()) call x%sync()
    ! beta is complex(psb_spk_): compare against the matching-kind zero.
    if (beta /= czero) then
      if (.not.y%is_host()) call y%sync()
    end if
    call a%psb_c_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans)
    call y%set_host()
  else
    if (a%is_host()) call a%sync()
    select type (xx => x)
    type is (psb_c_vect_cuda)
      select type(yy => y)
      type is (psb_c_vect_cuda)
        if (xx%is_host()) call xx%sync()
        ! y only needs to be current when it contributes to the result.
        if (beta /= czero) then
          if (yy%is_host()) call yy%sync()
        end if
        info = spmvDiagDevice(a%deviceMat,alpha,xx%deviceVect,&
             & beta,yy%deviceVect)
        if (info /= 0) then
          call psb_errpush(psb_err_from_subroutine_ai_,name,&
               & a_err='spmvDIAGDevice',i_err=(/info,izero,izero,izero,izero/))
          info = psb_err_from_subroutine_ai_
          goto 9999
        end if
        call yy%set_dev()
      class default
        ! y is not a CUDA vector: compute on host arrays and rebuild y.
        rx = xx%get_vect()
        ry = y%get_vect()
        call a%spmm(alpha,rx,beta,ry,info)
        call y%bld(ry)
      end select
    class default
      ! x is not a CUDA vector: compute on host arrays and rebuild y.
      rx = x%get_vect()
      ry = y%get_vect()
      call a%spmm(alpha,rx,beta,ry,info)
      call y%bld(ry)
    end select

  end if
  if (info /= 0) goto 9999
  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_cuda_diag_vect_mv
|
@ -0,0 +1,428 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
subroutine psb_c_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans)
  !
  ! Matrix-vector product y = alpha*op(A)*x + beta*y for a DNSG (dense)
  ! matrix acting on encapsulated vectors.
  !
  !   alpha, beta : complex scalars of the update
  !   a           : the matrix; must be in the assembled state
  !   x, y        : input / input-output vectors
  !   info        : psb_success_ on success, an error code otherwise
  !   trans       : optional 'N' (default), 'T' or 'C'
  !
  ! When both vectors are of type psb_c_vect_cuda the product is done by
  ! spmvDnsDevice, which receives the (upper-cased) transpose flag.
  ! Otherwise it goes through a%spmm on host copies of x and y, with the
  ! same flag forwarded so that both paths compute the same operation.
  !
  use psb_base_mod
  use psb_c_cuda_vect_mod
  use dnsdev_mod
  use psb_c_vectordev_mod
  use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_dnsg_vect_mv
  implicit none
  class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a
  complex(psb_spk_), intent(in)              :: alpha, beta
  class(psb_c_base_vect_type), intent(inout) :: x
  class(psb_c_base_vect_type), intent(inout) :: y
  integer(psb_ipk_), intent(out)             :: info
  character, optional, intent(in)            :: trans
  logical            :: on_device
  character          :: trans_
  complex(psb_spk_), allocatable :: rx(:), ry(:)
  Integer(Psb_ipk_)  :: err_act, m, n, k
  character(len=20)  :: name='c_cuda_dnsg_vect_mv'

  call psb_erractionsave(err_act)
  info = psb_success_
  if (present(trans)) then
    trans_ = psb_toupper(trans)
  else
    trans_ = 'N'
  end if

  if (.not.a%is_asb()) then
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  endif

  ! Dimensions handed to spmvDnsDevice: m and k come from the matrix
  ! (swapped for a transposed product), n is fixed to 1.
  if (trans_ =='N') then
    m = a%get_nrows()
    n = 1
    k = a%get_ncols()
  else
    m = a%get_ncols()
    n = 1
    k = a%get_nrows()
  end if

  on_device = .false.
  select type (xx => x)
  type is (psb_c_vect_cuda)
    select type(yy => y)
    type is (psb_c_vect_cuda)
      on_device = .true.
      if (a%is_host()) call a%sync()
      if (xx%is_host()) call xx%sync()
      ! y only needs to be current when it contributes to the result.
      if (beta /= czero) then
        if (yy%is_host()) call yy%sync()
      end if
      info = spmvDnsDevice(trans_,m,n,k,alpha,a%deviceMat,&
           & xx%deviceVect,beta,yy%deviceVect)
      if (info /= 0) then
        call psb_errpush(psb_err_from_subroutine_ai_,name,&
             & a_err='spmvDnsDevice',i_err=(/info,izero,izero,izero,izero/))
        info = psb_err_from_subroutine_ai_
        goto 9999
      end if
      call yy%set_dev()
    end select
  end select

  if (.not.on_device) then
    ! At least one operand is not a CUDA vector: compute on host arrays
    ! and rebuild y from the result.
    if (a%is_dev()) call a%sync()
    rx = x%get_vect()
    ry = y%get_vect()
    ! Forward the transpose flag; without it a 'T'/'C' request would
    ! silently produce the untransposed product on this path.
    call a%spmm(alpha,rx,beta,ry,info,trans_)
    call y%bld(ry)
  end if

  if (info /= 0) goto 9999
  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_cuda_dnsg_vect_mv
|
||||
|
||||
|
||||
!
! Make b a freshly allocated object of dynamic type
! psb_c_cuda_dnsg_sparse_mat. If b is already allocated it is freed and
! deallocated first.
!
! Arguments:
!   a     the mold source (only its type matters here)
!   b     on exit, allocated as psb_c_cuda_dnsg_sparse_mat
!   info  0 on success, psb_err_alloc_dealloc_ if (de)allocation failed
!
subroutine psb_c_cuda_dnsg_mold(a,b,info)
  use psb_base_mod
  use psb_c_cuda_vect_mod
  use dnsdev_mod
  use psb_c_vectordev_mod
  use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_dnsg_mold
  implicit none
  class(psb_c_cuda_dnsg_sparse_mat), intent(in)            :: a
  class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
  integer(psb_ipk_), intent(out)                           :: info

  integer(psb_ipk_)  :: err_act
  character(len=20)  :: name='dnsg_mold'
  logical, parameter :: debug=.false.

  call psb_get_erraction(err_act)

  info = 0
  if (allocated(b)) then
    call b%free()
    deallocate(b,stat=info)
  end if
  if (info == 0) then
    allocate(psb_c_cuda_dnsg_sparse_mat :: b, stat=info)
  end if

  ! Normal exit: nothing else to do.
  if (info == psb_success_) return

  ! Either the deallocation or the allocation failed.
  info = psb_err_alloc_dealloc_
  call psb_errpush(info, name)
  call psb_error_handler(err_act)

  return

end subroutine psb_c_cuda_dnsg_mold
|
||||
|
||||
|
||||
!!$
|
||||
!!$ interface
|
||||
!!$ subroutine psb_c_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans)
|
||||
!!$ import :: psb_ipk_, psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type
|
||||
!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a
|
||||
!!$ complex(psb_spk_), intent(in) :: alpha, beta
|
||||
!!$ class(psb_c_base_vect_type), intent(inout) :: x, y
|
||||
!!$ integer(psb_ipk_), intent(out) :: info
|
||||
!!$ character, optional, intent(in) :: trans
|
||||
!!$ end subroutine psb_c_cuda_dnsg_inner_vect_sv
|
||||
!!$ end interface
|
||||
|
||||
!!$ interface
|
||||
!!$ subroutine psb_c_cuda_dnsg_reallocate_nz(nz,a)
|
||||
!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_ipk_
|
||||
!!$ integer(psb_ipk_), intent(in) :: nz
|
||||
!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a
|
||||
!!$ end subroutine psb_c_cuda_dnsg_reallocate_nz
|
||||
!!$ end interface
|
||||
!!$
|
||||
!!$ interface
|
||||
!!$ subroutine psb_c_cuda_dnsg_allocate_mnnz(m,n,a,nz)
|
||||
!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_ipk_
|
||||
!!$ integer(psb_ipk_), intent(in) :: m,n
|
||||
!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a
|
||||
!!$ integer(psb_ipk_), intent(in), optional :: nz
|
||||
!!$ end subroutine psb_c_cuda_dnsg_allocate_mnnz
|
||||
!!$ end interface
|
||||
|
||||
!
! Create the device-side dense matrix for a and copy a%val into it.
!
! FallocDnsDevice is called with the matrix row/column counts, then
! writeDnsDevice uploads a%val using its actual extents.
!
! Arguments:
!   a     the matrix; a%deviceMat is (re)created by this call
!   info  psb_success_ on success, otherwise the nonzero code returned
!         by the failing device routine (also recorded on the error stack)
!
subroutine psb_c_cuda_dnsg_to_gpu(a,info)
  use psb_base_mod
  use psb_c_cuda_vect_mod
  use dnsdev_mod
  use psb_c_vectordev_mod
  use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_dnsg_to_gpu
  implicit none
  class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a
  integer(psb_ipk_), intent(out)                   :: info

  integer(psb_ipk_)  :: err_act
  logical, parameter :: debug=.false.
  character(len=20)  :: name='c_cuda_dnsg_to_gpu'
  character(len=20)  :: failed

  call psb_erractionsave(err_act)
  info = psb_success_
  if (debug) write(0,*) 'DNS_TO_GPU',size(a%val,1),size(a%val,2)

  failed = ' '
  info = FallocDnsDevice(a%deviceMat,a%get_nrows(),a%get_ncols(),&
       & spgpu_type_complex_float,1)
  if (info /= 0) then
    failed = 'FallocDnsDevice'
  else
    info = writeDnsDevice(a%deviceMat,a%val,size(a%val,1),size(a%val,2))
    if (info /= 0) failed = 'writeDnsDevice'
  end if
  if (debug) write(0,*) 'DNS_TO_GPU: From writeDnsDEvice',info

  if (info /= 0) then
    ! Record which device call failed so the handler has something to
    ! report; info itself is left as the raw device return code.
    call psb_errpush(psb_err_from_subroutine_ai_,name,&
         & a_err=trim(failed),i_err=(/info,izero,izero,izero,izero/))
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_cuda_dnsg_to_gpu
|
||||
|
||||
|
||||
|
||||
!
! Copy a COO matrix b into the dense CUDA matrix a.
!
! The host-side conversion is delegated to the parent
! psb_c_dns_sparse_mat%cp_from_coo; on success the result is pushed to
! the device with a%to_gpu.
!
! Arguments:
!   a     destination matrix
!   b     source COO matrix (synced first if it is in the device state)
!   info  psb_success_ on success, an error code otherwise
!
subroutine psb_c_cuda_cp_dnsg_from_coo(a,b,info)
  use psb_base_mod
  use psb_c_cuda_vect_mod
  use dnsdev_mod
  use psb_c_vectordev_mod
  use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_cp_dnsg_from_coo
  implicit none

  class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a
  class(psb_c_coo_sparse_mat), intent(in)          :: b
  integer(psb_ipk_), intent(out)                   :: info

  integer(psb_ipk_)  :: err_act
  ! len=* so the routine name can never be silently truncated
  character(len=*), parameter :: name='c_cuda_dnsg_cp_from_coo'
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  info = psb_success_
  if (b%is_dev()) call b%sync()

  call a%psb_c_dns_sparse_mat%cp_from_coo(b,info)
  if (debug) write(0,*) 'dnsg_cp_from_coo: dns_cp',info
  if (info == 0) call a%to_gpu(info)
  if (info /= 0) goto 9999

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_cuda_cp_dnsg_from_coo
|
||||
|
||||
!
! Copy a matrix b of arbitrary storage format into the dense CUDA
! matrix a.
!
! A COO source is handed straight to a%cp_from_coo; any other format is
! first copied to a temporary COO matrix, which is then moved into a.
!
! Arguments:
!   a     destination matrix
!   b     source matrix (synced first if it is in the device state)
!   info  psb_success_ on success, an error code otherwise
!
subroutine psb_c_cuda_cp_dnsg_from_fmt(a,b,info)
  use psb_base_mod
  use psb_c_cuda_vect_mod
  use dnsdev_mod
  use psb_c_vectordev_mod
  use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_cp_dnsg_from_fmt
  implicit none

  class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a
  class(psb_c_base_sparse_mat), intent(in)         :: b
  integer(psb_ipk_), intent(out)                   :: info

  type(psb_c_coo_sparse_mat) :: tmp
  integer(psb_ipk_)          :: err_act
  ! len=* so the routine name can never be silently truncated
  character(len=*), parameter :: name='c_cuda_dnsg_cp_from_fmt'

  call psb_erractionsave(err_act)
  info = psb_success_
  if (b%is_dev()) call b%sync()

  select type (b)
  type is (psb_c_coo_sparse_mat)
    call a%cp_from_coo(b,info)

!!$  class is (psb_c_ell_sparse_mat)
!!$    nzm = psb_size(b%ja,2)
!!$    m   = b%get_nrows()
!!$    nc  = b%get_ncols()
!!$    nza = b%get_nzeros()
!!$#ifdef HAVE_SPGPU
!!$    gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1)
!!$    ld  = gpu_parms%pitch
!!$    nzm = gpu_parms%maxRowSize
!!$#else
!!$    ld = m
!!$#endif
!!$    a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
!!$    if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info)
!!$    if (info == 0) call psb_safe_cpy( b%irn, a%irn , info)
!!$    if (info == 0) call psb_safe_cpy( b%ja , a%ja , info)
!!$    if (info == 0) call psb_safe_cpy( b%val, a%val , info)
!!$    if (info == 0) call psb_realloc(ld,nzm,a%ja,info)
!!$    if (info == 0) then
!!$      a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm)
!!$    end if
!!$    if (info == 0) call psb_realloc(ld,nzm,a%val,info)
!!$    if (info == 0) then
!!$      a%val(1:m,1:nzm) = b%val(1:m,1:nzm)
!!$    end if
!!$    a%nzt = nza
!!$#ifdef HAVE_SPGPU
!!$    call a%to_gpu(info)
!!$#endif

  class default
    ! Generic route: go through a temporary COO copy of b.
    call b%cp_to_coo(tmp,info)
    if (info == psb_success_) call a%mv_from_coo(tmp,info)
  end select

  if (info /= 0) goto 9999
  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_cuda_cp_dnsg_from_fmt
|
||||
|
||||
|
||||
|
||||
!
! Move a COO matrix b into the dense CUDA matrix a.
!
! b is put in row-major order with b%fix if it is not already, copied
! into a through a%cp_from_coo, and then freed regardless of whether
! the copy succeeded.
!
! Arguments:
!   a     destination matrix
!   b     source COO matrix; freed on exit once the copy was attempted
!   info  psb_success_ on success, an error code otherwise
!
subroutine psb_c_cuda_mv_dnsg_from_coo(a,b,info)
  use psb_base_mod
  use psb_c_cuda_vect_mod
  use dnsdev_mod
  use psb_c_vectordev_mod
  use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_mv_dnsg_from_coo
  implicit none

  class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a
  class(psb_c_coo_sparse_mat), intent(inout)       :: b
  integer(psb_ipk_), intent(out)                   :: info

  integer(psb_ipk_)  :: err_act
  logical, parameter :: debug=.false.
  ! len=* so the routine name can never be silently truncated
  character(len=*), parameter :: name='c_cuda_dnsg_mv_from_coo'

  call psb_erractionsave(err_act)
  info = psb_success_

  if (.not.b%is_by_rows()) call b%fix(info)
  ! Leave through the common error exit: a bare return here would skip
  ! the psb_erractionrestore/psb_error_handler call that every other
  ! exit path pairs with psb_erractionsave.
  if (info /= psb_success_) goto 9999
  if (b%is_dev()) call b%sync()
  call a%cp_from_coo(b,info)
  if (debug) write(0,*) 'dnsg_mv_from_coo: cp_from_coo:',info
  call b%free()
  if (info /= 0) goto 9999
  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_cuda_mv_dnsg_from_coo
|
||||
|
||||
!
! Move a matrix b of arbitrary storage format into the dense CUDA
! matrix a.
!
! A COO source is handed straight to a%mv_from_coo; any other format is
! first moved into a temporary COO matrix, which is then moved into a.
!
! Arguments:
!   a     destination matrix
!   b     source matrix (synced first if it is in the device state)
!   info  psb_success_ on success, an error code otherwise
!
subroutine psb_c_cuda_mv_dnsg_from_fmt(a,b,info)
  use psb_base_mod
  use psb_c_cuda_vect_mod
  use dnsdev_mod
  use psb_c_vectordev_mod
  use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_mv_dnsg_from_fmt
  implicit none
  class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a
  class(psb_c_base_sparse_mat), intent(inout)      :: b
  integer(psb_ipk_), intent(out)                   :: info

  type(psb_c_coo_sparse_mat) :: tmp
  integer(psb_ipk_)          :: err_act
  ! This is the mv_ routine (the original literal said cp_); len=* keeps
  ! the name from being truncated.
  character(len=*), parameter :: name='c_cuda_dnsg_mv_from_fmt'

  call psb_erractionsave(err_act)
  info = psb_success_
  if (b%is_dev()) call b%sync()

  select type (b)
  type is (psb_c_coo_sparse_mat)
    call a%mv_from_coo(b,info)

!!$  class is (psb_c_ell_sparse_mat)
!!$    nzm = psb_size(b%ja,2)
!!$    m   = b%get_nrows()
!!$    nc  = b%get_ncols()
!!$    nza = b%get_nzeros()
!!$#ifdef HAVE_SPGPU
!!$    gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1)
!!$    ld  = gpu_parms%pitch
!!$    nzm = gpu_parms%maxRowSize
!!$#else
!!$    ld = m
!!$#endif
!!$    a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
!!$    if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info)
!!$    if (info == 0) call psb_safe_cpy( b%irn, a%irn , info)
!!$    if (info == 0) call psb_safe_cpy( b%ja , a%ja , info)
!!$    if (info == 0) call psb_safe_cpy( b%val, a%val , info)
!!$    if (info == 0) call psb_realloc(ld,nzm,a%ja,info)
!!$    if (info == 0) then
!!$      a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm)
!!$    end if
!!$    if (info == 0) call psb_realloc(ld,nzm,a%val,info)
!!$    if (info == 0) then
!!$      a%val(1:m,1:nzm) = b%val(1:m,1:nzm)
!!$    end if
!!$    a%nzt = nza
!!$#ifdef HAVE_SPGPU
!!$    call a%to_gpu(info)
!!$#endif

  class default
    ! Generic route: move b into a temporary COO matrix first.
    call b%mv_to_coo(tmp,info)
    if (info == psb_success_) call a%mv_from_coo(tmp,info)
  end select

  if (info /= 0) goto 9999
  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_cuda_mv_dnsg_from_fmt
|
@ -0,0 +1,99 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
!
! Allocate the storage of an m x n ELL CUDA matrix and put it in the
! build state.
!
! The number of entries per row is derived from nz (or from a default
! based on 7*m and 7*n when nz is absent), then replaced by the
! maxRowSize reported by FgetEllDeviceParams; the leading dimension of
! ja/val is the pitch reported by the same call. On success the matrix
! is also created on the device through a%to_gpu.
!
! Arguments:
!   m, n  number of rows / columns; negative values are rejected
!   a     the matrix to be allocated
!   nz    optional estimate of the total number of nonzeros
!
! Errors are reported through the PSBLAS error stack / error handler;
! there is no info argument.
!
subroutine psb_c_cuda_elg_allocate_mnnz(m,n,a,nz)

  use psb_base_mod
  use elldev_mod
  use psb_vectordev_mod
  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_allocate_mnnz
  implicit none
  integer(psb_ipk_), intent(in)                   :: m,n
  class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
  integer(psb_ipk_), intent(in), optional         :: nz

  integer(psb_ipk_)  :: err_act, info, nz_, ld
  character(len=20)  :: name='allocate_mnz'
  logical, parameter :: debug=.false.
  type(elldev_parms) :: gpu_parms

  call psb_erractionsave(err_act)
  info = psb_success_
  if (m < 0) then
    info = psb_err_iarg_neg_
    call psb_errpush(info,name,i_err=(/ione,izero,izero,izero,izero/))
    goto 9999
  endif
  if (n < 0) then
    info = psb_err_iarg_neg_
    call psb_errpush(info,name,i_err=(/2*ione,izero,izero,izero,izero/))
    goto 9999
  endif

  ! Entries per row, rounded up. m == 0 passes the checks above, so the
  ! divisor is clamped to 1 to avoid an integer division by zero; for
  ! m >= 1 the result is unchanged.
  if (present(nz)) then
    nz_ = (max(nz,ione) + m - 1)/max(m,ione)
  else
    nz_ = (max(7*m,7*n,ione) + m - 1)/max(m,ione)
  end if
  if (nz_ < 0) then
    info = psb_err_iarg_neg_
    call psb_errpush(info,name,i_err=(/3*ione,izero,izero,izero,izero/))
    goto 9999
  endif

  gpu_parms = FgetEllDeviceParams(m,nz_,nz_*m,n,spgpu_type_complex_float,1)
  ld  = gpu_parms%pitch
  nz_ = gpu_parms%maxRowSize

  if (info == psb_success_) call psb_realloc(m,a%irn,info)
  if (info == psb_success_) call psb_realloc(m,a%idiag,info)
  if (info == psb_success_) call psb_realloc(ld,nz_,a%ja,info)
  if (info == psb_success_) call psb_realloc(ld,nz_,a%val,info)
  if (info == psb_success_) then
    a%irn   = 0
    a%idiag = 0
    a%nzt   = 0
    call a%set_nrows(m)
    call a%set_ncols(n)
    call a%set_bld()
    call a%set_triangle(.false.)
    call a%set_unit(.false.)
    call a%set_dupl(psb_dupl_def_)
  end if

  ! Only touch the device when the host arrays were allocated; calling
  ! to_gpu unconditionally would overwrite a host allocation failure
  ! code held in info.
  if (info == psb_success_) call a%to_gpu(info,nzrm=nz_)
  if (info /= 0) goto 9999

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_cuda_elg_allocate_mnnz
|
@ -0,0 +1,64 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
!
! Finish the assembly of an ELL CUDA matrix: call a%sync() when the
! matrix is in the host state, then mark it as assembled.
!
! Arguments:
!   a  the matrix being assembled
!
subroutine psb_c_cuda_elg_asb(a)

  use psb_base_mod
  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_asb
  implicit none

  class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a

  integer(psb_ipk_)  :: err_act, info
  character(len=20)  :: name='elg_asb'
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  info = psb_success_

  ! Only call sync() if we are on host
  if (a%is_host()) then
    call a%sync()
  end if
  call a%set_asb()

  ! No step above reports a failure, so there is no error exit.
  call psb_erractionrestore(err_act)
  return

end subroutine psb_c_cuda_elg_asb
|
@ -0,0 +1,124 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
!
! Compute y = alpha*op(A)*x + beta*y for multi-column x and y stored in
! plain host arrays.
!
! For trans = 'T' or 'C' the product is delegated to the parent
! psb_c_ell_sparse_mat%spmm after syncing the matrix if it is in the
! device state. Otherwise x and y are copied to temporary device
! multivectors, spmvEllDevice is invoked, and y is read back.
!
! Arguments:
!   alpha, beta  scalars of the update
!   a            the matrix; must be in the assembled state
!   x            input array, at least n rows
!   y            input/output array, at least m rows
!   info         psb_success_ on success, an error code otherwise
!   trans        optional; defaults to 'N'
!
subroutine psb_c_cuda_elg_csmm(alpha,a,x,beta,y,info,trans)

  use psb_base_mod
  use elldev_mod
  use psb_vectordev_mod
  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csmm
  implicit none
  class(psb_c_cuda_elg_sparse_mat), intent(in) :: a
  complex(psb_spk_), intent(in)                :: alpha, beta, x(:,:)
  complex(psb_spk_), intent(inout)             :: y(:,:)
  integer(psb_ipk_), intent(out)               :: info
  character, optional, intent(in)              :: trans

  character          :: trans_
  integer(psb_ipk_)  :: m, n, nxy
  type(c_ptr)        :: gpX, gpY
  logical            :: tra, x_on_dev, y_on_dev
  integer(psb_ipk_)  :: err_act
  character(len=20)  :: name='c_cuda_elg_csmm'
  logical, parameter :: debug=.false.

  info = psb_success_
  call psb_erractionsave(err_act)

  if (present(trans)) then
    trans_ = trans
  else
    trans_ = 'N'
  end if

  if (.not.a%is_asb()) then
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  endif
  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')

  if (tra) then
    m = a%get_ncols()
    n = a%get_nrows()
  else
    n = a%get_ncols()
    m = a%get_nrows()
  end if

  if (size(x,1)<n) then
    info = 36
    call psb_errpush(info,name,i_err=(/3*ione,n,izero,izero,izero/))
    goto 9999
  end if

  if (size(y,1)<m) then
    info = 36
    call psb_errpush(info,name,i_err=(/5*ione,m,izero,izero,izero/))
    goto 9999
  end if

  if (tra) then
    if (a%is_dev()) call a%sync()
    call a%psb_c_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans)
  else
    !
    ! Just to test, move X/Y to/from the GPU.
    !
    nxy = min(size(x,2),size(y,2))
    ! Track which temporaries were actually created so that they can be
    ! released on the failure path too.
    x_on_dev = .false.
    y_on_dev = .false.
    if (info == 0) then
      info = FallocMultiVecDevice(gpX,nxy,size(x,1),spgpu_type_complex_float)
      x_on_dev = (info == 0)
    end if
    if (info == 0) &
         & info = writeMultiVecDevice(gpX,x,nxy)
    if (info == 0) then
      info = FallocMultiVecDevice(gpY,nxy,size(y,1),spgpu_type_complex_float)
      y_on_dev = (info == 0)
    end if
    if (info == 0) &
         & info = writeMultiVecDevice(gpY,y,nxy)

    if (info == 0) &
         & info = spmvEllDevice(a%deviceMat,alpha,gpX,beta,gpY)
    if (info == 0) &
         & info = readMultiVecDevice(gpY,y,nxy)

    ! Release the device temporaries before checking for errors;
    ! jumping to 9999 first would leak them.
    if (x_on_dev) call freeMultiVecDevice(gpX)
    if (y_on_dev) call freeMultiVecDevice(gpY)
    if (info /= 0) goto 9999
  endif

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_cuda_elg_csmm
|
@ -0,0 +1,127 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
!
! Compute y = alpha*op(A)*x + beta*y for single-column x and y stored
! in plain host arrays.
!
! For trans = 'T' or 'C' the product is delegated to the parent
! psb_c_ell_sparse_mat%spmm after syncing the matrix if it is in the
! device state. Otherwise temporary device vectors are created, x is
! uploaded only when alpha is nonzero and y only when beta is nonzero,
! spmvEllDevice is invoked, and y is read back.
!
! Arguments:
!   alpha, beta  scalars of the update
!   a            the matrix; must be in the assembled state
!   x            input array, at least n entries
!   y            input/output array, at least m entries
!   info         psb_success_ on success, an error code otherwise
!   trans        optional; defaults to 'N'
!
subroutine psb_c_cuda_elg_csmv(alpha,a,x,beta,y,info,trans)

  use psb_base_mod
  use elldev_mod
  use psb_vectordev_mod
  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csmv
  implicit none
  class(psb_c_cuda_elg_sparse_mat), intent(in) :: a
  complex(psb_spk_), intent(in)                :: alpha, beta, x(:)
  complex(psb_spk_), intent(inout)             :: y(:)
  integer(psb_ipk_), intent(out)               :: info
  character, optional, intent(in)              :: trans

  character          :: trans_
  integer(psb_ipk_)  :: m, n
  type(c_ptr)        :: gpX, gpY
  logical            :: tra, x_on_dev, y_on_dev
  integer(psb_ipk_)  :: err_act
  ! Name of this routine as reported in error messages (the original
  ! literal was left over from the double-precision version).
  character(len=20)  :: name='c_cuda_elg_csmv'
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  info = psb_success_

  if (present(trans)) then
    trans_ = trans
  else
    trans_ = 'N'
  end if

  if (.not.a%is_asb()) then
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  endif

  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')

  if (tra) then
    m = a%get_ncols()
    n = a%get_nrows()
  else
    n = a%get_ncols()
    m = a%get_nrows()
  end if

  if (size(x,1)<n) then
    info = 36
    call psb_errpush(info,name,i_err=(/3*ione,n,izero,izero,izero/))
    goto 9999
  end if

  if (size(y,1)<m) then
    info = 36
    call psb_errpush(info,name,i_err=(/5*ione,m,izero,izero,izero/))
    goto 9999
  end if

  if (tra) then
    if (a%is_dev()) call a%sync()
    call a%psb_c_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans)
  else
    !
    ! Just to test, move X/Y to/from the GPU.
    !
    ! Track which temporaries were actually created so that they can be
    ! released on the failure path too.
    x_on_dev = .false.
    y_on_dev = .false.
    if (info == 0) then
      info = FallocMultiVecDevice(gpX,1,size(x,1),spgpu_type_complex_float)
      x_on_dev = (info == 0)
    end if
    if (alpha /= czero) then
      if (info == 0) &
           & info = writeMultiVecDevice(gpX,x)
    end if
    if (info == 0) then
      info = FallocMultiVecDevice(gpY,1,size(y,1),spgpu_type_complex_float)
      y_on_dev = (info == 0)
    end if
    if (beta /= czero) then
      if (info == 0) &
           & info = writeMultiVecDevice(gpY,y)
    end if
    if (info == 0) &
         & info = spmvEllDevice(a%deviceMat,alpha,gpX,beta,gpY)
    if (info == 0) &
         & info = readMultiVecDevice(gpY,y)

    ! Release the device temporaries before checking for errors;
    ! jumping to 9999 first would leak them.
    if (x_on_dev) call freeMultiVecDevice(gpX)
    if (y_on_dev) call freeMultiVecDevice(gpY)
    if (info /= 0) goto 9999
  endif

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_cuda_elg_csmv
|
@ -0,0 +1,239 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
!
! Insert nz coefficients, given as plain arrays, into an ELL CUDA
! matrix.
!
! Only the update state is supported: the matrix is synced if it is in
! the device state, the insertion is delegated to the parent
! psb_c_ell_sparse_mat%csput, and the matrix is flagged as host-side.
! Any other state (including build) yields psb_err_invalid_mat_state_.
!
! Arguments:
!   nz           number of coefficients; must be > 0
!   ia, ja, val  row indices, column indices and values (size >= nz)
!   a            the matrix being updated
!   imin, imax,
!   jmin, jmax   index bounds forwarded to the parent csput
!   info         psb_success_ on success, an error code otherwise
!
subroutine psb_c_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)

  use psb_base_mod
  use iso_c_binding
  use elldev_mod
  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csput_a
  implicit none

  class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
  complex(psb_spk_), intent(in)   :: val(:)
  integer(psb_ipk_), intent(in)   :: nz, ia(:), ja(:), imin,imax,jmin,jmax
  integer(psb_ipk_), intent(out)  :: info

  integer(psb_ipk_)  :: err_act
  character(len=20)  :: name='c_cuda_elg_csput_a'
  logical, parameter :: debug=.false.
  integer(psb_ipk_)  :: int_err(5), debug_level, debug_unit

  call psb_erractionsave(err_act)
  info = psb_success_
  debug_unit  = psb_get_debug_unit()
  debug_level = psb_get_debug_level()
  ! Only int_err(1) is set below; zero the rest so that psb_errpush is
  ! never handed uninitialized entries.
  int_err = 0

!!$  write(0,*) 'In ELG_csput_a'
  if (nz <= 0) then
    info = psb_err_iarg_neg_
    int_err(1)=1
    call psb_errpush(info,name,i_err=int_err)
    goto 9999
  end if
  if (size(ia) < nz) then
    info = psb_err_input_asize_invalid_i_
    int_err(1)=2
    call psb_errpush(info,name,i_err=int_err)
    goto 9999
  end if

  if (size(ja) < nz) then
    info = psb_err_input_asize_invalid_i_
    int_err(1)=3
    call psb_errpush(info,name,i_err=int_err)
    goto 9999
  end if
  if (size(val) < nz) then
    info = psb_err_input_asize_invalid_i_
    int_err(1)=4
    call psb_errpush(info,name,i_err=int_err)
    goto 9999
  end if

  ! NOTE: nz == 0 has already been rejected by the nz <= 0 check above,
  ! so the original unreachable "if (nz == 0) return" is not repeated.

  if (a%is_bld()) then
    ! Build phase should only ever be in COO
    info = psb_err_invalid_mat_state_

  else if (a%is_upd()) then
!!$    write(*,*) 'elg_csput_a '
    if (a%is_dev()) call a%sync()
    call a%psb_c_ell_sparse_mat%csput(nz,ia,ja,val,&
         & imin,imax,jmin,jmax,info)
    if (info /= psb_success_) then
      call psb_errpush(info,name)
      goto 9999
    end if
    call a%set_host()
  else
    ! State is wrong.
    info = psb_err_invalid_mat_state_
  end if
  if (info /= psb_success_) then
    call psb_errpush(info,name)
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_cuda_elg_csput_a
|
||||
|
||||
|
||||
|
||||
!
! Insert/update a batch of coefficients in an ELG matrix, with indices and
! values supplied as encapsulated vectors.
!
! Arguments:
!   nz                  - number of coefficients to process. Values <= 0
!                         (including 0) are rejected with psb_err_iarg_neg_.
!   ia, ja              - row / column index vectors; get_nrows() must be >= nz.
!   val                 - coefficient vector; get_nrows() must be >= nz.
!   a                   - the matrix; it must be in the update state.
!   imin,imax,jmin,jmax - forwarded unchanged to the parent ELL csput on the
!                         host path; not used on the device path.
!   info                - psb_success_ on success, a PSBLAS error code otherwise.
!
! If ia, ja and val are all CUDA vectors the update is carried out by
! csputEllDeviceFloatComplex and the matrix is flagged with set_dev().
! In every other case the parent ELL implementation runs on the host copy
! and the matrix is flagged with set_host().
!
subroutine psb_c_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)

  use psb_base_mod
  use iso_c_binding
  use elldev_mod
  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csput_v
  use psb_c_cuda_vect_mod
  implicit none

  class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
  class(psb_c_base_vect_type), intent(inout)      :: val
  class(psb_i_base_vect_type), intent(inout)      :: ia, ja
  integer(psb_ipk_), intent(in)                   :: nz, imin,imax,jmin,jmax
  integer(psb_ipk_), intent(out)                  :: info

  integer(psb_ipk_)  :: err_act
  character(len=20)  :: name='c_cuda_elg_csput_v'
  integer(psb_ipk_)  :: int_err(5)
  logical            :: gpu_invoked

  call psb_erractionsave(err_act)
  info = psb_success_
  ! Only int_err(1) is set by the checks below; clear the whole array so
  ! that no undefined entries are handed to psb_errpush.
  int_err(:) = 0

  !
  ! Argument checks: int_err(1) carries the position of the offending argument.
  !
  if (nz <= 0) then
    info = psb_err_iarg_neg_
    int_err(1)=1
    call psb_errpush(info,name,i_err=int_err)
    goto 9999
  end if
  if (ia%get_nrows() < nz) then
    info = psb_err_input_asize_invalid_i_
    int_err(1)=2
    call psb_errpush(info,name,i_err=int_err)
    goto 9999
  end if
  if (ja%get_nrows() < nz) then
    info = psb_err_input_asize_invalid_i_
    int_err(1)=3
    call psb_errpush(info,name,i_err=int_err)
    goto 9999
  end if
  if (val%get_nrows() < nz) then
    info = psb_err_input_asize_invalid_i_
    int_err(1)=4
    call psb_errpush(info,name,i_err=int_err)
    goto 9999
  end if

  if (a%is_bld()) then
    ! Build phase should only ever be in COO
    info = psb_err_invalid_mat_state_

  else if (a%is_upd()) then

    !
    ! Device path: taken only when all three inputs are CUDA vectors.
    !
    gpu_invoked = .false.
    select type (ia)
    class is (psb_i_vect_cuda)
      select type (ja)
      class is (psb_i_vect_cuda)
        select type (val)
        class is (psb_c_vect_cuda)
          ! Bring matrix and inputs up to date on the device first.
          if (a%is_host())   call a%sync()
          if (val%is_host()) call val%sync()
          if (ia%is_host())  call ia%sync()
          if (ja%is_host())  call ja%sync()
          info = csputEllDeviceFloatComplex(a%deviceMat,nz,&
               & ia%deviceVect,ja%deviceVect,val%deviceVect)
          call a%set_dev()
          gpu_invoked=.true.
        end select
      end select
    end select

    !
    ! Host fallback: update the host copy through the parent ELL type.
    !
    if (.not.gpu_invoked) then
      if (a%is_dev()) call a%sync()
      call a%psb_c_ell_sparse_mat%csput(nz,ia,ja,val,&
           & imin,imax,jmin,jmax,info)
      call a%set_host()
    end if

    ! Any failure from either path is reported as an internal error.
    if (info /= 0) then
      info = psb_err_internal_error_
    end if

  else
    ! State is wrong.
    info = psb_err_invalid_mat_state_
  end if

  if (info /= psb_success_) then
    call psb_errpush(info,name)
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_cuda_elg_csput_v
|
@ -0,0 +1,67 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
!
! Copy the device image of an ELG matrix back into its host arrays.
!
! Arguments:
!   a    - the matrix. If it has no device image (a%deviceMat is not
!          associated) the matrix is freed and the routine returns.
!   info - 0 on success; otherwise the code returned by psb_realloc or
!          readEllDevice.
!
! The host arrays val/ja/irn are resized to the device pitch and maximum
! row size when they do not match. The matrix is flagged as synchronised
! only if the transfer actually succeeded, so that a failed read is never
! mistaken for a valid host copy.
!
subroutine psb_c_cuda_elg_from_gpu(a,info)

  use psb_base_mod
  use elldev_mod
  use psb_vectordev_mod
  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_from_gpu
  implicit none

  class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
  integer(psb_ipk_), intent(out)                  :: info

  integer(psb_ipk_) :: pitch, maxrowsize

  info = 0

  if (.not.(c_associated(a%deviceMat))) then
    call a%free()
    return
  end if

  ! Geometry of the device-side storage.
  pitch      = getEllDevicePitch(a%deviceMat)
  maxrowsize = getEllDeviceMaxRowSize(a%deviceMat)

  ! Make the host buffers match the device layout before reading.
  ! NOTE(review): a%idiag is not resized here although it is passed to
  ! readEllDevice below - confirm it is always large enough.
  if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then
    call psb_realloc(pitch,maxrowsize,a%val,info)
    if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info)
    if (info == 0) call psb_realloc(pitch,a%irn,info)
  end if

  if (info == 0) info = &
       & readEllDevice(a%deviceMat,a%val,a%ja,pitch,a%irn,a%idiag)

  ! Host and device agree only when the read completed without error.
  if (info == 0) call a%set_sync()

end subroutine psb_c_cuda_elg_from_gpu
|
@ -0,0 +1,84 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
!
! Triangular-solve kernel for ELG matrices with encapsulated vectors.
!
! There is no device implementation here: the matrix and both vectors are
! synchronised, the solve is delegated to the parent ELL type
! (psb_c_ell_sparse_mat%inner_spsm) and the result vector y is flagged as
! host-resident.
!
! Arguments:
!   alpha, beta - scalars forwarded to the parent inner_spsm.
!   a           - the matrix.
!   x, y        - input and input/output vectors.
!   info        - psb_success_ on success, psb_err_from_subroutine_ if the
!                 parent solve reports an error.
!   trans       - optional transposition flag, forwarded unchanged.
!
subroutine psb_c_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans)

  use psb_base_mod
  use elldev_mod
  use psb_vectordev_mod
  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_inner_vect_sv
  use psb_c_cuda_vect_mod
  implicit none

  class(psb_c_cuda_elg_sparse_mat), intent(in) :: a
  complex(psb_spk_), intent(in)                :: alpha, beta
  class(psb_c_base_vect_type), intent(inout)   :: x, y
  integer(psb_ipk_), intent(out)               :: info
  character, optional, intent(in)              :: trans

  integer(psb_ipk_)  :: err_act
  ! The routine name is 24 characters long; a len=20 variable would
  ! silently truncate it in error messages.
  character(len=24)  :: name='c_cuda_elg_inner_vect_sv'

  call psb_get_erraction(err_act)
  info = psb_success_

  ! Make sure the host copy of the matrix is current.
  if (a%is_dev()) call a%sync()

  call x%sync()
  call y%sync()
  call a%psb_c_ell_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans)
  ! The result was produced on the host.
  call y%set_host()

  if (info /= psb_success_) then
    info = psb_err_from_subroutine_
    call psb_errpush(info,name, a_err='inner_cssm')
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_cuda_elg_inner_vect_sv
|
@ -0,0 +1,63 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
!
! Allocate b as an empty matrix with the same dynamic type as this class
! (psb_c_cuda_elg_sparse_mat). Any previous content of b is freed and
! deallocated first.
!
! Arguments:
!   a    - type template only; its contents are not read.
!   b    - on return, a freshly allocated psb_c_cuda_elg_sparse_mat.
!   info - 0 on success, psb_err_alloc_dealloc_ if the deallocation or the
!          allocation failed.
!
subroutine psb_c_cuda_elg_mold(a,b,info)

  use psb_base_mod
  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_mold
  implicit none

  class(psb_c_cuda_elg_sparse_mat), intent(in)             :: a
  class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
  integer(psb_ipk_), intent(out)                           :: info

  logical, parameter :: debug=.false.
  character(len=20)  :: name='elg_mold'
  integer(psb_ipk_)  :: err_act

  info = 0
  call psb_get_erraction(err_act)

  ! Dispose of whatever b currently holds.
  if (allocated(b)) then
    call b%free()
    deallocate(b,stat=info)
  end if

  ! Give b the dynamic type of this class, unless the cleanup failed.
  if (info == 0) allocate(psb_c_cuda_elg_sparse_mat :: b, stat=info)

  ! Normal exit.
  if (info == psb_success_) return

  ! Either stat= reported a problem: map it to the PSBLAS allocation error.
  info = psb_err_alloc_dealloc_
  call psb_errpush(info, name)
  call psb_error_handler(err_act)

  return

end subroutine psb_c_cuda_elg_mold
|
@ -0,0 +1,72 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
!
! Resize the storage of an ELG matrix so that it can hold (at least) nz
! nonzeros, then refresh the device image.
!
! Arguments:
!   nz - requested total number of nonzeros; values below 1 are treated as 1.
!   a  - the matrix.
!
! The number of columns of the ja/val arrays is set to ceiling(nz/m), with
! m the number of rows; the leading dimension of ja is preserved. Errors
! are reported through the PSBLAS error stack (there is no info argument).
!
subroutine psb_c_cuda_elg_reallocate_nz(nz,a)

  use psb_base_mod
  use elldev_mod
  use psb_vectordev_mod
  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_reallocate_nz
  implicit none

  integer(psb_ipk_), intent(in)                   :: nz
  class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a

  integer(psb_ipk_)  :: m, nzrm, ld
  integer(psb_ipk_)  :: err_act, info
  ! 24 characters are needed: len=20 would truncate the name in messages.
  character(len=24)  :: name='c_cuda_elg_reallocate_nz'

  call psb_erractionsave(err_act)

  !
  ! What should this really do???
  !
  if (a%is_dev()) call a%sync()

  ! Clamp the row count to 1 so that an empty matrix does not cause an
  ! integer division by zero below.
  m    = max(ione,a%get_nrows())
  nzrm = (max(nz,ione)+m-1)/m
  ld   = size(a%ja,1)

  call psb_realloc(ld,nzrm,a%ja,info)
  if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info)
  if (info /= psb_success_) then
    call psb_errpush(psb_err_alloc_dealloc_,name)
    goto 9999
  end if

  ! Rebuild the device copy with the new row width.
  call a%to_gpu(info,nzrm=nzrm)
  if (info /= 0) then
    ! Record the failure so the handler has something to report.
    call psb_errpush(psb_err_from_subroutine_,name,a_err='to_gpu')
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_cuda_elg_reallocate_nz
|
@ -0,0 +1,71 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
!
! Scale an ELG matrix by the entries of the vector d.
!
! The scaling is done on the host by the parent ELL type
! (psb_c_ell_sparse_mat%scal), after which the device image is rebuilt
! with to_gpu.
!
! Arguments:
!   d    - scaling factors, forwarded to the parent scal.
!   a    - the matrix; a unit-diagonal matrix is first made non-unit.
!   info - psb_success_ on success; otherwise the code returned by the
!          parent scal or by to_gpu.
!   side - optional, forwarded unchanged to the parent scal.
!
subroutine psb_c_cuda_elg_scal(d,a,info,side)

  use psb_base_mod
  use elldev_mod
  use psb_vectordev_mod
  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_scal
  implicit none

  class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
  complex(psb_spk_), intent(in)                   :: d(:)
  integer(psb_ipk_), intent(out)                  :: info
  character, intent(in), optional                 :: side

  integer(psb_ipk_)  :: err_act
  character(len=20)  :: name='scal'

  info = psb_success_
  call psb_erractionsave(err_act)

  ! Work on an up-to-date host copy.
  if (a%is_dev()) call a%sync()

  if (a%is_unit()) then
    call a%make_nonunit()
  end if

  call a%psb_c_ell_sparse_mat%scal(d,info,side)
  if (info /= psb_success_) goto 9999

  ! Push the scaled coefficients back to the device.
  call a%to_gpu(info)
  if (info /= 0) then
    ! Record the failure so the handler has something to report;
    ! info itself is returned to the caller unchanged.
    call psb_errpush(psb_err_from_subroutine_,name,a_err='to_gpu')
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_cuda_elg_scal
|
@ -0,0 +1,66 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
!
! Scale every stored coefficient of an ELG matrix by the scalar d.
!
! The host copy of a%val is multiplied in place and the device image is
! then rebuilt with to_gpu.
!
! Arguments:
!   d    - scalar factor.
!   a    - the matrix; a unit-diagonal matrix is first made non-unit.
!   info - psb_success_ on success; otherwise the code returned by to_gpu.
!
subroutine psb_c_cuda_elg_scals(d,a,info)

  use psb_base_mod
  use elldev_mod
  use psb_vectordev_mod
  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_scals
  implicit none

  class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
  complex(psb_spk_), intent(in)                   :: d
  integer(psb_ipk_), intent(out)                  :: info

  integer(psb_ipk_)  :: err_act
  character(len=20)  :: name='scal'

  info = psb_success_
  call psb_erractionsave(err_act)

  ! Work on an up-to-date host copy.
  if (a%is_dev()) call a%sync()
  if (a%is_unit()) then
    call a%make_nonunit()
  end if

  a%val(:,:) = a%val(:,:) * d

  ! Push the scaled coefficients back to the device.
  call a%to_gpu(info)
  if (info /= 0) then
    ! Record the failure so the handler has something to report;
    ! info itself is returned to the caller unchanged.
    call psb_errpush(psb_err_from_subroutine_,name,a_err='to_gpu')
    goto 9999
  end if

  ! Pair the erractionsave above, as the vector variant of scal does.
  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_cuda_elg_scals
|
@ -0,0 +1,84 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
!
! Copy the host arrays of an ELG matrix to the device, (re)allocating the
! device image when its geometry does not match.
!
! Arguments:
!   a    - the matrix. Nothing is done (info = 0) if a%val or a%ja is not
!          allocated.
!   info - 0 on success; otherwise the code returned by FallocEllDevice,
!          psb_realloc or writeEllDevice.
!   nzrm - optional lower bound for the number of entries per row used to
!          size the device storage.
!
! The matrix is flagged as synchronised only when the whole sequence
! succeeded.
!
subroutine psb_c_cuda_elg_to_gpu(a,info,nzrm)

  use psb_base_mod
  use elldev_mod
  use psb_vectordev_mod
  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_to_gpu
  implicit none

  class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
  integer(psb_ipk_), intent(out)                  :: info
  integer(psb_ipk_), intent(in), optional         :: nzrm

  integer(psb_ipk_)   :: m, nzm, n, pitch, maxrowsize, nzt
  type(elldev_parms)  :: gpu_parms

  info = 0

  if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return

  m   = a%get_nrows()
  nzm = psb_size(a%val,2)
  n   = a%get_ncols()
  nzt = a%get_nzeros()
  if (present(nzrm)) nzm = max(nzm,nzrm)

  ! Geometry the device storage should have for these sizes.
  gpu_parms = FgetEllDeviceParams(m,nzm,nzt,n,spgpu_type_complex_float,1)

  ! Geometry of the existing device storage; -1 means "none", which
  ! forces an allocation below.
  if (c_associated(a%deviceMat)) then
    pitch      = getEllDevicePitch(a%deviceMat)
    maxrowsize = getEllDeviceMaxRowSize(a%deviceMat)
  else
    pitch      = -1
    maxrowsize = -1
  end if

  if ((pitch /= gpu_parms%pitch).or.(maxrowsize /= gpu_parms%maxRowSize)) then
    if (c_associated(a%deviceMat)) then
      call freeEllDevice(a%deviceMat)
    endif
    info = FallocEllDevice(a%deviceMat,m,nzm,nzt,n,spgpu_type_complex_float,1)
    ! Query the new storage only if the allocation succeeded: after a
    ! failure a%deviceMat cannot be assumed to be a valid handle.
    if (info == 0) then
      pitch      = getEllDevicePitch(a%deviceMat)
      maxrowsize = getEllDeviceMaxRowSize(a%deviceMat)
    end if
  end if

  ! Pad the host arrays to the device layout so they can be copied as-is.
  if (info == 0) then
    if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then
      call psb_realloc(pitch,maxrowsize,a%val,info)
      if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info)
    end if
  end if

  if (info == 0) info = &
       & writeEllDevice(a%deviceMat,a%val,a%ja,size(a%ja,1),a%irn,a%idiag)

  ! Host and device agree only when the write completed without error.
  if (info == 0) call a%set_sync()

end subroutine psb_c_cuda_elg_to_gpu
|
@ -0,0 +1,61 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
!
! Shrink the host arrays of an ELG matrix to the space actually in use.
!
! irn and idiag are resized to the number of rows; ja and val keep their
! leading dimension and are resized to the largest row length found in
! irn. All three sizes are clamped to a minimum of 1.
!
! Arguments:
!   a - the matrix whose host arrays are trimmed.
!
subroutine psb_c_cuda_elg_trim(a)

  use psb_base_mod
  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_trim
  implicit none

  class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a

  logical, parameter :: debug=.false.
  character(len=20)  :: name='trim'
  integer(psb_ipk_)  :: err_act, info, nz
  integer(psb_ipk_)  :: nrows_, lead_dim, max_row_len

  call psb_erractionsave(err_act)
  info = psb_success_

  ! Target sizes, each at least 1.
  nrows_      = max(1_psb_ipk_,a%get_nrows())
  lead_dim    = max(1_psb_ipk_,size(a%ja,1))
  max_row_len = max(1_psb_ipk_,maxval(a%irn(1:nrows_)))

  ! Resize one array at a time, stopping at the first failure.
  call psb_realloc(nrows_,a%irn,info)
  if (info == psb_success_) call psb_realloc(nrows_,a%idiag,info)
  if (info == psb_success_) call psb_realloc(lead_dim,max_row_len,a%ja,info)
  if (info == psb_success_) call psb_realloc(lead_dim,max_row_len,a%val,info)

  if (info == psb_success_) then
    call psb_erractionrestore(err_act)
  else
    call psb_error_handler(err_act)
  end if

  return

end subroutine psb_c_cuda_elg_trim
|
@ -0,0 +1,121 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
!
! Matrix-vector product  y = alpha*op(A)*x + beta*y  for ELG matrices with
! encapsulated vectors.
!
! Arguments:
!   alpha, beta - scalar coefficients.
!   a           - the matrix; it must be in the assembled state.
!   x           - input vector.
!   y           - input/output vector.
!   info        - psb_success_ on success, a PSBLAS error code otherwise.
!   trans       - optional; 'N' (default), 'T' or 'C', case-insensitive.
!
! Execution paths:
!   * trans = 'T' or 'C': the parent ELL spmm runs on the host and y is
!     flagged as host-resident.
!   * otherwise, with x and y both of type psb_c_vect_cuda: spmvEllDevice
!     runs on the device and y is flagged as device-resident.
!   * otherwise: x and y are extracted into plain arrays, the array
!     version of spmm is called and y is rebuilt from the result.
!
subroutine psb_c_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans)

  use psb_base_mod
  use elldev_mod
  use psb_vectordev_mod
  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_vect_mv
  use psb_c_cuda_vect_mod
  implicit none

  class(psb_c_cuda_elg_sparse_mat), intent(in) :: a
  complex(psb_spk_), intent(in)                :: alpha, beta
  class(psb_c_base_vect_type), intent(inout)   :: x
  class(psb_c_base_vect_type), intent(inout)   :: y
  integer(psb_ipk_), intent(out)               :: info
  character, optional, intent(in)              :: trans

  complex(psb_spk_), allocatable :: rx(:), ry(:)
  logical            :: tra, gpu_invoked
  character          :: trans_
  integer(psb_ipk_)  :: err_act
  character(len=20)  :: name='c_cuda_elg_vect_mv'

  call psb_erractionsave(err_act)
  info = psb_success_

  if (present(trans)) then
    trans_ = trans
  else
    trans_ = 'N'
  end if

  if (.not.a%is_asb()) then
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  endif

  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')

  if (tra) then
    !
    ! Transposed product: host only, through the parent ELL type.
    !
    if (a%is_dev()) call a%sync()
    if (.not.x%is_host()) call x%sync()
    ! y is read only when beta is nonzero, so sync it only in that case.
    if (beta /= czero) then
      if (.not.y%is_host()) call y%sync()
    end if
    call a%psb_c_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans)
    call y%set_host()

  else
    ! Make sure the device image of the matrix is current.
    if (a%is_host()) call a%sync()

    !
    ! Device path: taken only when both vectors are CUDA vectors.
    !
    gpu_invoked = .false.
    select type (xx => x)
    type is (psb_c_vect_cuda)
      select type (yy => y)
      type is (psb_c_vect_cuda)
        if (xx%is_host()) call xx%sync()
        ! y is read only when beta is nonzero.
        if (beta /= czero) then
          if (yy%is_host()) call yy%sync()
        end if
        info = spmvEllDevice(a%deviceMat,alpha,xx%deviceVect,&
             & beta,yy%deviceVect)
        if (info /= 0) then
          call psb_errpush(psb_err_from_subroutine_ai_,name,&
               & a_err='spmvELLDevice',i_err=(/info,izero,izero,izero,izero/))
          info = psb_err_from_subroutine_ai_
          goto 9999
        end if
        call yy%set_dev()
        gpu_invoked = .true.
      end select
    end select

    !
    ! Fallback for any other vector type: go through plain arrays.
    !
    if (.not.gpu_invoked) then
      if (a%is_dev()) call a%sync()
      rx = x%get_vect()
      ry = y%get_vect()
      call a%spmm(alpha,rx,beta,ry,info)
      call y%bld(ry)
    end if

  end if

  if (info /= 0) goto 9999

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_cuda_elg_vect_mv
|
@ -0,0 +1,126 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
!
! psb_c_cuda_hdiag_csmv
!
!  Matrix-vector product driver for the single-precision complex
!  HDIAG CUDA storage format, operating on plain host arrays.
!
!  Arguments:
!    alpha, beta  - complex scalars forwarded to the product kernel
!    a            - the matrix; must be in the assembled state
!    x(:)         - input vector, at least as long as the number of
!                   columns of op(A)
!    y(:)         - input/output vector, at least as long as the number
!                   of rows of op(A)
!    info         - 0 on success, a PSBLAS error code otherwise
!    trans        - optional; 'N' (default), 'T' or 'C'
!
!  Behaviour:
!    * trans = 'T'/'C': delegated to the parent (host) HDIA
!      implementation.
!    * otherwise: X and Y are staged into temporary device buffers,
!      spmvHdiagDevice is invoked, and Y is copied back to the host.
!      The temporary buffers are released on both the success and the
!      failure path.
!
subroutine psb_c_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans)

  use psb_base_mod
  use hdiagdev_mod
  use psb_vectordev_mod
  use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_csmv
  implicit none
  class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a
  complex(psb_spk_), intent(in)    :: alpha, beta, x(:)
  complex(psb_spk_), intent(inout) :: y(:)
  integer, intent(out)             :: info
  character, optional, intent(in)  :: trans

  character         :: trans_
  integer           :: m, n
  type(c_ptr)       :: gpX, gpY
  ! Track which device buffers were actually allocated, so that the
  ! cleanup code never frees an undefined handle.
  logical           :: tra, have_gpx, have_gpy
  Integer           :: err_act
  character(len=20) :: name='c_cuda_hdiag_csmv'

  call psb_erractionsave(err_act)
  info = psb_success_

  if (present(trans)) then
    trans_ = trans
  else
    trans_ = 'N'
  end if

  if (.not.a%is_asb()) then
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  endif

  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')

  ! n is the required length of x, m the required length of y.
  if (tra) then
    m = a%get_ncols()
    n = a%get_nrows()
  else
    n = a%get_ncols()
    m = a%get_nrows()
  end if

  if (size(x,1)<n) then
    info = 36
    call psb_errpush(info,name,i_err=(/3*ione,n,izero,izero,izero/))
    goto 9999
  end if

  if (size(y,1)<m) then
    info = 36
    call psb_errpush(info,name,i_err=(/5*ione,m,izero,izero,izero/))
    goto 9999
  end if

  if (tra) then
    ! Transposed products are handled by the parent HDIA type.
    call a%psb_c_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans)
  else
    !
    ! Just to test, move X/Y to/from the GPU.
    !
    have_gpx = .false.
    have_gpy = .false.

    ! The buffers hold single-precision complex data, so they must be
    ! allocated with the complex-float element type (the original code
    ! requested spgpu_type_double here).
    ! NOTE(review): spgpu_type_complex_float is assumed to be exported
    ! alongside spgpu_type_double - confirm against the device modules.
    info = FallocMultiVecDevice(gpX,1,size(x,1),spgpu_type_complex_float)
    have_gpx = (info == 0)
    ! X only needs uploading when it actually contributes.
    if ((info == 0).and.(alpha /= czero)) &
         & info = writeMultiVecDevice(gpX,x)

    if (info == 0) then
      info = FallocMultiVecDevice(gpY,1,size(y,1),spgpu_type_complex_float)
      have_gpy = (info == 0)
    end if
    ! Likewise the previous contents of Y matter only when beta /= 0.
    if ((info == 0).and.(beta /= czero)) &
         & info = writeMultiVecDevice(gpY,y)

    if (info == 0) &
         & info = spmvHdiagDevice(a%deviceMat,alpha,gpX,beta,gpY)
    if (info == 0) &
         & info = readMultiVecDevice(gpY,y)

    ! Release the temporaries regardless of the outcome; the original
    ! code jumped to the error exit first and leaked them.
    if (have_gpx) call freeMultiVecDevice(gpX)
    if (have_gpy) call freeMultiVecDevice(gpY)

    if (info /= 0) then
      call psb_errpush(psb_err_from_subroutine_ai_,name,&
           & a_err='spmvHdiagDevice',&
           & i_err=(/int(info,psb_ipk_),izero,izero,izero,izero/))
      info = psb_err_from_subroutine_ai_
      goto 9999
    end if
  endif

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_cuda_hdiag_csmv
|
@ -0,0 +1,64 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
!
! psb_c_cuda_hdiag_mold
!
!  Make B an (empty) object of the same dynamic type as A, i.e.
!  psb_c_cuda_hdiag_sparse_mat.
!
!  Arguments:
!    a     - the matrix acting as the type template (its contents are
!            not accessed)
!    b     - on entry, possibly allocated: in that case it is freed and
!            deallocated first; on exit, freshly allocated with dynamic
!            type psb_c_cuda_hdiag_sparse_mat
!    info  - 0 on success, psb_err_alloc_dealloc_ if the deallocation
!            or the allocation failed
!
subroutine psb_c_cuda_hdiag_mold(a,b,info)

  use psb_base_mod
  use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_mold
  implicit none
  class(psb_c_cuda_hdiag_sparse_mat), intent(in)           :: a
  class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
  integer(psb_ipk_), intent(out)                           :: info

  integer(psb_ipk_) :: err_act
  character(len=20) :: routine_name='hdiag_mold'

  call psb_get_erraction(err_act)

  info = 0

  ! Dispose of any previous content of B before re-allocating it.
  if (allocated(b)) then
    call b%free()
    deallocate(b,stat=info)
  end if

  ! Attempt the allocation only if the cleanup above went fine.
  if (info == 0) then
    allocate(psb_c_cuda_hdiag_sparse_mat :: b, stat=info)
  end if

  ! Normal exit.
  if (info == psb_success_) return

  ! Either the deallocate or the allocate failed: report it.
  info = psb_err_alloc_dealloc_
  call psb_errpush(info, routine_name)
  call psb_error_handler(err_act)

  return

end subroutine psb_c_cuda_hdiag_mold
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue