Merge branch 'repackage' into non-diag
commit
74cf138a6c
@ -0,0 +1,37 @@
|
|||||||
|
TOPDIR=../..
|
||||||
|
include $(TOPDIR)/Make.inc
|
||||||
|
#
|
||||||
|
# Libraries used
|
||||||
|
#
|
||||||
|
PSBLIBDIR=$(PSBLASDIR)/lib/
|
||||||
|
PSBINCDIR=$(PSBLASDIR)/include
|
||||||
|
LIBDIR=$(TOPDIR)/lib
|
||||||
|
INCDIR=$(TOPDIR)/include
|
||||||
|
PSBLAS_LIB= -L$(PSBLIBDIR) -lpsb_util -lpsb_base
|
||||||
|
#-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base
|
||||||
|
LDLIBS=$(PSBLDLIBS)
|
||||||
|
#
|
||||||
|
# Compilers and such
|
||||||
|
#
|
||||||
|
#CCOPT= -g
|
||||||
|
FINCLUDES=$(FMFLAG). $(FMFLAG)$(INCDIR) $(FMFLAG)$(PSBINCDIR) $(FIFLAG).
|
||||||
|
CINCLUDES=$(SPGPU_INCLUDES) $(CUDA_INCLUDES) -I.. -I$(INCDIR)
|
||||||
|
LIBNAME=libpsb_gpu.a
|
||||||
|
|
||||||
|
|
||||||
|
CUDAOBJS=psi_cuda_c_CopyCooToElg.o psi_cuda_c_CopyCooToHlg.o \
|
||||||
|
psi_cuda_d_CopyCooToElg.o psi_cuda_d_CopyCooToHlg.o \
|
||||||
|
psi_cuda_s_CopyCooToElg.o psi_cuda_s_CopyCooToHlg.o \
|
||||||
|
psi_cuda_z_CopyCooToElg.o psi_cuda_z_CopyCooToHlg.o
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
objs: $(CUDAOBJS)
|
||||||
|
|
||||||
|
lib: objs
|
||||||
|
ar cur ../$(LIBNAME) $(CUDAOBJS)
|
||||||
|
|
||||||
|
$(CUDAOBJS): psi_cuda_common.cuh psi_cuda_CopyCooToElg.cuh psi_cuda_CopyCooToHlg.cuh
|
||||||
|
|
||||||
|
clean:
|
||||||
|
/bin/rm -f $(CUDAOBJS)
|
@ -0,0 +1,104 @@
|
|||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "cintrf.h"
|
||||||
|
#include "vectordev.h"
|
||||||
|
#include "psi_cuda_common.cuh"
|
||||||
|
|
||||||
|
|
||||||
|
#undef GEN_PSI_FUNC_NAME
|
||||||
|
#define GEN_PSI_FUNC_NAME(x) CONCAT(CONCAT(psi_cuda_,x),_CopyCooToElg)
|
||||||
|
|
||||||
|
#define THREAD_BLOCK 256
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
void GEN_PSI_FUNC_NAME(TYPE_SYMBOL)(spgpuHandle_t handle, int nr, int nc, int nza,
|
||||||
|
int baseIdx, int hacksz, int ldv, int nzm,
|
||||||
|
int *rS,int *devIdisp, int *devJa, VALUE_TYPE *devVal,
|
||||||
|
int *idiag, int *rP, VALUE_TYPE *cM);
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
__global__ void CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_krn)(int ii, int nrws, int nr, int nza,
|
||||||
|
int baseIdx, int hacksz, int ldv, int nzm,
|
||||||
|
int *rS, int *devIdisp, int *devJa, VALUE_TYPE *devVal,
|
||||||
|
int *idiag, int *rP, VALUE_TYPE *cM)
|
||||||
|
{
|
||||||
|
int ir, k, ipnt, rsz,jc;
|
||||||
|
int ki = threadIdx.x + blockIdx.x * (THREAD_BLOCK);
|
||||||
|
int i=ii+ki;
|
||||||
|
int idval=0;
|
||||||
|
|
||||||
|
if (ki >= nrws) return;
|
||||||
|
if (i >= nr) return;
|
||||||
|
|
||||||
|
ipnt=devIdisp[i];
|
||||||
|
rsz=rS[i];
|
||||||
|
ir = i;
|
||||||
|
for (k=0; k<rsz; k++) {
|
||||||
|
if (devJa[ipnt] == i+baseIdx) idval = ipnt-devIdisp[i]+baseIdx;
|
||||||
|
rP[ir] = devJa[ipnt];
|
||||||
|
cM[ir] = devVal[ipnt];
|
||||||
|
ir += ldv;
|
||||||
|
ipnt++;
|
||||||
|
}
|
||||||
|
// Here we are assuming that devJa[] has at least one valid entry
|
||||||
|
// Pick one valid value.
|
||||||
|
jc = devJa[devIdisp[1]];
|
||||||
|
for (k=rsz; k<nzm; k++) {
|
||||||
|
rP[ir] = jc;
|
||||||
|
cM[ir] = CONCAT(zero_,VALUE_TYPE)();
|
||||||
|
ir += ldv;
|
||||||
|
}
|
||||||
|
idiag[i]=idval;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_)(spgpuHandle_t handle, int nrws, int i, int nr, int nza,
|
||||||
|
int baseIdx, int hacksz, int ldv, int nzm,
|
||||||
|
int *rS,int *devIdisp, int *devJa, VALUE_TYPE *devVal,
|
||||||
|
int *idiag, int *rP, VALUE_TYPE *cM)
|
||||||
|
{
|
||||||
|
dim3 block (THREAD_BLOCK, 1);
|
||||||
|
dim3 grid ((nrws + THREAD_BLOCK - 1) / THREAD_BLOCK);
|
||||||
|
|
||||||
|
CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_krn)
|
||||||
|
<<< grid, block, 0, handle->currentStream >>>(i,nrws, nr, nza, baseIdx, hacksz, ldv, nzm,
|
||||||
|
rS,devIdisp,devJa,devVal,idiag, rP,cM);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void
|
||||||
|
GEN_PSI_FUNC_NAME(TYPE_SYMBOL)
|
||||||
|
(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, int hacksz, int ldv, int nzm,
|
||||||
|
int *rS,int *devIdisp, int *devJa, VALUE_TYPE *devVal,
|
||||||
|
int *idiag, int *rP, VALUE_TYPE *cM)
|
||||||
|
{ int i,j, nrws;
|
||||||
|
//int maxNForACall = THREAD_BLOCK*handle->maxGridSizeX;
|
||||||
|
int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX);
|
||||||
|
|
||||||
|
|
||||||
|
//fprintf(stderr,"Loop on j: %d\n",j);
|
||||||
|
for (i=0; i<nr; i+=nrws) {
|
||||||
|
nrws = MIN(maxNForACall, nr - i);
|
||||||
|
//fprintf(stderr,"ifirst: %d i : %d nrws: %d i + ifirst + (nrws -1) -1 %d \n",ifirst,i,nrws,i + ifirst + (nrws -1) -1);
|
||||||
|
CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_)(handle,nrws,i, nr, nza, baseIdx, hacksz, ldv, nzm,
|
||||||
|
rS,devIdisp, devJa, devVal, idiag, rP, cM);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,108 @@
|
|||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "cintrf.h"
|
||||||
|
#include "vectordev.h"
|
||||||
|
#include "psi_cuda_common.cuh"
|
||||||
|
|
||||||
|
|
||||||
|
#undef GEN_PSI_FUNC_NAME
|
||||||
|
#define GEN_PSI_FUNC_NAME(x) CONCAT(CONCAT(psi_cuda_,x),_CopyCooToHlg)
|
||||||
|
|
||||||
|
#define THREAD_BLOCK 256
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
void GEN_PSI_FUNC_NAME(TYPE_SYMBOL)(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, int hacksz,
|
||||||
|
int noffs, int isz, int *rS, int *hackOffs, int *devIdisp,
|
||||||
|
int *devJa, VALUE_TYPE *devVal,
|
||||||
|
int *idiag, int *rP, VALUE_TYPE *cM);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
__global__ void CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_krn)(int ii, int nrws, int nr, int nza,
|
||||||
|
int baseIdx, int hacksz, int noffs, int isz,
|
||||||
|
int *rS, int *hackOffs, int *devIdisp,
|
||||||
|
int *devJa, VALUE_TYPE *devVal,
|
||||||
|
int *idiag, int *rP, VALUE_TYPE *cM)
|
||||||
|
{
|
||||||
|
int ir, k, ipnt, rsz,jc;
|
||||||
|
int ki = threadIdx.x + blockIdx.x * (THREAD_BLOCK);
|
||||||
|
int i=ii+ki;
|
||||||
|
|
||||||
|
if (ki >= nrws) return;
|
||||||
|
|
||||||
|
|
||||||
|
if (i<nr) {
|
||||||
|
int hackId = i / hacksz;
|
||||||
|
int hackLaneId = i % hacksz;
|
||||||
|
int hackOffset = hackOffs[hackId] + hackLaneId;
|
||||||
|
int nzm = (hackOffs[hackId+1]-hackOffs[hackId])/hacksz;
|
||||||
|
int idval=0;
|
||||||
|
rsz = rS[i];
|
||||||
|
ipnt = devIdisp[i];
|
||||||
|
ir = hackOffset;
|
||||||
|
for (k=0; k<rsz; k++) {
|
||||||
|
cM[ir] = devVal[ipnt];
|
||||||
|
if (devJa[ipnt] == i+baseIdx) idval = ipnt-devIdisp[i]+baseIdx;
|
||||||
|
rP[ir] = devJa[ipnt];
|
||||||
|
ir += hacksz;
|
||||||
|
ipnt++;
|
||||||
|
}
|
||||||
|
// Here we are assuming that devJa[] has at least one valid entry
|
||||||
|
// Pick one valid value.
|
||||||
|
jc = devJa[devIdisp[1]];
|
||||||
|
for (k=rsz; k<nzm; k++) {
|
||||||
|
rP[ir] = jc;
|
||||||
|
cM[ir] = CONCAT(zero_,VALUE_TYPE)();
|
||||||
|
ir += hacksz;
|
||||||
|
}
|
||||||
|
idiag[i]=idval;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_)(spgpuHandle_t handle, int nrws, int i,
|
||||||
|
int nr, int nza, int baseIdx, int hacksz, int noffs, int isz,
|
||||||
|
int *rS, int *hackOffs, int *devIdisp, int *devJa,
|
||||||
|
VALUE_TYPE *devVal, int *idiag, int *rP, VALUE_TYPE *cM)
|
||||||
|
{
|
||||||
|
dim3 block (THREAD_BLOCK, 1);
|
||||||
|
dim3 grid ((nrws + THREAD_BLOCK - 1) / THREAD_BLOCK);
|
||||||
|
|
||||||
|
CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_krn)
|
||||||
|
<<< grid, block, 0, handle->currentStream >>>(i,nrws,nr, nza, baseIdx, hacksz, noffs, isz,
|
||||||
|
rS,hackOffs,devIdisp,devJa,devVal,idiag,rP,cM);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void GEN_PSI_FUNC_NAME(TYPE_SYMBOL)(spgpuHandle_t handle, int nr, int nc, int nza,
|
||||||
|
int baseIdx, int hacksz, int noffs, int isz,
|
||||||
|
int *rS, int *hackOffs, int *devIdisp,
|
||||||
|
int *devJa, VALUE_TYPE *devVal,
|
||||||
|
int *idiag, int *rP, VALUE_TYPE *cM)
|
||||||
|
{ int i, nrws;
|
||||||
|
//int maxNForACall = THREAD_BLOCK*handle->maxGridSizeX;
|
||||||
|
int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX);
|
||||||
|
|
||||||
|
//fprintf(stderr,"Loop on j: %d\n",j);
|
||||||
|
for (i=0; i<nr; i+=nrws) {
|
||||||
|
nrws = MIN(maxNForACall, nr - i);
|
||||||
|
//fprintf(stderr,"cpy_coo_2_hlg: i : %d nrws: %d \n", i,nrws);
|
||||||
|
CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_)(handle,nrws,i, nr, nza, baseIdx, hacksz, noffs, isz,
|
||||||
|
rS, hackOffs, devIdisp, devJa, devVal, idiag, rP, cM);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,10 @@
|
|||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "cintrf.h"
|
||||||
|
#include "vectordev.h"
|
||||||
|
|
||||||
|
|
||||||
|
#define VALUE_TYPE cuFloatComplex
|
||||||
|
#define TYPE_SYMBOL c
|
||||||
|
#include "psi_cuda_CopyCooToElg.cuh"
|
@ -0,0 +1,10 @@
|
|||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "cintrf.h"
|
||||||
|
#include "vectordev.h"
|
||||||
|
|
||||||
|
|
||||||
|
#define VALUE_TYPE cuFloatComplex
|
||||||
|
#define TYPE_SYMBOL c
|
||||||
|
#include "psi_cuda_CopyCooToHlg.cuh"
|
@ -0,0 +1,16 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#define PRE_CONCAT(A, B) A ## B
|
||||||
|
#define CONCAT(A, B) PRE_CONCAT(A, B)
|
||||||
|
#define MIN(A,B) ( (A)<(B) ? (A) : (B) )
|
||||||
|
#define SQUARE(x) ((x)*(x))
|
||||||
|
#define GET_ADDR(a,ix,iy,nc) a[(nc)*(ix)+(iy)]
|
||||||
|
#define GET_VAL(a,ix,iy,nc) (GET_ADDR(a,ix,iy,nc))
|
||||||
|
|
||||||
|
__device__ __host__ static float zero_float() { return 0.0f; }
|
||||||
|
__device__ __host__ static cuFloatComplex zero_cuFloatComplex() { return make_cuFloatComplex(0.0, 0.0); }
|
||||||
|
|
||||||
|
#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__)
|
||||||
|
__device__ __host__ static double zero_double() { return 0.0; }
|
||||||
|
__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); }
|
||||||
|
#endif
|
@ -0,0 +1,10 @@
|
|||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "cintrf.h"
|
||||||
|
#include "vectordev.h"
|
||||||
|
|
||||||
|
|
||||||
|
#define VALUE_TYPE double
|
||||||
|
#define TYPE_SYMBOL d
|
||||||
|
#include "psi_cuda_CopyCooToElg.cuh"
|
@ -0,0 +1,10 @@
|
|||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "cintrf.h"
|
||||||
|
#include "vectordev.h"
|
||||||
|
|
||||||
|
|
||||||
|
#define VALUE_TYPE double
|
||||||
|
#define TYPE_SYMBOL d
|
||||||
|
#include "psi_cuda_CopyCooToHlg.cuh"
|
@ -0,0 +1,10 @@
|
|||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "cintrf.h"
|
||||||
|
#include "vectordev.h"
|
||||||
|
|
||||||
|
|
||||||
|
#define VALUE_TYPE float
|
||||||
|
#define TYPE_SYMBOL s
|
||||||
|
#include "psi_cuda_CopyCooToElg.cuh"
|
@ -0,0 +1,10 @@
|
|||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "cintrf.h"
|
||||||
|
#include "vectordev.h"
|
||||||
|
|
||||||
|
|
||||||
|
#define VALUE_TYPE float
|
||||||
|
#define TYPE_SYMBOL s
|
||||||
|
#include "psi_cuda_CopyCooToHlg.cuh"
|
@ -0,0 +1,10 @@
|
|||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "cintrf.h"
|
||||||
|
#include "vectordev.h"
|
||||||
|
|
||||||
|
|
||||||
|
#define VALUE_TYPE cuDoubleComplex
|
||||||
|
#define TYPE_SYMBOL z
|
||||||
|
#include "psi_cuda_CopyCooToElg.cuh"
|
@ -0,0 +1,10 @@
|
|||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "cintrf.h"
|
||||||
|
#include "vectordev.h"
|
||||||
|
|
||||||
|
|
||||||
|
#define VALUE_TYPE cuDoubleComplex
|
||||||
|
#define TYPE_SYMBOL z
|
||||||
|
#include "psi_cuda_CopyCooToHlg.cuh"
|
@ -0,0 +1,21 @@
|
|||||||
|
(c) Copyright 2011-2021 Davide Barbieri, Salvatore Filippone
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer in the documentation and/or
|
||||||
|
other materials provided with the distribution.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
|
||||||
|
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||||
|
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
|
||||||
|
SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
@ -0,0 +1,143 @@
|
|||||||
|
include ../Make.inc
|
||||||
|
#
|
||||||
|
# Libraries used
|
||||||
|
#
|
||||||
|
LIBDIR=../lib
|
||||||
|
INCDIR=../include
|
||||||
|
MODDIR=../modules
|
||||||
|
PSBLAS_LIB= -lpsb_util -lpsb_base
|
||||||
|
#-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base
|
||||||
|
LDLIBS=$(PSBLDLIBS)
|
||||||
|
#
|
||||||
|
# Compilers and such
|
||||||
|
#
|
||||||
|
#CCOPT= -g
|
||||||
|
FINCLUDES=$(FMFLAG). $(FMFLAG)$(INCDIR) $(FMFLAG)$(MODDIR) $(FIFLAG).
|
||||||
|
CINCLUDES=$(SPGPU_INCLUDES) $(CUDA_INCLUDES) -I$(INCDIR)
|
||||||
|
LIBNAME=libpsb_cuda.a
|
||||||
|
|
||||||
|
|
||||||
|
FOBJS=cusparse_mod.o base_cusparse_mod.o \
|
||||||
|
s_cusparse_mod.o d_cusparse_mod.o c_cusparse_mod.o z_cusparse_mod.o \
|
||||||
|
psb_vectordev_mod.o core_mod.o \
|
||||||
|
psb_s_vectordev_mod.o psb_d_vectordev_mod.o psb_i_vectordev_mod.o\
|
||||||
|
psb_c_vectordev_mod.o psb_z_vectordev_mod.o psb_base_vectordev_mod.o \
|
||||||
|
elldev_mod.o hlldev_mod.o diagdev_mod.o hdiagdev_mod.o \
|
||||||
|
psb_i_cuda_vect_mod.o \
|
||||||
|
psb_d_cuda_vect_mod.o psb_s_cuda_vect_mod.o\
|
||||||
|
psb_z_cuda_vect_mod.o psb_c_cuda_vect_mod.o\
|
||||||
|
psb_d_cuda_elg_mat_mod.o psb_d_cuda_hlg_mat_mod.o \
|
||||||
|
psb_d_cuda_hybg_mat_mod.o psb_d_cuda_csrg_mat_mod.o\
|
||||||
|
psb_s_cuda_elg_mat_mod.o psb_s_cuda_hlg_mat_mod.o \
|
||||||
|
psb_s_cuda_hybg_mat_mod.o psb_s_cuda_csrg_mat_mod.o\
|
||||||
|
psb_c_cuda_elg_mat_mod.o psb_c_cuda_hlg_mat_mod.o \
|
||||||
|
psb_c_cuda_hybg_mat_mod.o psb_c_cuda_csrg_mat_mod.o\
|
||||||
|
psb_z_cuda_elg_mat_mod.o psb_z_cuda_hlg_mat_mod.o \
|
||||||
|
psb_z_cuda_hybg_mat_mod.o psb_z_cuda_csrg_mat_mod.o\
|
||||||
|
psb_cuda_env_mod.o psb_cuda_mod.o \
|
||||||
|
psb_d_cuda_diag_mat_mod.o\
|
||||||
|
psb_d_cuda_hdiag_mat_mod.o psb_s_cuda_hdiag_mat_mod.o\
|
||||||
|
psb_s_cuda_dnsg_mat_mod.o psb_d_cuda_dnsg_mat_mod.o \
|
||||||
|
psb_c_cuda_dnsg_mat_mod.o psb_z_cuda_dnsg_mat_mod.o \
|
||||||
|
dnsdev_mod.o
|
||||||
|
|
||||||
|
COBJS= elldev.o hlldev.o diagdev.o hdiagdev.o vectordev.o ivectordev.o dnsdev.o\
|
||||||
|
svectordev.o dvectordev.o cvectordev.o zvectordev.o cuda_util.o \
|
||||||
|
fcusparse.o scusparse.o dcusparse.o ccusparse.o zcusparse.o
|
||||||
|
|
||||||
|
OBJS=$(COBJS) $(FOBJS)
|
||||||
|
|
||||||
|
lib: objs
|
||||||
|
|
||||||
|
objs: spgpuinc $(OBJS) iobjs cudaobjs spgpuobjs
|
||||||
|
/bin/cp -p *$(.mod) $(MODDIR)
|
||||||
|
/bin/cp -p *.h $(INCDIR)
|
||||||
|
|
||||||
|
spgpuinc:
|
||||||
|
$(MAKE) -C spgpu includes
|
||||||
|
spgpuobjs:
|
||||||
|
$(MAKE) -C spgpu objs
|
||||||
|
spgpulib:
|
||||||
|
$(MAKE) -C spgpu lib
|
||||||
|
|
||||||
|
lib: ilib cudalib spgpulib
|
||||||
|
ar cur $(LIBNAME) $(OBJS)
|
||||||
|
/bin/cp -p $(LIBNAME) $(LIBDIR)
|
||||||
|
|
||||||
|
dnsdev_mod.o hlldev_mod.o elldev_mod.o psb_base_vectordev_mod.o: core_mod.o
|
||||||
|
psb_d_cuda_vect_mod.o psb_s_cuda_vect_mod.o psb_z_cuda_vect_mod.o psb_c_cuda_vect_mod.o: psb_i_cuda_vect_mod.o
|
||||||
|
psb_i_cuda_vect_mod.o : psb_vectordev_mod.o psb_cuda_env_mod.o
|
||||||
|
cusparse_mod.o: s_cusparse_mod.o d_cusparse_mod.o c_cusparse_mod.o z_cusparse_mod.o
|
||||||
|
s_cusparse_mod.o d_cusparse_mod.o c_cusparse_mod.o z_cusparse_mod.o : base_cusparse_mod.o
|
||||||
|
psb_d_cuda_hlg_mat_mod.o: hlldev_mod.o psb_d_cuda_vect_mod.o psb_cuda_env_mod.o
|
||||||
|
psb_d_cuda_elg_mat_mod.o: elldev_mod.o psb_d_cuda_vect_mod.o
|
||||||
|
psb_d_cuda_diag_mat_mod.o: diagdev_mod.o psb_d_cuda_vect_mod.o
|
||||||
|
psb_d_cuda_hdiag_mat_mod.o: hdiagdev_mod.o psb_d_cuda_vect_mod.o
|
||||||
|
psb_s_cuda_dnsg_mat_mod.o: dnsdev_mod.o psb_s_cuda_vect_mod.o
|
||||||
|
psb_d_cuda_dnsg_mat_mod.o: dnsdev_mod.o psb_d_cuda_vect_mod.o
|
||||||
|
psb_c_cuda_dnsg_mat_mod.o: dnsdev_mod.o psb_c_cuda_vect_mod.o
|
||||||
|
psb_z_cuda_dnsg_mat_mod.o: dnsdev_mod.o psb_z_cuda_vect_mod.o
|
||||||
|
psb_s_cuda_hlg_mat_mod.o: hlldev_mod.o psb_s_cuda_vect_mod.o psb_cuda_env_mod.o
|
||||||
|
psb_s_cuda_elg_mat_mod.o: elldev_mod.o psb_s_cuda_vect_mod.o
|
||||||
|
psb_s_cuda_diag_mat_mod.o: diagdev_mod.o psb_s_cuda_vect_mod.o
|
||||||
|
psb_s_cuda_hdiag_mat_mod.o: hdiagdev_mod.o psb_s_cuda_vect_mod.o
|
||||||
|
psb_s_cuda_csrg_mat_mod.o psb_s_cuda_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o
|
||||||
|
psb_d_cuda_csrg_mat_mod.o psb_d_cuda_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o
|
||||||
|
psb_z_cuda_hlg_mat_mod.o: hlldev_mod.o psb_z_cuda_vect_mod.o psb_cuda_env_mod.o
|
||||||
|
psb_z_cuda_elg_mat_mod.o: elldev_mod.o psb_z_cuda_vect_mod.o
|
||||||
|
psb_c_cuda_hlg_mat_mod.o: hlldev_mod.o psb_c_cuda_vect_mod.o psb_cuda_env_mod.o
|
||||||
|
psb_c_cuda_elg_mat_mod.o: elldev_mod.o psb_c_cuda_vect_mod.o
|
||||||
|
psb_c_cuda_csrg_mat_mod.o psb_c_cuda_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o
|
||||||
|
psb_z_cuda_csrg_mat_mod.o psb_z_cuda_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o
|
||||||
|
psb_vectordev_mod.o: psb_s_vectordev_mod.o psb_d_vectordev_mod.o psb_c_vectordev_mod.o psb_z_vectordev_mod.o psb_i_vectordev_mod.o
|
||||||
|
psb_i_vectordev_mod.o psb_s_vectordev_mod.o psb_d_vectordev_mod.o psb_c_vectordev_mod.o psb_z_vectordev_mod.o: psb_base_vectordev_mod.o
|
||||||
|
vectordev.o: cuda_util.o vectordev.h
|
||||||
|
elldev.o: elldev.c
|
||||||
|
dnsdev.o: dnsdev.c
|
||||||
|
fcusparse.h elldev.c: elldev.h vectordev.h
|
||||||
|
fcusparse.o scusparse.o dcusparse.o ccusparse.o zcusparse.o : fcusparse.h
|
||||||
|
fcusparse.o scusparse.o dcusparse.o ccusparse.o zcusparse.o : fcusparse_fct.h
|
||||||
|
svectordev.o: svectordev.h vectordev.h
|
||||||
|
dvectordev.o: dvectordev.h vectordev.h
|
||||||
|
cvectordev.o: cvectordev.h vectordev.h
|
||||||
|
zvectordev.o: zvectordev.h vectordev.h
|
||||||
|
psb_cuda_env_mod.o: base_cusparse_mod.o
|
||||||
|
psb_cuda_mod.o: psb_cuda_env_mod.o psb_i_cuda_vect_mod.o\
|
||||||
|
psb_d_cuda_vect_mod.o psb_s_cuda_vect_mod.o\
|
||||||
|
psb_z_cuda_vect_mod.o psb_c_cuda_vect_mod.o\
|
||||||
|
psb_d_cuda_elg_mat_mod.o psb_d_cuda_hlg_mat_mod.o \
|
||||||
|
psb_d_cuda_hybg_mat_mod.o psb_d_cuda_csrg_mat_mod.o\
|
||||||
|
psb_s_cuda_elg_mat_mod.o psb_s_cuda_hlg_mat_mod.o \
|
||||||
|
psb_s_cuda_hybg_mat_mod.o psb_s_cuda_csrg_mat_mod.o\
|
||||||
|
psb_c_cuda_elg_mat_mod.o psb_c_cuda_hlg_mat_mod.o \
|
||||||
|
psb_c_cuda_hybg_mat_mod.o psb_c_cuda_csrg_mat_mod.o\
|
||||||
|
psb_z_cuda_elg_mat_mod.o psb_z_cuda_hlg_mat_mod.o \
|
||||||
|
psb_z_cuda_hybg_mat_mod.o psb_z_cuda_csrg_mat_mod.o\
|
||||||
|
psb_d_cuda_diag_mat_mod.o \
|
||||||
|
psb_d_cuda_hdiag_mat_mod.o psb_s_cuda_hdiag_mat_mod.o\
|
||||||
|
psb_s_cuda_dnsg_mat_mod.o psb_d_cuda_dnsg_mat_mod.o \
|
||||||
|
psb_c_cuda_dnsg_mat_mod.o psb_z_cuda_dnsg_mat_mod.o
|
||||||
|
|
||||||
|
iobjs: $(FOBJS)
|
||||||
|
$(MAKE) -C impl objs
|
||||||
|
cudaobjs: $(FOBJS)
|
||||||
|
$(MAKE) -C CUDA objs
|
||||||
|
|
||||||
|
ilib: objs
|
||||||
|
$(MAKE) -C impl lib LIBNAME=$(LIBNAME)
|
||||||
|
cudalib: objs ilib
|
||||||
|
$(MAKE) -C CUDA lib LIBNAME=$(LIBNAME)
|
||||||
|
|
||||||
|
clean: cclean iclean cudaclean spgpuclean
|
||||||
|
/bin/rm -f $(FOBJS) *$(.mod) *.a
|
||||||
|
|
||||||
|
cclean:
|
||||||
|
/bin/rm -f $(COBJS)
|
||||||
|
iclean:
|
||||||
|
$(MAKE) -C impl clean
|
||||||
|
cudaclean:
|
||||||
|
$(MAKE) -C CUDA clean
|
||||||
|
spgpuclean:
|
||||||
|
$(MAKE) -C spgpu clean
|
||||||
|
|
||||||
|
veryclean: clean
|
@ -0,0 +1,113 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
|
||||||
|
module base_cusparse_mod
|
||||||
|
use iso_c_binding
|
||||||
|
! Interface to CUSPARSE.
|
||||||
|
|
||||||
|
enum, bind(c)
|
||||||
|
enumerator cusparse_status_success
|
||||||
|
enumerator cusparse_status_not_initialized
|
||||||
|
enumerator cusparse_status_alloc_failed
|
||||||
|
enumerator cusparse_status_invalid_value
|
||||||
|
enumerator cusparse_status_arch_mismatch
|
||||||
|
enumerator cusparse_status_mapping_error
|
||||||
|
enumerator cusparse_status_execution_failed
|
||||||
|
enumerator cusparse_status_internal_error
|
||||||
|
enumerator cusparse_status_matrix_type_not_supported
|
||||||
|
end enum
|
||||||
|
|
||||||
|
enum, bind(c)
|
||||||
|
enumerator cusparse_matrix_type_general
|
||||||
|
enumerator cusparse_matrix_type_symmetric
|
||||||
|
enumerator cusparse_matrix_type_hermitian
|
||||||
|
enumerator cusparse_matrix_type_triangular
|
||||||
|
end enum
|
||||||
|
|
||||||
|
enum, bind(c)
|
||||||
|
enumerator cusparse_fill_mode_lower
|
||||||
|
enumerator cusparse_fill_mode_upper
|
||||||
|
end enum
|
||||||
|
|
||||||
|
enum, bind(c)
|
||||||
|
enumerator cusparse_diag_type_non_unit
|
||||||
|
enumerator cusparse_diag_type_unit
|
||||||
|
end enum
|
||||||
|
|
||||||
|
enum, bind(c)
|
||||||
|
enumerator cusparse_index_base_zero
|
||||||
|
enumerator cusparse_index_base_one
|
||||||
|
end enum
|
||||||
|
|
||||||
|
enum, bind(c)
|
||||||
|
enumerator cusparse_operation_non_transpose
|
||||||
|
enumerator cusparse_operation_transpose
|
||||||
|
enumerator cusparse_operation_conjugate_transpose
|
||||||
|
end enum
|
||||||
|
|
||||||
|
enum, bind(c)
|
||||||
|
enumerator cusparse_direction_row
|
||||||
|
enumerator cusparse_direction_column
|
||||||
|
end enum
|
||||||
|
|
||||||
|
interface
|
||||||
|
function FcusparseCreate() &
|
||||||
|
& bind(c,name="FcusparseCreate") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function FcusparseCreate
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface
|
||||||
|
function FcusparseDestroy() &
|
||||||
|
& bind(c,name="FcusparseDestroy") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function FcusparseDestroy
|
||||||
|
end interface
|
||||||
|
|
||||||
|
contains
|
||||||
|
|
||||||
|
function initFcusparse() result(res)
|
||||||
|
implicit none
|
||||||
|
integer(c_int) :: res
|
||||||
|
|
||||||
|
res = FcusparseCreate()
|
||||||
|
end function initFcusparse
|
||||||
|
|
||||||
|
function closeFcusparse() result(res)
|
||||||
|
implicit none
|
||||||
|
integer(c_int) :: res
|
||||||
|
res = FcusparseDestroy()
|
||||||
|
end function closeFcusparse
|
||||||
|
|
||||||
|
end module base_cusparse_mod
|
@ -0,0 +1,312 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
|
||||||
|
module c_cusparse_mod
|
||||||
|
use base_cusparse_mod
|
||||||
|
|
||||||
|
type, bind(c) :: c_Cmat
|
||||||
|
type(c_ptr) :: Mat = c_null_ptr
|
||||||
|
end type c_Cmat
|
||||||
|
|
||||||
|
#if CUDA_SHORT_VERSION <= 10
|
||||||
|
type, bind(c) :: c_Hmat
|
||||||
|
type(c_ptr) :: Mat = c_null_ptr
|
||||||
|
end type c_Hmat
|
||||||
|
#endif
|
||||||
|
|
||||||
|
interface CSRGDeviceFree
|
||||||
|
function c_CSRGDeviceFree(Mat) &
|
||||||
|
& bind(c,name="c_CSRGDeviceFree") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import c_Cmat
|
||||||
|
type(c_Cmat) :: Mat
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function c_CSRGDeviceFree
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface CSRGDeviceSetMatType
|
||||||
|
function c_CSRGDeviceSetMatType(Mat,type) &
|
||||||
|
& bind(c,name="c_CSRGDeviceSetMatType") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import c_Cmat
|
||||||
|
type(c_Cmat) :: Mat
|
||||||
|
integer(c_int),value :: type
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function c_CSRGDeviceSetMatType
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface CSRGDeviceSetMatFillMode
|
||||||
|
function c_CSRGDeviceSetMatFillMode(Mat,type) &
|
||||||
|
& bind(c,name="c_CSRGDeviceSetMatFillMode") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import c_Cmat
|
||||||
|
type(c_Cmat) :: Mat
|
||||||
|
integer(c_int),value :: type
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function c_CSRGDeviceSetMatFillMode
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface CSRGDeviceSetMatDiagType
|
||||||
|
function c_CSRGDeviceSetMatDiagType(Mat,type) &
|
||||||
|
& bind(c,name="c_CSRGDeviceSetMatDiagType") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import c_Cmat
|
||||||
|
type(c_Cmat) :: Mat
|
||||||
|
integer(c_int),value :: type
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function c_CSRGDeviceSetMatDiagType
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface CSRGDeviceSetMatIndexBase
|
||||||
|
function c_CSRGDeviceSetMatIndexBase(Mat,type) &
|
||||||
|
& bind(c,name="c_CSRGDeviceSetMatIndexBase") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import c_Cmat
|
||||||
|
type(c_Cmat) :: Mat
|
||||||
|
integer(c_int),value :: type
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function c_CSRGDeviceSetMatIndexBase
|
||||||
|
end interface
|
||||||
|
|
||||||
|
#if CUDA_SHORT_VERSION <= 10
|
||||||
|
interface CSRGDeviceCsrsmAnalysis
|
||||||
|
function c_CSRGDeviceCsrsmAnalysis(Mat) &
|
||||||
|
& bind(c,name="c_CSRGDeviceCsrsmAnalysis") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import c_Cmat
|
||||||
|
type(c_Cmat) :: Mat
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function c_CSRGDeviceCsrsmAnalysis
|
||||||
|
end interface
|
||||||
|
#else
|
||||||
|
interface CSRGIsNullSvBuffer
|
||||||
|
function c_CSRGIsNullSvBuffer(Mat) &
|
||||||
|
& bind(c,name="c_CSRGIsNullSvBuffer") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import c_Cmat
|
||||||
|
type(c_Cmat) :: Mat
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function c_CSRGIsNullSvBuffer
|
||||||
|
end interface
|
||||||
|
#endif
|
||||||
|
|
||||||
|
interface CSRGDeviceAlloc
|
||||||
|
function c_CSRGDeviceAlloc(Mat,nr,nc,nz) &
|
||||||
|
& bind(c,name="c_CSRGDeviceAlloc") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import c_Cmat
|
||||||
|
type(c_Cmat) :: Mat
|
||||||
|
integer(c_int), value :: nr, nc, nz
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function c_CSRGDeviceAlloc
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface CSRGDeviceGetParms
|
||||||
|
function c_CSRGDeviceGetParms(Mat,nr,nc,nz) &
|
||||||
|
& bind(c,name="c_CSRGDeviceGetParms") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import c_Cmat
|
||||||
|
type(c_Cmat) :: Mat
|
||||||
|
integer(c_int) :: nr, nc, nz
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function c_CSRGDeviceGetParms
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface spsvCSRGDevice
|
||||||
|
function c_spsvCSRGDevice(Mat,alpha,x,beta,y) &
|
||||||
|
& bind(c,name="c_spsvCSRGDevice") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import c_Cmat
|
||||||
|
type(c_Cmat) :: Mat
|
||||||
|
type(c_ptr), value :: x
|
||||||
|
type(c_ptr), value :: y
|
||||||
|
complex(c_float_complex), value :: alpha,beta
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function c_spsvCSRGDevice
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface spmvCSRGDevice
|
||||||
|
function c_spmvCSRGDevice(Mat,alpha,x,beta,y) &
|
||||||
|
& bind(c,name="c_spmvCSRGDevice") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import c_Cmat
|
||||||
|
type(c_Cmat) :: Mat
|
||||||
|
type(c_ptr), value :: x
|
||||||
|
type(c_ptr), value :: y
|
||||||
|
complex(c_float_complex), value :: alpha,beta
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function c_spmvCSRGDevice
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface CSRGHost2Device
|
||||||
|
function c_CSRGHost2Device(Mat,m,n,nz,irp,ja,val) &
|
||||||
|
& bind(c,name="c_CSRGHost2Device") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import c_Cmat
|
||||||
|
type(c_Cmat) :: Mat
|
||||||
|
integer(c_int), value :: m,n,nz
|
||||||
|
integer(c_int) :: irp(*), ja(*)
|
||||||
|
complex(c_float_complex) :: val(*)
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function c_CSRGHost2Device
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface CSRGDevice2Host
|
||||||
|
function c_CSRGDevice2Host(Mat,m,n,nz,irp,ja,val) &
|
||||||
|
& bind(c,name="c_CSRGDevice2Host") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import c_Cmat
|
||||||
|
type(c_Cmat) :: Mat
|
||||||
|
integer(c_int), value :: m,n,nz
|
||||||
|
integer(c_int) :: irp(*), ja(*)
|
||||||
|
complex(c_float_complex) :: val(*)
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function c_CSRGDevice2Host
|
||||||
|
end interface
|
||||||
|
|
||||||
|
#if CUDA_SHORT_VERSION <=10
|
||||||
|
interface HYBGDeviceAlloc
|
||||||
|
function c_HYBGDeviceAlloc(Mat,nr,nc,nz) &
|
||||||
|
& bind(c,name="c_HYBGDeviceAlloc") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import c_hmat
|
||||||
|
type(c_Hmat) :: Mat
|
||||||
|
integer(c_int), value :: nr, nc, nz
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function c_HYBGDeviceAlloc
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface HYBGDeviceFree
|
||||||
|
function c_HYBGDeviceFree(Mat) &
|
||||||
|
& bind(c,name="c_HYBGDeviceFree") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import c_Hmat
|
||||||
|
type(c_Hmat) :: Mat
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function c_HYBGDeviceFree
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface HYBGDeviceSetMatType
|
||||||
|
function c_HYBGDeviceSetMatType(Mat,type) &
|
||||||
|
& bind(c,name="c_HYBGDeviceSetMatType") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import c_Hmat
|
||||||
|
type(c_Hmat) :: Mat
|
||||||
|
integer(c_int),value :: type
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function c_HYBGDeviceSetMatType
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface HYBGDeviceSetMatFillMode
|
||||||
|
function c_HYBGDeviceSetMatFillMode(Mat,type) &
|
||||||
|
& bind(c,name="c_HYBGDeviceSetMatFillMode") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import c_Hmat
|
||||||
|
type(c_Hmat) :: Mat
|
||||||
|
integer(c_int),value :: type
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function c_HYBGDeviceSetMatFillMode
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface HYBGDeviceSetMatDiagType
|
||||||
|
function c_HYBGDeviceSetMatDiagType(Mat,type) &
|
||||||
|
& bind(c,name="c_HYBGDeviceSetMatDiagType") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import c_Hmat
|
||||||
|
type(c_Hmat) :: Mat
|
||||||
|
integer(c_int),value :: type
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function c_HYBGDeviceSetMatDiagType
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface HYBGDeviceSetMatIndexBase
|
||||||
|
function c_HYBGDeviceSetMatIndexBase(Mat,type) &
|
||||||
|
& bind(c,name="c_HYBGDeviceSetMatIndexBase") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import c_Hmat
|
||||||
|
type(c_Hmat) :: Mat
|
||||||
|
integer(c_int),value :: type
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function c_HYBGDeviceSetMatIndexBase
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface HYBGDeviceHybsmAnalysis
|
||||||
|
function c_HYBGDeviceHybsmAnalysis(Mat) &
|
||||||
|
& bind(c,name="c_HYBGDeviceHybsmAnalysis") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import c_Hmat
|
||||||
|
type(c_Hmat) :: Mat
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function c_HYBGDeviceHybsmAnalysis
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface spsvHYBGDevice
|
||||||
|
function c_spsvHYBGDevice(Mat,alpha,x,beta,y) &
|
||||||
|
& bind(c,name="c_spsvHYBGDevice") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import c_Hmat
|
||||||
|
type(c_Hmat) :: Mat
|
||||||
|
type(c_ptr), value :: x
|
||||||
|
type(c_ptr), value :: y
|
||||||
|
complex(c_float_complex), value :: alpha,beta
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function c_spsvHYBGDevice
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface spmvHYBGDevice
|
||||||
|
function c_spmvHYBGDevice(Mat,alpha,x,beta,y) &
|
||||||
|
& bind(c,name="c_spmvHYBGDevice") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import c_Hmat
|
||||||
|
type(c_Hmat) :: Mat
|
||||||
|
type(c_ptr), value :: x
|
||||||
|
type(c_ptr), value :: y
|
||||||
|
complex(c_float_complex), value :: alpha,beta
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function c_spmvHYBGDevice
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface HYBGHost2Device
|
||||||
|
function c_HYBGHost2Device(Mat,m,n,nz,irp,ja,val) &
|
||||||
|
& bind(c,name="c_HYBGHost2Device") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import c_Hmat
|
||||||
|
type(c_Hmat) :: Mat
|
||||||
|
integer(c_int), value :: m,n,nz
|
||||||
|
integer(c_int) :: irp(*), ja(*)
|
||||||
|
complex(c_float_complex) :: val(*)
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function c_HYBGHost2Device
|
||||||
|
end interface
|
||||||
|
#endif
|
||||||
|
|
||||||
|
end module c_cusparse_mod
|
@ -0,0 +1,99 @@
|
|||||||
|
/* Parallel Sparse BLAS GPU plugin */
|
||||||
|
/* (C) Copyright 2013 */
|
||||||
|
|
||||||
|
/* Salvatore Filippone */
|
||||||
|
/* Alessandro Fanfarillo */
|
||||||
|
|
||||||
|
/* Redistribution and use in source and binary forms, with or without */
|
||||||
|
/* modification, are permitted provided that the following conditions */
|
||||||
|
/* are met: */
|
||||||
|
/* 1. Redistributions of source code must retain the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer. */
|
||||||
|
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||||
|
/* notice, this list of conditions, and the following disclaimer in the */
|
||||||
|
/* documentation and/or other materials provided with the distribution. */
|
||||||
|
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||||
|
/* not be used to endorse or promote products derived from this */
|
||||||
|
/* software without specific written permission. */
|
||||||
|
|
||||||
|
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||||
|
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||||
|
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||||
|
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||||
|
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||||
|
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||||
|
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||||
|
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||||
|
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||||
|
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
#include <cusparse_v2.h>
|
||||||
|
#include "cintrf.h"
|
||||||
|
#include "fcusparse.h"
|
||||||
|
|
||||||
|
|
||||||
|
/* Double precision real */
|
||||||
|
#define TYPE float complex
|
||||||
|
#define CUSPARSE_BASE_TYPE CUDA_C_32F
|
||||||
|
#define T_CSRGDeviceMat c_CSRGDeviceMat
|
||||||
|
#define T_Cmat c_Cmat
|
||||||
|
#define T_spmvCSRGDevice c_spmvCSRGDevice
|
||||||
|
#define T_spsvCSRGDevice c_spsvCSRGDevice
|
||||||
|
#define T_CSRGDeviceAlloc c_CSRGDeviceAlloc
|
||||||
|
#define T_CSRGDeviceFree c_CSRGDeviceFree
|
||||||
|
#define T_CSRGHost2Device c_CSRGHost2Device
|
||||||
|
#define T_CSRGDevice2Host c_CSRGDevice2Host
|
||||||
|
#define T_CSRGDeviceSetMatFillMode c_CSRGDeviceSetMatFillMode
|
||||||
|
#define T_CSRGDeviceSetMatDiagType c_CSRGDeviceSetMatDiagType
|
||||||
|
#define T_CSRGDeviceGetParms c_CSRGDeviceGetParms
|
||||||
|
|
||||||
|
#if CUDA_SHORT_VERSION <= 10
|
||||||
|
#define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType
|
||||||
|
#define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase
|
||||||
|
#define T_CSRGDeviceCsrsmAnalysis c_CSRGDeviceCsrsmAnalysis
|
||||||
|
#define cusparseTcsrmv cusparseCcsrmv
|
||||||
|
#define cusparseTcsrsv_solve cusparseCcsrsv_solve
|
||||||
|
#define cusparseTcsrsv_analysis cusparseCcsrsv_analysis
|
||||||
|
#define T_HYBGDeviceMat c_HYBGDeviceMat
|
||||||
|
#define T_Hmat c_Hmat
|
||||||
|
#define T_HYBGDeviceFree c_HYBGDeviceFree
|
||||||
|
#define T_spmvHYBGDevice c_spmvHYBGDevice
|
||||||
|
#define T_HYBGDeviceAlloc c_HYBGDeviceAlloc
|
||||||
|
#define T_HYBGDeviceSetMatDiagType c_HYBGDeviceSetMatDiagType
|
||||||
|
#define T_HYBGDeviceSetMatIndexBase c_HYBGDeviceSetMatIndexBase
|
||||||
|
#define T_HYBGDeviceSetMatType c_HYBGDeviceSetMatType
|
||||||
|
#define T_HYBGDeviceSetMatFillMode c_HYBGDeviceSetMatFillMode
|
||||||
|
#define T_HYBGDeviceHybsmAnalysis c_HYBGDeviceHybsmAnalysis
|
||||||
|
#define T_spsvHYBGDevice c_spsvHYBGDevice
|
||||||
|
#define T_HYBGHost2Device c_HYBGHost2Device
|
||||||
|
#define cusparseThybmv cusparseChybmv
|
||||||
|
#define cusparseThybsv_solve cusparseChybsv_solve
|
||||||
|
#define cusparseThybsv_analysis cusparseChybsv_analysis
|
||||||
|
#define cusparseTcsr2hyb cusparseCcsr2hyb
|
||||||
|
|
||||||
|
#elif CUDA_VERSION < 11030
|
||||||
|
|
||||||
|
#define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType
|
||||||
|
#define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase
|
||||||
|
#define T_CSRGDeviceCsrsv2Analysis c_CSRGDeviceCsrsv2Analysis
|
||||||
|
#define cusparseTcsrsv2_bufferSize cusparseCcsrsv2_bufferSize
|
||||||
|
#define cusparseTcsrsv2_analysis cusparseCcsrsv2_analysis
|
||||||
|
#define cusparseTcsrsv2_solve cusparseCcsrsv2_solve
|
||||||
|
#else
|
||||||
|
|
||||||
|
#define T_CSRGIsNullSvBuffer c_CSRGIsNullSvBuffer
|
||||||
|
#define T_CSRGIsNullSvDescr c_CSRGIsNullSvDescr
|
||||||
|
#define T_CSRGIsNullMvDescr c_CSRGIsNullMvDescr
|
||||||
|
#define T_CSRGCreateSpMVDescr c_CSRGCreateSpMVDescr
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "fcusparse_fct.h"
|
||||||
|
|
@ -0,0 +1,47 @@
|
|||||||
|
/* Parallel Sparse BLAS SPGPU plugin */
|
||||||
|
/* (C) Copyright 2013 */
|
||||||
|
|
||||||
|
/* Salvatore Filippone */
|
||||||
|
/* Alessandro Fanfarillo */
|
||||||
|
|
||||||
|
/* Redistribution and use in source and binary forms, with or without */
|
||||||
|
/* modification, are permitted provided that the following conditions */
|
||||||
|
/* are met: */
|
||||||
|
/* 1. Redistributions of source code must retain the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer. */
|
||||||
|
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||||
|
/* notice, this list of conditions, and the following disclaimer in the */
|
||||||
|
/* documentation and/or other materials provided with the distribution. */
|
||||||
|
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||||
|
/* not be used to endorse or promote products derived from this */
|
||||||
|
/* software without specific written permission. */
|
||||||
|
|
||||||
|
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||||
|
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||||
|
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||||
|
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||||
|
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||||
|
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||||
|
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||||
|
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||||
|
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||||
|
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef _CINTRF_H_
|
||||||
|
#define _CINTRF_H_
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "core.h"
|
||||||
|
#include "cuda_util.h"
|
||||||
|
#include "vector.h"
|
||||||
|
#include "vectordev.h"
|
||||||
|
|
||||||
|
#define ELL_PITCH_ALIGN_S 32
|
||||||
|
#define ELL_PITCH_ALIGN_D 16
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,53 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
|
||||||
|
module core_mod
|
||||||
|
use iso_c_binding
|
||||||
|
|
||||||
|
integer(c_int), parameter :: spgpu_type_int = 0
|
||||||
|
integer(c_int), parameter :: spgpu_type_float = 1
|
||||||
|
integer(c_int), parameter :: spgpu_type_double = 2
|
||||||
|
integer(c_int), parameter :: spgpu_type_complex_float = 3
|
||||||
|
integer(c_int), parameter :: spgpu_type_complex_double = 4
|
||||||
|
integer(c_int), parameter :: spgpu_success = 0
|
||||||
|
integer(c_int), parameter :: spgpu_unsupported = 1
|
||||||
|
integer(c_int), parameter :: spgpu_unspecified = 2
|
||||||
|
integer(c_int), parameter :: spgpu_outofmem = 3
|
||||||
|
|
||||||
|
interface
|
||||||
|
subroutine psb_cudaSync() &
|
||||||
|
& bind(c,name='cudaSync')
|
||||||
|
use iso_c_binding
|
||||||
|
end subroutine psb_cudaSync
|
||||||
|
end interface
|
||||||
|
|
||||||
|
end module core_mod
|
@ -0,0 +1,799 @@
|
|||||||
|
/* Parallel Sparse BLAS GPU plugin */
|
||||||
|
/* (C) Copyright 2013 */
|
||||||
|
|
||||||
|
/* Salvatore Filippone */
|
||||||
|
/* Alessandro Fanfarillo */
|
||||||
|
|
||||||
|
/* Redistribution and use in source and binary forms, with or without */
|
||||||
|
/* modification, are permitted provided that the following conditions */
|
||||||
|
/* are met: */
|
||||||
|
/* 1. Redistributions of source code must retain the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer. */
|
||||||
|
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||||
|
/* notice, this list of conditions, and the following disclaimer in the */
|
||||||
|
/* documentation and/or other materials provided with the distribution. */
|
||||||
|
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||||
|
/* not be used to endorse or promote products derived from this */
|
||||||
|
/* software without specific written permission. */
|
||||||
|
|
||||||
|
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||||
|
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||||
|
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||||
|
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||||
|
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||||
|
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||||
|
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||||
|
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||||
|
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||||
|
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#include "cuda_util.h"
|
||||||
|
|
||||||
|
|
||||||
|
static int hasUVA=-1;
|
||||||
|
static struct cudaDeviceProp *prop=NULL;
|
||||||
|
static spgpuHandle_t psb_cuda_handle = NULL;
|
||||||
|
static cublasHandle_t psb_cublas_handle = NULL;
|
||||||
|
|
||||||
|
|
||||||
|
int allocRemoteBuffer(void** buffer, int count)
|
||||||
|
{
|
||||||
|
cudaError_t err = cudaMalloc(buffer, count);
|
||||||
|
if (err == cudaSuccess)
|
||||||
|
{
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
fprintf(stderr,"CUDA allocRemoteBuffer for %d bytes Error: %s \n",
|
||||||
|
count, cudaGetErrorString(err));
|
||||||
|
if(err == cudaErrorMemoryAllocation)
|
||||||
|
return SPGPU_OUTOFMEMORY;
|
||||||
|
else
|
||||||
|
return SPGPU_UNSPECIFIED;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int hostRegisterMapped(void *pointer, long size)
|
||||||
|
{
|
||||||
|
cudaError_t err = cudaHostRegister(pointer, size, cudaHostRegisterMapped);
|
||||||
|
|
||||||
|
if (err == cudaSuccess)
|
||||||
|
{
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
fprintf(stderr,"CUDA hostRegisterMapped Error: %s\n", cudaGetErrorString(err));
|
||||||
|
if(err == cudaErrorMemoryAllocation)
|
||||||
|
return SPGPU_OUTOFMEMORY;
|
||||||
|
else
|
||||||
|
return SPGPU_UNSPECIFIED;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int getDevicePointer(void **d_p, void * h_p)
|
||||||
|
{
|
||||||
|
cudaError_t err = cudaHostGetDevicePointer(d_p,h_p,0);
|
||||||
|
|
||||||
|
if (err == cudaSuccess)
|
||||||
|
{
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
fprintf(stderr,"CUDA getDevicePointer Error: %s\n", cudaGetErrorString(err));
|
||||||
|
if(err == cudaErrorMemoryAllocation)
|
||||||
|
return SPGPU_OUTOFMEMORY;
|
||||||
|
else
|
||||||
|
return SPGPU_UNSPECIFIED;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int registerMappedMemory(void *buffer, void **dp, int size)
|
||||||
|
{
|
||||||
|
//cudaError_t err = cudaHostAlloc(buffer,size,cudaHostAllocMapped);
|
||||||
|
cudaError_t err = cudaHostRegister(buffer, size, cudaHostRegisterMapped);
|
||||||
|
if (err == cudaSuccess) err = cudaHostGetDevicePointer(dp,buffer,0);
|
||||||
|
|
||||||
|
if (err == cudaSuccess)
|
||||||
|
{
|
||||||
|
err = cudaHostGetDevicePointer(dp,buffer,0);
|
||||||
|
if (err == cudaSuccess)
|
||||||
|
{
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
fprintf(stderr,"CUDA registerMappedMemory Error: %s\n", cudaGetErrorString(err));
|
||||||
|
return SPGPU_UNSPECIFIED;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
fprintf(stderr,"CUDA registerMappedMemory Error: %s\n", cudaGetErrorString(err));
|
||||||
|
if(err == cudaErrorMemoryAllocation)
|
||||||
|
return SPGPU_OUTOFMEMORY;
|
||||||
|
else
|
||||||
|
return SPGPU_UNSPECIFIED;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int allocMappedMemory(void **buffer, void **dp, int size)
|
||||||
|
{
|
||||||
|
cudaError_t err = cudaHostAlloc(buffer,size,cudaHostAllocMapped);
|
||||||
|
if (err == 0) err = cudaHostGetDevicePointer(dp,*buffer,0);
|
||||||
|
|
||||||
|
if (err == cudaSuccess)
|
||||||
|
{
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
fprintf(stderr,"CUDA allocMappedMemory Error: %s\n", cudaGetErrorString(err));
|
||||||
|
if(err == cudaErrorMemoryAllocation)
|
||||||
|
return SPGPU_OUTOFMEMORY;
|
||||||
|
else
|
||||||
|
return SPGPU_UNSPECIFIED;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int unregisterMappedMemory(void *buffer)
|
||||||
|
{
|
||||||
|
//cudaError_t err = cudaHostAlloc(buffer,size,cudaHostAllocMapped);
|
||||||
|
cudaError_t err = cudaHostUnregister(buffer);
|
||||||
|
|
||||||
|
if (err == cudaSuccess)
|
||||||
|
{
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
fprintf(stderr,"CUDA unregisterMappedMemory Error: %s\n", cudaGetErrorString(err));
|
||||||
|
if(err == cudaErrorMemoryAllocation)
|
||||||
|
return SPGPU_OUTOFMEMORY;
|
||||||
|
else
|
||||||
|
return SPGPU_UNSPECIFIED;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeRemoteBuffer(void* hostSrc, void* buffer, int count)
|
||||||
|
{
|
||||||
|
cudaError_t err = cudaMemcpy(buffer, hostSrc, count, cudaMemcpyHostToDevice);
|
||||||
|
|
||||||
|
if (err == cudaSuccess)
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
else {
|
||||||
|
fprintf(stderr,"CUDA Error writeRemoteBuffer: %s %p %p %d\n",
|
||||||
|
cudaGetErrorString(err),buffer, hostSrc, count);
|
||||||
|
return SPGPU_UNSPECIFIED;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int readRemoteBuffer(void* hostDest, void* buffer, int count)
|
||||||
|
{
|
||||||
|
|
||||||
|
|
||||||
|
cudaError_t err1;
|
||||||
|
cudaError_t err;
|
||||||
|
#if 0
|
||||||
|
{
|
||||||
|
err1 =cudaGetLastError();
|
||||||
|
fprintf(stderr,"CUDA Error prior to readRemoteBuffer: %s %d\n",
|
||||||
|
cudaGetErrorString(err1),err1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
err = cudaMemcpy(hostDest, buffer, count, cudaMemcpyDeviceToHost);
|
||||||
|
|
||||||
|
if (err == cudaSuccess)
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
else {
|
||||||
|
fprintf(stderr,"CUDA Error readRemoteBuffer: %s %p %p %d %d\n",
|
||||||
|
cudaGetErrorString(err),hostDest,buffer,count,err);
|
||||||
|
return SPGPU_UNSPECIFIED;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int freeRemoteBuffer(void* buffer)
|
||||||
|
{
|
||||||
|
cudaError_t err = cudaFree(buffer);
|
||||||
|
if (err == cudaSuccess)
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
else {
|
||||||
|
fprintf(stderr,"CUDA Error freeRemoteBuffer: %s %p\n", cudaGetErrorString(err),buffer);
|
||||||
|
return SPGPU_UNSPECIFIED;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int gpuInit(int dev)
|
||||||
|
{
|
||||||
|
|
||||||
|
int count,err;
|
||||||
|
|
||||||
|
if ((err=cudaSetDeviceFlags(cudaDeviceMapHost))!=cudaSuccess)
|
||||||
|
fprintf(stderr,"Error On SetDeviceFlags: %d '%s'\n",err,cudaGetErrorString(err));
|
||||||
|
if ((prop=(struct cudaDeviceProp *) malloc(sizeof(struct cudaDeviceProp)))==NULL) {
|
||||||
|
fprintf(stderr,"CUDA Error gpuInit3: not malloced prop\n");
|
||||||
|
return SPGPU_UNSPECIFIED;
|
||||||
|
}
|
||||||
|
err = setDevice(dev);
|
||||||
|
if (err != cudaSuccess) {
|
||||||
|
fprintf(stderr,"CUDA Error gpuInit2: %s\n", cudaGetErrorString(err));
|
||||||
|
return SPGPU_UNSPECIFIED;
|
||||||
|
}
|
||||||
|
if (!psb_cublas_handle)
|
||||||
|
psb_cudaCreateCublasHandle();
|
||||||
|
hasUVA=getDeviceHasUVA();
|
||||||
|
FcusparseCreate();
|
||||||
|
return err;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void gpuClose()
|
||||||
|
{
|
||||||
|
cudaStream_t st1, st2;
|
||||||
|
if (! psb_cuda_handle)
|
||||||
|
st1=spgpuGetStream(psb_cuda_handle);
|
||||||
|
if (! psb_cublas_handle)
|
||||||
|
cublasGetStream(psb_cublas_handle,&st2);
|
||||||
|
FcusparseDestroy();
|
||||||
|
psb_cudaDestroyHandle();
|
||||||
|
if (st1 != st2)
|
||||||
|
psb_cudaDestroyCublasHandle();
|
||||||
|
free(prop);
|
||||||
|
prop=NULL;
|
||||||
|
hasUVA=-1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int setDevice(int dev)
|
||||||
|
{
|
||||||
|
int count,err,idev;
|
||||||
|
|
||||||
|
err = cudaGetDeviceCount(&count);
|
||||||
|
if (err != cudaSuccess) {
|
||||||
|
fprintf(stderr,"CUDA Error setDevice: %s\n", cudaGetErrorString(err));
|
||||||
|
return SPGPU_UNSPECIFIED;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((0<=dev)&&(dev<count))
|
||||||
|
idev = dev;
|
||||||
|
else
|
||||||
|
idev = 0;
|
||||||
|
err = cudaSetDevice(idev);
|
||||||
|
if (err != cudaSuccess) {
|
||||||
|
fprintf(stderr,"CUDA Error gpuInit2: %s\n", cudaGetErrorString(err));
|
||||||
|
return SPGPU_UNSPECIFIED;
|
||||||
|
}
|
||||||
|
err = cudaGetDeviceProperties(prop,idev);
|
||||||
|
if (err != cudaSuccess) {
|
||||||
|
fprintf(stderr,"CUDA Error gpuInit4: %s\n", cudaGetErrorString(err));
|
||||||
|
return SPGPU_UNSPECIFIED;
|
||||||
|
}
|
||||||
|
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getDevice()
|
||||||
|
{ int count;
|
||||||
|
|
||||||
|
cudaGetDevice(&count);
|
||||||
|
return(count);
|
||||||
|
}
|
||||||
|
|
||||||
|
int getDeviceHasUVA()
|
||||||
|
{ int count=0;
|
||||||
|
if (prop!=NULL)
|
||||||
|
count = prop->unifiedAddressing;
|
||||||
|
return(count);
|
||||||
|
}
|
||||||
|
|
||||||
|
int getGPUMultiProcessors()
|
||||||
|
{ int count=0;
|
||||||
|
if (prop!=NULL)
|
||||||
|
count = prop->multiProcessorCount;
|
||||||
|
return(count);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int getGPUMemoryBusWidth()
|
||||||
|
{ int count=0;
|
||||||
|
#if CUDART_VERSION >= 5000
|
||||||
|
if (prop!=NULL)
|
||||||
|
count = prop->memoryBusWidth;
|
||||||
|
#endif
|
||||||
|
return(count);
|
||||||
|
}
|
||||||
|
int getGPUMemoryClockRate()
|
||||||
|
{ int count=0;
|
||||||
|
#if CUDART_VERSION >= 5000
|
||||||
|
if (prop!=NULL)
|
||||||
|
count = prop->memoryClockRate;
|
||||||
|
#endif
|
||||||
|
return(count);
|
||||||
|
}
|
||||||
|
int getGPUWarpSize()
|
||||||
|
{ int count=0;
|
||||||
|
if (prop!=NULL)
|
||||||
|
count = prop->warpSize;
|
||||||
|
return(count);
|
||||||
|
}
|
||||||
|
int getGPUMaxThreadsPerBlock()
|
||||||
|
{ int count=0;
|
||||||
|
if (prop!=NULL)
|
||||||
|
count = prop->maxThreadsPerBlock;
|
||||||
|
return(count);
|
||||||
|
}
|
||||||
|
int getGPUMaxThreadsPerMP()
|
||||||
|
{ int count=0;
|
||||||
|
if (prop!=NULL)
|
||||||
|
count = prop->maxThreadsPerMultiProcessor;
|
||||||
|
return(count);
|
||||||
|
}
|
||||||
|
int getGPUMaxRegistersPerBlock()
|
||||||
|
{ int count=0;
|
||||||
|
if (prop!=NULL)
|
||||||
|
count = prop->regsPerBlock;
|
||||||
|
return(count);
|
||||||
|
}
|
||||||
|
|
||||||
|
void cpyGPUNameString(char *cstring)
|
||||||
|
{
|
||||||
|
*cstring='\0';
|
||||||
|
if (prop!=NULL)
|
||||||
|
strcpy(cstring,prop->name);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
int DeviceHasUVA()
|
||||||
|
{
|
||||||
|
return(hasUVA == 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int getDeviceCount()
|
||||||
|
{ int count;
|
||||||
|
cudaError_t err;
|
||||||
|
err = cudaGetDeviceCount(&count);
|
||||||
|
if (err != cudaSuccess) {
|
||||||
|
fprintf(stderr,"CUDA Error getDeviceCount: %s\n", cudaGetErrorString(err));
|
||||||
|
return SPGPU_UNSPECIFIED;
|
||||||
|
}
|
||||||
|
return(count);
|
||||||
|
}
|
||||||
|
|
||||||
|
void cudaSync()
|
||||||
|
{
|
||||||
|
cudaError_t err;
|
||||||
|
err = cudaDeviceSynchronize();
|
||||||
|
if (err == cudaSuccess)
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
else {
|
||||||
|
fprintf(stderr,"CUDA Error cudaSync: %s\n", cudaGetErrorString(err));
|
||||||
|
return SPGPU_UNSPECIFIED;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void cudaReset()
|
||||||
|
{
|
||||||
|
cudaError_t err;
|
||||||
|
err = cudaDeviceReset();
|
||||||
|
if (err != cudaSuccess) {
|
||||||
|
fprintf(stderr,"CUDA Error Reset: %s\n", cudaGetErrorString(err));
|
||||||
|
return SPGPU_UNSPECIFIED;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
spgpuHandle_t psb_cudaGetHandle()
|
||||||
|
{
|
||||||
|
return psb_cuda_handle;
|
||||||
|
}
|
||||||
|
|
||||||
|
void psb_cudaCreateHandle()
|
||||||
|
{
|
||||||
|
if (!psb_cuda_handle)
|
||||||
|
spgpuCreate(&psb_cuda_handle, getDevice());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void psb_cudaDestroyHandle()
|
||||||
|
{
|
||||||
|
if (!psb_cuda_handle)
|
||||||
|
spgpuDestroy(psb_cuda_handle);
|
||||||
|
psb_cuda_handle = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
cudaStream_t psb_cudaGetStream()
|
||||||
|
{
|
||||||
|
return spgpuGetStream(psb_cuda_handle);
|
||||||
|
}
|
||||||
|
|
||||||
|
void psb_cudaSetStream(cudaStream_t stream)
|
||||||
|
{
|
||||||
|
spgpuSetStream(psb_cuda_handle, stream);
|
||||||
|
return ;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
cublasHandle_t psb_cudaGetCublasHandle()
|
||||||
|
{
|
||||||
|
if (!psb_cublas_handle)
|
||||||
|
psb_cudaCreateCublasHandle();
|
||||||
|
return psb_cublas_handle;
|
||||||
|
}
|
||||||
|
void psb_cudaCreateCublasHandle()
|
||||||
|
{ if (!psb_cublas_handle)
|
||||||
|
cublasCreate(&psb_cublas_handle);
|
||||||
|
}
|
||||||
|
void psb_cudaDestroyCublasHandle()
|
||||||
|
{
|
||||||
|
if (!psb_cublas_handle)
|
||||||
|
cublasDestroy(psb_cublas_handle);
|
||||||
|
psb_cublas_handle=NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Simple memory tools */
|
||||||
|
|
||||||
|
int allocateInt(void **d_int, int n)
|
||||||
|
{
|
||||||
|
return allocRemoteBuffer((void **)(d_int), n*sizeof(int));
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeInt(void *d_int, int* h_int, int n)
|
||||||
|
{
|
||||||
|
int i,j;
|
||||||
|
int *di;
|
||||||
|
i = writeRemoteBuffer((void*)h_int, (void*)d_int, n*sizeof(int));
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
int readInt(void* d_int, int* h_int, int n)
|
||||||
|
{ int i;
|
||||||
|
i = readRemoteBuffer((void *) h_int, (void *) d_int, n*sizeof(int));
|
||||||
|
//cudaSync();
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeIntFirst(int first, void *d_int, int* h_int, int n, int IndexBase)
|
||||||
|
{
|
||||||
|
int i,j;
|
||||||
|
int *di=(int *) d_int;
|
||||||
|
di = &(di[first-IndexBase]);
|
||||||
|
i = writeRemoteBuffer((void*)h_int, (void*)di, n*sizeof(int));
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
int readIntFirst(int first,void* d_int, int* h_int, int n, int IndexBase)
|
||||||
|
{ int i;
|
||||||
|
int *di=(int *) d_int;
|
||||||
|
di = &(di[first-IndexBase]);
|
||||||
|
i = readRemoteBuffer((void *) h_int, (void *) di, n*sizeof(int));
|
||||||
|
//cudaSync();
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int allocateMultiInt(void **d_int, int m, int n)
|
||||||
|
{
|
||||||
|
return allocRemoteBuffer((void **)(d_int), m*n*sizeof(int));
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeMultiInt(void *d_int, int* h_int, int m, int n)
|
||||||
|
{
|
||||||
|
int i,j;
|
||||||
|
int *di;
|
||||||
|
i = writeRemoteBuffer((void*)h_int, (void*)d_int, m*n*sizeof(int));
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
int readMultiInt(void* d_int, int* h_int, int m, int n)
|
||||||
|
{ int i;
|
||||||
|
i = readRemoteBuffer((void *) h_int, (void *) d_int, m*n*sizeof(int));
|
||||||
|
//cudaSync();
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
void freeInt(void *d_int)
|
||||||
|
{
|
||||||
|
//printf("Before freeInt\n");
|
||||||
|
freeRemoteBuffer(d_int);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
int allocateFloat(void **d_float, int n)
|
||||||
|
{
|
||||||
|
return allocRemoteBuffer((void **)(d_float), n*sizeof(float));
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeFloat(void *d_float, float* h_float, int n)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
i = writeRemoteBuffer((void*)h_float, (void*)d_float, n*sizeof(float));
|
||||||
|
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
int readFloat(void* d_float, float* h_float, int n)
|
||||||
|
{ int i;
|
||||||
|
i = readRemoteBuffer((void *) h_float, (void *) d_float, n*sizeof(float));
|
||||||
|
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeFloatFirst(int df, void *d_float, float* h_float, int n, int IndexBase)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
float *dv=(float *) d_float;
|
||||||
|
dv = &dv[df-IndexBase];
|
||||||
|
i = writeRemoteBuffer((void*)h_float, (void*)dv, n*sizeof(float));
|
||||||
|
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
int readFloatFirst(int df, void* d_float, float* h_float, int n, int IndexBase)
|
||||||
|
{ int i;
|
||||||
|
float *dv=(float *) d_float;
|
||||||
|
dv = &dv[df-IndexBase];
|
||||||
|
//fprintf(stderr,"readFloatFirst: %d %p %p %p %d \n",df,d_float,dv,h_float,n);
|
||||||
|
i = readRemoteBuffer((void *) h_float, (void *) dv, n*sizeof(float));
|
||||||
|
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int allocateMultiFloat(void **d_float, int m, int n)
|
||||||
|
{
|
||||||
|
return allocRemoteBuffer((void **)(d_float), m*n*sizeof(float));
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeMultiFloat(void *d_float, float* h_float, int m, int n)
|
||||||
|
{
|
||||||
|
int i,j;
|
||||||
|
i = writeRemoteBuffer((void*)h_float, (void*)d_float, m*n*sizeof(float));
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
int readMultiFloat(void* d_float, float* h_float, int m, int n)
|
||||||
|
{ int i;
|
||||||
|
i = readRemoteBuffer((void *) h_float, (void *) d_float, m*n*sizeof(float));
|
||||||
|
//cudaSync();
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
void freeFloat(void *d_float)
|
||||||
|
{
|
||||||
|
freeRemoteBuffer(d_float);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
int allocateDouble(void **d_double, int n)
|
||||||
|
{
|
||||||
|
return allocRemoteBuffer((void **)(d_double), n*sizeof(double));
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeDouble(void *d_double, double* h_double, int n)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
i = writeRemoteBuffer((void*)h_double, (void*)d_double, n*sizeof(double));
|
||||||
|
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
int readDouble(void* d_double, double* h_double, int n)
|
||||||
|
{ int i;
|
||||||
|
i = readRemoteBuffer((void *) h_double, (void *) d_double, n*sizeof(double));
|
||||||
|
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeDoubleFirst(int df, void *d_double, double* h_double, int n, int IndexBase)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
double *dv=(double *) d_double;
|
||||||
|
dv = &dv[df-IndexBase];
|
||||||
|
i = writeRemoteBuffer((void*)h_double, (void*)dv, n*sizeof(double));
|
||||||
|
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
int readDoubleFirst(int df, void* d_double, double* h_double, int n, int IndexBase)
|
||||||
|
{ int i;
|
||||||
|
double *dv=(double *) d_double;
|
||||||
|
dv = &dv[df-IndexBase];
|
||||||
|
//fprintf(stderr,"readDoubleFirst: %d %p %p %p %d \n",df,d_double,dv,h_double,n);
|
||||||
|
i = readRemoteBuffer((void *) h_double, (void *) dv, n*sizeof(double));
|
||||||
|
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int allocateMultiDouble(void **d_double, int m, int n)
|
||||||
|
{
|
||||||
|
return allocRemoteBuffer((void **)(d_double), m*n*sizeof(double));
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeMultiDouble(void *d_double, double* h_double, int m, int n)
|
||||||
|
{
|
||||||
|
int i,j;
|
||||||
|
i = writeRemoteBuffer((void*)h_double, (void*)d_double, m*n*sizeof(double));
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
int readMultiDouble(void* d_double, double* h_double, int m, int n)
|
||||||
|
{ int i;
|
||||||
|
i = readRemoteBuffer((void *) h_double, (void *) d_double, m*n*sizeof(double));
|
||||||
|
//cudaSync();
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
void freeDouble(void *d_double)
|
||||||
|
{
|
||||||
|
freeRemoteBuffer(d_double);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
int allocateFloatComplex(void **d_FloatComplex, int n)
|
||||||
|
{
|
||||||
|
return allocRemoteBuffer((void **)(d_FloatComplex), n*sizeof(cuFloatComplex));
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeFloatComplex(void *d_FloatComplex, cuFloatComplex* h_FloatComplex, int n)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
i = writeRemoteBuffer((void*)h_FloatComplex, (void*)d_FloatComplex, n*sizeof(cuFloatComplex));
|
||||||
|
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
int readFloatComplex(void* d_FloatComplex, cuFloatComplex* h_FloatComplex, int n)
|
||||||
|
{ int i;
|
||||||
|
i = readRemoteBuffer((void *) h_FloatComplex, (void *) d_FloatComplex, n*sizeof(cuFloatComplex));
|
||||||
|
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int allocateMultiFloatComplex(void **d_FloatComplex, int m, int n)
|
||||||
|
{
|
||||||
|
return allocRemoteBuffer((void **)(d_FloatComplex), m*n*sizeof(cuFloatComplex));
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeMultiFloatComplex(void *d_FloatComplex, cuFloatComplex* h_FloatComplex, int m, int n)
|
||||||
|
{
|
||||||
|
int i,j;
|
||||||
|
i = writeRemoteBuffer((void*)h_FloatComplex, (void*)d_FloatComplex, m*n*sizeof(cuFloatComplex));
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
int readMultiFloatComplex(void* d_FloatComplex, cuFloatComplex* h_FloatComplex, int m, int n)
|
||||||
|
{ int i;
|
||||||
|
i = readRemoteBuffer((void *) h_FloatComplex, (void *) d_FloatComplex, m*n*sizeof(cuFloatComplex));
|
||||||
|
//cudaSync();
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeFloatComplexFirst(int df, void *d_floatComplex,
|
||||||
|
cuFloatComplex* h_floatComplex, int n, int IndexBase)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
cuFloatComplex *dv=(cuFloatComplex *) d_floatComplex;
|
||||||
|
dv = &dv[df-IndexBase];
|
||||||
|
i = writeRemoteBuffer((void*)h_floatComplex, (void*)dv, n*sizeof(cuFloatComplex));
|
||||||
|
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
int readFloatComplexFirst(int df, void* d_floatComplex, cuFloatComplex* h_floatComplex,
|
||||||
|
int n, int IndexBase)
|
||||||
|
{ int i;
|
||||||
|
cuFloatComplex *dv=(cuFloatComplex *) d_floatComplex;
|
||||||
|
dv = &dv[df-IndexBase];
|
||||||
|
i = readRemoteBuffer((void *) h_floatComplex, (void *) dv, n*sizeof(cuFloatComplex));
|
||||||
|
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
void freeFloatComplex(void *d_FloatComplex)
|
||||||
|
{
|
||||||
|
freeRemoteBuffer(d_FloatComplex);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
int allocateDoubleComplex(void **d_DoubleComplex, int n)
|
||||||
|
{
|
||||||
|
return allocRemoteBuffer((void **)(d_DoubleComplex), n*sizeof(cuDoubleComplex));
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeDoubleComplex(void *d_DoubleComplex, cuDoubleComplex* h_DoubleComplex, int n)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
i = writeRemoteBuffer((void*)h_DoubleComplex, (void*)d_DoubleComplex, n*sizeof(cuDoubleComplex));
|
||||||
|
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
int readDoubleComplex(void* d_DoubleComplex, cuDoubleComplex* h_DoubleComplex, int n)
|
||||||
|
{ int i;
|
||||||
|
i = readRemoteBuffer((void *) h_DoubleComplex, (void *) d_DoubleComplex, n*sizeof(cuDoubleComplex));
|
||||||
|
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeDoubleComplexFirst(int df, void *d_doubleComplex,
|
||||||
|
cuDoubleComplex* h_doubleComplex, int n, int IndexBase)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
cuDoubleComplex *dv=(cuDoubleComplex *) d_doubleComplex;
|
||||||
|
dv = &dv[df-IndexBase];
|
||||||
|
i = writeRemoteBuffer((void*)h_doubleComplex, (void*)dv, n*sizeof(cuDoubleComplex));
|
||||||
|
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
int readDoubleComplexFirst(int df, void* d_doubleComplex, cuDoubleComplex* h_doubleComplex,
|
||||||
|
int n, int IndexBase)
|
||||||
|
{ int i;
|
||||||
|
cuDoubleComplex *dv=(cuDoubleComplex *) d_doubleComplex;
|
||||||
|
dv = &dv[df-IndexBase];
|
||||||
|
i = readRemoteBuffer((void *) h_doubleComplex, (void *) dv, n*sizeof(cuDoubleComplex));
|
||||||
|
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int allocateMultiDoubleComplex(void **d_DoubleComplex, int m, int n)
|
||||||
|
{
|
||||||
|
return allocRemoteBuffer((void **)(d_DoubleComplex), m*n*sizeof(cuDoubleComplex));
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeMultiDoubleComplex(void *d_DoubleComplex, cuDoubleComplex* h_DoubleComplex, int m, int n)
|
||||||
|
{
|
||||||
|
int i,j;
|
||||||
|
i = writeRemoteBuffer((void*)h_DoubleComplex, (void*)d_DoubleComplex, m*n*sizeof(cuDoubleComplex));
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
int readMultiDoubleComplex(void* d_DoubleComplex, cuDoubleComplex* h_DoubleComplex, int m, int n)
|
||||||
|
{ int i;
|
||||||
|
i = readRemoteBuffer((void *) h_DoubleComplex, (void *) d_DoubleComplex, m*n*sizeof(cuDoubleComplex));
|
||||||
|
//cudaSync();
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
void freeDoubleComplex(void *d_DoubleComplex)
|
||||||
|
{
|
||||||
|
freeRemoteBuffer(d_DoubleComplex);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
double etime()
|
||||||
|
{
|
||||||
|
struct timeval tt;
|
||||||
|
struct timezone tz;
|
||||||
|
double temp;
|
||||||
|
if (gettimeofday(&tt,&tz) != 0) {
|
||||||
|
fprintf(stderr,"Fatal error for gettimeofday ??? \n");
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6;
|
||||||
|
return(temp);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
@ -0,0 +1,137 @@
|
|||||||
|
/* Parallel Sparse BLAS GPU plugin */
|
||||||
|
/* (C) Copyright 2013 */
|
||||||
|
|
||||||
|
/* Salvatore Filippone */
|
||||||
|
/* Alessandro Fanfarillo */
|
||||||
|
|
||||||
|
/* Redistribution and use in source and binary forms, with or without */
|
||||||
|
/* modification, are permitted provided that the following conditions */
|
||||||
|
/* are met: */
|
||||||
|
/* 1. Redistributions of source code must retain the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer. */
|
||||||
|
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||||
|
/* notice, this list of conditions, and the following disclaimer in the */
|
||||||
|
/* documentation and/or other materials provided with the distribution. */
|
||||||
|
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||||
|
/* not be used to endorse or promote products derived from this */
|
||||||
|
/* software without specific written permission. */
|
||||||
|
|
||||||
|
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||||
|
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||||
|
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||||
|
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||||
|
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||||
|
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||||
|
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||||
|
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||||
|
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||||
|
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef _CUDA_UTIL_H_
|
||||||
|
#define _CUDA_UTIL_H_
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <sys/time.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "cuda_runtime.h"
|
||||||
|
#include "core.h"
|
||||||
|
#include "cuComplex.h"
|
||||||
|
#include "fcusparse.h"
|
||||||
|
#include "cublas_v2.h"
|
||||||
|
|
||||||
|
int allocRemoteBuffer(void** buffer, int count);
|
||||||
|
int allocMappedMemory(void **buffer, void **dp, int size);
|
||||||
|
int registerMappedMemory(void *buffer, void **dp, int size);
|
||||||
|
int unregisterMappedMemory(void *buffer);
|
||||||
|
int writeRemoteBuffer(void* hostSrc, void* buffer, int count);
|
||||||
|
int readRemoteBuffer(void* hostDest, void* buffer, int count);
|
||||||
|
int freeRemoteBuffer(void* buffer);
|
||||||
|
int gpuInit(int dev);
|
||||||
|
int getDeviceCount();
|
||||||
|
int getDevice();
|
||||||
|
int setDevice(int dev);
|
||||||
|
int getGPUMultiProcessors();
|
||||||
|
int getGPUMemoryBusWidth();
|
||||||
|
int getGPUMemoryClockRate();
|
||||||
|
int getGPUWarpSize();
|
||||||
|
int getGPUMaxThreadsPerBlock();
|
||||||
|
int getGPUMaxThreadsPerMP();
|
||||||
|
int getGPUMaxRegistersPerBlock();
|
||||||
|
void cpyGPUNameString(char *cstring);
|
||||||
|
|
||||||
|
|
||||||
|
void cudaSync();
|
||||||
|
void cudaReset();
|
||||||
|
void gpuClose();
|
||||||
|
|
||||||
|
|
||||||
|
spgpuHandle_t psb_cudaGetHandle();
|
||||||
|
void psb_cudaCreateHandle();
|
||||||
|
void psb_cudaDestroyHandle();
|
||||||
|
cudaStream_t psb_cudaGetStream();
|
||||||
|
void psb_cudaSetStream(cudaStream_t stream);
|
||||||
|
|
||||||
|
cublasHandle_t psb_cudaGetCublasHandle();
|
||||||
|
void psb_cudaCreateCublasHandle();
|
||||||
|
void psb_cudaDestroyCublasHandle();
|
||||||
|
|
||||||
|
|
||||||
|
int allocateInt(void **, int);
|
||||||
|
int allocateMultiInt(void **, int, int);
|
||||||
|
int writeInt(void *, int *, int);
|
||||||
|
int writeMultiInt(void *, int* , int , int );
|
||||||
|
int readInt(void *, int *, int);
|
||||||
|
int readMultiInt(void*, int*, int, int );
|
||||||
|
int writeIntFirst(int,void *, int *, int,int);
|
||||||
|
int readIntFirst(int,void *, int *, int,int);
|
||||||
|
void freeInt(void *);
|
||||||
|
|
||||||
|
int allocateFloat(void **, int);
|
||||||
|
int allocateMultiFloat(void **, int, int);
|
||||||
|
int writeFloat(void *, float *, int);
|
||||||
|
int writeMultiFloat(void *, float* , int , int );
|
||||||
|
int readFloat(void *, float*, int);
|
||||||
|
int readMultiFloat(void*, float*, int, int );
|
||||||
|
int writeFloatFirst(int, void *, float*, int, int);
|
||||||
|
int readFloatFirst(int, void *, float*, int, int);
|
||||||
|
void freeFloat(void *);
|
||||||
|
|
||||||
|
int allocateDouble(void **, int);
|
||||||
|
int allocateMultiDouble(void **, int, int);
|
||||||
|
int writeDouble(void *, double*, int);
|
||||||
|
int writeMultiDouble(void *, double* , int , int );
|
||||||
|
int readDouble(void *, double*, int);
|
||||||
|
int readMultiDouble(void*, double*, int, int );
|
||||||
|
int writeDoubleFirst(int, void *, double*, int, int);
|
||||||
|
int readDoubleFirst(int, void *, double*, int, int);
|
||||||
|
void freeDouble(void *);
|
||||||
|
|
||||||
|
int allocateFloatComplex(void **, int);
|
||||||
|
int allocateMultiFloatComplex(void **, int, int);
|
||||||
|
int writeFloatComplex(void *, cuFloatComplex*, int);
|
||||||
|
int writeMultiFloatComplex(void *, cuFloatComplex* , int , int );
|
||||||
|
int readFloatComplex(void *, cuFloatComplex*, int);
|
||||||
|
int readMultiFloatComplex(void*, cuFloatComplex*, int, int );
|
||||||
|
int writeFloatComplexFirst(int, void *, cuFloatComplex*, int, int);
|
||||||
|
int readFloatComplexFirst(int, void *, cuFloatComplex*, int, int);
|
||||||
|
void freeFloatComplex(void *);
|
||||||
|
|
||||||
|
int allocateDoubleComplex(void **, int);
|
||||||
|
int allocateMultiDoubleComplex(void **, int, int);
|
||||||
|
int writeDoubleComplex(void *, cuDoubleComplex*, int);
|
||||||
|
int writeMultiDoubleComplex(void *, cuDoubleComplex* , int , int );
|
||||||
|
int readDoubleComplex(void *, cuDoubleComplex*, int);
|
||||||
|
int readMultiDoubleComplex(void*, cuDoubleComplex*, int, int );
|
||||||
|
int writeDoubleComplexFirst(int, void *, cuDoubleComplex*, int, int);
|
||||||
|
int readDoubleComplexFirst(int, void *, cuDoubleComplex*, int, int);
|
||||||
|
void freeDoubleComplex(void *);
|
||||||
|
|
||||||
|
double etime();
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,38 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
module cusparse_mod
|
||||||
|
use base_cusparse_mod
|
||||||
|
use s_cusparse_mod
|
||||||
|
use d_cusparse_mod
|
||||||
|
use c_cusparse_mod
|
||||||
|
use z_cusparse_mod
|
||||||
|
end module cusparse_mod
|
@ -0,0 +1,323 @@
|
|||||||
|
/* Parallel Sparse BLAS GPU plugin */
|
||||||
|
/* (C) Copyright 2013 */
|
||||||
|
|
||||||
|
/* Salvatore Filippone */
|
||||||
|
/* Alessandro Fanfarillo */
|
||||||
|
|
||||||
|
/* Redistribution and use in source and binary forms, with or without */
|
||||||
|
/* modification, are permitted provided that the following conditions */
|
||||||
|
/* are met: */
|
||||||
|
/* 1. Redistributions of source code must retain the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer. */
|
||||||
|
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||||
|
/* notice, this list of conditions, and the following disclaimer in the */
|
||||||
|
/* documentation and/or other materials provided with the distribution. */
|
||||||
|
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||||
|
/* not be used to endorse or promote products derived from this */
|
||||||
|
/* software without specific written permission. */
|
||||||
|
|
||||||
|
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||||
|
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||||
|
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||||
|
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||||
|
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||||
|
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||||
|
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||||
|
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||||
|
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||||
|
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <complex.h>
|
||||||
|
//#include "utils.h"
|
||||||
|
//#include "common.h"
|
||||||
|
#include "cvectordev.h"
|
||||||
|
|
||||||
|
|
||||||
|
int registerMappedFloatComplex(void *buff, void **d_p, int n, cuFloatComplex dummy)
|
||||||
|
{
|
||||||
|
return registerMappedMemory(buff,d_p,n*sizeof(cuFloatComplex));
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeMultiVecDeviceFloatComplex(void* deviceVec, cuFloatComplex* hostVec)
|
||||||
|
{ int i;
|
||||||
|
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
|
||||||
|
// Ex updateFromHost vector function
|
||||||
|
i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_,
|
||||||
|
devVec->pitch_*devVec->count_*sizeof(cuFloatComplex));
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","FallocMultiVecDevice",i);
|
||||||
|
}
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeMultiVecDeviceFloatComplexR2(void* deviceVec, cuFloatComplex* hostVec, int ld)
|
||||||
|
{ int i;
|
||||||
|
i = writeMultiVecDeviceFloatComplex(deviceVec, (void *) hostVec);
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","writeMultiVecDeviceFloatComplexR2",i);
|
||||||
|
}
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int readMultiVecDeviceFloatComplex(void* deviceVec, cuFloatComplex* hostVec)
|
||||||
|
{ int i,j;
|
||||||
|
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
|
||||||
|
i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_,
|
||||||
|
devVec->pitch_*devVec->count_*sizeof(cuFloatComplex));
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceFloat",i);
|
||||||
|
}
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int readMultiVecDeviceFloatComplexR2(void* deviceVec, cuFloatComplex* hostVec, int ld)
|
||||||
|
{ int i;
|
||||||
|
i = readMultiVecDeviceFloatComplex(deviceVec, hostVec);
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceFloatComplexR2",i);
|
||||||
|
}
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int setscalMultiVecDeviceFloatComplex(cuFloatComplex val, int first, int last,
|
||||||
|
int indexBase, void* devMultiVecX)
|
||||||
|
{ int i=0;
|
||||||
|
int pitch = 0;
|
||||||
|
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
|
||||||
|
spgpuCsetscal(handle, first, last, indexBase, val, (cuFloatComplex *) devVecX->v_);
|
||||||
|
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int geinsMultiVecDeviceFloatComplex(int n, void* devMultiVecIrl, void* devMultiVecVal,
|
||||||
|
int dupl, int indexBase, void* devMultiVecX)
|
||||||
|
{ int j=0, i=0,nmin=0,nmax=0;
|
||||||
|
int pitch = 0;
|
||||||
|
cuFloatComplex beta;
|
||||||
|
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
|
||||||
|
struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl;
|
||||||
|
struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
pitch = devVecIrl->pitch_;
|
||||||
|
if ((n > devVecIrl->size_) || (n>devVecVal->size_ ))
|
||||||
|
return SPGPU_UNSUPPORTED;
|
||||||
|
|
||||||
|
//fprintf(stderr,"geins: %d %d %p %p %p\n",dupl,n,devVecIrl->v_,devVecVal->v_,devVecX->v_);
|
||||||
|
|
||||||
|
if (dupl == INS_OVERWRITE)
|
||||||
|
beta = make_cuFloatComplex(0.0, 0.0);
|
||||||
|
else if (dupl == INS_ADD)
|
||||||
|
beta = make_cuFloatComplex(1.0, 0.0);
|
||||||
|
else
|
||||||
|
beta = make_cuFloatComplex(0.0, 0.0);
|
||||||
|
|
||||||
|
spgpuCscat(handle, (cuFloatComplex *) devVecX->v_, n, (cuFloatComplex*)devVecVal->v_,
|
||||||
|
(int*)devVecIrl->v_, indexBase, beta);
|
||||||
|
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int igathMultiVecDeviceFloatComplexVecIdx(void* deviceVec, int vectorId, int n,
|
||||||
|
int first, void* deviceIdx, int hfirst,
|
||||||
|
void* host_values, int indexBase)
|
||||||
|
{
|
||||||
|
int i, *idx;
|
||||||
|
struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx;
|
||||||
|
|
||||||
|
i= igathMultiVecDeviceFloatComplex(deviceVec, vectorId, n,
|
||||||
|
first, (void*) devIdx->v_, hfirst, host_values, indexBase);
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int igathMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n,
|
||||||
|
int first, void* indexes, int hfirst,
|
||||||
|
void* host_values, int indexBase)
|
||||||
|
{
|
||||||
|
int i, *idx =(int *) indexes;;
|
||||||
|
cuFloatComplex *hv = (cuFloatComplex *) host_values;;
|
||||||
|
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
|
||||||
|
i=0;
|
||||||
|
hv = &(hv[hfirst-indexBase]);
|
||||||
|
idx = &(idx[first-indexBase]);
|
||||||
|
spgpuCgath(handle,hv, n, idx,indexBase,
|
||||||
|
(cuFloatComplex *) devVec->v_+vectorId*devVec->pitch_);
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int iscatMultiVecDeviceFloatComplexVecIdx(void* deviceVec, int vectorId, int n,
|
||||||
|
int first, void *deviceIdx,
|
||||||
|
int hfirst, void* host_values,
|
||||||
|
int indexBase, cuFloatComplex beta)
|
||||||
|
{
|
||||||
|
int i, *idx;
|
||||||
|
struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx;
|
||||||
|
i= iscatMultiVecDeviceFloatComplex(deviceVec, vectorId, n, first,
|
||||||
|
(void*) devIdx->v_, hfirst,host_values,
|
||||||
|
indexBase, beta);
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n,
|
||||||
|
int first, void *indexes,
|
||||||
|
int hfirst, void* host_values,
|
||||||
|
int indexBase, cuFloatComplex beta)
|
||||||
|
{ int i=0;
|
||||||
|
cuFloatComplex *hv = (cuFloatComplex *) host_values;
|
||||||
|
int *idx=(int *) indexes;
|
||||||
|
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
|
||||||
|
idx = &(idx[first-indexBase]);
|
||||||
|
hv = &(hv[hfirst-indexBase]);
|
||||||
|
spgpuCscat(handle, (cuFloatComplex *) devVec->v_, n, hv, idx, indexBase, beta);
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
|
||||||
|
{ int i=0;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
|
||||||
|
|
||||||
|
spgpuCmnrm2(handle, y_res, n,(cuFloatComplex *)devVecA->v_,
|
||||||
|
devVecA->count_, devVecA->pitch_);
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
|
||||||
|
{ int i=0;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
|
||||||
|
|
||||||
|
spgpuCmamax(handle, y_res, n,(cuFloatComplex *)devVecA->v_,
|
||||||
|
devVecA->count_, devVecA->pitch_);
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
|
||||||
|
{ int i=0;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
|
||||||
|
|
||||||
|
spgpuCmasum(handle, y_res, n,(cuFloatComplex *)devVecA->v_,
|
||||||
|
devVecA->count_, devVecA->pitch_);
|
||||||
|
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int scalMultiVecDeviceFloatComplex(cuFloatComplex alpha, void* devMultiVecA)
|
||||||
|
{ int i=0;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
|
||||||
|
// Note: inner kernel can handle aliased input/output
|
||||||
|
spgpuCscal(handle, (cuFloatComplex *)devVecA->v_, devVecA->pitch_,
|
||||||
|
alpha, (cuFloatComplex *)devVecA->v_);
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n,
|
||||||
|
void* devMultiVecA, void* devMultiVecB)
|
||||||
|
{int i=0;
|
||||||
|
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
|
||||||
|
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
|
||||||
|
spgpuCmdot(handle, y_res, n, (cuFloatComplex*)devVecA->v_,
|
||||||
|
(cuFloatComplex*)devVecB->v_,devVecA->count_,devVecB->pitch_);
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int axpbyMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha, void* devMultiVecX,
|
||||||
|
cuFloatComplex beta, void* devMultiVecY)
|
||||||
|
{ int j=0, i=0;
|
||||||
|
int pitch = 0;
|
||||||
|
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
|
||||||
|
struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
pitch = devVecY->pitch_;
|
||||||
|
if ((n > devVecY->size_) || (n>devVecX->size_ ))
|
||||||
|
return SPGPU_UNSUPPORTED;
|
||||||
|
|
||||||
|
for(j=0;j<devVecY->count_;j++)
|
||||||
|
spgpuCaxpby(handle,(cuFloatComplex*)devVecY->v_+pitch*j, n, beta,
|
||||||
|
(cuFloatComplex*)devVecY->v_+pitch*j, alpha,
|
||||||
|
(cuFloatComplex*) devVecX->v_+pitch*j);
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha,
|
||||||
|
void *deviceVecA, void *deviceVecB)
|
||||||
|
{ int i = 0;
|
||||||
|
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
|
||||||
|
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
if ((n > devVecA->size_) || (n>devVecB->size_ ))
|
||||||
|
return SPGPU_UNSUPPORTED;
|
||||||
|
|
||||||
|
spgpuCmaxy(handle, (cuFloatComplex*)devVecB->v_, n, alpha,
|
||||||
|
(cuFloatComplex*)devVecA->v_,
|
||||||
|
(cuFloatComplex*)devVecB->v_, devVecA->count_, devVecA->pitch_);
|
||||||
|
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int axybzMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA,
|
||||||
|
void *deviceVecB, cuFloatComplex beta,
|
||||||
|
void *deviceVecZ)
|
||||||
|
{ int i=0;
|
||||||
|
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
|
||||||
|
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
|
||||||
|
struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
|
||||||
|
if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ ))
|
||||||
|
return SPGPU_UNSUPPORTED;
|
||||||
|
spgpuCmaxypbz(handle, (cuFloatComplex*)devVecZ->v_, n, beta,
|
||||||
|
(cuFloatComplex*)devVecZ->v_,
|
||||||
|
alpha, (cuFloatComplex*) devVecA->v_, (cuFloatComplex*) devVecB->v_,
|
||||||
|
devVecB->count_, devVecB->pitch_);
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int absMultiVecDeviceFloatComplex2(int n, cuFloatComplex alpha, void *deviceVecA,
|
||||||
|
void *deviceVecB)
|
||||||
|
{ int i=0;
|
||||||
|
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
|
||||||
|
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
|
||||||
|
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
|
||||||
|
if ((n > devVecA->size_) || (n>devVecB->size_ ))
|
||||||
|
return SPGPU_UNSUPPORTED;
|
||||||
|
|
||||||
|
spgpuCabs(handle, (cuFloatComplex*)devVecB->v_, n,
|
||||||
|
alpha, (cuFloatComplex*)devVecA->v_);
|
||||||
|
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int absMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA)
|
||||||
|
{ int i = 0;
|
||||||
|
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
if (n > devVecA->size_)
|
||||||
|
return SPGPU_UNSUPPORTED;
|
||||||
|
|
||||||
|
spgpuCabs(handle, (cuFloatComplex*)devVecA->v_, n,
|
||||||
|
alpha, (cuFloatComplex*)devVecA->v_);
|
||||||
|
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
@ -0,0 +1,78 @@
|
|||||||
|
/* Parallel Sparse BLAS GPU plugin */
|
||||||
|
/* (C) Copyright 2013 */
|
||||||
|
|
||||||
|
/* Salvatore Filippone */
|
||||||
|
/* Alessandro Fanfarillo */
|
||||||
|
|
||||||
|
/* Redistribution and use in source and binary forms, with or without */
|
||||||
|
/* modification, are permitted provided that the following conditions */
|
||||||
|
/* are met: */
|
||||||
|
/* 1. Redistributions of source code must retain the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer. */
|
||||||
|
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||||
|
/* notice, this list of conditions, and the following disclaimer in the */
|
||||||
|
/* documentation and/or other materials provided with the distribution. */
|
||||||
|
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||||
|
/* not be used to endorse or promote products derived from this */
|
||||||
|
/* software without specific written permission. */
|
||||||
|
|
||||||
|
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||||
|
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||||
|
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||||
|
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||||
|
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||||
|
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||||
|
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||||
|
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||||
|
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||||
|
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
//#include "utils.h"
|
||||||
|
#include <complex.h>
|
||||||
|
#include "cuComplex.h"
|
||||||
|
#include "vectordev.h"
|
||||||
|
#include "cuda_runtime.h"
|
||||||
|
#include "core.h"
|
||||||
|
|
||||||
|
int registerMappedFloatComplex(void *, void **, int, cuFloatComplex);
|
||||||
|
int writeMultiVecDeviceFloatComplex(void* deviceMultiVec, cuFloatComplex* hostMultiVec);
|
||||||
|
int writeMultiVecDeviceFloatComplexR2(void* deviceMultiVec, cuFloatComplex* hostMultiVec, int ld);
|
||||||
|
int readMultiVecDeviceFloatComplex(void* deviceMultiVec, cuFloatComplex* hostMultiVec);
|
||||||
|
int readMultiVecDeviceFloatComplexR2(void* deviceMultiVec, cuFloatComplex* hostMultiVec, int ld);
|
||||||
|
|
||||||
|
int setscalMultiVecDeviceFloatComplex(cuFloatComplex val, int first, int last,
|
||||||
|
int indexBase, void* devVecX);
|
||||||
|
|
||||||
|
int geinsMultiVecDeviceFloatComplex(int n, void* devVecIrl, void* devVecVal,
|
||||||
|
int dupl, int indexBase, void* devVecX);
|
||||||
|
|
||||||
|
int igathMultiVecDeviceFloatComplexVecIdx(void* deviceVec, int vectorId, int n,
|
||||||
|
int first, void* deviceIdx, int hfirst,
|
||||||
|
void* host_values, int indexBase);
|
||||||
|
int igathMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n,
|
||||||
|
int first, void* indexes, int hfirst, void* host_values,
|
||||||
|
int indexBase);
|
||||||
|
int iscatMultiVecDeviceFloatComplexVecIdx(void* deviceVec, int vectorId, int n, int first,
|
||||||
|
void *deviceIdx, int hfirst, void* host_values,
|
||||||
|
int indexBase, cuFloatComplex beta);
|
||||||
|
int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n, int first, void *indexes,
|
||||||
|
int hfirst, void* host_values, int indexBase, cuFloatComplex beta);
|
||||||
|
|
||||||
|
int scalMultiVecDeviceFloatComplex(cuFloatComplex alpha, void* devMultiVecA);
|
||||||
|
int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA);
|
||||||
|
int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA);
|
||||||
|
int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA);
|
||||||
|
int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA, void* devVecB);
|
||||||
|
|
||||||
|
int axpbyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void* devVecX, cuFloatComplex beta, void* devVecY);
|
||||||
|
int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA, void *deviceVecB);
|
||||||
|
int axybzMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA,
|
||||||
|
void *deviceVecB, cuFloatComplex beta, void *deviceVecZ);
|
||||||
|
int absMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA);
|
||||||
|
int absMultiVecDeviceFloatComplex2(int n, cuFloatComplex alpha,
|
||||||
|
void *deviceVecA, void *deviceVecB);
|
||||||
|
|
@ -0,0 +1,313 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
|
||||||
|
module d_cusparse_mod
|
||||||
|
use base_cusparse_mod
|
||||||
|
|
||||||
|
type, bind(c) :: d_Cmat
|
||||||
|
type(c_ptr) :: Mat = c_null_ptr
|
||||||
|
end type d_Cmat
|
||||||
|
|
||||||
|
#if CUDA_SHORT_VERSION <= 10
|
||||||
|
type, bind(c) :: d_Hmat
|
||||||
|
type(c_ptr) :: Mat = c_null_ptr
|
||||||
|
end type d_Hmat
|
||||||
|
#endif
|
||||||
|
|
||||||
|
interface CSRGDeviceFree
|
||||||
|
function d_CSRGDeviceFree(Mat) &
|
||||||
|
& bind(c,name="d_CSRGDeviceFree") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import d_Cmat
|
||||||
|
type(d_Cmat) :: Mat
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function d_CSRGDeviceFree
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface CSRGDeviceSetMatType
|
||||||
|
function d_CSRGDeviceSetMatType(Mat,type) &
|
||||||
|
& bind(c,name="d_CSRGDeviceSetMatType") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import d_Cmat
|
||||||
|
type(d_Cmat) :: Mat
|
||||||
|
integer(c_int),value :: type
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function d_CSRGDeviceSetMatType
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface CSRGDeviceSetMatFillMode
|
||||||
|
function d_CSRGDeviceSetMatFillMode(Mat,type) &
|
||||||
|
& bind(c,name="d_CSRGDeviceSetMatFillMode") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import d_Cmat
|
||||||
|
type(d_Cmat) :: Mat
|
||||||
|
integer(c_int),value :: type
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function d_CSRGDeviceSetMatFillMode
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface CSRGDeviceSetMatDiagType
|
||||||
|
function d_CSRGDeviceSetMatDiagType(Mat,type) &
|
||||||
|
& bind(c,name="d_CSRGDeviceSetMatDiagType") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import d_Cmat
|
||||||
|
type(d_Cmat) :: Mat
|
||||||
|
integer(c_int),value :: type
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function d_CSRGDeviceSetMatDiagType
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface CSRGDeviceSetMatIndexBase
|
||||||
|
function d_CSRGDeviceSetMatIndexBase(Mat,type) &
|
||||||
|
& bind(c,name="d_CSRGDeviceSetMatIndexBase") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import d_Cmat
|
||||||
|
type(d_Cmat) :: Mat
|
||||||
|
integer(c_int),value :: type
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function d_CSRGDeviceSetMatIndexBase
|
||||||
|
end interface
|
||||||
|
|
||||||
|
#if CUDA_SHORT_VERSION <= 10
|
||||||
|
interface CSRGDeviceCsrsmAnalysis
|
||||||
|
function d_CSRGDeviceCsrsmAnalysis(Mat) &
|
||||||
|
& bind(c,name="d_CSRGDeviceCsrsmAnalysis") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import d_Cmat
|
||||||
|
type(d_Cmat) :: Mat
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function d_CSRGDeviceCsrsmAnalysis
|
||||||
|
end interface
|
||||||
|
#else
|
||||||
|
interface CSRGIsNullSvBuffer
|
||||||
|
function d_CSRGIsNullSvBuffer(Mat) &
|
||||||
|
& bind(c,name="d_CSRGIsNullSvBuffer") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import d_Cmat
|
||||||
|
type(d_Cmat) :: Mat
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function d_CSRGIsNullSvBuffer
|
||||||
|
end interface
|
||||||
|
#endif
|
||||||
|
|
||||||
|
interface CSRGDeviceAlloc
|
||||||
|
function d_CSRGDeviceAlloc(Mat,nr,nc,nz) &
|
||||||
|
& bind(c,name="d_CSRGDeviceAlloc") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import d_Cmat
|
||||||
|
type(d_Cmat) :: Mat
|
||||||
|
integer(c_int), value :: nr, nc, nz
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function d_CSRGDeviceAlloc
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface CSRGDeviceGetParms
|
||||||
|
function d_CSRGDeviceGetParms(Mat,nr,nc,nz) &
|
||||||
|
& bind(c,name="d_CSRGDeviceGetParms") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import d_Cmat
|
||||||
|
type(d_Cmat) :: Mat
|
||||||
|
integer(c_int) :: nr, nc, nz
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function d_CSRGDeviceGetParms
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface spsvCSRGDevice
|
||||||
|
function d_spsvCSRGDevice(Mat,alpha,x,beta,y) &
|
||||||
|
& bind(c,name="d_spsvCSRGDevice") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import d_Cmat
|
||||||
|
type(d_Cmat) :: Mat
|
||||||
|
type(c_ptr), value :: x
|
||||||
|
type(c_ptr), value :: y
|
||||||
|
real(c_double), value :: alpha,beta
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function d_spsvCSRGDevice
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface spmvCSRGDevice
|
||||||
|
function d_spmvCSRGDevice(Mat,alpha,x,beta,y) &
|
||||||
|
& bind(c,name="d_spmvCSRGDevice") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import d_Cmat
|
||||||
|
type(d_Cmat) :: Mat
|
||||||
|
type(c_ptr), value :: x
|
||||||
|
type(c_ptr), value :: y
|
||||||
|
real(c_double), value :: alpha,beta
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function d_spmvCSRGDevice
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface CSRGHost2Device
|
||||||
|
function d_CSRGHost2Device(Mat,m,n,nz,irp,ja,val) &
|
||||||
|
& bind(c,name="d_CSRGHost2Device") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import d_Cmat
|
||||||
|
type(d_Cmat) :: Mat
|
||||||
|
integer(c_int), value :: m,n,nz
|
||||||
|
integer(c_int) :: irp(*), ja(*)
|
||||||
|
real(c_double) :: val(*)
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function d_CSRGHost2Device
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface CSRGDevice2Host
|
||||||
|
function d_CSRGDevice2Host(Mat,m,n,nz,irp,ja,val) &
|
||||||
|
& bind(c,name="d_CSRGDevice2Host") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import d_Cmat
|
||||||
|
type(d_Cmat) :: Mat
|
||||||
|
integer(c_int), value :: m,n,nz
|
||||||
|
integer(c_int) :: irp(*), ja(*)
|
||||||
|
real(c_double) :: val(*)
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function d_CSRGDevice2Host
|
||||||
|
end interface
|
||||||
|
|
||||||
|
#if CUDA_SHORT_VERSION <= 10
|
||||||
|
interface HYBGDeviceAlloc
|
||||||
|
function d_HYBGDeviceAlloc(Mat,nr,nc,nz) &
|
||||||
|
& bind(c,name="d_HYBGDeviceAlloc") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import d_hmat
|
||||||
|
type(d_Hmat) :: Mat
|
||||||
|
integer(c_int), value :: nr, nc, nz
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function d_HYBGDeviceAlloc
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface HYBGDeviceFree
|
||||||
|
function d_HYBGDeviceFree(Mat) &
|
||||||
|
& bind(c,name="d_HYBGDeviceFree") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import d_Hmat
|
||||||
|
type(d_Hmat) :: Mat
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function d_HYBGDeviceFree
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface HYBGDeviceSetMatType
|
||||||
|
function d_HYBGDeviceSetMatType(Mat,type) &
|
||||||
|
& bind(c,name="d_HYBGDeviceSetMatType") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import d_Hmat
|
||||||
|
type(d_Hmat) :: Mat
|
||||||
|
integer(c_int),value :: type
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function d_HYBGDeviceSetMatType
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface HYBGDeviceSetMatFillMode
|
||||||
|
function d_HYBGDeviceSetMatFillMode(Mat,type) &
|
||||||
|
& bind(c,name="d_HYBGDeviceSetMatFillMode") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import d_Hmat
|
||||||
|
type(d_Hmat) :: Mat
|
||||||
|
integer(c_int),value :: type
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function d_HYBGDeviceSetMatFillMode
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface HYBGDeviceSetMatDiagType
|
||||||
|
function d_HYBGDeviceSetMatDiagType(Mat,type) &
|
||||||
|
& bind(c,name="d_HYBGDeviceSetMatDiagType") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import d_Hmat
|
||||||
|
type(d_Hmat) :: Mat
|
||||||
|
integer(c_int),value :: type
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function d_HYBGDeviceSetMatDiagType
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface HYBGDeviceSetMatIndexBase
|
||||||
|
function d_HYBGDeviceSetMatIndexBase(Mat,type) &
|
||||||
|
& bind(c,name="d_HYBGDeviceSetMatIndexBase") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import d_Hmat
|
||||||
|
type(d_Hmat) :: Mat
|
||||||
|
integer(c_int),value :: type
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function d_HYBGDeviceSetMatIndexBase
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface HYBGDeviceHybsmAnalysis
|
||||||
|
function d_HYBGDeviceHybsmAnalysis(Mat) &
|
||||||
|
& bind(c,name="d_HYBGDeviceHybsmAnalysis") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import d_Hmat
|
||||||
|
type(d_Hmat) :: Mat
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function d_HYBGDeviceHybsmAnalysis
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface spsvHYBGDevice
|
||||||
|
function d_spsvHYBGDevice(Mat,alpha,x,beta,y) &
|
||||||
|
& bind(c,name="d_spsvHYBGDevice") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import d_Hmat
|
||||||
|
type(d_Hmat) :: Mat
|
||||||
|
type(c_ptr), value :: x
|
||||||
|
type(c_ptr), value :: y
|
||||||
|
real(c_double), value :: alpha,beta
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function d_spsvHYBGDevice
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface spmvHYBGDevice
|
||||||
|
function d_spmvHYBGDevice(Mat,alpha,x,beta,y) &
|
||||||
|
& bind(c,name="d_spmvHYBGDevice") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import d_Hmat
|
||||||
|
type(d_Hmat) :: Mat
|
||||||
|
type(c_ptr), value :: x
|
||||||
|
type(c_ptr), value :: y
|
||||||
|
real(c_double), value :: alpha,beta
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function d_spmvHYBGDevice
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface HYBGHost2Device
|
||||||
|
function d_HYBGHost2Device(Mat,m,n,nz,irp,ja,val) &
|
||||||
|
& bind(c,name="d_HYBGHost2Device") result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
import d_Hmat
|
||||||
|
type(d_Hmat) :: Mat
|
||||||
|
integer(c_int), value :: m,n,nz
|
||||||
|
integer(c_int) :: irp(*), ja(*)
|
||||||
|
real(c_double) :: val(*)
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function d_HYBGHost2Device
|
||||||
|
end interface
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
end module d_cusparse_mod
|
@ -0,0 +1,99 @@
|
|||||||
|
/* Parallel Sparse BLAS GPU plugin */
|
||||||
|
/* (C) Copyright 2013 */
|
||||||
|
|
||||||
|
/* Salvatore Filippone */
|
||||||
|
/* Alessandro Fanfarillo */
|
||||||
|
|
||||||
|
/* Redistribution and use in source and binary forms, with or without */
|
||||||
|
/* modification, are permitted provided that the following conditions */
|
||||||
|
/* are met: */
|
||||||
|
/* 1. Redistributions of source code must retain the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer. */
|
||||||
|
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||||
|
/* notice, this list of conditions, and the following disclaimer in the */
|
||||||
|
/* documentation and/or other materials provided with the distribution. */
|
||||||
|
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||||
|
/* not be used to endorse or promote products derived from this */
|
||||||
|
/* software without specific written permission. */
|
||||||
|
|
||||||
|
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||||
|
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||||
|
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||||
|
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||||
|
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||||
|
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||||
|
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||||
|
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||||
|
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||||
|
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
#include <cusparse_v2.h>
|
||||||
|
#include "cintrf.h"
|
||||||
|
#include "fcusparse.h"
|
||||||
|
|
||||||
|
|
||||||
|
/* Double precision real */
|
||||||
|
#define TYPE double
|
||||||
|
#define CUSPARSE_BASE_TYPE CUDA_R_64F
|
||||||
|
#define T_CSRGDeviceMat d_CSRGDeviceMat
|
||||||
|
#define T_Cmat d_Cmat
|
||||||
|
#define T_spmvCSRGDevice d_spmvCSRGDevice
|
||||||
|
#define T_spsvCSRGDevice d_spsvCSRGDevice
|
||||||
|
#define T_CSRGDeviceAlloc d_CSRGDeviceAlloc
|
||||||
|
#define T_CSRGDeviceFree d_CSRGDeviceFree
|
||||||
|
#define T_CSRGHost2Device d_CSRGHost2Device
|
||||||
|
#define T_CSRGDevice2Host d_CSRGDevice2Host
|
||||||
|
#define T_CSRGDeviceSetMatFillMode d_CSRGDeviceSetMatFillMode
|
||||||
|
#define T_CSRGDeviceSetMatDiagType d_CSRGDeviceSetMatDiagType
|
||||||
|
#define T_CSRGDeviceGetParms d_CSRGDeviceGetParms
|
||||||
|
|
||||||
|
#if CUDA_SHORT_VERSION <= 10
|
||||||
|
#define T_CSRGDeviceSetMatType d_CSRGDeviceSetMatType
|
||||||
|
#define T_CSRGDeviceSetMatIndexBase d_CSRGDeviceSetMatIndexBase
|
||||||
|
#define T_CSRGDeviceCsrsmAnalysis d_CSRGDeviceCsrsmAnalysis
|
||||||
|
#define cusparseTcsrmv cusparseDcsrmv
|
||||||
|
#define cusparseTcsrsv_solve cusparseDcsrsv_solve
|
||||||
|
#define cusparseTcsrsv_analysis cusparseDcsrsv_analysis
|
||||||
|
#define T_HYBGDeviceMat d_HYBGDeviceMat
|
||||||
|
#define T_Hmat d_Hmat
|
||||||
|
#define T_HYBGDeviceFree d_HYBGDeviceFree
|
||||||
|
#define T_spmvHYBGDevice d_spmvHYBGDevice
|
||||||
|
#define T_HYBGDeviceAlloc d_HYBGDeviceAlloc
|
||||||
|
#define T_HYBGDeviceSetMatDiagType d_HYBGDeviceSetMatDiagType
|
||||||
|
#define T_HYBGDeviceSetMatIndexBase d_HYBGDeviceSetMatIndexBase
|
||||||
|
#define T_HYBGDeviceSetMatType d_HYBGDeviceSetMatType
|
||||||
|
#define T_HYBGDeviceSetMatFillMode d_HYBGDeviceSetMatFillMode
|
||||||
|
#define T_HYBGDeviceHybsmAnalysis d_HYBGDeviceHybsmAnalysis
|
||||||
|
#define T_spsvHYBGDevice d_spsvHYBGDevice
|
||||||
|
#define T_HYBGHost2Device d_HYBGHost2Device
|
||||||
|
#define cusparseThybmv cusparseDhybmv
|
||||||
|
#define cusparseThybsv_solve cusparseDhybsv_solve
|
||||||
|
#define cusparseThybsv_analysis cusparseDhybsv_analysis
|
||||||
|
#define cusparseTcsr2hyb cusparseDcsr2hyb
|
||||||
|
|
||||||
|
#elif CUDA_VERSION < 11030
|
||||||
|
|
||||||
|
#define T_CSRGDeviceSetMatType d_CSRGDeviceSetMatType
|
||||||
|
#define T_CSRGDeviceSetMatIndexBase d_CSRGDeviceSetMatIndexBase
|
||||||
|
#define T_CSRGDeviceCsrsv2Analysis d_CSRGDeviceCsrsv2Analysis
|
||||||
|
#define cusparseTcsrsv2_bufferSize cusparseDcsrsv2_bufferSize
|
||||||
|
#define cusparseTcsrsv2_analysis cusparseDcsrsv2_analysis
|
||||||
|
#define cusparseTcsrsv2_solve cusparseDcsrsv2_solve
|
||||||
|
#else
|
||||||
|
|
||||||
|
#define T_CSRGIsNullSvBuffer d_CSRGIsNullSvBuffer
|
||||||
|
#define T_CSRGIsNullSvDescr d_CSRGIsNullSvDescr
|
||||||
|
#define T_CSRGIsNullMvDescr d_CSRGIsNullMvDescr
|
||||||
|
#define T_CSRGCreateSpMVDescr d_CSRGCreateSpMVDescr
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "fcusparse_fct.h"
|
||||||
|
|
@ -0,0 +1,261 @@
|
|||||||
|
/* Parallel Sparse BLAS GPU plugin */
|
||||||
|
/* (C) Copyright 2013 */
|
||||||
|
|
||||||
|
/* Salvatore Filippone */
|
||||||
|
/* Alessandro Fanfarillo */
|
||||||
|
|
||||||
|
/* Redistribution and use in source and binary forms, with or without */
|
||||||
|
/* modification, are permitted provided that the following conditions */
|
||||||
|
/* are met: */
|
||||||
|
/* 1. Redistributions of source code must retain the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer. */
|
||||||
|
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||||
|
/* notice, this list of conditions, and the following disclaimer in the */
|
||||||
|
/* documentation and/or other materials provided with the distribution. */
|
||||||
|
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||||
|
/* not be used to endorse or promote products derived from this */
|
||||||
|
/* software without specific written permission. */
|
||||||
|
|
||||||
|
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||||
|
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||||
|
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||||
|
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||||
|
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||||
|
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||||
|
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||||
|
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||||
|
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||||
|
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
|
||||||
|
|
||||||
|
#include "diagdev.h"
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
//new
|
||||||
|
DiagDeviceParams getDiagDeviceParams(unsigned int rows, unsigned int columns, unsigned int diags, unsigned int elementType)
|
||||||
|
{
|
||||||
|
DiagDeviceParams params;
|
||||||
|
|
||||||
|
params.elementType = elementType;
|
||||||
|
//numero di elementi di val
|
||||||
|
params.rows = rows;
|
||||||
|
params.columns = columns;
|
||||||
|
params.diags = diags;
|
||||||
|
|
||||||
|
return params;
|
||||||
|
|
||||||
|
}
|
||||||
|
//new
|
||||||
|
int allocDiagDevice(void ** remoteMatrix, DiagDeviceParams* params)
|
||||||
|
{
|
||||||
|
struct DiagDevice *tmp = (struct DiagDevice *)malloc(sizeof(struct DiagDevice));
|
||||||
|
int ret=SPGPU_SUCCESS;
|
||||||
|
*remoteMatrix = (void *)tmp;
|
||||||
|
|
||||||
|
tmp->rows = params->rows;
|
||||||
|
|
||||||
|
tmp->cols = params->columns;
|
||||||
|
|
||||||
|
tmp->diags = params->diags;
|
||||||
|
|
||||||
|
if (ret == SPGPU_SUCCESS)
|
||||||
|
ret=allocRemoteBuffer((void **)&(tmp->off), tmp->diags*sizeof(int));
|
||||||
|
|
||||||
|
/* tmp->baseIndex = params->firstIndex; */
|
||||||
|
|
||||||
|
if (params->elementType == SPGPU_TYPE_INT)
|
||||||
|
{
|
||||||
|
if (ret == SPGPU_SUCCESS)
|
||||||
|
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->rows*tmp->diags*sizeof(int));
|
||||||
|
}
|
||||||
|
else if (params->elementType == SPGPU_TYPE_FLOAT)
|
||||||
|
{
|
||||||
|
if (ret == SPGPU_SUCCESS)
|
||||||
|
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->rows*tmp->diags*sizeof(float));
|
||||||
|
}
|
||||||
|
else if (params->elementType == SPGPU_TYPE_DOUBLE)
|
||||||
|
{
|
||||||
|
if (ret == SPGPU_SUCCESS)
|
||||||
|
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->rows*tmp->diags*sizeof(double));
|
||||||
|
}
|
||||||
|
else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT)
|
||||||
|
{
|
||||||
|
if (ret == SPGPU_SUCCESS)
|
||||||
|
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->rows*tmp->diags*sizeof(cuFloatComplex));
|
||||||
|
}
|
||||||
|
else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE)
|
||||||
|
{
|
||||||
|
if (ret == SPGPU_SUCCESS)
|
||||||
|
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->rows*tmp->diags*sizeof(cuDoubleComplex));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return SPGPU_UNSUPPORTED; // Unsupported params
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
void freeDiagDevice(void* remoteMatrix)
|
||||||
|
{
|
||||||
|
struct DiagDevice *devMat = (struct DiagDevice *) remoteMatrix;
|
||||||
|
//fprintf(stderr,"freeHllDevice\n");
|
||||||
|
if (devMat != NULL) {
|
||||||
|
freeRemoteBuffer(devMat->off);
|
||||||
|
freeRemoteBuffer(devMat->cM);
|
||||||
|
free(remoteMatrix);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//new
|
||||||
|
int FallocDiagDevice(void** deviceMat, unsigned int rows, unsigned int columns,unsigned int diags,unsigned int elementType)
|
||||||
|
{ int i;
|
||||||
|
DiagDeviceParams p;
|
||||||
|
|
||||||
|
p = getDiagDeviceParams(rows, columns, diags,elementType);
|
||||||
|
i = allocDiagDevice(deviceMat, &p);
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i);
|
||||||
|
}
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeDiagDeviceDouble(void* deviceMat, double* a, int* off, int n)
|
||||||
|
{ int i,fo,fa;
|
||||||
|
char buf_a[255], buf_o[255],tmp[255];
|
||||||
|
struct DiagDevice *devMat = (struct DiagDevice *) deviceMat;
|
||||||
|
// Ex updateFromHost function
|
||||||
|
/* memset(buf_a,'\0',255); */
|
||||||
|
/* memset(buf_o,'\0',255); */
|
||||||
|
/* memset(tmp,'\0',255); */
|
||||||
|
|
||||||
|
/* strcat(buf_a,"mat_"); */
|
||||||
|
/* strcat(buf_o,"off_"); */
|
||||||
|
/* sprintf(tmp,"%d_%d.dat",devMat->rows,devMat->cols); */
|
||||||
|
/* strcat(buf_a,tmp); */
|
||||||
|
/* memset(tmp,'\0',255); */
|
||||||
|
/* sprintf(tmp,"%d.dat",devMat->cols); */
|
||||||
|
/* strcat(buf_o,tmp); */
|
||||||
|
|
||||||
|
/* fa = open(buf_a, O_CREAT | O_WRONLY | O_TRUNC, 0664); */
|
||||||
|
/* fo = open(buf_o, O_CREAT | O_WRONLY | O_TRUNC, 0664); */
|
||||||
|
|
||||||
|
/* i = write(fa, a, sizeof(double)*devMat->cols*devMat->rows); */
|
||||||
|
/* i = write(fo, off, sizeof(int)*devMat->cols); */
|
||||||
|
|
||||||
|
/* close(fa); */
|
||||||
|
/* close(fo); */
|
||||||
|
|
||||||
|
i = writeRemoteBuffer((void*) a, (void *)devMat->cM, devMat->rows*devMat->diags*sizeof(double));
|
||||||
|
i = writeRemoteBuffer((void*) off, (void *)devMat->off, devMat->diags*sizeof(int));
|
||||||
|
|
||||||
|
if(i==0)
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
else
|
||||||
|
return SPGPU_UNSUPPORTED;
|
||||||
|
}
|
||||||
|
|
||||||
|
int readDiagDeviceDouble(void* deviceMat, double* a, int* off)
|
||||||
|
{ int i;
|
||||||
|
struct DiagDevice *devMat = (struct DiagDevice *) deviceMat;
|
||||||
|
i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(double));
|
||||||
|
i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int));
|
||||||
|
/*if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
|
||||||
|
}*/
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
//new
|
||||||
|
int spmvDiagDeviceDouble(void *deviceMat, double alpha, void* deviceX,
|
||||||
|
double beta, void* deviceY)
|
||||||
|
{
|
||||||
|
struct DiagDevice *devMat = (struct DiagDevice *) deviceMat;
|
||||||
|
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||||
|
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
|
||||||
|
#ifdef VERBOSE
|
||||||
|
/*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/
|
||||||
|
/*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/
|
||||||
|
/*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/
|
||||||
|
#endif
|
||||||
|
/* spgpuDdiagspmv(handle, (double *)y->v_, (double *)y->v_,alpha,(double *)devMat->cM,devMat->off,devMat->rows,devMat->cols,x->v_,beta,devMat->baseIndex); */
|
||||||
|
|
||||||
|
spgpuDdiaspmv(handle, (double *)y->v_, (double *)y->v_,alpha,(double *)devMat->cM,devMat->off,devMat->rows,devMat->rows,devMat->cols,devMat->diags,x->v_,beta);
|
||||||
|
|
||||||
|
//cudaSync();
|
||||||
|
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int writeDiagDeviceFloat(void* deviceMat, float* a, int* off, int n)
|
||||||
|
{ int i,fo,fa;
|
||||||
|
char buf_a[255], buf_o[255],tmp[255];
|
||||||
|
struct DiagDevice *devMat = (struct DiagDevice *) deviceMat;
|
||||||
|
// Ex updateFromHost function
|
||||||
|
/* memset(buf_a,'\0',255); */
|
||||||
|
/* memset(buf_o,'\0',255); */
|
||||||
|
/* memset(tmp,'\0',255); */
|
||||||
|
|
||||||
|
/* strcat(buf_a,"mat_"); */
|
||||||
|
/* strcat(buf_o,"off_"); */
|
||||||
|
/* sprintf(tmp,"%d_%d.dat",devMat->rows,devMat->cols); */
|
||||||
|
/* strcat(buf_a,tmp); */
|
||||||
|
/* memset(tmp,'\0',255); */
|
||||||
|
/* sprintf(tmp,"%d.dat",devMat->cols); */
|
||||||
|
/* strcat(buf_o,tmp); */
|
||||||
|
|
||||||
|
/* fa = open(buf_a, O_CREAT | O_WRONLY | O_TRUNC, 0664); */
|
||||||
|
/* fo = open(buf_o, O_CREAT | O_WRONLY | O_TRUNC, 0664); */
|
||||||
|
|
||||||
|
/* i = write(fa, a, sizeof(float)*devMat->cols*devMat->rows); */
|
||||||
|
/* i = write(fo, off, sizeof(int)*devMat->cols); */
|
||||||
|
|
||||||
|
/* close(fa); */
|
||||||
|
/* close(fo); */
|
||||||
|
|
||||||
|
i = writeRemoteBuffer((void*) a, (void *)devMat->cM, devMat->rows*devMat->diags*sizeof(float));
|
||||||
|
i = writeRemoteBuffer((void*) off, (void *)devMat->off, devMat->diags*sizeof(int));
|
||||||
|
|
||||||
|
if(i==0)
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
else
|
||||||
|
return SPGPU_UNSUPPORTED;
|
||||||
|
}
|
||||||
|
|
||||||
|
int readDiagDeviceFloat(void* deviceMat, float* a, int* off)
|
||||||
|
{ int i;
|
||||||
|
struct DiagDevice *devMat = (struct DiagDevice *) deviceMat;
|
||||||
|
i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(float));
|
||||||
|
i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int));
|
||||||
|
/*if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i);
|
||||||
|
}*/
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
//new
|
||||||
|
int spmvDiagDeviceFloat(void *deviceMat, float alpha, void* deviceX,
|
||||||
|
float beta, void* deviceY)
|
||||||
|
{
|
||||||
|
struct DiagDevice *devMat = (struct DiagDevice *) deviceMat;
|
||||||
|
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||||
|
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
|
||||||
|
#ifdef VERBOSE
|
||||||
|
/*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/
|
||||||
|
/*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/
|
||||||
|
/*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/
|
||||||
|
#endif
|
||||||
|
/* spgpuDdiagspmv(handle, (float *)y->v_, (float *)y->v_,alpha,(float *)devMat->cM,devMat->off,devMat->rows,devMat->cols,x->v_,beta,devMat->baseIndex); */
|
||||||
|
|
||||||
|
spgpuSdiaspmv(handle, (float *)y->v_, (float *)y->v_,alpha,(float *)devMat->cM,devMat->off,devMat->rows,devMat->rows,devMat->cols,devMat->diags,x->v_,beta);
|
||||||
|
|
||||||
|
//cudaSync();
|
||||||
|
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,90 @@
|
|||||||
|
/* Parallel Sparse BLAS GPU plugin */
|
||||||
|
/* (C) Copyright 2013 */
|
||||||
|
|
||||||
|
/* Salvatore Filippone */
|
||||||
|
/* Alessandro Fanfarillo */
|
||||||
|
|
||||||
|
/* Redistribution and use in source and binary forms, with or without */
|
||||||
|
/* modification, are permitted provided that the following conditions */
|
||||||
|
/* are met: */
|
||||||
|
/* 1. Redistributions of source code must retain the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer. */
|
||||||
|
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||||
|
/* notice, this list of conditions, and the following disclaimer in the */
|
||||||
|
/* documentation and/or other materials provided with the distribution. */
|
||||||
|
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||||
|
/* not be used to endorse or promote products derived from this */
|
||||||
|
/* software without specific written permission. */
|
||||||
|
|
||||||
|
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||||
|
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||||
|
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||||
|
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||||
|
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||||
|
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||||
|
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||||
|
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||||
|
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||||
|
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef _DIAGDEV_H_
|
||||||
|
#define _DIAGDEV_H_
|
||||||
|
|
||||||
|
#include "cintrf.h"
|
||||||
|
#include "dia.h"
|
||||||
|
|
||||||
|
struct DiagDevice
|
||||||
|
{
|
||||||
|
// Compressed matrix
|
||||||
|
void *cM; //it can be float or double
|
||||||
|
|
||||||
|
// offset (same size of cM)
|
||||||
|
int *off;
|
||||||
|
|
||||||
|
int rows;
|
||||||
|
|
||||||
|
int cols;
|
||||||
|
|
||||||
|
int diags;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef struct DiagDeviceParams
|
||||||
|
{
|
||||||
|
|
||||||
|
unsigned int elementType;
|
||||||
|
|
||||||
|
// Number of rows.
|
||||||
|
// Used to allocate rS array
|
||||||
|
unsigned int rows;
|
||||||
|
//unsigned int hackOffsLength;
|
||||||
|
|
||||||
|
// Number of columns.
|
||||||
|
// Used for error-checking
|
||||||
|
unsigned int columns;
|
||||||
|
|
||||||
|
unsigned int diags;
|
||||||
|
|
||||||
|
} DiagDeviceParams;
|
||||||
|
DiagDeviceParams getDiagDeviceParams(unsigned int rows, unsigned int columns,
|
||||||
|
unsigned int elementType, unsigned int firstIndex);
|
||||||
|
int FallocDiagDevice(void** deviceMat, unsigned int rows, unsigned int cols,
|
||||||
|
unsigned int elementType, unsigned int firstIndex);
|
||||||
|
int allocDiagDevice(void ** remoteMatrix, DiagDeviceParams* params);
|
||||||
|
void freeDiagDevice(void* remoteMatrix);
|
||||||
|
|
||||||
|
int readDiagDeviceDouble(void* deviceMat, double* a, int* off);
|
||||||
|
int writeDiagDeviceDouble(void* deviceMat, double* a, int* off, int n);
|
||||||
|
int spmvDiagDeviceDouble(void *deviceMat, double alpha, void* deviceX,
|
||||||
|
double beta, void* deviceY);
|
||||||
|
|
||||||
|
int readDiagDeviceFloat(void* deviceMat, float* a, int* off);
|
||||||
|
int writeDiagDeviceFloat(void* deviceMat, float* a, int* off, int n);
|
||||||
|
int spmvDiagDeviceFloat(void *deviceMat, float alpha, void* deviceX,
|
||||||
|
float beta, void* deviceY);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,224 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
|
||||||
|
module diagdev_mod
|
||||||
|
use iso_c_binding
|
||||||
|
use core_mod
|
||||||
|
|
||||||
|
type, bind(c) :: diagdev_parms
|
||||||
|
integer(c_int) :: element_type
|
||||||
|
integer(c_int) :: rows
|
||||||
|
integer(c_int) :: columns
|
||||||
|
integer(c_int) :: firstIndex
|
||||||
|
end type diagdev_parms
|
||||||
|
|
||||||
|
interface
|
||||||
|
function FgetDiagDeviceParams(rows, columns, elementType, firstIndex) &
|
||||||
|
& result(res) bind(c,name='getDiagDeviceParams')
|
||||||
|
use iso_c_binding
|
||||||
|
import :: diagdev_parms
|
||||||
|
type(diagdev_parms) :: res
|
||||||
|
integer(c_int), value :: rows,columns,elementType,firstIndex
|
||||||
|
end function FgetDiagDeviceParams
|
||||||
|
end interface
|
||||||
|
|
||||||
|
|
||||||
|
interface
|
||||||
|
function FallocDiagDevice(deviceMat,rows,columns,&
|
||||||
|
& elementType,firstIndex) &
|
||||||
|
& result(res) bind(c,name='FallocDiagDevice')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
integer(c_int), value :: rows,columns,elementType,firstIndex
|
||||||
|
type(c_ptr) :: deviceMat
|
||||||
|
end function FallocDiagDevice
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface writeDiagDevice
|
||||||
|
|
||||||
|
function writeDiagDeviceFloat(deviceMat,a,off,n) &
|
||||||
|
& result(res) bind(c,name='writeDiagDeviceFloat')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
integer(c_int), value :: n
|
||||||
|
real(c_float) :: a(n,*)
|
||||||
|
integer(c_int) :: off(*)!,irn(*)
|
||||||
|
end function writeDiagDeviceFloat
|
||||||
|
|
||||||
|
function writeDiagDeviceDouble(deviceMat,a,off,n) &
|
||||||
|
& result(res) bind(c,name='writeDiagDeviceDouble')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
integer(c_int),value :: n
|
||||||
|
real(c_double) :: a(n,*)
|
||||||
|
integer(c_int) :: off(*)
|
||||||
|
end function writeDiagDeviceDouble
|
||||||
|
|
||||||
|
function writeDiagDeviceFloatComplex(deviceMat,a,off,n) &
|
||||||
|
& result(res) bind(c,name='writeDiagDeviceFloatComplex')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
integer(c_int), value :: n
|
||||||
|
complex(c_float_complex) :: a(n,*)
|
||||||
|
integer(c_int) :: off(*)!,irn(*)
|
||||||
|
end function writeDiagDeviceFloatComplex
|
||||||
|
|
||||||
|
function writeDiagDeviceDoubleComplex(deviceMat,a,off,n) &
|
||||||
|
& result(res) bind(c,name='writeDiagDeviceDoubleComplex')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
integer(c_int), value :: n
|
||||||
|
complex(c_double_complex) :: a(n,*)
|
||||||
|
integer(c_int) :: off(*)!,irn(*)
|
||||||
|
end function writeDiagDeviceDoubleComplex
|
||||||
|
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface readDiagDevice
|
||||||
|
|
||||||
|
function readDiagDeviceFloat(deviceMat,a,off,n) &
|
||||||
|
& result(res) bind(c,name='readDiagDeviceFloat')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
real(c_float) :: a(n,*)
|
||||||
|
integer(c_int) :: off(*)!,irn(*)
|
||||||
|
end function readDiagDeviceFloat
|
||||||
|
|
||||||
|
function readDiagDeviceDouble(deviceMat,a,off,n) &
|
||||||
|
& result(res) bind(c,name='readDiagDeviceDouble')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
integer(c_int),value :: n
|
||||||
|
real(c_double) :: a(n,*)
|
||||||
|
integer(c_int) :: off(*)
|
||||||
|
end function readDiagDeviceDouble
|
||||||
|
|
||||||
|
function readDiagDeviceFloatComplex(deviceMat,a,off,n) &
|
||||||
|
& result(res) bind(c,name='readDiagDeviceFloatComplex')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
integer(c_int), value :: n
|
||||||
|
complex(c_float_complex) :: a(n,*)
|
||||||
|
integer(c_int) :: off(*)!,irn(*)
|
||||||
|
end function readDiagDeviceFloatComplex
|
||||||
|
|
||||||
|
function readDiagDeviceDoubleComplex(deviceMat,a,off,n) &
|
||||||
|
& result(res) bind(c,name='readDiagDeviceDoubleComplex')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
integer(c_int), value :: n
|
||||||
|
complex(c_double_complex) :: a(n,*)
|
||||||
|
integer(c_int) :: off(*)!,irn(*)
|
||||||
|
end function readDiagDeviceDoubleComplex
|
||||||
|
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface
|
||||||
|
subroutine freeDiagDevice(deviceMat) &
|
||||||
|
& bind(c,name='freeDiagDevice')
|
||||||
|
use iso_c_binding
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
end subroutine freeDiagDevice
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface
|
||||||
|
subroutine resetDiagTimer() bind(c,name='resetDiagTimer')
|
||||||
|
use iso_c_binding
|
||||||
|
end subroutine resetDiagTimer
|
||||||
|
end interface
|
||||||
|
interface
|
||||||
|
function getDiagTimer() &
|
||||||
|
& bind(c,name='getDiagTimer') result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
real(c_double) :: res
|
||||||
|
end function getDiagTimer
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface
|
||||||
|
function getDiagDevicePitch(deviceMat) &
|
||||||
|
& bind(c,name='getDiagDevicePitch') result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function getDiagDevicePitch
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface
|
||||||
|
function getDiagDeviceMaxRowSize(deviceMat) &
|
||||||
|
& bind(c,name='getDiagDeviceMaxRowSize') result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function getDiagDeviceMaxRowSize
|
||||||
|
end interface
|
||||||
|
|
||||||
|
|
||||||
|
interface spmvDiagDevice
|
||||||
|
function spmvDiagDeviceFloat(deviceMat,alpha,x,beta,y) &
|
||||||
|
& result(res) bind(c,name='spmvDiagDeviceFloat')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat, x, y
|
||||||
|
real(c_float),value :: alpha, beta
|
||||||
|
end function spmvDiagDeviceFloat
|
||||||
|
function spmvDiagDeviceDouble(deviceMat,alpha,x,beta,y) &
|
||||||
|
& result(res) bind(c,name='spmvDiagDeviceDouble')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat, x, y
|
||||||
|
real(c_double),value :: alpha, beta
|
||||||
|
end function spmvDiagDeviceDouble
|
||||||
|
function spmvDiagDeviceFloatComplex(deviceMat,alpha,x,beta,y) &
|
||||||
|
& result(res) bind(c,name='spmvDiagDeviceFloatComplex')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat, x, y
|
||||||
|
complex(c_float_complex),value :: alpha, beta
|
||||||
|
end function spmvDiagDeviceFloatComplex
|
||||||
|
function spmvDiagDeviceDoubleComplex(deviceMat,alpha,x,beta,y) &
|
||||||
|
& result(res) bind(c,name='spmvDiagDeviceDoubleComplex')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat, x, y
|
||||||
|
complex(c_double_complex),value :: alpha, beta
|
||||||
|
end function spmvDiagDeviceDoubleComplex
|
||||||
|
end interface spmvDiagDevice
|
||||||
|
|
||||||
|
end module diagdev_mod
|
@ -0,0 +1,321 @@
|
|||||||
|
/* Parallel Sparse BLAS GPU plugin */
|
||||||
|
/* (C) Copyright 2013 */
|
||||||
|
|
||||||
|
/* Salvatore Filippone */
|
||||||
|
/* Alessandro Fanfarillo */
|
||||||
|
|
||||||
|
/* Redistribution and use in source and binary forms, with or without */
|
||||||
|
/* modification, are permitted provided that the following conditions */
|
||||||
|
/* are met: */
|
||||||
|
/* 1. Redistributions of source code must retain the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer. */
|
||||||
|
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||||
|
/* notice, this list of conditions, and the following disclaimer in the */
|
||||||
|
/* documentation and/or other materials provided with the distribution. */
|
||||||
|
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||||
|
/* not be used to endorse or promote products derived from this */
|
||||||
|
/* software without specific written permission. */
|
||||||
|
|
||||||
|
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||||
|
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||||
|
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||||
|
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||||
|
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||||
|
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||||
|
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||||
|
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||||
|
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||||
|
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
|
||||||
|
#include <sys/time.h>
|
||||||
|
#include "dnsdev.h"
|
||||||
|
|
||||||
|
#define PASS_RS 0
|
||||||
|
|
||||||
|
#define IMIN(a,b) ((a)<(b) ? (a) : (b))
|
||||||
|
|
||||||
|
DnsDeviceParams getDnsDeviceParams(unsigned int rows, unsigned int columns,
|
||||||
|
unsigned int elementType, unsigned int firstIndex)
|
||||||
|
{
|
||||||
|
DnsDeviceParams params;
|
||||||
|
|
||||||
|
if (elementType == SPGPU_TYPE_DOUBLE)
|
||||||
|
{
|
||||||
|
params.pitch = ((rows + ELL_PITCH_ALIGN_D - 1)/ELL_PITCH_ALIGN_D)*ELL_PITCH_ALIGN_D;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
params.pitch = ((rows + ELL_PITCH_ALIGN_S - 1)/ELL_PITCH_ALIGN_S)*ELL_PITCH_ALIGN_S;
|
||||||
|
}
|
||||||
|
//For complex?
|
||||||
|
params.elementType = elementType;
|
||||||
|
params.rows = rows;
|
||||||
|
params.columns = columns;
|
||||||
|
params.firstIndex = firstIndex;
|
||||||
|
|
||||||
|
return params;
|
||||||
|
|
||||||
|
}
|
||||||
|
//new
|
||||||
|
int allocDnsDevice(void ** remoteMatrix, DnsDeviceParams* params)
|
||||||
|
{
|
||||||
|
struct DnsDevice *tmp = (struct DnsDevice *)malloc(sizeof(struct DnsDevice));
|
||||||
|
*remoteMatrix = (void *)tmp;
|
||||||
|
tmp->rows = params->rows;
|
||||||
|
tmp->columns = params->columns;
|
||||||
|
tmp->cMPitch = params->pitch;
|
||||||
|
tmp->pitch= tmp->cMPitch;
|
||||||
|
tmp->allocsize = (int)tmp->columns * tmp->pitch;
|
||||||
|
tmp->baseIndex = params->firstIndex;
|
||||||
|
//fprintf(stderr,"allocDnsDevice: %d %d %d \n",tmp->pitch, params->maxRowSize, params->avgRowSize);
|
||||||
|
if (params->elementType == SPGPU_TYPE_FLOAT)
|
||||||
|
allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(float));
|
||||||
|
else if (params->elementType == SPGPU_TYPE_DOUBLE)
|
||||||
|
allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(double));
|
||||||
|
else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT)
|
||||||
|
allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuFloatComplex));
|
||||||
|
else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE)
|
||||||
|
allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuDoubleComplex));
|
||||||
|
else
|
||||||
|
return SPGPU_UNSUPPORTED; // Unsupported params
|
||||||
|
//fprintf(stderr,"From allocDnsDevice: %d %d %d %p %p %p\n",tmp->maxRowSize,
|
||||||
|
// tmp->avgRowSize,tmp->allocsize,tmp->rS,tmp->rP,tmp->cM);
|
||||||
|
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
void freeDnsDevice(void* remoteMatrix)
|
||||||
|
{
|
||||||
|
struct DnsDevice *devMat = (struct DnsDevice *) remoteMatrix;
|
||||||
|
//fprintf(stderr,"freeDnsDevice\n");
|
||||||
|
if (devMat != NULL) {
|
||||||
|
freeRemoteBuffer(devMat->cM);
|
||||||
|
free(remoteMatrix);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//new
|
||||||
|
int FallocDnsDevice(void** deviceMat, unsigned int rows,
|
||||||
|
unsigned int columns, unsigned int elementType,
|
||||||
|
unsigned int firstIndex)
|
||||||
|
{ int i;
|
||||||
|
DnsDeviceParams p;
|
||||||
|
|
||||||
|
p = getDnsDeviceParams(rows, columns, elementType, firstIndex);
|
||||||
|
i = allocDnsDevice(deviceMat, &p);
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","FallocDnsDevice",i);
|
||||||
|
}
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int spmvDnsDeviceFloat(char transa, int m, int n, int k, float *alpha,
|
||||||
|
void *deviceMat, void* deviceX, float *beta, void* deviceY)
|
||||||
|
{
|
||||||
|
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
|
||||||
|
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||||
|
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||||
|
int status;
|
||||||
|
|
||||||
|
cublasHandle_t handle=psb_cudaGetCublasHandle();
|
||||||
|
cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C));
|
||||||
|
/* Note: the M,N,K choices according to TRANS have already been handled in the caller */
|
||||||
|
if (n == 1) {
|
||||||
|
status = cublasSgemv(handle, trans, m,k,
|
||||||
|
alpha, devMat->cM,devMat->pitch, x->v_,1,
|
||||||
|
beta, y->v_,1);
|
||||||
|
} else {
|
||||||
|
status = cublasSgemm(handle, trans, CUBLAS_OP_N, m,n,k,
|
||||||
|
alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
|
||||||
|
beta, y->v_,y->pitch_);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (status == CUBLAS_STATUS_SUCCESS)
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
else
|
||||||
|
return SPGPU_UNSUPPORTED;
|
||||||
|
}
|
||||||
|
|
||||||
|
int spmvDnsDeviceDouble(char transa, int m, int n, int k, double *alpha,
|
||||||
|
void *deviceMat, void* deviceX, double *beta, void* deviceY)
|
||||||
|
{
|
||||||
|
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
|
||||||
|
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||||
|
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||||
|
int status;
|
||||||
|
|
||||||
|
cublasHandle_t handle=psb_cudaGetCublasHandle();
|
||||||
|
cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C));
|
||||||
|
/* Note: the M,N,K choices according to TRANS have already been handled in the caller */
|
||||||
|
if (n == 1) {
|
||||||
|
status = cublasDgemv(handle, trans, m,k,
|
||||||
|
alpha, devMat->cM,devMat->pitch, x->v_,1,
|
||||||
|
beta, y->v_,1);
|
||||||
|
} else {
|
||||||
|
status = cublasDgemm(handle, trans, CUBLAS_OP_N, m,n,k,
|
||||||
|
alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
|
||||||
|
beta, y->v_,y->pitch_);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (status == CUBLAS_STATUS_SUCCESS)
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
else
|
||||||
|
return SPGPU_UNSUPPORTED;
|
||||||
|
}
|
||||||
|
|
||||||
|
int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k, float complex *alpha,
|
||||||
|
void *deviceMat, void* deviceX, float complex *beta, void* deviceY)
|
||||||
|
{
|
||||||
|
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
|
||||||
|
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||||
|
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||||
|
int status;
|
||||||
|
|
||||||
|
cublasHandle_t handle=psb_cudaGetCublasHandle();
|
||||||
|
cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C));
|
||||||
|
/* Note: the M,N,K choices according to TRANS have already been handled in the caller */
|
||||||
|
if (n == 1) {
|
||||||
|
status = cublasCgemv(handle, trans, m,k,
|
||||||
|
alpha, devMat->cM,devMat->pitch, x->v_,1,
|
||||||
|
beta, y->v_,1);
|
||||||
|
} else {
|
||||||
|
status = cublasCgemm(handle, trans, CUBLAS_OP_N, m,n,k,
|
||||||
|
alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
|
||||||
|
beta, y->v_,y->pitch_);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (status == CUBLAS_STATUS_SUCCESS)
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
else
|
||||||
|
return SPGPU_UNSUPPORTED;
|
||||||
|
}
|
||||||
|
|
||||||
|
int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k, double complex *alpha,
|
||||||
|
void *deviceMat, void* deviceX, double complex *beta, void* deviceY)
|
||||||
|
{
|
||||||
|
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
|
||||||
|
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||||
|
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||||
|
int status;
|
||||||
|
|
||||||
|
cublasHandle_t handle=psb_cudaGetCublasHandle();
|
||||||
|
cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C));
|
||||||
|
/* Note: the M,N,K choices according to TRANS have already been handled in the caller */
|
||||||
|
if (n == 1) {
|
||||||
|
status = cublasZgemv(handle, trans, m,k,
|
||||||
|
alpha, devMat->cM,devMat->pitch, x->v_,1,
|
||||||
|
beta, y->v_,1);
|
||||||
|
} else {
|
||||||
|
status = cublasZgemm(handle, trans, CUBLAS_OP_N, m,n,k,
|
||||||
|
alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
|
||||||
|
beta, y->v_,y->pitch_);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (status == CUBLAS_STATUS_SUCCESS)
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
else
|
||||||
|
return SPGPU_UNSUPPORTED;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int writeDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc)
|
||||||
|
{ int i;
|
||||||
|
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
|
||||||
|
int pitch=devMat->pitch;
|
||||||
|
i = cublasSetMatrix(lda,nc,sizeof(float), (void*) val,lda, (void *)devMat->cM, pitch);
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceFloat",i);
|
||||||
|
}
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc)
|
||||||
|
{ int i;
|
||||||
|
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
|
||||||
|
int pitch=devMat->pitch;
|
||||||
|
i = cublasSetMatrix(lda,nc,sizeof(double), (void*) val,lda, (void *)devMat->cM, pitch);
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceDouble",i);
|
||||||
|
}
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int writeDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int nc)
|
||||||
|
{ int i;
|
||||||
|
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
|
||||||
|
int pitch=devMat->pitch;
|
||||||
|
i = cublasSetMatrix(lda,nc,sizeof(cuFloatComplex), (void*) val,lda, (void *)devMat->cM, pitch);
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceFloatComplex",i);
|
||||||
|
}
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, int nc)
|
||||||
|
{ int i;
|
||||||
|
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
|
||||||
|
int pitch=devMat->pitch;
|
||||||
|
i = cublasSetMatrix(lda,nc,sizeof(cuDoubleComplex), (void*) val,lda, (void *)devMat->cM, pitch);
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceDoubleComplex",i);
|
||||||
|
}
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int readDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc)
|
||||||
|
{ int i;
|
||||||
|
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
|
||||||
|
int pitch=devMat->pitch;
|
||||||
|
i = cublasGetMatrix(lda,nc,sizeof(float), (void*) val,lda, (void *)devMat->cM, pitch);
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceFloat",i);
|
||||||
|
}
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int readDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc)
|
||||||
|
{ int i;
|
||||||
|
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
|
||||||
|
int pitch=devMat->pitch;
|
||||||
|
i = cublasGetMatrix(lda,nc,sizeof(double), (void*) val,lda, (void *)devMat->cM, pitch);
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceDouble",i);
|
||||||
|
}
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int readDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int nc)
|
||||||
|
{ int i;
|
||||||
|
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
|
||||||
|
int pitch=devMat->pitch;
|
||||||
|
i = cublasGetMatrix(lda,nc,sizeof(cuFloatComplex), (void*) val,lda, (void *)devMat->cM, pitch);
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceFloatComplex",i);
|
||||||
|
}
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int readDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, int nc)
|
||||||
|
{ int i;
|
||||||
|
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
|
||||||
|
int pitch=devMat->pitch;
|
||||||
|
i = cublasGetMatrix(lda,nc,sizeof(cuDoubleComplex), (void*) val,lda, (void *)devMat->cM, pitch);
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceDoubleComplex",i);
|
||||||
|
}
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int getDnsDevicePitch(void* deviceMat)
|
||||||
|
{ int i;
|
||||||
|
struct DnsDevice *devMat = (struct DnsDevice *) deviceMat;
|
||||||
|
i = devMat->pitch;
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,117 @@
|
|||||||
|
/* Parallel Sparse BLAS GPU plugin */
|
||||||
|
/* (C) Copyright 2013 */
|
||||||
|
|
||||||
|
/* Salvatore Filippone */
|
||||||
|
/* Alessandro Fanfarillo */
|
||||||
|
|
||||||
|
/* Redistribution and use in source and binary forms, with or without */
|
||||||
|
/* modification, are permitted provided that the following conditions */
|
||||||
|
/* are met: */
|
||||||
|
/* 1. Redistributions of source code must retain the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer. */
|
||||||
|
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||||
|
/* notice, this list of conditions, and the following disclaimer in the */
|
||||||
|
/* documentation and/or other materials provided with the distribution. */
|
||||||
|
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||||
|
/* not be used to endorse or promote products derived from this */
|
||||||
|
/* software without specific written permission. */
|
||||||
|
|
||||||
|
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||||
|
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||||
|
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||||
|
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||||
|
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||||
|
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||||
|
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||||
|
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||||
|
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||||
|
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef _DNSDEV_H_
|
||||||
|
#define _DNSDEV_H_
|
||||||
|
|
||||||
|
#include "cintrf.h"
|
||||||
|
#include "cuComplex.h"
|
||||||
|
#include "cublas_v2.h"
|
||||||
|
|
||||||
|
|
||||||
|
struct DnsDevice
|
||||||
|
{
|
||||||
|
// Compressed matrix
|
||||||
|
void *cM; //it can be float or double
|
||||||
|
|
||||||
|
|
||||||
|
//matrix size (uncompressed)
|
||||||
|
int rows;
|
||||||
|
int columns;
|
||||||
|
|
||||||
|
int pitch; //old
|
||||||
|
|
||||||
|
int cMPitch;
|
||||||
|
|
||||||
|
//allocation size (in elements)
|
||||||
|
int allocsize;
|
||||||
|
|
||||||
|
/*(i.e. 0 for C, 1 for Fortran)*/
|
||||||
|
int baseIndex;
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef struct DnsDeviceParams
|
||||||
|
{
|
||||||
|
// The resulting allocation for cM and rP will be pitch*maxRowSize*(size of the elementType)
|
||||||
|
unsigned int elementType;
|
||||||
|
|
||||||
|
// Pitch (in number of elements)
|
||||||
|
unsigned int pitch;
|
||||||
|
|
||||||
|
// Number of rows.
|
||||||
|
// Used to allocate rS array
|
||||||
|
unsigned int rows;
|
||||||
|
|
||||||
|
// Number of columns.
|
||||||
|
// Used for error-checking
|
||||||
|
unsigned int columns;
|
||||||
|
|
||||||
|
// First index (e.g 0 or 1)
|
||||||
|
unsigned int firstIndex;
|
||||||
|
} DnsDeviceParams;
|
||||||
|
|
||||||
|
int FallocDnsDevice(void** deviceMat, unsigned int rows,
|
||||||
|
unsigned int columns, unsigned int elementType,
|
||||||
|
unsigned int firstIndex);
|
||||||
|
int allocDnsDevice(void ** remoteMatrix, DnsDeviceParams* params);
|
||||||
|
void freeDnsDevice(void* remoteMatrix);
|
||||||
|
|
||||||
|
int writeDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc);
|
||||||
|
int writeDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc);
|
||||||
|
int writeDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int nc);
|
||||||
|
int writeDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, int nc);
|
||||||
|
|
||||||
|
int readDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc);
|
||||||
|
int readDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc);
|
||||||
|
int readDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int nc);
|
||||||
|
int readDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, int nc);
|
||||||
|
|
||||||
|
int spmvDnsDeviceFloat(char transa, int m, int n, int k,
|
||||||
|
float *alpha, void *deviceMat, void* deviceX,
|
||||||
|
float *beta, void* deviceY);
|
||||||
|
int spmvDnsDeviceDouble(char transa, int m, int n, int k,
|
||||||
|
double *alpha, void *deviceMat, void* deviceX,
|
||||||
|
double *beta, void* deviceY);
|
||||||
|
int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k,
|
||||||
|
float complex *alpha, void *deviceMat, void* deviceX,
|
||||||
|
float complex *beta, void* deviceY);
|
||||||
|
int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k,
|
||||||
|
double complex *alpha, void *deviceMat, void* deviceX,
|
||||||
|
double complex *beta, void* deviceY);
|
||||||
|
|
||||||
|
int getDnsDevicePitch(void* deviceMat);
|
||||||
|
|
||||||
|
// sparse Dns matrix-vector product
|
||||||
|
//int spmvDnsDeviceFloat(void *deviceMat, float* alpha, void* deviceX, float* beta, void* deviceY);
|
||||||
|
//int spmvDnsDeviceDouble(void *deviceMat, double* alpha, void* deviceX, double* beta, void* deviceY);
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,270 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
|
||||||
|
module dnsdev_mod
|
||||||
|
use iso_c_binding
|
||||||
|
use core_mod
|
||||||
|
|
||||||
|
type, bind(c) :: dnsdev_parms
|
||||||
|
integer(c_int) :: element_type
|
||||||
|
integer(c_int) :: pitch
|
||||||
|
integer(c_int) :: rows
|
||||||
|
integer(c_int) :: columns
|
||||||
|
integer(c_int) :: maxRowSize
|
||||||
|
integer(c_int) :: avgRowSize
|
||||||
|
integer(c_int) :: firstIndex
|
||||||
|
end type dnsdev_parms
|
||||||
|
|
||||||
|
interface
|
||||||
|
function FgetDnsDeviceParams(rows, columns, elementType, firstIndex) &
|
||||||
|
& result(res) bind(c,name='getDnsDeviceParams')
|
||||||
|
use iso_c_binding
|
||||||
|
import :: dnsdev_parms
|
||||||
|
type(dnsdev_parms) :: res
|
||||||
|
integer(c_int), value :: rows,columns,elementType,firstIndex
|
||||||
|
end function FgetDnsDeviceParams
|
||||||
|
end interface
|
||||||
|
|
||||||
|
|
||||||
|
interface
|
||||||
|
function FallocDnsDevice(deviceMat,rows,columns,&
|
||||||
|
& elementType,firstIndex) &
|
||||||
|
& result(res) bind(c,name='FallocDnsDevice')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
integer(c_int), value :: rows,columns,elementType,firstIndex
|
||||||
|
type(c_ptr) :: deviceMat
|
||||||
|
end function FallocDnsDevice
|
||||||
|
end interface
|
||||||
|
|
||||||
|
|
||||||
|
interface writeDnsDevice
|
||||||
|
|
||||||
|
function writeDnsDeviceFloat(deviceMat,val,lda,nc) &
|
||||||
|
& result(res) bind(c,name='writeDnsDeviceFloat')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
integer(c_int), value :: lda,nc
|
||||||
|
real(c_float) :: val(lda,*)
|
||||||
|
end function writeDnsDeviceFloat
|
||||||
|
|
||||||
|
|
||||||
|
function writeDnsDeviceDouble(deviceMat,val,lda,nc) &
|
||||||
|
& result(res) bind(c,name='writeDnsDeviceDouble')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
integer(c_int), value :: lda,nc
|
||||||
|
real(c_double) :: val(lda,*)
|
||||||
|
end function writeDnsDeviceDouble
|
||||||
|
|
||||||
|
|
||||||
|
function writeDnsDeviceFloatComplex(deviceMat,val,lda,nc) &
|
||||||
|
& result(res) bind(c,name='writeDnsDeviceFloatComple')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
integer(c_int), value :: lda,nc
|
||||||
|
complex(c_float_complex) :: val(lda,*)
|
||||||
|
end function writeDnsDeviceFloatComplex
|
||||||
|
|
||||||
|
|
||||||
|
function writeDnsDeviceDoubleComplex(deviceMat,val,lda,nc) &
|
||||||
|
& result(res) bind(c,name='writeDnsDeviceDoubleComplex')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
integer(c_int), value :: lda,nc
|
||||||
|
complex(c_double_complex) :: val(lda,*)
|
||||||
|
end function writeDnsDeviceDoubleComplex
|
||||||
|
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface readDnsDevice
|
||||||
|
|
||||||
|
function readDnsDeviceFloat(deviceMat,val,lda,nc) &
|
||||||
|
& result(res) bind(c,name='readDnsDeviceFloat')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
integer(c_int), value :: lda,nc
|
||||||
|
real(c_float) :: val(lda,*)
|
||||||
|
end function readDnsDeviceFloat
|
||||||
|
|
||||||
|
|
||||||
|
function readDnsDeviceDouble(deviceMat,val,lda,nc) &
|
||||||
|
& result(res) bind(c,name='readDnsDeviceDouble')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
integer(c_int), value :: lda,nc
|
||||||
|
real(c_double) :: val(lda,*)
|
||||||
|
end function readDnsDeviceDouble
|
||||||
|
|
||||||
|
|
||||||
|
function readDnsDeviceFloatComplex(deviceMat,val,lda,nc) &
|
||||||
|
& result(res) bind(c,name='readDnsDeviceFloatComple')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
integer(c_int), value :: lda,nc
|
||||||
|
complex(c_float_complex) :: val(lda,*)
|
||||||
|
end function readDnsDeviceFloatComplex
|
||||||
|
|
||||||
|
|
||||||
|
function readDnsDeviceDoubleComplex(deviceMat,val,lda,nc) &
|
||||||
|
& result(res) bind(c,name='readDnsDeviceDoubleComplex')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
integer(c_int), value :: lda,nc
|
||||||
|
complex(c_double_complex) :: val(lda,*)
|
||||||
|
end function readDnsDeviceDoubleComplex
|
||||||
|
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface
|
||||||
|
subroutine freeDnsDevice(deviceMat) &
|
||||||
|
& bind(c,name='freeDnsDevice')
|
||||||
|
use iso_c_binding
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
end subroutine freeDnsDevice
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface
|
||||||
|
subroutine resetDnsTimer() bind(c,name='resetDnsTimer')
|
||||||
|
use iso_c_binding
|
||||||
|
end subroutine resetDnsTimer
|
||||||
|
end interface
|
||||||
|
interface
|
||||||
|
function getDnsTimer() &
|
||||||
|
& bind(c,name='getDnsTimer') result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
real(c_double) :: res
|
||||||
|
end function getDnsTimer
|
||||||
|
end interface
|
||||||
|
|
||||||
|
|
||||||
|
interface
|
||||||
|
function getDnsDevicePitch(deviceMat) &
|
||||||
|
& bind(c,name='getDnsDevicePitch') result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function getDnsDevicePitch
|
||||||
|
end interface
|
||||||
|
|
||||||
|
!!$ interface csputDnsDeviceFloat
|
||||||
|
!!$ function dev_csputDnsDeviceFloat(deviceMat, nnz, ia, ja, val) &
|
||||||
|
!!$ & result(res) bind(c,name='dev_csputDnsDeviceFloat')
|
||||||
|
!!$ use iso_c_binding
|
||||||
|
!!$ integer(c_int) :: res
|
||||||
|
!!$ type(c_ptr), value :: deviceMat , ia, ja, val
|
||||||
|
!!$ integer(c_int), value :: nnz
|
||||||
|
!!$ end function dev_csputDnsDeviceFloat
|
||||||
|
!!$ end interface
|
||||||
|
!!$
|
||||||
|
!!$ interface csputDnsDeviceDouble
|
||||||
|
!!$ function dev_csputDnsDeviceDouble(deviceMat, nnz, ia, ja, val) &
|
||||||
|
!!$ & result(res) bind(c,name='dev_csputDnsDeviceDouble')
|
||||||
|
!!$ use iso_c_binding
|
||||||
|
!!$ integer(c_int) :: res
|
||||||
|
!!$ type(c_ptr), value :: deviceMat , ia, ja, val
|
||||||
|
!!$ integer(c_int), value :: nnz
|
||||||
|
!!$ end function dev_csputDnsDeviceDouble
|
||||||
|
!!$ end interface
|
||||||
|
!!$
|
||||||
|
!!$ interface csputDnsDeviceFloatComplex
|
||||||
|
!!$ function dev_csputDnsDeviceFloatComplex(deviceMat, nnz, ia, ja, val) &
|
||||||
|
!!$ & result(res) bind(c,name='dev_csputDnsDeviceFloatComplex')
|
||||||
|
!!$ use iso_c_binding
|
||||||
|
!!$ integer(c_int) :: res
|
||||||
|
!!$ type(c_ptr), value :: deviceMat , ia, ja, val
|
||||||
|
!!$ integer(c_int), value :: nnz
|
||||||
|
!!$ end function dev_csputDnsDeviceFloatComplex
|
||||||
|
!!$ end interface
|
||||||
|
!!$
|
||||||
|
!!$ interface csputDnsDeviceDoubleComplex
|
||||||
|
!!$ function dev_csputDnsDeviceDoubleComplex(deviceMat, nnz, ia, ja, val) &
|
||||||
|
!!$ & result(res) bind(c,name='dev_csputDnsDeviceDoubleComplex')
|
||||||
|
!!$ use iso_c_binding
|
||||||
|
!!$ integer(c_int) :: res
|
||||||
|
!!$ type(c_ptr), value :: deviceMat , ia, ja, val
|
||||||
|
!!$ integer(c_int), value :: nnz
|
||||||
|
!!$ end function dev_csputDnsDeviceDoubleComplex
|
||||||
|
!!$ end interface
|
||||||
|
|
||||||
|
interface spmvDnsDevice
|
||||||
|
function spmvDnsDeviceFloat(transa,m,n,k,alpha,deviceMat,x,beta,y) &
|
||||||
|
& result(res) bind(c,name='spmvDnsDeviceFloat')
|
||||||
|
use iso_c_binding
|
||||||
|
character(c_char), value :: transa
|
||||||
|
integer(c_int), value :: m, n, k
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat, x, y
|
||||||
|
real(c_float) :: alpha, beta
|
||||||
|
end function spmvDnsDeviceFloat
|
||||||
|
|
||||||
|
function spmvDnsDeviceDouble(transa,m,n,k,alpha,deviceMat,x,beta,y) &
|
||||||
|
& result(res) bind(c,name='spmvDnsDeviceDouble')
|
||||||
|
use iso_c_binding
|
||||||
|
character(c_char), value :: transa
|
||||||
|
integer(c_int), value :: m, n, k
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat, x, y
|
||||||
|
real(c_double) :: alpha, beta
|
||||||
|
end function spmvDnsDeviceDouble
|
||||||
|
|
||||||
|
function spmvDnsDeviceFloatComplex(transa,m,n,k,alpha,deviceMat,x,beta,y) &
|
||||||
|
& result(res) bind(c,name='spmvDnsDeviceFloatComplex')
|
||||||
|
use iso_c_binding
|
||||||
|
character(c_char), value :: transa
|
||||||
|
integer(c_int), value :: m, n, k
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat, x, y
|
||||||
|
complex(c_float_complex) :: alpha, beta
|
||||||
|
end function spmvDnsDeviceFloatComplex
|
||||||
|
|
||||||
|
function spmvDnsDeviceDoubleComplex(transa,m,n,k,alpha,deviceMat,x,beta,y) &
|
||||||
|
& result(res) bind(c,name='spmvDnsDeviceDoubleComplex')
|
||||||
|
use iso_c_binding
|
||||||
|
character(c_char), value :: transa
|
||||||
|
integer(c_int), value :: m, n, k
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat, x, y
|
||||||
|
complex(c_double_complex) :: alpha, beta
|
||||||
|
end function spmvDnsDeviceDoubleComplex
|
||||||
|
|
||||||
|
end interface
|
||||||
|
|
||||||
|
end module dnsdev_mod
|
@ -0,0 +1,301 @@
|
|||||||
|
/* Parallel Sparse BLAS GPU plugin */
|
||||||
|
/* (C) Copyright 2013 */
|
||||||
|
|
||||||
|
/* Salvatore Filippone */
|
||||||
|
/* Alessandro Fanfarillo */
|
||||||
|
|
||||||
|
/* Redistribution and use in source and binary forms, with or without */
|
||||||
|
/* modification, are permitted provided that the following conditions */
|
||||||
|
/* are met: */
|
||||||
|
/* 1. Redistributions of source code must retain the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer. */
|
||||||
|
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||||
|
/* notice, this list of conditions, and the following disclaimer in the */
|
||||||
|
/* documentation and/or other materials provided with the distribution. */
|
||||||
|
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||||
|
/* not be used to endorse or promote products derived from this */
|
||||||
|
/* software without specific written permission. */
|
||||||
|
|
||||||
|
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||||
|
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||||
|
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||||
|
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||||
|
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||||
|
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||||
|
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||||
|
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||||
|
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||||
|
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <complex.h>
|
||||||
|
//#include "utils.h"
|
||||||
|
//#include "common.h"
|
||||||
|
#include "dvectordev.h"
|
||||||
|
|
||||||
|
|
||||||
|
int registerMappedDouble(void *buff, void **d_p, int n, double dummy)
|
||||||
|
{
|
||||||
|
return registerMappedMemory(buff,d_p,n*sizeof(double));
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeMultiVecDeviceDouble(void* deviceVec, double* hostVec)
|
||||||
|
{ int i;
|
||||||
|
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
|
||||||
|
// Ex updateFromHost vector function
|
||||||
|
i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_, devVec->pitch_*devVec->count_*sizeof(double));
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","FallocMultiVecDevice",i);
|
||||||
|
}
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeMultiVecDeviceDoubleR2(void* deviceVec, double* hostVec, int ld)
|
||||||
|
{ int i;
|
||||||
|
i = writeMultiVecDeviceDouble(deviceVec, (void *) hostVec);
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","writeMultiVecDeviceDoubleR2",i);
|
||||||
|
}
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int readMultiVecDeviceDouble(void* deviceVec, double* hostVec)
|
||||||
|
{ int i,j;
|
||||||
|
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
|
||||||
|
i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_,
|
||||||
|
devVec->pitch_*devVec->count_*sizeof(double));
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceDouble",i);
|
||||||
|
}
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int readMultiVecDeviceDoubleR2(void* deviceVec, double* hostVec, int ld)
|
||||||
|
{ int i;
|
||||||
|
i = readMultiVecDeviceDouble(deviceVec, hostVec);
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceDoubleR2",i);
|
||||||
|
}
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int setscalMultiVecDeviceDouble(double val, int first, int last,
|
||||||
|
int indexBase, void* devMultiVecX)
|
||||||
|
{ int i=0;
|
||||||
|
int pitch = 0;
|
||||||
|
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
|
||||||
|
spgpuDsetscal(handle, first, last, indexBase, val, (double *) devVecX->v_);
|
||||||
|
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int geinsMultiVecDeviceDouble(int n, void* devMultiVecIrl, void* devMultiVecVal,
|
||||||
|
int dupl, int indexBase, void* devMultiVecX)
|
||||||
|
{ int j=0, i=0,nmin=0,nmax=0;
|
||||||
|
int pitch = 0;
|
||||||
|
double beta;
|
||||||
|
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
|
||||||
|
struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl;
|
||||||
|
struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
pitch = devVecIrl->pitch_;
|
||||||
|
if ((n > devVecIrl->size_) || (n>devVecVal->size_ ))
|
||||||
|
return SPGPU_UNSUPPORTED;
|
||||||
|
|
||||||
|
//fprintf(stderr,"geins: %d %d %p %p %p\n",dupl,n,devVecIrl->v_,devVecVal->v_,devVecX->v_);
|
||||||
|
|
||||||
|
if (dupl == INS_OVERWRITE)
|
||||||
|
beta = 0.0;
|
||||||
|
else if (dupl == INS_ADD)
|
||||||
|
beta = 1.0;
|
||||||
|
else
|
||||||
|
beta = 0.0;
|
||||||
|
|
||||||
|
spgpuDscat(handle, (double *) devVecX->v_, n, (double*)devVecVal->v_,
|
||||||
|
(int*)devVecIrl->v_, indexBase, beta);
|
||||||
|
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int igathMultiVecDeviceDoubleVecIdx(void* deviceVec, int vectorId, int n,
|
||||||
|
int first, void* deviceIdx, int hfirst,
|
||||||
|
void* host_values, int indexBase)
|
||||||
|
{
|
||||||
|
int i, *idx;
|
||||||
|
struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx;
|
||||||
|
|
||||||
|
i= igathMultiVecDeviceDouble(deviceVec, vectorId, n,
|
||||||
|
first, (void*) devIdx->v_, hfirst, host_values, indexBase);
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int igathMultiVecDeviceDouble(void* deviceVec, int vectorId, int n,
|
||||||
|
int first, void* indexes, int hfirst, void* host_values, int indexBase)
|
||||||
|
{
|
||||||
|
int i, *idx =(int *) indexes;;
|
||||||
|
double *hv = (double *) host_values;;
|
||||||
|
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
|
||||||
|
i=0;
|
||||||
|
hv = &(hv[hfirst-indexBase]);
|
||||||
|
idx = &(idx[first-indexBase]);
|
||||||
|
spgpuDgath(handle,hv, n, idx,indexBase, (double *) devVec->v_+vectorId*devVec->pitch_);
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int iscatMultiVecDeviceDoubleVecIdx(void* deviceVec, int vectorId, int n, int first, void *deviceIdx,
|
||||||
|
int hfirst, void* host_values, int indexBase, double beta)
|
||||||
|
{
|
||||||
|
int i, *idx;
|
||||||
|
struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx;
|
||||||
|
i= iscatMultiVecDeviceDouble(deviceVec, vectorId, n, first,
|
||||||
|
(void*) devIdx->v_, hfirst,host_values, indexBase, beta);
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int iscatMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, int first, void *indexes,
|
||||||
|
int hfirst, void* host_values, int indexBase, double beta)
|
||||||
|
{ int i=0;
|
||||||
|
double *hv = (double *) host_values;
|
||||||
|
int *idx=(int *) indexes;
|
||||||
|
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
|
||||||
|
idx = &(idx[first-indexBase]);
|
||||||
|
hv = &(hv[hfirst-indexBase]);
|
||||||
|
spgpuDscat(handle, (double *) devVec->v_, n, hv, idx, indexBase, beta);
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
int scalMultiVecDeviceDouble(double alpha, void* devMultiVecA)
|
||||||
|
{ int i=0;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
|
||||||
|
// Note: inner kernel can handle aliased input/output
|
||||||
|
spgpuDscal(handle, (double *)devVecA->v_, devVecA->pitch_,
|
||||||
|
alpha, (double *)devVecA->v_);
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int nrm2MultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA)
|
||||||
|
{ int i=0;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
|
||||||
|
|
||||||
|
spgpuDmnrm2(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_);
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int amaxMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA)
|
||||||
|
{ int i=0;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
|
||||||
|
|
||||||
|
spgpuDmamax(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_);
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int asumMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA)
|
||||||
|
{ int i=0;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
|
||||||
|
|
||||||
|
spgpuDmasum(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_);
|
||||||
|
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int dotMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA, void* devMultiVecB)
|
||||||
|
{int i=0;
|
||||||
|
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
|
||||||
|
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
|
||||||
|
spgpuDmdot(handle, y_res, n, (double*)devVecA->v_, (double*)devVecB->v_,devVecA->count_,devVecB->pitch_);
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int axpbyMultiVecDeviceDouble(int n,double alpha, void* devMultiVecX,
|
||||||
|
double beta, void* devMultiVecY)
|
||||||
|
{ int j=0, i=0;
|
||||||
|
int pitch = 0;
|
||||||
|
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
|
||||||
|
struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
pitch = devVecY->pitch_;
|
||||||
|
if ((n > devVecY->size_) || (n>devVecX->size_ ))
|
||||||
|
return SPGPU_UNSUPPORTED;
|
||||||
|
|
||||||
|
for(j=0;j<devVecY->count_;j++)
|
||||||
|
spgpuDaxpby(handle,(double*)devVecY->v_+pitch*j, n, beta,
|
||||||
|
(double*)devVecY->v_+pitch*j, alpha,(double*) devVecX->v_+pitch*j);
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceVecB)
|
||||||
|
{ int i = 0;
|
||||||
|
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
|
||||||
|
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
if ((n > devVecA->size_) || (n>devVecB->size_ ))
|
||||||
|
return SPGPU_UNSUPPORTED;
|
||||||
|
|
||||||
|
spgpuDmaxy(handle, (double*)devVecB->v_, n, alpha, (double*)devVecA->v_,
|
||||||
|
(double*)devVecB->v_, devVecA->count_, devVecA->pitch_);
|
||||||
|
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int axybzMultiVecDeviceDouble(int n, double alpha, void *deviceVecA,
|
||||||
|
void *deviceVecB, double beta, void *deviceVecZ)
|
||||||
|
{ int i=0;
|
||||||
|
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
|
||||||
|
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
|
||||||
|
struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
|
||||||
|
if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ ))
|
||||||
|
return SPGPU_UNSUPPORTED;
|
||||||
|
spgpuDmaxypbz(handle, (double*)devVecZ->v_, n, beta, (double*)devVecZ->v_,
|
||||||
|
alpha, (double*) devVecA->v_, (double*) devVecB->v_,
|
||||||
|
devVecB->count_, devVecB->pitch_);
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int absMultiVecDeviceDouble2(int n, double alpha, void *deviceVecA,
|
||||||
|
void *deviceVecB)
|
||||||
|
{ int i=0;
|
||||||
|
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
|
||||||
|
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
|
||||||
|
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
|
||||||
|
if ((n > devVecA->size_) || (n>devVecB->size_ ))
|
||||||
|
return SPGPU_UNSUPPORTED;
|
||||||
|
|
||||||
|
spgpuDabs(handle, (double*)devVecB->v_, n, alpha, (double*)devVecA->v_);
|
||||||
|
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int absMultiVecDeviceDouble(int n, double alpha, void *deviceVecA)
|
||||||
|
{ int i = 0;
|
||||||
|
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
if (n > devVecA->size_)
|
||||||
|
return SPGPU_UNSUPPORTED;
|
||||||
|
|
||||||
|
spgpuDabs(handle, (double*)devVecA->v_, n, alpha, (double*)devVecA->v_);
|
||||||
|
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,75 @@
|
|||||||
|
/* Parallel Sparse BLAS GPU plugin */
|
||||||
|
/* (C) Copyright 2013 */
|
||||||
|
|
||||||
|
/* Salvatore Filippone */
|
||||||
|
/* Alessandro Fanfarillo */
|
||||||
|
|
||||||
|
/* Redistribution and use in source and binary forms, with or without */
|
||||||
|
/* modification, are permitted provided that the following conditions */
|
||||||
|
/* are met: */
|
||||||
|
/* 1. Redistributions of source code must retain the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer. */
|
||||||
|
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||||
|
/* notice, this list of conditions, and the following disclaimer in the */
|
||||||
|
/* documentation and/or other materials provided with the distribution. */
|
||||||
|
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||||
|
/* not be used to endorse or promote products derived from this */
|
||||||
|
/* software without specific written permission. */
|
||||||
|
|
||||||
|
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||||
|
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||||
|
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||||
|
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||||
|
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||||
|
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||||
|
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||||
|
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||||
|
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||||
|
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
//#include "utils.h"
|
||||||
|
#include "vectordev.h"
|
||||||
|
#include "cuda_runtime.h"
|
||||||
|
#include "core.h"
|
||||||
|
|
||||||
|
int registerMappedDouble(void *, void **, int, double);
|
||||||
|
int writeMultiVecDeviceDouble(void* deviceMultiVec, double* hostMultiVec);
|
||||||
|
int writeMultiVecDeviceDoubleR2(void* deviceMultiVec, double* hostMultiVec, int ld);
|
||||||
|
int readMultiVecDeviceDouble(void* deviceMultiVec, double* hostMultiVec);
|
||||||
|
int readMultiVecDeviceDoubleR2(void* deviceMultiVec, double* hostMultiVec, int ld);
|
||||||
|
|
||||||
|
int setscalMultiVecDeviceDouble(double val, int first, int last,
|
||||||
|
int indexBase, void* devVecX);
|
||||||
|
|
||||||
|
int geinsMultiVecDeviceDouble(int n, void* devVecIrl, void* devVecVal,
|
||||||
|
int dupl, int indexBase, void* devVecX);
|
||||||
|
|
||||||
|
int igathMultiVecDeviceDoubleVecIdx(void* deviceVec, int vectorId, int n,
|
||||||
|
int first, void* deviceIdx, int hfirst,
|
||||||
|
void* host_values, int indexBase);
|
||||||
|
int igathMultiVecDeviceDouble(void* deviceVec, int vectorId, int n,
|
||||||
|
int first, void* indexes, int hfirst, void* host_values,
|
||||||
|
int indexBase);
|
||||||
|
int iscatMultiVecDeviceDoubleVecIdx(void* deviceVec, int vectorId, int n, int first,
|
||||||
|
void *deviceIdx, int hfirst, void* host_values,
|
||||||
|
int indexBase, double beta);
|
||||||
|
int iscatMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, int first, void *indexes,
|
||||||
|
int hfirst, void* host_values, int indexBase, double beta);
|
||||||
|
|
||||||
|
int scalMultiVecDeviceDouble(double alpha, void* devMultiVecA);
|
||||||
|
int nrm2MultiVecDeviceDouble(double* y_res, int n, void* devVecA);
|
||||||
|
int amaxMultiVecDeviceDouble(double* y_res, int n, void* devVecA);
|
||||||
|
int asumMultiVecDeviceDouble(double* y_res, int n, void* devVecA);
|
||||||
|
int dotMultiVecDeviceDouble(double* y_res, int n, void* devVecA, void* devVecB);
|
||||||
|
|
||||||
|
int axpbyMultiVecDeviceDouble(int n, double alpha, void* devVecX, double beta, void* devVecY);
|
||||||
|
int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceVecB);
|
||||||
|
int axybzMultiVecDeviceDouble(int n, double alpha, void *deviceVecA,
|
||||||
|
void *deviceVecB, double beta, void *deviceVecZ);
|
||||||
|
int absMultiVecDeviceDouble(int n, double alpha, void *deviceVecA);
|
||||||
|
int absMultiVecDeviceDouble2(int n, double alpha, void *deviceVecA, void *deviceVecB);
|
||||||
|
|
@ -0,0 +1,686 @@
|
|||||||
|
/* Parallel Sparse BLAS GPU plugin */
|
||||||
|
/* (C) Copyright 2013 */
|
||||||
|
|
||||||
|
/* Salvatore Filippone */
|
||||||
|
/* Alessandro Fanfarillo */
|
||||||
|
|
||||||
|
/* Redistribution and use in source and binary forms, with or without */
|
||||||
|
/* modification, are permitted provided that the following conditions */
|
||||||
|
/* are met: */
|
||||||
|
/* 1. Redistributions of source code must retain the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer. */
|
||||||
|
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||||
|
/* notice, this list of conditions, and the following disclaimer in the */
|
||||||
|
/* documentation and/or other materials provided with the distribution. */
|
||||||
|
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||||
|
/* not be used to endorse or promote products derived from this */
|
||||||
|
/* software without specific written permission. */
|
||||||
|
|
||||||
|
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||||
|
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||||
|
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||||
|
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||||
|
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||||
|
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||||
|
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||||
|
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||||
|
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||||
|
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
|
||||||
|
#include <sys/time.h>
|
||||||
|
#include "elldev.h"
|
||||||
|
|
||||||
|
#define PASS_RS 0
|
||||||
|
|
||||||
|
EllDeviceParams getEllDeviceParams(unsigned int rows, unsigned int maxRowSize,
|
||||||
|
unsigned int nnzeros,
|
||||||
|
unsigned int columns, unsigned int elementType,
|
||||||
|
unsigned int firstIndex)
|
||||||
|
{
|
||||||
|
EllDeviceParams params;
|
||||||
|
|
||||||
|
if (elementType == SPGPU_TYPE_DOUBLE)
|
||||||
|
{
|
||||||
|
params.pitch = ((rows + ELL_PITCH_ALIGN_D - 1)/ELL_PITCH_ALIGN_D)*ELL_PITCH_ALIGN_D;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
params.pitch = ((rows + ELL_PITCH_ALIGN_S - 1)/ELL_PITCH_ALIGN_S)*ELL_PITCH_ALIGN_S;
|
||||||
|
}
|
||||||
|
//For complex?
|
||||||
|
params.elementType = elementType;
|
||||||
|
|
||||||
|
params.rows = rows;
|
||||||
|
params.maxRowSize = maxRowSize;
|
||||||
|
params.avgRowSize = (nnzeros+rows-1)/rows;
|
||||||
|
params.columns = columns;
|
||||||
|
params.firstIndex = firstIndex;
|
||||||
|
|
||||||
|
//params.pitch = computeEllAllocPitch(rows);
|
||||||
|
|
||||||
|
return params;
|
||||||
|
|
||||||
|
}
|
||||||
|
//new
|
||||||
|
int allocEllDevice(void ** remoteMatrix, EllDeviceParams* params)
|
||||||
|
{
|
||||||
|
struct EllDevice *tmp = (struct EllDevice *)malloc(sizeof(struct EllDevice));
|
||||||
|
*remoteMatrix = (void *)tmp;
|
||||||
|
tmp->rows = params->rows;
|
||||||
|
tmp->cMPitch = computeEllAllocPitch(tmp->rows);
|
||||||
|
tmp->rPPitch = tmp->cMPitch;
|
||||||
|
tmp->pitch= tmp->cMPitch;
|
||||||
|
tmp->maxRowSize = params->maxRowSize;
|
||||||
|
tmp->avgRowSize = params->avgRowSize;
|
||||||
|
tmp->allocsize = (int)tmp->maxRowSize * tmp->pitch;
|
||||||
|
//tmp->allocsize = (int)params->maxRowSize * tmp->cMPitch;
|
||||||
|
allocRemoteBuffer((void **)&(tmp->rS), tmp->rows*sizeof(int));
|
||||||
|
allocRemoteBuffer((void **)&(tmp->diag), tmp->rows*sizeof(int));
|
||||||
|
allocRemoteBuffer((void **)&(tmp->rP), tmp->allocsize*sizeof(int));
|
||||||
|
tmp->columns = params->columns;
|
||||||
|
tmp->baseIndex = params->firstIndex;
|
||||||
|
tmp->dataType = params->elementType;
|
||||||
|
//fprintf(stderr,"allocEllDevice: %d %d %d \n",tmp->pitch, params->maxRowSize, params->avgRowSize);
|
||||||
|
if (params->elementType == SPGPU_TYPE_FLOAT)
|
||||||
|
allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(float));
|
||||||
|
else if (params->elementType == SPGPU_TYPE_DOUBLE)
|
||||||
|
allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(double));
|
||||||
|
else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT)
|
||||||
|
allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuFloatComplex));
|
||||||
|
else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE)
|
||||||
|
allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuDoubleComplex));
|
||||||
|
else
|
||||||
|
return SPGPU_UNSUPPORTED; // Unsupported params
|
||||||
|
//fprintf(stderr,"From allocEllDevice: %d %d %d %p %p %p\n",tmp->maxRowSize,
|
||||||
|
// tmp->avgRowSize,tmp->allocsize,tmp->rS,tmp->rP,tmp->cM);
|
||||||
|
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
//new
|
||||||
|
void zeroEllDevice(void *remoteMatrix)
|
||||||
|
{
|
||||||
|
struct EllDevice *tmp = (struct EllDevice *) remoteMatrix;
|
||||||
|
|
||||||
|
if (tmp->dataType == SPGPU_TYPE_FLOAT)
|
||||||
|
cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(float));
|
||||||
|
else if (tmp->dataType == SPGPU_TYPE_DOUBLE)
|
||||||
|
cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(double));
|
||||||
|
else if (tmp->dataType == SPGPU_TYPE_COMPLEX_FLOAT)
|
||||||
|
cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(cuFloatComplex));
|
||||||
|
else if (tmp->dataType == SPGPU_TYPE_COMPLEX_DOUBLE)
|
||||||
|
cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(cuDoubleComplex));
|
||||||
|
else
|
||||||
|
return SPGPU_UNSUPPORTED; // Unsupported params
|
||||||
|
//fprintf(stderr,"From allocEllDevice: %d %d %d %p %p %p\n",tmp->maxRowSize,
|
||||||
|
// tmp->avgRowSize,tmp->allocsize,tmp->rS,tmp->rP,tmp->cM);
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void freeEllDevice(void* remoteMatrix)
|
||||||
|
{
|
||||||
|
struct EllDevice *devMat = (struct EllDevice *) remoteMatrix;
|
||||||
|
//fprintf(stderr,"freeEllDevice\n");
|
||||||
|
if (devMat != NULL) {
|
||||||
|
freeRemoteBuffer(devMat->rS);
|
||||||
|
freeRemoteBuffer(devMat->rP);
|
||||||
|
freeRemoteBuffer(devMat->cM);
|
||||||
|
free(remoteMatrix);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//new
|
||||||
|
int FallocEllDevice(void** deviceMat,unsigned int rows, unsigned int maxRowSize,
|
||||||
|
unsigned int nnzeros,
|
||||||
|
unsigned int columns, unsigned int elementType,
|
||||||
|
unsigned int firstIndex)
|
||||||
|
{ int i;
|
||||||
|
EllDeviceParams p;
|
||||||
|
|
||||||
|
p = getEllDeviceParams(rows, maxRowSize, nnzeros, columns, elementType, firstIndex);
|
||||||
|
i = allocEllDevice(deviceMat, &p);
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i);
|
||||||
|
}
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
void sspmdmm_gpu(float *z,int s, int vPitch, float *y, float alpha, float* cM, int* rP, int* rS,
|
||||||
|
int avgRowSize, int maxRowSize, int rows, int pitch, float *x, float beta, int firstIndex)
|
||||||
|
{
|
||||||
|
int i=0;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
|
||||||
|
for (i=0; i<s; i++)
|
||||||
|
{
|
||||||
|
if (PASS_RS) {
|
||||||
|
spgpuSellspmv (handle, (float*) z, (float*)y, alpha, (float*) cM, rP, pitch, pitch, rS,
|
||||||
|
NULL, avgRowSize, maxRowSize, rows, (float*)x, beta, firstIndex);
|
||||||
|
} else {
|
||||||
|
spgpuSellspmv (handle, (float*) z, (float*)y, alpha, (float*) cM, rP, pitch, pitch, NULL,
|
||||||
|
NULL, avgRowSize, maxRowSize, rows, (float*)x, beta, firstIndex);
|
||||||
|
}
|
||||||
|
z += vPitch;
|
||||||
|
y += vPitch;
|
||||||
|
x += vPitch;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//new
|
||||||
|
int spmvEllDeviceFloat(void *deviceMat, float alpha, void* deviceX,
|
||||||
|
float beta, void* deviceY)
|
||||||
|
{ int i=SPGPU_SUCCESS;
|
||||||
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||||
|
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||||
|
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||||
|
|
||||||
|
#ifdef VERBOSE
|
||||||
|
__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");
|
||||||
|
__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");
|
||||||
|
__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");
|
||||||
|
#endif
|
||||||
|
/*spgpuSellspmv (handle, (float*) y->v_, (float*)y->v_, alpha,
|
||||||
|
(float*) devMat->cM, devMat->rP, devMat->cMPitch,
|
||||||
|
devMat->rPPitch, devMat->rS, devMat->rows,
|
||||||
|
(float*)x->v_, beta, devMat->baseIndex);*/
|
||||||
|
sspmdmm_gpu ( (float *)y->v_,y->count_, y->pitch_, (float *)y->v_, alpha, (float *)devMat->cM, devMat->rP, devMat->rS,
|
||||||
|
devMat->avgRowSize, devMat->maxRowSize, devMat->rows, devMat->pitch,
|
||||||
|
(float *)x->v_, beta, devMat->baseIndex);
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void
|
||||||
|
dspmdmm_gpu (double *z,int s, int vPitch, double *y, double alpha, double* cM, int* rP,
|
||||||
|
int* rS, int avgRowSize, int maxRowSize, int rows, int pitch,
|
||||||
|
double *x, double beta, int firstIndex)
|
||||||
|
{
|
||||||
|
int i=0;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
for (i=0; i<s; i++)
|
||||||
|
{
|
||||||
|
if (PASS_RS) {
|
||||||
|
spgpuDellspmv (handle, (double*) z, (double*)y, alpha, (double*) cM, rP,
|
||||||
|
pitch, pitch, rS,
|
||||||
|
NULL, avgRowSize, maxRowSize, rows, (double*)x, beta, firstIndex);
|
||||||
|
} else {
|
||||||
|
spgpuDellspmv (handle, (double*) z, (double*)y, alpha, (double*) cM, rP,
|
||||||
|
pitch, pitch, NULL,
|
||||||
|
NULL, avgRowSize, maxRowSize, rows, (double*)x, beta, firstIndex);
|
||||||
|
}
|
||||||
|
z += vPitch;
|
||||||
|
y += vPitch;
|
||||||
|
x += vPitch;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//new
|
||||||
|
int spmvEllDeviceDouble(void *deviceMat, double alpha, void* deviceX,
|
||||||
|
double beta, void* deviceY)
|
||||||
|
{
|
||||||
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||||
|
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||||
|
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||||
|
|
||||||
|
/*spgpuDellspmv (handle, (double*) y->v_, (double*)y->v_, alpha, (double*) devMat->cM, devMat->rP, devMat->cMPitch, devMat->rPPitch, devMat->rS, devMat->rows, (double*)x->v_, beta, devMat->baseIndex);*/
|
||||||
|
/* fprintf(stderr,"From spmvEllDouble: mat %d %d %d %d y %d %d \n", */
|
||||||
|
/* devMat->avgRowSize, devMat->maxRowSize, devMat->rows, */
|
||||||
|
/* devMat->pitch, y->count_, y->pitch_); */
|
||||||
|
dspmdmm_gpu ((double *)y->v_, y->count_, y->pitch_, (double *)y->v_,
|
||||||
|
alpha, (double *)devMat->cM,
|
||||||
|
devMat->rP, devMat->rS, devMat->avgRowSize,
|
||||||
|
devMat->maxRowSize, devMat->rows, devMat->pitch,
|
||||||
|
(double *)x->v_, beta, devMat->baseIndex);
|
||||||
|
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
cspmdmm_gpu (cuFloatComplex *z, int s, int vPitch, cuFloatComplex *y,
|
||||||
|
cuFloatComplex alpha, cuFloatComplex* cM,
|
||||||
|
int* rP, int* rS, int avgRowSize, int maxRowSize, int rows, int pitch,
|
||||||
|
cuFloatComplex *x, cuFloatComplex beta, int firstIndex)
|
||||||
|
{
|
||||||
|
int i=0;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
for (i=0; i<s; i++)
|
||||||
|
{
|
||||||
|
if (PASS_RS) {
|
||||||
|
spgpuCellspmv (handle, (cuFloatComplex *) z, (cuFloatComplex *)y, alpha, (cuFloatComplex *) cM, rP,
|
||||||
|
pitch, pitch, rS, NULL, avgRowSize, maxRowSize, rows, (cuFloatComplex *) x, beta, firstIndex);
|
||||||
|
} else {
|
||||||
|
spgpuCellspmv (handle, (cuFloatComplex *) z, (cuFloatComplex *)y, alpha, (cuFloatComplex *) cM, rP,
|
||||||
|
pitch, pitch, NULL, NULL, avgRowSize, maxRowSize, rows, (cuFloatComplex *) x, beta, firstIndex);
|
||||||
|
}
|
||||||
|
z += vPitch;
|
||||||
|
y += vPitch;
|
||||||
|
x += vPitch;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int spmvEllDeviceFloatComplex(void *deviceMat, float complex alpha, void* deviceX,
|
||||||
|
float complex beta, void* deviceY)
|
||||||
|
{
|
||||||
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||||
|
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||||
|
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||||
|
|
||||||
|
cuFloatComplex a = make_cuFloatComplex(crealf(alpha),cimagf(alpha));
|
||||||
|
cuFloatComplex b = make_cuFloatComplex(crealf(beta),cimagf(beta));
|
||||||
|
cspmdmm_gpu ((cuFloatComplex *)y->v_, y->count_, y->pitch_, (cuFloatComplex *)y->v_, a, (cuFloatComplex *)devMat->cM,
|
||||||
|
devMat->rP, devMat->rS, devMat->avgRowSize, devMat->maxRowSize, devMat->rows, devMat->pitch,
|
||||||
|
(cuFloatComplex *)x->v_, b, devMat->baseIndex);
|
||||||
|
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
zspmdmm_gpu (cuDoubleComplex *z, int s, int vPitch, cuDoubleComplex *y, cuDoubleComplex alpha, cuDoubleComplex* cM,
|
||||||
|
int* rP, int* rS, int avgRowSize, int maxRowSize, int rows, int pitch,
|
||||||
|
cuDoubleComplex *x, cuDoubleComplex beta, int firstIndex)
|
||||||
|
{
|
||||||
|
int i=0;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
for (i=0; i<s; i++)
|
||||||
|
{
|
||||||
|
if (PASS_RS) {
|
||||||
|
spgpuZellspmv (handle, (cuDoubleComplex *) z, (cuDoubleComplex *)y, alpha, (cuDoubleComplex *) cM, rP,
|
||||||
|
pitch, pitch, rS, NULL, avgRowSize, maxRowSize, rows, (cuDoubleComplex *) x, beta, firstIndex);
|
||||||
|
} else {
|
||||||
|
spgpuZellspmv (handle, (cuDoubleComplex *) z, (cuDoubleComplex *)y, alpha, (cuDoubleComplex *) cM, rP,
|
||||||
|
pitch, pitch, NULL, NULL, avgRowSize, maxRowSize, rows, (cuDoubleComplex *) x, beta, firstIndex);
|
||||||
|
}
|
||||||
|
z += vPitch;
|
||||||
|
y += vPitch;
|
||||||
|
x += vPitch;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int spmvEllDeviceDoubleComplex(void *deviceMat, double complex alpha, void* deviceX,
|
||||||
|
double complex beta, void* deviceY)
|
||||||
|
{
|
||||||
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||||
|
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||||
|
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||||
|
|
||||||
|
cuDoubleComplex a = make_cuDoubleComplex(creal(alpha),cimag(alpha));
|
||||||
|
cuDoubleComplex b = make_cuDoubleComplex(creal(beta),cimag(beta));
|
||||||
|
zspmdmm_gpu ((cuDoubleComplex *)y->v_, y->count_, y->pitch_, (cuDoubleComplex *)y->v_, a, (cuDoubleComplex *)devMat->cM,
|
||||||
|
devMat->rP, devMat->rS, devMat->avgRowSize, devMat->maxRowSize, devMat->rows,
|
||||||
|
devMat->pitch, (cuDoubleComplex *)x->v_, b, devMat->baseIndex);
|
||||||
|
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag)
|
||||||
|
{ int i;
|
||||||
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||||
|
// Ex updateFromHost function
|
||||||
|
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float));
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||||||
|
//i = writeEllDevice(deviceMat, (void *) val, ja, irn);
|
||||||
|
/*if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i);
|
||||||
|
}*/
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag)
|
||||||
|
{ int i;
|
||||||
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||||
|
// Ex updateFromHost function
|
||||||
|
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double));
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||||||
|
|
||||||
|
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i);
|
||||||
|
}
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag)
|
||||||
|
{ int i;
|
||||||
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||||
|
// Ex updateFromHost function
|
||||||
|
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex));
|
||||||
|
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||||
|
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||||
|
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||||||
|
|
||||||
|
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i);
|
||||||
|
}*/
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag)
|
||||||
|
{ int i;
|
||||||
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||||
|
// Ex updateFromHost function
|
||||||
|
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex));
|
||||||
|
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||||
|
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||||
|
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||||||
|
|
||||||
|
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i);
|
||||||
|
}*/
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int readEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag)
|
||||||
|
{ int i;
|
||||||
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||||
|
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(float));
|
||||||
|
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||||
|
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||||
|
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||||||
|
/*i = readEllDevice(deviceMat, (void *) val, ja, irn);
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i);
|
||||||
|
}*/
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int readEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag)
|
||||||
|
{ int i;
|
||||||
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||||
|
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(double));
|
||||||
|
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||||
|
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||||
|
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||||||
|
/*if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
|
||||||
|
}*/
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int readEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag)
|
||||||
|
{ int i;
|
||||||
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||||
|
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex));
|
||||||
|
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||||
|
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||||
|
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||||||
|
/*if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
|
||||||
|
}*/
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int readEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag)
|
||||||
|
{ int i;
|
||||||
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||||
|
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex));
|
||||||
|
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||||
|
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||||
|
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||||||
|
/*if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
|
||||||
|
}*/
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getEllDevicePitch(void* deviceMat)
|
||||||
|
{ int i;
|
||||||
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||||
|
i = devMat->pitch; //old
|
||||||
|
//i = getPitchEllDevice(deviceMat);
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int getEllDeviceMaxRowSize(void* deviceMat)
|
||||||
|
{ int i;
|
||||||
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||||
|
i = devMat->maxRowSize;
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// New copying interface
|
||||||
|
|
||||||
|
int psiCopyCooToElgFloat(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn,
|
||||||
|
int *idisp, int *ja, float *val, void *deviceMat)
|
||||||
|
{ int i;
|
||||||
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||||
|
float *devVal;
|
||||||
|
int *devIdisp, *devJa;
|
||||||
|
spgpuHandle_t handle;
|
||||||
|
handle = psb_cudaGetHandle();
|
||||||
|
|
||||||
|
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
|
||||||
|
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
|
||||||
|
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(float));
|
||||||
|
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(float));
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
|
||||||
|
|
||||||
|
if (i==0) psi_cuda_s_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm,
|
||||||
|
(int *) devMat->rS,devIdisp,devJa,devVal,
|
||||||
|
(int *) devMat->diag, (int *) devMat->rP, (float *)devMat->cM);
|
||||||
|
// Ex updateFromHost function
|
||||||
|
//i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float));
|
||||||
|
//if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||||
|
//if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||||
|
|
||||||
|
|
||||||
|
freeRemoteBuffer(devIdisp);
|
||||||
|
freeRemoteBuffer(devJa);
|
||||||
|
freeRemoteBuffer(devVal);
|
||||||
|
|
||||||
|
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i);
|
||||||
|
}
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
int psiCopyCooToElgDouble(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn,
|
||||||
|
int *idisp, int *ja, double *val, void *deviceMat)
|
||||||
|
{ int i;
|
||||||
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||||
|
double *devVal;
|
||||||
|
int *devIdisp, *devJa;
|
||||||
|
spgpuHandle_t handle;
|
||||||
|
handle = psb_cudaGetHandle();
|
||||||
|
|
||||||
|
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
|
||||||
|
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
|
||||||
|
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(double));
|
||||||
|
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(double));
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
|
||||||
|
|
||||||
|
if (i==0) psi_cuda_d_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm,
|
||||||
|
(int *) devMat->rS,devIdisp,devJa,devVal,
|
||||||
|
(int *) devMat->diag, (int *) devMat->rP, (double *)devMat->cM);
|
||||||
|
// Ex updateFromHost function
|
||||||
|
//i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double));
|
||||||
|
//if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||||
|
//if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||||
|
|
||||||
|
|
||||||
|
freeRemoteBuffer(devIdisp);
|
||||||
|
freeRemoteBuffer(devJa);
|
||||||
|
freeRemoteBuffer(devVal);
|
||||||
|
|
||||||
|
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i);
|
||||||
|
}
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int psiCopyCooToElgFloatComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn,
|
||||||
|
int *idisp, int *ja, float complex *val, void *deviceMat)
|
||||||
|
{ int i;
|
||||||
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||||
|
float complex *devVal;
|
||||||
|
int *devIdisp, *devJa;
|
||||||
|
spgpuHandle_t handle;
|
||||||
|
handle = psb_cudaGetHandle();
|
||||||
|
|
||||||
|
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
|
||||||
|
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
|
||||||
|
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuFloatComplex));
|
||||||
|
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuFloatComplex));
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
|
||||||
|
|
||||||
|
if (i==0) psi_cuda_c_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm,
|
||||||
|
(int *) devMat->rS,devIdisp,devJa,devVal,
|
||||||
|
(int *) devMat->diag,(int *) devMat->rP, (float complex *)devMat->cM);
|
||||||
|
// Ex updateFromHost function
|
||||||
|
//i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float complex));
|
||||||
|
//if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||||
|
//if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||||
|
|
||||||
|
|
||||||
|
freeRemoteBuffer(devIdisp);
|
||||||
|
freeRemoteBuffer(devJa);
|
||||||
|
freeRemoteBuffer(devVal);
|
||||||
|
|
||||||
|
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloatComplex",i);
|
||||||
|
}
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
int psiCopyCooToElgDoubleComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn,
|
||||||
|
int *idisp, int *ja, double complex *val, void *deviceMat)
|
||||||
|
{ int i;
|
||||||
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||||
|
double complex *devVal;
|
||||||
|
int *devIdisp, *devJa;
|
||||||
|
spgpuHandle_t handle;
|
||||||
|
handle = psb_cudaGetHandle();
|
||||||
|
|
||||||
|
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
|
||||||
|
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
|
||||||
|
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuDoubleComplex));
|
||||||
|
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuDoubleComplex));
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
|
||||||
|
|
||||||
|
if (i==0) psi_cuda_z_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm,
|
||||||
|
(int *) devMat->rS,devIdisp,devJa,devVal,
|
||||||
|
(int *) devMat->diag,(int *) devMat->rP, (double complex *)devMat->cM);
|
||||||
|
// Ex updateFromHost function
|
||||||
|
//i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double complex));
|
||||||
|
//if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||||
|
//if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||||
|
|
||||||
|
|
||||||
|
freeRemoteBuffer(devIdisp);
|
||||||
|
freeRemoteBuffer(devJa);
|
||||||
|
freeRemoteBuffer(devVal);
|
||||||
|
|
||||||
|
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDoubleComplex",i);
|
||||||
|
}
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int dev_csputEllDeviceFloat(void* deviceMat, int nnz, void *ia, void *ja, void *val)
|
||||||
|
{ int i;
|
||||||
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||||
|
struct MultiVectDevice *devVal = (struct MultiVectDevice *) val;
|
||||||
|
struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia;
|
||||||
|
struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja;
|
||||||
|
float alpha=1.0;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
|
||||||
|
if (nnz <=0) return SPGPU_SUCCESS;
|
||||||
|
//fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt);
|
||||||
|
|
||||||
|
spgpuSellcsput(handle,alpha,(float *) devMat->cM,
|
||||||
|
devMat->rP,devMat->pitch, devMat->pitch, devMat->rS,
|
||||||
|
nnz, devIa->v_, devJa->v_, (float *) devVal->v_, 1);
|
||||||
|
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int dev_csputEllDeviceDouble(void* deviceMat, int nnz, void *ia, void *ja, void *val)
|
||||||
|
{ int i;
|
||||||
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||||
|
struct MultiVectDevice *devVal = (struct MultiVectDevice *) val;
|
||||||
|
struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia;
|
||||||
|
struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja;
|
||||||
|
double alpha=1.0;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
|
||||||
|
if (nnz <=0) return SPGPU_SUCCESS;
|
||||||
|
//fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt);
|
||||||
|
|
||||||
|
spgpuDellcsput(handle,alpha,(double *) devMat->cM,
|
||||||
|
devMat->rP,devMat->pitch, devMat->pitch, devMat->rS,
|
||||||
|
nnz, devIa->v_, devJa->v_, (double *) devVal->v_, 1);
|
||||||
|
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int dev_csputEllDeviceFloatComplex(void* deviceMat, int nnz,
|
||||||
|
void *ia, void *ja, void *val)
|
||||||
|
{ int i;
|
||||||
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||||
|
struct MultiVectDevice *devVal = (struct MultiVectDevice *) val;
|
||||||
|
struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia;
|
||||||
|
struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja;
|
||||||
|
cuFloatComplex alpha = make_cuFloatComplex(1.0, 0.0);
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
|
||||||
|
if (nnz <=0) return SPGPU_SUCCESS;
|
||||||
|
//fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt);
|
||||||
|
|
||||||
|
spgpuCellcsput(handle,alpha,(cuFloatComplex *) devMat->cM,
|
||||||
|
devMat->rP,devMat->pitch, devMat->pitch, devMat->rS,
|
||||||
|
nnz, devIa->v_, devJa->v_, (cuFloatComplex *) devVal->v_, 1);
|
||||||
|
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int dev_csputEllDeviceDoubleComplex(void* deviceMat, int nnz,
|
||||||
|
void *ia, void *ja, void *val)
|
||||||
|
{ int i;
|
||||||
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
||||||
|
struct MultiVectDevice *devVal = (struct MultiVectDevice *) val;
|
||||||
|
struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia;
|
||||||
|
struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja;
|
||||||
|
cuDoubleComplex alpha = make_cuDoubleComplex(1.0, 0.0);
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
|
||||||
|
if (nnz <=0) return SPGPU_SUCCESS;
|
||||||
|
//fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt);
|
||||||
|
|
||||||
|
spgpuZellcsput(handle,alpha,(cuDoubleComplex *) devMat->cM,
|
||||||
|
devMat->rP,devMat->pitch, devMat->pitch, devMat->rS,
|
||||||
|
nnz, devIa->v_, devJa->v_, (cuDoubleComplex *) devVal->v_, 1);
|
||||||
|
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
@ -0,0 +1,177 @@
|
|||||||
|
/* Parallel Sparse BLAS GPU plugin */
|
||||||
|
/* (C) Copyright 2013 */
|
||||||
|
|
||||||
|
/* Salvatore Filippone */
|
||||||
|
/* Alessandro Fanfarillo */
|
||||||
|
|
||||||
|
/* Redistribution and use in source and binary forms, with or without */
|
||||||
|
/* modification, are permitted provided that the following conditions */
|
||||||
|
/* are met: */
|
||||||
|
/* 1. Redistributions of source code must retain the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer. */
|
||||||
|
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||||
|
/* notice, this list of conditions, and the following disclaimer in the */
|
||||||
|
/* documentation and/or other materials provided with the distribution. */
|
||||||
|
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||||
|
/* not be used to endorse or promote products derived from this */
|
||||||
|
/* software without specific written permission. */
|
||||||
|
|
||||||
|
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||||
|
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||||
|
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||||
|
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||||
|
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||||
|
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||||
|
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||||
|
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||||
|
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||||
|
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef _ELLDEV_H_
|
||||||
|
#define _ELLDEV_H_
|
||||||
|
|
||||||
|
#include "cintrf.h"
|
||||||
|
#include "cuComplex.h"
|
||||||
|
#include "ell.h"
|
||||||
|
|
||||||
|
struct EllDevice
|
||||||
|
{
|
||||||
|
// Compressed matrix
|
||||||
|
void *cM; //it can be float or double
|
||||||
|
|
||||||
|
// row pointers (same size of cM)
|
||||||
|
int *rP;
|
||||||
|
int *diag;
|
||||||
|
// row size
|
||||||
|
int *rS;
|
||||||
|
|
||||||
|
//matrix size (uncompressed)
|
||||||
|
int rows;
|
||||||
|
int columns;
|
||||||
|
|
||||||
|
int pitch; //old
|
||||||
|
|
||||||
|
int cMPitch;
|
||||||
|
|
||||||
|
int rPPitch;
|
||||||
|
|
||||||
|
int maxRowSize;
|
||||||
|
int avgRowSize;
|
||||||
|
|
||||||
|
//allocation size (in elements)
|
||||||
|
int allocsize;
|
||||||
|
|
||||||
|
/*(i.e. 0 for C, 1 for Fortran)*/
|
||||||
|
int baseIndex;
|
||||||
|
/* real/complex, single/double */
|
||||||
|
int dataType;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef struct EllDeviceParams
|
||||||
|
{
|
||||||
|
// The resulting allocation for cM and rP will be pitch*maxRowSize*(size of the elementType)
|
||||||
|
unsigned int elementType;
|
||||||
|
|
||||||
|
// Pitch (in number of elements)
|
||||||
|
unsigned int pitch;
|
||||||
|
|
||||||
|
// Number of rows.
|
||||||
|
// Used to allocate rS array
|
||||||
|
unsigned int rows;
|
||||||
|
|
||||||
|
// Number of columns.
|
||||||
|
// Used for error-checking
|
||||||
|
unsigned int columns;
|
||||||
|
|
||||||
|
// Largest row size
|
||||||
|
unsigned int maxRowSize;
|
||||||
|
unsigned int avgRowSize;
|
||||||
|
|
||||||
|
// First index (e.g 0 or 1)
|
||||||
|
unsigned int firstIndex;
|
||||||
|
} EllDeviceParams;
|
||||||
|
|
||||||
|
int FallocEllDevice(void** deviceMat, unsigned int rows, unsigned int maxRowSize,
|
||||||
|
unsigned int nnzeros,
|
||||||
|
unsigned int columns, unsigned int elementType,
|
||||||
|
unsigned int firstIndex);
|
||||||
|
int allocEllDevice(void ** remoteMatrix, EllDeviceParams* params);
|
||||||
|
void freeEllDevice(void* remoteMatrix);
|
||||||
|
|
||||||
|
int writeEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag);
|
||||||
|
int writeEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag);
|
||||||
|
int writeEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag);
|
||||||
|
int writeEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag);
|
||||||
|
|
||||||
|
int readEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag);
|
||||||
|
int readEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag);
|
||||||
|
int readEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag);
|
||||||
|
int readEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag);
|
||||||
|
|
||||||
|
int spmvEllDeviceFloat(void *deviceMat, float alpha, void* deviceX,
|
||||||
|
float beta, void* deviceY);
|
||||||
|
int spmvEllDeviceDouble(void *deviceMat, double alpha, void* deviceX,
|
||||||
|
double beta, void* deviceY);
|
||||||
|
int spmvEllDeviceFloatComplex(void *deviceMat, float complex alpha, void* deviceX,
|
||||||
|
float complex beta, void* deviceY);
|
||||||
|
int spmvEllDeviceDoubleComplex(void *deviceMat, double complex alpha, void* deviceX,
|
||||||
|
double complex beta, void* deviceY);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
int psiCopyCooToElgFloat(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn,
|
||||||
|
int *idisp, int *ja, float *val, void *deviceMat);
|
||||||
|
|
||||||
|
int psiCopyCooToElgDouble(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn,
|
||||||
|
int *idisp, int *ja, double *val, void *deviceMat);
|
||||||
|
|
||||||
|
int psiCopyCooToElgFloatComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn,
|
||||||
|
int *idisp, int *ja, float complex *val, void *deviceMat);
|
||||||
|
|
||||||
|
int psiCopyCooToElgDoubleComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn,
|
||||||
|
int *idisp, int *ja, double complex *val, void *deviceMat);
|
||||||
|
|
||||||
|
|
||||||
|
void psi_cuda_s_CopyCooToElg(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx,
|
||||||
|
int hacksz, int ldv, int nzm,
|
||||||
|
int *rS,int *devIdisp, int *devJa, float *devVal,
|
||||||
|
int *idiag, int *rP, float *cM);
|
||||||
|
|
||||||
|
void psi_cuda_d_CopyCooToElg(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx,
|
||||||
|
int hacksz, int ldv, int nzm,
|
||||||
|
int *rS,int *devIdisp, int *devJa, double *devVal,
|
||||||
|
int *idiag, int *rP, double *cM);
|
||||||
|
|
||||||
|
void psi_cuda_c_CopyCooToElg(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx,
|
||||||
|
int hacksz, int ldv, int nzm,
|
||||||
|
int *rS,int *devIdisp, int *devJa, float complex *devVal,
|
||||||
|
int *idiag, int *rP, float complex *cM);
|
||||||
|
|
||||||
|
void psi_cuda_z_CopyCooToElg(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx,
|
||||||
|
int hacksz, int ldv, int nzm,
|
||||||
|
int *rS,int *devIdisp, int *devJa, double complex *devVal,
|
||||||
|
int *idiag, int *rP, double complex *cM);
|
||||||
|
|
||||||
|
|
||||||
|
int dev_csputEllDeviceFloat(void* deviceMat, int nnz,
|
||||||
|
void *ia, void *ja, void *val);
|
||||||
|
int dev_csputEllDeviceDouble(void* deviceMat, int nnz,
|
||||||
|
void *ia, void *ja, void *val);
|
||||||
|
int dev_csputEllDeviceFloatComplex(void* deviceMat, int nnz,
|
||||||
|
void *ia, void *ja, void *val);
|
||||||
|
int dev_csputEllDeviceDoubleComplex(void* deviceMat, int nnz,
|
||||||
|
void *ia, void *ja, void *val);
|
||||||
|
|
||||||
|
void zeroEllDevice(void* deviceMat);
|
||||||
|
|
||||||
|
int getEllDevicePitch(void* deviceMat);
|
||||||
|
|
||||||
|
// sparse Ell matrix-vector product
|
||||||
|
//int spmvEllDeviceFloat(void *deviceMat, float* alpha, void* deviceX, float* beta, void* deviceY);
|
||||||
|
//int spmvEllDeviceDouble(void *deviceMat, double* alpha, void* deviceX, double* beta, void* deviceY);
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,321 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
|
||||||
|
module elldev_mod
|
||||||
|
use iso_c_binding
|
||||||
|
use core_mod
|
||||||
|
|
||||||
|
type, bind(c) :: elldev_parms
|
||||||
|
integer(c_int) :: element_type
|
||||||
|
integer(c_int) :: pitch
|
||||||
|
integer(c_int) :: rows
|
||||||
|
integer(c_int) :: columns
|
||||||
|
integer(c_int) :: maxRowSize
|
||||||
|
integer(c_int) :: avgRowSize
|
||||||
|
integer(c_int) :: firstIndex
|
||||||
|
end type elldev_parms
|
||||||
|
|
||||||
|
interface
|
||||||
|
function FgetEllDeviceParams(rows, maxRowSize, nnzeros, columns, elementType, firstIndex) &
|
||||||
|
& result(res) bind(c,name='getEllDeviceParams')
|
||||||
|
use iso_c_binding
|
||||||
|
import :: elldev_parms
|
||||||
|
type(elldev_parms) :: res
|
||||||
|
integer(c_int), value :: rows,maxRowSize,nnzeros,columns,elementType,firstIndex
|
||||||
|
end function FgetEllDeviceParams
|
||||||
|
end interface
|
||||||
|
|
||||||
|
|
||||||
|
interface
|
||||||
|
function FallocEllDevice(deviceMat,rows,maxRowSize,nnzeros,columns,&
|
||||||
|
& elementType,firstIndex) &
|
||||||
|
& result(res) bind(c,name='FallocEllDevice')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
integer(c_int), value :: rows,maxRowSize,nnzeros,columns,elementType,firstIndex
|
||||||
|
type(c_ptr) :: deviceMat
|
||||||
|
end function FallocEllDevice
|
||||||
|
end interface
|
||||||
|
|
||||||
|
|
||||||
|
interface writeEllDevice
|
||||||
|
|
||||||
|
function writeEllDeviceFloat(deviceMat,val,ja,ldj,irn,idiag) &
|
||||||
|
& result(res) bind(c,name='writeEllDeviceFloat')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
integer(c_int), value :: ldj
|
||||||
|
real(c_float) :: val(ldj,*)
|
||||||
|
integer(c_int) :: ja(ldj,*),irn(*),idiag(*)
|
||||||
|
end function writeEllDeviceFloat
|
||||||
|
|
||||||
|
function writeEllDeviceDouble(deviceMat,val,ja,ldj,irn,idiag) &
|
||||||
|
& result(res) bind(c,name='writeEllDeviceDouble')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
integer(c_int), value :: ldj
|
||||||
|
real(c_double) :: val(ldj,*)
|
||||||
|
integer(c_int) :: ja(ldj,*),irn(*),idiag(*)
|
||||||
|
end function writeEllDeviceDouble
|
||||||
|
|
||||||
|
function writeEllDeviceFloatComplex(deviceMat,val,ja,ldj,irn,idiag) &
|
||||||
|
& result(res) bind(c,name='writeEllDeviceFloatComplex')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
integer(c_int), value :: ldj
|
||||||
|
complex(c_float_complex) :: val(ldj,*)
|
||||||
|
integer(c_int) :: ja(ldj,*),irn(*),idiag(*)
|
||||||
|
end function writeEllDeviceFloatComplex
|
||||||
|
|
||||||
|
function writeEllDeviceDoubleComplex(deviceMat,val,ja,ldj,irn,idiag) &
|
||||||
|
& result(res) bind(c,name='writeEllDeviceDoubleComplex')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
integer(c_int), value :: ldj
|
||||||
|
complex(c_double_complex) :: val(ldj,*)
|
||||||
|
integer(c_int) :: ja(ldj,*),irn(*),idiag(*)
|
||||||
|
end function writeEllDeviceDoubleComplex
|
||||||
|
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface readEllDevice
|
||||||
|
|
||||||
|
function readEllDeviceFloat(deviceMat,val,ja,ldj,irn,idiag) &
|
||||||
|
& result(res) bind(c,name='readEllDeviceFloat')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
integer(c_int), value :: ldj
|
||||||
|
real(c_float) :: val(ldj,*)
|
||||||
|
integer(c_int) :: ja(ldj,*),irn(*),idiag(*)
|
||||||
|
end function readEllDeviceFloat
|
||||||
|
|
||||||
|
function readEllDeviceDouble(deviceMat,val,ja,ldj,irn,idiag) &
|
||||||
|
& result(res) bind(c,name='readEllDeviceDouble')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
integer(c_int), value :: ldj
|
||||||
|
real(c_double) :: val(ldj,*)
|
||||||
|
integer(c_int) :: ja(ldj,*),irn(*),idiag(*)
|
||||||
|
end function readEllDeviceDouble
|
||||||
|
|
||||||
|
function readEllDeviceFloatComplex(deviceMat,val,ja,ldj,irn,idiag) &
|
||||||
|
& result(res) bind(c,name='readEllDeviceFloatComplex')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
integer(c_int), value :: ldj
|
||||||
|
complex(c_float_complex) :: val(ldj,*)
|
||||||
|
integer(c_int) :: ja(ldj,*),irn(*),idiag(*)
|
||||||
|
end function readEllDeviceFloatComplex
|
||||||
|
|
||||||
|
function readEllDeviceDoubleComplex(deviceMat,val,ja,ldj,irn,idiag) &
|
||||||
|
& result(res) bind(c,name='readEllDeviceDoubleComplex')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
integer(c_int), value :: ldj
|
||||||
|
complex(c_double_complex) :: val(ldj,*)
|
||||||
|
integer(c_int) :: ja(ldj,*),irn(*),idiag(*)
|
||||||
|
end function readEllDeviceDoubleComplex
|
||||||
|
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface
|
||||||
|
subroutine freeEllDevice(deviceMat) &
|
||||||
|
& bind(c,name='freeEllDevice')
|
||||||
|
use iso_c_binding
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
end subroutine freeEllDevice
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface
|
||||||
|
subroutine zeroEllDevice(deviceMat) &
|
||||||
|
& bind(c,name='zeroEllDevice')
|
||||||
|
use iso_c_binding
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
end subroutine zeroEllDevice
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface
|
||||||
|
subroutine resetEllTimer() bind(c,name='resetEllTimer')
|
||||||
|
use iso_c_binding
|
||||||
|
end subroutine resetEllTimer
|
||||||
|
end interface
|
||||||
|
interface
|
||||||
|
function getEllTimer() &
|
||||||
|
& bind(c,name='getEllTimer') result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
real(c_double) :: res
|
||||||
|
end function getEllTimer
|
||||||
|
end interface
|
||||||
|
|
||||||
|
|
||||||
|
interface
|
||||||
|
function getEllDevicePitch(deviceMat) &
|
||||||
|
& bind(c,name='getEllDevicePitch') result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function getEllDevicePitch
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface
|
||||||
|
function getEllDeviceMaxRowSize(deviceMat) &
|
||||||
|
& bind(c,name='getEllDeviceMaxRowSize') result(res)
|
||||||
|
use iso_c_binding
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
integer(c_int) :: res
|
||||||
|
end function getEllDeviceMaxRowSize
|
||||||
|
end interface
|
||||||
|
|
||||||
|
|
||||||
|
interface psi_CopyCooToElg
|
||||||
|
function psiCopyCooToElgFloat(nr, nc, nza, hacksz, ldv, nzm, irn, &
|
||||||
|
& idisp, ja, val, deviceMat) &
|
||||||
|
& result(res) bind(c,name='psiCopyCooToElgFloat')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
integer(c_int), value :: nr,nc,nza,hacksz,ldv,nzm
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
real(c_float) :: val(*)
|
||||||
|
integer(c_int) :: irn(*),idisp(*),ja(*)
|
||||||
|
end function psiCopyCooToElgFloat
|
||||||
|
function psiCopyCooToElgDouble(nr, nc, nza, hacksz, ldv, nzm, irn, &
|
||||||
|
& idisp, ja, val, deviceMat) &
|
||||||
|
& result(res) bind(c,name='psiCopyCooToElgDouble')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
integer(c_int), value :: nr,nc,nza,hacksz,ldv,nzm
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
real(c_double) :: val(*)
|
||||||
|
integer(c_int) :: irn(*),idisp(*),ja(*)
|
||||||
|
end function psiCopyCooToElgDouble
|
||||||
|
function psiCopyCooToElgFloatComplex(nr, nc, nza, hacksz, ldv, nzm, irn, &
|
||||||
|
& idisp, ja, val, deviceMat) &
|
||||||
|
& result(res) bind(c,name='psiCopyCooToElgFloatComplex')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
integer(c_int), value :: nr,nc,nza,hacksz,ldv,nzm
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
complex(c_float_complex) :: val(*)
|
||||||
|
integer(c_int) :: irn(*),idisp(*),ja(*)
|
||||||
|
end function psiCopyCooToElgFloatComplex
|
||||||
|
function psiCopyCooToElgDoubleComplex(nr, nc, nza, hacksz, ldv, nzm, irn, &
|
||||||
|
& idisp, ja, val, deviceMat) &
|
||||||
|
& result(res) bind(c,name='psiCopyCooToElgDoubleComplex')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
integer(c_int), value :: nr,nc,nza,hacksz,ldv,nzm
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
complex(c_double_complex) :: val(*)
|
||||||
|
integer(c_int) :: irn(*),idisp(*),ja(*)
|
||||||
|
end function psiCopyCooToElgDoubleComplex
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface csputEllDeviceFloat
|
||||||
|
function dev_csputEllDeviceFloat(deviceMat, nnz, ia, ja, val) &
|
||||||
|
& result(res) bind(c,name='dev_csputEllDeviceFloat')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat , ia, ja, val
|
||||||
|
integer(c_int), value :: nnz
|
||||||
|
end function dev_csputEllDeviceFloat
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface csputEllDeviceDouble
|
||||||
|
function dev_csputEllDeviceDouble(deviceMat, nnz, ia, ja, val) &
|
||||||
|
& result(res) bind(c,name='dev_csputEllDeviceDouble')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat , ia, ja, val
|
||||||
|
integer(c_int), value :: nnz
|
||||||
|
end function dev_csputEllDeviceDouble
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface csputEllDeviceFloatComplex
|
||||||
|
function dev_csputEllDeviceFloatComplex(deviceMat, nnz, ia, ja, val) &
|
||||||
|
& result(res) bind(c,name='dev_csputEllDeviceFloatComplex')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat , ia, ja, val
|
||||||
|
integer(c_int), value :: nnz
|
||||||
|
end function dev_csputEllDeviceFloatComplex
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface csputEllDeviceDoubleComplex
|
||||||
|
function dev_csputEllDeviceDoubleComplex(deviceMat, nnz, ia, ja, val) &
|
||||||
|
& result(res) bind(c,name='dev_csputEllDeviceDoubleComplex')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat , ia, ja, val
|
||||||
|
integer(c_int), value :: nnz
|
||||||
|
end function dev_csputEllDeviceDoubleComplex
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface spmvEllDevice
|
||||||
|
function spmvEllDeviceFloat(deviceMat,alpha,x,beta,y) &
|
||||||
|
& result(res) bind(c,name='spmvEllDeviceFloat')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat, x, y
|
||||||
|
real(c_float),value :: alpha, beta
|
||||||
|
end function spmvEllDeviceFloat
|
||||||
|
function spmvEllDeviceDouble(deviceMat,alpha,x,beta,y) &
|
||||||
|
& result(res) bind(c,name='spmvEllDeviceDouble')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat, x, y
|
||||||
|
real(c_double),value :: alpha, beta
|
||||||
|
end function spmvEllDeviceDouble
|
||||||
|
function spmvEllDeviceFloatComplex(deviceMat,alpha,x,beta,y) &
|
||||||
|
& result(res) bind(c,name='spmvEllDeviceFloatComplex')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat, x, y
|
||||||
|
complex(c_float_complex),value :: alpha, beta
|
||||||
|
end function spmvEllDeviceFloatComplex
|
||||||
|
function spmvEllDeviceDoubleComplex(deviceMat,alpha,x,beta,y) &
|
||||||
|
& result(res) bind(c,name='spmvEllDeviceDoubleComplex')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat, x, y
|
||||||
|
complex(c_double_complex),value :: alpha, beta
|
||||||
|
end function spmvEllDeviceDoubleComplex
|
||||||
|
end interface
|
||||||
|
|
||||||
|
end module elldev_mod
|
@ -0,0 +1,76 @@
|
|||||||
|
/* Parallel Sparse BLAS GPU plugin */
|
||||||
|
/* (C) Copyright 2013 */
|
||||||
|
|
||||||
|
/* Salvatore Filippone */
|
||||||
|
/* Alessandro Fanfarillo */
|
||||||
|
|
||||||
|
/* Redistribution and use in source and binary forms, with or without */
|
||||||
|
/* modification, are permitted provided that the following conditions */
|
||||||
|
/* are met: */
|
||||||
|
/* 1. Redistributions of source code must retain the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer. */
|
||||||
|
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||||
|
/* notice, this list of conditions, and the following disclaimer in the */
|
||||||
|
/* documentation and/or other materials provided with the distribution. */
|
||||||
|
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||||
|
/* not be used to endorse or promote products derived from this */
|
||||||
|
/* software without specific written permission. */
|
||||||
|
|
||||||
|
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||||
|
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||||
|
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||||
|
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||||
|
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||||
|
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||||
|
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||||
|
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||||
|
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||||
|
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
#include "cintrf.h"
|
||||||
|
#include "fcusparse.h"
|
||||||
|
|
||||||
|
static cusparseHandle_t *cusparse_handle=NULL;
|
||||||
|
|
||||||
|
|
||||||
|
void setHandle(cusparseHandle_t);
|
||||||
|
|
||||||
|
int FcusparseCreate()
|
||||||
|
{
|
||||||
|
int ret=CUSPARSE_STATUS_SUCCESS;
|
||||||
|
cusparseHandle_t *handle;
|
||||||
|
if (cusparse_handle == NULL) {
|
||||||
|
if ((handle = (cusparseHandle_t *)malloc(sizeof(cusparseHandle_t)))==NULL)
|
||||||
|
return((int) CUSPARSE_STATUS_ALLOC_FAILED);
|
||||||
|
ret = (int)cusparseCreate(handle);
|
||||||
|
if (ret == CUSPARSE_STATUS_SUCCESS)
|
||||||
|
cusparse_handle = handle;
|
||||||
|
}
|
||||||
|
fprintf(stderr,"Created cusparses_handle\n");
|
||||||
|
return (ret);
|
||||||
|
}
|
||||||
|
|
||||||
|
int FcusparseDestroy()
|
||||||
|
{
|
||||||
|
int val;
|
||||||
|
if (cusparse_handle!=NULL){
|
||||||
|
val = (int) cusparseDestroy(*cusparse_handle);
|
||||||
|
free(cusparse_handle);
|
||||||
|
}
|
||||||
|
cusparse_handle=NULL;
|
||||||
|
return(val);
|
||||||
|
}
|
||||||
|
cusparseHandle_t *getHandle()
|
||||||
|
{
|
||||||
|
if (cusparse_handle == NULL)
|
||||||
|
FcusparseCreate();
|
||||||
|
return(cusparse_handle);
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,68 @@
|
|||||||
|
/* Parallel Sparse BLAS GPU plugin */
|
||||||
|
/* (C) Copyright 2013 */
|
||||||
|
|
||||||
|
/* Salvatore Filippone */
|
||||||
|
/* Alessandro Fanfarillo */
|
||||||
|
|
||||||
|
/* Redistribution and use in source and binary forms, with or without */
|
||||||
|
/* modification, are permitted provided that the following conditions */
|
||||||
|
/* are met: */
|
||||||
|
/* 1. Redistributions of source code must retain the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer. */
|
||||||
|
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||||
|
/* notice, this list of conditions, and the following disclaimer in the */
|
||||||
|
/* documentation and/or other materials provided with the distribution. */
|
||||||
|
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||||
|
/* not be used to endorse or promote products derived from this */
|
||||||
|
/* software without specific written permission. */
|
||||||
|
|
||||||
|
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||||
|
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||||
|
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||||
|
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||||
|
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||||
|
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||||
|
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||||
|
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||||
|
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||||
|
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef FCUSPARSE_
|
||||||
|
#define FCUSPARSE_
|
||||||
|
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
#if CUDA_SHORT_VERSION <= 10
|
||||||
|
#include <cusparse_v2.h>
|
||||||
|
#else
|
||||||
|
#include <cusparse.h>
|
||||||
|
#endif
|
||||||
|
#include "cintrf.h"
|
||||||
|
|
||||||
|
int FcusparseCreate();
|
||||||
|
int FcusparseDestroy();
|
||||||
|
cusparseHandle_t *getHandle();
|
||||||
|
|
||||||
|
#define CHECK_CUDA(func) \
|
||||||
|
{ \
|
||||||
|
cudaError_t status = (func); \
|
||||||
|
if (status != cudaSuccess) { \
|
||||||
|
printf("CUDA API failed at line %d with error: %s (%d)\n", \
|
||||||
|
__LINE__, cudaGetErrorString(status), status); \
|
||||||
|
return EXIT_FAILURE; \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define CHECK_CUSPARSE(func) \
|
||||||
|
{ \
|
||||||
|
cusparseStatus_t status = (func); \
|
||||||
|
if (status != CUSPARSE_STATUS_SUCCESS) { \
|
||||||
|
printf("CUSPARSE API failed at line %d with error: %s (%d)\n", \
|
||||||
|
__LINE__, cusparseGetErrorString(status), status); \
|
||||||
|
return EXIT_FAILURE; \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,824 @@
|
|||||||
|
/* Parallel Sparse BLAS GPU plugin */
|
||||||
|
/* (C) Copyright 2013 */
|
||||||
|
|
||||||
|
/* Salvatore Filippone */
|
||||||
|
/* Alessandro Fanfarillo */
|
||||||
|
|
||||||
|
/* Redistribution and use in source and binary forms, with or without */
|
||||||
|
/* modification, are permitted provided that the following conditions */
|
||||||
|
/* are met: */
|
||||||
|
/* 1. Redistributions of source code must retain the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer. */
|
||||||
|
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||||
|
/* notice, this list of conditions, and the following disclaimer in the */
|
||||||
|
/* documentation and/or other materials provided with the distribution. */
|
||||||
|
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||||
|
/* not be used to endorse or promote products derived from this */
|
||||||
|
/* software without specific written permission. */
|
||||||
|
|
||||||
|
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||||
|
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||||
|
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||||
|
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||||
|
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||||
|
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||||
|
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||||
|
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||||
|
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||||
|
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
|
||||||
|
typedef struct T_CSRGDeviceMat
|
||||||
|
{
|
||||||
|
#if CUDA_SHORT_VERSION <= 10
|
||||||
|
cusparseMatDescr_t descr;
|
||||||
|
cusparseSolveAnalysisInfo_t triang;
|
||||||
|
#elif CUDA_VERSION < 11030
|
||||||
|
cusparseMatDescr_t descr;
|
||||||
|
csrsv2Info_t triang;
|
||||||
|
size_t mvbsize, svbsize;
|
||||||
|
void *mvbuffer, *svbuffer;
|
||||||
|
#else
|
||||||
|
cusparseSpMatDescr_t *spmvDescr;
|
||||||
|
cusparseSpSVDescr_t *spsvDescr;
|
||||||
|
size_t mvbsize, svbsize;
|
||||||
|
void *mvbuffer, *svbuffer;
|
||||||
|
#endif
|
||||||
|
int m, n, nz;
|
||||||
|
TYPE *val;
|
||||||
|
int *irp;
|
||||||
|
int *ja;
|
||||||
|
} T_CSRGDeviceMat;
|
||||||
|
|
||||||
|
/* Interoperability: type coming from Fortran side to distinguish D/S/C/Z. */
|
||||||
|
typedef struct T_Cmat
|
||||||
|
{
|
||||||
|
T_CSRGDeviceMat *mat;
|
||||||
|
} T_Cmat;
|
||||||
|
|
||||||
|
#if CUDA_SHORT_VERSION <= 10
|
||||||
|
typedef struct T_HYBGDeviceMat
|
||||||
|
{
|
||||||
|
cusparseMatDescr_t descr;
|
||||||
|
cusparseSolveAnalysisInfo_t triang;
|
||||||
|
cusparseHybMat_t hybA;
|
||||||
|
int m, n, nz;
|
||||||
|
TYPE *val;
|
||||||
|
int *irp;
|
||||||
|
int *ja;
|
||||||
|
} T_HYBGDeviceMat;
|
||||||
|
|
||||||
|
|
||||||
|
/* Interoperability: type coming from Fortran side to distinguish D/S/C/Z. */
|
||||||
|
typedef struct T_Hmat
|
||||||
|
{
|
||||||
|
T_HYBGDeviceMat *mat;
|
||||||
|
} T_Hmat;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int T_spmvCSRGDevice(T_Cmat *Mat, TYPE alpha, void *deviceX,
|
||||||
|
TYPE beta, void *deviceY);
|
||||||
|
int T_spsvCSRGDevice(T_Cmat *Mat, TYPE alpha, void *deviceX,
|
||||||
|
TYPE beta, void *deviceY);
|
||||||
|
int T_CSRGDeviceAlloc(T_Cmat *Mat,int nr, int nc, int nz);
|
||||||
|
int T_CSRGDeviceFree(T_Cmat *Mat);
|
||||||
|
|
||||||
|
|
||||||
|
int T_CSRGHost2Device(T_Cmat *Mat, int m, int n, int nz,
|
||||||
|
int *irp, int *ja, TYPE *val);
|
||||||
|
int T_CSRGDevice2Host(T_Cmat *Mat, int m, int n, int nz,
|
||||||
|
int *irp, int *ja, TYPE *val);
|
||||||
|
|
||||||
|
int T_CSRGDeviceGetParms(T_Cmat *Mat,int *nr, int *nc, int *nz);
|
||||||
|
|
||||||
|
#if CUDA_SHORT_VERSION <= 10
|
||||||
|
int T_CSRGDeviceSetMatType(T_Cmat *Mat, int type);
|
||||||
|
int T_CSRGDeviceSetMatFillMode(T_Cmat *Mat, int type);
|
||||||
|
int T_CSRGDeviceSetMatDiagType(T_Cmat *Mat, int type);
|
||||||
|
int T_CSRGDeviceSetMatIndexBase(T_Cmat *Mat, int type);
|
||||||
|
int T_CSRGDeviceCsrsmAnalysis(T_Cmat *Mat);
|
||||||
|
#elif CUDA_VERSION < 11030
|
||||||
|
int T_CSRGDeviceSetMatType(T_Cmat *Mat, int type);
|
||||||
|
int T_CSRGDeviceSetMatFillMode(T_Cmat *Mat, int type);
|
||||||
|
int T_CSRGDeviceSetMatDiagType(T_Cmat *Mat, int type);
|
||||||
|
int T_CSRGDeviceSetMatIndexBase(T_Cmat *Mat, int type);
|
||||||
|
#else
|
||||||
|
|
||||||
|
int T_CSRGCreateSpMVDescr(T_CSRGDeviceMat *cMat);
|
||||||
|
int T_CSRGIsNullSvBuffer(T_CSRGDeviceMat *cMat);
|
||||||
|
int T_CSRGIsNullSvDescr(T_CSRGDeviceMat *cMat);
|
||||||
|
int T_CSRGIsNullMvDescr(T_CSRGDeviceMat *cMat);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#if CUDA_SHORT_VERSION <= 10
|
||||||
|
|
||||||
|
|
||||||
|
int T_HYBGDeviceFree(T_Hmat *Matrix);
|
||||||
|
int T_spmvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX,
|
||||||
|
TYPE beta, void *deviceY);
|
||||||
|
int T_HYBGDeviceAlloc(T_Hmat *Matrix,int nr, int nc, int nz);
|
||||||
|
int T_HYBGDeviceSetMatDiagType(T_Hmat *Matrix, int type);
|
||||||
|
int T_HYBGDeviceSetMatIndexBase(T_Hmat *Matrix, int type);
|
||||||
|
int T_HYBGDeviceSetMatType(T_Hmat *Matrix, int type);
|
||||||
|
int T_HYBGDeviceSetMatFillMode(T_Hmat *Matrix, int type);
|
||||||
|
int T_HYBGDeviceHybsmAnalysis(T_Hmat *Matrix);
|
||||||
|
int T_spsvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX,
|
||||||
|
TYPE beta, void *deviceY);
|
||||||
|
int T_HYBGHost2Device(T_Hmat *Matrix, int m, int n, int nz,
|
||||||
|
int *irp, int *ja, TYPE *val);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX,
|
||||||
|
TYPE beta, void *deviceY)
|
||||||
|
{
|
||||||
|
T_CSRGDeviceMat *cMat=Matrix->mat;
|
||||||
|
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||||
|
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||||
|
void *vX, *vY;
|
||||||
|
int r,n;
|
||||||
|
cusparseHandle_t *my_handle=getHandle();
|
||||||
|
TYPE ealpha=alpha, ebeta=beta;
|
||||||
|
#if CUDA_SHORT_VERSION <= 10
|
||||||
|
/* getAddrMultiVecDevice(deviceX, &vX); */
|
||||||
|
/* getAddrMultiVecDevice(deviceY, &vY); */
|
||||||
|
vX=x->v_;
|
||||||
|
vY=y->v_;
|
||||||
|
|
||||||
|
CHECK_CUSPARSE(cusparseTcsrmv(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||||
|
cMat->m,cMat->n,cMat->nz,(const TYPE *) &alpha,cMat->descr,
|
||||||
|
cMat->val, cMat->irp, cMat->ja,
|
||||||
|
(const TYPE *) vX, (const TYPE *) &beta, (TYPE *) vY));
|
||||||
|
|
||||||
|
#elif CUDA_VERSION < 11030
|
||||||
|
size_t bfsz;
|
||||||
|
vX=x->v_;
|
||||||
|
vY=y->v_;
|
||||||
|
#if 1
|
||||||
|
CHECK_CUSPARSE(cusparseCsrmvEx_bufferSize(*my_handle,CUSPARSE_ALG_MERGE_PATH,
|
||||||
|
CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||||
|
cMat->m,cMat->n,cMat->nz,
|
||||||
|
(const void *) &ealpha,CUSPARSE_BASE_TYPE,
|
||||||
|
cMat->descr,
|
||||||
|
(const void *) cMat->val,
|
||||||
|
CUSPARSE_BASE_TYPE,
|
||||||
|
(const int *) cMat->irp,
|
||||||
|
(const int *) cMat->ja,
|
||||||
|
(const void *) vX, CUSPARSE_BASE_TYPE,
|
||||||
|
(const void *) &ebeta, CUSPARSE_BASE_TYPE,
|
||||||
|
(void *) vY, CUSPARSE_BASE_TYPE,
|
||||||
|
CUSPARSE_BASE_TYPE, &bfsz));
|
||||||
|
#else
|
||||||
|
bfsz=cMat->nz;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (bfsz > cMat->mvbsize) {
|
||||||
|
if (cMat->mvbuffer != NULL) {
|
||||||
|
CHECK_CUDA(cudaFree(cMat->mvbuffer));
|
||||||
|
cMat->mvbuffer = NULL;
|
||||||
|
}
|
||||||
|
CHECK_CUDA(cudaMalloc((void **) &(cMat->mvbuffer), bfsz));
|
||||||
|
cMat->mvbsize = bfsz;
|
||||||
|
}
|
||||||
|
CHECK_CUSPARSE(cusparseCsrmvEx(*my_handle,
|
||||||
|
CUSPARSE_ALG_MERGE_PATH,
|
||||||
|
CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||||
|
cMat->m,cMat->n,cMat->nz,
|
||||||
|
(const void *) &ealpha,CUSPARSE_BASE_TYPE,
|
||||||
|
cMat->descr,
|
||||||
|
(const void *) cMat->val, CUSPARSE_BASE_TYPE,
|
||||||
|
(const int *) cMat->irp, (const int *) cMat->ja,
|
||||||
|
(const void *) vX, CUSPARSE_BASE_TYPE,
|
||||||
|
(const void *) &ebeta, CUSPARSE_BASE_TYPE,
|
||||||
|
(void *) vY, CUSPARSE_BASE_TYPE,
|
||||||
|
CUSPARSE_BASE_TYPE, (void *) cMat->mvbuffer));
|
||||||
|
|
||||||
|
#else
|
||||||
|
cusparseDnVecDescr_t vecX, vecY;
|
||||||
|
size_t bfsz;
|
||||||
|
|
||||||
|
if (T_CSRGIsNullMvDescr(cMat)) {
|
||||||
|
cMat->spmvDescr = (cusparseSpMatDescr_t *) malloc(sizeof(cusparseSpMatDescr_t *));
|
||||||
|
}
|
||||||
|
T_CSRGCreateSpMVDescr(cMat);
|
||||||
|
vX=x->v_;
|
||||||
|
vY=y->v_;
|
||||||
|
CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, cMat->m, vY, CUSPARSE_BASE_TYPE) );
|
||||||
|
CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, cMat->n, vX, CUSPARSE_BASE_TYPE) );
|
||||||
|
CHECK_CUSPARSE(cusparseSpMV_bufferSize(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||||
|
&alpha,(*(cMat->spmvDescr)),vecX,&beta,vecY,
|
||||||
|
CUSPARSE_BASE_TYPE,CUSPARSE_SPMV_ALG_DEFAULT,
|
||||||
|
&bfsz));
|
||||||
|
if (bfsz > cMat->mvbsize) {
|
||||||
|
if (cMat->mvbuffer != NULL) {
|
||||||
|
CHECK_CUDA(cudaFree(cMat->mvbuffer));
|
||||||
|
cMat->mvbuffer = NULL;
|
||||||
|
}
|
||||||
|
CHECK_CUDA(cudaMalloc((void **) &(cMat->mvbuffer), bfsz));
|
||||||
|
cMat->mvbsize = bfsz;
|
||||||
|
}
|
||||||
|
CHECK_CUSPARSE(cusparseSpMV(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||||
|
&alpha,(*(cMat->spmvDescr)),vecX,&beta,vecY,
|
||||||
|
CUSPARSE_BASE_TYPE,CUSPARSE_SPMV_ALG_DEFAULT,
|
||||||
|
cMat->mvbuffer));
|
||||||
|
CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) );
|
||||||
|
CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) );
|
||||||
|
CHECK_CUSPARSE(cusparseDestroySpMat(*(cMat->spmvDescr)));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX,
|
||||||
|
TYPE beta, void *deviceY)
|
||||||
|
{
|
||||||
|
T_CSRGDeviceMat *cMat=Matrix->mat;
|
||||||
|
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||||
|
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||||
|
void *vX, *vY;
|
||||||
|
int r,n;
|
||||||
|
cusparseHandle_t *my_handle=getHandle();
|
||||||
|
#if CUDA_SHORT_VERSION <= 10
|
||||||
|
vX=x->v_;
|
||||||
|
vY=y->v_;
|
||||||
|
|
||||||
|
return cusparseTcsrsv_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||||
|
cMat->m,(const TYPE *) &alpha,cMat->descr,
|
||||||
|
cMat->val, cMat->irp, cMat->ja, cMat->triang,
|
||||||
|
(const TYPE *) vX, (TYPE *) vY);
|
||||||
|
#elif CUDA_VERSION < 11030
|
||||||
|
vX=x->v_;
|
||||||
|
vY=y->v_;
|
||||||
|
CHECK_CUSPARSE(cusparseTcsrsv2_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||||
|
cMat->m,cMat->nz,
|
||||||
|
(const TYPE *) &alpha,
|
||||||
|
cMat->descr,
|
||||||
|
cMat->val, cMat->irp, cMat->ja,
|
||||||
|
cMat->triang,
|
||||||
|
(const TYPE *) vX, (TYPE *) vY,
|
||||||
|
CUSPARSE_SOLVE_POLICY_USE_LEVEL,
|
||||||
|
(void *) cMat->svbuffer));
|
||||||
|
#else
|
||||||
|
cusparseDnVecDescr_t vecX, vecY;
|
||||||
|
size_t bfsz;
|
||||||
|
vX=x->v_;
|
||||||
|
vY=y->v_;
|
||||||
|
CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, cMat->m, vY, CUSPARSE_BASE_TYPE) );
|
||||||
|
CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, cMat->n, vX, CUSPARSE_BASE_TYPE) );
|
||||||
|
if (T_CSRGIsNullMvDescr(cMat)) {
|
||||||
|
cMat->spmvDescr = (cusparseSpMatDescr_t *) malloc(sizeof(cusparseSpMatDescr_t *));
|
||||||
|
}
|
||||||
|
T_CSRGCreateSpMVDescr(cMat);
|
||||||
|
// fprintf(stderr,"Entry to SpSVDevice: %d %p\n",
|
||||||
|
// T_CSRGIsNullSvDescr(cMat),cMat->spsvDescr);
|
||||||
|
if (T_CSRGIsNullSvDescr(cMat)) {
|
||||||
|
cMat->spsvDescr=(cusparseSpSVDescr_t *) malloc(sizeof(cusparseSpSVDescr_t *));
|
||||||
|
cMat->svbsize=0;
|
||||||
|
CHECK_CUSPARSE( cusparseSpSV_createDescr(cMat->spsvDescr) );
|
||||||
|
//fprintf(stderr,"Entry to SpSVDevice: %d %p %d\n",
|
||||||
|
// T_CSRGIsNullSvDescr(cMat),cMat->spsvDescr,cMat->svbsize);
|
||||||
|
CHECK_CUSPARSE(cusparseSpSV_bufferSize(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||||
|
&alpha,*(cMat->spmvDescr),vecX,vecY,
|
||||||
|
CUSPARSE_BASE_TYPE,
|
||||||
|
CUSPARSE_SPSV_ALG_DEFAULT,
|
||||||
|
*(cMat->spsvDescr),
|
||||||
|
&bfsz));
|
||||||
|
if (bfsz > cMat->svbsize) {
|
||||||
|
if (cMat->svbuffer != NULL) {
|
||||||
|
CHECK_CUDA(cudaFree(cMat->svbuffer));
|
||||||
|
cMat->svbuffer = NULL;
|
||||||
|
}
|
||||||
|
CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz));
|
||||||
|
cMat->svbsize=bfsz;
|
||||||
|
CHECK_CUSPARSE(cusparseSpSV_analysis(*my_handle,
|
||||||
|
CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||||
|
&alpha,
|
||||||
|
*(cMat->spmvDescr),
|
||||||
|
vecX, vecY,
|
||||||
|
CUSPARSE_BASE_TYPE,
|
||||||
|
CUSPARSE_SPSV_ALG_DEFAULT,
|
||||||
|
*(cMat->spsvDescr),
|
||||||
|
cMat->svbuffer));
|
||||||
|
}
|
||||||
|
if (T_CSRGIsNullSvBuffer(cMat)) {
|
||||||
|
fprintf(stderr,"SpSV_SOLVE NULL spsv-buffer\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
CHECK_CUSPARSE(cusparseSpSV_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||||
|
&alpha,*(cMat->spmvDescr),vecX,vecY,
|
||||||
|
CUSPARSE_BASE_TYPE,
|
||||||
|
CUSPARSE_SPSV_ALG_DEFAULT,
|
||||||
|
*(cMat->spsvDescr)));
|
||||||
|
CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) );
|
||||||
|
CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) );
|
||||||
|
CHECK_CUSPARSE(cusparseDestroySpMat(*(cMat->spmvDescr)));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
#if CUDA_VERSION >= 11030
|
||||||
|
T_CSRGCreateSpMVDescr(T_CSRGDeviceMat *cMat)
|
||||||
|
{
|
||||||
|
int64_t tr,tc,tz;
|
||||||
|
tr = cMat->m;
|
||||||
|
tc = cMat->n;
|
||||||
|
tz = cMat->nz;
|
||||||
|
CHECK_CUSPARSE(cusparseCreateCsr(cMat->spmvDescr,
|
||||||
|
tr,tc,tz,
|
||||||
|
(void *) cMat->irp,
|
||||||
|
(void *) cMat->ja,
|
||||||
|
(void *) cMat->val,
|
||||||
|
CUSPARSE_INDEX_32I,
|
||||||
|
CUSPARSE_INDEX_32I,
|
||||||
|
CUSPARSE_INDEX_BASE_ONE,
|
||||||
|
CUSPARSE_BASE_TYPE) );
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
int T_CSRGDeviceAlloc(T_Cmat *Matrix,int nr, int nc, int nz)
|
||||||
|
{
|
||||||
|
T_CSRGDeviceMat *cMat;
|
||||||
|
int nr1=nr, nz1=nz, rc;
|
||||||
|
cusparseHandle_t *my_handle=getHandle();
|
||||||
|
int bfsz;
|
||||||
|
|
||||||
|
if ((nr<0)||(nc<0)||(nz<0))
|
||||||
|
return((int) CUSPARSE_STATUS_INVALID_VALUE);
|
||||||
|
if ((cMat=(T_CSRGDeviceMat *) malloc(sizeof(T_CSRGDeviceMat)))==NULL)
|
||||||
|
return((int) CUSPARSE_STATUS_ALLOC_FAILED);
|
||||||
|
cMat->m = nr;
|
||||||
|
cMat->n = nc;
|
||||||
|
cMat->nz = nz;
|
||||||
|
if (nr1 == 0) nr1 = 1;
|
||||||
|
if (nz1 == 0) nz1 = 1;
|
||||||
|
if ((rc= allocRemoteBuffer(((void **) &(cMat->irp)), ((nr1+1)*sizeof(int)))) != 0)
|
||||||
|
return(rc);
|
||||||
|
if ((rc= allocRemoteBuffer(((void **) &(cMat->ja)), ((nz1)*sizeof(int)))) != 0)
|
||||||
|
return(rc);
|
||||||
|
if ((rc= allocRemoteBuffer(((void **) &(cMat->val)), ((nz1)*sizeof(TYPE)))) != 0)
|
||||||
|
return(rc);
|
||||||
|
#if CUDA_SHORT_VERSION <= 10
|
||||||
|
if ((rc= cusparseCreateMatDescr(&(cMat->descr))) !=0)
|
||||||
|
return(rc);
|
||||||
|
if ((rc= cusparseCreateSolveAnalysisInfo(&(cMat->triang))) !=0)
|
||||||
|
return(rc);
|
||||||
|
#elif CUDA_VERSION < 11030
|
||||||
|
if ((rc= cusparseCreateMatDescr(&(cMat->descr))) !=0)
|
||||||
|
return(rc);
|
||||||
|
CHECK_CUSPARSE(cusparseSetMatType(cMat->descr,CUSPARSE_MATRIX_TYPE_GENERAL));
|
||||||
|
CHECK_CUSPARSE(cusparseSetMatDiagType(cMat->descr,CUSPARSE_DIAG_TYPE_NON_UNIT));
|
||||||
|
CHECK_CUSPARSE(cusparseSetMatIndexBase(cMat->descr,CUSPARSE_INDEX_BASE_ONE));
|
||||||
|
CHECK_CUSPARSE(cusparseCreateCsrsv2Info(&(cMat->triang)));
|
||||||
|
if (cMat->nz > 0) {
|
||||||
|
CHECK_CUSPARSE(cusparseTcsrsv2_bufferSize(*my_handle,
|
||||||
|
CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||||
|
cMat->m,cMat->nz, cMat->descr,
|
||||||
|
cMat->val, cMat->irp, cMat->ja,
|
||||||
|
cMat->triang, &bfsz));
|
||||||
|
} else {
|
||||||
|
bfsz = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* if (cMat->svbuffer != NULL) { */
|
||||||
|
/* fprintf(stderr,"Calling cudaFree\n"); */
|
||||||
|
/* CHECK_CUDA(cudaFree(cMat->svbuffer)); */
|
||||||
|
/* cMat->svbuffer = NULL; */
|
||||||
|
/* } */
|
||||||
|
if (bfsz > 0) {
|
||||||
|
CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz));
|
||||||
|
} else {
|
||||||
|
cMat->svbuffer=NULL;
|
||||||
|
}
|
||||||
|
cMat->svbsize=bfsz;
|
||||||
|
|
||||||
|
cMat->mvbuffer=NULL;
|
||||||
|
cMat->mvbsize = 0;
|
||||||
|
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
cMat->spmvDescr=NULL;
|
||||||
|
cMat->spsvDescr=NULL;
|
||||||
|
cMat->mvbuffer=NULL;
|
||||||
|
cMat->svbuffer=NULL;
|
||||||
|
cMat->mvbsize=0;
|
||||||
|
cMat->svbsize=0;
|
||||||
|
#endif
|
||||||
|
Matrix->mat = cMat;
|
||||||
|
return(CUSPARSE_STATUS_SUCCESS);
|
||||||
|
}
|
||||||
|
|
||||||
|
int T_CSRGDeviceFree(T_Cmat *Matrix)
|
||||||
|
{
|
||||||
|
T_CSRGDeviceMat *cMat= Matrix->mat;
|
||||||
|
|
||||||
|
if (cMat!=NULL) {
|
||||||
|
freeRemoteBuffer(cMat->irp);
|
||||||
|
freeRemoteBuffer(cMat->ja);
|
||||||
|
freeRemoteBuffer(cMat->val);
|
||||||
|
#if CUDA_SHORT_VERSION <= 10
|
||||||
|
cusparseDestroyMatDescr(cMat->descr);
|
||||||
|
cusparseDestroySolveAnalysisInfo(cMat->triang);
|
||||||
|
#elif CUDA_VERSION < 11030
|
||||||
|
cusparseDestroyMatDescr(cMat->descr);
|
||||||
|
cusparseDestroyCsrsv2Info(cMat->triang);
|
||||||
|
#else
|
||||||
|
if (!T_CSRGIsNullMvDescr(cMat)) {
|
||||||
|
// already destroyed spmvDescr, just free the pointer
|
||||||
|
free(cMat->spmvDescr);
|
||||||
|
cMat->spmvDescr=NULL;
|
||||||
|
}
|
||||||
|
if (cMat->mvbuffer!=NULL)
|
||||||
|
CHECK_CUDA( cudaFree(cMat->mvbuffer));
|
||||||
|
cMat->mvbuffer=NULL;
|
||||||
|
cMat->mvbsize=0;
|
||||||
|
if (!T_CSRGIsNullSvDescr(cMat)) {
|
||||||
|
CHECK_CUSPARSE(cusparseSpSV_destroyDescr(*(cMat->spsvDescr)));
|
||||||
|
free(cMat->spsvDescr);
|
||||||
|
cMat->spsvDescr=NULL;
|
||||||
|
}
|
||||||
|
if (cMat->svbuffer!=NULL)
|
||||||
|
CHECK_CUDA( cudaFree(cMat->svbuffer));
|
||||||
|
cMat->svbuffer=NULL;
|
||||||
|
cMat->svbsize=0;
|
||||||
|
#endif
|
||||||
|
free(cMat);
|
||||||
|
Matrix->mat = NULL;
|
||||||
|
}
|
||||||
|
return(CUSPARSE_STATUS_SUCCESS);
|
||||||
|
}
|
||||||
|
|
||||||
|
int T_CSRGDeviceGetParms(T_Cmat *Matrix,int *nr, int *nc, int *nz)
|
||||||
|
{
|
||||||
|
T_CSRGDeviceMat *cMat= Matrix->mat;
|
||||||
|
|
||||||
|
if (cMat!=NULL) {
|
||||||
|
*nr = cMat->m ;
|
||||||
|
*nc = cMat->n ;
|
||||||
|
*nz = cMat->nz ;
|
||||||
|
return(CUSPARSE_STATUS_SUCCESS);
|
||||||
|
} else {
|
||||||
|
return((int) CUSPARSE_STATUS_ALLOC_FAILED);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#if CUDA_SHORT_VERSION <= 10
|
||||||
|
|
||||||
|
int T_CSRGDeviceSetMatType(T_Cmat *Matrix, int type)
|
||||||
|
{
|
||||||
|
T_CSRGDeviceMat *cMat= Matrix->mat;
|
||||||
|
return ((int) cusparseSetMatType(cMat->descr,type));
|
||||||
|
}
|
||||||
|
|
||||||
|
int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type)
|
||||||
|
{
|
||||||
|
T_CSRGDeviceMat *cMat= Matrix->mat;
|
||||||
|
return ((int) cusparseSetMatFillMode(cMat->descr,type));
|
||||||
|
}
|
||||||
|
|
||||||
|
int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type)
|
||||||
|
{
|
||||||
|
T_CSRGDeviceMat *cMat= Matrix->mat;
|
||||||
|
return ((int) cusparseSetMatDiagType(cMat->descr,type));
|
||||||
|
}
|
||||||
|
|
||||||
|
int T_CSRGDeviceSetMatIndexBase(T_Cmat *Matrix, int type)
|
||||||
|
{
|
||||||
|
T_CSRGDeviceMat *cMat= Matrix->mat;
|
||||||
|
return ((int) cusparseSetMatIndexBase(cMat->descr,type));
|
||||||
|
}
|
||||||
|
|
||||||
|
int T_CSRGDeviceCsrsmAnalysis(T_Cmat *Matrix)
|
||||||
|
{
|
||||||
|
T_CSRGDeviceMat *cMat= Matrix->mat;
|
||||||
|
int rc, buffersize;
|
||||||
|
cusparseHandle_t *my_handle=getHandle();
|
||||||
|
cusparseSolveAnalysisInfo_t info;
|
||||||
|
|
||||||
|
rc= (int) cusparseTcsrsv_analysis(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||||
|
cMat->m,cMat->nz,cMat->descr,
|
||||||
|
cMat->val, cMat->irp, cMat->ja,
|
||||||
|
cMat->triang);
|
||||||
|
if (rc !=0) {
|
||||||
|
fprintf(stderr,"From csrsv_analysis: %d\n",rc);
|
||||||
|
}
|
||||||
|
return(rc);
|
||||||
|
}
|
||||||
|
|
||||||
|
#elif CUDA_VERSION < 11030
|
||||||
|
int T_CSRGDeviceSetMatType(T_Cmat *Matrix, int type)
|
||||||
|
{
|
||||||
|
T_CSRGDeviceMat *cMat= Matrix->mat;
|
||||||
|
return ((int) cusparseSetMatType(cMat->descr,type));
|
||||||
|
}
|
||||||
|
|
||||||
|
int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type)
|
||||||
|
{
|
||||||
|
T_CSRGDeviceMat *cMat= Matrix->mat;
|
||||||
|
return ((int) cusparseSetMatFillMode(cMat->descr,type));
|
||||||
|
}
|
||||||
|
|
||||||
|
int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type)
|
||||||
|
{
|
||||||
|
T_CSRGDeviceMat *cMat= Matrix->mat;
|
||||||
|
return ((int) cusparseSetMatDiagType(cMat->descr,type));
|
||||||
|
}
|
||||||
|
|
||||||
|
int T_CSRGDeviceSetMatIndexBase(T_Cmat *Matrix, int type)
|
||||||
|
{
|
||||||
|
T_CSRGDeviceMat *cMat= Matrix->mat;
|
||||||
|
return ((int) cusparseSetMatIndexBase(cMat->descr,type));
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type)
|
||||||
|
{
|
||||||
|
T_CSRGDeviceMat *cMat= Matrix->mat;
|
||||||
|
cusparseFillMode_t mode=type;
|
||||||
|
|
||||||
|
CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->spmvDescr,
|
||||||
|
CUSPARSE_SPMAT_FILL_MODE,
|
||||||
|
(const void*) &mode,
|
||||||
|
sizeof(cusparseFillMode_t)));
|
||||||
|
return(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type)
|
||||||
|
{
|
||||||
|
T_CSRGDeviceMat *cMat= Matrix->mat;
|
||||||
|
cusparseDiagType_t cutype=type;
|
||||||
|
CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->spmvDescr,
|
||||||
|
CUSPARSE_SPMAT_DIAG_TYPE,
|
||||||
|
(const void*) &cutype,
|
||||||
|
sizeof(cusparseDiagType_t)));
|
||||||
|
return(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
int T_CSRGIsNullMvDescr(T_CSRGDeviceMat *cMat)
|
||||||
|
{
|
||||||
|
return(cMat->spmvDescr == NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
int T_CSRGIsNullSvBuffer(T_CSRGDeviceMat *cMat)
|
||||||
|
{
|
||||||
|
return(cMat->svbuffer == NULL);
|
||||||
|
}
|
||||||
|
int T_CSRGIsNullSvDescr(T_CSRGDeviceMat *cMat)
|
||||||
|
{
|
||||||
|
return(cMat->spsvDescr == NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int T_CSRGHost2Device(T_Cmat *Matrix, int m, int n, int nz,
|
||||||
|
int *irp, int *ja, TYPE *val)
|
||||||
|
{
|
||||||
|
int rc;
|
||||||
|
T_CSRGDeviceMat *cMat= Matrix->mat;
|
||||||
|
cusparseHandle_t *my_handle=getHandle();
|
||||||
|
|
||||||
|
if ((rc=writeRemoteBuffer((void *) irp, (void *) cMat->irp,
|
||||||
|
(m+1)*sizeof(int)))
|
||||||
|
!= SPGPU_SUCCESS)
|
||||||
|
return(rc);
|
||||||
|
|
||||||
|
if ((rc=writeRemoteBuffer((void *) ja,(void *) cMat->ja,
|
||||||
|
(nz)*sizeof(int)))
|
||||||
|
!= SPGPU_SUCCESS)
|
||||||
|
return(rc);
|
||||||
|
if ((rc=writeRemoteBuffer((void *) val, (void *) cMat->val,
|
||||||
|
(nz)*sizeof(TYPE)))
|
||||||
|
!= SPGPU_SUCCESS)
|
||||||
|
return(rc);
|
||||||
|
#if (CUDA_SHORT_VERSION > 10 ) && (CUDA_VERSION < 11030)
|
||||||
|
if (cusparseGetMatType(cMat->descr)== CUSPARSE_MATRIX_TYPE_TRIANGULAR) {
|
||||||
|
// Why do we need to set TYPE_GENERAL??? cuSPARSE can be misterious sometimes.
|
||||||
|
cusparseSetMatType(cMat->descr,CUSPARSE_MATRIX_TYPE_GENERAL);
|
||||||
|
CHECK_CUSPARSE(cusparseTcsrsv2_analysis(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||||
|
cMat->m,cMat->nz, cMat->descr,
|
||||||
|
cMat->val, cMat->irp, cMat->ja,
|
||||||
|
cMat->triang, CUSPARSE_SOLVE_POLICY_USE_LEVEL,
|
||||||
|
cMat->svbuffer));
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
//cusparseSetMatType(*(cMat->spmvDescr),CUSPARSE_MATRIX_TYPE_GENERAL);
|
||||||
|
#endif
|
||||||
|
return(CUSPARSE_STATUS_SUCCESS);
|
||||||
|
}
|
||||||
|
|
||||||
|
int T_CSRGDevice2Host(T_Cmat *Matrix, int m, int n, int nz,
|
||||||
|
int *irp, int *ja, TYPE *val)
|
||||||
|
{
|
||||||
|
int rc;
|
||||||
|
T_CSRGDeviceMat *cMat = Matrix->mat;
|
||||||
|
|
||||||
|
if ((rc=readRemoteBuffer((void *) irp, (void *) cMat->irp, (m+1)*sizeof(int)))
|
||||||
|
!= SPGPU_SUCCESS)
|
||||||
|
return(rc);
|
||||||
|
|
||||||
|
if ((rc=readRemoteBuffer((void *) ja, (void *) cMat->ja, (nz)*sizeof(int)))
|
||||||
|
!= SPGPU_SUCCESS)
|
||||||
|
return(rc);
|
||||||
|
if ((rc=readRemoteBuffer((void *) val, (void *) cMat->val, (nz)*sizeof(TYPE)))
|
||||||
|
!= SPGPU_SUCCESS)
|
||||||
|
return(rc);
|
||||||
|
|
||||||
|
return(CUSPARSE_STATUS_SUCCESS);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if CUDA_SHORT_VERSION <= 10
|
||||||
|
int T_HYBGDeviceFree(T_Hmat *Matrix)
|
||||||
|
{
|
||||||
|
T_HYBGDeviceMat *hMat= Matrix->mat;
|
||||||
|
if (hMat != NULL) {
|
||||||
|
cusparseDestroyMatDescr(hMat->descr);
|
||||||
|
cusparseDestroySolveAnalysisInfo(hMat->triang);
|
||||||
|
cusparseDestroyHybMat(hMat->hybA);
|
||||||
|
free(hMat);
|
||||||
|
}
|
||||||
|
Matrix->mat = NULL;
|
||||||
|
return(CUSPARSE_STATUS_SUCCESS);
|
||||||
|
}
|
||||||
|
|
||||||
|
int T_spmvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX,
|
||||||
|
TYPE beta, void *deviceY)
|
||||||
|
{
|
||||||
|
T_HYBGDeviceMat *hMat=Matrix->mat;
|
||||||
|
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||||
|
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||||
|
void *vX, *vY;
|
||||||
|
int r,n,rc;
|
||||||
|
cusparseMatrixType_t type;
|
||||||
|
cusparseHandle_t *my_handle=getHandle();
|
||||||
|
|
||||||
|
/*getAddrMultiVecDevice(deviceX, &vX);
|
||||||
|
getAddrMultiVecDevice(deviceY, &vY); */
|
||||||
|
vX=x->v_;
|
||||||
|
vY=y->v_;
|
||||||
|
|
||||||
|
/* rc = (int) cusparseGetMatType(hMat->descr); */
|
||||||
|
/* fprintf(stderr,"Spmv MatType: %d\n",rc); */
|
||||||
|
/* rc = (int) cusparseGetMatDiagType(hMat->descr); */
|
||||||
|
/* fprintf(stderr,"Spmv DiagType: %d\n",rc); */
|
||||||
|
/* rc = (int) cusparseGetMatFillMode(hMat->descr); */
|
||||||
|
/* fprintf(stderr,"Spmv FillMode: %d\n",rc); */
|
||||||
|
/* Dirty trick: apparently hybmv does not accept a triangular
|
||||||
|
matrix even though it should not make a difference. So
|
||||||
|
we claim it's general anyway */
|
||||||
|
type = cusparseGetMatType(hMat->descr);
|
||||||
|
rc = cusparseSetMatType(hMat->descr,CUSPARSE_MATRIX_TYPE_GENERAL);
|
||||||
|
if (rc == 0)
|
||||||
|
rc = (int) cusparseThybmv(*my_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||||
|
(const TYPE *) &alpha, hMat->descr, hMat->hybA,
|
||||||
|
(const TYPE *) vX, (const TYPE *) &beta,
|
||||||
|
(TYPE *) vY);
|
||||||
|
if (rc == 0)
|
||||||
|
rc = cusparseSetMatType(hMat->descr,type);
|
||||||
|
return(rc);
|
||||||
|
}
|
||||||
|
|
||||||
|
int T_HYBGDeviceAlloc(T_Hmat *Matrix,int nr, int nc, int nz)
|
||||||
|
{
|
||||||
|
T_HYBGDeviceMat *hMat;
|
||||||
|
int nr1=nr, nz1=nz, rc;
|
||||||
|
if ((nr<0)||(nc<0)||(nz<0))
|
||||||
|
return((int) CUSPARSE_STATUS_INVALID_VALUE);
|
||||||
|
if ((hMat=(T_HYBGDeviceMat *) malloc(sizeof(T_HYBGDeviceMat)))==NULL)
|
||||||
|
return((int) CUSPARSE_STATUS_ALLOC_FAILED);
|
||||||
|
hMat->m = nr;
|
||||||
|
hMat->n = nc;
|
||||||
|
hMat->nz = nz;
|
||||||
|
|
||||||
|
if ((rc= cusparseCreateMatDescr(&(hMat->descr))) !=0)
|
||||||
|
return(rc);
|
||||||
|
if ((rc= cusparseCreateSolveAnalysisInfo(&(hMat->triang))) !=0)
|
||||||
|
return(rc);
|
||||||
|
if((rc = cusparseCreateHybMat(&(hMat->hybA))) != 0)
|
||||||
|
return(rc);
|
||||||
|
Matrix->mat = hMat;
|
||||||
|
return(CUSPARSE_STATUS_SUCCESS);
|
||||||
|
}
|
||||||
|
|
||||||
|
int T_HYBGDeviceSetMatDiagType(T_Hmat *Matrix, int type)
|
||||||
|
{
|
||||||
|
T_HYBGDeviceMat *hMat= Matrix->mat;
|
||||||
|
return ((int) cusparseSetMatDiagType(hMat->descr,type));
|
||||||
|
}
|
||||||
|
|
||||||
|
int T_HYBGDeviceSetMatIndexBase(T_Hmat *Matrix, int type)
|
||||||
|
{
|
||||||
|
T_HYBGDeviceMat *hMat= Matrix->mat;
|
||||||
|
return ((int) cusparseSetMatIndexBase(hMat->descr,type));
|
||||||
|
}
|
||||||
|
|
||||||
|
int T_HYBGDeviceSetMatType(T_Hmat *Matrix, int type)
|
||||||
|
{
|
||||||
|
T_HYBGDeviceMat *hMat= Matrix->mat;
|
||||||
|
return ((int) cusparseSetMatType(hMat->descr,type));
|
||||||
|
}
|
||||||
|
|
||||||
|
int T_HYBGDeviceSetMatFillMode(T_Hmat *Matrix, int type)
|
||||||
|
{
|
||||||
|
T_HYBGDeviceMat *hMat= Matrix->mat;
|
||||||
|
return ((int) cusparseSetMatFillMode(hMat->descr,type));
|
||||||
|
}
|
||||||
|
|
||||||
|
int T_spsvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX,
|
||||||
|
TYPE beta, void *deviceY)
|
||||||
|
{
|
||||||
|
//beta??
|
||||||
|
T_HYBGDeviceMat *hMat=Matrix->mat;
|
||||||
|
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||||
|
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||||
|
void *vX, *vY;
|
||||||
|
int r,n;
|
||||||
|
cusparseHandle_t *my_handle=getHandle();
|
||||||
|
/*getAddrMultiVecDevice(deviceX, &vX);
|
||||||
|
getAddrMultiVecDevice(deviceY, &vY); */
|
||||||
|
vX=x->v_;
|
||||||
|
vY=y->v_;
|
||||||
|
|
||||||
|
return cusparseThybsv_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||||
|
(const TYPE *) &alpha, hMat->descr,
|
||||||
|
hMat->hybA, hMat->triang,
|
||||||
|
(const TYPE *) vX, (TYPE *) vY);
|
||||||
|
}
|
||||||
|
|
||||||
|
int T_HYBGDeviceHybsmAnalysis(T_Hmat *Matrix)
|
||||||
|
{
|
||||||
|
T_HYBGDeviceMat *hMat= Matrix->mat;
|
||||||
|
cusparseSolveAnalysisInfo_t info;
|
||||||
|
int rc;
|
||||||
|
cusparseHandle_t *my_handle=getHandle();
|
||||||
|
|
||||||
|
/* rc = (int) cusparseGetMatType(hMat->descr); */
|
||||||
|
/* fprintf(stderr,"Analysis MatType: %d\n",rc); */
|
||||||
|
/* rc = (int) cusparseGetMatDiagType(hMat->descr); */
|
||||||
|
/* fprintf(stderr,"Analysis DiagType: %d\n",rc); */
|
||||||
|
/* rc = (int) cusparseGetMatFillMode(hMat->descr); */
|
||||||
|
/* fprintf(stderr,"Analysis FillMode: %d\n",rc); */
|
||||||
|
rc = (int) cusparseThybsv_analysis(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||||
|
hMat->descr, hMat->hybA, hMat->triang);
|
||||||
|
|
||||||
|
if (rc !=0) {
|
||||||
|
fprintf(stderr,"From csrsv_analysis: %d\n",rc);
|
||||||
|
}
|
||||||
|
return(rc);
|
||||||
|
}
|
||||||
|
|
||||||
|
int T_HYBGHost2Device(T_Hmat *Matrix, int m, int n, int nz,
|
||||||
|
int *irp, int *ja, TYPE *val)
|
||||||
|
{
|
||||||
|
int rc; double t1,t2;
|
||||||
|
int nr1=m, nz1=nz;
|
||||||
|
T_HYBGDeviceMat *hMat= Matrix->mat;
|
||||||
|
cusparseHandle_t *my_handle=getHandle();
|
||||||
|
|
||||||
|
if (nr1 == 0) nr1 = 1;
|
||||||
|
if (nz1 == 0) nz1 = 1;
|
||||||
|
if ((rc= allocRemoteBuffer(((void **) &(hMat->irp)), ((nr1+1)*sizeof(int)))) != 0)
|
||||||
|
return(rc);
|
||||||
|
if ((rc= allocRemoteBuffer(((void **) &(hMat->ja)), ((nz1)*sizeof(int)))) != 0)
|
||||||
|
return(rc);
|
||||||
|
if ((rc= allocRemoteBuffer(((void **) &(hMat->val)), ((nz1)*sizeof(TYPE)))) != 0)
|
||||||
|
return(rc);
|
||||||
|
|
||||||
|
if ((rc=writeRemoteBuffer((void *) irp, (void *) hMat->irp,
|
||||||
|
(m+1)*sizeof(int)))
|
||||||
|
!= SPGPU_SUCCESS)
|
||||||
|
return(rc);
|
||||||
|
|
||||||
|
if ((rc=writeRemoteBuffer((void *) ja,(void *) hMat->ja,
|
||||||
|
(nz)*sizeof(int)))
|
||||||
|
!= SPGPU_SUCCESS)
|
||||||
|
return(rc);
|
||||||
|
if ((rc=writeRemoteBuffer((void *) val, (void *) hMat->val,
|
||||||
|
(nz)*sizeof(TYPE)))
|
||||||
|
!= SPGPU_SUCCESS)
|
||||||
|
return(rc);
|
||||||
|
/* rc = (int) cusparseGetMatType(hMat->descr); */
|
||||||
|
/* fprintf(stderr,"Conversion MatType: %d\n",rc); */
|
||||||
|
/* rc = (int) cusparseGetMatDiagType(hMat->descr); */
|
||||||
|
/* fprintf(stderr,"Conversion DiagType: %d\n",rc); */
|
||||||
|
/* rc = (int) cusparseGetMatFillMode(hMat->descr); */
|
||||||
|
/* fprintf(stderr,"Conversion FillMode: %d\n",rc); */
|
||||||
|
//t1=etime();
|
||||||
|
rc = (int) cusparseTcsr2hyb(*my_handle, m, n,
|
||||||
|
hMat->descr,
|
||||||
|
(const TYPE *)hMat->val,
|
||||||
|
(const int *)hMat->irp, (const int *)hMat->ja,
|
||||||
|
hMat->hybA,0,
|
||||||
|
CUSPARSE_HYB_PARTITION_AUTO);
|
||||||
|
|
||||||
|
freeRemoteBuffer(hMat->irp); hMat->irp = NULL;
|
||||||
|
freeRemoteBuffer(hMat->ja); hMat->ja = NULL;
|
||||||
|
freeRemoteBuffer(hMat->val); hMat->val = NULL;
|
||||||
|
|
||||||
|
//cudaSync();
|
||||||
|
//t2 = etime();
|
||||||
|
//fprintf(stderr,"Inner call to cusparseTcsr2hyb: %lf\n",(t2-t1));
|
||||||
|
if (rc != 0) {
|
||||||
|
fprintf(stderr,"From csr2hyb: %d\n",rc);
|
||||||
|
}
|
||||||
|
return(rc);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,386 @@
|
|||||||
|
/* Parallel Sparse BLAS GPU plugin */
|
||||||
|
/* (C) Copyright 2013 */
|
||||||
|
|
||||||
|
/* Salvatore Filippone */
|
||||||
|
/* Alessandro Fanfarillo */
|
||||||
|
|
||||||
|
/* Redistribution and use in source and binary forms, with or without */
|
||||||
|
/* modification, are permitted provided that the following conditions */
|
||||||
|
/* are met: */
|
||||||
|
/* 1. Redistributions of source code must retain the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer. */
|
||||||
|
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||||
|
/* notice, this list of conditions, and the following disclaimer in the */
|
||||||
|
/* documentation and/or other materials provided with the distribution. */
|
||||||
|
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||||
|
/* not be used to endorse or promote products derived from this */
|
||||||
|
/* software without specific written permission. */
|
||||||
|
|
||||||
|
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||||
|
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||||
|
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||||
|
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||||
|
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||||
|
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||||
|
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||||
|
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||||
|
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||||
|
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
|
||||||
|
|
||||||
|
#include "hdiagdev.h"
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#define DEBUG 0
|
||||||
|
void freeHdiagDevice(void* remoteMatrix)
|
||||||
|
{
|
||||||
|
struct HdiagDevice *devMat = (struct HdiagDevice *) remoteMatrix;
|
||||||
|
//fprintf(stderr,"freeHllDevice\n");
|
||||||
|
if (devMat != NULL) {
|
||||||
|
freeRemoteBuffer(devMat->hackOffsets);
|
||||||
|
freeRemoteBuffer(devMat->cM);
|
||||||
|
free(remoteMatrix);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
HdiagDeviceParams getHdiagDeviceParams(unsigned int rows, unsigned int columns,
|
||||||
|
unsigned int allocationHeight, unsigned int hackSize,
|
||||||
|
unsigned int hackCount, unsigned int elementType)
|
||||||
|
{
|
||||||
|
HdiagDeviceParams params;
|
||||||
|
|
||||||
|
params.elementType = elementType;
|
||||||
|
//numero di elementi di val
|
||||||
|
params.rows = rows;
|
||||||
|
params.columns = columns;
|
||||||
|
params.allocationHeight = allocationHeight;
|
||||||
|
params.hackSize = hackSize;
|
||||||
|
params.hackCount = hackCount;
|
||||||
|
|
||||||
|
return params;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
int allocHdiagDevice(void **remoteMatrix, HdiagDeviceParams* params)
|
||||||
|
{
|
||||||
|
struct HdiagDevice *tmp = (struct HdiagDevice *)malloc(sizeof(struct HdiagDevice));
|
||||||
|
int ret=SPGPU_SUCCESS;
|
||||||
|
int *tmpOff = NULL;
|
||||||
|
|
||||||
|
*remoteMatrix = (void *) tmp;
|
||||||
|
#if DEBUG
|
||||||
|
fprintf(stderr,"From alloc: %p\n",*remoteMatrix);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
tmp->rows = params->rows;
|
||||||
|
|
||||||
|
tmp->hackSize = params->hackSize;
|
||||||
|
|
||||||
|
tmp->cols = params->columns;
|
||||||
|
|
||||||
|
tmp->allocationHeight = params->allocationHeight;
|
||||||
|
|
||||||
|
tmp->hackCount = params->hackCount;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#if DEBUG
|
||||||
|
fprintf(stderr,"hackcount %d allocationHeight %d\n",tmp->hackCount,tmp->allocationHeight);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (ret == SPGPU_SUCCESS)
|
||||||
|
ret=allocRemoteBuffer((void **)&(tmp->hackOffsets), (tmp->hackCount+1)*sizeof(int));
|
||||||
|
|
||||||
|
|
||||||
|
if (ret == SPGPU_SUCCESS)
|
||||||
|
ret=allocRemoteBuffer((void **)&(tmp->hdiaOffsets), tmp->allocationHeight*sizeof(int));
|
||||||
|
|
||||||
|
/* tmp->baseIndex = params->firstIndex; */
|
||||||
|
|
||||||
|
if (params->elementType == SPGPU_TYPE_INT)
|
||||||
|
{
|
||||||
|
if (ret == SPGPU_SUCCESS)
|
||||||
|
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(int));
|
||||||
|
}
|
||||||
|
else if (params->elementType == SPGPU_TYPE_FLOAT)
|
||||||
|
{
|
||||||
|
if (ret == SPGPU_SUCCESS)
|
||||||
|
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(float));
|
||||||
|
}
|
||||||
|
else if (params->elementType == SPGPU_TYPE_DOUBLE)
|
||||||
|
{
|
||||||
|
if (ret == SPGPU_SUCCESS)
|
||||||
|
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(double));
|
||||||
|
}
|
||||||
|
else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT)
|
||||||
|
{
|
||||||
|
if (ret == SPGPU_SUCCESS)
|
||||||
|
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(cuFloatComplex));
|
||||||
|
}
|
||||||
|
else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE)
|
||||||
|
{
|
||||||
|
if (ret == SPGPU_SUCCESS)
|
||||||
|
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(cuDoubleComplex));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return SPGPU_UNSUPPORTED; // Unsupported params
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
int FallocHdiagDevice(void** deviceMat, unsigned int rows, unsigned int cols,
|
||||||
|
unsigned int allocationHeight, unsigned int hackSize,
|
||||||
|
unsigned int hackCount, unsigned int elementType)
|
||||||
|
{ int i=0;
|
||||||
|
HdiagDeviceParams p;
|
||||||
|
|
||||||
|
p = getHdiagDeviceParams(rows, cols, allocationHeight, hackSize, hackCount,elementType);
|
||||||
|
|
||||||
|
i = allocHdiagDevice(deviceMat, &p);
|
||||||
|
#if DEBUG
|
||||||
|
fprintf(stderr," Falloc %p \n",*deviceMat);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i);
|
||||||
|
}
|
||||||
|
return(i);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeHdiagDeviceDouble(void* deviceMat, double* val, int* hdiaOffsets, int *hackOffsets)
|
||||||
|
{ int i=0,fo,fa,j,k,p;
|
||||||
|
char buf_a[255], buf_o[255],tmp[255];
|
||||||
|
struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat;
|
||||||
|
|
||||||
|
i=SPGPU_SUCCESS;
|
||||||
|
|
||||||
|
|
||||||
|
#if DEBUG
|
||||||
|
fprintf(stderr," Write %p \n",devMat);
|
||||||
|
|
||||||
|
fprintf(stderr,"HDIAG writing to device memory: allocationHeight %d hackCount %d\n",
|
||||||
|
devMat->allocationHeight,devMat->hackCount);
|
||||||
|
fprintf(stderr,"HackOffsets: ");
|
||||||
|
for (j=0; j<devMat->hackCount+1; j++)
|
||||||
|
fprintf(stderr," %d",hackOffsets[j]);
|
||||||
|
fprintf(stderr,"\n");
|
||||||
|
fprintf(stderr,"diaOffsets: ");
|
||||||
|
for (j=0; j<devMat->allocationHeight; j++)
|
||||||
|
fprintf(stderr," %d",hdiaOffsets[j]);
|
||||||
|
fprintf(stderr,"\n");
|
||||||
|
#if 1
|
||||||
|
fprintf(stderr,"values: \n");
|
||||||
|
p=0;
|
||||||
|
for (j=0; j<devMat->hackCount; j++){
|
||||||
|
fprintf(stderr,"Hack no: %d\n",j+1);
|
||||||
|
for (k=0; k<devMat->hackSize*(devMat->allocationHeight/devMat->hackCount); k++){
|
||||||
|
fprintf(stderr," %d %lf\n",p+1,val[p]); p++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fprintf(stderr,"\n");
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
if(i== SPGPU_SUCCESS)
|
||||||
|
i = writeRemoteBuffer((void *) hackOffsets,(void *) devMat->hackOffsets,
|
||||||
|
(devMat->hackCount+1)*sizeof(int));
|
||||||
|
|
||||||
|
if(i== SPGPU_SUCCESS)
|
||||||
|
i = writeRemoteBuffer((void*) hdiaOffsets, (void *)devMat->hdiaOffsets,
|
||||||
|
devMat->allocationHeight*sizeof(int));
|
||||||
|
if(i== SPGPU_SUCCESS)
|
||||||
|
i = writeRemoteBuffer((void*) val, (void *)devMat->cM,
|
||||||
|
devMat->allocationHeight*devMat->hackSize*sizeof(double));
|
||||||
|
if (i!=0)
|
||||||
|
fprintf(stderr,"Error in writeHdiagDeviceDouble %d\n",i);
|
||||||
|
|
||||||
|
#if DEBUG
|
||||||
|
fprintf(stderr," EndWrite %p \n",devMat);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if(i==0)
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
else
|
||||||
|
return SPGPU_UNSUPPORTED;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
long long int sizeofHdiagDeviceDouble(void* deviceMat)
|
||||||
|
{ int i=0,fo,fa;
|
||||||
|
int *hoff=NULL,*hackoff=NULL;
|
||||||
|
long long int memsize=0;
|
||||||
|
struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat;
|
||||||
|
|
||||||
|
|
||||||
|
memsize += (devMat->hackCount+1)*sizeof(int);
|
||||||
|
memsize += devMat->allocationHeight*sizeof(int);
|
||||||
|
memsize += devMat->allocationHeight*devMat->hackSize*sizeof(double);
|
||||||
|
return(memsize);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
int readHdiagDeviceDouble(void* deviceMat, double* a, int* off)
|
||||||
|
{ int i;
|
||||||
|
struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat;
|
||||||
|
/* i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(double)); */
|
||||||
|
/* i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int)); */
|
||||||
|
|
||||||
|
|
||||||
|
/*if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
|
||||||
|
}*/
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int spmvHdiagDeviceDouble(void *deviceMat, double alpha, void* deviceX,
|
||||||
|
double beta, void* deviceY)
|
||||||
|
{
|
||||||
|
struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat;
|
||||||
|
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||||
|
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
|
||||||
|
#ifdef VERBOSE
|
||||||
|
/*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/
|
||||||
|
/*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/
|
||||||
|
/*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/
|
||||||
|
#endif
|
||||||
|
#if DEBUG
|
||||||
|
fprintf(stderr," First %p \n",devMat);
|
||||||
|
fprintf(stderr,"%d %d %d %p %p %p\n",devMat->rows,devMat->cols, devMat->hackSize,
|
||||||
|
devMat->hackOffsets, devMat->hdiaOffsets, devMat->cM);
|
||||||
|
#endif
|
||||||
|
spgpuDhdiaspmv (handle, (double*)y->v_, (double *)y->v_, alpha,
|
||||||
|
(double *)devMat->cM,devMat->hdiaOffsets,
|
||||||
|
devMat->hackSize, devMat->hackOffsets, devMat->rows,devMat->cols,
|
||||||
|
x->v_, beta);
|
||||||
|
|
||||||
|
//cudaSync();
|
||||||
|
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeHdiagDeviceFloat(void* deviceMat, float* val, int* hdiaOffsets, int *hackOffsets)
|
||||||
|
{ int i=0,fo,fa,j,k,p;
|
||||||
|
char buf_a[255], buf_o[255],tmp[255];
|
||||||
|
struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat;
|
||||||
|
|
||||||
|
i=SPGPU_SUCCESS;
|
||||||
|
|
||||||
|
|
||||||
|
#if DEBUG
|
||||||
|
fprintf(stderr," Write %p \n",devMat);
|
||||||
|
|
||||||
|
fprintf(stderr,"HDIAG writing to device memory: allocationHeight %d hackCount %d\n",
|
||||||
|
devMat->allocationHeight,devMat->hackCount);
|
||||||
|
fprintf(stderr,"HackOffsets: ");
|
||||||
|
for (j=0; j<devMat->hackCount+1; j++)
|
||||||
|
fprintf(stderr," %d",hackOffsets[j]);
|
||||||
|
fprintf(stderr,"\n");
|
||||||
|
fprintf(stderr,"diaOffsets: ");
|
||||||
|
for (j=0; j<devMat->allocationHeight; j++)
|
||||||
|
fprintf(stderr," %d",hdiaOffsets[j]);
|
||||||
|
fprintf(stderr,"\n");
|
||||||
|
#if 1
|
||||||
|
fprintf(stderr,"values: \n");
|
||||||
|
p=0;
|
||||||
|
for (j=0; j<devMat->hackCount; j++){
|
||||||
|
fprintf(stderr,"Hack no: %d\n",j+1);
|
||||||
|
for (k=0; k<devMat->hackSize*(devMat->allocationHeight/devMat->hackCount); k++){
|
||||||
|
fprintf(stderr," %d %lf\n",p+1,val[p]); p++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fprintf(stderr,"\n");
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
if(i== SPGPU_SUCCESS)
|
||||||
|
i = writeRemoteBuffer((void *) hackOffsets,(void *) devMat->hackOffsets,
|
||||||
|
(devMat->hackCount+1)*sizeof(int));
|
||||||
|
|
||||||
|
if(i== SPGPU_SUCCESS)
|
||||||
|
i = writeRemoteBuffer((void*) hdiaOffsets, (void *)devMat->hdiaOffsets,
|
||||||
|
devMat->allocationHeight*sizeof(int));
|
||||||
|
if(i== SPGPU_SUCCESS)
|
||||||
|
i = writeRemoteBuffer((void*) val, (void *)devMat->cM,
|
||||||
|
devMat->allocationHeight*devMat->hackSize*sizeof(float));
|
||||||
|
if (i!=0)
|
||||||
|
fprintf(stderr,"Error in writeHdiagDeviceFloat %d\n",i);
|
||||||
|
|
||||||
|
#if DEBUG
|
||||||
|
fprintf(stderr," EndWrite %p \n",devMat);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if(i==0)
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
else
|
||||||
|
return SPGPU_UNSUPPORTED;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
long long int sizeofHdiagDeviceFloat(void* deviceMat)
|
||||||
|
{ int i=0,fo,fa;
|
||||||
|
int *hoff=NULL,*hackoff=NULL;
|
||||||
|
long long int memsize=0;
|
||||||
|
struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat;
|
||||||
|
|
||||||
|
|
||||||
|
memsize += (devMat->hackCount+1)*sizeof(int);
|
||||||
|
memsize += devMat->allocationHeight*sizeof(int);
|
||||||
|
memsize += devMat->allocationHeight*devMat->hackSize*sizeof(float);
|
||||||
|
|
||||||
|
return(memsize);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
int readHdiagDeviceFloat(void* deviceMat, float* a, int* off)
|
||||||
|
{ int i;
|
||||||
|
struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat;
|
||||||
|
/* i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(float)); */
|
||||||
|
/* i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int)); */
|
||||||
|
|
||||||
|
|
||||||
|
/*if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i);
|
||||||
|
}*/
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int spmvHdiagDeviceFloat(void *deviceMat, float alpha, void* deviceX,
|
||||||
|
float beta, void* deviceY)
|
||||||
|
{
|
||||||
|
struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat;
|
||||||
|
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||||
|
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
|
||||||
|
#ifdef VERBOSE
|
||||||
|
/*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/
|
||||||
|
/*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/
|
||||||
|
/*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/
|
||||||
|
#endif
|
||||||
|
#if DEBUG
|
||||||
|
fprintf(stderr," First %p \n",devMat);
|
||||||
|
fprintf(stderr,"%d %d %d %p %p %p\n",devMat->rows,devMat->cols, devMat->hackSize,
|
||||||
|
devMat->hackOffsets, devMat->hdiaOffsets, devMat->cM);
|
||||||
|
#endif
|
||||||
|
spgpuShdiaspmv (handle, (float*)y->v_, (float *)y->v_, alpha,
|
||||||
|
(float *)devMat->cM,devMat->hdiaOffsets,
|
||||||
|
devMat->hackSize, devMat->hackOffsets, devMat->rows,devMat->cols,
|
||||||
|
x->v_, beta);
|
||||||
|
|
||||||
|
//cudaSync();
|
||||||
|
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,106 @@
|
|||||||
|
/* Parallel Sparse BLAS GPU plugin */
|
||||||
|
/* (C) Copyright 2013 */
|
||||||
|
|
||||||
|
/* Salvatore Filippone */
|
||||||
|
/* Alessandro Fanfarillo */
|
||||||
|
|
||||||
|
/* Redistribution and use in source and binary forms, with or without */
|
||||||
|
/* modification, are permitted provided that the following conditions */
|
||||||
|
/* are met: */
|
||||||
|
/* 1. Redistributions of source code must retain the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer. */
|
||||||
|
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||||
|
/* notice, this list of conditions, and the following disclaimer in the */
|
||||||
|
/* documentation and/or other materials provided with the distribution. */
|
||||||
|
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||||
|
/* not be used to endorse or promote products derived from this */
|
||||||
|
/* software without specific written permission. */
|
||||||
|
|
||||||
|
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||||
|
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||||
|
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||||
|
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||||
|
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||||
|
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||||
|
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||||
|
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||||
|
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||||
|
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef _HDIAGDEV_H_
|
||||||
|
#define _HDIAGDEV_H_
|
||||||
|
|
||||||
|
#include "cintrf.h"
|
||||||
|
#include "hdia.h"
|
||||||
|
|
||||||
|
struct HdiagDevice
|
||||||
|
{
|
||||||
|
// Compressed matrix
|
||||||
|
void *cM; //it can be float or double
|
||||||
|
|
||||||
|
// offset (same size of cM)
|
||||||
|
int *hdiaOffsets;
|
||||||
|
|
||||||
|
int *hackOffsets;
|
||||||
|
|
||||||
|
int hackCount;
|
||||||
|
|
||||||
|
int rows;
|
||||||
|
|
||||||
|
int cols;
|
||||||
|
|
||||||
|
|
||||||
|
int hackSize;
|
||||||
|
|
||||||
|
int allocationHeight;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef struct HdiagDeviceParams
|
||||||
|
{
|
||||||
|
|
||||||
|
unsigned int elementType;
|
||||||
|
|
||||||
|
// Number of rows.
|
||||||
|
// Used to allocate rS array
|
||||||
|
unsigned int rows;
|
||||||
|
//unsigned int hackOffsLength;
|
||||||
|
|
||||||
|
// Number of columns.
|
||||||
|
// Used for error-checking
|
||||||
|
unsigned int columns;
|
||||||
|
|
||||||
|
unsigned int hackSize;
|
||||||
|
unsigned int hackCount;
|
||||||
|
unsigned int allocationHeight;
|
||||||
|
|
||||||
|
|
||||||
|
} HdiagDeviceParams;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
HdiagDeviceParams getHdiagDeviceParams(unsigned int rows, unsigned int columns,
|
||||||
|
unsigned int allocationHeight, unsigned int hackSize,
|
||||||
|
unsigned int hackCount, unsigned int elementType);
|
||||||
|
|
||||||
|
int FallocHdiagDevice(void** deviceMat, unsigned int rows, unsigned int cols,
|
||||||
|
unsigned int allocationHeight, unsigned int hackSize,
|
||||||
|
unsigned int hackCount, unsigned int elementType);
|
||||||
|
|
||||||
|
int allocHdiagDevice(void ** remoteMatrix, HdiagDeviceParams* params);
|
||||||
|
|
||||||
|
|
||||||
|
void freeHdiagDevice(void* remoteMatrix);
|
||||||
|
|
||||||
|
int writeHdiagDeviceFloat(void* deviceMat, float* val, int* hdiaOffsets, int *hackOffsets);
|
||||||
|
int spmvHdiagDeviceFloat(void *deviceMat, float alpha, void* deviceX,
|
||||||
|
float beta, void* deviceY);
|
||||||
|
|
||||||
|
int writeHdiagDeviceDouble(void* deviceMat, double* val, int* hdiaOffsets, int *hackOffsets);
|
||||||
|
int spmvHdiagDeviceDouble(void *deviceMat, double alpha, void* deviceX,
|
||||||
|
double beta, void* deviceY);
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,199 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
|
||||||
|
module hdiagdev_mod
|
||||||
|
use iso_c_binding
|
||||||
|
use core_mod
|
||||||
|
|
||||||
|
type, bind(c) :: hdiagdev_parms
|
||||||
|
integer(c_int) :: element_type
|
||||||
|
integer(c_int) :: rows
|
||||||
|
integer(c_int) :: columns
|
||||||
|
integer(c_int) :: hackSize
|
||||||
|
integer(c_int) :: hackCount
|
||||||
|
integer(c_int) :: allocationHeight
|
||||||
|
end type hdiagdev_parms
|
||||||
|
|
||||||
|
! interface computeHdiaHacksCount
|
||||||
|
! function computeHdiaHacksCountDouble(allocationHeight,hackOffsets,hackSize, &
|
||||||
|
! & diaValues,diaValuesPitch,diags,rows)&
|
||||||
|
! & result(res) bind(c,name='computeHdiaHackOffsetsDouble')
|
||||||
|
! use iso_c_binding
|
||||||
|
! integer(c_int) :: res
|
||||||
|
! integer(c_int), value :: rows,diags,diaValuesPitch,hackSize,elementType
|
||||||
|
! real(c_double) :: diaValues(rows,:)
|
||||||
|
! integer(c_int) :: hackOffsets,allocationHeight
|
||||||
|
! end function computeHdiaHacksCountDouble
|
||||||
|
! end interface computeHdiaHacksCount
|
||||||
|
|
||||||
|
interface
|
||||||
|
function FgetHdiagDeviceParams(rows, columns, allocationHeight,hackSize, &
|
||||||
|
& hackCount, elementType) &
|
||||||
|
& result(res) bind(c,name='getHdiagDeviceParams')
|
||||||
|
use iso_c_binding
|
||||||
|
import :: hdiagdev_parms
|
||||||
|
type(hdiagdev_parms) :: res
|
||||||
|
integer(c_int), value :: rows,columns,allocationHeight,&
|
||||||
|
& elementType,hackSize,hackCount
|
||||||
|
end function FgetHdiagDeviceParams
|
||||||
|
end interface
|
||||||
|
|
||||||
|
|
||||||
|
interface
|
||||||
|
function FallocHdiagDevice(deviceMat,rows,columns,allocationHeight,&
|
||||||
|
& hackSize,hackCount,elementType) &
|
||||||
|
& result(res) bind(c,name='FallocHdiagDevice')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
integer(c_int), value :: rows,columns,allocationHeight,hackSize,&
|
||||||
|
& hackCount,elementType
|
||||||
|
type(c_ptr) :: deviceMat
|
||||||
|
end function FallocHdiagDevice
|
||||||
|
end interface
|
||||||
|
|
||||||
|
|
||||||
|
interface
|
||||||
|
function sizeofHdiagDeviceDouble(deviceMat) &
|
||||||
|
& result(res) bind(c,name='sizeofHdiagDeviceDouble')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_long_long) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
end function sizeofHdiagDeviceDouble
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface writeHdiagDevice
|
||||||
|
|
||||||
|
function writeHdiagDeviceFloat(deviceMat,val,hdiaOffsets, hackOffsets) &
|
||||||
|
& result(res) bind(c,name='writeHdiagDeviceFloat')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
real(c_float) :: val(*)
|
||||||
|
integer(c_int) :: hdiaOffsets(*), hackOffsets(*)
|
||||||
|
end function writeHdiagDeviceFloat
|
||||||
|
|
||||||
|
function writeHdiagDeviceDouble(deviceMat,val,hdiaOffsets, hackOffsets) &
|
||||||
|
& result(res) bind(c,name='writeHdiagDeviceDouble')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
real(c_double) :: val(*)
|
||||||
|
integer(c_int) :: hdiaOffsets(*), hackOffsets(*)
|
||||||
|
end function writeHdiagDeviceDouble
|
||||||
|
|
||||||
|
end interface writeHdiagDevice
|
||||||
|
|
||||||
|
!!$ interface readHdiagDevice
|
||||||
|
!!$
|
||||||
|
!!$ function readHdiagDeviceFloat(deviceMat,val,ja,ldj,irn) &
|
||||||
|
!!$ & result(res) bind(c,name='readHdiagDeviceFloat')
|
||||||
|
!!$ use iso_c_binding
|
||||||
|
!!$ integer(c_int) :: res
|
||||||
|
!!$ type(c_ptr), value :: deviceMat
|
||||||
|
!!$ integer(c_int), value :: ldj
|
||||||
|
!!$ real(c_float) :: val(ldj,*)
|
||||||
|
!!$ integer(c_int) :: ja(ldj,*),irn(*)
|
||||||
|
!!$ end function readHdiagDeviceFloat
|
||||||
|
!!$
|
||||||
|
!!$ function readHdiagDeviceDouble(deviceMat,a,off,n) &
|
||||||
|
!!$ & result(res) bind(c,name='readHdiagDeviceDouble')
|
||||||
|
!!$ use iso_c_binding
|
||||||
|
!!$ integer(c_int) :: res
|
||||||
|
!!$ type(c_ptr), value :: deviceMat
|
||||||
|
!!$ integer(c_int),value :: n
|
||||||
|
!!$ real(c_double) :: a(n,*)
|
||||||
|
!!$ integer(c_int) :: off(*)
|
||||||
|
!!$ end function readHdiagDeviceDouble
|
||||||
|
!!$
|
||||||
|
!!$ function readHdiagDeviceFloatComplex(deviceMat,val,ja,ldj,irn) &
|
||||||
|
!!$ & result(res) bind(c,name='readHdiagDeviceFloatComplex')
|
||||||
|
!!$ use iso_c_binding
|
||||||
|
!!$ integer(c_int) :: res
|
||||||
|
!!$ type(c_ptr), value :: deviceMat
|
||||||
|
!!$ integer(c_int), value :: ldj
|
||||||
|
!!$ complex(c_float_complex) :: val(ldj,*)
|
||||||
|
!!$ integer(c_int) :: ja(ldj,*),irn(*)
|
||||||
|
!!$ end function readHdiagDeviceFloatComplex
|
||||||
|
!!$
|
||||||
|
!!$ function readHdiagDeviceDoubleComplex(deviceMat,val,ja,ldj,irn) &
|
||||||
|
!!$ & result(res) bind(c,name='readHdiagDeviceDoubleComplex')
|
||||||
|
!!$ use iso_c_binding
|
||||||
|
!!$ integer(c_int) :: res
|
||||||
|
!!$ type(c_ptr), value :: deviceMat
|
||||||
|
!!$ integer(c_int), value :: ldj
|
||||||
|
!!$ complex(c_double_complex) :: val(ldj,*)
|
||||||
|
!!$ integer(c_int) :: ja(ldj,*),irn(*)
|
||||||
|
!!$ end function readHdiagDeviceDoubleComplex
|
||||||
|
!!$
|
||||||
|
!!$ end interface readHdiagDevice
|
||||||
|
!!$
|
||||||
|
interface
|
||||||
|
subroutine freeHdiagDevice(deviceMat) &
|
||||||
|
& bind(c,name='freeHdiagDevice')
|
||||||
|
use iso_c_binding
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
end subroutine freeHdiagDevice
|
||||||
|
end interface
|
||||||
|
|
||||||
|
|
||||||
|
interface spmvHdiagDevice
|
||||||
|
function spmvHdiagDeviceFloat(deviceMat,alpha,x,beta,y) &
|
||||||
|
& result(res) bind(c,name='spmvHdiagDeviceFloat')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat, x, y
|
||||||
|
real(c_float),value :: alpha, beta
|
||||||
|
end function spmvHdiagDeviceFloat
|
||||||
|
function spmvHdiagDeviceDouble(deviceMat,alpha,x,beta,y) &
|
||||||
|
& result(res) bind(c,name='spmvHdiagDeviceDouble')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat, x, y
|
||||||
|
real(c_double),value :: alpha, beta
|
||||||
|
end function spmvHdiagDeviceDouble
|
||||||
|
!!$ function spmvHdiagDeviceFloatComplex(deviceMat,alpha,x,beta,y) &
|
||||||
|
!!$ & result(res) bind(c,name='spmvHdiagDeviceFloatComplex')
|
||||||
|
!!$ use iso_c_binding
|
||||||
|
!!$ integer(c_int) :: res
|
||||||
|
!!$ type(c_ptr), value :: deviceMat, x, y
|
||||||
|
!!$ complex(c_float_complex),value :: alpha, beta
|
||||||
|
!!$ end function spmvHdiagDeviceFloatComplex
|
||||||
|
!!$ function spmvHdiagDeviceDoubleComplex(deviceMat,alpha,x,beta,y) &
|
||||||
|
!!$ & result(res) bind(c,name='spmvHdiagDeviceDoubleComplex')
|
||||||
|
!!$ use iso_c_binding
|
||||||
|
!!$ integer(c_int) :: res
|
||||||
|
!!$ type(c_ptr), value :: deviceMat, x, y
|
||||||
|
!!$ complex(c_double_complex),value :: alpha, beta
|
||||||
|
!!$ end function spmvHdiagDeviceDoubleComplex
|
||||||
|
end interface spmvHdiagDevice
|
||||||
|
|
||||||
|
end module hdiagdev_mod
|
@ -0,0 +1,540 @@
|
|||||||
|
/* Parallel Sparse BLAS GPU plugin */
|
||||||
|
/* (C) Copyright 2013 */
|
||||||
|
|
||||||
|
/* Salvatore Filippone */
|
||||||
|
/* Alessandro Fanfarillo */
|
||||||
|
|
||||||
|
/* Redistribution and use in source and binary forms, with or without */
|
||||||
|
/* modification, are permitted provided that the following conditions */
|
||||||
|
/* are met: */
|
||||||
|
/* 1. Redistributions of source code must retain the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer. */
|
||||||
|
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||||
|
/* notice, this list of conditions, and the following disclaimer in the */
|
||||||
|
/* documentation and/or other materials provided with the distribution. */
|
||||||
|
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||||
|
/* not be used to endorse or promote products derived from this */
|
||||||
|
/* software without specific written permission. */
|
||||||
|
|
||||||
|
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||||
|
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||||
|
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||||
|
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||||
|
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||||
|
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||||
|
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||||
|
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||||
|
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||||
|
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
|
||||||
|
|
||||||
|
#include "hlldev.h"
|
||||||
|
//new
|
||||||
|
HllDeviceParams bldHllDeviceParams(unsigned int hksize, unsigned int rows, unsigned int nzeros,
|
||||||
|
unsigned int allocsize, unsigned int elementType, unsigned int firstIndex)
|
||||||
|
{
|
||||||
|
HllDeviceParams params;
|
||||||
|
|
||||||
|
params.elementType = elementType;
|
||||||
|
params.hackSize = hksize;
|
||||||
|
//numero di elementi di val
|
||||||
|
params.allocsize = allocsize;
|
||||||
|
params.rows = rows;
|
||||||
|
params.nzt = nzeros;
|
||||||
|
params.avgNzr = (nzeros+rows-1)/rows;
|
||||||
|
params.firstIndex = firstIndex;
|
||||||
|
return params;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
int getHllDeviceParams(HllDevice* mat, int *hksize, int *rows, int *nzeros,
|
||||||
|
int *allocsize, int *hackOffsLength, int *firstIndex, int *avgnzr)
|
||||||
|
{
|
||||||
|
|
||||||
|
|
||||||
|
if (mat!=NULL) {
|
||||||
|
*hackOffsLength = mat->hackOffsLength ;
|
||||||
|
*hksize = mat->hackSize ;
|
||||||
|
*nzeros = mat->nzt ;
|
||||||
|
*allocsize = mat->allocsize ;
|
||||||
|
*rows = mat->rows ;
|
||||||
|
*avgnzr = mat->avgNzr ;
|
||||||
|
*firstIndex = mat->baseIndex ;
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
} else {
|
||||||
|
return SPGPU_UNSUPPORTED;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//new
|
||||||
|
int allocHllDevice(void ** remoteMatrix, HllDeviceParams* params)
|
||||||
|
{
|
||||||
|
HllDevice *tmp = (HllDevice *)malloc(sizeof(HllDevice));
|
||||||
|
int ret=SPGPU_SUCCESS;
|
||||||
|
*remoteMatrix = (void *)tmp;
|
||||||
|
|
||||||
|
tmp->hackSize = params->hackSize;
|
||||||
|
|
||||||
|
tmp->allocsize = params->allocsize;
|
||||||
|
|
||||||
|
tmp->rows = params->rows;
|
||||||
|
tmp->avgNzr = params->avgNzr;
|
||||||
|
tmp->nzt = params->nzt;
|
||||||
|
tmp->baseIndex = params->firstIndex;
|
||||||
|
//fprintf(stderr,"Allocating HLG with %d avgNzr\n",params->avgNzr);
|
||||||
|
tmp->hackOffsLength = (int)(tmp->rows+tmp->hackSize-1)/tmp->hackSize;
|
||||||
|
|
||||||
|
//printf("hackOffsLength %d\n",tmp->hackOffsLength);
|
||||||
|
|
||||||
|
if (ret == SPGPU_SUCCESS)
|
||||||
|
ret=allocRemoteBuffer((void **)&(tmp->rP), tmp->allocsize*sizeof(int));
|
||||||
|
|
||||||
|
if (ret == SPGPU_SUCCESS)
|
||||||
|
ret=allocRemoteBuffer((void **)&(tmp->rS), tmp->rows*sizeof(int));
|
||||||
|
|
||||||
|
if (ret == SPGPU_SUCCESS)
|
||||||
|
ret=allocRemoteBuffer((void **)&(tmp->diag), tmp->rows*sizeof(int));
|
||||||
|
|
||||||
|
if (ret == SPGPU_SUCCESS)
|
||||||
|
ret=allocRemoteBuffer((void **)&(tmp->hackOffs), ((tmp->hackOffsLength+1)*sizeof(int)));
|
||||||
|
|
||||||
|
if (params->elementType == SPGPU_TYPE_INT)
|
||||||
|
{
|
||||||
|
if (ret == SPGPU_SUCCESS)
|
||||||
|
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(int));
|
||||||
|
}
|
||||||
|
else if (params->elementType == SPGPU_TYPE_FLOAT)
|
||||||
|
{
|
||||||
|
if (ret == SPGPU_SUCCESS)
|
||||||
|
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(float));
|
||||||
|
}
|
||||||
|
else if (params->elementType == SPGPU_TYPE_DOUBLE)
|
||||||
|
{
|
||||||
|
if (ret == SPGPU_SUCCESS)
|
||||||
|
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(double));
|
||||||
|
}
|
||||||
|
else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT)
|
||||||
|
{
|
||||||
|
if (ret == SPGPU_SUCCESS)
|
||||||
|
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuFloatComplex));
|
||||||
|
}
|
||||||
|
else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE)
|
||||||
|
{
|
||||||
|
if (ret == SPGPU_SUCCESS)
|
||||||
|
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuDoubleComplex));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return SPGPU_UNSUPPORTED; // Unsupported params
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
void freeHllDevice(void* remoteMatrix)
|
||||||
|
{
|
||||||
|
HllDevice *devMat = (HllDevice *) remoteMatrix;
|
||||||
|
//fprintf(stderr,"freeHllDevice\n");
|
||||||
|
if (devMat != NULL) {
|
||||||
|
freeRemoteBuffer(devMat->rS);
|
||||||
|
freeRemoteBuffer(devMat->diag);
|
||||||
|
freeRemoteBuffer(devMat->rP);
|
||||||
|
freeRemoteBuffer(devMat->cM);
|
||||||
|
free(remoteMatrix);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//new
|
||||||
|
int FallocHllDevice(void** deviceMat,unsigned int hksize, unsigned int rows, unsigned int nzeros,
|
||||||
|
unsigned int allocsize,
|
||||||
|
unsigned int elementType, unsigned int firstIndex)
|
||||||
|
{ int i;
|
||||||
|
HllDeviceParams p;
|
||||||
|
|
||||||
|
p = bldHllDeviceParams(hksize, rows, nzeros, allocsize, elementType, firstIndex);
|
||||||
|
i = allocHllDevice(deviceMat, &p);
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i);
|
||||||
|
}
|
||||||
|
return(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int spmvHllDeviceFloat(void *deviceMat, float alpha, void* deviceX,
|
||||||
|
float beta, void* deviceY)
|
||||||
|
{
|
||||||
|
HllDevice *devMat = (HllDevice *) deviceMat;
|
||||||
|
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||||
|
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
|
||||||
|
#ifdef VERBOSE
|
||||||
|
/*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/
|
||||||
|
/*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/
|
||||||
|
/*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/
|
||||||
|
#endif
|
||||||
|
/*dspmdmm_gpu ((double *)z->v_, y->count_, y->pitch_, (double *)y->v_, alpha, (double *)devMat->cM,
|
||||||
|
devMat->rP, devMat->rS, devMat->rows, devMat->pitch, (double *)x->v_, beta,
|
||||||
|
devMat->baseIndex);*/
|
||||||
|
|
||||||
|
spgpuShellspmv (handle, (float *)y->v_, (float *)y->v_, alpha, (float *)devMat->cM,
|
||||||
|
devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL,
|
||||||
|
devMat->avgNzr, devMat->rows, (float *)x->v_, beta, devMat->baseIndex);
|
||||||
|
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
//new
|
||||||
|
int spmvHllDeviceDouble(void *deviceMat, double alpha, void* deviceX,
|
||||||
|
double beta, void* deviceY)
|
||||||
|
{
|
||||||
|
HllDevice *devMat = (HllDevice *) deviceMat;
|
||||||
|
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||||
|
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
|
||||||
|
#ifdef VERBOSE
|
||||||
|
/*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/
|
||||||
|
/*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/
|
||||||
|
/*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/
|
||||||
|
#endif
|
||||||
|
/*dspmdmm_gpu ((double *)z->v_, y->count_, y->pitch_, (double *)y->v_, alpha, (double *)devMat->cM,
|
||||||
|
devMat->rP, devMat->rS, devMat->rows, devMat->pitch, (double *)x->v_, beta,
|
||||||
|
devMat->baseIndex);*/
|
||||||
|
|
||||||
|
spgpuDhellspmv (handle, (double *)y->v_, (double *)y->v_, alpha, (double*)devMat->cM,
|
||||||
|
devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL,
|
||||||
|
devMat->avgNzr, devMat->rows, (double *)x->v_, beta, devMat->baseIndex);
|
||||||
|
//cudaSync();
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int spmvHllDeviceFloatComplex(void *deviceMat, float complex alpha, void* deviceX,
|
||||||
|
float complex beta, void* deviceY)
|
||||||
|
{
|
||||||
|
HllDevice *devMat = (HllDevice *) deviceMat;
|
||||||
|
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||||
|
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
|
||||||
|
cuFloatComplex a = make_cuFloatComplex(crealf(alpha),cimagf(alpha));
|
||||||
|
cuFloatComplex b = make_cuFloatComplex(crealf(beta),cimagf(beta));
|
||||||
|
#ifdef VERBOSE
|
||||||
|
/*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/
|
||||||
|
/*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/
|
||||||
|
/*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/
|
||||||
|
#endif
|
||||||
|
/*dspmdmm_gpu ((double *)z->v_, y->count_, y->pitch_, (double *)y->v_, alpha, (double *)devMat->cM,
|
||||||
|
devMat->rP, devMat->rS, devMat->rows, devMat->pitch, (double *)x->v_, beta,
|
||||||
|
devMat->baseIndex);*/
|
||||||
|
|
||||||
|
spgpuChellspmv (handle, (cuFloatComplex *)y->v_, (cuFloatComplex *)y->v_, a, (cuFloatComplex *)devMat->cM,
|
||||||
|
devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL,
|
||||||
|
devMat->avgNzr, devMat->rows, (cuFloatComplex *)x->v_, b, devMat->baseIndex);
|
||||||
|
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int spmvHllDeviceDoubleComplex(void *deviceMat, double complex alpha, void* deviceX,
|
||||||
|
double complex beta, void* deviceY)
|
||||||
|
{
|
||||||
|
HllDevice *devMat = (HllDevice *) deviceMat;
|
||||||
|
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||||
|
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
|
||||||
|
cuDoubleComplex a = make_cuDoubleComplex(creal(alpha),cimag(alpha));
|
||||||
|
cuDoubleComplex b = make_cuDoubleComplex(creal(beta),cimag(beta));
|
||||||
|
#ifdef VERBOSE
|
||||||
|
/*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/
|
||||||
|
/*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/
|
||||||
|
/*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/
|
||||||
|
#endif
|
||||||
|
|
||||||
|
spgpuZhellspmv (handle, (cuDoubleComplex *)y->v_, (cuDoubleComplex *)y->v_, a, (cuDoubleComplex *)devMat->cM,
|
||||||
|
devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL,
|
||||||
|
devMat->avgNzr,devMat->rows, (cuDoubleComplex *)x->v_, b, devMat->baseIndex);
|
||||||
|
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag)
|
||||||
|
{ int i;
|
||||||
|
HllDevice *devMat = (HllDevice *) deviceMat;
|
||||||
|
// Ex updateFromHost function
|
||||||
|
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float));
|
||||||
|
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||||
|
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||||
|
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||||||
|
i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||||||
|
//i = writeEllDevice(deviceMat, (void *) val, ja, irn);
|
||||||
|
/*if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i);
|
||||||
|
}*/
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag)
|
||||||
|
{ int i;
|
||||||
|
HllDevice *devMat = (HllDevice *) deviceMat;
|
||||||
|
// Ex updateFromHost function
|
||||||
|
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double));
|
||||||
|
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||||
|
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||||
|
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||||||
|
i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||||||
|
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i);
|
||||||
|
}*/
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeHllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int *hkoffs, int* irn, int *idiag)
|
||||||
|
{ int i;
|
||||||
|
HllDevice *devMat = (HllDevice *) deviceMat;
|
||||||
|
// Ex updateFromHost function
|
||||||
|
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex));
|
||||||
|
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||||
|
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||||
|
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||||||
|
i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||||||
|
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i);
|
||||||
|
}*/
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int writeHllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int *hkoffs, int* irn, int *idiag)
|
||||||
|
{ int i;
|
||||||
|
HllDevice *devMat = (HllDevice *) deviceMat;
|
||||||
|
// Ex updateFromHost function
|
||||||
|
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex));
|
||||||
|
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||||
|
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||||
|
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||||||
|
i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||||||
|
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i);
|
||||||
|
}*/
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int readHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag)
|
||||||
|
{ int i;
|
||||||
|
HllDevice *devMat = (HllDevice *) deviceMat;
|
||||||
|
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(float));
|
||||||
|
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||||
|
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||||
|
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||||||
|
i = readRemoteBuffer((void *) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||||||
|
/*i = readEllDevice(deviceMat, (void *) val, ja, irn);
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i);
|
||||||
|
}*/
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int readHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag)
|
||||||
|
{ int i;
|
||||||
|
HllDevice *devMat = (HllDevice *) deviceMat;
|
||||||
|
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(double));
|
||||||
|
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||||
|
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||||
|
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||||||
|
i = readRemoteBuffer((void *) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||||||
|
/*if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
|
||||||
|
}*/
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int readHllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int *hkoffs, int* irn, int *idiag)
|
||||||
|
{ int i;
|
||||||
|
HllDevice *devMat = (HllDevice *) deviceMat;
|
||||||
|
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex));
|
||||||
|
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||||
|
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||||
|
i = readRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||||||
|
i = readRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||||||
|
/*if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
|
||||||
|
}*/
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int readHllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int *hkoffs, int* irn, int *idiag)
|
||||||
|
{ int i;
|
||||||
|
HllDevice *devMat = (HllDevice *) deviceMat;
|
||||||
|
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex));
|
||||||
|
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||||||
|
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||||||
|
i = readRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||||||
|
i = readRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||||||
|
/*if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
|
||||||
|
}*/
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
// New copy routines.
|
||||||
|
|
||||||
|
int psiCopyCooToHlgFloat(int nr, int nc, int nza, int hacksz, int noffs, int isz,
|
||||||
|
int *irn, int *hoffs, int *idisp, int *ja,
|
||||||
|
float *val, void *deviceMat)
|
||||||
|
{ int i,j;
|
||||||
|
spgpuHandle_t handle;
|
||||||
|
HllDevice *devMat = (HllDevice *) deviceMat;
|
||||||
|
float *devVal;
|
||||||
|
int *devIdisp, *devJa;
|
||||||
|
int *tja;
|
||||||
|
//fprintf(stderr,"devMat: %p\n",devMat);
|
||||||
|
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
|
||||||
|
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
|
||||||
|
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(float));
|
||||||
|
|
||||||
|
// fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength);
|
||||||
|
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(float));
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
|
||||||
|
//cudaSync();
|
||||||
|
|
||||||
|
handle = psb_cudaGetHandle();
|
||||||
|
psi_cuda_s_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz,
|
||||||
|
(int *) devMat->rS, (int *) devMat->hackOffs,
|
||||||
|
devIdisp,devJa,devVal,
|
||||||
|
(int *) devMat->diag, (int *) devMat->rP, (float *)devMat->cM);
|
||||||
|
|
||||||
|
freeRemoteBuffer(devIdisp);
|
||||||
|
freeRemoteBuffer(devJa);
|
||||||
|
freeRemoteBuffer(devVal);
|
||||||
|
|
||||||
|
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceFloat",i);
|
||||||
|
}
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int psiCopyCooToHlgDouble(int nr, int nc, int nza, int hacksz, int noffs, int isz,
|
||||||
|
int *irn, int *hoffs, int *idisp, int *ja,
|
||||||
|
double *val, void *deviceMat)
|
||||||
|
{ int i,j;
|
||||||
|
spgpuHandle_t handle;
|
||||||
|
HllDevice *devMat = (HllDevice *) deviceMat;
|
||||||
|
double *devVal;
|
||||||
|
int *devIdisp, *devJa;
|
||||||
|
int *tja;
|
||||||
|
//fprintf(stderr,"devMat: %p\n",devMat);
|
||||||
|
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
|
||||||
|
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
|
||||||
|
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(double));
|
||||||
|
|
||||||
|
// fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength);
|
||||||
|
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(double));
|
||||||
|
//fprintf(stderr,"WriteRemoteBuffer val %d\n",i);
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
|
||||||
|
//fprintf(stderr,"WriteRemoteBuffer ja %d\n",i);
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
|
||||||
|
//fprintf(stderr,"WriteRemoteBuffer irn %d\n",i);
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||||||
|
//fprintf(stderr,"WriteRemoteBuffer hoffs %d\n",i);
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
|
||||||
|
//fprintf(stderr,"WriteRemoteBuffer idisp %d\n",i);
|
||||||
|
//cudaSync();
|
||||||
|
//fprintf(stderr," hacksz: %d \n",hacksz);
|
||||||
|
handle = psb_cudaGetHandle();
|
||||||
|
psi_cuda_d_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz,
|
||||||
|
(int *) devMat->rS, (int *) devMat->hackOffs,
|
||||||
|
devIdisp,devJa,devVal,
|
||||||
|
(int *) devMat->diag, (int *) devMat->rP, (double *)devMat->cM);
|
||||||
|
|
||||||
|
freeRemoteBuffer(devIdisp);
|
||||||
|
freeRemoteBuffer(devJa);
|
||||||
|
freeRemoteBuffer(devVal);
|
||||||
|
|
||||||
|
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceDouble",i);
|
||||||
|
}
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int psiCopyCooToHlgFloatComplex(int nr, int nc, int nza, int hacksz, int noffs, int isz,
|
||||||
|
int *irn, int *hoffs, int *idisp, int *ja,
|
||||||
|
float complex *val, void *deviceMat)
|
||||||
|
{ int i,j;
|
||||||
|
spgpuHandle_t handle;
|
||||||
|
HllDevice *devMat = (HllDevice *) deviceMat;
|
||||||
|
float complex *devVal;
|
||||||
|
int *devIdisp, *devJa;
|
||||||
|
int *tja;
|
||||||
|
//fprintf(stderr,"devMat: %p\n",devMat);
|
||||||
|
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
|
||||||
|
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
|
||||||
|
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuFloatComplex));
|
||||||
|
|
||||||
|
// fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength);
|
||||||
|
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuFloatComplex));
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
|
||||||
|
//cudaSync();
|
||||||
|
|
||||||
|
handle = psb_cudaGetHandle();
|
||||||
|
psi_cuda_c_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz,
|
||||||
|
(int *) devMat->rS, (int *) devMat->hackOffs,
|
||||||
|
devIdisp,devJa,devVal,
|
||||||
|
(int *) devMat->diag,(int *) devMat->rP, (float complex *)devMat->cM);
|
||||||
|
|
||||||
|
freeRemoteBuffer(devIdisp);
|
||||||
|
freeRemoteBuffer(devJa);
|
||||||
|
freeRemoteBuffer(devVal);
|
||||||
|
|
||||||
|
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceFloatComplex",i);
|
||||||
|
}
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int psiCopyCooToHlgDoubleComplex(int nr, int nc, int nza, int hacksz, int noffs, int isz,
|
||||||
|
int *irn, int *hoffs, int *idisp, int *ja,
|
||||||
|
double complex *val, void *deviceMat)
|
||||||
|
{ int i,j;
|
||||||
|
spgpuHandle_t handle;
|
||||||
|
HllDevice *devMat = (HllDevice *) deviceMat;
|
||||||
|
double complex *devVal;
|
||||||
|
int *devIdisp, *devJa;
|
||||||
|
int *tja;
|
||||||
|
//fprintf(stderr,"devMat: %p\n",devMat);
|
||||||
|
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
|
||||||
|
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
|
||||||
|
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuDoubleComplex));
|
||||||
|
|
||||||
|
// fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength);
|
||||||
|
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuDoubleComplex));
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||||||
|
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
|
||||||
|
//cudaSync();
|
||||||
|
|
||||||
|
handle = psb_cudaGetHandle();
|
||||||
|
psi_cuda_z_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz,
|
||||||
|
(int *) devMat->rS, (int *) devMat->hackOffs,
|
||||||
|
devIdisp,devJa,devVal,
|
||||||
|
(int *) devMat->diag,(int *) devMat->rP, (double complex *)devMat->cM);
|
||||||
|
|
||||||
|
freeRemoteBuffer(devIdisp);
|
||||||
|
freeRemoteBuffer(devJa);
|
||||||
|
freeRemoteBuffer(devVal);
|
||||||
|
|
||||||
|
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
|
||||||
|
if (i != 0) {
|
||||||
|
fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceDoubleComplex",i);
|
||||||
|
}
|
||||||
|
return SPGPU_SUCCESS;
|
||||||
|
}
|
@ -0,0 +1,156 @@
|
|||||||
|
/* Parallel Sparse BLAS GPU plugin */
|
||||||
|
/* (C) Copyright 2013 */
|
||||||
|
|
||||||
|
/* Salvatore Filippone */
|
||||||
|
/* Alessandro Fanfarillo */
|
||||||
|
|
||||||
|
/* Redistribution and use in source and binary forms, with or without */
|
||||||
|
/* modification, are permitted provided that the following conditions */
|
||||||
|
/* are met: */
|
||||||
|
/* 1. Redistributions of source code must retain the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer. */
|
||||||
|
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||||
|
/* notice, this list of conditions, and the following disclaimer in the */
|
||||||
|
/* documentation and/or other materials provided with the distribution. */
|
||||||
|
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||||
|
/* not be used to endorse or promote products derived from this */
|
||||||
|
/* software without specific written permission. */
|
||||||
|
|
||||||
|
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||||
|
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||||
|
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||||
|
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||||
|
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||||
|
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||||
|
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||||
|
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||||
|
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||||
|
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef _HLLDEV_H_
|
||||||
|
#define _HLLDEV_H_
|
||||||
|
|
||||||
|
#include "cintrf.h"
|
||||||
|
#include "hell.h"
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct hlldevice
|
||||||
|
{
|
||||||
|
// Compressed matrix
|
||||||
|
void *cM; //it can be float or double
|
||||||
|
|
||||||
|
// row pointers (same size of cM)
|
||||||
|
int *rP;
|
||||||
|
|
||||||
|
// row size and diagonal position
|
||||||
|
int *rS;
|
||||||
|
int *diag;
|
||||||
|
|
||||||
|
int *hackOffs;
|
||||||
|
|
||||||
|
int rows;
|
||||||
|
int avgNzr;
|
||||||
|
int hackOffsLength;
|
||||||
|
int nzt;
|
||||||
|
int hackSize; //must be multiple of 32
|
||||||
|
|
||||||
|
//matrix size (uncompressed)
|
||||||
|
//int rows;
|
||||||
|
//int columns;
|
||||||
|
|
||||||
|
//allocation size
|
||||||
|
int allocsize;
|
||||||
|
|
||||||
|
/*(i.e. 0 for C, 1 for Fortran)*/
|
||||||
|
int baseIndex;
|
||||||
|
} HllDevice;
|
||||||
|
|
||||||
|
typedef struct hlldeviceparams
|
||||||
|
{
|
||||||
|
|
||||||
|
unsigned int elementType;
|
||||||
|
|
||||||
|
unsigned int hackSize;
|
||||||
|
|
||||||
|
// Number of rows.
|
||||||
|
// Used to allocate rS array
|
||||||
|
unsigned int rows;
|
||||||
|
unsigned int avgNzr;
|
||||||
|
unsigned int nzt;
|
||||||
|
//unsigned int hackOffsLength;
|
||||||
|
|
||||||
|
// Number of columns.
|
||||||
|
// Used for error-checking
|
||||||
|
// unsigned int columns;
|
||||||
|
|
||||||
|
unsigned int allocsize;
|
||||||
|
|
||||||
|
// First index (e.g 0 or 1)
|
||||||
|
unsigned int firstIndex;
|
||||||
|
|
||||||
|
} HllDeviceParams;
|
||||||
|
|
||||||
|
|
||||||
|
HllDeviceParams bldHllDeviceParams(unsigned int hksize, unsigned int rows, unsigned int nzeros,
|
||||||
|
unsigned int allocsize,
|
||||||
|
unsigned int elementType, unsigned int firstIndex);
|
||||||
|
int getHllDeviceParams(HllDevice* mat, int *hksize, int *rows, int *nzeros,
|
||||||
|
int *allocsize, int *hackOffsLength, int *firstIndex, int *avgnzr);
|
||||||
|
int FallocHllDevice(void** deviceMat,unsigned int hksize, unsigned int rows, unsigned int nzeros,
|
||||||
|
unsigned int allocsize, unsigned int elementType, unsigned int firstIndex);
|
||||||
|
int allocHllDevice(void ** remoteMatrix, HllDeviceParams* params);
|
||||||
|
void freeHllDevice(void* remoteMatrix);
|
||||||
|
int writeHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag);
|
||||||
|
int writeHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag);
|
||||||
|
int writeHllDeviceFloatComplex(void* deviceMat, float complex* val,
|
||||||
|
int* ja, int *hkoffs, int* irn, int *idiag);
|
||||||
|
int writeHllDeviceDoubleComplex(void* deviceMat, double complex* val,
|
||||||
|
int* ja, int *hkoffs, int* irn, int *idiag);
|
||||||
|
int readHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag);
|
||||||
|
int readHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag);
|
||||||
|
int readHllDeviceFloatComplex(void* deviceMat, float complex* val,
|
||||||
|
int* ja, int *hkoffs, int* irn, int *idiag);
|
||||||
|
int readHllDeviceDoubleComplex(void* deviceMat, double complex* val,
|
||||||
|
int* ja, int *hkoffs, int* irn, int *idiag);
|
||||||
|
|
||||||
|
|
||||||
|
int psiCopyCooToHlgFloat(int nr, int nc, int nza, int hacksz, int noffs, int isz,
|
||||||
|
int *irn, int *hoffs, int *idisp, int *ja,
|
||||||
|
float *val, void *deviceMat);
|
||||||
|
int psiCopyCooToHlgDouble(int nr, int nc, int nza, int hacksz, int noffs, int isz,
|
||||||
|
int *irn, int *hoffs, int *idisp, int *ja,
|
||||||
|
double *val, void *deviceMat);
|
||||||
|
int psiCopyCooToHlgFloatComplex(int nr, int nc, int nza, int hacksz,
|
||||||
|
int noffs, int isz, int *irn,
|
||||||
|
int *hoffs, int *idisp, int *ja,
|
||||||
|
float complex *val, void *deviceMat);
|
||||||
|
int psiCopyCooToHlgDoubleComplex(int nr, int nc, int nza, int hacksz,
|
||||||
|
int noffs, int isz, int *irn,
|
||||||
|
int *hoffs, int *idisp, int *ja,
|
||||||
|
double complex *val, void *deviceMat);
|
||||||
|
|
||||||
|
int psi_cuda_s_CopyCooToHlg(spgpuHandle_t handle,int nr, int nc, int nza,
|
||||||
|
int baseIdx, int hacksz, int noffs, int isz,
|
||||||
|
int *irn, int *hoffs, int *idisp,
|
||||||
|
int *ja, float *val,
|
||||||
|
int *idiag, int *rP, float *cM);
|
||||||
|
int psi_cuda_d_CopyCooToHlg(spgpuHandle_t handle,int nr, int nc, int nza,
|
||||||
|
int baseIdx, int hacksz, int noffs, int isz,
|
||||||
|
int *irn, int *hoffs, int *idisp,
|
||||||
|
int *ja, double *val,
|
||||||
|
int *idiag, int *rP, double *cM);
|
||||||
|
int psi_cuda_c_CopyCooToHlg(spgpuHandle_t handle,int nr, int nc, int nza,
|
||||||
|
int baseIdx, int hacksz, int noffs, int isz,
|
||||||
|
int *irn, int *hoffs, int *idisp,
|
||||||
|
int *ja, float complex *val,
|
||||||
|
int *idiag, int *rP, float complex *cM);
|
||||||
|
int psi_cuda_z_CopyCooToHlg(spgpuHandle_t handle,int nr, int nc, int nza,
|
||||||
|
int baseIdx, int hacksz, int noffs, int isz,
|
||||||
|
int *irn, int *hoffs, int *idisp,
|
||||||
|
int *ja, double complex *val,
|
||||||
|
int *idiag, int *rP, double complex *cM);
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,268 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
|
||||||
|
module hlldev_mod
|
||||||
|
use iso_c_binding
|
||||||
|
use core_mod
|
||||||
|
|
||||||
|
type, bind(c) :: hlldev_parms
|
||||||
|
integer(c_int) :: element_type
|
||||||
|
integer(c_int) :: hackSize
|
||||||
|
integer(c_int) :: rows
|
||||||
|
integer(c_int) :: avgNzr
|
||||||
|
integer(c_int) :: allocsize
|
||||||
|
integer(c_int) :: firstIndex
|
||||||
|
end type hlldev_parms
|
||||||
|
|
||||||
|
interface
|
||||||
|
function bldHllDeviceParams(hksize, rows, nzeros, allocsize, elementType, firstIndex) &
|
||||||
|
& result(res) bind(c,name='bldHllDeviceParams')
|
||||||
|
use iso_c_binding
|
||||||
|
import :: hlldev_parms
|
||||||
|
type(hlldev_parms) :: res
|
||||||
|
integer(c_int), value :: hksize,rows,nzeros,allocsize,elementType,firstIndex
|
||||||
|
end function BldHllDeviceParams
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface
|
||||||
|
function getHllDeviceParams(deviceMat,hksize, rows, nzeros, allocsize,&
|
||||||
|
& hackOffsLength, firstIndex,avgnzr) &
|
||||||
|
& result(res) bind(c,name='getHllDeviceParams')
|
||||||
|
use iso_c_binding
|
||||||
|
import :: hlldev_parms
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
integer(c_int) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr
|
||||||
|
end function GetHllDeviceParams
|
||||||
|
end interface
|
||||||
|
|
||||||
|
|
||||||
|
interface
|
||||||
|
function FallocHllDevice(deviceMat,hksize,rows, nzeros,allocsize, &
|
||||||
|
& elementType,firstIndex) &
|
||||||
|
& result(res) bind(c,name='FallocHllDevice')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
integer(c_int), value :: hksize,rows,nzeros,allocsize,elementType,firstIndex
|
||||||
|
type(c_ptr) :: deviceMat
|
||||||
|
end function FallocHllDevice
|
||||||
|
end interface
|
||||||
|
|
||||||
|
|
||||||
|
interface writeHllDevice
|
||||||
|
|
||||||
|
function writeHllDeviceFloat(deviceMat,val,ja,hkoffs,irn,idiag) &
|
||||||
|
& result(res) bind(c,name='writeHllDeviceFloat')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
real(c_float) :: val(*)
|
||||||
|
integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*)
|
||||||
|
end function writeHllDeviceFloat
|
||||||
|
|
||||||
|
function writeHllDeviceDouble(deviceMat,val,ja,hkoffs,irn,idiag) &
|
||||||
|
& result(res) bind(c,name='writeHllDeviceDouble')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
real(c_double) :: val(*)
|
||||||
|
integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*)
|
||||||
|
end function writeHllDeviceDouble
|
||||||
|
|
||||||
|
function writeHllDeviceFloatComplex(deviceMat,val,ja,hkoffs,irn,idiag) &
|
||||||
|
& result(res) bind(c,name='writeHllDeviceFloatComplex')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
complex(c_float_complex) :: val(*)
|
||||||
|
integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*)
|
||||||
|
end function writeHllDeviceFloatComplex
|
||||||
|
|
||||||
|
function writeHllDeviceDoubleComplex(deviceMat,val,ja,hkoffs,irn,idiag) &
|
||||||
|
& result(res) bind(c,name='writeHllDeviceDoubleComplex')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
complex(c_double_complex) :: val(*)
|
||||||
|
integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*)
|
||||||
|
end function writeHllDeviceDoubleComplex
|
||||||
|
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface readHllDevice
|
||||||
|
|
||||||
|
function readHllDeviceFloat(deviceMat,val,ja,hkoffs,irn,idiag) &
|
||||||
|
& result(res) bind(c,name='readHllDeviceFloat')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
real(c_float) :: val(*)
|
||||||
|
integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*)
|
||||||
|
end function readHllDeviceFloat
|
||||||
|
|
||||||
|
function readHllDeviceDouble(deviceMat,val,ja,hkoffs,irn,idiag) &
|
||||||
|
& result(res) bind(c,name='readHllDeviceDouble')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
real(c_double) :: val(*)
|
||||||
|
integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*)
|
||||||
|
end function readHllDeviceDouble
|
||||||
|
|
||||||
|
function readHllDeviceFloatComplex(deviceMat,val,ja,hkoffs,irn,idiag) &
|
||||||
|
& result(res) bind(c,name='readHllDeviceFloatComplex')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
complex(c_float_complex) :: val(*)
|
||||||
|
integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*)
|
||||||
|
end function readHllDeviceFloatComplex
|
||||||
|
|
||||||
|
function readHllDeviceDoubleComplex(deviceMat,val,ja,hkoffs,irn,idiag) &
|
||||||
|
& result(res) bind(c,name='readHllDeviceDoubleComplex')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
complex(c_double_complex) :: val(*)
|
||||||
|
integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*)
|
||||||
|
end function readHllDeviceDoubleComplex
|
||||||
|
|
||||||
|
end interface
|
||||||
|
|
||||||
|
interface
|
||||||
|
subroutine freeHllDevice(deviceMat) &
|
||||||
|
& bind(c,name='freeHllDevice')
|
||||||
|
use iso_c_binding
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
end subroutine freeHllDevice
|
||||||
|
end interface
|
||||||
|
|
||||||
|
|
||||||
|
interface psi_CopyCooToHlg
|
||||||
|
function psiCopyCooToHlgFloat(nr, nc, nza, hacksz, noffs, isz, irn, &
|
||||||
|
& hoffs, idisp, ja, val, deviceMat) &
|
||||||
|
& result(res) bind(c,name='psiCopyCooToHlgFloat')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
integer(c_int), value :: nr,nc,nza,hacksz,noffs,isz
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
real(c_float) :: val(*)
|
||||||
|
integer(c_int) :: irn(*), idisp(*), ja(*), hoffs(*)
|
||||||
|
end function psiCopyCooToHlgFloat
|
||||||
|
function psiCopyCooToHlgDouble(nr, nc, nza, hacksz, noffs, isz, irn, &
|
||||||
|
& hoffs, idisp, ja, val, deviceMat) &
|
||||||
|
& result(res) bind(c,name='psiCopyCooToHlgDouble')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
integer(c_int), value :: nr,nc,nza,hacksz,noffs,isz
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
real(c_double) :: val(*)
|
||||||
|
integer(c_int) :: irn(*), idisp(*), ja(*), hoffs(*)
|
||||||
|
end function psiCopyCooToHlgDouble
|
||||||
|
function psiCopyCooToHlgFloatComplex(nr, nc, nza, hacksz, noffs, isz, irn, &
|
||||||
|
& hoffs, idisp, ja, val, deviceMat) &
|
||||||
|
& result(res) bind(c,name='psiCopyCooToHlgFloatComplex')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
integer(c_int), value :: nr,nc,nza,hacksz,noffs,isz
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
complex(c_float_complex) :: val(*)
|
||||||
|
integer(c_int) :: irn(*), idisp(*), ja(*), hoffs(*)
|
||||||
|
end function psiCopyCooToHlgFloatComplex
|
||||||
|
function psiCopyCooToHlgDoubleComplex(nr, nc, nza, hacksz, noffs, isz, irn, &
|
||||||
|
& hoffs, idisp, ja, val, deviceMat) &
|
||||||
|
& result(res) bind(c,name='psiCopyCooToHlgDoubleComplex')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
integer(c_int), value :: nr,nc,nza,hacksz,noffs,isz
|
||||||
|
type(c_ptr), value :: deviceMat
|
||||||
|
complex(c_double_complex) :: val(*)
|
||||||
|
integer(c_int) :: irn(*), idisp(*), ja(*), hoffs(*)
|
||||||
|
end function psiCopyCooToHlgDoubleComplex
|
||||||
|
end interface
|
||||||
|
|
||||||
|
|
||||||
|
!interface
|
||||||
|
! function getHllDevicePitch(deviceMat) &
|
||||||
|
! & bind(c,name='getHllDevicePitch') result(res)
|
||||||
|
! use iso_c_binding
|
||||||
|
! type(c_ptr), value :: deviceMat
|
||||||
|
! integer(c_int) :: res
|
||||||
|
! end function getHllDevicePitch
|
||||||
|
!end interface
|
||||||
|
|
||||||
|
!interface
|
||||||
|
! function getHllDeviceMaxRowSize(deviceMat) &
|
||||||
|
! & bind(c,name='getHllDeviceMaxRowSize') result(res)
|
||||||
|
! use iso_c_binding
|
||||||
|
! type(c_ptr), value :: deviceMat
|
||||||
|
! integer(c_int) :: res
|
||||||
|
! end function getHllDeviceMaxRowSize
|
||||||
|
!end interface
|
||||||
|
|
||||||
|
interface spmvHllDevice
|
||||||
|
|
||||||
|
function spmvHllDeviceFloat(deviceMat,alpha,x,beta,y) &
|
||||||
|
& result(res) bind(c,name='spmvHllDeviceFloat')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat, x, y
|
||||||
|
real(c_float),value :: alpha, beta
|
||||||
|
end function spmvHllDeviceFloat
|
||||||
|
|
||||||
|
function spmvHllDeviceDouble(deviceMat,alpha,x,beta,y) &
|
||||||
|
& result(res) bind(c,name='spmvHllDeviceDouble')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat, x, y
|
||||||
|
real(c_double),value :: alpha, beta
|
||||||
|
end function spmvHllDeviceDouble
|
||||||
|
|
||||||
|
function spmvHllDeviceFloatComplex(deviceMat,alpha,x,beta,y) &
|
||||||
|
& result(res) bind(c,name='spmvHllDeviceFloatComplex')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat, x, y
|
||||||
|
complex(c_float_complex),value :: alpha, beta
|
||||||
|
end function spmvHllDeviceFloatComplex
|
||||||
|
|
||||||
|
function spmvHllDeviceDoubleComplex(deviceMat,alpha,x,beta,y) &
|
||||||
|
& result(res) bind(c,name='spmvHllDeviceDoubleComplex')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int) :: res
|
||||||
|
type(c_ptr), value :: deviceMat, x, y
|
||||||
|
complex(c_double_complex),value :: alpha, beta
|
||||||
|
end function spmvHllDeviceDoubleComplex
|
||||||
|
|
||||||
|
end interface
|
||||||
|
|
||||||
|
end module hlldev_mod
|
@ -0,0 +1,297 @@
|
|||||||
|
include ../../Make.inc
|
||||||
|
LIBDIR=../../lib
|
||||||
|
INCDIR=../../include
|
||||||
|
MODDIR=../../modules
|
||||||
|
PSBLAS_LIB= -L$(PSBLIBDIR) -lpsb_util -lpsb_base
|
||||||
|
#-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base
|
||||||
|
LDLIBS=$(PSBLDLIBS)
|
||||||
|
#
|
||||||
|
# Compilers and such
|
||||||
|
#
|
||||||
|
#CCOPT= -g
|
||||||
|
FINCLUDES=$(FMFLAG).. $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FIFLAG)..
|
||||||
|
CINCLUDES=-I$(GPU_INCDIR) -I$(CUDA_INCDIR)
|
||||||
|
LIBNAME=libpsb_gpu.a
|
||||||
|
CXXDEFINES=$(PSBCXXDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES)
|
||||||
|
CDEFINES=$(PSBCDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES)
|
||||||
|
FDEFINES=$(PSBFDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES)
|
||||||
|
|
||||||
|
OBJS= \
|
||||||
|
psb_d_cuda_cp_csrg_from_coo.o \
|
||||||
|
psb_d_cuda_cp_csrg_from_fmt.o \
|
||||||
|
psb_d_cuda_cp_elg_from_coo.o \
|
||||||
|
psb_d_cuda_cp_elg_from_fmt.o \
|
||||||
|
psb_s_cuda_cp_csrg_from_coo.o \
|
||||||
|
psb_s_cuda_cp_csrg_from_fmt.o \
|
||||||
|
psb_s_cuda_csrg_allocate_mnnz.o \
|
||||||
|
psb_s_cuda_csrg_csmm.o \
|
||||||
|
psb_s_cuda_csrg_csmv.o \
|
||||||
|
psb_s_cuda_csrg_mold.o \
|
||||||
|
psb_s_cuda_csrg_reallocate_nz.o \
|
||||||
|
psb_s_cuda_csrg_scal.o \
|
||||||
|
psb_s_cuda_csrg_scals.o \
|
||||||
|
psb_s_cuda_csrg_from_gpu.o \
|
||||||
|
psb_s_cuda_csrg_to_gpu.o \
|
||||||
|
psb_s_cuda_csrg_vect_mv.o \
|
||||||
|
psb_s_cuda_csrg_inner_vect_sv.o \
|
||||||
|
psb_d_cuda_csrg_allocate_mnnz.o \
|
||||||
|
psb_d_cuda_csrg_csmm.o \
|
||||||
|
psb_d_cuda_csrg_csmv.o \
|
||||||
|
psb_d_cuda_csrg_mold.o \
|
||||||
|
psb_d_cuda_csrg_reallocate_nz.o \
|
||||||
|
psb_d_cuda_csrg_scal.o \
|
||||||
|
psb_d_cuda_csrg_scals.o \
|
||||||
|
psb_d_cuda_csrg_from_gpu.o \
|
||||||
|
psb_d_cuda_csrg_to_gpu.o \
|
||||||
|
psb_d_cuda_csrg_vect_mv.o \
|
||||||
|
psb_d_cuda_csrg_inner_vect_sv.o \
|
||||||
|
psb_d_cuda_elg_allocate_mnnz.o \
|
||||||
|
psb_d_cuda_elg_asb.o \
|
||||||
|
psb_d_cuda_elg_csmm.o \
|
||||||
|
psb_d_cuda_elg_csmv.o \
|
||||||
|
psb_d_cuda_elg_csput.o \
|
||||||
|
psb_d_cuda_elg_from_gpu.o \
|
||||||
|
psb_d_cuda_elg_inner_vect_sv.o \
|
||||||
|
psb_d_cuda_elg_mold.o \
|
||||||
|
psb_d_cuda_elg_reallocate_nz.o \
|
||||||
|
psb_d_cuda_elg_scal.o \
|
||||||
|
psb_d_cuda_elg_scals.o \
|
||||||
|
psb_d_cuda_elg_to_gpu.o \
|
||||||
|
psb_d_cuda_elg_vect_mv.o \
|
||||||
|
psb_d_cuda_mv_csrg_from_coo.o \
|
||||||
|
psb_d_cuda_mv_csrg_from_fmt.o \
|
||||||
|
psb_d_cuda_mv_elg_from_coo.o \
|
||||||
|
psb_d_cuda_mv_elg_from_fmt.o \
|
||||||
|
psb_s_cuda_mv_csrg_from_coo.o \
|
||||||
|
psb_s_cuda_mv_csrg_from_fmt.o \
|
||||||
|
psb_s_cuda_cp_elg_from_coo.o \
|
||||||
|
psb_s_cuda_cp_elg_from_fmt.o \
|
||||||
|
psb_s_cuda_elg_allocate_mnnz.o \
|
||||||
|
psb_s_cuda_elg_asb.o \
|
||||||
|
psb_s_cuda_elg_csmm.o \
|
||||||
|
psb_s_cuda_elg_csmv.o \
|
||||||
|
psb_s_cuda_elg_csput.o \
|
||||||
|
psb_s_cuda_elg_inner_vect_sv.o \
|
||||||
|
psb_s_cuda_elg_mold.o \
|
||||||
|
psb_s_cuda_elg_reallocate_nz.o \
|
||||||
|
psb_s_cuda_elg_scal.o \
|
||||||
|
psb_s_cuda_elg_scals.o \
|
||||||
|
psb_s_cuda_elg_to_gpu.o \
|
||||||
|
psb_s_cuda_elg_from_gpu.o \
|
||||||
|
psb_s_cuda_elg_vect_mv.o \
|
||||||
|
psb_s_cuda_mv_elg_from_coo.o \
|
||||||
|
psb_s_cuda_mv_elg_from_fmt.o \
|
||||||
|
psb_s_cuda_cp_hlg_from_fmt.o \
|
||||||
|
psb_s_cuda_cp_hlg_from_coo.o \
|
||||||
|
psb_d_cuda_cp_hlg_from_fmt.o \
|
||||||
|
psb_d_cuda_cp_hlg_from_coo.o \
|
||||||
|
psb_d_cuda_hlg_allocate_mnnz.o \
|
||||||
|
psb_d_cuda_hlg_csmm.o \
|
||||||
|
psb_d_cuda_hlg_csmv.o \
|
||||||
|
psb_d_cuda_hlg_inner_vect_sv.o \
|
||||||
|
psb_d_cuda_hlg_mold.o \
|
||||||
|
psb_d_cuda_hlg_reallocate_nz.o \
|
||||||
|
psb_d_cuda_hlg_scal.o \
|
||||||
|
psb_d_cuda_hlg_scals.o \
|
||||||
|
psb_d_cuda_hlg_from_gpu.o \
|
||||||
|
psb_d_cuda_hlg_to_gpu.o \
|
||||||
|
psb_d_cuda_hlg_vect_mv.o \
|
||||||
|
psb_s_cuda_hlg_allocate_mnnz.o \
|
||||||
|
psb_s_cuda_hlg_csmm.o \
|
||||||
|
psb_s_cuda_hlg_csmv.o \
|
||||||
|
psb_s_cuda_hlg_inner_vect_sv.o \
|
||||||
|
psb_s_cuda_hlg_mold.o \
|
||||||
|
psb_s_cuda_hlg_reallocate_nz.o \
|
||||||
|
psb_s_cuda_hlg_scal.o \
|
||||||
|
psb_s_cuda_hlg_scals.o \
|
||||||
|
psb_s_cuda_hlg_from_gpu.o \
|
||||||
|
psb_s_cuda_hlg_to_gpu.o \
|
||||||
|
psb_s_cuda_hlg_vect_mv.o \
|
||||||
|
psb_s_cuda_mv_hlg_from_coo.o \
|
||||||
|
psb_s_cuda_cp_hlg_from_coo.o \
|
||||||
|
psb_s_cuda_mv_hlg_from_fmt.o \
|
||||||
|
psb_d_cuda_mv_hlg_from_coo.o \
|
||||||
|
psb_d_cuda_cp_hlg_from_coo.o \
|
||||||
|
psb_d_cuda_mv_hlg_from_fmt.o \
|
||||||
|
psb_s_cuda_hybg_allocate_mnnz.o \
|
||||||
|
psb_s_cuda_hybg_csmm.o \
|
||||||
|
psb_s_cuda_hybg_csmv.o \
|
||||||
|
psb_s_cuda_hybg_reallocate_nz.o \
|
||||||
|
psb_s_cuda_hybg_scal.o \
|
||||||
|
psb_s_cuda_hybg_scals.o \
|
||||||
|
psb_s_cuda_hybg_to_gpu.o \
|
||||||
|
psb_s_cuda_hybg_vect_mv.o \
|
||||||
|
psb_s_cuda_hybg_inner_vect_sv.o \
|
||||||
|
psb_s_cuda_cp_hybg_from_coo.o \
|
||||||
|
psb_s_cuda_cp_hybg_from_fmt.o \
|
||||||
|
psb_s_cuda_mv_hybg_from_fmt.o \
|
||||||
|
psb_s_cuda_mv_hybg_from_coo.o \
|
||||||
|
psb_s_cuda_hybg_mold.o \
|
||||||
|
psb_d_cuda_hybg_allocate_mnnz.o \
|
||||||
|
psb_d_cuda_hybg_csmm.o \
|
||||||
|
psb_d_cuda_hybg_csmv.o \
|
||||||
|
psb_d_cuda_hybg_reallocate_nz.o \
|
||||||
|
psb_d_cuda_hybg_scal.o \
|
||||||
|
psb_d_cuda_hybg_scals.o \
|
||||||
|
psb_d_cuda_hybg_to_gpu.o \
|
||||||
|
psb_d_cuda_hybg_vect_mv.o \
|
||||||
|
psb_d_cuda_hybg_inner_vect_sv.o \
|
||||||
|
psb_d_cuda_cp_hybg_from_coo.o \
|
||||||
|
psb_d_cuda_cp_hybg_from_fmt.o \
|
||||||
|
psb_d_cuda_mv_hybg_from_fmt.o \
|
||||||
|
psb_d_cuda_mv_hybg_from_coo.o \
|
||||||
|
psb_d_cuda_hybg_mold.o \
|
||||||
|
psb_z_cuda_cp_csrg_from_coo.o \
|
||||||
|
psb_z_cuda_cp_csrg_from_fmt.o \
|
||||||
|
psb_z_cuda_cp_elg_from_coo.o \
|
||||||
|
psb_z_cuda_cp_elg_from_fmt.o \
|
||||||
|
psb_c_cuda_cp_csrg_from_coo.o \
|
||||||
|
psb_c_cuda_cp_csrg_from_fmt.o \
|
||||||
|
psb_c_cuda_csrg_allocate_mnnz.o \
|
||||||
|
psb_c_cuda_csrg_csmm.o \
|
||||||
|
psb_c_cuda_csrg_csmv.o \
|
||||||
|
psb_c_cuda_csrg_mold.o \
|
||||||
|
psb_c_cuda_csrg_reallocate_nz.o \
|
||||||
|
psb_c_cuda_csrg_scal.o \
|
||||||
|
psb_c_cuda_csrg_scals.o \
|
||||||
|
psb_c_cuda_csrg_from_gpu.o \
|
||||||
|
psb_c_cuda_csrg_to_gpu.o \
|
||||||
|
psb_c_cuda_csrg_vect_mv.o \
|
||||||
|
psb_c_cuda_csrg_inner_vect_sv.o \
|
||||||
|
psb_z_cuda_csrg_allocate_mnnz.o \
|
||||||
|
psb_z_cuda_csrg_csmm.o \
|
||||||
|
psb_z_cuda_csrg_csmv.o \
|
||||||
|
psb_z_cuda_csrg_mold.o \
|
||||||
|
psb_z_cuda_csrg_reallocate_nz.o \
|
||||||
|
psb_z_cuda_csrg_scal.o \
|
||||||
|
psb_z_cuda_csrg_scals.o \
|
||||||
|
psb_z_cuda_csrg_from_gpu.o \
|
||||||
|
psb_z_cuda_csrg_to_gpu.o \
|
||||||
|
psb_z_cuda_csrg_vect_mv.o \
|
||||||
|
psb_z_cuda_csrg_inner_vect_sv.o \
|
||||||
|
psb_z_cuda_elg_allocate_mnnz.o \
|
||||||
|
psb_z_cuda_elg_asb.o \
|
||||||
|
psb_z_cuda_elg_csmm.o \
|
||||||
|
psb_z_cuda_elg_csmv.o \
|
||||||
|
psb_z_cuda_elg_csput.o \
|
||||||
|
psb_z_cuda_elg_inner_vect_sv.o \
|
||||||
|
psb_z_cuda_elg_mold.o \
|
||||||
|
psb_z_cuda_elg_reallocate_nz.o \
|
||||||
|
psb_z_cuda_elg_scal.o \
|
||||||
|
psb_z_cuda_elg_scals.o \
|
||||||
|
psb_z_cuda_elg_to_gpu.o \
|
||||||
|
psb_z_cuda_elg_from_gpu.o \
|
||||||
|
psb_z_cuda_elg_vect_mv.o \
|
||||||
|
psb_z_cuda_mv_csrg_from_coo.o \
|
||||||
|
psb_z_cuda_mv_csrg_from_fmt.o \
|
||||||
|
psb_z_cuda_mv_elg_from_coo.o \
|
||||||
|
psb_z_cuda_mv_elg_from_fmt.o \
|
||||||
|
psb_c_cuda_mv_csrg_from_coo.o \
|
||||||
|
psb_c_cuda_mv_csrg_from_fmt.o \
|
||||||
|
psb_c_cuda_cp_elg_from_coo.o \
|
||||||
|
psb_c_cuda_cp_elg_from_fmt.o \
|
||||||
|
psb_c_cuda_elg_allocate_mnnz.o \
|
||||||
|
psb_c_cuda_elg_asb.o \
|
||||||
|
psb_c_cuda_elg_csmm.o \
|
||||||
|
psb_c_cuda_elg_csmv.o \
|
||||||
|
psb_c_cuda_elg_csput.o \
|
||||||
|
psb_c_cuda_elg_inner_vect_sv.o \
|
||||||
|
psb_c_cuda_elg_mold.o \
|
||||||
|
psb_c_cuda_elg_reallocate_nz.o \
|
||||||
|
psb_c_cuda_elg_scal.o \
|
||||||
|
psb_c_cuda_elg_scals.o \
|
||||||
|
psb_c_cuda_elg_to_gpu.o \
|
||||||
|
psb_c_cuda_elg_from_gpu.o \
|
||||||
|
psb_c_cuda_elg_vect_mv.o \
|
||||||
|
psb_c_cuda_mv_elg_from_coo.o \
|
||||||
|
psb_c_cuda_mv_elg_from_fmt.o \
|
||||||
|
psb_c_cuda_cp_hlg_from_fmt.o \
|
||||||
|
psb_c_cuda_cp_hlg_from_coo.o \
|
||||||
|
psb_z_cuda_cp_hlg_from_fmt.o \
|
||||||
|
psb_z_cuda_cp_hlg_from_coo.o \
|
||||||
|
psb_z_cuda_hlg_allocate_mnnz.o \
|
||||||
|
psb_z_cuda_hlg_csmm.o \
|
||||||
|
psb_z_cuda_hlg_csmv.o \
|
||||||
|
psb_z_cuda_hlg_inner_vect_sv.o \
|
||||||
|
psb_z_cuda_hlg_mold.o \
|
||||||
|
psb_z_cuda_hlg_reallocate_nz.o \
|
||||||
|
psb_z_cuda_hlg_scal.o \
|
||||||
|
psb_z_cuda_hlg_scals.o \
|
||||||
|
psb_z_cuda_hlg_from_gpu.o \
|
||||||
|
psb_z_cuda_hlg_to_gpu.o \
|
||||||
|
psb_z_cuda_hlg_vect_mv.o \
|
||||||
|
psb_c_cuda_hlg_allocate_mnnz.o \
|
||||||
|
psb_c_cuda_hlg_csmm.o \
|
||||||
|
psb_c_cuda_hlg_csmv.o \
|
||||||
|
psb_c_cuda_hlg_inner_vect_sv.o \
|
||||||
|
psb_c_cuda_hlg_mold.o \
|
||||||
|
psb_c_cuda_hlg_reallocate_nz.o \
|
||||||
|
psb_c_cuda_hlg_scal.o \
|
||||||
|
psb_c_cuda_hlg_scals.o \
|
||||||
|
psb_c_cuda_hlg_from_gpu.o \
|
||||||
|
psb_c_cuda_hlg_to_gpu.o \
|
||||||
|
psb_c_cuda_hlg_vect_mv.o \
|
||||||
|
psb_c_cuda_mv_hlg_from_coo.o \
|
||||||
|
psb_c_cuda_cp_hlg_from_coo.o \
|
||||||
|
psb_c_cuda_mv_hlg_from_fmt.o \
|
||||||
|
psb_z_cuda_mv_hlg_from_coo.o \
|
||||||
|
psb_z_cuda_cp_hlg_from_coo.o \
|
||||||
|
psb_z_cuda_mv_hlg_from_fmt.o \
|
||||||
|
psb_c_cuda_hybg_allocate_mnnz.o \
|
||||||
|
psb_c_cuda_hybg_csmm.o \
|
||||||
|
psb_c_cuda_hybg_csmv.o \
|
||||||
|
psb_c_cuda_hybg_reallocate_nz.o \
|
||||||
|
psb_c_cuda_hybg_scal.o \
|
||||||
|
psb_c_cuda_hybg_scals.o \
|
||||||
|
psb_c_cuda_hybg_to_gpu.o \
|
||||||
|
psb_c_cuda_hybg_vect_mv.o \
|
||||||
|
psb_c_cuda_hybg_inner_vect_sv.o \
|
||||||
|
psb_c_cuda_cp_hybg_from_coo.o \
|
||||||
|
psb_c_cuda_cp_hybg_from_fmt.o \
|
||||||
|
psb_c_cuda_mv_hybg_from_fmt.o \
|
||||||
|
psb_c_cuda_mv_hybg_from_coo.o \
|
||||||
|
psb_c_cuda_hybg_mold.o \
|
||||||
|
psb_z_cuda_hybg_allocate_mnnz.o \
|
||||||
|
psb_z_cuda_hybg_csmm.o \
|
||||||
|
psb_z_cuda_hybg_csmv.o \
|
||||||
|
psb_z_cuda_hybg_reallocate_nz.o \
|
||||||
|
psb_z_cuda_hybg_scal.o \
|
||||||
|
psb_z_cuda_hybg_scals.o \
|
||||||
|
psb_z_cuda_hybg_to_gpu.o \
|
||||||
|
psb_z_cuda_hybg_vect_mv.o \
|
||||||
|
psb_z_cuda_hybg_inner_vect_sv.o \
|
||||||
|
psb_z_cuda_cp_hybg_from_coo.o \
|
||||||
|
psb_z_cuda_cp_hybg_from_fmt.o \
|
||||||
|
psb_z_cuda_mv_hybg_from_fmt.o \
|
||||||
|
psb_z_cuda_mv_hybg_from_coo.o \
|
||||||
|
psb_z_cuda_hybg_mold.o \
|
||||||
|
psb_d_cuda_cp_diag_from_coo.o \
|
||||||
|
psb_d_cuda_mv_diag_from_coo.o \
|
||||||
|
psb_d_cuda_diag_to_gpu.o \
|
||||||
|
psb_d_cuda_diag_csmv.o \
|
||||||
|
psb_d_cuda_diag_mold.o \
|
||||||
|
psb_d_cuda_diag_vect_mv.o \
|
||||||
|
psb_d_cuda_cp_hdiag_from_coo.o \
|
||||||
|
psb_d_cuda_mv_hdiag_from_coo.o \
|
||||||
|
psb_d_cuda_hdiag_to_gpu.o \
|
||||||
|
psb_d_cuda_hdiag_csmv.o \
|
||||||
|
psb_d_cuda_hdiag_mold.o \
|
||||||
|
psb_d_cuda_hdiag_vect_mv.o \
|
||||||
|
psb_s_cuda_cp_hdiag_from_coo.o \
|
||||||
|
psb_s_cuda_mv_hdiag_from_coo.o \
|
||||||
|
psb_s_cuda_hdiag_to_gpu.o \
|
||||||
|
psb_s_cuda_hdiag_csmv.o \
|
||||||
|
psb_s_cuda_hdiag_mold.o \
|
||||||
|
psb_s_cuda_hdiag_vect_mv.o \
|
||||||
|
psb_s_cuda_dnsg_mat_impl.o \
|
||||||
|
psb_d_cuda_dnsg_mat_impl.o \
|
||||||
|
psb_c_cuda_dnsg_mat_impl.o \
|
||||||
|
psb_z_cuda_dnsg_mat_impl.o
|
||||||
|
|
||||||
|
|
||||||
|
objs: $(OBJS)
|
||||||
|
lib: objs
|
||||||
|
ar cur ../$(LIBNAME) $(OBJS)
|
||||||
|
|
||||||
|
clean:
|
||||||
|
/bin/rm -f $(OBJS)
|
@ -0,0 +1,56 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_cp_csrg_from_coo(a,b,info)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use cusparse_mod
|
||||||
|
use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_cp_csrg_from_coo
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
|
||||||
|
class(psb_c_coo_sparse_mat), intent(in) :: b
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
|
||||||
|
info = psb_success_
|
||||||
|
|
||||||
|
call a%psb_c_csr_sparse_mat%cp_from_coo(b,info)
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
call a%to_gpu(info)
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 continue
|
||||||
|
info = psb_err_alloc_dealloc_
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_cp_csrg_from_coo
|
@ -0,0 +1,55 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_cp_csrg_from_fmt(a,b,info)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use cusparse_mod
|
||||||
|
use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_cp_csrg_from_fmt
|
||||||
|
!use iso_c_binding
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
|
||||||
|
class(psb_c_base_sparse_mat), intent(in) :: b
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
|
||||||
|
|
||||||
|
info = psb_success_
|
||||||
|
select type(b)
|
||||||
|
type is (psb_c_coo_sparse_mat)
|
||||||
|
call a%cp_from_coo(b,info)
|
||||||
|
class default
|
||||||
|
call a%psb_c_csr_sparse_mat%cp_from_fmt(b,info)
|
||||||
|
if (info /= 0) return
|
||||||
|
call a%to_gpu(info)
|
||||||
|
end select
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_cp_csrg_from_fmt
|
@ -0,0 +1,58 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_cp_diag_from_coo(a,b,info)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use diagdev_mod
|
||||||
|
use psb_vectordev_mod
|
||||||
|
use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_cp_diag_from_coo
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a
|
||||||
|
class(psb_c_coo_sparse_mat), intent(in) :: b
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
|
||||||
|
!locals
|
||||||
|
info = psb_success_
|
||||||
|
call a%psb_c_dia_sparse_mat%cp_from_coo(b,info)
|
||||||
|
|
||||||
|
call a%to_gpu(info)
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 continue
|
||||||
|
info = psb_err_alloc_dealloc_
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_cp_diag_from_coo
|
@ -0,0 +1,161 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
subroutine psb_c_cuda_cp_elg_from_coo(a,b,info)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use elldev_mod
|
||||||
|
use psb_vectordev_mod
|
||||||
|
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_cp_elg_from_coo
|
||||||
|
use psi_ext_util_mod
|
||||||
|
use psb_cuda_env_mod
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
|
||||||
|
class(psb_c_coo_sparse_mat), intent(in) :: b
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
|
||||||
|
!locals
|
||||||
|
Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, &
|
||||||
|
& ir, ic, ld, ldv, hacksize
|
||||||
|
integer(psb_ipk_) :: debug_level, debug_unit
|
||||||
|
character(len=20) :: name
|
||||||
|
type(psb_c_coo_sparse_mat) :: tmp
|
||||||
|
integer(psb_ipk_), allocatable :: idisp(:)
|
||||||
|
|
||||||
|
info = psb_success_
|
||||||
|
hacksize = max(1,psb_cuda_WarpSize())
|
||||||
|
if (b%is_dev()) call b%sync()
|
||||||
|
|
||||||
|
if (b%is_by_rows()) then
|
||||||
|
|
||||||
|
call psi_c_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize=hacksize)
|
||||||
|
|
||||||
|
|
||||||
|
if (c_associated(a%deviceMat)) then
|
||||||
|
call freeEllDevice(a%deviceMat)
|
||||||
|
endif
|
||||||
|
|
||||||
|
nr = b%get_nrows()
|
||||||
|
nc = b%get_ncols()
|
||||||
|
nza = b%get_nzeros()
|
||||||
|
info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_double,1)
|
||||||
|
|
||||||
|
if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, &
|
||||||
|
& a%irn,idisp,b%ja,b%val, a%deviceMat)
|
||||||
|
call a%set_dev()
|
||||||
|
else
|
||||||
|
call b%cp_to_coo(tmp,info)
|
||||||
|
call psi_c_count_ell_from_coo(a,tmp,idisp,ldv,nzm,info,hacksize=hacksize)
|
||||||
|
|
||||||
|
|
||||||
|
if (c_associated(a%deviceMat)) then
|
||||||
|
call freeEllDevice(a%deviceMat)
|
||||||
|
endif
|
||||||
|
|
||||||
|
nr = b%get_nrows()
|
||||||
|
nc = b%get_ncols()
|
||||||
|
nza = b%get_nzeros()
|
||||||
|
info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_double,1)
|
||||||
|
|
||||||
|
if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, &
|
||||||
|
& a%irn,idisp,tmp%ja,tmp%val, a%deviceMat)
|
||||||
|
|
||||||
|
call a%set_dev()
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (info /= psb_success_) goto 9999
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 continue
|
||||||
|
info = psb_err_alloc_dealloc_
|
||||||
|
return
|
||||||
|
|
||||||
|
contains
|
||||||
|
|
||||||
|
subroutine psi_c_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use psi_ext_util_mod
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
class(psb_c_ell_sparse_mat), intent(inout) :: a
|
||||||
|
class(psb_c_coo_sparse_mat), intent(in) :: b
|
||||||
|
integer(psb_ipk_), allocatable, intent(out) :: idisp(:)
|
||||||
|
integer(psb_ipk_), intent(out) :: info, nzm, ldv
|
||||||
|
integer(psb_ipk_), intent(in), optional :: hacksize
|
||||||
|
|
||||||
|
!locals
|
||||||
|
Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, &
|
||||||
|
& ir, ic, hsz_
|
||||||
|
real(psb_dpk_) :: t0,t1
|
||||||
|
logical, parameter :: timing=.true.
|
||||||
|
|
||||||
|
|
||||||
|
info = psb_success_
|
||||||
|
|
||||||
|
nr = b%get_nrows()
|
||||||
|
nc = b%get_ncols()
|
||||||
|
nza = b%get_nzeros()
|
||||||
|
|
||||||
|
hsz_ = 1
|
||||||
|
if (present(hacksize)) then
|
||||||
|
if (hacksize> 1) hsz_ = hacksize
|
||||||
|
end if
|
||||||
|
! Make ldv a multiple of hacksize
|
||||||
|
ldv = ((nr+hsz_-1)/hsz_)*hsz_
|
||||||
|
|
||||||
|
! If it is sorted then we can lessen memory impact
|
||||||
|
a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
|
||||||
|
|
||||||
|
! First compute the number of nonzeros in each row.
|
||||||
|
call psb_realloc(nr,a%irn,info)
|
||||||
|
if (info == psb_success_) call psb_realloc(nr+1,idisp,info)
|
||||||
|
if (info /= psb_success_) return
|
||||||
|
if (timing) t0=psb_wtime()
|
||||||
|
|
||||||
|
a%irn = 0
|
||||||
|
do i=1, nza
|
||||||
|
ir = b%ia(i)
|
||||||
|
a%irn(ir) = a%irn(ir) + 1
|
||||||
|
end do
|
||||||
|
nzm = 0
|
||||||
|
a%nzt = 0
|
||||||
|
idisp(1) = 0
|
||||||
|
do i=1,nr
|
||||||
|
nzm = max(nzm,a%irn(i))
|
||||||
|
a%nzt = a%nzt + a%irn(i)
|
||||||
|
idisp(i+1) = a%nzt
|
||||||
|
end do
|
||||||
|
|
||||||
|
end subroutine psi_c_count_ell_from_coo
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_cp_elg_from_coo
|
@ -0,0 +1,89 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_cp_elg_from_fmt(a,b,info)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use elldev_mod
|
||||||
|
use psb_vectordev_mod
|
||||||
|
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_cp_elg_from_fmt
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
|
||||||
|
class(psb_c_base_sparse_mat), intent(in) :: b
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
|
||||||
|
!locals
|
||||||
|
type(psb_c_coo_sparse_mat) :: tmp
|
||||||
|
Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m
|
||||||
|
integer(psb_ipk_) :: debug_level, debug_unit
|
||||||
|
character(len=20) :: name
|
||||||
|
type(elldev_parms) :: gpu_parms
|
||||||
|
|
||||||
|
info = psb_success_
|
||||||
|
if (b%is_dev()) call b%sync()
|
||||||
|
|
||||||
|
select type (b)
|
||||||
|
type is (psb_c_coo_sparse_mat)
|
||||||
|
call a%cp_from_coo(b,info)
|
||||||
|
|
||||||
|
class is (psb_c_ell_sparse_mat)
|
||||||
|
nzm = psb_size(b%ja,2)
|
||||||
|
m = b%get_nrows()
|
||||||
|
nc = b%get_ncols()
|
||||||
|
nza = b%get_nzeros()
|
||||||
|
gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1)
|
||||||
|
ld = gpu_parms%pitch
|
||||||
|
nzm = gpu_parms%maxRowSize
|
||||||
|
a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
|
||||||
|
if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info)
|
||||||
|
if (info == 0) call psb_safe_cpy( b%irn, a%irn , info)
|
||||||
|
if (info == 0) call psb_safe_cpy( b%ja , a%ja , info)
|
||||||
|
if (info == 0) call psb_safe_cpy( b%val, a%val , info)
|
||||||
|
if (info == 0) call psb_realloc(ld,nzm,a%ja,info)
|
||||||
|
if (info == 0) then
|
||||||
|
a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm)
|
||||||
|
end if
|
||||||
|
if (info == 0) call psb_realloc(ld,nzm,a%val,info)
|
||||||
|
if (info == 0) then
|
||||||
|
a%val(1:m,1:nzm) = b%val(1:m,1:nzm)
|
||||||
|
end if
|
||||||
|
a%nzt = nza
|
||||||
|
call a%to_gpu(info)
|
||||||
|
|
||||||
|
class default
|
||||||
|
|
||||||
|
call b%cp_to_coo(tmp,info)
|
||||||
|
if (info == psb_success_) call a%mv_from_coo(tmp,info)
|
||||||
|
end select
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_cp_elg_from_fmt
|
@ -0,0 +1,63 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
subroutine psb_c_cuda_cp_hdiag_from_coo(a,b,info)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use hdiagdev_mod
|
||||||
|
use psb_vectordev_mod
|
||||||
|
use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_cp_hdiag_from_coo
|
||||||
|
use psb_cuda_env_mod
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a
|
||||||
|
class(psb_c_coo_sparse_mat), intent(in) :: b
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
|
||||||
|
!locals
|
||||||
|
integer(psb_ipk_) :: debug_level, debug_unit
|
||||||
|
character(len=20) :: name
|
||||||
|
|
||||||
|
info = psb_success_
|
||||||
|
|
||||||
|
a%hacksize = psb_cuda_WarpSize()
|
||||||
|
|
||||||
|
call a%psb_c_hdia_sparse_mat%cp_from_coo(b,info)
|
||||||
|
|
||||||
|
call a%to_gpu(info)
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 continue
|
||||||
|
info = psb_err_alloc_dealloc_
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_cp_hdiag_from_coo
|
@ -0,0 +1,190 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_cp_hlg_from_coo(a,b,info)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use hlldev_mod
|
||||||
|
use psb_vectordev_mod
|
||||||
|
use psb_cuda_env_mod
|
||||||
|
use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_cp_hlg_from_coo
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a
|
||||||
|
class(psb_c_coo_sparse_mat), intent(in) :: b
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
|
||||||
|
!locals
|
||||||
|
type(psb_c_coo_sparse_mat) :: tmp
|
||||||
|
integer(psb_ipk_) :: debug_level, debug_unit, hksz
|
||||||
|
integer(psb_ipk_), allocatable :: idisp(:)
|
||||||
|
character(len=20) :: name='hll_from_coo'
|
||||||
|
Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, isz,irs
|
||||||
|
integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, noffs, kc
|
||||||
|
integer(psb_ipk_), allocatable :: irn(:), ja(:), hko(:)
|
||||||
|
real(psb_dpk_), allocatable :: val(:)
|
||||||
|
logical, parameter :: debug=.false.
|
||||||
|
|
||||||
|
info = psb_success_
|
||||||
|
debug_unit = psb_get_debug_unit()
|
||||||
|
debug_level = psb_get_debug_level()
|
||||||
|
hksz = max(1,psb_cuda_WarpSize())
|
||||||
|
|
||||||
|
if (b%is_by_rows()) then
|
||||||
|
|
||||||
|
nr = b%get_nrows()
|
||||||
|
nc = b%get_ncols()
|
||||||
|
nza = b%get_nzeros()
|
||||||
|
if (debug) write(0,*) 'Copying through GPU',nza
|
||||||
|
call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info)
|
||||||
|
if (info /=0) then
|
||||||
|
write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz
|
||||||
|
return
|
||||||
|
end if
|
||||||
|
if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1))
|
||||||
|
|
||||||
|
if (c_associated(a%deviceMat)) then
|
||||||
|
call freeHllDevice(a%deviceMat)
|
||||||
|
endif
|
||||||
|
info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1)
|
||||||
|
if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,&
|
||||||
|
& a%irn,a%hkoffs,idisp,b%ja, b%val, a%deviceMat)
|
||||||
|
call a%set_dev()
|
||||||
|
else
|
||||||
|
! This is to guarantee tmp%is_by_rows()
|
||||||
|
call b%cp_to_coo(tmp,info)
|
||||||
|
call tmp%fix(info)
|
||||||
|
|
||||||
|
nr = tmp%get_nrows()
|
||||||
|
nc = tmp%get_ncols()
|
||||||
|
nza = tmp%get_nzeros()
|
||||||
|
if (debug) write(0,*) 'Copying through GPU'
|
||||||
|
call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,tmp,info)
|
||||||
|
if (info /=0) then
|
||||||
|
write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz
|
||||||
|
return
|
||||||
|
end if
|
||||||
|
if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1))
|
||||||
|
|
||||||
|
if (c_associated(a%deviceMat)) then
|
||||||
|
call freeHllDevice(a%deviceMat)
|
||||||
|
endif
|
||||||
|
info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1)
|
||||||
|
if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,&
|
||||||
|
& a%irn,a%hkoffs,idisp,tmp%ja, tmp%val, a%deviceMat)
|
||||||
|
|
||||||
|
call tmp%free()
|
||||||
|
call a%set_dev()
|
||||||
|
end if
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 continue
|
||||||
|
info = psb_err_alloc_dealloc_
|
||||||
|
return
|
||||||
|
|
||||||
|
contains
|
||||||
|
subroutine psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info)
|
||||||
|
use psb_base_mod
|
||||||
|
use psi_ext_util_mod
|
||||||
|
implicit none
|
||||||
|
class(psb_c_hll_sparse_mat), intent(inout) :: a
|
||||||
|
class(psb_c_coo_sparse_mat), intent(in) :: b
|
||||||
|
integer(psb_ipk_), allocatable, intent(out) :: idisp(:)
|
||||||
|
integer(psb_ipk_), intent(in) :: hksz
|
||||||
|
integer(psb_ipk_), intent(out) :: info, noffs, isz
|
||||||
|
|
||||||
|
!locals
|
||||||
|
Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, irs
|
||||||
|
integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, kc
|
||||||
|
logical, parameter :: debug=.false.
|
||||||
|
|
||||||
|
info = 0
|
||||||
|
nr = b%get_nrows()
|
||||||
|
nc = b%get_ncols()
|
||||||
|
nza = b%get_nzeros()
|
||||||
|
|
||||||
|
! If it is sorted then we can lessen memory impact
|
||||||
|
a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
|
||||||
|
if (debug) write(0,*) 'Start compute hckoff_from_coo',nr,nc,nza
|
||||||
|
! First compute the number of nonzeros in each row.
|
||||||
|
call psb_realloc(nr,a%irn,info)
|
||||||
|
if (info == 0) call psb_realloc(nr+1,idisp,info)
|
||||||
|
if (info /= 0) return
|
||||||
|
a%irn = 0
|
||||||
|
if (debug) then
|
||||||
|
do i=1, nza
|
||||||
|
if ((1<=b%ia(i)).and.(b%ia(i)<= nr)) then
|
||||||
|
a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1
|
||||||
|
else
|
||||||
|
write(0,*) 'Out of bouds IA ',i,b%ia(i),nr
|
||||||
|
end if
|
||||||
|
end do
|
||||||
|
else
|
||||||
|
do i=1, nza
|
||||||
|
a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1
|
||||||
|
end do
|
||||||
|
end if
|
||||||
|
a%nzt = nza
|
||||||
|
|
||||||
|
|
||||||
|
! Second. Figure out the block offsets.
|
||||||
|
call a%set_hksz(hksz)
|
||||||
|
noffs = (nr+hksz-1)/hksz
|
||||||
|
call psb_realloc(noffs+1,a%hkoffs,info)
|
||||||
|
if (debug) write(0,*) ' noffsets ',noffs,info
|
||||||
|
if (info /= 0) return
|
||||||
|
a%hkoffs(1) = 0
|
||||||
|
j=1
|
||||||
|
idisp(1) = 0
|
||||||
|
do i=1,nr,hksz
|
||||||
|
ir = min(hksz,nr-i+1)
|
||||||
|
mxrwl = a%irn(i)
|
||||||
|
idisp(i+1) = idisp(i) + a%irn(i)
|
||||||
|
do k=1,ir-1
|
||||||
|
idisp(i+k+1) = idisp(i+k) + a%irn(i+k)
|
||||||
|
mxrwl = max(mxrwl,a%irn(i+k))
|
||||||
|
end do
|
||||||
|
a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz
|
||||||
|
j = j + 1
|
||||||
|
end do
|
||||||
|
|
||||||
|
!
|
||||||
|
! At this point a%hkoffs(noffs+1) contains the allocation
|
||||||
|
! size a%ja a%val.
|
||||||
|
!
|
||||||
|
isz = a%hkoffs(noffs+1)
|
||||||
|
!!$ write(*,*) 'End of psi_comput_hckoff ',info
|
||||||
|
end subroutine psi_compute_hckoff_from_coo
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_cp_hlg_from_coo
|
@ -0,0 +1,62 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_cp_hlg_from_fmt(a,b,info)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use hlldev_mod
|
||||||
|
use psb_vectordev_mod
|
||||||
|
use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_cp_hlg_from_fmt
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a
|
||||||
|
class(psb_c_base_sparse_mat), intent(in) :: b
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
|
||||||
|
info = psb_success_
|
||||||
|
|
||||||
|
select type(b)
|
||||||
|
type is (psb_c_coo_sparse_mat)
|
||||||
|
call a%cp_from_coo(b,info)
|
||||||
|
class default
|
||||||
|
call a%psb_c_hll_sparse_mat%cp_from_fmt(b,info)
|
||||||
|
if (info == 0) call a%to_gpu(info)
|
||||||
|
end select
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 continue
|
||||||
|
info = psb_err_alloc_dealloc_
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_cp_hlg_from_fmt
|
@ -0,0 +1,58 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
#if CUDA_SHORT_VERSION <= 10
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_cp_hybg_from_coo(a,b,info)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use cusparse_mod
|
||||||
|
use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_cp_hybg_from_coo
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a
|
||||||
|
class(psb_c_coo_sparse_mat), intent(in) :: b
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
|
||||||
|
info = psb_success_
|
||||||
|
|
||||||
|
call a%psb_c_csr_sparse_mat%cp_from_coo(b,info)
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
call a%to_gpu(info)
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 continue
|
||||||
|
info = psb_err_alloc_dealloc_
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_cp_hybg_from_coo
|
||||||
|
#endif
|
@ -0,0 +1,56 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
#if CUDA_SHORT_VERSION <= 10
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_cp_hybg_from_fmt(a,b,info)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use cusparse_mod
|
||||||
|
use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_cp_hybg_from_fmt
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a
|
||||||
|
class(psb_c_base_sparse_mat), intent(inout) :: b
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
|
||||||
|
info = psb_success_
|
||||||
|
|
||||||
|
select type(b)
|
||||||
|
type is (psb_c_coo_sparse_mat)
|
||||||
|
call a%cp_from_coo(b,info)
|
||||||
|
class default
|
||||||
|
call a%psb_c_csr_sparse_mat%cp_from_fmt(b,info)
|
||||||
|
if (info /= 0) return
|
||||||
|
call a%to_gpu(info)
|
||||||
|
end select
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_cp_hybg_from_fmt
|
||||||
|
#endif
|
@ -0,0 +1,62 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_csrg_allocate_mnnz(m,n,a,nz)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use cusparse_mod
|
||||||
|
use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_allocate_mnnz
|
||||||
|
implicit none
|
||||||
|
integer(psb_ipk_), intent(in) :: m,n
|
||||||
|
class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
|
||||||
|
integer(psb_ipk_), intent(in), optional :: nz
|
||||||
|
Integer(Psb_ipk_) :: err_act, info, nz_,ld
|
||||||
|
character(len=20) :: name='allocate_mnz'
|
||||||
|
logical, parameter :: debug=.false.
|
||||||
|
|
||||||
|
call psb_erractionsave(err_act)
|
||||||
|
info = psb_success_
|
||||||
|
|
||||||
|
call a%psb_c_csr_sparse_mat%allocate(m,n,nz)
|
||||||
|
|
||||||
|
info = initFcusparse()
|
||||||
|
if (info == 0) call a%to_gpu(info,nzrm=nz)
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
|
||||||
|
call psb_erractionrestore(err_act)
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_csrg_allocate_mnnz
|
@ -0,0 +1,126 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use cusparse_mod
|
||||||
|
use elldev_mod
|
||||||
|
use psb_vectordev_mod
|
||||||
|
use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_csmm
|
||||||
|
implicit none
|
||||||
|
class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a
|
||||||
|
complex(psb_spk_), intent(in) :: alpha, beta, x(:,:)
|
||||||
|
complex(psb_spk_), intent(inout) :: y(:,:)
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
character, optional, intent(in) :: trans
|
||||||
|
|
||||||
|
character :: trans_
|
||||||
|
integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy
|
||||||
|
complex(psb_spk_), allocatable :: acc(:)
|
||||||
|
type(c_ptr) :: gpX, gpY
|
||||||
|
logical :: tra
|
||||||
|
Integer(Psb_ipk_) :: err_act
|
||||||
|
character(len=20) :: name='d_csrg_csmm'
|
||||||
|
logical, parameter :: debug=.false.
|
||||||
|
|
||||||
|
info = psb_success_
|
||||||
|
call psb_erractionsave(err_act)
|
||||||
|
|
||||||
|
if (present(trans)) then
|
||||||
|
trans_ = trans
|
||||||
|
else
|
||||||
|
trans_ = 'N'
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (.not.a%is_asb()) then
|
||||||
|
info = psb_err_invalid_mat_state_
|
||||||
|
call psb_errpush(info,name)
|
||||||
|
goto 9999
|
||||||
|
endif
|
||||||
|
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
|
||||||
|
|
||||||
|
if (tra) then
|
||||||
|
m = a%get_ncols()
|
||||||
|
n = a%get_nrows()
|
||||||
|
else
|
||||||
|
n = a%get_ncols()
|
||||||
|
m = a%get_nrows()
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (size(x,1)<n) then
|
||||||
|
info = 36
|
||||||
|
call psb_errpush(info,name,i_err=(/3*ione,n,izero,izero,izero/))
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (size(y,1)<m) then
|
||||||
|
info = 36
|
||||||
|
call psb_errpush(info,name,i_err=(/5*ione,m,izero,izero,izero/))
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
|
||||||
|
|
||||||
|
if (tra) then
|
||||||
|
call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans)
|
||||||
|
else
|
||||||
|
!
|
||||||
|
! Just to test, move X/Y to/from the GPU.
|
||||||
|
!
|
||||||
|
nxy = min(size(x,2),size(y,2))
|
||||||
|
if (info == 0) &
|
||||||
|
& info = FallocMultiVecDevice(gpX,nxy,size(x,1),spgpu_type_complex_float)
|
||||||
|
if (info == 0) &
|
||||||
|
& info = writeMultiVecDevice(gpX,x,nxy)
|
||||||
|
if (info == 0) &
|
||||||
|
& info = FallocMultiVecDevice(gpY,nxy,size(y,1),spgpu_type_complex_float)
|
||||||
|
if (info == 0) &
|
||||||
|
& info = writeMultiVecDevice(gpY,y,nxy)
|
||||||
|
|
||||||
|
if (info == 0) &
|
||||||
|
& info = spmvCSRGDevice(a%deviceMat,alpha,gpX,beta,gpY)
|
||||||
|
if (info == 0) &
|
||||||
|
& info = readMultiVecDevice(gpY,y,nxy)
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
call freeMultiVecDevice(gpX)
|
||||||
|
call freeMultiVecDevice(gpY)
|
||||||
|
endif
|
||||||
|
|
||||||
|
call psb_erractionrestore(err_act)
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_csrg_csmm
|
@ -0,0 +1,131 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use cusparse_mod
|
||||||
|
use elldev_mod
|
||||||
|
use psb_vectordev_mod
|
||||||
|
use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_csmv
|
||||||
|
implicit none
|
||||||
|
class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a
|
||||||
|
complex(psb_spk_), intent(in) :: alpha, beta, x(:)
|
||||||
|
complex(psb_spk_), intent(inout) :: y(:)
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
character, optional, intent(in) :: trans
|
||||||
|
|
||||||
|
character :: trans_
|
||||||
|
integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc
|
||||||
|
complex(psb_spk_) :: acc
|
||||||
|
type(c_ptr) :: gpX
|
||||||
|
type(c_ptr) :: gpY
|
||||||
|
logical :: tra
|
||||||
|
Integer(Psb_ipk_) :: err_act
|
||||||
|
character(len=20) :: name='c_cuda_csrg_csmv'
|
||||||
|
logical, parameter :: debug=.false.
|
||||||
|
|
||||||
|
call psb_erractionsave(err_act)
|
||||||
|
info = psb_success_
|
||||||
|
|
||||||
|
if (present(trans)) then
|
||||||
|
trans_ = trans
|
||||||
|
else
|
||||||
|
trans_ = 'N'
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (.not.a%is_asb()) then
|
||||||
|
info = psb_err_invalid_mat_state_
|
||||||
|
call psb_errpush(info,name)
|
||||||
|
goto 9999
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
|
||||||
|
|
||||||
|
if (tra) then
|
||||||
|
m = a%get_ncols()
|
||||||
|
n = a%get_nrows()
|
||||||
|
else
|
||||||
|
n = a%get_ncols()
|
||||||
|
m = a%get_nrows()
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (size(x,1)<n) then
|
||||||
|
info = 36
|
||||||
|
call psb_errpush(info,name,i_err=(/3*ione,n,izero,izero,izero/))
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (size(y,1)<m) then
|
||||||
|
info = 36
|
||||||
|
call psb_errpush(info,name,i_err=(/5*ione,m,izero,izero,izero/))
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (tra) then
|
||||||
|
call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans)
|
||||||
|
else
|
||||||
|
!
|
||||||
|
! Just to test, move X/Y to/from the GPU.
|
||||||
|
!
|
||||||
|
if (info == 0) &
|
||||||
|
& info = FallocMultiVecDevice(gpX,1,size(x,1),spgpu_type_complex_float)
|
||||||
|
if (alpha /= dzero) then
|
||||||
|
if (info == 0) &
|
||||||
|
& info = writeMultiVecDevice(gpX,x)
|
||||||
|
end if
|
||||||
|
if (info == 0) &
|
||||||
|
& info = FallocMultiVecDevice(gpY,1,size(y,1),spgpu_type_complex_float)
|
||||||
|
if (beta /= dzero) then
|
||||||
|
if (info == 0) &
|
||||||
|
& info = writeMultiVecDevice(gpY,y)
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (info == 0) &
|
||||||
|
& info = spmvCSRGDevice(a%deviceMat,alpha,gpX,beta,gpY)
|
||||||
|
if (info == 0) &
|
||||||
|
& info = readMultiVecDevice(gpY,y)
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
call freeMultiVecDevice(gpX)
|
||||||
|
call freeMultiVecDevice(gpY)
|
||||||
|
endif
|
||||||
|
|
||||||
|
call psb_erractionrestore(err_act)
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_csrg_csmv
|
@ -0,0 +1,67 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_csrg_from_gpu(a,info)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use elldev_mod
|
||||||
|
use psb_vectordev_mod
|
||||||
|
use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_from_gpu
|
||||||
|
implicit none
|
||||||
|
class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
|
||||||
|
integer(psb_ipk_) :: m, n, nz
|
||||||
|
|
||||||
|
info = 0
|
||||||
|
|
||||||
|
if (.not.(c_associated(a%deviceMat%mat))) then
|
||||||
|
call a%free()
|
||||||
|
return
|
||||||
|
end if
|
||||||
|
|
||||||
|
info = CSRGDeviceGetParms(a%deviceMat,m,n,nz)
|
||||||
|
if (info /= psb_success_) return
|
||||||
|
|
||||||
|
if (info == 0) call psb_realloc(m+1,a%irp,info)
|
||||||
|
if (info == 0) call psb_realloc(nz,a%ja,info)
|
||||||
|
if (info == 0) call psb_realloc(nz,a%val,info)
|
||||||
|
if (info == 0) info = &
|
||||||
|
& CSRGDevice2Host(a%deviceMat,m,n,nz,a%irp,a%ja,a%val)
|
||||||
|
#if (CUDA_SHORT_VERSION <= 10) || (CUDA_VERSION < 11030)
|
||||||
|
a%irp(:) = a%irp(:)+1
|
||||||
|
a%ja(:) = a%ja(:)+1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
call a%set_sync()
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_csrg_from_gpu
|
@ -0,0 +1,125 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use elldev_mod
|
||||||
|
use psb_vectordev_mod
|
||||||
|
use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_inner_vect_sv
|
||||||
|
use psb_c_cuda_vect_mod
|
||||||
|
implicit none
|
||||||
|
class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a
|
||||||
|
complex(psb_spk_), intent(in) :: alpha, beta
|
||||||
|
class(psb_c_base_vect_type), intent(inout) :: x, y
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
character, optional, intent(in) :: trans
|
||||||
|
|
||||||
|
complex(psb_spk_), allocatable :: rx(:), ry(:)
|
||||||
|
logical :: tra
|
||||||
|
character :: trans_
|
||||||
|
integer(psb_ipk_) :: err_act
|
||||||
|
character(len=20) :: name='c_cuda_csrg_inner_vect_sv'
|
||||||
|
logical, parameter :: debug=.false.
|
||||||
|
|
||||||
|
call psb_get_erraction(err_act)
|
||||||
|
! This is the base version. If we get here
|
||||||
|
! it means the derived class is incomplete,
|
||||||
|
! so we throw an error.
|
||||||
|
info = psb_success_
|
||||||
|
|
||||||
|
|
||||||
|
if (present(trans)) then
|
||||||
|
trans_ = trans
|
||||||
|
else
|
||||||
|
trans_ = 'N'
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (.not.a%is_asb()) then
|
||||||
|
info = psb_err_invalid_mat_state_
|
||||||
|
call psb_errpush(info,name)
|
||||||
|
goto 9999
|
||||||
|
endif
|
||||||
|
|
||||||
|
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
|
||||||
|
|
||||||
|
if (tra.or.(beta/=dzero)) then
|
||||||
|
call x%sync()
|
||||||
|
call y%sync()
|
||||||
|
call a%psb_c_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans)
|
||||||
|
call y%set_host()
|
||||||
|
else
|
||||||
|
select type (xx => x)
|
||||||
|
type is (psb_c_vect_cuda)
|
||||||
|
select type(yy => y)
|
||||||
|
type is (psb_c_vect_cuda)
|
||||||
|
if (xx%is_host()) call xx%sync()
|
||||||
|
if (beta /= dzero) then
|
||||||
|
if (yy%is_host()) call yy%sync()
|
||||||
|
end if
|
||||||
|
info = spsvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,&
|
||||||
|
& beta,yy%deviceVect)
|
||||||
|
if (info /= 0) then
|
||||||
|
call psb_errpush(psb_err_from_subroutine_ai_,name,&
|
||||||
|
& a_err='spsvCSRGDevice',i_err=(/info,izero,izero,izero,izero/))
|
||||||
|
info = psb_err_from_subroutine_ai_
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
call yy%set_dev()
|
||||||
|
class default
|
||||||
|
rx = xx%get_vect()
|
||||||
|
ry = y%get_vect()
|
||||||
|
call a%psb_c_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info)
|
||||||
|
call y%bld(ry)
|
||||||
|
end select
|
||||||
|
class default
|
||||||
|
rx = x%get_vect()
|
||||||
|
ry = y%get_vect()
|
||||||
|
call a%psb_c_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info)
|
||||||
|
call y%bld(ry)
|
||||||
|
end select
|
||||||
|
end if
|
||||||
|
if (info /= psb_success_) then
|
||||||
|
info = psb_err_from_subroutine_
|
||||||
|
call psb_errpush(info,name, a_err='csrg_vect_sv')
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
|
||||||
|
|
||||||
|
call psb_erractionrestore(err_act)
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_csrg_inner_vect_sv
|
@ -0,0 +1,65 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_csrg_mold(a,b,info)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_mold
|
||||||
|
implicit none
|
||||||
|
class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a
|
||||||
|
class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
Integer(Psb_ipk_) :: err_act
|
||||||
|
character(len=20) :: name='csrg_mold'
|
||||||
|
logical, parameter :: debug=.false.
|
||||||
|
|
||||||
|
call psb_get_erraction(err_act)
|
||||||
|
|
||||||
|
info = 0
|
||||||
|
if (allocated(b)) then
|
||||||
|
call b%free()
|
||||||
|
deallocate(b,stat=info)
|
||||||
|
end if
|
||||||
|
if (info == 0) allocate(psb_c_cuda_csrg_sparse_mat :: b, stat=info)
|
||||||
|
|
||||||
|
if (info /= psb_success_) then
|
||||||
|
info = psb_err_alloc_dealloc_
|
||||||
|
call psb_errpush(info, name)
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_csrg_mold
|
@ -0,0 +1,64 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_csrg_reallocate_nz(nz,a)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use cusparse_mod
|
||||||
|
use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_reallocate_nz
|
||||||
|
implicit none
|
||||||
|
integer(psb_ipk_), intent(in) :: nz
|
||||||
|
class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
|
||||||
|
integer(psb_ipk_) :: m, nzrm,ld
|
||||||
|
Integer(Psb_ipk_) :: err_act, info
|
||||||
|
character(len=20) :: name='c_cuda_csrg_reallocate_nz'
|
||||||
|
logical, parameter :: debug=.false.
|
||||||
|
|
||||||
|
call psb_erractionsave(err_act)
|
||||||
|
info = psb_success_
|
||||||
|
|
||||||
|
!
|
||||||
|
! What should this really do???
|
||||||
|
!
|
||||||
|
call a%psb_c_csr_sparse_mat%reallocate(nz)
|
||||||
|
|
||||||
|
call a%to_gpu(info,nzrm=nz)
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
|
||||||
|
call psb_erractionrestore(err_act)
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_csrg_reallocate_nz
|
@ -0,0 +1,67 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_csrg_scal(d,a,info,side)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use cusparse_mod
|
||||||
|
use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_scal
|
||||||
|
implicit none
|
||||||
|
class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
|
||||||
|
complex(psb_spk_), intent(in) :: d(:)
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
character, intent(in), optional :: side
|
||||||
|
|
||||||
|
|
||||||
|
Integer(Psb_ipk_) :: err_act
|
||||||
|
character(len=20) :: name='scal'
|
||||||
|
logical, parameter :: debug=.false.
|
||||||
|
|
||||||
|
info = psb_success_
|
||||||
|
call psb_erractionsave(err_act)
|
||||||
|
|
||||||
|
if (a%is_dev()) call a%sync()
|
||||||
|
|
||||||
|
call a%psb_c_csr_sparse_mat%scal(d,info,side=side)
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
|
||||||
|
call a%to_gpu(info)
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
|
||||||
|
call psb_erractionrestore(err_act)
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_csrg_scal
|
@ -0,0 +1,65 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_csrg_scals(d,a,info)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use cusparse_mod
|
||||||
|
use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_scals
|
||||||
|
implicit none
|
||||||
|
class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
|
||||||
|
complex(psb_spk_), intent(in) :: d
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
|
||||||
|
Integer(Psb_ipk_) :: err_act
|
||||||
|
character(len=20) :: name='scal'
|
||||||
|
logical, parameter :: debug=.false.
|
||||||
|
|
||||||
|
info = psb_success_
|
||||||
|
call psb_erractionsave(err_act)
|
||||||
|
|
||||||
|
if (a%is_dev()) call a%sync()
|
||||||
|
call a%psb_c_csr_sparse_mat%scal(d,info)
|
||||||
|
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
|
||||||
|
call a%to_gpu(info)
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
|
||||||
|
call psb_erractionrestore(err_act)
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_csrg_scals
|
@ -0,0 +1,378 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use cusparse_mod
|
||||||
|
use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_to_gpu
|
||||||
|
implicit none
|
||||||
|
class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
integer(psb_ipk_), intent(in), optional :: nzrm
|
||||||
|
|
||||||
|
integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize,nz
|
||||||
|
integer(psb_ipk_) :: nzdi,i,j,k,nrz
|
||||||
|
integer(psb_ipk_), allocatable :: irpdi(:),jadi(:)
|
||||||
|
complex(psb_spk_), allocatable :: valdi(:)
|
||||||
|
|
||||||
|
info = 0
|
||||||
|
|
||||||
|
if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return
|
||||||
|
|
||||||
|
m = a%get_nrows()
|
||||||
|
n = a%get_ncols()
|
||||||
|
nz = a%get_nzeros()
|
||||||
|
if (c_associated(a%deviceMat%Mat)) then
|
||||||
|
info = CSRGDeviceFree(a%deviceMat)
|
||||||
|
end if
|
||||||
|
#if (CUDA_SHORT_VERSION <= 10 )
|
||||||
|
if (a%is_unit()) then
|
||||||
|
!
|
||||||
|
! CUSPARSE has the habit of storing the diagonal and then ignoring,
|
||||||
|
! whereas we do not store it. Hence this adapter code.
|
||||||
|
!
|
||||||
|
nzdi = nz + m
|
||||||
|
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi)
|
||||||
|
if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one)
|
||||||
|
if (info == 0) then
|
||||||
|
if (a%is_unit()) then
|
||||||
|
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
|
||||||
|
else
|
||||||
|
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
|
||||||
|
end if
|
||||||
|
end if
|
||||||
|
!!! We are explicitly adding the diagonal
|
||||||
|
!! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
|
||||||
|
if ((info == 0) .and. a%is_triangle()) then
|
||||||
|
!info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular)
|
||||||
|
if ((info == 0).and.a%is_upper()) then
|
||||||
|
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
|
||||||
|
else
|
||||||
|
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
|
||||||
|
end if
|
||||||
|
end if
|
||||||
|
if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info)
|
||||||
|
if (info == 0) then
|
||||||
|
irpdi(1) = 1
|
||||||
|
if (a%is_triangle().and.a%is_upper()) then
|
||||||
|
do i=1,m
|
||||||
|
j = irpdi(i)
|
||||||
|
jadi(j) = i
|
||||||
|
valdi(j) = cone
|
||||||
|
nrz = a%irp(i+1)-a%irp(i)
|
||||||
|
jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)
|
||||||
|
valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1)
|
||||||
|
irpdi(i+1) = j + nrz + 1
|
||||||
|
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
|
||||||
|
end do
|
||||||
|
else
|
||||||
|
do i=1,m
|
||||||
|
j = irpdi(i)
|
||||||
|
nrz = a%irp(i+1)-a%irp(i)
|
||||||
|
jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)
|
||||||
|
valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1)
|
||||||
|
jadi(j+nrz) = i
|
||||||
|
valdi(j+nrz) = cone
|
||||||
|
irpdi(i+1) = j + nrz + 1
|
||||||
|
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
|
||||||
|
end do
|
||||||
|
end if
|
||||||
|
end if
|
||||||
|
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi)
|
||||||
|
|
||||||
|
else
|
||||||
|
|
||||||
|
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz)
|
||||||
|
if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one)
|
||||||
|
!!$ if (info == 0) then
|
||||||
|
!!$ if (a%is_unit()) then
|
||||||
|
!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
|
||||||
|
!!$ else
|
||||||
|
!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
|
||||||
|
!!$ end if
|
||||||
|
!!$ end if
|
||||||
|
if ((info == 0) .and. a%is_triangle()) then
|
||||||
|
!info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular)
|
||||||
|
if ((info == 0).and.a%is_upper()) then
|
||||||
|
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
|
||||||
|
else
|
||||||
|
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
|
||||||
|
end if
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val)
|
||||||
|
endif
|
||||||
|
|
||||||
|
if ((info == 0) .and. a%is_triangle()) then
|
||||||
|
info = CSRGDeviceCsrsmAnalysis(a%deviceMat)
|
||||||
|
end if
|
||||||
|
|
||||||
|
#elif CUDA_VERSION < 11030
|
||||||
|
if (a%is_unit()) then
|
||||||
|
!
|
||||||
|
! CUSPARSE has the habit of storing the diagonal and then ignoring,
|
||||||
|
! whereas we do not store it. Hence this adapter code.
|
||||||
|
!
|
||||||
|
nzdi = nz + m
|
||||||
|
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi)
|
||||||
|
!!$ write(0,*) 'Done deviceAlloc'
|
||||||
|
if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_zero)
|
||||||
|
!!$ write(0,*) 'Done SetIndexBase'
|
||||||
|
if (info == 0) then
|
||||||
|
if (a%is_unit()) then
|
||||||
|
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
|
||||||
|
else
|
||||||
|
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
|
||||||
|
end if
|
||||||
|
end if
|
||||||
|
!!! We are explicitly adding the diagonal
|
||||||
|
!! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
|
||||||
|
if ((info == 0) .and. a%is_triangle()) then
|
||||||
|
info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular)
|
||||||
|
if ((info == 0).and.a%is_upper()) then
|
||||||
|
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
|
||||||
|
else
|
||||||
|
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
|
||||||
|
end if
|
||||||
|
end if
|
||||||
|
if (info == 0) allocate(irpdi(m+1),jadi(0:nzdi),valdi(0:nzdi),stat=info)
|
||||||
|
if (info == 0) then
|
||||||
|
irpdi(1) = 0
|
||||||
|
if (a%is_triangle().and.a%is_upper()) then
|
||||||
|
do i=1,m
|
||||||
|
j = irpdi(i)
|
||||||
|
jadi(j) = i
|
||||||
|
valdi(j) = cone
|
||||||
|
nrz = a%irp(i+1)-a%irp(i)
|
||||||
|
jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)-1
|
||||||
|
valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1)
|
||||||
|
irpdi(i+1) = j + nrz + 1
|
||||||
|
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
|
||||||
|
end do
|
||||||
|
else
|
||||||
|
do i=1,m
|
||||||
|
j = irpdi(i)
|
||||||
|
nrz = a%irp(i+1)-a%irp(i)
|
||||||
|
jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)-1
|
||||||
|
valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1)
|
||||||
|
jadi(j+nrz) = i
|
||||||
|
valdi(j+nrz) = cone
|
||||||
|
irpdi(i+1) = j + nrz + 1
|
||||||
|
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
|
||||||
|
end do
|
||||||
|
end if
|
||||||
|
end if
|
||||||
|
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi)
|
||||||
|
|
||||||
|
else
|
||||||
|
|
||||||
|
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz)
|
||||||
|
!!$ write(0,*) 'Done deviceAlloc', info
|
||||||
|
if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,&
|
||||||
|
& cusparse_index_base_zero)
|
||||||
|
!!$ write(0,*) 'Done setIndexBase', info
|
||||||
|
if (info == 0) then
|
||||||
|
if (a%is_unit()) then
|
||||||
|
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
|
||||||
|
else
|
||||||
|
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
|
||||||
|
end if
|
||||||
|
end if
|
||||||
|
if ((info == 0) .and. a%is_triangle()) then
|
||||||
|
info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular)
|
||||||
|
if ((info == 0).and.a%is_upper()) then
|
||||||
|
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
|
||||||
|
else
|
||||||
|
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
|
||||||
|
end if
|
||||||
|
end if
|
||||||
|
nzdi=a%irp(m+1)-1
|
||||||
|
if (info == 0) allocate(irpdi(m+1),jadi(max(nzdi,1)),stat=info)
|
||||||
|
if (info == 0) then
|
||||||
|
irpdi(1:m+1) = a%irp(1:m+1) -1
|
||||||
|
jadi(1:nzdi) = a%ja(1:nzdi) -1
|
||||||
|
end if
|
||||||
|
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,irpdi,jadi,a%val)
|
||||||
|
!!$ write(0,*) 'Done Host2Device', info
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
#elif 0
|
||||||
|
|
||||||
|
if (a%is_unit()) then
|
||||||
|
!
|
||||||
|
! CUSPARSE has the habit of storing the diagonal and then ignoring,
|
||||||
|
! whereas we do not store it. Hence this adapter code.
|
||||||
|
!
|
||||||
|
nzdi = nz + m
|
||||||
|
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi)
|
||||||
|
if (info == 0) then
|
||||||
|
if (a%is_unit()) then
|
||||||
|
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
|
||||||
|
else
|
||||||
|
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
|
||||||
|
end if
|
||||||
|
end if
|
||||||
|
!!! We are explicitly adding the diagonal
|
||||||
|
!! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
|
||||||
|
if ((info == 0) .and. a%is_triangle()) then
|
||||||
|
!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular)
|
||||||
|
if ((info == 0).and.a%is_upper()) then
|
||||||
|
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
|
||||||
|
else
|
||||||
|
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
|
||||||
|
end if
|
||||||
|
end if
|
||||||
|
if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info)
|
||||||
|
if (info == 0) then
|
||||||
|
irpdi(1) = 1
|
||||||
|
if (a%is_triangle().and.a%is_upper()) then
|
||||||
|
do i=1,m
|
||||||
|
j = irpdi(i)
|
||||||
|
jadi(j) = i
|
||||||
|
valdi(j) = cone
|
||||||
|
nrz = a%irp(i+1)-a%irp(i)
|
||||||
|
jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)
|
||||||
|
valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1)
|
||||||
|
irpdi(i+1) = j + nrz + 1
|
||||||
|
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
|
||||||
|
end do
|
||||||
|
else
|
||||||
|
do i=1,m
|
||||||
|
j = irpdi(i)
|
||||||
|
nrz = a%irp(i+1)-a%irp(i)
|
||||||
|
jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)
|
||||||
|
valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1)
|
||||||
|
jadi(j+nrz) = i
|
||||||
|
valdi(j+nrz) = cone
|
||||||
|
irpdi(i+1) = j + nrz + 1
|
||||||
|
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
|
||||||
|
end do
|
||||||
|
end if
|
||||||
|
end if
|
||||||
|
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi)
|
||||||
|
|
||||||
|
else
|
||||||
|
|
||||||
|
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz)
|
||||||
|
!!$ if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one)
|
||||||
|
if (info == 0) then
|
||||||
|
if (a%is_unit()) then
|
||||||
|
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
|
||||||
|
else
|
||||||
|
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
|
||||||
|
end if
|
||||||
|
end if
|
||||||
|
if ((info == 0) .and. a%is_triangle()) then
|
||||||
|
!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular)
|
||||||
|
if ((info == 0).and.a%is_upper()) then
|
||||||
|
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
|
||||||
|
else
|
||||||
|
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
|
||||||
|
end if
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val)
|
||||||
|
endif
|
||||||
|
|
||||||
|
!!$ if ((info == 0) .and. a%is_triangle()) then
|
||||||
|
!!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat)
|
||||||
|
!!$ end if
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
if (a%is_unit()) then
|
||||||
|
!
|
||||||
|
! CUSPARSE has the habit of storing the diagonal and then ignoring,
|
||||||
|
! whereas we do not store it. Hence this adapter code.
|
||||||
|
!
|
||||||
|
nzdi = nz + m
|
||||||
|
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi)
|
||||||
|
if (info == 0) then
|
||||||
|
if (a%is_unit()) then
|
||||||
|
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
|
||||||
|
else
|
||||||
|
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
|
||||||
|
end if
|
||||||
|
end if
|
||||||
|
!!! We are explicitly adding the diagonal
|
||||||
|
if ((info == 0) .and. a%is_triangle()) then
|
||||||
|
if ((info == 0).and.a%is_upper()) then
|
||||||
|
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
|
||||||
|
else
|
||||||
|
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
|
||||||
|
end if
|
||||||
|
end if
|
||||||
|
if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info)
|
||||||
|
if (info == 0) then
|
||||||
|
irpdi(1) = 1
|
||||||
|
if (a%is_triangle().and.a%is_upper()) then
|
||||||
|
do i=1,m
|
||||||
|
j = irpdi(i)
|
||||||
|
jadi(j) = i
|
||||||
|
valdi(j) = cone
|
||||||
|
nrz = a%irp(i+1)-a%irp(i)
|
||||||
|
jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)
|
||||||
|
valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1)
|
||||||
|
irpdi(i+1) = j + nrz + 1
|
||||||
|
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
|
||||||
|
end do
|
||||||
|
else
|
||||||
|
do i=1,m
|
||||||
|
j = irpdi(i)
|
||||||
|
nrz = a%irp(i+1)-a%irp(i)
|
||||||
|
jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)
|
||||||
|
valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1)
|
||||||
|
jadi(j+nrz) = i
|
||||||
|
valdi(j+nrz) = cone
|
||||||
|
irpdi(i+1) = j + nrz + 1
|
||||||
|
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
|
||||||
|
end do
|
||||||
|
end if
|
||||||
|
end if
|
||||||
|
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi)
|
||||||
|
|
||||||
|
else
|
||||||
|
|
||||||
|
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz)
|
||||||
|
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val)
|
||||||
|
endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
call a%set_sync()
|
||||||
|
|
||||||
|
if (info /= 0) then
|
||||||
|
write(0,*) 'Error in CSRG_TO_GPU ',info
|
||||||
|
end if
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_csrg_to_gpu
|
@ -0,0 +1,117 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use cusparse_mod
|
||||||
|
use elldev_mod
|
||||||
|
use psb_vectordev_mod
|
||||||
|
use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_vect_mv
|
||||||
|
use psb_c_cuda_vect_mod
|
||||||
|
implicit none
|
||||||
|
class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a
|
||||||
|
complex(psb_spk_), intent(in) :: alpha, beta
|
||||||
|
class(psb_c_base_vect_type), intent(inout) :: x
|
||||||
|
class(psb_c_base_vect_type), intent(inout) :: y
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
character, optional, intent(in) :: trans
|
||||||
|
|
||||||
|
complex(psb_spk_), allocatable :: rx(:), ry(:)
|
||||||
|
logical :: tra
|
||||||
|
character :: trans_
|
||||||
|
Integer(Psb_ipk_) :: err_act
|
||||||
|
character(len=20) :: name='c_cuda_csrg_vect_mv'
|
||||||
|
|
||||||
|
call psb_erractionsave(err_act)
|
||||||
|
info = psb_success_
|
||||||
|
if (present(trans)) then
|
||||||
|
trans_ = trans
|
||||||
|
else
|
||||||
|
trans_ = 'N'
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (.not.a%is_asb()) then
|
||||||
|
info = psb_err_invalid_mat_state_
|
||||||
|
call psb_errpush(info,name)
|
||||||
|
goto 9999
|
||||||
|
endif
|
||||||
|
|
||||||
|
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
|
||||||
|
|
||||||
|
if (tra) then
|
||||||
|
if (.not.x%is_host()) call x%sync()
|
||||||
|
if (beta /= czero) then
|
||||||
|
if (.not.y%is_host()) call y%sync()
|
||||||
|
end if
|
||||||
|
call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans)
|
||||||
|
call y%set_host()
|
||||||
|
else
|
||||||
|
if (a%is_host()) call a%sync()
|
||||||
|
select type (xx => x)
|
||||||
|
type is (psb_c_vect_cuda)
|
||||||
|
select type(yy => y)
|
||||||
|
type is (psb_c_vect_cuda)
|
||||||
|
if (xx%is_host()) call xx%sync()
|
||||||
|
if (beta /= czero) then
|
||||||
|
if (yy%is_host()) call yy%sync()
|
||||||
|
end if
|
||||||
|
info = spmvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,&
|
||||||
|
& beta,yy%deviceVect)
|
||||||
|
if (info /= 0) then
|
||||||
|
call psb_errpush(psb_err_from_subroutine_ai_,name,&
|
||||||
|
& a_err='spmvCSRGDevice',i_err=(/info,izero,izero,izero,izero/))
|
||||||
|
info = psb_err_from_subroutine_ai_
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
call yy%set_dev()
|
||||||
|
class default
|
||||||
|
rx = xx%get_vect()
|
||||||
|
ry = y%get_vect()
|
||||||
|
call a%psb_c_csr_sparse_mat%spmm(alpha,rx,beta,ry,info)
|
||||||
|
call y%bld(ry)
|
||||||
|
end select
|
||||||
|
class default
|
||||||
|
rx = x%get_vect()
|
||||||
|
ry = y%get_vect()
|
||||||
|
call a%psb_c_csr_sparse_mat%spmm(alpha,rx,beta,ry,info)
|
||||||
|
call y%bld(ry)
|
||||||
|
end select
|
||||||
|
end if
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
call psb_erractionrestore(err_act)
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
end subroutine psb_c_cuda_csrg_vect_mv
|
@ -0,0 +1,127 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_diag_csmv(alpha,a,x,beta,y,info,trans)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use diagdev_mod
|
||||||
|
use psb_vectordev_mod
|
||||||
|
use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_csmv
|
||||||
|
implicit none
|
||||||
|
class(psb_c_cuda_diag_sparse_mat), intent(in) :: a
|
||||||
|
complex(psb_spk_), intent(in) :: alpha, beta, x(:)
|
||||||
|
complex(psb_spk_), intent(inout) :: y(:)
|
||||||
|
integer, intent(out) :: info
|
||||||
|
character, optional, intent(in) :: trans
|
||||||
|
|
||||||
|
character :: trans_
|
||||||
|
integer :: i,j,k,m,n, nnz, ir, jc
|
||||||
|
complex(psb_spk_) :: acc
|
||||||
|
type(c_ptr) :: gpX, gpY
|
||||||
|
logical :: tra
|
||||||
|
Integer :: err_act
|
||||||
|
character(len=20) :: name='c_cuda_diag_csmv'
|
||||||
|
logical, parameter :: debug=.false.
|
||||||
|
|
||||||
|
call psb_erractionsave(err_act)
|
||||||
|
info = psb_success_
|
||||||
|
|
||||||
|
if (present(trans)) then
|
||||||
|
trans_ = trans
|
||||||
|
else
|
||||||
|
trans_ = 'N'
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (.not.a%is_asb()) then
|
||||||
|
info = psb_err_invalid_mat_state_
|
||||||
|
call psb_errpush(info,name)
|
||||||
|
goto 9999
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
|
||||||
|
|
||||||
|
if (tra) then
|
||||||
|
m = a%get_ncols()
|
||||||
|
n = a%get_nrows()
|
||||||
|
else
|
||||||
|
n = a%get_ncols()
|
||||||
|
m = a%get_nrows()
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (size(x,1)<n) then
|
||||||
|
info = 36
|
||||||
|
call psb_errpush(info,name,i_err=(/3*ione,n,izero,izero,izero/))
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (size(y,1)<m) then
|
||||||
|
info = 36
|
||||||
|
call psb_errpush(info,name,i_err=(/5*ione,m,izero,izero,izero/))
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (tra) then
|
||||||
|
call a%psb_c_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans)
|
||||||
|
else
|
||||||
|
!
|
||||||
|
! Just to test, move X/Y to/from the GPU.
|
||||||
|
!
|
||||||
|
if (info == 0) &
|
||||||
|
& info = FallocMultiVecDevice(gpX,1,size(x,1),spgpu_type_double)
|
||||||
|
if (alpha /= dzero) then
|
||||||
|
if (info == 0) &
|
||||||
|
& info = writeMultiVecDevice(gpX,x)
|
||||||
|
end if
|
||||||
|
if (info == 0) &
|
||||||
|
& info = FallocMultiVecDevice(gpY,1,size(y,1),spgpu_type_double)
|
||||||
|
if (beta /= dzero) then
|
||||||
|
if (info == 0) &
|
||||||
|
& info = writeMultiVecDevice(gpY,y)
|
||||||
|
end if
|
||||||
|
if (info == 0) &
|
||||||
|
& info = spmvDiagDevice(a%deviceMat,alpha,gpX,beta,gpY)
|
||||||
|
if (info == 0) &
|
||||||
|
& info = readMultiVecDevice(gpY,y)
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
call freeMultiVecDevice(gpX)
|
||||||
|
call freeMultiVecDevice(gpY)
|
||||||
|
endif
|
||||||
|
|
||||||
|
call psb_erractionrestore(err_act)
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_diag_csmv
|
@ -0,0 +1,65 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_diag_mold(a,b,info)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_mold
|
||||||
|
implicit none
|
||||||
|
class(psb_c_cuda_diag_sparse_mat), intent(in) :: a
|
||||||
|
class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
Integer(Psb_ipk_) :: err_act
|
||||||
|
character(len=20) :: name='diag_mold'
|
||||||
|
logical, parameter :: debug=.false.
|
||||||
|
|
||||||
|
call psb_get_erraction(err_act)
|
||||||
|
|
||||||
|
info = 0
|
||||||
|
if (allocated(b)) then
|
||||||
|
call b%free()
|
||||||
|
deallocate(b,stat=info)
|
||||||
|
end if
|
||||||
|
if (info == 0) allocate(psb_c_cuda_diag_sparse_mat :: b, stat=info)
|
||||||
|
|
||||||
|
if (info /= psb_success_) then
|
||||||
|
info = psb_err_alloc_dealloc_
|
||||||
|
call psb_errpush(info, name)
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_diag_mold
|
@ -0,0 +1,66 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_diag_to_gpu(a,info,nzrm)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use diagdev_mod
|
||||||
|
use psb_vectordev_mod
|
||||||
|
use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_to_gpu
|
||||||
|
use iso_c_binding
|
||||||
|
implicit none
|
||||||
|
class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
integer(psb_ipk_), intent(in), optional :: nzrm
|
||||||
|
|
||||||
|
integer(psb_ipk_) :: m, nzm, n, c,pitch,maxrowsize,d
|
||||||
|
type(diagdev_parms) :: gpu_parms
|
||||||
|
|
||||||
|
info = 0
|
||||||
|
|
||||||
|
if ((.not.allocated(a%data)).or.(.not.allocated(a%offset))) return
|
||||||
|
|
||||||
|
n = size(a%data,1)
|
||||||
|
d = size(a%data,2)
|
||||||
|
c = a%get_ncols()
|
||||||
|
!allocsize = a%get_size()
|
||||||
|
!write(*,*) 'Create the DIAG matrix'
|
||||||
|
gpu_parms = FgetDiagDeviceParams(n,c,d,spgpu_type_complex_float)
|
||||||
|
if (c_associated(a%deviceMat)) then
|
||||||
|
call freeDiagDevice(a%deviceMat)
|
||||||
|
endif
|
||||||
|
info = FallocDiagDevice(a%deviceMat,n,c,d,spgpu_type_complex_float)
|
||||||
|
if (info == 0) info = &
|
||||||
|
& writeDiagDevice(a%deviceMat,a%data,a%offset,n)
|
||||||
|
! if (info /= 0) goto 9999
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_diag_to_gpu
|
@ -0,0 +1,116 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
subroutine psb_c_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use diagdev_mod
|
||||||
|
use psb_vectordev_mod
|
||||||
|
use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_vect_mv
|
||||||
|
use psb_c_cuda_vect_mod
|
||||||
|
implicit none
|
||||||
|
class(psb_c_cuda_diag_sparse_mat), intent(in) :: a
|
||||||
|
complex(psb_spk_), intent(in) :: alpha, beta
|
||||||
|
class(psb_c_base_vect_type), intent(inout) :: x
|
||||||
|
class(psb_c_base_vect_type), intent(inout) :: y
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
character, optional, intent(in) :: trans
|
||||||
|
complex(psb_spk_), allocatable :: rx(:), ry(:)
|
||||||
|
logical :: tra
|
||||||
|
character :: trans_
|
||||||
|
Integer(Psb_ipk_) :: err_act
|
||||||
|
character(len=20) :: name='c_cuda_diag_vect_mv'
|
||||||
|
|
||||||
|
call psb_erractionsave(err_act)
|
||||||
|
info = psb_success_
|
||||||
|
|
||||||
|
if (present(trans)) then
|
||||||
|
trans_ = trans
|
||||||
|
else
|
||||||
|
trans_ = 'N'
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (.not.a%is_asb()) then
|
||||||
|
info = psb_err_invalid_mat_state_
|
||||||
|
call psb_errpush(info,name)
|
||||||
|
goto 9999
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
|
||||||
|
if (tra) then
|
||||||
|
if (.not.x%is_host()) call x%sync()
|
||||||
|
if (beta /= szero) then
|
||||||
|
if (.not.y%is_host()) call y%sync()
|
||||||
|
end if
|
||||||
|
call a%psb_c_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans)
|
||||||
|
call y%set_host()
|
||||||
|
else
|
||||||
|
if (a%is_host()) call a%sync()
|
||||||
|
select type (xx => x)
|
||||||
|
type is (psb_c_vect_cuda)
|
||||||
|
select type(yy => y)
|
||||||
|
type is (psb_c_vect_cuda)
|
||||||
|
if (xx%is_host()) call xx%sync()
|
||||||
|
if (beta /= dzero) then
|
||||||
|
if (yy%is_host()) call yy%sync()
|
||||||
|
end if
|
||||||
|
info = spmvDiagDevice(a%deviceMat,alpha,xx%deviceVect,&
|
||||||
|
& beta,yy%deviceVect)
|
||||||
|
if (info /= 0) then
|
||||||
|
call psb_errpush(psb_err_from_subroutine_ai_,name,&
|
||||||
|
& a_err='spmvDIAGDevice',i_err=(/info,izero,izero,izero,izero/))
|
||||||
|
info = psb_err_from_subroutine_ai_
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
call yy%set_dev()
|
||||||
|
class default
|
||||||
|
rx = xx%get_vect()
|
||||||
|
ry = y%get_vect()
|
||||||
|
call a%spmm(alpha,rx,beta,ry,info)
|
||||||
|
call y%bld(ry)
|
||||||
|
end select
|
||||||
|
class default
|
||||||
|
rx = x%get_vect()
|
||||||
|
ry = y%get_vect()
|
||||||
|
call a%spmm(alpha,rx,beta,ry,info)
|
||||||
|
call y%bld(ry)
|
||||||
|
end select
|
||||||
|
|
||||||
|
end if
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
call psb_erractionrestore(err_act)
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_diag_vect_mv
|
@ -0,0 +1,428 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans)
|
||||||
|
use psb_base_mod
|
||||||
|
use psb_c_cuda_vect_mod
|
||||||
|
use dnsdev_mod
|
||||||
|
use psb_c_vectordev_mod
|
||||||
|
use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_dnsg_vect_mv
|
||||||
|
implicit none
|
||||||
|
class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a
|
||||||
|
complex(psb_spk_), intent(in) :: alpha, beta
|
||||||
|
class(psb_c_base_vect_type), intent(inout) :: x
|
||||||
|
class(psb_c_base_vect_type), intent(inout) :: y
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
character, optional, intent(in) :: trans
|
||||||
|
logical :: tra
|
||||||
|
character :: trans_
|
||||||
|
complex(psb_spk_), allocatable :: rx(:), ry(:)
|
||||||
|
Integer(Psb_ipk_) :: err_act, m, n, k
|
||||||
|
character(len=20) :: name='c_cuda_dnsg_vect_mv'
|
||||||
|
|
||||||
|
call psb_erractionsave(err_act)
|
||||||
|
info = psb_success_
|
||||||
|
if (present(trans)) then
|
||||||
|
trans_ = psb_toupper(trans)
|
||||||
|
else
|
||||||
|
trans_ = 'N'
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (.not.a%is_asb()) then
|
||||||
|
info = psb_err_invalid_mat_state_
|
||||||
|
call psb_errpush(info,name)
|
||||||
|
goto 9999
|
||||||
|
endif
|
||||||
|
|
||||||
|
if (trans_ =='N') then
|
||||||
|
m = a%get_nrows()
|
||||||
|
n = 1
|
||||||
|
k = a%get_ncols()
|
||||||
|
else
|
||||||
|
m = a%get_ncols()
|
||||||
|
n = 1
|
||||||
|
k = a%get_nrows()
|
||||||
|
end if
|
||||||
|
select type (xx => x)
|
||||||
|
type is (psb_c_vect_cuda)
|
||||||
|
select type(yy => y)
|
||||||
|
type is (psb_c_vect_cuda)
|
||||||
|
if (a%is_host()) call a%sync()
|
||||||
|
if (xx%is_host()) call xx%sync()
|
||||||
|
if (beta /= czero) then
|
||||||
|
if (yy%is_host()) call yy%sync()
|
||||||
|
end if
|
||||||
|
info = spmvDnsDevice(trans_,m,n,k,alpha,a%deviceMat,&
|
||||||
|
& xx%deviceVect,beta,yy%deviceVect)
|
||||||
|
if (info /= 0) then
|
||||||
|
call psb_errpush(psb_err_from_subroutine_ai_,name,&
|
||||||
|
& a_err='spmvDnsDevice',i_err=(/info,izero,izero,izero,izero/))
|
||||||
|
info = psb_err_from_subroutine_ai_
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
call yy%set_dev()
|
||||||
|
class default
|
||||||
|
if (a%is_dev()) call a%sync()
|
||||||
|
rx = xx%get_vect()
|
||||||
|
ry = y%get_vect()
|
||||||
|
call a%spmm(alpha,rx,beta,ry,info)
|
||||||
|
call y%bld(ry)
|
||||||
|
end select
|
||||||
|
class default
|
||||||
|
if (a%is_dev()) call a%sync()
|
||||||
|
rx = x%get_vect()
|
||||||
|
ry = y%get_vect()
|
||||||
|
call a%spmm(alpha,rx,beta,ry,info)
|
||||||
|
call y%bld(ry)
|
||||||
|
end select
|
||||||
|
|
||||||
|
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
call psb_erractionrestore(err_act)
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_dnsg_vect_mv
|
||||||
|
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_dnsg_mold(a,b,info)
|
||||||
|
use psb_base_mod
|
||||||
|
use psb_c_cuda_vect_mod
|
||||||
|
use dnsdev_mod
|
||||||
|
use psb_c_vectordev_mod
|
||||||
|
use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_dnsg_mold
|
||||||
|
implicit none
|
||||||
|
class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a
|
||||||
|
class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
Integer(Psb_ipk_) :: err_act
|
||||||
|
character(len=20) :: name='dnsg_mold'
|
||||||
|
logical, parameter :: debug=.false.
|
||||||
|
|
||||||
|
call psb_get_erraction(err_act)
|
||||||
|
|
||||||
|
info = 0
|
||||||
|
if (allocated(b)) then
|
||||||
|
call b%free()
|
||||||
|
deallocate(b,stat=info)
|
||||||
|
end if
|
||||||
|
if (info == 0) allocate(psb_c_cuda_dnsg_sparse_mat :: b, stat=info)
|
||||||
|
|
||||||
|
if (info /= psb_success_) then
|
||||||
|
info = psb_err_alloc_dealloc_
|
||||||
|
call psb_errpush(info, name)
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_dnsg_mold
|
||||||
|
|
||||||
|
|
||||||
|
!!$
|
||||||
|
!!$ interface
|
||||||
|
!!$ subroutine psb_c_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans)
|
||||||
|
!!$ import :: psb_ipk_, psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type
|
||||||
|
!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a
|
||||||
|
!!$ complex(psb_spk_), intent(in) :: alpha, beta
|
||||||
|
!!$ class(psb_c_base_vect_type), intent(inout) :: x, y
|
||||||
|
!!$ integer(psb_ipk_), intent(out) :: info
|
||||||
|
!!$ character, optional, intent(in) :: trans
|
||||||
|
!!$ end subroutine psb_c_cuda_dnsg_inner_vect_sv
|
||||||
|
!!$ end interface
|
||||||
|
|
||||||
|
!!$ interface
|
||||||
|
!!$ subroutine psb_c_cuda_dnsg_reallocate_nz(nz,a)
|
||||||
|
!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_ipk_
|
||||||
|
!!$ integer(psb_ipk_), intent(in) :: nz
|
||||||
|
!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a
|
||||||
|
!!$ end subroutine psb_c_cuda_dnsg_reallocate_nz
|
||||||
|
!!$ end interface
|
||||||
|
!!$
|
||||||
|
!!$ interface
|
||||||
|
!!$ subroutine psb_c_cuda_dnsg_allocate_mnnz(m,n,a,nz)
|
||||||
|
!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_ipk_
|
||||||
|
!!$ integer(psb_ipk_), intent(in) :: m,n
|
||||||
|
!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a
|
||||||
|
!!$ integer(psb_ipk_), intent(in), optional :: nz
|
||||||
|
!!$ end subroutine psb_c_cuda_dnsg_allocate_mnnz
|
||||||
|
!!$ end interface
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_dnsg_to_gpu(a,info)
|
||||||
|
use psb_base_mod
|
||||||
|
use psb_c_cuda_vect_mod
|
||||||
|
use dnsdev_mod
|
||||||
|
use psb_c_vectordev_mod
|
||||||
|
use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_dnsg_to_gpu
|
||||||
|
class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
Integer(Psb_ipk_) :: err_act, pitch, lda
|
||||||
|
logical, parameter :: debug=.false.
|
||||||
|
character(len=20) :: name='c_cuda_dnsg_to_gpu'
|
||||||
|
|
||||||
|
call psb_erractionsave(err_act)
|
||||||
|
info = psb_success_
|
||||||
|
if (debug) write(0,*) 'DNS_TO_GPU',size(a%val,1),size(a%val,2)
|
||||||
|
info = FallocDnsDevice(a%deviceMat,a%get_nrows(),a%get_ncols(),&
|
||||||
|
& spgpu_type_complex_float,1)
|
||||||
|
if (info == 0) info = writeDnsDevice(a%deviceMat,a%val,size(a%val,1),size(a%val,2))
|
||||||
|
if (debug) write(0,*) 'DNS_TO_GPU: From writeDnsDEvice',info
|
||||||
|
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
call psb_erractionrestore(err_act)
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_dnsg_to_gpu
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_cp_dnsg_from_coo(a,b,info)
|
||||||
|
use psb_base_mod
|
||||||
|
use psb_c_cuda_vect_mod
|
||||||
|
use dnsdev_mod
|
||||||
|
use psb_c_vectordev_mod
|
||||||
|
use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_cp_dnsg_from_coo
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a
|
||||||
|
class(psb_c_coo_sparse_mat), intent(in) :: b
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
Integer(Psb_ipk_) :: err_act
|
||||||
|
character(len=20) :: name='c_cuda_dnsg_cp_from_coo'
|
||||||
|
integer(psb_ipk_) :: debug_level, debug_unit
|
||||||
|
logical, parameter :: debug=.false.
|
||||||
|
type(psb_c_coo_sparse_mat) :: tmp
|
||||||
|
|
||||||
|
call psb_erractionsave(err_act)
|
||||||
|
info = psb_success_
|
||||||
|
if (b%is_dev()) call b%sync()
|
||||||
|
|
||||||
|
call a%psb_c_dns_sparse_mat%cp_from_coo(b,info)
|
||||||
|
if (debug) write(0,*) 'dnsg_cp_from_coo: dns_cp',info
|
||||||
|
if (info == 0) call a%to_gpu(info)
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
|
||||||
|
call psb_erractionrestore(err_act)
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_cp_dnsg_from_coo
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_cp_dnsg_from_fmt(a,b,info)
|
||||||
|
use psb_base_mod
|
||||||
|
use psb_c_cuda_vect_mod
|
||||||
|
use dnsdev_mod
|
||||||
|
use psb_c_vectordev_mod
|
||||||
|
use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_cp_dnsg_from_fmt
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a
|
||||||
|
class(psb_c_base_sparse_mat), intent(in) :: b
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
|
||||||
|
type(psb_c_coo_sparse_mat) :: tmp
|
||||||
|
Integer(Psb_ipk_) :: err_act
|
||||||
|
character(len=20) :: name='c_cuda_dnsg_cp_from_fmt'
|
||||||
|
|
||||||
|
call psb_erractionsave(err_act)
|
||||||
|
info = psb_success_
|
||||||
|
if (b%is_dev()) call b%sync()
|
||||||
|
|
||||||
|
select type (b)
|
||||||
|
type is (psb_c_coo_sparse_mat)
|
||||||
|
call a%cp_from_coo(b,info)
|
||||||
|
|
||||||
|
!!$ class is (psb_c_ell_sparse_mat)
|
||||||
|
!!$ nzm = psb_size(b%ja,2)
|
||||||
|
!!$ m = b%get_nrows()
|
||||||
|
!!$ nc = b%get_ncols()
|
||||||
|
!!$ nza = b%get_nzeros()
|
||||||
|
!!$#ifdef HAVE_SPGPU
|
||||||
|
!!$ gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1)
|
||||||
|
!!$ ld = gpu_parms%pitch
|
||||||
|
!!$ nzm = gpu_parms%maxRowSize
|
||||||
|
!!$#else
|
||||||
|
!!$ ld = m
|
||||||
|
!!$#endif
|
||||||
|
!!$ a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
|
||||||
|
!!$ if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info)
|
||||||
|
!!$ if (info == 0) call psb_safe_cpy( b%irn, a%irn , info)
|
||||||
|
!!$ if (info == 0) call psb_safe_cpy( b%ja , a%ja , info)
|
||||||
|
!!$ if (info == 0) call psb_safe_cpy( b%val, a%val , info)
|
||||||
|
!!$ if (info == 0) call psb_realloc(ld,nzm,a%ja,info)
|
||||||
|
!!$ if (info == 0) then
|
||||||
|
!!$ a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm)
|
||||||
|
!!$ end if
|
||||||
|
!!$ if (info == 0) call psb_realloc(ld,nzm,a%val,info)
|
||||||
|
!!$ if (info == 0) then
|
||||||
|
!!$ a%val(1:m,1:nzm) = b%val(1:m,1:nzm)
|
||||||
|
!!$ end if
|
||||||
|
!!$ a%nzt = nza
|
||||||
|
!!$#ifdef HAVE_SPGPU
|
||||||
|
!!$ call a%to_gpu(info)
|
||||||
|
!!$#endif
|
||||||
|
|
||||||
|
class default
|
||||||
|
|
||||||
|
call b%cp_to_coo(tmp,info)
|
||||||
|
if (info == psb_success_) call a%mv_from_coo(tmp,info)
|
||||||
|
end select
|
||||||
|
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
call psb_erractionrestore(err_act)
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_cp_dnsg_from_fmt
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_mv_dnsg_from_coo(a,b,info)
|
||||||
|
use psb_base_mod
|
||||||
|
use psb_c_cuda_vect_mod
|
||||||
|
use dnsdev_mod
|
||||||
|
use psb_c_vectordev_mod
|
||||||
|
use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_mv_dnsg_from_coo
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a
|
||||||
|
class(psb_c_coo_sparse_mat), intent(inout) :: b
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
|
||||||
|
Integer(Psb_ipk_) :: err_act
|
||||||
|
logical, parameter :: debug=.false.
|
||||||
|
character(len=20) :: name='c_cuda_dnsg_mv_from_coo'
|
||||||
|
|
||||||
|
call psb_erractionsave(err_act)
|
||||||
|
info = psb_success_
|
||||||
|
|
||||||
|
if (.not.b%is_by_rows()) call b%fix(info)
|
||||||
|
if (info /= psb_success_) return
|
||||||
|
if (b%is_dev()) call b%sync()
|
||||||
|
call a%cp_from_coo(b,info)
|
||||||
|
if (debug) write(0,*) 'dnsg_mv_from_coo: cp_from_coo:',info
|
||||||
|
call b%free()
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
call psb_erractionrestore(err_act)
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_mv_dnsg_from_coo
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_mv_dnsg_from_fmt(a,b,info)
|
||||||
|
use psb_base_mod
|
||||||
|
use psb_c_cuda_vect_mod
|
||||||
|
use dnsdev_mod
|
||||||
|
use psb_c_vectordev_mod
|
||||||
|
use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_mv_dnsg_from_fmt
|
||||||
|
implicit none
|
||||||
|
class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a
|
||||||
|
class(psb_c_base_sparse_mat), intent(inout) :: b
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
|
||||||
|
|
||||||
|
type(psb_c_coo_sparse_mat) :: tmp
|
||||||
|
Integer(Psb_ipk_) :: err_act
|
||||||
|
character(len=20) :: name='c_cuda_dnsg_cp_from_fmt'
|
||||||
|
|
||||||
|
call psb_erractionsave(err_act)
|
||||||
|
info = psb_success_
|
||||||
|
if (b%is_dev()) call b%sync()
|
||||||
|
|
||||||
|
select type (b)
|
||||||
|
type is (psb_c_coo_sparse_mat)
|
||||||
|
call a%mv_from_coo(b,info)
|
||||||
|
|
||||||
|
!!$ class is (psb_c_ell_sparse_mat)
|
||||||
|
!!$ nzm = psb_size(b%ja,2)
|
||||||
|
!!$ m = b%get_nrows()
|
||||||
|
!!$ nc = b%get_ncols()
|
||||||
|
!!$ nza = b%get_nzeros()
|
||||||
|
!!$#ifdef HAVE_SPGPU
|
||||||
|
!!$ gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1)
|
||||||
|
!!$ ld = gpu_parms%pitch
|
||||||
|
!!$ nzm = gpu_parms%maxRowSize
|
||||||
|
!!$#else
|
||||||
|
!!$ ld = m
|
||||||
|
!!$#endif
|
||||||
|
!!$ a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
|
||||||
|
!!$ if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info)
|
||||||
|
!!$ if (info == 0) call psb_safe_cpy( b%irn, a%irn , info)
|
||||||
|
!!$ if (info == 0) call psb_safe_cpy( b%ja , a%ja , info)
|
||||||
|
!!$ if (info == 0) call psb_safe_cpy( b%val, a%val , info)
|
||||||
|
!!$ if (info == 0) call psb_realloc(ld,nzm,a%ja,info)
|
||||||
|
!!$ if (info == 0) then
|
||||||
|
!!$ a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm)
|
||||||
|
!!$ end if
|
||||||
|
!!$ if (info == 0) call psb_realloc(ld,nzm,a%val,info)
|
||||||
|
!!$ if (info == 0) then
|
||||||
|
!!$ a%val(1:m,1:nzm) = b%val(1:m,1:nzm)
|
||||||
|
!!$ end if
|
||||||
|
!!$ a%nzt = nza
|
||||||
|
!!$#ifdef HAVE_SPGPU
|
||||||
|
!!$ call a%to_gpu(info)
|
||||||
|
!!$#endif
|
||||||
|
|
||||||
|
class default
|
||||||
|
|
||||||
|
call b%mv_to_coo(tmp,info)
|
||||||
|
if (info == psb_success_) call a%mv_from_coo(tmp,info)
|
||||||
|
end select
|
||||||
|
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
call psb_erractionrestore(err_act)
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_mv_dnsg_from_fmt
|
@ -0,0 +1,99 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
subroutine psb_c_cuda_elg_allocate_mnnz(m,n,a,nz)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use elldev_mod
|
||||||
|
use psb_vectordev_mod
|
||||||
|
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_allocate_mnnz
|
||||||
|
implicit none
|
||||||
|
integer(psb_ipk_), intent(in) :: m,n
|
||||||
|
class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
|
||||||
|
integer(psb_ipk_), intent(in), optional :: nz
|
||||||
|
Integer(Psb_ipk_) :: err_act, info, nz_,ld
|
||||||
|
character(len=20) :: name='allocate_mnz'
|
||||||
|
logical, parameter :: debug=.false.
|
||||||
|
type(elldev_parms) :: gpu_parms
|
||||||
|
|
||||||
|
call psb_erractionsave(err_act)
|
||||||
|
info = psb_success_
|
||||||
|
if (m < 0) then
|
||||||
|
info = psb_err_iarg_neg_
|
||||||
|
call psb_errpush(info,name,i_err=(/ione,izero,izero,izero,izero/))
|
||||||
|
goto 9999
|
||||||
|
endif
|
||||||
|
if (n < 0) then
|
||||||
|
info = psb_err_iarg_neg_
|
||||||
|
call psb_errpush(info,name,i_err=(/2*ione,izero,izero,izero,izero/))
|
||||||
|
goto 9999
|
||||||
|
endif
|
||||||
|
if (present(nz)) then
|
||||||
|
nz_ = (max(nz,ione) + m -1 )/m
|
||||||
|
else
|
||||||
|
nz_ = (max(7*m,7*n,ione)+m-1)/m
|
||||||
|
end if
|
||||||
|
if (nz_ < 0) then
|
||||||
|
info = psb_err_iarg_neg_
|
||||||
|
call psb_errpush(info,name,i_err=(/3*ione,izero,izero,izero,izero/))
|
||||||
|
goto 9999
|
||||||
|
endif
|
||||||
|
|
||||||
|
gpu_parms = FgetEllDeviceParams(m,nz_,nz_*m,n,spgpu_type_complex_float,1)
|
||||||
|
ld = gpu_parms%pitch
|
||||||
|
nz_ = gpu_parms%maxRowSize
|
||||||
|
|
||||||
|
if (info == psb_success_) call psb_realloc(m,a%irn,info)
|
||||||
|
if (info == psb_success_) call psb_realloc(m,a%idiag,info)
|
||||||
|
if (info == psb_success_) call psb_realloc(ld,nz_,a%ja,info)
|
||||||
|
if (info == psb_success_) call psb_realloc(ld,nz_,a%val,info)
|
||||||
|
if (info == psb_success_) then
|
||||||
|
a%irn = 0
|
||||||
|
a%idiag = 0
|
||||||
|
a%nzt = 0
|
||||||
|
call a%set_nrows(m)
|
||||||
|
call a%set_ncols(n)
|
||||||
|
call a%set_bld()
|
||||||
|
call a%set_triangle(.false.)
|
||||||
|
call a%set_unit(.false.)
|
||||||
|
call a%set_dupl(psb_dupl_def_)
|
||||||
|
end if
|
||||||
|
|
||||||
|
call a%to_gpu(info,nzrm=nz_)
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
|
||||||
|
call psb_erractionrestore(err_act)
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_elg_allocate_mnnz
|
@ -0,0 +1,64 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_elg_asb(a)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_asb
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
|
||||||
|
|
||||||
|
integer(psb_ipk_) :: err_act, info
|
||||||
|
character(len=20) :: name='elg_asb'
|
||||||
|
logical :: clear_
|
||||||
|
logical, parameter :: debug=.false.
|
||||||
|
real(psb_dpk_), allocatable :: valt(:,:)
|
||||||
|
integer(psb_ipk_), allocatable :: jat(:,:)
|
||||||
|
integer(psb_ipk_) :: nr, nc
|
||||||
|
|
||||||
|
call psb_erractionsave(err_act)
|
||||||
|
info = psb_success_
|
||||||
|
|
||||||
|
! Only call sync() if we are on host
|
||||||
|
if (a%is_host()) then
|
||||||
|
call a%sync()
|
||||||
|
end if
|
||||||
|
call a%set_asb()
|
||||||
|
|
||||||
|
call psb_erractionrestore(err_act)
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_elg_asb
|
@ -0,0 +1,124 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_elg_csmm(alpha,a,x,beta,y,info,trans)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use elldev_mod
|
||||||
|
use psb_vectordev_mod
|
||||||
|
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csmm
|
||||||
|
implicit none
|
||||||
|
class(psb_c_cuda_elg_sparse_mat), intent(in) :: a
|
||||||
|
complex(psb_spk_), intent(in) :: alpha, beta, x(:,:)
|
||||||
|
complex(psb_spk_), intent(inout) :: y(:,:)
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
character, optional, intent(in) :: trans
|
||||||
|
|
||||||
|
character :: trans_
|
||||||
|
integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy
|
||||||
|
complex(psb_spk_), allocatable :: acc(:)
|
||||||
|
type(c_ptr) :: gpX, gpY
|
||||||
|
logical :: tra
|
||||||
|
Integer(Psb_ipk_) :: err_act
|
||||||
|
character(len=20) :: name='c_cuda_elg_csmm'
|
||||||
|
logical, parameter :: debug=.false.
|
||||||
|
|
||||||
|
info = psb_success_
|
||||||
|
call psb_erractionsave(err_act)
|
||||||
|
|
||||||
|
if (present(trans)) then
|
||||||
|
trans_ = trans
|
||||||
|
else
|
||||||
|
trans_ = 'N'
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (.not.a%is_asb()) then
|
||||||
|
info = psb_err_invalid_mat_state_
|
||||||
|
call psb_errpush(info,name)
|
||||||
|
goto 9999
|
||||||
|
endif
|
||||||
|
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
|
||||||
|
|
||||||
|
if (tra) then
|
||||||
|
m = a%get_ncols()
|
||||||
|
n = a%get_nrows()
|
||||||
|
else
|
||||||
|
n = a%get_ncols()
|
||||||
|
m = a%get_nrows()
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (size(x,1)<n) then
|
||||||
|
info = 36
|
||||||
|
call psb_errpush(info,name,i_err=(/3*ione,n,izero,izero,izero/))
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (size(y,1)<m) then
|
||||||
|
info = 36
|
||||||
|
call psb_errpush(info,name,i_err=(/5*ione,m,izero,izero,izero/))
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (tra) then
|
||||||
|
if (a%is_dev()) call a%sync()
|
||||||
|
call a%psb_c_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans)
|
||||||
|
else
|
||||||
|
!
|
||||||
|
! Just to test, move X/Y to/from the GPU.
|
||||||
|
!
|
||||||
|
nxy = min(size(x,2),size(y,2))
|
||||||
|
if (info == 0) &
|
||||||
|
& info = FallocMultiVecDevice(gpX,nxy,size(x,1),spgpu_type_complex_float)
|
||||||
|
if (info == 0) &
|
||||||
|
& info = writeMultiVecDevice(gpX,x,nxy)
|
||||||
|
if (info == 0) &
|
||||||
|
& info = FallocMultiVecDevice(gpY,nxy,size(y,1),spgpu_type_complex_float)
|
||||||
|
if (info == 0) &
|
||||||
|
& info = writeMultiVecDevice(gpY,y,nxy)
|
||||||
|
|
||||||
|
if (info == 0) &
|
||||||
|
& info = spmvEllDevice(a%deviceMat,alpha,gpX,beta,gpY)
|
||||||
|
if (info == 0) &
|
||||||
|
& info = readMultiVecDevice(gpY,y,nxy)
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
call freeMultiVecDevice(gpX)
|
||||||
|
call freeMultiVecDevice(gpY)
|
||||||
|
endif
|
||||||
|
|
||||||
|
call psb_erractionrestore(err_act)
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_elg_csmm
|
@ -0,0 +1,127 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_elg_csmv(alpha,a,x,beta,y,info,trans)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use elldev_mod
|
||||||
|
use psb_vectordev_mod
|
||||||
|
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csmv
|
||||||
|
implicit none
|
||||||
|
class(psb_c_cuda_elg_sparse_mat), intent(in) :: a
|
||||||
|
complex(psb_spk_), intent(in) :: alpha, beta, x(:)
|
||||||
|
complex(psb_spk_), intent(inout) :: y(:)
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
character, optional, intent(in) :: trans
|
||||||
|
|
||||||
|
character :: trans_
|
||||||
|
integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc
|
||||||
|
complex(psb_spk_) :: acc
|
||||||
|
type(c_ptr) :: gpX, gpY
|
||||||
|
logical :: tra
|
||||||
|
Integer(Psb_ipk_) :: err_act
|
||||||
|
character(len=20) :: name='d_elg_csmv'
|
||||||
|
logical, parameter :: debug=.false.
|
||||||
|
|
||||||
|
call psb_erractionsave(err_act)
|
||||||
|
info = psb_success_
|
||||||
|
|
||||||
|
if (present(trans)) then
|
||||||
|
trans_ = trans
|
||||||
|
else
|
||||||
|
trans_ = 'N'
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (.not.a%is_asb()) then
|
||||||
|
info = psb_err_invalid_mat_state_
|
||||||
|
call psb_errpush(info,name)
|
||||||
|
goto 9999
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
|
||||||
|
|
||||||
|
if (tra) then
|
||||||
|
m = a%get_ncols()
|
||||||
|
n = a%get_nrows()
|
||||||
|
else
|
||||||
|
n = a%get_ncols()
|
||||||
|
m = a%get_nrows()
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (size(x,1)<n) then
|
||||||
|
info = 36
|
||||||
|
call psb_errpush(info,name,i_err=(/3*ione,n,izero,izero,izero/))
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (size(y,1)<m) then
|
||||||
|
info = 36
|
||||||
|
call psb_errpush(info,name,i_err=(/5*ione,m,izero,izero,izero/))
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (tra) then
|
||||||
|
if (a%is_dev()) call a%sync()
|
||||||
|
call a%psb_c_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans)
|
||||||
|
else
|
||||||
|
!
|
||||||
|
! Just to test, move X/Y to/from the GPU.
|
||||||
|
!
|
||||||
|
if (info == 0) &
|
||||||
|
& info = FallocMultiVecDevice(gpX,1,size(x,1),spgpu_type_complex_float)
|
||||||
|
if (alpha /= dzero) then
|
||||||
|
if (info == 0) &
|
||||||
|
& info = writeMultiVecDevice(gpX,x)
|
||||||
|
end if
|
||||||
|
if (info == 0) &
|
||||||
|
& info = FallocMultiVecDevice(gpY,1,size(y,1),spgpu_type_complex_float)
|
||||||
|
if (beta /= dzero) then
|
||||||
|
if (info == 0) &
|
||||||
|
& info = writeMultiVecDevice(gpY,y)
|
||||||
|
end if
|
||||||
|
if (info == 0) &
|
||||||
|
& info = spmvEllDevice(a%deviceMat,alpha,gpX,beta,gpY)
|
||||||
|
if (info == 0) &
|
||||||
|
& info = readMultiVecDevice(gpY,y)
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
call freeMultiVecDevice(gpX)
|
||||||
|
call freeMultiVecDevice(gpY)
|
||||||
|
endif
|
||||||
|
|
||||||
|
call psb_erractionrestore(err_act)
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_elg_csmv
|
@ -0,0 +1,239 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use iso_c_binding
|
||||||
|
use elldev_mod
|
||||||
|
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csput_a
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
|
||||||
|
complex(psb_spk_), intent(in) :: val(:)
|
||||||
|
integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
|
||||||
|
|
||||||
|
integer(psb_ipk_) :: err_act
|
||||||
|
character(len=20) :: name='c_cuda_elg_csput_a'
|
||||||
|
logical, parameter :: debug=.false.
|
||||||
|
integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit
|
||||||
|
real(psb_dpk_) :: t1,t2,t3
|
||||||
|
type(c_ptr) :: devIdxUpd
|
||||||
|
|
||||||
|
call psb_erractionsave(err_act)
|
||||||
|
info = psb_success_
|
||||||
|
debug_unit = psb_get_debug_unit()
|
||||||
|
debug_level = psb_get_debug_level()
|
||||||
|
|
||||||
|
!!$ write(0,*) 'In ELG_csput_a'
|
||||||
|
if (nz <= 0) then
|
||||||
|
info = psb_err_iarg_neg_
|
||||||
|
int_err(1)=1
|
||||||
|
call psb_errpush(info,name,i_err=int_err)
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
if (size(ia) < nz) then
|
||||||
|
info = psb_err_input_asize_invalid_i_
|
||||||
|
int_err(1)=2
|
||||||
|
call psb_errpush(info,name,i_err=int_err)
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (size(ja) < nz) then
|
||||||
|
info = psb_err_input_asize_invalid_i_
|
||||||
|
int_err(1)=3
|
||||||
|
call psb_errpush(info,name,i_err=int_err)
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
if (size(val) < nz) then
|
||||||
|
info = psb_err_input_asize_invalid_i_
|
||||||
|
int_err(1)=4
|
||||||
|
call psb_errpush(info,name,i_err=int_err)
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (nz == 0) return
|
||||||
|
|
||||||
|
|
||||||
|
if (a%is_bld()) then
|
||||||
|
! Build phase should only ever be in COO
|
||||||
|
info = psb_err_invalid_mat_state_
|
||||||
|
|
||||||
|
else if (a%is_upd()) then
|
||||||
|
!!$ write(*,*) 'elg_csput_a '
|
||||||
|
if (a%is_dev()) call a%sync()
|
||||||
|
call a%psb_c_ell_sparse_mat%csput(nz,ia,ja,val,&
|
||||||
|
& imin,imax,jmin,jmax,info)
|
||||||
|
if (info /= psb_success_) then
|
||||||
|
call psb_errpush(info,name)
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
call a%set_host()
|
||||||
|
else
|
||||||
|
! State is wrong.
|
||||||
|
info = psb_err_invalid_mat_state_
|
||||||
|
end if
|
||||||
|
if (info /= psb_success_) then
|
||||||
|
call psb_errpush(info,name)
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
|
||||||
|
call psb_erractionrestore(err_act)
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_elg_csput_a
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use iso_c_binding
|
||||||
|
use elldev_mod
|
||||||
|
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csput_v
|
||||||
|
use psb_c_cuda_vect_mod
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
|
||||||
|
class(psb_c_base_vect_type), intent(inout) :: val
|
||||||
|
class(psb_i_base_vect_type), intent(inout) :: ia, ja
|
||||||
|
integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
|
||||||
|
|
||||||
|
integer(psb_ipk_) :: err_act
|
||||||
|
character(len=20) :: name='c_cuda_elg_csput_v'
|
||||||
|
logical, parameter :: debug=.false.
|
||||||
|
integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit, nrw
|
||||||
|
logical :: gpu_invoked
|
||||||
|
real(psb_dpk_) :: t1,t2,t3
|
||||||
|
type(c_ptr) :: devIdxUpd
|
||||||
|
integer(psb_ipk_), allocatable :: idxs(:)
|
||||||
|
logical, parameter :: debug_idxs=.false., debug_vals=.false.
|
||||||
|
|
||||||
|
|
||||||
|
call psb_erractionsave(err_act)
|
||||||
|
info = psb_success_
|
||||||
|
debug_unit = psb_get_debug_unit()
|
||||||
|
debug_level = psb_get_debug_level()
|
||||||
|
|
||||||
|
! write(0,*) 'In ELG_csput_v'
|
||||||
|
if (nz <= 0) then
|
||||||
|
info = psb_err_iarg_neg_
|
||||||
|
int_err(1)=1
|
||||||
|
call psb_errpush(info,name,i_err=int_err)
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
if (ia%get_nrows() < nz) then
|
||||||
|
info = psb_err_input_asize_invalid_i_
|
||||||
|
int_err(1)=2
|
||||||
|
call psb_errpush(info,name,i_err=int_err)
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (ja%get_nrows() < nz) then
|
||||||
|
info = psb_err_input_asize_invalid_i_
|
||||||
|
int_err(1)=3
|
||||||
|
call psb_errpush(info,name,i_err=int_err)
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
if (val%get_nrows() < nz) then
|
||||||
|
info = psb_err_input_asize_invalid_i_
|
||||||
|
int_err(1)=4
|
||||||
|
call psb_errpush(info,name,i_err=int_err)
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (nz == 0) return
|
||||||
|
|
||||||
|
|
||||||
|
if (a%is_bld()) then
|
||||||
|
! Build phase should only ever be in COO
|
||||||
|
info = psb_err_invalid_mat_state_
|
||||||
|
|
||||||
|
else if (a%is_upd()) then
|
||||||
|
|
||||||
|
t1=psb_wtime()
|
||||||
|
gpu_invoked = .false.
|
||||||
|
select type (ia)
|
||||||
|
class is (psb_i_vect_cuda)
|
||||||
|
select type (ja)
|
||||||
|
class is (psb_i_vect_cuda)
|
||||||
|
select type (val)
|
||||||
|
class is (psb_c_vect_cuda)
|
||||||
|
if (a%is_host()) call a%sync()
|
||||||
|
if (val%is_host()) call val%sync()
|
||||||
|
if (ia%is_host()) call ia%sync()
|
||||||
|
if (ja%is_host()) call ja%sync()
|
||||||
|
info = csputEllDeviceFloatComplex(a%deviceMat,nz,&
|
||||||
|
& ia%deviceVect,ja%deviceVect,val%deviceVect)
|
||||||
|
call a%set_dev()
|
||||||
|
gpu_invoked=.true.
|
||||||
|
end select
|
||||||
|
end select
|
||||||
|
end select
|
||||||
|
if (.not.gpu_invoked) then
|
||||||
|
!!$ write(0,*)'Not gpu_invoked '
|
||||||
|
if (a%is_dev()) call a%sync()
|
||||||
|
call a%psb_c_ell_sparse_mat%csput(nz,ia,ja,val,&
|
||||||
|
& imin,imax,jmin,jmax,info)
|
||||||
|
call a%set_host()
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (info /= 0) then
|
||||||
|
info = psb_err_internal_error_
|
||||||
|
end if
|
||||||
|
|
||||||
|
|
||||||
|
else
|
||||||
|
! State is wrong.
|
||||||
|
info = psb_err_invalid_mat_state_
|
||||||
|
end if
|
||||||
|
if (info /= psb_success_) then
|
||||||
|
call psb_errpush(info,name)
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
|
||||||
|
call psb_erractionrestore(err_act)
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_elg_csput_v
|
@ -0,0 +1,67 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_elg_from_gpu(a,info)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use elldev_mod
|
||||||
|
use psb_vectordev_mod
|
||||||
|
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_from_gpu
|
||||||
|
implicit none
|
||||||
|
class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
|
||||||
|
integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize
|
||||||
|
|
||||||
|
info = 0
|
||||||
|
|
||||||
|
if (.not.(c_associated(a%deviceMat))) then
|
||||||
|
call a%free()
|
||||||
|
return
|
||||||
|
end if
|
||||||
|
|
||||||
|
m = a%get_nrows()
|
||||||
|
nzm = psb_size(a%val,2)
|
||||||
|
n = a%get_ncols()
|
||||||
|
|
||||||
|
pitch = getEllDevicePitch(a%deviceMat)
|
||||||
|
maxrowsize = getEllDeviceMaxRowSize(a%deviceMat)
|
||||||
|
|
||||||
|
if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then
|
||||||
|
call psb_realloc(pitch,maxrowsize,a%val,info)
|
||||||
|
if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info)
|
||||||
|
if (info == 0) call psb_realloc(pitch,a%irn,info)
|
||||||
|
end if
|
||||||
|
if (info == 0) info = &
|
||||||
|
& readEllDevice(a%deviceMat,a%val,a%ja,pitch,a%irn,a%idiag)
|
||||||
|
call a%set_sync()
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_elg_from_gpu
|
@ -0,0 +1,84 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use elldev_mod
|
||||||
|
use psb_vectordev_mod
|
||||||
|
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_inner_vect_sv
|
||||||
|
use psb_c_cuda_vect_mod
|
||||||
|
implicit none
|
||||||
|
class(psb_c_cuda_elg_sparse_mat), intent(in) :: a
|
||||||
|
complex(psb_spk_), intent(in) :: alpha, beta
|
||||||
|
class(psb_c_base_vect_type), intent(inout) :: x, y
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
character, optional, intent(in) :: trans
|
||||||
|
|
||||||
|
integer(psb_ipk_) :: err_act
|
||||||
|
character(len=20) :: name='c_cuda_elg_inner_vect_sv'
|
||||||
|
logical, parameter :: debug=.false.
|
||||||
|
complex(psb_spk_), allocatable :: rx(:), ry(:)
|
||||||
|
|
||||||
|
call psb_get_erraction(err_act)
|
||||||
|
! This is the base version. If we get here
|
||||||
|
! it means the derived class is incomplete,
|
||||||
|
! so we throw an error.
|
||||||
|
info = psb_success_
|
||||||
|
|
||||||
|
if (a%is_dev()) call a%sync()
|
||||||
|
if (.false.) then
|
||||||
|
rx = x%get_vect()
|
||||||
|
ry = y%get_vect()
|
||||||
|
call a%inner_spsm(alpha,rx,beta,ry,info,trans)
|
||||||
|
call y%bld(ry)
|
||||||
|
else
|
||||||
|
call x%sync()
|
||||||
|
call y%sync()
|
||||||
|
call a%psb_c_ell_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans)
|
||||||
|
call y%set_host()
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (info /= psb_success_) then
|
||||||
|
info = psb_err_from_subroutine_
|
||||||
|
call psb_errpush(info,name, a_err='inner_cssm')
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
|
||||||
|
|
||||||
|
call psb_erractionrestore(err_act)
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_elg_inner_vect_sv
|
@ -0,0 +1,63 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
subroutine psb_c_cuda_elg_mold(a,b,info)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_mold
|
||||||
|
implicit none
|
||||||
|
class(psb_c_cuda_elg_sparse_mat), intent(in) :: a
|
||||||
|
class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
Integer(Psb_ipk_) :: err_act
|
||||||
|
character(len=20) :: name='elg_mold'
|
||||||
|
logical, parameter :: debug=.false.
|
||||||
|
|
||||||
|
call psb_get_erraction(err_act)
|
||||||
|
|
||||||
|
info = 0
|
||||||
|
if (allocated(b)) then
|
||||||
|
call b%free()
|
||||||
|
deallocate(b,stat=info)
|
||||||
|
end if
|
||||||
|
if (info == 0) allocate(psb_c_cuda_elg_sparse_mat :: b, stat=info)
|
||||||
|
|
||||||
|
if (info /= psb_success_) then
|
||||||
|
info = psb_err_alloc_dealloc_
|
||||||
|
call psb_errpush(info, name)
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_elg_mold
|
@ -0,0 +1,72 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_elg_reallocate_nz(nz,a)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use elldev_mod
|
||||||
|
use psb_vectordev_mod
|
||||||
|
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_reallocate_nz
|
||||||
|
implicit none
|
||||||
|
integer(psb_ipk_), intent(in) :: nz
|
||||||
|
class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
|
||||||
|
integer(psb_ipk_) :: m, nzrm,ld
|
||||||
|
Integer(Psb_ipk_) :: err_act, info
|
||||||
|
character(len=20) :: name='c_cuda_elg_reallocate_nz'
|
||||||
|
logical, parameter :: debug=.false.
|
||||||
|
|
||||||
|
call psb_erractionsave(err_act)
|
||||||
|
|
||||||
|
!
|
||||||
|
! What should this really do???
|
||||||
|
!
|
||||||
|
if (a%is_dev()) call a%sync()
|
||||||
|
m = a%get_nrows()
|
||||||
|
nzrm = (max(nz,ione)+m-1)/m
|
||||||
|
ld = size(a%ja,1)
|
||||||
|
call psb_realloc(ld,nzrm,a%ja,info)
|
||||||
|
if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info)
|
||||||
|
if (info /= psb_success_) then
|
||||||
|
call psb_errpush(psb_err_alloc_dealloc_,name)
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
|
||||||
|
call a%to_gpu(info,nzrm=nzrm)
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
|
||||||
|
call psb_erractionrestore(err_act)
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_elg_reallocate_nz
|
@ -0,0 +1,71 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_elg_scal(d,a,info,side)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use elldev_mod
|
||||||
|
use psb_vectordev_mod
|
||||||
|
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_scal
|
||||||
|
implicit none
|
||||||
|
class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
|
||||||
|
complex(psb_spk_), intent(in) :: d(:)
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
character, intent(in), optional :: side
|
||||||
|
|
||||||
|
|
||||||
|
Integer(Psb_ipk_) :: err_act,mnm, i, j, m
|
||||||
|
character(len=20) :: name='scal'
|
||||||
|
logical, parameter :: debug=.false.
|
||||||
|
|
||||||
|
info = psb_success_
|
||||||
|
call psb_erractionsave(err_act)
|
||||||
|
|
||||||
|
if (a%is_dev()) call a%sync()
|
||||||
|
|
||||||
|
if (a%is_unit()) then
|
||||||
|
call a%make_nonunit()
|
||||||
|
end if
|
||||||
|
|
||||||
|
call a%psb_c_ell_sparse_mat%scal(d,info,side)
|
||||||
|
if (info /= psb_success_) goto 9999
|
||||||
|
|
||||||
|
call a%to_gpu(info)
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
|
||||||
|
call psb_erractionrestore(err_act)
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_elg_scal
|
@ -0,0 +1,66 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_elg_scals(d,a,info)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use elldev_mod
|
||||||
|
use psb_vectordev_mod
|
||||||
|
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_scals
|
||||||
|
implicit none
|
||||||
|
class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
|
||||||
|
complex(psb_spk_), intent(in) :: d
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
|
||||||
|
Integer(Psb_ipk_) :: err_act
|
||||||
|
character(len=20) :: name='scal'
|
||||||
|
logical, parameter :: debug=.false.
|
||||||
|
|
||||||
|
info = psb_success_
|
||||||
|
call psb_erractionsave(err_act)
|
||||||
|
|
||||||
|
if (a%is_dev()) call a%sync()
|
||||||
|
if (a%is_unit()) then
|
||||||
|
call a%make_nonunit()
|
||||||
|
end if
|
||||||
|
|
||||||
|
a%val(:,:) = a%val(:,:) * d
|
||||||
|
|
||||||
|
call a%to_gpu(info)
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_elg_scals
|
@ -0,0 +1,84 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_elg_to_gpu(a,info,nzrm)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use elldev_mod
|
||||||
|
use psb_vectordev_mod
|
||||||
|
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_to_gpu
|
||||||
|
implicit none
|
||||||
|
class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
integer(psb_ipk_), intent(in), optional :: nzrm
|
||||||
|
|
||||||
|
integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize, nzt
|
||||||
|
type(elldev_parms) :: gpu_parms
|
||||||
|
|
||||||
|
info = 0
|
||||||
|
|
||||||
|
if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return
|
||||||
|
|
||||||
|
m = a%get_nrows()
|
||||||
|
nzm = psb_size(a%val,2)
|
||||||
|
n = a%get_ncols()
|
||||||
|
nzt = a%get_nzeros()
|
||||||
|
if (present(nzrm)) nzm = max(nzm,nzrm)
|
||||||
|
|
||||||
|
gpu_parms = FgetEllDeviceParams(m,nzm,nzt,n,spgpu_type_complex_float,1)
|
||||||
|
|
||||||
|
if (c_associated(a%deviceMat)) then
|
||||||
|
pitch = getEllDevicePitch(a%deviceMat)
|
||||||
|
maxrowsize = getEllDeviceMaxRowSize(a%deviceMat)
|
||||||
|
else
|
||||||
|
pitch = -1
|
||||||
|
maxrowsize = -1
|
||||||
|
end if
|
||||||
|
|
||||||
|
if ((pitch /= gpu_parms%pitch).or.(maxrowsize /= gpu_parms%maxRowSize)) then
|
||||||
|
if (c_associated(a%deviceMat)) then
|
||||||
|
call freeEllDevice(a%deviceMat)
|
||||||
|
endif
|
||||||
|
info = FallocEllDevice(a%deviceMat,m,nzm,nzt,n,spgpu_type_complex_float,1)
|
||||||
|
pitch = getEllDevicePitch(a%deviceMat)
|
||||||
|
maxrowsize = getEllDeviceMaxRowSize(a%deviceMat)
|
||||||
|
end if
|
||||||
|
if (info == 0) then
|
||||||
|
if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then
|
||||||
|
call psb_realloc(pitch,maxrowsize,a%val,info)
|
||||||
|
if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info)
|
||||||
|
end if
|
||||||
|
end if
|
||||||
|
if (info == 0) info = &
|
||||||
|
& writeEllDevice(a%deviceMat,a%val,a%ja,size(a%ja,1),a%irn,a%idiag)
|
||||||
|
call a%set_sync()
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_elg_to_gpu
|
@ -0,0 +1,61 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_elg_trim(a)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_trim
|
||||||
|
implicit none
|
||||||
|
class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
|
||||||
|
Integer(psb_ipk_) :: err_act, info, nz, m, nzm,ld
|
||||||
|
character(len=20) :: name='trim'
|
||||||
|
logical, parameter :: debug=.false.
|
||||||
|
|
||||||
|
call psb_erractionsave(err_act)
|
||||||
|
info = psb_success_
|
||||||
|
m = max(1_psb_ipk_,a%get_nrows())
|
||||||
|
ld = max(1_psb_ipk_,size(a%ja,1))
|
||||||
|
nzm = max(1_psb_ipk_,maxval(a%irn(1:m)))
|
||||||
|
|
||||||
|
call psb_realloc(m,a%irn,info)
|
||||||
|
if (info == psb_success_) call psb_realloc(m,a%idiag,info)
|
||||||
|
if (info == psb_success_) call psb_realloc(ld,nzm,a%ja,info)
|
||||||
|
if (info == psb_success_) call psb_realloc(ld,nzm,a%val,info)
|
||||||
|
|
||||||
|
if (info /= psb_success_) goto 9999
|
||||||
|
call psb_erractionrestore(err_act)
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_elg_trim
|
@ -0,0 +1,121 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use elldev_mod
|
||||||
|
use psb_vectordev_mod
|
||||||
|
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_vect_mv
|
||||||
|
use psb_c_cuda_vect_mod
|
||||||
|
implicit none
|
||||||
|
class(psb_c_cuda_elg_sparse_mat), intent(in) :: a
|
||||||
|
complex(psb_spk_), intent(in) :: alpha, beta
|
||||||
|
class(psb_c_base_vect_type), intent(inout) :: x
|
||||||
|
class(psb_c_base_vect_type), intent(inout) :: y
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
character, optional, intent(in) :: trans
|
||||||
|
complex(psb_spk_), allocatable :: rx(:), ry(:)
|
||||||
|
logical :: tra
|
||||||
|
character :: trans_
|
||||||
|
Integer(Psb_ipk_) :: err_act
|
||||||
|
character(len=20) :: name='c_cuda_elg_vect_mv'
|
||||||
|
|
||||||
|
call psb_erractionsave(err_act)
|
||||||
|
info = psb_success_
|
||||||
|
|
||||||
|
if (present(trans)) then
|
||||||
|
trans_ = trans
|
||||||
|
else
|
||||||
|
trans_ = 'N'
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (.not.a%is_asb()) then
|
||||||
|
info = psb_err_invalid_mat_state_
|
||||||
|
call psb_errpush(info,name)
|
||||||
|
goto 9999
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
|
||||||
|
if (tra) then
|
||||||
|
if (a%is_dev()) call a%sync()
|
||||||
|
if (.not.x%is_host()) call x%sync()
|
||||||
|
if (beta /= czero) then
|
||||||
|
if (.not.y%is_host()) call y%sync()
|
||||||
|
end if
|
||||||
|
call a%psb_c_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans)
|
||||||
|
call y%set_host()
|
||||||
|
else
|
||||||
|
if (a%is_host()) call a%sync()
|
||||||
|
select type (xx => x)
|
||||||
|
type is (psb_c_vect_cuda)
|
||||||
|
select type(yy => y)
|
||||||
|
type is (psb_c_vect_cuda)
|
||||||
|
if (a%is_host()) call a%sync()
|
||||||
|
if (xx%is_host()) call xx%sync()
|
||||||
|
if (beta /= czero) then
|
||||||
|
if (yy%is_host()) call yy%sync()
|
||||||
|
end if
|
||||||
|
info = spmvEllDevice(a%deviceMat,alpha,xx%deviceVect,&
|
||||||
|
& beta,yy%deviceVect)
|
||||||
|
if (info /= 0) then
|
||||||
|
call psb_errpush(psb_err_from_subroutine_ai_,name,&
|
||||||
|
& a_err='spmvELLDevice',i_err=(/info,izero,izero,izero,izero/))
|
||||||
|
info = psb_err_from_subroutine_ai_
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
call yy%set_dev()
|
||||||
|
class default
|
||||||
|
if (a%is_dev()) call a%sync()
|
||||||
|
rx = xx%get_vect()
|
||||||
|
ry = y%get_vect()
|
||||||
|
call a%spmm(alpha,rx,beta,ry,info)
|
||||||
|
call y%bld(ry)
|
||||||
|
end select
|
||||||
|
class default
|
||||||
|
if (a%is_dev()) call a%sync()
|
||||||
|
rx = x%get_vect()
|
||||||
|
ry = y%get_vect()
|
||||||
|
call a%spmm(alpha,rx,beta,ry,info)
|
||||||
|
call y%bld(ry)
|
||||||
|
end select
|
||||||
|
|
||||||
|
end if
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
call psb_erractionrestore(err_act)
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_elg_vect_mv
|
@ -0,0 +1,126 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use hdiagdev_mod
|
||||||
|
use psb_vectordev_mod
|
||||||
|
use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_csmv
|
||||||
|
implicit none
|
||||||
|
class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a
|
||||||
|
complex(psb_spk_), intent(in) :: alpha, beta, x(:)
|
||||||
|
complex(psb_spk_), intent(inout) :: y(:)
|
||||||
|
integer, intent(out) :: info
|
||||||
|
character, optional, intent(in) :: trans
|
||||||
|
|
||||||
|
character :: trans_
|
||||||
|
integer :: i,j,k,m,n, nnz, ir, jc
|
||||||
|
complex(psb_spk_) :: acc
|
||||||
|
type(c_ptr) :: gpX, gpY
|
||||||
|
logical :: tra
|
||||||
|
Integer :: err_act
|
||||||
|
character(len=20) :: name='c_cuda_hdiag_csmv'
|
||||||
|
logical, parameter :: debug=.false.
|
||||||
|
|
||||||
|
call psb_erractionsave(err_act)
|
||||||
|
info = psb_success_
|
||||||
|
|
||||||
|
if (present(trans)) then
|
||||||
|
trans_ = trans
|
||||||
|
else
|
||||||
|
trans_ = 'N'
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (.not.a%is_asb()) then
|
||||||
|
info = psb_err_invalid_mat_state_
|
||||||
|
call psb_errpush(info,name)
|
||||||
|
goto 9999
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
|
||||||
|
|
||||||
|
if (tra) then
|
||||||
|
m = a%get_ncols()
|
||||||
|
n = a%get_nrows()
|
||||||
|
else
|
||||||
|
n = a%get_ncols()
|
||||||
|
m = a%get_nrows()
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (size(x,1)<n) then
|
||||||
|
info = 36
|
||||||
|
call psb_errpush(info,name,i_err=(/3*ione,n,izero,izero,izero/))
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (size(y,1)<m) then
|
||||||
|
info = 36
|
||||||
|
call psb_errpush(info,name,i_err=(/5*ione,m,izero,izero,izero/))
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (tra) then
|
||||||
|
call a%psb_c_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans)
|
||||||
|
else
|
||||||
|
!
|
||||||
|
! Just to test, move X/Y to/from the GPU.
|
||||||
|
!
|
||||||
|
if (info == 0) &
|
||||||
|
& info = FallocMultiVecDevice(gpX,1,size(x,1),spgpu_type_double)
|
||||||
|
if (alpha /= dzero) then
|
||||||
|
if (info == 0) &
|
||||||
|
& info = writeMultiVecDevice(gpX,x)
|
||||||
|
end if
|
||||||
|
if (info == 0) &
|
||||||
|
& info = FallocMultiVecDevice(gpY,1,size(y,1),spgpu_type_double)
|
||||||
|
if (beta /= dzero) then
|
||||||
|
if (info == 0) &
|
||||||
|
& info = writeMultiVecDevice(gpY,y)
|
||||||
|
end if
|
||||||
|
if (info == 0) &
|
||||||
|
& info = spmvHdiagDevice(a%deviceMat,alpha,gpX,beta,gpY)
|
||||||
|
if (info == 0) &
|
||||||
|
& info = readMultiVecDevice(gpY,y)
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
call freeMultiVecDevice(gpX)
|
||||||
|
call freeMultiVecDevice(gpY)
|
||||||
|
endif
|
||||||
|
|
||||||
|
call psb_erractionrestore(err_act)
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_hdiag_csmv
|
@ -0,0 +1,64 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
subroutine psb_c_cuda_hdiag_mold(a,b,info)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_mold
|
||||||
|
implicit none
|
||||||
|
class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a
|
||||||
|
class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
integer(psb_ipk_) :: err_act
|
||||||
|
character(len=20) :: name='hdiag_mold'
|
||||||
|
logical, parameter :: debug=.false.
|
||||||
|
|
||||||
|
call psb_get_erraction(err_act)
|
||||||
|
|
||||||
|
info = 0
|
||||||
|
if (allocated(b)) then
|
||||||
|
call b%free()
|
||||||
|
deallocate(b,stat=info)
|
||||||
|
end if
|
||||||
|
if (info == 0) allocate(psb_c_cuda_hdiag_sparse_mat :: b, stat=info)
|
||||||
|
|
||||||
|
if (info /= psb_success_) then
|
||||||
|
info = psb_err_alloc_dealloc_
|
||||||
|
call psb_errpush(info, name)
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_c_cuda_hdiag_mold
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue