You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
616 lines
24 KiB
C
616 lines
24 KiB
C
1 year ago
|
/* Parallel Sparse BLAS GPU plugin */
|
||
|
/* (C) Copyright 2013 */
|
||
|
|
||
|
/* Salvatore Filippone */
|
||
|
/* Alessandro Fanfarillo */
|
||
|
|
||
|
/* Redistribution and use in source and binary forms, with or without */
|
||
|
/* modification, are permitted provided that the following conditions */
|
||
|
/* are met: */
|
||
|
/* 1. Redistributions of source code must retain the above copyright */
|
||
|
/* notice, this list of conditions and the following disclaimer. */
|
||
|
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||
|
/* notice, this list of conditions, and the following disclaimer in the */
|
||
|
/* documentation and/or other materials provided with the distribution. */
|
||
|
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||
|
/* not be used to endorse or promote products derived from this */
|
||
|
/* software without specific written permission. */
|
||
|
|
||
|
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||
|
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||
|
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||
|
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||
|
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||
|
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||
|
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||
|
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||
|
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||
|
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||
|
|
||
|
|
||
|
#include "hlldev.h"
|
||
|
#if defined(HAVE_SPGPU)
|
||
|
//new
|
||
|
HllDeviceParams bldHllDeviceParams(unsigned int hksize, unsigned int rows, unsigned int nzeros,
|
||
|
unsigned int allocsize, unsigned int elementType, unsigned int firstIndex)
|
||
|
{
|
||
|
HllDeviceParams params;
|
||
|
|
||
|
params.elementType = elementType;
|
||
|
params.hackSize = hksize;
|
||
|
//numero di elementi di val
|
||
|
params.allocsize = allocsize;
|
||
|
params.rows = rows;
|
||
|
params.nzt = nzeros;
|
||
|
params.avgNzr = (nzeros+rows-1)/rows;
|
||
|
params.firstIndex = firstIndex;
|
||
|
return params;
|
||
|
|
||
|
}
|
||
|
|
||
|
/*
 * Copies the scalar descriptors stored in an HllDevice back to the caller.
 *
 * Every output pointer is written unconditionally when mat is non-NULL, so
 * all of them must be valid.  Returns SPGPU_SUCCESS after filling the
 * outputs, or SPGPU_UNSUPPORTED (leaving the outputs untouched) when mat is
 * NULL.
 */
int getHllDeviceParams(HllDevice* mat, int *hksize, int *rows, int *nzeros,
		       int *allocsize, int *hackOffsLength, int *firstIndex, int *avgnzr)
{
  /* Nothing to report for a missing matrix. */
  if (mat == NULL)
    return SPGPU_UNSUPPORTED;

  *hackOffsLength = mat->hackOffsLength;
  *hksize         = mat->hackSize;
  *nzeros         = mat->nzt;
  *allocsize      = mat->allocsize;
  *rows           = mat->rows;
  *avgnzr         = mat->avgNzr;
  *firstIndex     = mat->baseIndex;

  return SPGPU_SUCCESS;
}
|
||
|
//new
|
||
|
/*
 * Allocates the host-side HllDevice descriptor and the remote buffers it
 * owns (rP, rS, diag, hackOffs, cM), sized from *params.
 *
 *   remoteMatrix  out: receives the new descriptor (as void *).  It is set
 *                 to NULL when the request is rejected before anything is
 *                 allocated.
 *   params        in: sizes, element type and base index of the matrix.
 *
 * Returns SPGPU_SUCCESS, the failing code from allocRemoteBuffer(), or
 * SPGPU_UNSUPPORTED for invalid arguments, an unknown element type or a
 * failed host allocation (SPGPU_UNSUPPORTED is the only failure code this
 * file uses directly).
 *
 * When a remote allocation fails, or the element type is unknown, the
 * descriptor is still returned through *remoteMatrix so the caller can
 * release it with freeHllDevice().  Buffer pointers start out as NULL so a
 * partially built descriptor never carries indeterminate pointers.
 * NOTE(review): this assumes freeRemoteBuffer() tolerates NULL - confirm.
 */
int allocHllDevice(void ** remoteMatrix, HllDeviceParams* params)
{
  HllDevice *tmp;
  size_t elemSize;
  int ret=SPGPU_SUCCESS;

  if (remoteMatrix == NULL || params == NULL)
    return SPGPU_UNSUPPORTED;

  *remoteMatrix = NULL;

  /* hackSize is used as a divisor below; zero would be a division by zero. */
  if (params->hackSize == 0)
    return SPGPU_UNSUPPORTED;

  tmp = (HllDevice *)malloc(sizeof(HllDevice));
  if (tmp == NULL)
    return SPGPU_UNSUPPORTED;

  /* Start from a well-defined state in case an allocation below fails. */
  tmp->rP       = NULL;
  tmp->rS       = NULL;
  tmp->diag     = NULL;
  tmp->hackOffs = NULL;
  tmp->cM       = NULL;

  *remoteMatrix = (void *)tmp;

  tmp->hackSize  = params->hackSize;
  tmp->allocsize = params->allocsize;
  tmp->rows      = params->rows;
  tmp->avgNzr    = params->avgNzr;
  tmp->nzt       = params->nzt;
  tmp->baseIndex = params->firstIndex;
  /* number of hacks: ceil(rows / hackSize) */
  tmp->hackOffsLength = (int)(tmp->rows+tmp->hackSize-1)/tmp->hackSize;

  /* Index buffers; each step runs only if the previous ones succeeded. */
  if (ret == SPGPU_SUCCESS)
    ret=allocRemoteBuffer((void **)&(tmp->rP), tmp->allocsize*sizeof(int));

  if (ret == SPGPU_SUCCESS)
    ret=allocRemoteBuffer((void **)&(tmp->rS), tmp->rows*sizeof(int));

  if (ret == SPGPU_SUCCESS)
    ret=allocRemoteBuffer((void **)&(tmp->diag), tmp->rows*sizeof(int));

  if (ret == SPGPU_SUCCESS)
    ret=allocRemoteBuffer((void **)&(tmp->hackOffs), ((tmp->hackOffsLength+1)*sizeof(int)));

  /* The coefficient buffer is sized according to the element type. */
  if (params->elementType == SPGPU_TYPE_INT)
    elemSize = sizeof(int);
  else if (params->elementType == SPGPU_TYPE_FLOAT)
    elemSize = sizeof(float);
  else if (params->elementType == SPGPU_TYPE_DOUBLE)
    elemSize = sizeof(double);
  else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT)
    elemSize = sizeof(cuFloatComplex);
  else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE)
    elemSize = sizeof(cuDoubleComplex);
  else
    return SPGPU_UNSUPPORTED; // Unsupported params

  if (ret == SPGPU_SUCCESS)
    ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*elemSize);

  return ret;
}
|
||
|
|
||
|
void freeHllDevice(void* remoteMatrix)
|
||
|
{
|
||
|
HllDevice *devMat = (HllDevice *) remoteMatrix;
|
||
|
//fprintf(stderr,"freeHllDevice\n");
|
||
|
if (devMat != NULL) {
|
||
|
freeRemoteBuffer(devMat->rS);
|
||
|
freeRemoteBuffer(devMat->diag);
|
||
|
freeRemoteBuffer(devMat->rP);
|
||
|
freeRemoteBuffer(devMat->cM);
|
||
|
free(remoteMatrix);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
//new
|
||
|
int FallocHllDevice(void** deviceMat,unsigned int hksize, unsigned int rows, unsigned int nzeros,
|
||
|
unsigned int allocsize,
|
||
|
unsigned int elementType, unsigned int firstIndex)
|
||
|
{ int i;
|
||
|
#ifdef HAVE_SPGPU
|
||
|
HllDeviceParams p;
|
||
|
|
||
|
p = bldHllDeviceParams(hksize, rows, nzeros, allocsize, elementType, firstIndex);
|
||
|
i = allocHllDevice(deviceMat, &p);
|
||
|
if (i != 0) {
|
||
|
fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i);
|
||
|
}
|
||
|
return(i);
|
||
|
#else
|
||
|
return SPGPU_UNSUPPORTED;
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
|
||
|
int spmvHllDeviceFloat(void *deviceMat, float alpha, void* deviceX,
|
||
|
float beta, void* deviceY)
|
||
|
{
|
||
|
HllDevice *devMat = (HllDevice *) deviceMat;
|
||
|
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||
|
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||
|
spgpuHandle_t handle=psb_gpuGetHandle();
|
||
|
|
||
|
#ifdef HAVE_SPGPU
|
||
|
#ifdef VERBOSE
|
||
|
/*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/
|
||
|
/*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/
|
||
|
/*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/
|
||
|
#endif
|
||
|
/*dspmdmm_gpu ((double *)z->v_, y->count_, y->pitch_, (double *)y->v_, alpha, (double *)devMat->cM,
|
||
|
devMat->rP, devMat->rS, devMat->rows, devMat->pitch, (double *)x->v_, beta,
|
||
|
devMat->baseIndex);*/
|
||
|
|
||
|
spgpuShellspmv (handle, (float *)y->v_, (float *)y->v_, alpha, (float *)devMat->cM,
|
||
|
devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL,
|
||
|
devMat->avgNzr, devMat->rows, (float *)x->v_, beta, devMat->baseIndex);
|
||
|
|
||
|
return SPGPU_SUCCESS;
|
||
|
#else
|
||
|
return SPGPU_UNSUPPORTED;
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
//new
|
||
|
int spmvHllDeviceDouble(void *deviceMat, double alpha, void* deviceX,
|
||
|
double beta, void* deviceY)
|
||
|
{
|
||
|
HllDevice *devMat = (HllDevice *) deviceMat;
|
||
|
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||
|
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||
|
spgpuHandle_t handle=psb_gpuGetHandle();
|
||
|
|
||
|
#ifdef HAVE_SPGPU
|
||
|
#ifdef VERBOSE
|
||
|
/*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/
|
||
|
/*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/
|
||
|
/*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/
|
||
|
#endif
|
||
|
/*dspmdmm_gpu ((double *)z->v_, y->count_, y->pitch_, (double *)y->v_, alpha, (double *)devMat->cM,
|
||
|
devMat->rP, devMat->rS, devMat->rows, devMat->pitch, (double *)x->v_, beta,
|
||
|
devMat->baseIndex);*/
|
||
|
|
||
|
spgpuDhellspmv (handle, (double *)y->v_, (double *)y->v_, alpha, (double*)devMat->cM,
|
||
|
devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL,
|
||
|
devMat->avgNzr, devMat->rows, (double *)x->v_, beta, devMat->baseIndex);
|
||
|
//cudaSync();
|
||
|
return SPGPU_SUCCESS;
|
||
|
#else
|
||
|
return SPGPU_UNSUPPORTED;
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
int spmvHllDeviceFloatComplex(void *deviceMat, float complex alpha, void* deviceX,
|
||
|
float complex beta, void* deviceY)
|
||
|
{
|
||
|
HllDevice *devMat = (HllDevice *) deviceMat;
|
||
|
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||
|
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||
|
spgpuHandle_t handle=psb_gpuGetHandle();
|
||
|
|
||
|
#ifdef HAVE_SPGPU
|
||
|
cuFloatComplex a = make_cuFloatComplex(crealf(alpha),cimagf(alpha));
|
||
|
cuFloatComplex b = make_cuFloatComplex(crealf(beta),cimagf(beta));
|
||
|
#ifdef VERBOSE
|
||
|
/*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/
|
||
|
/*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/
|
||
|
/*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/
|
||
|
#endif
|
||
|
/*dspmdmm_gpu ((double *)z->v_, y->count_, y->pitch_, (double *)y->v_, alpha, (double *)devMat->cM,
|
||
|
devMat->rP, devMat->rS, devMat->rows, devMat->pitch, (double *)x->v_, beta,
|
||
|
devMat->baseIndex);*/
|
||
|
|
||
|
spgpuChellspmv (handle, (cuFloatComplex *)y->v_, (cuFloatComplex *)y->v_, a, (cuFloatComplex *)devMat->cM,
|
||
|
devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL,
|
||
|
devMat->avgNzr, devMat->rows, (cuFloatComplex *)x->v_, b, devMat->baseIndex);
|
||
|
|
||
|
return SPGPU_SUCCESS;
|
||
|
#else
|
||
|
return SPGPU_UNSUPPORTED;
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
int spmvHllDeviceDoubleComplex(void *deviceMat, double complex alpha, void* deviceX,
|
||
|
double complex beta, void* deviceY)
|
||
|
{
|
||
|
HllDevice *devMat = (HllDevice *) deviceMat;
|
||
|
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||
|
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||
|
spgpuHandle_t handle=psb_gpuGetHandle();
|
||
|
|
||
|
#ifdef HAVE_SPGPU
|
||
|
cuDoubleComplex a = make_cuDoubleComplex(creal(alpha),cimag(alpha));
|
||
|
cuDoubleComplex b = make_cuDoubleComplex(creal(beta),cimag(beta));
|
||
|
#ifdef VERBOSE
|
||
|
/*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/
|
||
|
/*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/
|
||
|
/*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/
|
||
|
#endif
|
||
|
|
||
|
spgpuZhellspmv (handle, (cuDoubleComplex *)y->v_, (cuDoubleComplex *)y->v_, a, (cuDoubleComplex *)devMat->cM,
|
||
|
devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL,
|
||
|
devMat->avgNzr,devMat->rows, (cuDoubleComplex *)x->v_, b, devMat->baseIndex);
|
||
|
|
||
|
return SPGPU_SUCCESS;
|
||
|
#else
|
||
|
return SPGPU_UNSUPPORTED;
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
int writeHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag)
|
||
|
{ int i;
|
||
|
#ifdef HAVE_SPGPU
|
||
|
HllDevice *devMat = (HllDevice *) deviceMat;
|
||
|
// Ex updateFromHost function
|
||
|
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float));
|
||
|
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||
|
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||
|
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||
|
i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||
|
//i = writeEllDevice(deviceMat, (void *) val, ja, irn);
|
||
|
/*if (i != 0) {
|
||
|
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i);
|
||
|
}*/
|
||
|
return SPGPU_SUCCESS;
|
||
|
#else
|
||
|
return SPGPU_UNSUPPORTED;
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
int writeHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag)
|
||
|
{ int i;
|
||
|
#ifdef HAVE_SPGPU
|
||
|
HllDevice *devMat = (HllDevice *) deviceMat;
|
||
|
// Ex updateFromHost function
|
||
|
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double));
|
||
|
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||
|
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||
|
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||
|
i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||
|
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);
|
||
|
if (i != 0) {
|
||
|
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i);
|
||
|
}*/
|
||
|
return SPGPU_SUCCESS;
|
||
|
#else
|
||
|
return SPGPU_UNSUPPORTED;
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
int writeHllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int *hkoffs, int* irn, int *idiag)
|
||
|
{ int i;
|
||
|
#ifdef HAVE_SPGPU
|
||
|
HllDevice *devMat = (HllDevice *) deviceMat;
|
||
|
// Ex updateFromHost function
|
||
|
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex));
|
||
|
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||
|
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||
|
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||
|
i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||
|
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);
|
||
|
if (i != 0) {
|
||
|
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i);
|
||
|
}*/
|
||
|
return SPGPU_SUCCESS;
|
||
|
#else
|
||
|
return SPGPU_UNSUPPORTED;
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
int writeHllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int *hkoffs, int* irn, int *idiag)
|
||
|
{ int i;
|
||
|
#ifdef HAVE_SPGPU
|
||
|
HllDevice *devMat = (HllDevice *) deviceMat;
|
||
|
// Ex updateFromHost function
|
||
|
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex));
|
||
|
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||
|
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||
|
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||
|
i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||
|
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);
|
||
|
if (i != 0) {
|
||
|
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i);
|
||
|
}*/
|
||
|
return SPGPU_SUCCESS;
|
||
|
#else
|
||
|
return SPGPU_UNSUPPORTED;
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
int readHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag)
|
||
|
{ int i;
|
||
|
#ifdef HAVE_SPGPU
|
||
|
HllDevice *devMat = (HllDevice *) deviceMat;
|
||
|
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(float));
|
||
|
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||
|
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||
|
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||
|
i = readRemoteBuffer((void *) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||
|
/*i = readEllDevice(deviceMat, (void *) val, ja, irn);
|
||
|
if (i != 0) {
|
||
|
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i);
|
||
|
}*/
|
||
|
return SPGPU_SUCCESS;
|
||
|
#else
|
||
|
return SPGPU_UNSUPPORTED;
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
int readHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag)
|
||
|
{ int i;
|
||
|
#ifdef HAVE_SPGPU
|
||
|
HllDevice *devMat = (HllDevice *) deviceMat;
|
||
|
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(double));
|
||
|
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||
|
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||
|
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||
|
i = readRemoteBuffer((void *) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||
|
/*if (i != 0) {
|
||
|
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
|
||
|
}*/
|
||
|
return SPGPU_SUCCESS;
|
||
|
#else
|
||
|
return SPGPU_UNSUPPORTED;
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
int readHllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int *hkoffs, int* irn, int *idiag)
|
||
|
{ int i;
|
||
|
#ifdef HAVE_SPGPU
|
||
|
HllDevice *devMat = (HllDevice *) deviceMat;
|
||
|
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex));
|
||
|
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||
|
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||
|
i = readRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||
|
i = readRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||
|
/*if (i != 0) {
|
||
|
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
|
||
|
}*/
|
||
|
return SPGPU_SUCCESS;
|
||
|
#else
|
||
|
return SPGPU_UNSUPPORTED;
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
int readHllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int *hkoffs, int* irn, int *idiag)
|
||
|
{ int i;
|
||
|
#ifdef HAVE_SPGPU
|
||
|
HllDevice *devMat = (HllDevice *) deviceMat;
|
||
|
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex));
|
||
|
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
||
|
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
||
|
i = readRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
||
|
i = readRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||
|
/*if (i != 0) {
|
||
|
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
|
||
|
}*/
|
||
|
return SPGPU_SUCCESS;
|
||
|
#else
|
||
|
return SPGPU_UNSUPPORTED;
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
// New copy routines.
|
||
|
|
||
|
int psiCopyCooToHlgFloat(int nr, int nc, int nza, int hacksz, int noffs, int isz,
|
||
|
int *irn, int *hoffs, int *idisp, int *ja,
|
||
|
float *val, void *deviceMat)
|
||
|
{ int i,j;
|
||
|
#ifdef HAVE_SPGPU
|
||
|
spgpuHandle_t handle;
|
||
|
HllDevice *devMat = (HllDevice *) deviceMat;
|
||
|
float *devVal;
|
||
|
int *devIdisp, *devJa;
|
||
|
int *tja;
|
||
|
//fprintf(stderr,"devMat: %p\n",devMat);
|
||
|
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
|
||
|
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
|
||
|
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(float));
|
||
|
|
||
|
// fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength);
|
||
|
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(float));
|
||
|
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
|
||
|
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
|
||
|
if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||
|
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
|
||
|
//cudaSync();
|
||
|
|
||
|
handle = psb_gpuGetHandle();
|
||
|
psi_cuda_s_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz,
|
||
|
(int *) devMat->rS, (int *) devMat->hackOffs,
|
||
|
devIdisp,devJa,devVal,
|
||
|
(int *) devMat->diag, (int *) devMat->rP, (float *)devMat->cM);
|
||
|
|
||
|
freeRemoteBuffer(devIdisp);
|
||
|
freeRemoteBuffer(devJa);
|
||
|
freeRemoteBuffer(devVal);
|
||
|
|
||
|
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
|
||
|
if (i != 0) {
|
||
|
fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceFloat",i);
|
||
|
}
|
||
|
return SPGPU_SUCCESS;
|
||
|
#else
|
||
|
return SPGPU_UNSUPPORTED;
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
int psiCopyCooToHlgDouble(int nr, int nc, int nza, int hacksz, int noffs, int isz,
|
||
|
int *irn, int *hoffs, int *idisp, int *ja,
|
||
|
double *val, void *deviceMat)
|
||
|
{ int i,j;
|
||
|
#ifdef HAVE_SPGPU
|
||
|
spgpuHandle_t handle;
|
||
|
HllDevice *devMat = (HllDevice *) deviceMat;
|
||
|
double *devVal;
|
||
|
int *devIdisp, *devJa;
|
||
|
int *tja;
|
||
|
//fprintf(stderr,"devMat: %p\n",devMat);
|
||
|
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
|
||
|
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
|
||
|
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(double));
|
||
|
|
||
|
// fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength);
|
||
|
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(double));
|
||
|
//fprintf(stderr,"WriteRemoteBuffer val %d\n",i);
|
||
|
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
|
||
|
//fprintf(stderr,"WriteRemoteBuffer ja %d\n",i);
|
||
|
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
|
||
|
//fprintf(stderr,"WriteRemoteBuffer irn %d\n",i);
|
||
|
if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||
|
//fprintf(stderr,"WriteRemoteBuffer hoffs %d\n",i);
|
||
|
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
|
||
|
//fprintf(stderr,"WriteRemoteBuffer idisp %d\n",i);
|
||
|
//cudaSync();
|
||
|
//fprintf(stderr," hacksz: %d \n",hacksz);
|
||
|
handle = psb_gpuGetHandle();
|
||
|
psi_cuda_d_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz,
|
||
|
(int *) devMat->rS, (int *) devMat->hackOffs,
|
||
|
devIdisp,devJa,devVal,
|
||
|
(int *) devMat->diag, (int *) devMat->rP, (double *)devMat->cM);
|
||
|
|
||
|
freeRemoteBuffer(devIdisp);
|
||
|
freeRemoteBuffer(devJa);
|
||
|
freeRemoteBuffer(devVal);
|
||
|
|
||
|
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
|
||
|
if (i != 0) {
|
||
|
fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceDouble",i);
|
||
|
}
|
||
|
return SPGPU_SUCCESS;
|
||
|
#else
|
||
|
return SPGPU_UNSUPPORTED;
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
int psiCopyCooToHlgFloatComplex(int nr, int nc, int nza, int hacksz, int noffs, int isz,
|
||
|
int *irn, int *hoffs, int *idisp, int *ja,
|
||
|
float complex *val, void *deviceMat)
|
||
|
{ int i,j;
|
||
|
#ifdef HAVE_SPGPU
|
||
|
spgpuHandle_t handle;
|
||
|
HllDevice *devMat = (HllDevice *) deviceMat;
|
||
|
float complex *devVal;
|
||
|
int *devIdisp, *devJa;
|
||
|
int *tja;
|
||
|
//fprintf(stderr,"devMat: %p\n",devMat);
|
||
|
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
|
||
|
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
|
||
|
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuFloatComplex));
|
||
|
|
||
|
// fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength);
|
||
|
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuFloatComplex));
|
||
|
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
|
||
|
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
|
||
|
if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||
|
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
|
||
|
//cudaSync();
|
||
|
|
||
|
handle = psb_gpuGetHandle();
|
||
|
psi_cuda_c_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz,
|
||
|
(int *) devMat->rS, (int *) devMat->hackOffs,
|
||
|
devIdisp,devJa,devVal,
|
||
|
(int *) devMat->diag,(int *) devMat->rP, (float complex *)devMat->cM);
|
||
|
|
||
|
freeRemoteBuffer(devIdisp);
|
||
|
freeRemoteBuffer(devJa);
|
||
|
freeRemoteBuffer(devVal);
|
||
|
|
||
|
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
|
||
|
if (i != 0) {
|
||
|
fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceFloatComplex",i);
|
||
|
}
|
||
|
return SPGPU_SUCCESS;
|
||
|
#else
|
||
|
return SPGPU_UNSUPPORTED;
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
int psiCopyCooToHlgDoubleComplex(int nr, int nc, int nza, int hacksz, int noffs, int isz,
|
||
|
int *irn, int *hoffs, int *idisp, int *ja,
|
||
|
double complex *val, void *deviceMat)
|
||
|
{ int i,j;
|
||
|
#ifdef HAVE_SPGPU
|
||
|
spgpuHandle_t handle;
|
||
|
HllDevice *devMat = (HllDevice *) deviceMat;
|
||
|
double complex *devVal;
|
||
|
int *devIdisp, *devJa;
|
||
|
int *tja;
|
||
|
//fprintf(stderr,"devMat: %p\n",devMat);
|
||
|
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
|
||
|
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
|
||
|
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuDoubleComplex));
|
||
|
|
||
|
// fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength);
|
||
|
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuDoubleComplex));
|
||
|
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
|
||
|
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
|
||
|
if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
|
||
|
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
|
||
|
//cudaSync();
|
||
|
|
||
|
handle = psb_gpuGetHandle();
|
||
|
psi_cuda_z_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz,
|
||
|
(int *) devMat->rS, (int *) devMat->hackOffs,
|
||
|
devIdisp,devJa,devVal,
|
||
|
(int *) devMat->diag,(int *) devMat->rP, (double complex *)devMat->cM);
|
||
|
|
||
|
freeRemoteBuffer(devIdisp);
|
||
|
freeRemoteBuffer(devJa);
|
||
|
freeRemoteBuffer(devVal);
|
||
|
|
||
|
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
|
||
|
if (i != 0) {
|
||
|
fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceDoubleComplex",i);
|
||
|
}
|
||
|
return SPGPU_SUCCESS;
|
||
|
#else
|
||
|
return SPGPU_UNSUPPORTED;
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
#endif
|