You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
687 lines
28 KiB
C
687 lines
28 KiB
C
/* Parallel Sparse BLAS GPU plugin */
|
|
/* (C) Copyright 2013 */
|
|
|
|
/* Salvatore Filippone */
|
|
/* Alessandro Fanfarillo */
|
|
|
|
/* Redistribution and use in source and binary forms, with or without */
|
|
/* modification, are permitted provided that the following conditions */
|
|
/* are met: */
|
|
/* 1. Redistributions of source code must retain the above copyright */
|
|
/* notice, this list of conditions and the following disclaimer. */
|
|
/* 2. Redistributions in binary form must reproduce the above copyright */
|
|
/* notice, this list of conditions, and the following disclaimer in the */
|
|
/* documentation and/or other materials provided with the distribution. */
|
|
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
|
/* not be used to endorse or promote products derived from this */
|
|
/* software without specific written permission. */
|
|
|
|
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
|
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
|
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
|
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
|
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
|
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
|
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
|
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
|
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
|
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
|
|
|
#include <sys/time.h>
|
|
#include "elldev.h"
|
|
|
|
#define PASS_RS 0
|
|
|
|
EllDeviceParams getEllDeviceParams(unsigned int rows, unsigned int maxRowSize,
|
|
unsigned int nnzeros,
|
|
unsigned int columns, unsigned int elementType,
|
|
unsigned int firstIndex)
|
|
{
|
|
EllDeviceParams params;
|
|
|
|
if (elementType == SPGPU_TYPE_DOUBLE)
|
|
{
|
|
params.pitch = ((rows + ELL_PITCH_ALIGN_D - 1)/ELL_PITCH_ALIGN_D)*ELL_PITCH_ALIGN_D;
|
|
}
|
|
else
|
|
{
|
|
params.pitch = ((rows + ELL_PITCH_ALIGN_S - 1)/ELL_PITCH_ALIGN_S)*ELL_PITCH_ALIGN_S;
|
|
}
|
|
//For complex?
|
|
params.elementType = elementType;
|
|
|
|
params.rows = rows;
|
|
params.maxRowSize = maxRowSize;
|
|
params.avgRowSize = (nnzeros+rows-1)/rows;
|
|
params.columns = columns;
|
|
params.firstIndex = firstIndex;
|
|
|
|
//params.pitch = computeEllAllocPitch(rows);
|
|
|
|
return params;
|
|
|
|
}
|
|
//new
|
|
/*
 * Allocate the host-side EllDevice descriptor and its device buffers
 * (rS, diag, rP, cM) according to `params`.
 *
 * On success *remoteMatrix points to the new descriptor and SPGPU_SUCCESS is
 * returned. On any failure everything allocated so far is released,
 * *remoteMatrix is set to NULL and a nonzero code is returned:
 *   - SPGPU_UNSUPPORTED for an unknown element type,
 *   - the code returned by allocRemoteBuffer for a device allocation failure.
 *
 * NOTE(review): allocRemoteBuffer is assumed to return 0 on success, the same
 * convention writeRemoteBuffer is used with elsewhere in this file - confirm.
 */
int allocEllDevice(void ** remoteMatrix, EllDeviceParams* params)
{
  struct EllDevice *tmp;
  size_t elemSize;
  int info;

  *remoteMatrix = NULL;

  /* Validate the element type before allocating anything. */
  if (params->elementType == SPGPU_TYPE_FLOAT)
    elemSize = sizeof(float);
  else if (params->elementType == SPGPU_TYPE_DOUBLE)
    elemSize = sizeof(double);
  else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT)
    elemSize = sizeof(cuFloatComplex);
  else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE)
    elemSize = sizeof(cuDoubleComplex);
  else
    return SPGPU_UNSUPPORTED; // Unsupported params

  tmp = (struct EllDevice *)malloc(sizeof(struct EllDevice));
  if (tmp == NULL) {
    fprintf(stderr,"From routine : %s : host allocation failed\n","allocEllDevice");
    /* No host out-of-memory code is visible from this file; callers only
       test for a nonzero result. */
    return SPGPU_UNSUPPORTED;
  }

  tmp->rows = params->rows;
  tmp->cMPitch = computeEllAllocPitch(tmp->rows);
  tmp->rPPitch = tmp->cMPitch;
  tmp->pitch= tmp->cMPitch;
  tmp->maxRowSize = params->maxRowSize;
  tmp->avgRowSize = params->avgRowSize;
  tmp->allocsize = (int)tmp->maxRowSize * tmp->pitch;
  tmp->columns = params->columns;
  tmp->baseIndex = params->firstIndex;
  tmp->dataType = params->elementType;

  /* Start from NULL so the cleanup path can tell what was allocated. */
  tmp->rS = NULL;
  tmp->diag = NULL;
  tmp->rP = NULL;
  tmp->cM = NULL;

  info = allocRemoteBuffer((void **)&(tmp->rS), tmp->rows*sizeof(int));
  if (info == 0)
    info = allocRemoteBuffer((void **)&(tmp->diag), tmp->rows*sizeof(int));
  if (info == 0)
    info = allocRemoteBuffer((void **)&(tmp->rP), tmp->allocsize*sizeof(int));
  if (info == 0)
    info = allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*elemSize);

  if (info != 0) {
    if (tmp->rS != NULL)   freeRemoteBuffer(tmp->rS);
    if (tmp->diag != NULL) freeRemoteBuffer(tmp->diag);
    if (tmp->rP != NULL)   freeRemoteBuffer(tmp->rP);
    if (tmp->cM != NULL)   freeRemoteBuffer(tmp->cM);
    free(tmp);
    return info;
  }

  *remoteMatrix = (void *)tmp;
  return SPGPU_SUCCESS;
}
|
|
|
|
//new
|
|
void zeroEllDevice(void *remoteMatrix)
|
|
{
|
|
struct EllDevice *tmp = (struct EllDevice *) remoteMatrix;
|
|
|
|
if (tmp->dataType == SPGPU_TYPE_FLOAT)
|
|
cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(float));
|
|
else if (tmp->dataType == SPGPU_TYPE_DOUBLE)
|
|
cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(double));
|
|
else if (tmp->dataType == SPGPU_TYPE_COMPLEX_FLOAT)
|
|
cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(cuFloatComplex));
|
|
else if (tmp->dataType == SPGPU_TYPE_COMPLEX_DOUBLE)
|
|
cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(cuDoubleComplex));
|
|
else
|
|
return ; // Unsupported params
|
|
//fprintf(stderr,"From allocEllDevice: %d %d %d %p %p %p\n",tmp->maxRowSize,
|
|
// tmp->avgRowSize,tmp->allocsize,tmp->rS,tmp->rP,tmp->cM);
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
void freeEllDevice(void* remoteMatrix)
|
|
{
|
|
struct EllDevice *devMat = (struct EllDevice *) remoteMatrix;
|
|
//fprintf(stderr,"freeEllDevice\n");
|
|
if (devMat != NULL) {
|
|
freeRemoteBuffer(devMat->rS);
|
|
freeRemoteBuffer(devMat->rP);
|
|
freeRemoteBuffer(devMat->cM);
|
|
free(remoteMatrix);
|
|
}
|
|
}
|
|
|
|
//new
|
|
int FallocEllDevice(void** deviceMat,unsigned int rows, unsigned int maxRowSize,
|
|
unsigned int nnzeros,
|
|
unsigned int columns, unsigned int elementType,
|
|
unsigned int firstIndex)
|
|
{ int i;
|
|
EllDeviceParams p;
|
|
|
|
p = getEllDeviceParams(rows, maxRowSize, nnzeros, columns, elementType, firstIndex);
|
|
i = allocEllDevice(deviceMat, &p);
|
|
if (i != 0) {
|
|
fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i);
|
|
}
|
|
return(i);
|
|
}
|
|
|
|
void sspmdmm_gpu(float *z,int s, int vPitch, float *y, float alpha, float* cM, int* rP, int* rS,
|
|
int avgRowSize, int maxRowSize, int rows, int pitch, float *x, float beta, int firstIndex)
|
|
{
|
|
int i=0;
|
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
|
|
|
for (i=0; i<s; i++)
|
|
{
|
|
if (PASS_RS) {
|
|
spgpuSellspmv (handle, (float*) z, (float*)y, alpha, (float*) cM, rP, pitch, pitch, rS,
|
|
NULL, avgRowSize, maxRowSize, rows, (float*)x, beta, firstIndex);
|
|
} else {
|
|
spgpuSellspmv (handle, (float*) z, (float*)y, alpha, (float*) cM, rP, pitch, pitch, NULL,
|
|
NULL, avgRowSize, maxRowSize, rows, (float*)x, beta, firstIndex);
|
|
}
|
|
z += vPitch;
|
|
y += vPitch;
|
|
x += vPitch;
|
|
}
|
|
}
|
|
//new
|
|
int spmvEllDeviceFloat(void *deviceMat, float alpha, void* deviceX,
|
|
float beta, void* deviceY)
|
|
{ int i=SPGPU_SUCCESS;
|
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
|
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
|
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
|
|
|
#ifdef VERBOSE
|
|
__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");
|
|
__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");
|
|
__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");
|
|
#endif
|
|
/*spgpuSellspmv (handle, (float*) y->v_, (float*)y->v_, alpha,
|
|
(float*) devMat->cM, devMat->rP, devMat->cMPitch,
|
|
devMat->rPPitch, devMat->rS, devMat->rows,
|
|
(float*)x->v_, beta, devMat->baseIndex);*/
|
|
sspmdmm_gpu ( (float *)y->v_,y->count_, y->pitch_, (float *)y->v_, alpha, (float *)devMat->cM, devMat->rP, devMat->rS,
|
|
devMat->avgRowSize, devMat->maxRowSize, devMat->rows, devMat->pitch,
|
|
(float *)x->v_, beta, devMat->baseIndex);
|
|
return(i);
|
|
}
|
|
|
|
|
|
void
|
|
dspmdmm_gpu (double *z,int s, int vPitch, double *y, double alpha, double* cM, int* rP,
|
|
int* rS, int avgRowSize, int maxRowSize, int rows, int pitch,
|
|
double *x, double beta, int firstIndex)
|
|
{
|
|
int i=0;
|
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
|
for (i=0; i<s; i++)
|
|
{
|
|
if (PASS_RS) {
|
|
spgpuDellspmv (handle, (double*) z, (double*)y, alpha, (double*) cM, rP,
|
|
pitch, pitch, rS,
|
|
NULL, avgRowSize, maxRowSize, rows, (double*)x, beta, firstIndex);
|
|
} else {
|
|
spgpuDellspmv (handle, (double*) z, (double*)y, alpha, (double*) cM, rP,
|
|
pitch, pitch, NULL,
|
|
NULL, avgRowSize, maxRowSize, rows, (double*)x, beta, firstIndex);
|
|
}
|
|
z += vPitch;
|
|
y += vPitch;
|
|
x += vPitch;
|
|
}
|
|
}
|
|
|
|
//new
|
|
int spmvEllDeviceDouble(void *deviceMat, double alpha, void* deviceX,
|
|
double beta, void* deviceY)
|
|
{
|
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
|
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
|
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
|
|
|
/*spgpuDellspmv (handle, (double*) y->v_, (double*)y->v_, alpha, (double*) devMat->cM, devMat->rP, devMat->cMPitch, devMat->rPPitch, devMat->rS, devMat->rows, (double*)x->v_, beta, devMat->baseIndex);*/
|
|
/* fprintf(stderr,"From spmvEllDouble: mat %d %d %d %d y %d %d \n", */
|
|
/* devMat->avgRowSize, devMat->maxRowSize, devMat->rows, */
|
|
/* devMat->pitch, y->count_, y->pitch_); */
|
|
dspmdmm_gpu ((double *)y->v_, y->count_, y->pitch_, (double *)y->v_,
|
|
alpha, (double *)devMat->cM,
|
|
devMat->rP, devMat->rS, devMat->avgRowSize,
|
|
devMat->maxRowSize, devMat->rows, devMat->pitch,
|
|
(double *)x->v_, beta, devMat->baseIndex);
|
|
|
|
return SPGPU_SUCCESS;
|
|
}
|
|
|
|
void
|
|
cspmdmm_gpu (cuFloatComplex *z, int s, int vPitch, cuFloatComplex *y,
|
|
cuFloatComplex alpha, cuFloatComplex* cM,
|
|
int* rP, int* rS, int avgRowSize, int maxRowSize, int rows, int pitch,
|
|
cuFloatComplex *x, cuFloatComplex beta, int firstIndex)
|
|
{
|
|
int i=0;
|
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
|
for (i=0; i<s; i++)
|
|
{
|
|
if (PASS_RS) {
|
|
spgpuCellspmv (handle, (cuFloatComplex *) z, (cuFloatComplex *)y, alpha, (cuFloatComplex *) cM, rP,
|
|
pitch, pitch, rS, NULL, avgRowSize, maxRowSize, rows, (cuFloatComplex *) x, beta, firstIndex);
|
|
} else {
|
|
spgpuCellspmv (handle, (cuFloatComplex *) z, (cuFloatComplex *)y, alpha, (cuFloatComplex *) cM, rP,
|
|
pitch, pitch, NULL, NULL, avgRowSize, maxRowSize, rows, (cuFloatComplex *) x, beta, firstIndex);
|
|
}
|
|
z += vPitch;
|
|
y += vPitch;
|
|
x += vPitch;
|
|
}
|
|
}
|
|
|
|
int spmvEllDeviceFloatComplex(void *deviceMat, float complex alpha, void* deviceX,
|
|
float complex beta, void* deviceY)
|
|
{
|
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
|
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
|
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
|
|
|
cuFloatComplex a = make_cuFloatComplex(crealf(alpha),cimagf(alpha));
|
|
cuFloatComplex b = make_cuFloatComplex(crealf(beta),cimagf(beta));
|
|
cspmdmm_gpu ((cuFloatComplex *)y->v_, y->count_, y->pitch_, (cuFloatComplex *)y->v_, a, (cuFloatComplex *)devMat->cM,
|
|
devMat->rP, devMat->rS, devMat->avgRowSize, devMat->maxRowSize, devMat->rows, devMat->pitch,
|
|
(cuFloatComplex *)x->v_, b, devMat->baseIndex);
|
|
|
|
return SPGPU_SUCCESS;
|
|
}
|
|
|
|
void
|
|
zspmdmm_gpu (cuDoubleComplex *z, int s, int vPitch, cuDoubleComplex *y, cuDoubleComplex alpha, cuDoubleComplex* cM,
|
|
int* rP, int* rS, int avgRowSize, int maxRowSize, int rows, int pitch,
|
|
cuDoubleComplex *x, cuDoubleComplex beta, int firstIndex)
|
|
{
|
|
int i=0;
|
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
|
for (i=0; i<s; i++)
|
|
{
|
|
if (PASS_RS) {
|
|
spgpuZellspmv (handle, (cuDoubleComplex *) z, (cuDoubleComplex *)y, alpha, (cuDoubleComplex *) cM, rP,
|
|
pitch, pitch, rS, NULL, avgRowSize, maxRowSize, rows, (cuDoubleComplex *) x, beta, firstIndex);
|
|
} else {
|
|
spgpuZellspmv (handle, (cuDoubleComplex *) z, (cuDoubleComplex *)y, alpha, (cuDoubleComplex *) cM, rP,
|
|
pitch, pitch, NULL, NULL, avgRowSize, maxRowSize, rows, (cuDoubleComplex *) x, beta, firstIndex);
|
|
}
|
|
z += vPitch;
|
|
y += vPitch;
|
|
x += vPitch;
|
|
}
|
|
}
|
|
|
|
int spmvEllDeviceDoubleComplex(void *deviceMat, double complex alpha, void* deviceX,
|
|
double complex beta, void* deviceY)
|
|
{
|
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
|
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
|
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
|
|
|
cuDoubleComplex a = make_cuDoubleComplex(creal(alpha),cimag(alpha));
|
|
cuDoubleComplex b = make_cuDoubleComplex(creal(beta),cimag(beta));
|
|
zspmdmm_gpu ((cuDoubleComplex *)y->v_, y->count_, y->pitch_, (cuDoubleComplex *)y->v_, a, (cuDoubleComplex *)devMat->cM,
|
|
devMat->rP, devMat->rS, devMat->avgRowSize, devMat->maxRowSize, devMat->rows,
|
|
devMat->pitch, (cuDoubleComplex *)x->v_, b, devMat->baseIndex);
|
|
|
|
return SPGPU_SUCCESS;
|
|
}
|
|
|
|
int writeEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag)
|
|
{ int i;
|
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
|
// Ex updateFromHost function
|
|
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float));
|
|
if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
|
if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
|
if (i==0) i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
|
//i = writeEllDevice(deviceMat, (void *) val, ja, irn);
|
|
/*if (i != 0) {
|
|
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i);
|
|
}*/
|
|
return SPGPU_SUCCESS;
|
|
}
|
|
|
|
int writeEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag)
|
|
{ int i;
|
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
|
// Ex updateFromHost function
|
|
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double));
|
|
if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
|
if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
|
if (i==0) i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
|
|
|
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
|
|
if (i != 0) {
|
|
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i);
|
|
}
|
|
return SPGPU_SUCCESS;
|
|
}
|
|
|
|
int writeEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag)
|
|
{ int i;
|
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
|
// Ex updateFromHost function
|
|
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex));
|
|
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
|
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
|
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
|
|
|
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);
|
|
if (i != 0) {
|
|
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i);
|
|
}*/
|
|
return SPGPU_SUCCESS;
|
|
}
|
|
|
|
int writeEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag)
|
|
{ int i;
|
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
|
// Ex updateFromHost function
|
|
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex));
|
|
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
|
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
|
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
|
|
|
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);
|
|
if (i != 0) {
|
|
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i);
|
|
}*/
|
|
return SPGPU_SUCCESS;
|
|
}
|
|
|
|
int readEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag)
|
|
{ int i;
|
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
|
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(float));
|
|
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
|
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
|
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
|
/*i = readEllDevice(deviceMat, (void *) val, ja, irn);
|
|
if (i != 0) {
|
|
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i);
|
|
}*/
|
|
return SPGPU_SUCCESS;
|
|
}
|
|
|
|
int readEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag)
|
|
{ int i;
|
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
|
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(double));
|
|
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
|
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
|
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
|
/*if (i != 0) {
|
|
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
|
|
}*/
|
|
return SPGPU_SUCCESS;
|
|
}
|
|
|
|
int readEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag)
|
|
{ int i;
|
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
|
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex));
|
|
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
|
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
|
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
|
/*if (i != 0) {
|
|
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
|
|
}*/
|
|
return SPGPU_SUCCESS;
|
|
}
|
|
|
|
int readEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag)
|
|
{ int i;
|
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
|
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex));
|
|
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
|
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
|
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
|
|
/*if (i != 0) {
|
|
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
|
|
}*/
|
|
return SPGPU_SUCCESS;
|
|
}
|
|
|
|
int getEllDevicePitch(void* deviceMat)
|
|
{ int i;
|
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
|
i = devMat->pitch; //old
|
|
//i = getPitchEllDevice(deviceMat);
|
|
return(i);
|
|
}
|
|
|
|
int getEllDeviceMaxRowSize(void* deviceMat)
|
|
{ int i;
|
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
|
i = devMat->maxRowSize;
|
|
return(i);
|
|
}
|
|
|
|
|
|
|
|
|
|
// New copying interface
|
|
|
|
int psiCopyCooToElgFloat(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn,
|
|
int *idisp, int *ja, float *val, void *deviceMat)
|
|
{ int i;
|
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
|
float *devVal;
|
|
int *devIdisp, *devJa;
|
|
spgpuHandle_t handle;
|
|
handle = psb_cudaGetHandle();
|
|
|
|
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
|
|
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
|
|
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(float));
|
|
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(float));
|
|
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
|
|
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
|
|
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
|
|
|
|
if (i==0) psi_cuda_s_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm,
|
|
(int *) devMat->rS,devIdisp,devJa,devVal,
|
|
(int *) devMat->diag, (int *) devMat->rP, (float *)devMat->cM);
|
|
// Ex updateFromHost function
|
|
//i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float));
|
|
//if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
|
//if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
|
|
|
|
|
freeRemoteBuffer(devIdisp);
|
|
freeRemoteBuffer(devJa);
|
|
freeRemoteBuffer(devVal);
|
|
|
|
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
|
|
if (i != 0) {
|
|
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i);
|
|
}
|
|
return SPGPU_SUCCESS;
|
|
}
|
|
|
|
|
|
|
|
int psiCopyCooToElgDouble(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn,
|
|
int *idisp, int *ja, double *val, void *deviceMat)
|
|
{ int i;
|
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
|
double *devVal;
|
|
int *devIdisp, *devJa;
|
|
spgpuHandle_t handle;
|
|
handle = psb_cudaGetHandle();
|
|
|
|
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
|
|
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
|
|
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(double));
|
|
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(double));
|
|
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
|
|
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
|
|
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
|
|
|
|
if (i==0) psi_cuda_d_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm,
|
|
(int *) devMat->rS,devIdisp,devJa,devVal,
|
|
(int *) devMat->diag, (int *) devMat->rP, (double *)devMat->cM);
|
|
// Ex updateFromHost function
|
|
//i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double));
|
|
//if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
|
//if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
|
|
|
|
|
freeRemoteBuffer(devIdisp);
|
|
freeRemoteBuffer(devJa);
|
|
freeRemoteBuffer(devVal);
|
|
|
|
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
|
|
if (i != 0) {
|
|
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i);
|
|
}
|
|
return SPGPU_SUCCESS;
|
|
}
|
|
|
|
|
|
int psiCopyCooToElgFloatComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn,
|
|
int *idisp, int *ja, float complex *val, void *deviceMat)
|
|
{ int i;
|
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
|
float complex *devVal;
|
|
int *devIdisp, *devJa;
|
|
spgpuHandle_t handle;
|
|
handle = psb_cudaGetHandle();
|
|
|
|
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
|
|
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
|
|
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuFloatComplex));
|
|
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuFloatComplex));
|
|
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
|
|
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
|
|
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
|
|
|
|
if (i==0) psi_cuda_c_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm,
|
|
(int *) devMat->rS,devIdisp,devJa,devVal,
|
|
(int *) devMat->diag,(int *) devMat->rP, (float complex *)devMat->cM);
|
|
// Ex updateFromHost function
|
|
//i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float complex));
|
|
//if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
|
//if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
|
|
|
|
|
freeRemoteBuffer(devIdisp);
|
|
freeRemoteBuffer(devJa);
|
|
freeRemoteBuffer(devVal);
|
|
|
|
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
|
|
if (i != 0) {
|
|
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloatComplex",i);
|
|
}
|
|
return SPGPU_SUCCESS;
|
|
}
|
|
|
|
|
|
|
|
int psiCopyCooToElgDoubleComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn,
|
|
int *idisp, int *ja, double complex *val, void *deviceMat)
|
|
{ int i;
|
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
|
double complex *devVal;
|
|
int *devIdisp, *devJa;
|
|
spgpuHandle_t handle;
|
|
handle = psb_cudaGetHandle();
|
|
|
|
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
|
|
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
|
|
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuDoubleComplex));
|
|
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuDoubleComplex));
|
|
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
|
|
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
|
|
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
|
|
|
|
if (i==0) psi_cuda_z_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm,
|
|
(int *) devMat->rS,devIdisp,devJa,devVal,
|
|
(int *) devMat->diag,(int *) devMat->rP, (double complex *)devMat->cM);
|
|
// Ex updateFromHost function
|
|
//i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double complex));
|
|
//if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
|
|
//if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
|
|
|
|
|
|
freeRemoteBuffer(devIdisp);
|
|
freeRemoteBuffer(devJa);
|
|
freeRemoteBuffer(devVal);
|
|
|
|
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
|
|
if (i != 0) {
|
|
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDoubleComplex",i);
|
|
}
|
|
return SPGPU_SUCCESS;
|
|
}
|
|
|
|
|
|
int dev_csputEllDeviceFloat(void* deviceMat, int nnz, void *ia, void *ja, void *val)
|
|
{ int i;
|
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
|
struct MultiVectDevice *devVal = (struct MultiVectDevice *) val;
|
|
struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia;
|
|
struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja;
|
|
float alpha=1.0;
|
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
|
|
|
if (nnz <=0) return SPGPU_SUCCESS;
|
|
//fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt);
|
|
|
|
spgpuSellcsput(handle,alpha,(float *) devMat->cM,
|
|
devMat->rP,devMat->pitch, devMat->pitch, devMat->rS,
|
|
nnz, devIa->v_, devJa->v_, (float *) devVal->v_, 1);
|
|
|
|
return SPGPU_SUCCESS;
|
|
}
|
|
|
|
int dev_csputEllDeviceDouble(void* deviceMat, int nnz, void *ia, void *ja, void *val)
|
|
{ int i;
|
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
|
struct MultiVectDevice *devVal = (struct MultiVectDevice *) val;
|
|
struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia;
|
|
struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja;
|
|
double alpha=1.0;
|
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
|
|
|
if (nnz <=0) return SPGPU_SUCCESS;
|
|
//fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt);
|
|
|
|
spgpuDellcsput(handle,alpha,(double *) devMat->cM,
|
|
devMat->rP,devMat->pitch, devMat->pitch, devMat->rS,
|
|
nnz, devIa->v_, devJa->v_, (double *) devVal->v_, 1);
|
|
|
|
return SPGPU_SUCCESS;
|
|
}
|
|
|
|
|
|
int dev_csputEllDeviceFloatComplex(void* deviceMat, int nnz,
|
|
void *ia, void *ja, void *val)
|
|
{ int i;
|
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
|
struct MultiVectDevice *devVal = (struct MultiVectDevice *) val;
|
|
struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia;
|
|
struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja;
|
|
cuFloatComplex alpha = make_cuFloatComplex(1.0, 0.0);
|
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
|
|
|
if (nnz <=0) return SPGPU_SUCCESS;
|
|
//fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt);
|
|
|
|
spgpuCellcsput(handle,alpha,(cuFloatComplex *) devMat->cM,
|
|
devMat->rP,devMat->pitch, devMat->pitch, devMat->rS,
|
|
nnz, devIa->v_, devJa->v_, (cuFloatComplex *) devVal->v_, 1);
|
|
|
|
return SPGPU_SUCCESS;
|
|
}
|
|
|
|
int dev_csputEllDeviceDoubleComplex(void* deviceMat, int nnz,
|
|
void *ia, void *ja, void *val)
|
|
{ int i;
|
|
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
|
|
struct MultiVectDevice *devVal = (struct MultiVectDevice *) val;
|
|
struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia;
|
|
struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja;
|
|
cuDoubleComplex alpha = make_cuDoubleComplex(1.0, 0.0);
|
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
|
|
|
if (nnz <=0) return SPGPU_SUCCESS;
|
|
//fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt);
|
|
|
|
spgpuZellcsput(handle,alpha,(cuDoubleComplex *) devMat->cM,
|
|
devMat->rP,devMat->pitch, devMat->pitch, devMat->rS,
|
|
nnz, devIa->v_, devJa->v_, (cuDoubleComplex *) devVal->v_, 1);
|
|
|
|
return SPGPU_SUCCESS;
|
|
}
|
|
|
|
|