You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
psblas3/cuda/dcsga.c

172 lines
5.6 KiB
C

/* Parallel Sparse BLAS GPU plugin */
/* (C) Copyright 2013 */
/* Salvatore Filippone */
/* Alessandro Fanfarillo */
/* Redistribution and use in source and binary forms, with or without */
/* modification, are permitted provided that the following conditions */
/* are met: */
/* 1. Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* 2. Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions, and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution. */
/* 3. The name of the PSBLAS group or the names of its contributors may */
/* not be used to endorse or promote products derived from this */
/* software without specific written permission. */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include <cusparse_v2.h>
#include "cintrf.h"
#include "dcsga.h"
int d_CSGADeviceFree(d_Cmat *Matrix)
{
d_CSRGDeviceMat *cMat= Matrix->mat;
if (cMat!=NULL) d_CSRGDeviceFree(cMat);
return(CUSPARSE_STATUS_SUCCESS);
}
int d_CSGADeviceAlloc(d_Cmat *Matrix,int nr, int nc, int nz)
{
int rc=0;
d_CSRGDeviceMat *cMat;
if ((rc=d_CSRGDeviceAlloc(Matrix,nr,nc,nz))!=0)
return(rc);
cMat = Matrix->mat;
if (nr <= 0) nr = 1;
if ((rc= allocRemoteBuffer(((void **) &(cMat->rowBlocks)), ((nr+1)*sizeof(int)))) != 0)
return(rc);
return(CUSPARSE_STATUS_SUCCESS);
}
void d_CSGAComputeRowBlocks(int totalRows, int* irp, int* numBlocks, int *rowBlocks){
rowBlocks[0] = 1;
int sum = 0, last_i= 1, ctr=1;
for(int i = 1; i < totalRows; i++){
sum += irp[i]-irp[i-1];
if(sum == MAX_NNZ_PER_WG){
last_i = i+1;
rowBlocks[ctr++] = i+1;
sum = 0;
}
else if( sum > MAX_NNZ_PER_WG){
if(i - last_i > 1){
rowBlocks[ctr++] = i-1 +1;
i--;
}
else if(i - last_i == 1){
rowBlocks[ctr++] = i +1;
}
last_i = i+1;
sum = 0;
}
}
//printf("%d %d\n",ctr,totalRows);
*numBlocks = ctr;
rowBlocks[ctr++] = totalRows;
return ;
}
int d_CSGAHost2Device(d_Cmat *Matrix,int nr, int nc, int nz,
int *irp, int *ja, double *val, int numBlocks, int *rowBlocks)
{
int rc=0;
d_CSRGDeviceMat *cMat= Matrix->mat;
if (cMat!=NULL) {
if ((rc=d_CSRGHost2Device(Matrix,nr,nc,nz,irp,ja,val))
!= SPGPU_SUCCESS)
return(rc);
cMat->numBlocks = numBlocks;
// fprintf(stderr," CSGAH2D: %d (%d:%d) %p\n",numBlocks,
// rowBlocks[0],rowBlocks[1],cMat->rowBlocks);
if ((rc=writeRemoteBuffer((void *) rowBlocks,(void *) cMat->rowBlocks,
(numBlocks+1)*sizeof(int)))
!= SPGPU_SUCCESS)
return(rc);
//fprintf(stderr," CSGAH2D ok\n");
} else {
return(-1);
}
return(CUSPARSE_STATUS_SUCCESS);
}
int d_CSGADevice2Host(d_Cmat *Matrix,int nr, int nc, int nz,
int *irp, int *ja, double *val, int *numBlocks, int *rowBlocks)
{
int rc=0;
d_CSRGDeviceMat *cMat= Matrix->mat;
if (cMat!=NULL) {
if ((rc=d_CSRGDevice2Host(Matrix,nr,nc,nz,irp,ja,val))
!= SPGPU_SUCCESS)
return(rc);
*numBlocks = cMat->numBlocks ;
if ((rc=readRemoteBuffer((void *) rowBlocks,(void *) cMat->rowBlocks,
((*numBlocks)+1)*sizeof(int)))
!= SPGPU_SUCCESS)
return(rc);
}
return(CUSPARSE_STATUS_SUCCESS);
}
int d_spmvCSGADevice(d_Cmat *Matrix, double alpha, void* deviceX,
double beta, void* deviceY, int *rb)
{
d_CSRGDeviceMat *devMat = (d_CSRGDeviceMat *) Matrix->mat;
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
spgpuHandle_t handle=psb_cudaGetHandle();
int indexBase=1;
#if 0
fprintf(stderr,"devMat %p m %d n %d nB %d rB %p x %p y %p \n",devMat,
devMat->m,devMat->n,devMat->numBlocks,devMat->rowBlocks,x->v_,y->v_);
fprintf(stderr,"x_count %d y_count %d xsize %d ysize %d \n",x->count_,y->count_,
x->size_,y->size_);
#endif
#if 0&&defined(VERBOSE)
__assert(x->count_ == y->count_, "ERROR: x and y don't share the same number of vectors");
__assert(x->size_ >= devMat->n, "ERROR: x vector's size is not >= to matrix size (columns)");
__assert(y->size_ >= devMat->m, "ERROR: y vector's size is not >= to matrix size (rows)");
#endif
//fprintf(stderr,"Calling dCSGAMV \n");
dCSGAMV(handle, beta,(double *)y->v_, alpha,
(double *)devMat->val, devMat->ja, devMat->irp,
devMat->m,devMat->n,y->count_, devMat->numBlocks, devMat->rowBlocks,
(double *)x->v_, 1, rb);
}