// Declarations and (partly missing) definitions for the routines whose names
// are generated by GEN_PSI_FUNC_NAME, i.e. psi_cuda_<x>_CopyCooToElg.
// NOTE(review): the suffix suggests a COO-to-ELLPACK-style copy, but the code
// that would show this is missing from this text - confirm against the
// original file.
//
// NOTE(review): the two directives below have lost their header names;
// restore them from the original file before building.
#include
#include
#include "psi_cuda_common.cuh"

// Rebind the name generator for this file: GEN_PSI_FUNC_NAME(x) pastes
// psi_cuda_, x and _CopyCooToElg together through CONCAT (CONCAT itself is
// not defined in this text).
#undef GEN_PSI_FUNC_NAME
#define GEN_PSI_FUNC_NAME(x) CONCAT(CONCAT(psi_cuda_,x),_CopyCooToElg)

// Multiplier applied to blockIdx.x when the kernel computes its thread index.
#define THREAD_BLOCK 256

#ifdef __cplusplus
extern "C" {
#endif

// Prototype of the host entry point; wrapped in extern "C" when compiled as
// C++. TYPE_SYMBOL, VALUE_TYPE and spgpuHandle_t are not defined in this text.
void GEN_PSI_FUNC_NAME(TYPE_SYMBOL)(spgpuHandle_t handle, int nr, int nc, int nza,
                                    int baseIdx, int hacksz, int ldv, int nzm,
                                    int *rS,int *devIdisp, int *devJa, VALUE_TYPE *devVal,
                                    int *idiag, int *rP, VALUE_TYPE *cM);

#ifdef __cplusplus
}
#endif

// Device kernel (<entry point name>_krn).
// Each thread computes ki from threadIdx.x and blockIdx.x, offsets it by ii to
// get i, and returns early when ki >= nrws or i >= nr. It then reads
// devIdisp[i] and rS[i] and starts a loop over k; the rest of the body is
// missing from this text (see the note further down).
__global__ void CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_krn)(int ii, int nrws, int nr, int nza,
                                                            int baseIdx, int hacksz, int ldv, int nzm,
                                                            int *rS, int *devIdisp, int *devJa,
                                                            VALUE_TYPE *devVal, int *idiag, int *rP,
                                                            VALUE_TYPE *cM)
{
  int ir, k, ipnt, rsz,jc;
  // Thread index within the launch. The block stride is the THREAD_BLOCK macro
  // rather than blockDim.x, so this presumably expects THREAD_BLOCK threads
  // per block - TODO confirm against the launch configuration.
  int ki = threadIdx.x + blockIdx.x * (THREAD_BLOCK);
  // Index handled by this thread: the offset ii passed to the kernel plus ki.
  int i=ii+ki;
  int idval=0;
  // Bounds guards: nothing to do for threads past nrws or indices past nr.
  if (ki >= nrws) return;
  if (i >= nr) return;
  ipnt=devIdisp[i];
  rsz=rS[i];
  ir = i;
  // NOTE(review): the text between "k" and "currentStream" is missing. The
  // loop condition and body, the end of this kernel, and the beginning of the
  // code that launches it are all gone. What survives looks like the tail of a
  // kernel launch (stream argument, closing chevrons, argument list) followed
  // by the closing brace of its enclosing function. Restore from the original
  // file; do not reconstruct by guesswork.
  for (k=0; kcurrentStream >>>(i,nrws, nr, nza, baseIdx, hacksz, ldv, nzm, rS,devIdisp,devJa,devVal, idiag, rP,cM);
}

// Host entry point; same signature as the prototype declared earlier in this
// file.
void GEN_PSI_FUNC_NAME(TYPE_SYMBOL) (spgpuHandle_t handle, int nr, int nc, int nza,
                                     int baseIdx, int hacksz, int ldv, int nzm,
                                     int *rS,int *devIdisp, int *devJa, VALUE_TYPE *devVal,
                                     int *idiag, int *rP, VALUE_TYPE *cM)
{
  int i, nrws;
  // NOTE(review): in this single-line text the line comment below swallows
  // everything up to the end of the file: the active-looking maxNForACall
  // definition using max(...), a second commented-out fprintf, and the start
  // of a "for (i=0; i" loop that breaks off mid-statement. The remainder of
  // this function is therefore not visible here. The max(...) form presumably
  // guards against int overflow of THREAD_BLOCK*handle->maxGridSizeX - TODO
  // confirm once the original line breaks are restored.
  //int maxNForACall = THREAD_BLOCK*handle->maxGridSizeX; int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); //fprintf(stderr,"Loop on j: %d\n",j); for (i=0; i