From 2af0d5693821dc079b4443dbc3e5896dd0b5e7e2 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 2 Jan 2025 11:49:08 +0100 Subject: [PATCH] Malloc and trasnfers with CUDA should use (size_t) casts --- cuda/cuda_util.c | 16 +-- cuda/cuda_util.h | 11 +- cuda/cvectordev.c | 6 +- cuda/dvectordev.c | 7 +- cuda/elldev.c | 230 +++++++++++++++++++++++++-------------- cuda/fcusparse_fct.h | 47 ++++---- cuda/hdiagdev.c | 36 ++++--- cuda/hlldev.c | 250 ++++++++++++++++++++++++++++--------------- cuda/ivectordev.c | 6 +- cuda/svectordev.c | 7 +- cuda/vectordev.c | 15 ++- cuda/zvectordev.c | 6 +- 12 files changed, 402 insertions(+), 235 deletions(-) diff --git a/cuda/cuda_util.c b/cuda/cuda_util.c index b7d0199e..17889a06 100644 --- a/cuda/cuda_util.c +++ b/cuda/cuda_util.c @@ -38,12 +38,12 @@ static struct cudaDeviceProp *prop=NULL; static spgpuHandle_t psb_cuda_handle = NULL; static cublasHandle_t psb_cublas_handle = NULL; #if defined(TRACK_CUDA_MALLOC) -static long long total_cuda_mem = 0; +static int64_t total_cuda_mem = 0; #endif -int allocRemoteBuffer(void** buffer, int count) +int allocRemoteBuffer(void** buffer, size_t count) { - cudaError_t err = cudaMalloc(buffer, count); + cudaError_t err = cudaMalloc(buffer, (size_t) count); #if defined(TRACK_CUDA_MALLOC) total_cuda_mem += count; fprintf(stderr,"Tracking CUDA allocRemoteBuffer for %ld bytes total %ld address %p\n", @@ -65,7 +65,7 @@ int allocRemoteBuffer(void** buffer, int count) } } -int hostRegisterMapped(void *pointer, long size) +int hostRegisterMapped(void *pointer, size_t size) { cudaError_t err = cudaHostRegister(pointer, size, cudaHostRegisterMapped); @@ -101,7 +101,7 @@ int getDevicePointer(void **d_p, void * h_p) } } -int registerMappedMemory(void *buffer, void **dp, int size) +int registerMappedMemory(void *buffer, void **dp, size_t size) { //cudaError_t err = cudaHostAlloc(buffer,size,cudaHostAllocMapped); cudaError_t err = cudaHostRegister(buffer, size, cudaHostRegisterMapped); @@ -130,7 +130,7 @@ int registerMappedMemory(void *buffer, void **dp, int size) } } -int allocMappedMemory(void **buffer, void **dp, int size) +int allocMappedMemory(void **buffer, void **dp, size_t size) { cudaError_t err = cudaHostAlloc(buffer,size,cudaHostAllocMapped); if (err == 0) err = cudaHostGetDevicePointer(dp,*buffer,0); @@ -168,7 +168,7 @@ int unregisterMappedMemory(void *buffer) } } -int writeRemoteBuffer(void* hostSrc, void* buffer, int count) +int writeRemoteBuffer(void* hostSrc, void* buffer, size_t count) { cudaError_t err = cudaMemcpy(buffer, hostSrc, count, cudaMemcpyHostToDevice); @@ -181,7 +181,7 @@ int writeRemoteBuffer(void* hostSrc, void* buffer, int count) } } -int readRemoteBuffer(void* hostDest, void* buffer, int count) +int readRemoteBuffer(void* hostDest, void* buffer, size_t count) { diff --git a/cuda/cuda_util.h b/cuda/cuda_util.h index 4eafb5bf..609e6f43 100644 --- a/cuda/cuda_util.h +++ b/cuda/cuda_util.h @@ -37,6 +37,7 @@ #include #include #include +#include #include "cuda_runtime.h" #include "core.h" @@ -44,12 +45,12 @@ #include "fcusparse.h" #include "cublas_v2.h" -int allocRemoteBuffer(void** buffer, int count); -int allocMappedMemory(void **buffer, void **dp, int size); -int registerMappedMemory(void *buffer, void **dp, int size); +int allocRemoteBuffer(void** buffer, size_t count); +int allocMappedMemory(void **buffer, void **dp, size_t size); +int registerMappedMemory(void *buffer, void **dp, size_t size); int unregisterMappedMemory(void *buffer); -int writeRemoteBuffer(void* hostSrc, void* buffer, int count); -int readRemoteBuffer(void* hostDest, void* buffer, int count); +int writeRemoteBuffer(void* hostSrc, void* buffer, size_t count); +int readRemoteBuffer(void* hostDest, void* buffer, size_t count); int freeRemoteBuffer(void* buffer); int gpuInit(int dev); int getDeviceCount(); diff --git a/cuda/cvectordev.c b/cuda/cvectordev.c index b05bca55..0eaacbdb 100644 --- a/cuda/cvectordev.c +++ b/cuda/cvectordev.c @@ -39,7 +39,7 @@ int registerMappedFloatComplex(void *buff, void **d_p, int n, cuFloatComplex dummy) { - return registerMappedMemory(buff,d_p,n*sizeof(cuFloatComplex)); + return registerMappedMemory(buff,d_p,((size_t) n)*sizeof(cuFloatComplex)); } int writeMultiVecDeviceFloatComplex(void* deviceVec, cuFloatComplex* hostVec) @@ -47,7 +47,7 @@ int writeMultiVecDeviceFloatComplex(void* deviceVec, cuFloatComplex* hostVec) struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; // Ex updateFromHost vector function i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_, - devVec->pitch_*devVec->count_*sizeof(cuFloatComplex)); + ((size_t) devVec->pitch_)*devVec->count_*sizeof(cuFloatComplex)); if (i != 0) { fprintf(stderr,"From routine : %s : %d \n","FallocMultiVecDevice",i); } @@ -67,7 +67,7 @@ int readMultiVecDeviceFloatComplex(void* deviceVec, cuFloatComplex* hostVec) { int i,j; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_, - devVec->pitch_*devVec->count_*sizeof(cuFloatComplex)); + ((size_t) devVec->pitch_)*devVec->count_*sizeof(cuFloatComplex)); if (i != 0) { fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceFloat",i); } diff --git a/cuda/dvectordev.c b/cuda/dvectordev.c index d4f5513b..10bbc326 100644 --- a/cuda/dvectordev.c +++ b/cuda/dvectordev.c @@ -39,14 +39,15 @@ int registerMappedDouble(void *buff, void **d_p, int n, double dummy) { - return registerMappedMemory(buff,d_p,n*sizeof(double)); + return registerMappedMemory(buff,d_p,((size_t) n)*sizeof(double)); } int writeMultiVecDeviceDouble(void* deviceVec, double* hostVec) { int i; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; // Ex updateFromHost vector function - i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_, devVec->pitch_*devVec->count_*sizeof(double)); + i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_, + ((size_t) devVec->pitch_)*devVec->count_*sizeof(double)); if (i != 0) { fprintf(stderr,"From routine : %s : %d \n","FallocMultiVecDevice",i); } @@ -66,7 +67,7 @@ int readMultiVecDeviceDouble(void* deviceVec, double* hostVec) { int i,j; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_, - devVec->pitch_*devVec->count_*sizeof(double)); + ((size_t) devVec->pitch_)*devVec->count_*sizeof(double)); if (i != 0) { fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceDouble",i); } diff --git a/cuda/elldev.c b/cuda/elldev.c index a5d893c1..cf49aadd 100644 --- a/cuda/elldev.c +++ b/cuda/elldev.c @@ -75,21 +75,25 @@ int allocEllDevice(void ** remoteMatrix, EllDeviceParams* params) tmp->avgRowSize = params->avgRowSize; tmp->allocsize = (int)tmp->maxRowSize * tmp->pitch; //tmp->allocsize = (int)params->maxRowSize * tmp->cMPitch; - allocRemoteBuffer((void **)&(tmp->rS), tmp->rows*sizeof(int)); - allocRemoteBuffer((void **)&(tmp->diag), tmp->rows*sizeof(int)); - allocRemoteBuffer((void **)&(tmp->rP), tmp->allocsize*sizeof(int)); + allocRemoteBuffer((void **)&(tmp->rS), ((size_t) tmp->rows)*sizeof(int)); + allocRemoteBuffer((void **)&(tmp->diag), ((size_t) tmp->rows)*sizeof(int)); + allocRemoteBuffer((void **)&(tmp->rP), ((size_t) tmp->allocsize)*sizeof(int)); tmp->columns = params->columns; tmp->baseIndex = params->firstIndex; tmp->dataType = params->elementType; //fprintf(stderr,"allocEllDevice: %d %d %d \n",tmp->pitch, params->maxRowSize, params->avgRowSize); if (params->elementType == SPGPU_TYPE_FLOAT) - allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(float)); + allocRemoteBuffer((void **)&(tmp->cM), + ((size_t) tmp->allocsize)*sizeof(float)); else if (params->elementType == SPGPU_TYPE_DOUBLE) - allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(double)); + allocRemoteBuffer((void **)&(tmp->cM), + ((size_t) tmp->allocsize)*sizeof(double)); else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT) - allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuFloatComplex)); + allocRemoteBuffer((void **)&(tmp->cM), + ((size_t) tmp->allocsize)*sizeof(cuFloatComplex)); else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE) - allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuDoubleComplex)); + allocRemoteBuffer((void **)&(tmp->cM), + ((size_t) tmp->allocsize)*sizeof(cuDoubleComplex)); else return SPGPU_UNSUPPORTED; // Unsupported params //fprintf(stderr,"From allocEllDevice: %d %d %d %p %p %p\n",tmp->maxRowSize, @@ -104,13 +108,17 @@ void zeroEllDevice(void *remoteMatrix) struct EllDevice *tmp = (struct EllDevice *) remoteMatrix; if (tmp->dataType == SPGPU_TYPE_FLOAT) - cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(float)); + cudaMemset((void *)tmp->cM, 0, + ((size_t) tmp->allocsize)*sizeof(float)); else if (tmp->dataType == SPGPU_TYPE_DOUBLE) - cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(double)); + cudaMemset((void *)tmp->cM, 0, + ((size_t) tmp->allocsize)*sizeof(double)); else if (tmp->dataType == SPGPU_TYPE_COMPLEX_FLOAT) - cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(cuFloatComplex)); + cudaMemset((void *)tmp->cM, 0, + ((size_t) tmp->allocsize)*sizeof(cuFloatComplex)); else if (tmp->dataType == SPGPU_TYPE_COMPLEX_DOUBLE) - cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(cuDoubleComplex)); + cudaMemset((void *)tmp->cM, 0, + ((size_t) tmp->allocsize)*sizeof(cuDoubleComplex)); else return ; // Unsupported params //fprintf(stderr,"From allocEllDevice: %d %d %d %p %p %p\n",tmp->maxRowSize, @@ -318,10 +326,14 @@ int writeEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, { int i; struct EllDevice *devMat = (struct EllDevice *) deviceMat; // Ex updateFromHost function - i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float)); - if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); - if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); - if (i==0) i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, + ((size_t) devMat->allocsize)*sizeof(float)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, + ((size_t) devMat->allocsize)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, + ((size_t) devMat->rows)*sizeof(int)); //i = writeEllDevice(deviceMat, (void *) val, ja, irn); /*if (i != 0) { fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i); @@ -333,10 +345,14 @@ int writeEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* ir { int i; struct EllDevice *devMat = (struct EllDevice *) deviceMat; // Ex updateFromHost function - i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double)); - if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); - if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); - if (i==0) i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, + ((size_t) devMat->allocsize)*sizeof(double)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, + ((size_t) devMat->allocsize)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, + ((size_t) devMat->rows)*sizeof(int)); /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ if (i != 0) { @@ -349,10 +365,14 @@ int writeEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int { int i; struct EllDevice *devMat = (struct EllDevice *) deviceMat; // Ex updateFromHost function - i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex)); - i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); - i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); - i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, + ((size_t) devMat->allocsize)*sizeof(cuFloatComplex)); + i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, + ((size_t) devMat->allocsize)*sizeof(int)); + i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, + ((size_t) devMat->rows)*sizeof(int)); /*i = writeEllDevice(deviceMat, (void *) val, ja, irn); if (i != 0) { @@ -365,10 +385,14 @@ int writeEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, i { int i; struct EllDevice *devMat = (struct EllDevice *) deviceMat; // Ex updateFromHost function - i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex)); - i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); - i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); - i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, + ((size_t) devMat->allocsize)*sizeof(cuDoubleComplex)); + i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, + ((size_t) devMat->allocsize)*sizeof(int)); + i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, + ((size_t) devMat->rows)*sizeof(int)); /*i = writeEllDevice(deviceMat, (void *) val, ja, irn); if (i != 0) { @@ -380,10 +404,14 @@ int writeEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, i int readEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag) { int i; struct EllDevice *devMat = (struct EllDevice *) deviceMat; - i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(float)); - i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); - i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); - i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void *) val, (void *)devMat->cM, + ((size_t) devMat->allocsize)*sizeof(float)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, + ((size_t) devMat->allocsize)*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, + ((size_t) devMat->rows)*sizeof(int)); /*i = readEllDevice(deviceMat, (void *) val, ja, irn); if (i != 0) { fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i); @@ -394,10 +422,14 @@ int readEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int readEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag) { int i; struct EllDevice *devMat = (struct EllDevice *) deviceMat; - i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(double)); - i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); - i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); - i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void *) val, (void *)devMat->cM, + ((size_t) devMat->allocsize)*sizeof(double)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, + ((size_t) devMat->allocsize)*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, + ((size_t) devMat->rows)*sizeof(int)); /*if (i != 0) { fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); }*/ @@ -407,10 +439,14 @@ int readEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn int readEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag) { int i; struct EllDevice *devMat = (struct EllDevice *) deviceMat; - i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex)); - i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); - i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); - i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void *) val, (void *)devMat->cM, + ((size_t) devMat->allocsize)*sizeof(cuFloatComplex)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, + ((size_t) devMat->allocsize)*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, + ((size_t) devMat->rows)*sizeof(int)); /*if (i != 0) { fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); }*/ @@ -420,10 +456,14 @@ int readEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int int readEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag) { int i; struct EllDevice *devMat = (struct EllDevice *) deviceMat; - i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex)); - i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); - i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); - i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void *) val, (void *)devMat->cM, + ((size_t) devMat->allocsize)*sizeof(cuDoubleComplex)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, + ((size_t) devMat->allocsize)*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, + ((size_t) devMat->rows)*sizeof(int)); /*if (i != 0) { fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); }*/ @@ -459,17 +499,23 @@ int psiCopyCooToElgFloat(int nr, int nc, int nza, int hacksz, int ldv, int nzm, spgpuHandle_t handle; handle = psb_cudaGetHandle(); - allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); - allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); - allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(float)); - i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(float)); - if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int)); - if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int)); - if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); - - if (i==0) psi_cuda_s_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm, + allocRemoteBuffer((void **)&(devIdisp), ((size_t) (nr+1))*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), ((size_t) (nza))*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), ((size_t) (nza))*sizeof(float)); + i = writeRemoteBuffer((void*) val, (void *)devVal, + ((size_t) nza)*sizeof(float)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, + ((size_t) nza)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, + ((size_t) devMat->rows+1)*sizeof(int)); + + if (i==0) psi_cuda_s_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz, + ldv,nzm, (int *) devMat->rS,devIdisp,devJa,devVal, - (int *) devMat->diag, (int *) devMat->rP, (float *)devMat->cM); + (int *) devMat->diag, (int *) devMat->rP, + (float *)devMat->cM); // Ex updateFromHost function //i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float)); //if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); @@ -498,17 +544,23 @@ int psiCopyCooToElgDouble(int nr, int nc, int nza, int hacksz, int ldv, int nzm, spgpuHandle_t handle; handle = psb_cudaGetHandle(); - allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); - allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); - allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(double)); - i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(double)); - if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int)); - if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int)); - if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); - - if (i==0) psi_cuda_d_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm, + allocRemoteBuffer((void **)&(devIdisp), ((size_t) nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), ((size_t) nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), ((size_t) nza)*sizeof(double)); + i = writeRemoteBuffer((void*) val, (void *)devVal, + ((size_t) nza)*sizeof(double)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, + ((size_t) nza)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, + ((size_t) devMat->rows+1)*sizeof(int)); + + if (i==0) psi_cuda_d_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz, + ldv,nzm, (int *) devMat->rS,devIdisp,devJa,devVal, - (int *) devMat->diag, (int *) devMat->rP, (double *)devMat->cM); + (int *) devMat->diag, (int *) devMat->rP, + (double *)devMat->cM); // Ex updateFromHost function //i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double)); //if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); @@ -536,17 +588,23 @@ int psiCopyCooToElgFloatComplex(int nr, int nc, int nza, int hacksz, int ldv, in spgpuHandle_t handle; handle = psb_cudaGetHandle(); - allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); - allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); - allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuFloatComplex)); - i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuFloatComplex)); - if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int)); - if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int)); - if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); - - if (i==0) psi_cuda_c_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm, + allocRemoteBuffer((void **)&(devIdisp), ((size_t) nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), ((size_t) nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), ((size_t) nza)*sizeof(cuFloatComplex)); + i = writeRemoteBuffer((void*) val, (void *)devVal, + ((size_t) nza)*sizeof(cuFloatComplex)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, + ((size_t) nza)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, + ((size_t) devMat->rows+1)*sizeof(int)); + + if (i==0) psi_cuda_c_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz, + ldv,nzm, (int *) devMat->rS,devIdisp,devJa,devVal, - (int *) devMat->diag,(int *) devMat->rP, (float complex *)devMat->cM); + (int *) devMat->diag,(int *) devMat->rP, + (float complex *)devMat->cM); // Ex updateFromHost function //i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float complex)); //if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); @@ -575,17 +633,23 @@ int psiCopyCooToElgDoubleComplex(int nr, int nc, int nza, int hacksz, int ldv, i spgpuHandle_t handle; handle = psb_cudaGetHandle(); - allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); - allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); - allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuDoubleComplex)); - i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuDoubleComplex)); - if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int)); - if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int)); - if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); - - if (i==0) psi_cuda_z_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm, + allocRemoteBuffer((void **)&(devIdisp), ((size_t) nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), ((size_t) nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), ((size_t) nza)*sizeof(cuDoubleComplex)); + i = writeRemoteBuffer((void*) val, (void *)devVal, + ((size_t) nza)*sizeof(cuDoubleComplex)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, + ((size_t) nza)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, + ((size_t) (devMat->rows+1))*sizeof(int)); + + if (i==0) psi_cuda_z_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz, + ldv,nzm, (int *) devMat->rS,devIdisp,devJa,devVal, - (int *) devMat->diag,(int *) devMat->rP, (double complex *)devMat->cM); + (int *) devMat->diag,(int *) devMat->rP, + (double complex *)devMat->cM); // Ex updateFromHost function //i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double complex)); //if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); diff --git a/cuda/fcusparse_fct.h b/cuda/fcusparse_fct.h index ec7166f5..848194aa 100644 --- a/cuda/fcusparse_fct.h +++ b/cuda/fcusparse_fct.h @@ -77,7 +77,7 @@ int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, cMat->mvbuffer = NULL; } //CHECK_CUDA(cudaMalloc((void **) &(cMat->mvbuffer), bfsz)); - allocRemoteBuffer((void **) &(cMat->mvbuffer), bfsz); + allocRemoteBuffer((void **) &(cMat->mvbuffer), (size_t) bfsz); cMat->mvbsize = bfsz; } CHECK_CUSPARSE(cusparseCsrmvEx(*my_handle, @@ -115,7 +115,7 @@ int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, cMat->mvbuffer = NULL; } //CHECK_CUDA(cudaMalloc((void **) &(cMat->mvbuffer), bfsz)); - allocRemoteBuffer((void **) &(cMat->mvbuffer), bfsz); + allocRemoteBuffer((void **) &(cMat->mvbuffer), (size_t) bfsz); cMat->mvbsize = bfsz; } @@ -189,7 +189,7 @@ int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, cMat->svbuffer = NULL; } //CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz)); - allocRemoteBuffer((void **) &(cMat->svbuffer), bfsz); + allocRemoteBuffer((void **) &(cMat->svbuffer), (size_t) bfsz); cMat->svbsize=bfsz; CHECK_CUSPARSE(cusparseSpSV_analysis(*my_handle, @@ -251,11 +251,14 @@ int T_CSRGDeviceAlloc(T_Cmat *Matrix,int nr, int nc, int nz) cMat->nz = nz; if (nr1 == 0) nr1 = 1; if (nz1 == 0) nz1 = 1; - if ((rc= allocRemoteBuffer(((void **) &(cMat->irp)), ((nr1+1)*sizeof(int)))) != 0) + if ((rc= allocRemoteBuffer(((void **) &(cMat->irp)), + (((size_t) nr1+1)*sizeof(int)))) != 0) return(rc); - if ((rc= allocRemoteBuffer(((void **) &(cMat->ja)), ((nz1)*sizeof(int)))) != 0) + if ((rc= allocRemoteBuffer(((void **) &(cMat->ja)), + (((size_t) nz1)*sizeof(int)))) != 0) return(rc); - if ((rc= allocRemoteBuffer(((void **) &(cMat->val)), ((nz1)*sizeof(TYPE)))) != 0) + if ((rc= allocRemoteBuffer(((void **) &(cMat->val)), + (((size_t) nz1)*sizeof(TYPE)))) != 0) return(rc); #if CUDA_SHORT_VERSION <= 10 if ((rc= cusparseCreateMatDescr(&(cMat->descr))) !=0) @@ -286,7 +289,7 @@ int T_CSRGDeviceAlloc(T_Cmat *Matrix,int nr, int nc, int nz) /* } */ if (bfsz > 0) { //CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz)); - allocRemoteBuffer((void **) &(cMat->svbuffer), bfsz); + allocRemoteBuffer((void **) &(cMat->svbuffer), (size_t) bfsz); } else { cMat->svbuffer=NULL; @@ -481,16 +484,16 @@ int T_CSRGHost2Device(T_Cmat *Matrix, int m, int n, int nz, cusparseHandle_t *my_handle=getHandle(); if ((rc=writeRemoteBuffer((void *) irp, (void *) cMat->irp, - (m+1)*sizeof(int))) + ((size_t) m+1)*sizeof(int))) != SPGPU_SUCCESS) return(rc); if ((rc=writeRemoteBuffer((void *) ja,(void *) cMat->ja, - (nz)*sizeof(int))) + ((size_t) nz)*sizeof(int))) != SPGPU_SUCCESS) return(rc); if ((rc=writeRemoteBuffer((void *) val, (void *) cMat->val, - (nz)*sizeof(TYPE))) + ((size_t) nz)*sizeof(TYPE))) != SPGPU_SUCCESS) return(rc); #if (CUDA_SHORT_VERSION > 10 ) && (CUDA_VERSION < 11030) @@ -515,14 +518,17 @@ int T_CSRGDevice2Host(T_Cmat *Matrix, int m, int n, int nz, int rc; T_CSRGDeviceMat *cMat = Matrix->mat; - if ((rc=readRemoteBuffer((void *) irp, (void *) cMat->irp, (m+1)*sizeof(int))) + if ((rc=readRemoteBuffer((void *) irp, (void *) cMat->irp, + ((size_t) m+1)*sizeof(int))) != SPGPU_SUCCESS) return(rc); - if ((rc=readRemoteBuffer((void *) ja, (void *) cMat->ja, (nz)*sizeof(int))) + if ((rc=readRemoteBuffer((void *) ja, (void *) cMat->ja, + ((size_t) nz)*sizeof(int))) != SPGPU_SUCCESS) return(rc); - if ((rc=readRemoteBuffer((void *) val, (void *) cMat->val, (nz)*sizeof(TYPE))) + if ((rc=readRemoteBuffer((void *) val, (void *) cMat->val, + ((size_t) nz)*sizeof(TYPE))) != SPGPU_SUCCESS) return(rc); @@ -679,24 +685,27 @@ int T_HYBGHost2Device(T_Hmat *Matrix, int m, int n, int nz, if (nr1 == 0) nr1 = 1; if (nz1 == 0) nz1 = 1; - if ((rc= allocRemoteBuffer(((void **) &(hMat->irp)), ((nr1+1)*sizeof(int)))) != 0) + if ((rc= allocRemoteBuffer(((void **) &(hMat->irp)), + (((size_t) nr1+1)*sizeof(int)))) != 0) return(rc); - if ((rc= allocRemoteBuffer(((void **) &(hMat->ja)), ((nz1)*sizeof(int)))) != 0) + if ((rc= allocRemoteBuffer(((void **) &(hMat->ja)), + (((size_t) nz1)*sizeof(int)))) != 0) return(rc); - if ((rc= allocRemoteBuffer(((void **) &(hMat->val)), ((nz1)*sizeof(TYPE)))) != 0) + if ((rc= allocRemoteBuffer(((void **) &(hMat->val)), + (((size_t) nz1)*sizeof(TYPE)))) != 0) return(rc); if ((rc=writeRemoteBuffer((void *) irp, (void *) hMat->irp, - (m+1)*sizeof(int))) + ((size_t) m+1)*sizeof(int))) != SPGPU_SUCCESS) return(rc); if ((rc=writeRemoteBuffer((void *) ja,(void *) hMat->ja, - (nz)*sizeof(int))) + ((size_t) nz)*sizeof(int))) != SPGPU_SUCCESS) return(rc); if ((rc=writeRemoteBuffer((void *) val, (void *) hMat->val, - (nz)*sizeof(TYPE))) + ((size_t) nz)*sizeof(TYPE))) != SPGPU_SUCCESS) return(rc); /* rc = (int) cusparseGetMatType(hMat->descr); */ diff --git a/cuda/hdiagdev.c b/cuda/hdiagdev.c index 6302eed1..813e4fab 100644 --- a/cuda/hdiagdev.c +++ b/cuda/hdiagdev.c @@ -93,38 +93,45 @@ int allocHdiagDevice(void **remoteMatrix, HdiagDeviceParams* params) #endif if (ret == SPGPU_SUCCESS) - ret=allocRemoteBuffer((void **)&(tmp->hackOffsets), (tmp->hackCount+1)*sizeof(int)); + ret=allocRemoteBuffer((void **)&(tmp->hackOffsets), + ((size_t) tmp->hackCount+1)*sizeof(int)); if (ret == SPGPU_SUCCESS) - ret=allocRemoteBuffer((void **)&(tmp->hdiaOffsets), tmp->allocationHeight*sizeof(int)); + ret=allocRemoteBuffer((void **)&(tmp->hdiaOffsets), + ((size_t) tmp->allocationHeight)*sizeof(int)); /* tmp->baseIndex = params->firstIndex; */ if (params->elementType == SPGPU_TYPE_INT) { if (ret == SPGPU_SUCCESS) - ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(int)); + ret=allocRemoteBuffer((void **)&(tmp->cM), + ((size_t) tmp->hackSize)*tmp->allocationHeight*sizeof(int)); } else if (params->elementType == SPGPU_TYPE_FLOAT) { if (ret == SPGPU_SUCCESS) - ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(float)); + ret=allocRemoteBuffer((void **)&(tmp->cM), + ((size_t) tmp->hackSize)*tmp->allocationHeight*sizeof(float)); } else if (params->elementType == SPGPU_TYPE_DOUBLE) { if (ret == SPGPU_SUCCESS) - ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(double)); + ret=allocRemoteBuffer((void **)&(tmp->cM), + ((size_t) tmp->hackSize)*tmp->allocationHeight*sizeof(double)); } else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT) { if (ret == SPGPU_SUCCESS) - ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(cuFloatComplex)); + ret=allocRemoteBuffer((void **)&(tmp->cM), + ((size_t) tmp->hackSize)*tmp->allocationHeight*sizeof(cuFloatComplex)); } else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE) { if (ret == SPGPU_SUCCESS) - ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(cuDoubleComplex)); + ret=allocRemoteBuffer((void **)&(tmp->cM), + ((size_t) tmp->hackSize)*tmp->allocationHeight*sizeof(cuDoubleComplex)); } else return SPGPU_UNSUPPORTED; // Unsupported params @@ -137,7 +144,8 @@ int FallocHdiagDevice(void** deviceMat, unsigned int rows, unsigned int cols, { int i=0; HdiagDeviceParams p; - p = getHdiagDeviceParams(rows, cols, allocationHeight, hackSize, hackCount,elementType); + p = getHdiagDeviceParams(rows, cols, allocationHeight, + hackSize, hackCount,elementType); i = allocHdiagDevice(deviceMat, &p); #if DEBUG @@ -188,14 +196,14 @@ int writeHdiagDeviceDouble(void* deviceMat, double* val, int* hdiaOffsets, int * if(i== SPGPU_SUCCESS) i = writeRemoteBuffer((void *) hackOffsets,(void *) devMat->hackOffsets, - (devMat->hackCount+1)*sizeof(int)); + ((size_t) devMat->hackCount+1)*sizeof(int)); if(i== SPGPU_SUCCESS) i = writeRemoteBuffer((void*) hdiaOffsets, (void *)devMat->hdiaOffsets, - devMat->allocationHeight*sizeof(int)); + ((size_t) devMat->allocationHeight)*sizeof(int)); if(i== SPGPU_SUCCESS) i = writeRemoteBuffer((void*) val, (void *)devMat->cM, - devMat->allocationHeight*devMat->hackSize*sizeof(double)); + ((size_t) devMat->allocationHeight)*devMat->hackSize*sizeof(double)); if (i!=0) fprintf(stderr,"Error in writeHdiagDeviceDouble %d\n",i); @@ -304,14 +312,14 @@ int writeHdiagDeviceFloat(void* deviceMat, float* val, int* hdiaOffsets, int *ha if(i== SPGPU_SUCCESS) i = writeRemoteBuffer((void *) hackOffsets,(void *) devMat->hackOffsets, - (devMat->hackCount+1)*sizeof(int)); + ((size_t) devMat->hackCount+1)*sizeof(int)); if(i== SPGPU_SUCCESS) i = writeRemoteBuffer((void*) hdiaOffsets, (void *)devMat->hdiaOffsets, - devMat->allocationHeight*sizeof(int)); + ((size_t) devMat->allocationHeight)*sizeof(int)); if(i== SPGPU_SUCCESS) i = writeRemoteBuffer((void*) val, (void *)devMat->cM, - devMat->allocationHeight*devMat->hackSize*sizeof(float)); + ((size_t) devMat->allocationHeight)*devMat->hackSize*sizeof(float)); if (i!=0) fprintf(stderr,"Error in writeHdiagDeviceFloat %d\n",i); diff --git a/cuda/hlldev.c b/cuda/hlldev.c index 9da6a48c..186831d3 100644 --- a/cuda/hlldev.c +++ b/cuda/hlldev.c @@ -71,6 +71,8 @@ int allocHllDevice(void ** remoteMatrix, HllDeviceParams* params) { HllDevice *tmp = (HllDevice *)malloc(sizeof(HllDevice)); int ret=SPGPU_SUCCESS; + size_t tt; + int ti; *remoteMatrix = (void *)tmp; tmp->hackSize = params->hackSize; @@ -87,41 +89,54 @@ int allocHllDevice(void ** remoteMatrix, HllDeviceParams* params) //printf("hackOffsLength %d\n",tmp->hackOffsLength); if (ret == SPGPU_SUCCESS) - ret=allocRemoteBuffer((void **)&(tmp->rP), tmp->allocsize*sizeof(int)); + ret=allocRemoteBuffer((void **)&(tmp->rP), + ((size_t) tmp->allocsize)*sizeof(int)); if (ret == SPGPU_SUCCESS) - ret=allocRemoteBuffer((void **)&(tmp->rS), tmp->rows*sizeof(int)); + ret=allocRemoteBuffer((void **)&(tmp->rS), + ((size_t) tmp->rows)*sizeof(int)); if (ret == SPGPU_SUCCESS) - ret=allocRemoteBuffer((void **)&(tmp->diag), tmp->rows*sizeof(int)); + ret=allocRemoteBuffer((void **)&(tmp->diag), + ((size_t) tmp->rows)*sizeof(int)); if (ret == SPGPU_SUCCESS) - ret=allocRemoteBuffer((void **)&(tmp->hackOffs), ((tmp->hackOffsLength+1)*sizeof(int))); + ret=allocRemoteBuffer((void **)&(tmp->hackOffs), + (((size_t) tmp->hackOffsLength+1)*sizeof(int))); if (params->elementType == SPGPU_TYPE_INT) { if (ret == SPGPU_SUCCESS) - ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(int)); + ret=allocRemoteBuffer((void **)&(tmp->cM), + ((size_t) tmp->allocsize)*sizeof(int)); } else if (params->elementType == SPGPU_TYPE_FLOAT) { if (ret == SPGPU_SUCCESS) - ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(float)); + ret=allocRemoteBuffer((void **)&(tmp->cM), + ((size_t) tmp->allocsize)*sizeof(float)); } else if (params->elementType == SPGPU_TYPE_DOUBLE) { - if (ret == SPGPU_SUCCESS) - ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(double)); + if (ret == SPGPU_SUCCESS) { + /* tt = ((size_t) tmp->allocsize)*sizeof(double); + ti = ((size_t) tmp->allocsize)*sizeof(double); + fprintf(stderr,"%ld %d %d\n",tt, ti, tmp->allocsize);*/ + ret=allocRemoteBuffer((void **)&(tmp->cM), + ((size_t) tmp->allocsize)*sizeof(double)); + } } else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT) { if (ret == SPGPU_SUCCESS) - ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuFloatComplex)); + ret=allocRemoteBuffer((void **)&(tmp->cM), + ((size_t) tmp->allocsize)*sizeof(cuFloatComplex)); } else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE) { if (ret == SPGPU_SUCCESS) - ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuDoubleComplex)); + ret=allocRemoteBuffer((void **)&(tmp->cM), + ((size_t) tmp->allocsize)*sizeof(cuDoubleComplex)); } else return SPGPU_UNSUPPORTED; // Unsupported params @@ -151,7 +166,7 @@ int FallocHllDevice(void** deviceMat,unsigned int hksize, unsigned int rows, un p = bldHllDeviceParams(hksize, rows, nzeros, allocsize, elementType, firstIndex); i = allocHllDevice(deviceMat, &p); if (i != 0) { - fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i); + fprintf(stderr,"From routine : %s : %d \n","FallocHllDevice",i); } return(i); } @@ -259,11 +274,16 @@ int writeHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* { int i; HllDevice *devMat = (HllDevice *) deviceMat; // Ex updateFromHost function - i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float)); - i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); - i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); - i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); - i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, + ((size_t) devMat->allocsize)*sizeof(float)); + i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, + ((size_t) devMat->allocsize)*sizeof(int)); + i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, + ((size_t) devMat->rows)*sizeof(int)); + i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, + ((size_t) devMat->hackOffsLength+1)*sizeof(int)); //i = writeEllDevice(deviceMat, (void *) val, ja, irn); /*if (i != 0) { fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i); @@ -275,11 +295,16 @@ int writeHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int { int i; HllDevice *devMat = (HllDevice *) deviceMat; // Ex updateFromHost function - i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double)); - i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); - i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); - i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); - i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, + ((size_t) devMat->allocsize)*sizeof(double)); + i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, + ((size_t) devMat->allocsize)*sizeof(int)); + i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, + ((size_t) devMat->rows)*sizeof(int)); + i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, + ((size_t) devMat->hackOffsLength+1)*sizeof(int)); /*i = writeEllDevice(deviceMat, (void *) val, ja, irn); if (i != 0) { fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); @@ -291,11 +316,16 @@ int writeHllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int { int i; HllDevice *devMat = (HllDevice *) deviceMat; // Ex updateFromHost function - i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex)); - i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); - i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); - i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); - i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, + ((size_t) devMat->allocsize)*sizeof(cuFloatComplex)); + i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, + ((size_t) devMat->allocsize)*sizeof(int)); + i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, + ((size_t) devMat->rows)*sizeof(int)); + i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, + ((size_t) devMat->hackOffsLength+1)*sizeof(int)); /*i = writeEllDevice(deviceMat, (void *) val, ja, irn); if (i != 0) { fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); @@ -307,11 +337,16 @@ int writeHllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, i { int i; HllDevice *devMat = (HllDevice *) deviceMat; // Ex updateFromHost function - i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex)); - i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); - i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); - i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); - i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, + ((size_t) devMat->allocsize)*sizeof(cuDoubleComplex)); + i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, + ((size_t) devMat->allocsize)*sizeof(int)); + i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, + ((size_t) devMat->rows)*sizeof(int)); + i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, + ((size_t) devMat->hackOffsLength+1)*sizeof(int)); /*i = writeEllDevice(deviceMat, (void *) val, ja, irn); if (i != 0) { fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); @@ -322,11 +357,16 @@ int writeHllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, i int readHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag) { int i; HllDevice *devMat = (HllDevice *) deviceMat; - i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(float)); - i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); - i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); - i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); - i = readRemoteBuffer((void *) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + i = readRemoteBuffer((void *) val, (void *)devMat->cM, + ((size_t) devMat->allocsize)*sizeof(float)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, + ((size_t) devMat->allocsize)*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, + ((size_t) devMat->rows)*sizeof(int)); + i = readRemoteBuffer((void *) hkoffs, (void *)devMat->hackOffs, + ((size_t) devMat->hackOffsLength+1)*sizeof(int)); /*i = readEllDevice(deviceMat, (void *) val, ja, irn); if (i != 0) { fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i); @@ -337,11 +377,16 @@ int readHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* i int readHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag) { int i; HllDevice *devMat = (HllDevice *) deviceMat; - i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(double)); - i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); - i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); - i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); - i = readRemoteBuffer((void *) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + i = readRemoteBuffer((void *) val, (void *)devMat->cM, + ((size_t) devMat->allocsize)*sizeof(double)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, + ((size_t) devMat->allocsize)*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, + ((size_t) devMat->rows)*sizeof(int)); + i = readRemoteBuffer((void *) hkoffs, (void *)devMat->hackOffs, + ((size_t) devMat->hackOffsLength+1)*sizeof(int)); /*if (i != 0) { fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); }*/ @@ -351,11 +396,16 @@ int readHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* int readHllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int *hkoffs, int* irn, int *idiag) { int i; HllDevice *devMat = (HllDevice *) deviceMat; - i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex)); - i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); - i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); - i = readRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); - i = readRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + i = readRemoteBuffer((void *) val, (void *)devMat->cM, + ((size_t) devMat->allocsize)*sizeof(cuFloatComplex)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, + ((size_t) devMat->allocsize)*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + i = readRemoteBuffer((void*) idiag, (void *)devMat->diag, + ((size_t) devMat->rows)*sizeof(int)); + i = readRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, + ((size_t) devMat->hackOffsLength+1)*sizeof(int)); /*if (i != 0) { fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); }*/ @@ -365,11 +415,16 @@ int readHllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int int readHllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int *hkoffs, int* irn, int *idiag) { int i; HllDevice *devMat = (HllDevice *) deviceMat; - i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex)); - i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); - i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); - i = readRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); - i = readRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + i = readRemoteBuffer((void *) val, (void *)devMat->cM, + ((size_t) devMat->allocsize)*sizeof(cuDoubleComplex)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, + ((size_t) devMat->allocsize)*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + i = readRemoteBuffer((void*) idiag, (void *)devMat->diag, + ((size_t) devMat->rows)*sizeof(int)); + i = readRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, + ((size_t) devMat->hackOffsLength+1)*sizeof(int)); /*if (i != 0) { fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); }*/ @@ -388,16 +443,21 @@ int psiCopyCooToHlgFloat(int nr, int nc, int nza, int hacksz, int noffs, int isz int *devIdisp, *devJa; int *tja; //fprintf(stderr,"devMat: %p\n",devMat); - allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); - allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); - allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(float)); + allocRemoteBuffer((void **)&(devIdisp), ((size_t) nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), ((size_t) nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), ((size_t) nza)*sizeof(float)); // fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength); - i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(float)); - if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int)); - if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int)); - if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); - if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); + i = writeRemoteBuffer((void*) val, (void *)devVal, + ((size_t) nza)*sizeof(float)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, + ((size_t) nza)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, + ((size_t) devMat->hackOffsLength+1)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, + ((size_t) devMat->rows+1)*sizeof(int)); //cudaSync(); handle = psb_cudaGetHandle(); @@ -427,20 +487,25 @@ int psiCopyCooToHlgDouble(int nr, int nc, int nza, int hacksz, int noffs, int is int *devIdisp, *devJa; int *tja; //fprintf(stderr,"devMat: %p\n",devMat); - allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); - allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); - allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(double)); + allocRemoteBuffer((void **)&(devIdisp), ((size_t) nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), ((size_t) nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), ((size_t) nza)*sizeof(double)); // fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength); - i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(double)); + i = writeRemoteBuffer((void*) val, (void *)devVal, + ((size_t) nza)*sizeof(double)); //fprintf(stderr,"WriteRemoteBuffer val %d\n",i); - if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, + ((size_t) nza)*sizeof(int)); //fprintf(stderr,"WriteRemoteBuffer ja %d\n",i); - if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); //fprintf(stderr,"WriteRemoteBuffer irn %d\n",i); - if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, + ((size_t) devMat->hackOffsLength+1)*sizeof(int)); //fprintf(stderr,"WriteRemoteBuffer hoffs %d\n",i); - if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, + ((size_t) devMat->rows+1)*sizeof(int)); //fprintf(stderr,"WriteRemoteBuffer idisp %d\n",i); //cudaSync(); //fprintf(stderr," hacksz: %d \n",hacksz); @@ -448,7 +513,8 @@ int psiCopyCooToHlgDouble(int nr, int nc, int nza, int hacksz, int noffs, int is psi_cuda_d_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, (int *) devMat->rS, (int *) devMat->hackOffs, devIdisp,devJa,devVal, - (int *) devMat->diag, (int *) devMat->rP, (double *)devMat->cM); + (int *) devMat->diag, (int *) devMat->rP, + (double *)devMat->cM); freeRemoteBuffer(devIdisp); freeRemoteBuffer(devJa); @@ -471,23 +537,29 @@ int psiCopyCooToHlgFloatComplex(int nr, int nc, int nza, int hacksz, int noffs, int *devIdisp, *devJa; int *tja; //fprintf(stderr,"devMat: %p\n",devMat); - allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); - allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); - allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuFloatComplex)); + allocRemoteBuffer((void **)&(devIdisp), ((size_t) nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), ((size_t) nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), ((size_t) nza)*sizeof(cuFloatComplex)); // fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength); - i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuFloatComplex)); - if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int)); - if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int)); - if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); - if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); + i = writeRemoteBuffer((void*) val, (void *)devVal, + ((size_t) nza)*sizeof(cuFloatComplex)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, + ((size_t) nza)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, + ((size_t) devMat->hackOffsLength+1)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, + ((size_t) devMat->rows+1)*sizeof(int)); //cudaSync(); handle = psb_cudaGetHandle(); psi_cuda_c_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, (int *) devMat->rS, (int *) devMat->hackOffs, devIdisp,devJa,devVal, - (int *) devMat->diag,(int *) devMat->rP, (float complex *)devMat->cM); + (int *) devMat->diag,(int *) devMat->rP, + (float complex *)devMat->cM); freeRemoteBuffer(devIdisp); freeRemoteBuffer(devJa); @@ -510,23 +582,29 @@ int psiCopyCooToHlgDoubleComplex(int nr, int nc, int nza, int hacksz, int noffs, int *devIdisp, *devJa; int *tja; //fprintf(stderr,"devMat: %p\n",devMat); - allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); - allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); - allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuDoubleComplex)); + allocRemoteBuffer((void **)&(devIdisp), ((size_t) nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), ((size_t) nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), ((size_t) nza)*sizeof(cuDoubleComplex)); // fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength); - i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuDoubleComplex)); - if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int)); - if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int)); - if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); - if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); + i = writeRemoteBuffer((void*) val, (void *)devVal, + ((size_t) nza)*sizeof(cuDoubleComplex)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, + ((size_t) nza)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, + ((size_t) devMat->hackOffsLength+1)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, + ((size_t) devMat->rows+1)*sizeof(int)); //cudaSync(); handle = psb_cudaGetHandle(); psi_cuda_z_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, (int *) devMat->rS, (int *) devMat->hackOffs, devIdisp,devJa,devVal, - (int *) devMat->diag,(int *) devMat->rP, (double complex *)devMat->cM); + (int *) devMat->diag,(int *) devMat->rP, + (double complex *)devMat->cM); freeRemoteBuffer(devIdisp); freeRemoteBuffer(devJa); diff --git a/cuda/ivectordev.c b/cuda/ivectordev.c index 241f1115..f908e391 100644 --- a/cuda/ivectordev.c +++ b/cuda/ivectordev.c @@ -39,14 +39,14 @@ int registerMappedInt(void *buff, void **d_p, int n, int dummy) { - return registerMappedMemory(buff,d_p,n*sizeof(int)); + return registerMappedMemory(buff,d_p,((size_t) n)*sizeof(int)); } int writeMultiVecDeviceInt(void* deviceVec, int* hostVec) { int i; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_, - devVec->pitch_*devVec->count_*sizeof(int)); + ((size_t) devVec->pitch_)*devVec->count_*sizeof(int)); if (i != 0) { fprintf(stderr,"From routine : %s : %d \n","FallocMultiVecDevice",i); } @@ -67,7 +67,7 @@ int readMultiVecDeviceInt(void* deviceVec, int* hostVec) { int i,j; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_, - devVec->pitch_*devVec->count_*sizeof(int)); + ((size_t) devVec->pitch_)*devVec->count_*sizeof(int)); if (i != 0) { fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceInt",i); } diff --git a/cuda/svectordev.c b/cuda/svectordev.c index ab4dd01b..e1c43b5e 100644 --- a/cuda/svectordev.c +++ b/cuda/svectordev.c @@ -39,14 +39,15 @@ int registerMappedFloat(void *buff, void **d_p, int n, float dummy) { - return registerMappedMemory(buff,d_p,n*sizeof(float)); + return registerMappedMemory(buff,d_p,((size_t) n)*sizeof(float)); } int writeMultiVecDeviceFloat(void* deviceVec, float* hostVec) { int i; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; // Ex updateFromHost vector function - i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_, devVec->pitch_*devVec->count_*sizeof(float)); + i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_, + ((size_t) devVec->pitch_)*devVec->count_*sizeof(float)); if (i != 0) { fprintf(stderr,"From routine : %s : %d \n","FallocMultiVecDevice",i); } @@ -66,7 +67,7 @@ int readMultiVecDeviceFloat(void* deviceVec, float* hostVec) { int i,j; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_, - devVec->pitch_*devVec->count_*sizeof(float)); + ((size_t) devVec->pitch_)*devVec->count_*sizeof(float)); if (i != 0) { fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceFloat",i); } diff --git a/cuda/vectordev.c b/cuda/vectordev.c index 65b4d533..db976fe9 100644 --- a/cuda/vectordev.c +++ b/cuda/vectordev.c @@ -98,7 +98,8 @@ int allocMultiVecDevice(void ** remoteMultiVec, struct MultiVectorDeviceParams * tmp->pitch_ = (((params->size*sizeof(int) + 255)/256)*256)/sizeof(int); //fprintf(stderr,"Allocating an INT vector %ld\n",tmp->pitch_*tmp->count_*sizeof(double)); - return allocRemoteBuffer((void **)&(tmp->v_), tmp->pitch_*params->count*sizeof(int)); + return allocRemoteBuffer((void **)&(tmp->v_), + ((size_t) tmp->pitch_)*params->count*sizeof(int)); } else if (params->elementType == SPGPU_TYPE_FLOAT) { @@ -107,7 +108,8 @@ int allocMultiVecDevice(void ** remoteMultiVec, struct MultiVectorDeviceParams * else tmp->pitch_ = (((params->size*sizeof(float) + 255)/256)*256)/sizeof(float); - return allocRemoteBuffer((void **)&(tmp->v_), tmp->pitch_*params->count*sizeof(float)); + return allocRemoteBuffer((void **)&(tmp->v_), + ((size_t) tmp->pitch_)*params->count*sizeof(float)); } else if (params->elementType == SPGPU_TYPE_DOUBLE) { @@ -118,7 +120,8 @@ int allocMultiVecDevice(void ** remoteMultiVec, struct MultiVectorDeviceParams * tmp->pitch_ = (int)(((params->size*sizeof(double) + 255)/256)*256)/sizeof(double); //fprintf(stderr,"Allocating a DOUBLE vector %ld\n",tmp->pitch_*tmp->count_*sizeof(double)); - return allocRemoteBuffer((void **)&(tmp->v_), tmp->pitch_*tmp->count_*sizeof(double)); + return allocRemoteBuffer((void **)&(tmp->v_), + ((size_t) tmp->pitch_)*tmp->count_*sizeof(double)); } else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT) { @@ -126,7 +129,8 @@ int allocMultiVecDevice(void ** remoteMultiVec, struct MultiVectorDeviceParams * tmp->pitch_ = params->size; else tmp->pitch_ = (int)(((params->size*sizeof(cuFloatComplex) + 255)/256)*256)/sizeof(cuFloatComplex); - return allocRemoteBuffer((void **)&(tmp->v_), tmp->pitch_*tmp->count_*sizeof(cuFloatComplex)); + return allocRemoteBuffer((void **)&(tmp->v_), + ((size_t) tmp->pitch_)*tmp->count_*sizeof(cuFloatComplex)); } else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE) { @@ -134,7 +138,8 @@ int allocMultiVecDevice(void ** remoteMultiVec, struct MultiVectorDeviceParams * tmp->pitch_ = params->size; else tmp->pitch_ = (int)(((params->size*sizeof(cuDoubleComplex) + 255)/256)*256)/sizeof(cuDoubleComplex); - return allocRemoteBuffer((void **)&(tmp->v_), tmp->pitch_*tmp->count_*sizeof(cuDoubleComplex)); + return allocRemoteBuffer((void **)&(tmp->v_), + ((size_t) tmp->pitch_)*tmp->count_*sizeof(cuDoubleComplex)); } else return SPGPU_UNSUPPORTED; // Unsupported params diff --git a/cuda/zvectordev.c b/cuda/zvectordev.c index 49741582..102ba0d2 100644 --- a/cuda/zvectordev.c +++ b/cuda/zvectordev.c @@ -39,7 +39,7 @@ int registerMappedDoubleComplex(void *buff, void **d_p, int n, cuDoubleComplex dummy) { - return registerMappedMemory(buff,d_p,n*sizeof(cuDoubleComplex)); + return registerMappedMemory(buff,d_p,((size_t) n)*sizeof(cuDoubleComplex)); } int writeMultiVecDeviceDoubleComplex(void* deviceVec, cuDoubleComplex* hostVec) @@ -47,7 +47,7 @@ int writeMultiVecDeviceDoubleComplex(void* deviceVec, cuDoubleComplex* hostVec) struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; // Ex updateFromHost vector function i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_, - devVec->pitch_*devVec->count_*sizeof(cuDoubleComplex)); + ((size_t) devVec->pitch_)*devVec->count_*sizeof(cuDoubleComplex)); if (i != 0) { fprintf(stderr,"From routine : %s : %d \n","FallocMultiVecDevice",i); } @@ -67,7 +67,7 @@ int readMultiVecDeviceDoubleComplex(void* deviceVec, cuDoubleComplex* hostVec) { int i,j; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_, - devVec->pitch_*devVec->count_*sizeof(cuDoubleComplex)); + ((size_t) devVec->pitch_)*devVec->count_*sizeof(cuDoubleComplex)); if (i != 0) { fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceDoubleComplex",i); }