Malloc and trasnfers with CUDA should use (size_t) casts

repackage
sfilippone 2 months ago
parent 20958f654a
commit 2af0d56938

@ -38,12 +38,12 @@ static struct cudaDeviceProp *prop=NULL;
static spgpuHandle_t psb_cuda_handle = NULL;
static cublasHandle_t psb_cublas_handle = NULL;
#if defined(TRACK_CUDA_MALLOC)
static long long total_cuda_mem = 0;
static int64_t total_cuda_mem = 0;
#endif
int allocRemoteBuffer(void** buffer, int count)
int allocRemoteBuffer(void** buffer, size_t count)
{
cudaError_t err = cudaMalloc(buffer, count);
cudaError_t err = cudaMalloc(buffer, (size_t) count);
#if defined(TRACK_CUDA_MALLOC)
total_cuda_mem += count;
fprintf(stderr,"Tracking CUDA allocRemoteBuffer for %ld bytes total %ld address %p\n",
@ -65,7 +65,7 @@ int allocRemoteBuffer(void** buffer, int count)
}
}
int hostRegisterMapped(void *pointer, long size)
int hostRegisterMapped(void *pointer, size_t size)
{
cudaError_t err = cudaHostRegister(pointer, size, cudaHostRegisterMapped);
@ -101,7 +101,7 @@ int getDevicePointer(void **d_p, void * h_p)
}
}
int registerMappedMemory(void *buffer, void **dp, int size)
int registerMappedMemory(void *buffer, void **dp, size_t size)
{
//cudaError_t err = cudaHostAlloc(buffer,size,cudaHostAllocMapped);
cudaError_t err = cudaHostRegister(buffer, size, cudaHostRegisterMapped);
@ -130,7 +130,7 @@ int registerMappedMemory(void *buffer, void **dp, int size)
}
}
int allocMappedMemory(void **buffer, void **dp, int size)
int allocMappedMemory(void **buffer, void **dp, size_t size)
{
cudaError_t err = cudaHostAlloc(buffer,size,cudaHostAllocMapped);
if (err == 0) err = cudaHostGetDevicePointer(dp,*buffer,0);
@ -168,7 +168,7 @@ int unregisterMappedMemory(void *buffer)
}
}
int writeRemoteBuffer(void* hostSrc, void* buffer, int count)
int writeRemoteBuffer(void* hostSrc, void* buffer, size_t count)
{
cudaError_t err = cudaMemcpy(buffer, hostSrc, count, cudaMemcpyHostToDevice);
@ -181,7 +181,7 @@ int writeRemoteBuffer(void* hostSrc, void* buffer, int count)
}
}
int readRemoteBuffer(void* hostDest, void* buffer, int count)
int readRemoteBuffer(void* hostDest, void* buffer, size_t count)
{

@ -37,6 +37,7 @@
#include <stdlib.h>
#include <sys/time.h>
#include <string.h>
#include <stdint.h>
#include "cuda_runtime.h"
#include "core.h"
@ -44,12 +45,12 @@
#include "fcusparse.h"
#include "cublas_v2.h"
int allocRemoteBuffer(void** buffer, int count);
int allocMappedMemory(void **buffer, void **dp, int size);
int registerMappedMemory(void *buffer, void **dp, int size);
int allocRemoteBuffer(void** buffer, size_t count);
int allocMappedMemory(void **buffer, void **dp, size_t size);
int registerMappedMemory(void *buffer, void **dp, size_t size);
int unregisterMappedMemory(void *buffer);
int writeRemoteBuffer(void* hostSrc, void* buffer, int count);
int readRemoteBuffer(void* hostDest, void* buffer, int count);
int writeRemoteBuffer(void* hostSrc, void* buffer, size_t count);
int readRemoteBuffer(void* hostDest, void* buffer, size_t count);
int freeRemoteBuffer(void* buffer);
int gpuInit(int dev);
int getDeviceCount();

@ -39,7 +39,7 @@
int registerMappedFloatComplex(void *buff, void **d_p, int n, cuFloatComplex dummy)
{
return registerMappedMemory(buff,d_p,n*sizeof(cuFloatComplex));
return registerMappedMemory(buff,d_p,((size_t) n)*sizeof(cuFloatComplex));
}
int writeMultiVecDeviceFloatComplex(void* deviceVec, cuFloatComplex* hostVec)
@ -47,7 +47,7 @@ int writeMultiVecDeviceFloatComplex(void* deviceVec, cuFloatComplex* hostVec)
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
// Ex updateFromHost vector function
i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_,
devVec->pitch_*devVec->count_*sizeof(cuFloatComplex));
((size_t) devVec->pitch_)*devVec->count_*sizeof(cuFloatComplex));
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","FallocMultiVecDevice",i);
}
@ -67,7 +67,7 @@ int readMultiVecDeviceFloatComplex(void* deviceVec, cuFloatComplex* hostVec)
{ int i,j;
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_,
devVec->pitch_*devVec->count_*sizeof(cuFloatComplex));
((size_t) devVec->pitch_)*devVec->count_*sizeof(cuFloatComplex));
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceFloat",i);
}

@ -39,14 +39,15 @@
int registerMappedDouble(void *buff, void **d_p, int n, double dummy)
{
return registerMappedMemory(buff,d_p,n*sizeof(double));
return registerMappedMemory(buff,d_p,((size_t) n)*sizeof(double));
}
int writeMultiVecDeviceDouble(void* deviceVec, double* hostVec)
{ int i;
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
// Ex updateFromHost vector function
i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_, devVec->pitch_*devVec->count_*sizeof(double));
i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_,
((size_t) devVec->pitch_)*devVec->count_*sizeof(double));
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","FallocMultiVecDevice",i);
}
@ -66,7 +67,7 @@ int readMultiVecDeviceDouble(void* deviceVec, double* hostVec)
{ int i,j;
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_,
devVec->pitch_*devVec->count_*sizeof(double));
((size_t) devVec->pitch_)*devVec->count_*sizeof(double));
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceDouble",i);
}

@ -75,21 +75,25 @@ int allocEllDevice(void ** remoteMatrix, EllDeviceParams* params)
tmp->avgRowSize = params->avgRowSize;
tmp->allocsize = (int)tmp->maxRowSize * tmp->pitch;
//tmp->allocsize = (int)params->maxRowSize * tmp->cMPitch;
allocRemoteBuffer((void **)&(tmp->rS), tmp->rows*sizeof(int));
allocRemoteBuffer((void **)&(tmp->diag), tmp->rows*sizeof(int));
allocRemoteBuffer((void **)&(tmp->rP), tmp->allocsize*sizeof(int));
allocRemoteBuffer((void **)&(tmp->rS), ((size_t) tmp->rows)*sizeof(int));
allocRemoteBuffer((void **)&(tmp->diag), ((size_t) tmp->rows)*sizeof(int));
allocRemoteBuffer((void **)&(tmp->rP), ((size_t) tmp->allocsize)*sizeof(int));
tmp->columns = params->columns;
tmp->baseIndex = params->firstIndex;
tmp->dataType = params->elementType;
//fprintf(stderr,"allocEllDevice: %d %d %d \n",tmp->pitch, params->maxRowSize, params->avgRowSize);
if (params->elementType == SPGPU_TYPE_FLOAT)
allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(float));
allocRemoteBuffer((void **)&(tmp->cM),
((size_t) tmp->allocsize)*sizeof(float));
else if (params->elementType == SPGPU_TYPE_DOUBLE)
allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(double));
allocRemoteBuffer((void **)&(tmp->cM),
((size_t) tmp->allocsize)*sizeof(double));
else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT)
allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuFloatComplex));
allocRemoteBuffer((void **)&(tmp->cM),
((size_t) tmp->allocsize)*sizeof(cuFloatComplex));
else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE)
allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuDoubleComplex));
allocRemoteBuffer((void **)&(tmp->cM),
((size_t) tmp->allocsize)*sizeof(cuDoubleComplex));
else
return SPGPU_UNSUPPORTED; // Unsupported params
//fprintf(stderr,"From allocEllDevice: %d %d %d %p %p %p\n",tmp->maxRowSize,
@ -104,13 +108,17 @@ void zeroEllDevice(void *remoteMatrix)
struct EllDevice *tmp = (struct EllDevice *) remoteMatrix;
if (tmp->dataType == SPGPU_TYPE_FLOAT)
cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(float));
cudaMemset((void *)tmp->cM, 0,
((size_t) tmp->allocsize)*sizeof(float));
else if (tmp->dataType == SPGPU_TYPE_DOUBLE)
cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(double));
cudaMemset((void *)tmp->cM, 0,
((size_t) tmp->allocsize)*sizeof(double));
else if (tmp->dataType == SPGPU_TYPE_COMPLEX_FLOAT)
cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(cuFloatComplex));
cudaMemset((void *)tmp->cM, 0,
((size_t) tmp->allocsize)*sizeof(cuFloatComplex));
else if (tmp->dataType == SPGPU_TYPE_COMPLEX_DOUBLE)
cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(cuDoubleComplex));
cudaMemset((void *)tmp->cM, 0,
((size_t) tmp->allocsize)*sizeof(cuDoubleComplex));
else
return ; // Unsupported params
//fprintf(stderr,"From allocEllDevice: %d %d %d %p %p %p\n",tmp->maxRowSize,
@ -318,10 +326,14 @@ int writeEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn,
{ int i;
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
// Ex updateFromHost function
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float));
if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
i = writeRemoteBuffer((void*) val, (void *)devMat->cM,
((size_t) devMat->allocsize)*sizeof(float));
if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP,
((size_t) devMat->allocsize)*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS,
((size_t) devMat->rows)*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag,
((size_t) devMat->rows)*sizeof(int));
//i = writeEllDevice(deviceMat, (void *) val, ja, irn);
/*if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i);
@ -333,10 +345,14 @@ int writeEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* ir
{ int i;
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
// Ex updateFromHost function
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double));
if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
i = writeRemoteBuffer((void*) val, (void *)devMat->cM,
((size_t) devMat->allocsize)*sizeof(double));
if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP,
((size_t) devMat->allocsize)*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS,
((size_t) devMat->rows)*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag,
((size_t) devMat->rows)*sizeof(int));
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/
if (i != 0) {
@ -349,10 +365,14 @@ int writeEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int
{ int i;
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
// Ex updateFromHost function
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex));
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
i = writeRemoteBuffer((void*) val, (void *)devMat->cM,
((size_t) devMat->allocsize)*sizeof(cuFloatComplex));
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP,
((size_t) devMat->allocsize)*sizeof(int));
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS,
((size_t) devMat->rows)*sizeof(int));
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag,
((size_t) devMat->rows)*sizeof(int));
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);
if (i != 0) {
@ -365,10 +385,14 @@ int writeEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, i
{ int i;
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
// Ex updateFromHost function
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex));
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
i = writeRemoteBuffer((void*) val, (void *)devMat->cM,
((size_t) devMat->allocsize)*sizeof(cuDoubleComplex));
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP,
((size_t) devMat->allocsize)*sizeof(int));
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS,
((size_t) devMat->rows)*sizeof(int));
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag,
((size_t) devMat->rows)*sizeof(int));
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);
if (i != 0) {
@ -380,10 +404,14 @@ int writeEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, i
int readEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag)
{ int i;
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(float));
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
i = readRemoteBuffer((void *) val, (void *)devMat->cM,
((size_t) devMat->allocsize)*sizeof(float));
i = readRemoteBuffer((void *) ja, (void *)devMat->rP,
((size_t) devMat->allocsize)*sizeof(int));
i = readRemoteBuffer((void *) irn, (void *)devMat->rS,
((size_t) devMat->rows)*sizeof(int));
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag,
((size_t) devMat->rows)*sizeof(int));
/*i = readEllDevice(deviceMat, (void *) val, ja, irn);
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i);
@ -394,10 +422,14 @@ int readEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn,
int readEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag)
{ int i;
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(double));
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
i = readRemoteBuffer((void *) val, (void *)devMat->cM,
((size_t) devMat->allocsize)*sizeof(double));
i = readRemoteBuffer((void *) ja, (void *)devMat->rP,
((size_t) devMat->allocsize)*sizeof(int));
i = readRemoteBuffer((void *) irn, (void *)devMat->rS,
((size_t) devMat->rows)*sizeof(int));
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag,
((size_t) devMat->rows)*sizeof(int));
/*if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
}*/
@ -407,10 +439,14 @@ int readEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn
int readEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag)
{ int i;
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex));
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
i = readRemoteBuffer((void *) val, (void *)devMat->cM,
((size_t) devMat->allocsize)*sizeof(cuFloatComplex));
i = readRemoteBuffer((void *) ja, (void *)devMat->rP,
((size_t) devMat->allocsize)*sizeof(int));
i = readRemoteBuffer((void *) irn, (void *)devMat->rS,
((size_t) devMat->rows)*sizeof(int));
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag,
((size_t) devMat->rows)*sizeof(int));
/*if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
}*/
@ -420,10 +456,14 @@ int readEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int
int readEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag)
{ int i;
struct EllDevice *devMat = (struct EllDevice *) deviceMat;
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex));
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
i = readRemoteBuffer((void *) val, (void *)devMat->cM,
((size_t) devMat->allocsize)*sizeof(cuDoubleComplex));
i = readRemoteBuffer((void *) ja, (void *)devMat->rP,
((size_t) devMat->allocsize)*sizeof(int));
i = readRemoteBuffer((void *) irn, (void *)devMat->rS,
((size_t) devMat->rows)*sizeof(int));
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag,
((size_t) devMat->rows)*sizeof(int));
/*if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
}*/
@ -459,17 +499,23 @@ int psiCopyCooToElgFloat(int nr, int nc, int nza, int hacksz, int ldv, int nzm,
spgpuHandle_t handle;
handle = psb_cudaGetHandle();
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(float));
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(float));
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
if (i==0) psi_cuda_s_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm,
allocRemoteBuffer((void **)&(devIdisp), ((size_t) (nr+1))*sizeof(int));
allocRemoteBuffer((void **)&(devJa), ((size_t) (nza))*sizeof(int));
allocRemoteBuffer((void **)&(devVal), ((size_t) (nza))*sizeof(float));
i = writeRemoteBuffer((void*) val, (void *)devVal,
((size_t) nza)*sizeof(float));
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa,
((size_t) nza)*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS,
((size_t) devMat->rows)*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp,
((size_t) devMat->rows+1)*sizeof(int));
if (i==0) psi_cuda_s_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,
ldv,nzm,
(int *) devMat->rS,devIdisp,devJa,devVal,
(int *) devMat->diag, (int *) devMat->rP, (float *)devMat->cM);
(int *) devMat->diag, (int *) devMat->rP,
(float *)devMat->cM);
// Ex updateFromHost function
//i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float));
//if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
@ -498,17 +544,23 @@ int psiCopyCooToElgDouble(int nr, int nc, int nza, int hacksz, int ldv, int nzm,
spgpuHandle_t handle;
handle = psb_cudaGetHandle();
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(double));
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(double));
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
if (i==0) psi_cuda_d_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm,
allocRemoteBuffer((void **)&(devIdisp), ((size_t) nr+1)*sizeof(int));
allocRemoteBuffer((void **)&(devJa), ((size_t) nza)*sizeof(int));
allocRemoteBuffer((void **)&(devVal), ((size_t) nza)*sizeof(double));
i = writeRemoteBuffer((void*) val, (void *)devVal,
((size_t) nza)*sizeof(double));
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa,
((size_t) nza)*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS,
((size_t) devMat->rows)*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp,
((size_t) devMat->rows+1)*sizeof(int));
if (i==0) psi_cuda_d_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,
ldv,nzm,
(int *) devMat->rS,devIdisp,devJa,devVal,
(int *) devMat->diag, (int *) devMat->rP, (double *)devMat->cM);
(int *) devMat->diag, (int *) devMat->rP,
(double *)devMat->cM);
// Ex updateFromHost function
//i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double));
//if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
@ -536,17 +588,23 @@ int psiCopyCooToElgFloatComplex(int nr, int nc, int nza, int hacksz, int ldv, in
spgpuHandle_t handle;
handle = psb_cudaGetHandle();
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuFloatComplex));
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuFloatComplex));
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
if (i==0) psi_cuda_c_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm,
allocRemoteBuffer((void **)&(devIdisp), ((size_t) nr+1)*sizeof(int));
allocRemoteBuffer((void **)&(devJa), ((size_t) nza)*sizeof(int));
allocRemoteBuffer((void **)&(devVal), ((size_t) nza)*sizeof(cuFloatComplex));
i = writeRemoteBuffer((void*) val, (void *)devVal,
((size_t) nza)*sizeof(cuFloatComplex));
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa,
((size_t) nza)*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS,
((size_t) devMat->rows)*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp,
((size_t) devMat->rows+1)*sizeof(int));
if (i==0) psi_cuda_c_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,
ldv,nzm,
(int *) devMat->rS,devIdisp,devJa,devVal,
(int *) devMat->diag,(int *) devMat->rP, (float complex *)devMat->cM);
(int *) devMat->diag,(int *) devMat->rP,
(float complex *)devMat->cM);
// Ex updateFromHost function
//i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float complex));
//if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
@ -575,17 +633,23 @@ int psiCopyCooToElgDoubleComplex(int nr, int nc, int nza, int hacksz, int ldv, i
spgpuHandle_t handle;
handle = psb_cudaGetHandle();
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuDoubleComplex));
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuDoubleComplex));
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
if (i==0) psi_cuda_z_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm,
allocRemoteBuffer((void **)&(devIdisp), ((size_t) nr+1)*sizeof(int));
allocRemoteBuffer((void **)&(devJa), ((size_t) nza)*sizeof(int));
allocRemoteBuffer((void **)&(devVal), ((size_t) nza)*sizeof(cuDoubleComplex));
i = writeRemoteBuffer((void*) val, (void *)devVal,
((size_t) nza)*sizeof(cuDoubleComplex));
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa,
((size_t) nza)*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS,
((size_t) devMat->rows)*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp,
((size_t) (devMat->rows+1))*sizeof(int));
if (i==0) psi_cuda_z_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,
ldv,nzm,
(int *) devMat->rS,devIdisp,devJa,devVal,
(int *) devMat->diag,(int *) devMat->rP, (double complex *)devMat->cM);
(int *) devMat->diag,(int *) devMat->rP,
(double complex *)devMat->cM);
// Ex updateFromHost function
//i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double complex));
//if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));

@ -77,7 +77,7 @@ int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX,
cMat->mvbuffer = NULL;
}
//CHECK_CUDA(cudaMalloc((void **) &(cMat->mvbuffer), bfsz));
allocRemoteBuffer((void **) &(cMat->mvbuffer), bfsz);
allocRemoteBuffer((void **) &(cMat->mvbuffer), (size_t) bfsz);
cMat->mvbsize = bfsz;
}
CHECK_CUSPARSE(cusparseCsrmvEx(*my_handle,
@ -115,7 +115,7 @@ int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX,
cMat->mvbuffer = NULL;
}
//CHECK_CUDA(cudaMalloc((void **) &(cMat->mvbuffer), bfsz));
allocRemoteBuffer((void **) &(cMat->mvbuffer), bfsz);
allocRemoteBuffer((void **) &(cMat->mvbuffer), (size_t) bfsz);
cMat->mvbsize = bfsz;
}
@ -189,7 +189,7 @@ int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX,
cMat->svbuffer = NULL;
}
//CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz));
allocRemoteBuffer((void **) &(cMat->svbuffer), bfsz);
allocRemoteBuffer((void **) &(cMat->svbuffer), (size_t) bfsz);
cMat->svbsize=bfsz;
CHECK_CUSPARSE(cusparseSpSV_analysis(*my_handle,
@ -251,11 +251,14 @@ int T_CSRGDeviceAlloc(T_Cmat *Matrix,int nr, int nc, int nz)
cMat->nz = nz;
if (nr1 == 0) nr1 = 1;
if (nz1 == 0) nz1 = 1;
if ((rc= allocRemoteBuffer(((void **) &(cMat->irp)), ((nr1+1)*sizeof(int)))) != 0)
if ((rc= allocRemoteBuffer(((void **) &(cMat->irp)),
(((size_t) nr1+1)*sizeof(int)))) != 0)
return(rc);
if ((rc= allocRemoteBuffer(((void **) &(cMat->ja)), ((nz1)*sizeof(int)))) != 0)
if ((rc= allocRemoteBuffer(((void **) &(cMat->ja)),
(((size_t) nz1)*sizeof(int)))) != 0)
return(rc);
if ((rc= allocRemoteBuffer(((void **) &(cMat->val)), ((nz1)*sizeof(TYPE)))) != 0)
if ((rc= allocRemoteBuffer(((void **) &(cMat->val)),
(((size_t) nz1)*sizeof(TYPE)))) != 0)
return(rc);
#if CUDA_SHORT_VERSION <= 10
if ((rc= cusparseCreateMatDescr(&(cMat->descr))) !=0)
@ -286,7 +289,7 @@ int T_CSRGDeviceAlloc(T_Cmat *Matrix,int nr, int nc, int nz)
/* } */
if (bfsz > 0) {
//CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz));
allocRemoteBuffer((void **) &(cMat->svbuffer), bfsz);
allocRemoteBuffer((void **) &(cMat->svbuffer), (size_t) bfsz);
} else {
cMat->svbuffer=NULL;
@ -481,16 +484,16 @@ int T_CSRGHost2Device(T_Cmat *Matrix, int m, int n, int nz,
cusparseHandle_t *my_handle=getHandle();
if ((rc=writeRemoteBuffer((void *) irp, (void *) cMat->irp,
(m+1)*sizeof(int)))
((size_t) m+1)*sizeof(int)))
!= SPGPU_SUCCESS)
return(rc);
if ((rc=writeRemoteBuffer((void *) ja,(void *) cMat->ja,
(nz)*sizeof(int)))
((size_t) nz)*sizeof(int)))
!= SPGPU_SUCCESS)
return(rc);
if ((rc=writeRemoteBuffer((void *) val, (void *) cMat->val,
(nz)*sizeof(TYPE)))
((size_t) nz)*sizeof(TYPE)))
!= SPGPU_SUCCESS)
return(rc);
#if (CUDA_SHORT_VERSION > 10 ) && (CUDA_VERSION < 11030)
@ -515,14 +518,17 @@ int T_CSRGDevice2Host(T_Cmat *Matrix, int m, int n, int nz,
int rc;
T_CSRGDeviceMat *cMat = Matrix->mat;
if ((rc=readRemoteBuffer((void *) irp, (void *) cMat->irp, (m+1)*sizeof(int)))
if ((rc=readRemoteBuffer((void *) irp, (void *) cMat->irp,
((size_t) m+1)*sizeof(int)))
!= SPGPU_SUCCESS)
return(rc);
if ((rc=readRemoteBuffer((void *) ja, (void *) cMat->ja, (nz)*sizeof(int)))
if ((rc=readRemoteBuffer((void *) ja, (void *) cMat->ja,
((size_t) nz)*sizeof(int)))
!= SPGPU_SUCCESS)
return(rc);
if ((rc=readRemoteBuffer((void *) val, (void *) cMat->val, (nz)*sizeof(TYPE)))
if ((rc=readRemoteBuffer((void *) val, (void *) cMat->val,
((size_t) nz)*sizeof(TYPE)))
!= SPGPU_SUCCESS)
return(rc);
@ -679,24 +685,27 @@ int T_HYBGHost2Device(T_Hmat *Matrix, int m, int n, int nz,
if (nr1 == 0) nr1 = 1;
if (nz1 == 0) nz1 = 1;
if ((rc= allocRemoteBuffer(((void **) &(hMat->irp)), ((nr1+1)*sizeof(int)))) != 0)
if ((rc= allocRemoteBuffer(((void **) &(hMat->irp)),
(((size_t) nr1+1)*sizeof(int)))) != 0)
return(rc);
if ((rc= allocRemoteBuffer(((void **) &(hMat->ja)), ((nz1)*sizeof(int)))) != 0)
if ((rc= allocRemoteBuffer(((void **) &(hMat->ja)),
(((size_t) nz1)*sizeof(int)))) != 0)
return(rc);
if ((rc= allocRemoteBuffer(((void **) &(hMat->val)), ((nz1)*sizeof(TYPE)))) != 0)
if ((rc= allocRemoteBuffer(((void **) &(hMat->val)),
(((size_t) nz1)*sizeof(TYPE)))) != 0)
return(rc);
if ((rc=writeRemoteBuffer((void *) irp, (void *) hMat->irp,
(m+1)*sizeof(int)))
((size_t) m+1)*sizeof(int)))
!= SPGPU_SUCCESS)
return(rc);
if ((rc=writeRemoteBuffer((void *) ja,(void *) hMat->ja,
(nz)*sizeof(int)))
((size_t) nz)*sizeof(int)))
!= SPGPU_SUCCESS)
return(rc);
if ((rc=writeRemoteBuffer((void *) val, (void *) hMat->val,
(nz)*sizeof(TYPE)))
((size_t) nz)*sizeof(TYPE)))
!= SPGPU_SUCCESS)
return(rc);
/* rc = (int) cusparseGetMatType(hMat->descr); */

@ -93,38 +93,45 @@ int allocHdiagDevice(void **remoteMatrix, HdiagDeviceParams* params)
#endif
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->hackOffsets), (tmp->hackCount+1)*sizeof(int));
ret=allocRemoteBuffer((void **)&(tmp->hackOffsets),
((size_t) tmp->hackCount+1)*sizeof(int));
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->hdiaOffsets), tmp->allocationHeight*sizeof(int));
ret=allocRemoteBuffer((void **)&(tmp->hdiaOffsets),
((size_t) tmp->allocationHeight)*sizeof(int));
/* tmp->baseIndex = params->firstIndex; */
if (params->elementType == SPGPU_TYPE_INT)
{
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(int));
ret=allocRemoteBuffer((void **)&(tmp->cM),
((size_t) tmp->hackSize)*tmp->allocationHeight*sizeof(int));
}
else if (params->elementType == SPGPU_TYPE_FLOAT)
{
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(float));
ret=allocRemoteBuffer((void **)&(tmp->cM),
((size_t) tmp->hackSize)*tmp->allocationHeight*sizeof(float));
}
else if (params->elementType == SPGPU_TYPE_DOUBLE)
{
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(double));
ret=allocRemoteBuffer((void **)&(tmp->cM),
((size_t) tmp->hackSize)*tmp->allocationHeight*sizeof(double));
}
else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT)
{
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(cuFloatComplex));
ret=allocRemoteBuffer((void **)&(tmp->cM),
((size_t) tmp->hackSize)*tmp->allocationHeight*sizeof(cuFloatComplex));
}
else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE)
{
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(cuDoubleComplex));
ret=allocRemoteBuffer((void **)&(tmp->cM),
((size_t) tmp->hackSize)*tmp->allocationHeight*sizeof(cuDoubleComplex));
}
else
return SPGPU_UNSUPPORTED; // Unsupported params
@ -137,7 +144,8 @@ int FallocHdiagDevice(void** deviceMat, unsigned int rows, unsigned int cols,
{ int i=0;
HdiagDeviceParams p;
p = getHdiagDeviceParams(rows, cols, allocationHeight, hackSize, hackCount,elementType);
p = getHdiagDeviceParams(rows, cols, allocationHeight,
hackSize, hackCount,elementType);
i = allocHdiagDevice(deviceMat, &p);
#if DEBUG
@ -188,14 +196,14 @@ int writeHdiagDeviceDouble(void* deviceMat, double* val, int* hdiaOffsets, int *
if(i== SPGPU_SUCCESS)
i = writeRemoteBuffer((void *) hackOffsets,(void *) devMat->hackOffsets,
(devMat->hackCount+1)*sizeof(int));
((size_t) devMat->hackCount+1)*sizeof(int));
if(i== SPGPU_SUCCESS)
i = writeRemoteBuffer((void*) hdiaOffsets, (void *)devMat->hdiaOffsets,
devMat->allocationHeight*sizeof(int));
((size_t) devMat->allocationHeight)*sizeof(int));
if(i== SPGPU_SUCCESS)
i = writeRemoteBuffer((void*) val, (void *)devMat->cM,
devMat->allocationHeight*devMat->hackSize*sizeof(double));
((size_t) devMat->allocationHeight)*devMat->hackSize*sizeof(double));
if (i!=0)
fprintf(stderr,"Error in writeHdiagDeviceDouble %d\n",i);
@ -304,14 +312,14 @@ int writeHdiagDeviceFloat(void* deviceMat, float* val, int* hdiaOffsets, int *ha
if(i== SPGPU_SUCCESS)
i = writeRemoteBuffer((void *) hackOffsets,(void *) devMat->hackOffsets,
(devMat->hackCount+1)*sizeof(int));
((size_t) devMat->hackCount+1)*sizeof(int));
if(i== SPGPU_SUCCESS)
i = writeRemoteBuffer((void*) hdiaOffsets, (void *)devMat->hdiaOffsets,
devMat->allocationHeight*sizeof(int));
((size_t) devMat->allocationHeight)*sizeof(int));
if(i== SPGPU_SUCCESS)
i = writeRemoteBuffer((void*) val, (void *)devMat->cM,
devMat->allocationHeight*devMat->hackSize*sizeof(float));
((size_t) devMat->allocationHeight)*devMat->hackSize*sizeof(float));
if (i!=0)
fprintf(stderr,"Error in writeHdiagDeviceFloat %d\n",i);

@ -71,6 +71,8 @@ int allocHllDevice(void ** remoteMatrix, HllDeviceParams* params)
{
HllDevice *tmp = (HllDevice *)malloc(sizeof(HllDevice));
int ret=SPGPU_SUCCESS;
size_t tt;
int ti;
*remoteMatrix = (void *)tmp;
tmp->hackSize = params->hackSize;
@ -87,41 +89,54 @@ int allocHllDevice(void ** remoteMatrix, HllDeviceParams* params)
//printf("hackOffsLength %d\n",tmp->hackOffsLength);
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->rP), tmp->allocsize*sizeof(int));
ret=allocRemoteBuffer((void **)&(tmp->rP),
((size_t) tmp->allocsize)*sizeof(int));
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->rS), tmp->rows*sizeof(int));
ret=allocRemoteBuffer((void **)&(tmp->rS),
((size_t) tmp->rows)*sizeof(int));
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->diag), tmp->rows*sizeof(int));
ret=allocRemoteBuffer((void **)&(tmp->diag),
((size_t) tmp->rows)*sizeof(int));
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->hackOffs), ((tmp->hackOffsLength+1)*sizeof(int)));
ret=allocRemoteBuffer((void **)&(tmp->hackOffs),
(((size_t) tmp->hackOffsLength+1)*sizeof(int)));
if (params->elementType == SPGPU_TYPE_INT)
{
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(int));
ret=allocRemoteBuffer((void **)&(tmp->cM),
((size_t) tmp->allocsize)*sizeof(int));
}
else if (params->elementType == SPGPU_TYPE_FLOAT)
{
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(float));
ret=allocRemoteBuffer((void **)&(tmp->cM),
((size_t) tmp->allocsize)*sizeof(float));
}
else if (params->elementType == SPGPU_TYPE_DOUBLE)
{
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(double));
if (ret == SPGPU_SUCCESS) {
/* tt = ((size_t) tmp->allocsize)*sizeof(double);
ti = ((size_t) tmp->allocsize)*sizeof(double);
fprintf(stderr,"%ld %d %d\n",tt, ti, tmp->allocsize);*/
ret=allocRemoteBuffer((void **)&(tmp->cM),
((size_t) tmp->allocsize)*sizeof(double));
}
}
else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT)
{
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuFloatComplex));
ret=allocRemoteBuffer((void **)&(tmp->cM),
((size_t) tmp->allocsize)*sizeof(cuFloatComplex));
}
else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE)
{
if (ret == SPGPU_SUCCESS)
ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuDoubleComplex));
ret=allocRemoteBuffer((void **)&(tmp->cM),
((size_t) tmp->allocsize)*sizeof(cuDoubleComplex));
}
else
return SPGPU_UNSUPPORTED; // Unsupported params
@ -151,7 +166,7 @@ int FallocHllDevice(void** deviceMat,unsigned int hksize, unsigned int rows, un
p = bldHllDeviceParams(hksize, rows, nzeros, allocsize, elementType, firstIndex);
i = allocHllDevice(deviceMat, &p);
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i);
fprintf(stderr,"From routine : %s : %d \n","FallocHllDevice",i);
}
return(i);
}
@ -259,11 +274,16 @@ int writeHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int*
{ int i;
HllDevice *devMat = (HllDevice *) deviceMat;
// Ex updateFromHost function
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float));
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
i = writeRemoteBuffer((void*) val, (void *)devMat->cM,
((size_t) devMat->allocsize)*sizeof(float));
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP,
((size_t) devMat->allocsize)*sizeof(int));
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS,
((size_t) devMat->rows)*sizeof(int));
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag,
((size_t) devMat->rows)*sizeof(int));
i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs,
((size_t) devMat->hackOffsLength+1)*sizeof(int));
//i = writeEllDevice(deviceMat, (void *) val, ja, irn);
/*if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i);
@ -275,11 +295,16 @@ int writeHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int
{ int i;
HllDevice *devMat = (HllDevice *) deviceMat;
// Ex updateFromHost function
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double));
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
i = writeRemoteBuffer((void*) val, (void *)devMat->cM,
((size_t) devMat->allocsize)*sizeof(double));
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP,
((size_t) devMat->allocsize)*sizeof(int));
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS,
((size_t) devMat->rows)*sizeof(int));
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag,
((size_t) devMat->rows)*sizeof(int));
i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs,
((size_t) devMat->hackOffsLength+1)*sizeof(int));
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i);
@ -291,11 +316,16 @@ int writeHllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int
{ int i;
HllDevice *devMat = (HllDevice *) deviceMat;
// Ex updateFromHost function
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex));
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
i = writeRemoteBuffer((void*) val, (void *)devMat->cM,
((size_t) devMat->allocsize)*sizeof(cuFloatComplex));
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP,
((size_t) devMat->allocsize)*sizeof(int));
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS,
((size_t) devMat->rows)*sizeof(int));
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag,
((size_t) devMat->rows)*sizeof(int));
i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs,
((size_t) devMat->hackOffsLength+1)*sizeof(int));
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i);
@ -307,11 +337,16 @@ int writeHllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, i
{ int i;
HllDevice *devMat = (HllDevice *) deviceMat;
// Ex updateFromHost function
i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex));
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
i = writeRemoteBuffer((void*) val, (void *)devMat->cM,
((size_t) devMat->allocsize)*sizeof(cuDoubleComplex));
i = writeRemoteBuffer((void*) ja, (void *)devMat->rP,
((size_t) devMat->allocsize)*sizeof(int));
i = writeRemoteBuffer((void*) irn, (void *)devMat->rS,
((size_t) devMat->rows)*sizeof(int));
i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag,
((size_t) devMat->rows)*sizeof(int));
i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs,
((size_t) devMat->hackOffsLength+1)*sizeof(int));
/*i = writeEllDevice(deviceMat, (void *) val, ja, irn);
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i);
@ -322,11 +357,16 @@ int writeHllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, i
int readHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag)
{ int i;
HllDevice *devMat = (HllDevice *) deviceMat;
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(float));
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
i = readRemoteBuffer((void *) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
i = readRemoteBuffer((void *) val, (void *)devMat->cM,
((size_t) devMat->allocsize)*sizeof(float));
i = readRemoteBuffer((void *) ja, (void *)devMat->rP,
((size_t) devMat->allocsize)*sizeof(int));
i = readRemoteBuffer((void *) irn, (void *)devMat->rS,
((size_t) devMat->rows)*sizeof(int));
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag,
((size_t) devMat->rows)*sizeof(int));
i = readRemoteBuffer((void *) hkoffs, (void *)devMat->hackOffs,
((size_t) devMat->hackOffsLength+1)*sizeof(int));
/*i = readEllDevice(deviceMat, (void *) val, ja, irn);
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i);
@ -337,11 +377,16 @@ int readHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* i
int readHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag)
{ int i;
HllDevice *devMat = (HllDevice *) deviceMat;
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(double));
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
i = readRemoteBuffer((void *) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
i = readRemoteBuffer((void *) val, (void *)devMat->cM,
((size_t) devMat->allocsize)*sizeof(double));
i = readRemoteBuffer((void *) ja, (void *)devMat->rP,
((size_t) devMat->allocsize)*sizeof(int));
i = readRemoteBuffer((void *) irn, (void *)devMat->rS,
((size_t) devMat->rows)*sizeof(int));
i = readRemoteBuffer((void *) idiag, (void *)devMat->diag,
((size_t) devMat->rows)*sizeof(int));
i = readRemoteBuffer((void *) hkoffs, (void *)devMat->hackOffs,
((size_t) devMat->hackOffsLength+1)*sizeof(int));
/*if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
}*/
@ -351,11 +396,16 @@ int readHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int*
int readHllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int *hkoffs, int* irn, int *idiag)
{ int i;
HllDevice *devMat = (HllDevice *) deviceMat;
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex));
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
i = readRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
i = readRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
i = readRemoteBuffer((void *) val, (void *)devMat->cM,
((size_t) devMat->allocsize)*sizeof(cuFloatComplex));
i = readRemoteBuffer((void *) ja, (void *)devMat->rP,
((size_t) devMat->allocsize)*sizeof(int));
i = readRemoteBuffer((void *) irn, (void *)devMat->rS,
((size_t) devMat->rows)*sizeof(int));
i = readRemoteBuffer((void*) idiag, (void *)devMat->diag,
((size_t) devMat->rows)*sizeof(int));
i = readRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs,
((size_t) devMat->hackOffsLength+1)*sizeof(int));
/*if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
}*/
@ -365,11 +415,16 @@ int readHllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int
int readHllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int *hkoffs, int* irn, int *idiag)
{ int i;
HllDevice *devMat = (HllDevice *) deviceMat;
i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex));
i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int));
i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int));
i = readRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int));
i = readRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
i = readRemoteBuffer((void *) val, (void *)devMat->cM,
((size_t) devMat->allocsize)*sizeof(cuDoubleComplex));
i = readRemoteBuffer((void *) ja, (void *)devMat->rP,
((size_t) devMat->allocsize)*sizeof(int));
i = readRemoteBuffer((void *) irn, (void *)devMat->rS,
((size_t) devMat->rows)*sizeof(int));
i = readRemoteBuffer((void*) idiag, (void *)devMat->diag,
((size_t) devMat->rows)*sizeof(int));
i = readRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs,
((size_t) devMat->hackOffsLength+1)*sizeof(int));
/*if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i);
}*/
@ -388,16 +443,21 @@ int psiCopyCooToHlgFloat(int nr, int nc, int nza, int hacksz, int noffs, int isz
int *devIdisp, *devJa;
int *tja;
//fprintf(stderr,"devMat: %p\n",devMat);
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(float));
allocRemoteBuffer((void **)&(devIdisp), ((size_t) nr+1)*sizeof(int));
allocRemoteBuffer((void **)&(devJa), ((size_t) nza)*sizeof(int));
allocRemoteBuffer((void **)&(devVal), ((size_t) nza)*sizeof(float));
// fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength);
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(float));
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
i = writeRemoteBuffer((void*) val, (void *)devVal,
((size_t) nza)*sizeof(float));
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa,
((size_t) nza)*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS,
((size_t) devMat->rows)*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs,
((size_t) devMat->hackOffsLength+1)*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp,
((size_t) devMat->rows+1)*sizeof(int));
//cudaSync();
handle = psb_cudaGetHandle();
@ -427,20 +487,25 @@ int psiCopyCooToHlgDouble(int nr, int nc, int nza, int hacksz, int noffs, int is
int *devIdisp, *devJa;
int *tja;
//fprintf(stderr,"devMat: %p\n",devMat);
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(double));
allocRemoteBuffer((void **)&(devIdisp), ((size_t) nr+1)*sizeof(int));
allocRemoteBuffer((void **)&(devJa), ((size_t) nza)*sizeof(int));
allocRemoteBuffer((void **)&(devVal), ((size_t) nza)*sizeof(double));
// fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength);
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(double));
i = writeRemoteBuffer((void*) val, (void *)devVal,
((size_t) nza)*sizeof(double));
//fprintf(stderr,"WriteRemoteBuffer val %d\n",i);
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa,
((size_t) nza)*sizeof(int));
//fprintf(stderr,"WriteRemoteBuffer ja %d\n",i);
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS,
((size_t) devMat->rows)*sizeof(int));
//fprintf(stderr,"WriteRemoteBuffer irn %d\n",i);
if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs,
((size_t) devMat->hackOffsLength+1)*sizeof(int));
//fprintf(stderr,"WriteRemoteBuffer hoffs %d\n",i);
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp,
((size_t) devMat->rows+1)*sizeof(int));
//fprintf(stderr,"WriteRemoteBuffer idisp %d\n",i);
//cudaSync();
//fprintf(stderr," hacksz: %d \n",hacksz);
@ -448,7 +513,8 @@ int psiCopyCooToHlgDouble(int nr, int nc, int nza, int hacksz, int noffs, int is
psi_cuda_d_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz,
(int *) devMat->rS, (int *) devMat->hackOffs,
devIdisp,devJa,devVal,
(int *) devMat->diag, (int *) devMat->rP, (double *)devMat->cM);
(int *) devMat->diag, (int *) devMat->rP,
(double *)devMat->cM);
freeRemoteBuffer(devIdisp);
freeRemoteBuffer(devJa);
@ -471,23 +537,29 @@ int psiCopyCooToHlgFloatComplex(int nr, int nc, int nza, int hacksz, int noffs,
int *devIdisp, *devJa;
int *tja;
//fprintf(stderr,"devMat: %p\n",devMat);
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuFloatComplex));
allocRemoteBuffer((void **)&(devIdisp), ((size_t) nr+1)*sizeof(int));
allocRemoteBuffer((void **)&(devJa), ((size_t) nza)*sizeof(int));
allocRemoteBuffer((void **)&(devVal), ((size_t) nza)*sizeof(cuFloatComplex));
// fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength);
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuFloatComplex));
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
i = writeRemoteBuffer((void*) val, (void *)devVal,
((size_t) nza)*sizeof(cuFloatComplex));
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa,
((size_t) nza)*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS,
((size_t) devMat->rows)*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs,
((size_t) devMat->hackOffsLength+1)*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp,
((size_t) devMat->rows+1)*sizeof(int));
//cudaSync();
handle = psb_cudaGetHandle();
psi_cuda_c_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz,
(int *) devMat->rS, (int *) devMat->hackOffs,
devIdisp,devJa,devVal,
(int *) devMat->diag,(int *) devMat->rP, (float complex *)devMat->cM);
(int *) devMat->diag,(int *) devMat->rP,
(float complex *)devMat->cM);
freeRemoteBuffer(devIdisp);
freeRemoteBuffer(devJa);
@ -510,23 +582,29 @@ int psiCopyCooToHlgDoubleComplex(int nr, int nc, int nza, int hacksz, int noffs,
int *devIdisp, *devJa;
int *tja;
//fprintf(stderr,"devMat: %p\n",devMat);
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuDoubleComplex));
allocRemoteBuffer((void **)&(devIdisp), ((size_t) nr+1)*sizeof(int));
allocRemoteBuffer((void **)&(devJa), ((size_t) nza)*sizeof(int));
allocRemoteBuffer((void **)&(devVal), ((size_t) nza)*sizeof(cuDoubleComplex));
// fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength);
i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuDoubleComplex));
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
i = writeRemoteBuffer((void*) val, (void *)devVal,
((size_t) nza)*sizeof(cuDoubleComplex));
if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa,
((size_t) nza)*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS,
((size_t) devMat->rows)*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs,
((size_t) devMat->hackOffsLength+1)*sizeof(int));
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp,
((size_t) devMat->rows+1)*sizeof(int));
//cudaSync();
handle = psb_cudaGetHandle();
psi_cuda_z_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz,
(int *) devMat->rS, (int *) devMat->hackOffs,
devIdisp,devJa,devVal,
(int *) devMat->diag,(int *) devMat->rP, (double complex *)devMat->cM);
(int *) devMat->diag,(int *) devMat->rP,
(double complex *)devMat->cM);
freeRemoteBuffer(devIdisp);
freeRemoteBuffer(devJa);

@ -39,14 +39,14 @@
int registerMappedInt(void *buff, void **d_p, int n, int dummy)
{
return registerMappedMemory(buff,d_p,n*sizeof(int));
return registerMappedMemory(buff,d_p,((size_t) n)*sizeof(int));
}
int writeMultiVecDeviceInt(void* deviceVec, int* hostVec)
{ int i;
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_,
devVec->pitch_*devVec->count_*sizeof(int));
((size_t) devVec->pitch_)*devVec->count_*sizeof(int));
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","FallocMultiVecDevice",i);
}
@ -67,7 +67,7 @@ int readMultiVecDeviceInt(void* deviceVec, int* hostVec)
{ int i,j;
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_,
devVec->pitch_*devVec->count_*sizeof(int));
((size_t) devVec->pitch_)*devVec->count_*sizeof(int));
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceInt",i);
}

@ -39,14 +39,15 @@
int registerMappedFloat(void *buff, void **d_p, int n, float dummy)
{
return registerMappedMemory(buff,d_p,n*sizeof(float));
return registerMappedMemory(buff,d_p,((size_t) n)*sizeof(float));
}
int writeMultiVecDeviceFloat(void* deviceVec, float* hostVec)
{ int i;
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
// Ex updateFromHost vector function
i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_, devVec->pitch_*devVec->count_*sizeof(float));
i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_,
((size_t) devVec->pitch_)*devVec->count_*sizeof(float));
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","FallocMultiVecDevice",i);
}
@ -66,7 +67,7 @@ int readMultiVecDeviceFloat(void* deviceVec, float* hostVec)
{ int i,j;
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_,
devVec->pitch_*devVec->count_*sizeof(float));
((size_t) devVec->pitch_)*devVec->count_*sizeof(float));
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceFloat",i);
}

@ -98,7 +98,8 @@ int allocMultiVecDevice(void ** remoteMultiVec, struct MultiVectorDeviceParams *
tmp->pitch_ = (((params->size*sizeof(int) + 255)/256)*256)/sizeof(int);
//fprintf(stderr,"Allocating an INT vector %ld\n",tmp->pitch_*tmp->count_*sizeof(double));
return allocRemoteBuffer((void **)&(tmp->v_), tmp->pitch_*params->count*sizeof(int));
return allocRemoteBuffer((void **)&(tmp->v_),
((size_t) tmp->pitch_)*params->count*sizeof(int));
}
else if (params->elementType == SPGPU_TYPE_FLOAT)
{
@ -107,7 +108,8 @@ int allocMultiVecDevice(void ** remoteMultiVec, struct MultiVectorDeviceParams *
else
tmp->pitch_ = (((params->size*sizeof(float) + 255)/256)*256)/sizeof(float);
return allocRemoteBuffer((void **)&(tmp->v_), tmp->pitch_*params->count*sizeof(float));
return allocRemoteBuffer((void **)&(tmp->v_),
((size_t) tmp->pitch_)*params->count*sizeof(float));
}
else if (params->elementType == SPGPU_TYPE_DOUBLE)
{
@ -118,7 +120,8 @@ int allocMultiVecDevice(void ** remoteMultiVec, struct MultiVectorDeviceParams *
tmp->pitch_ = (int)(((params->size*sizeof(double) + 255)/256)*256)/sizeof(double);
//fprintf(stderr,"Allocating a DOUBLE vector %ld\n",tmp->pitch_*tmp->count_*sizeof(double));
return allocRemoteBuffer((void **)&(tmp->v_), tmp->pitch_*tmp->count_*sizeof(double));
return allocRemoteBuffer((void **)&(tmp->v_),
((size_t) tmp->pitch_)*tmp->count_*sizeof(double));
}
else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT)
{
@ -126,7 +129,8 @@ int allocMultiVecDevice(void ** remoteMultiVec, struct MultiVectorDeviceParams *
tmp->pitch_ = params->size;
else
tmp->pitch_ = (int)(((params->size*sizeof(cuFloatComplex) + 255)/256)*256)/sizeof(cuFloatComplex);
return allocRemoteBuffer((void **)&(tmp->v_), tmp->pitch_*tmp->count_*sizeof(cuFloatComplex));
return allocRemoteBuffer((void **)&(tmp->v_),
((size_t) tmp->pitch_)*tmp->count_*sizeof(cuFloatComplex));
}
else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE)
{
@ -134,7 +138,8 @@ int allocMultiVecDevice(void ** remoteMultiVec, struct MultiVectorDeviceParams *
tmp->pitch_ = params->size;
else
tmp->pitch_ = (int)(((params->size*sizeof(cuDoubleComplex) + 255)/256)*256)/sizeof(cuDoubleComplex);
return allocRemoteBuffer((void **)&(tmp->v_), tmp->pitch_*tmp->count_*sizeof(cuDoubleComplex));
return allocRemoteBuffer((void **)&(tmp->v_),
((size_t) tmp->pitch_)*tmp->count_*sizeof(cuDoubleComplex));
}
else
return SPGPU_UNSUPPORTED; // Unsupported params

@ -39,7 +39,7 @@
int registerMappedDoubleComplex(void *buff, void **d_p, int n, cuDoubleComplex dummy)
{
return registerMappedMemory(buff,d_p,n*sizeof(cuDoubleComplex));
return registerMappedMemory(buff,d_p,((size_t) n)*sizeof(cuDoubleComplex));
}
int writeMultiVecDeviceDoubleComplex(void* deviceVec, cuDoubleComplex* hostVec)
@ -47,7 +47,7 @@ int writeMultiVecDeviceDoubleComplex(void* deviceVec, cuDoubleComplex* hostVec)
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
// Ex updateFromHost vector function
i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_,
devVec->pitch_*devVec->count_*sizeof(cuDoubleComplex));
((size_t) devVec->pitch_)*devVec->count_*sizeof(cuDoubleComplex));
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","FallocMultiVecDevice",i);
}
@ -67,7 +67,7 @@ int readMultiVecDeviceDoubleComplex(void* deviceVec, cuDoubleComplex* hostVec)
{ int i,j;
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_,
devVec->pitch_*devVec->count_*sizeof(cuDoubleComplex));
((size_t) devVec->pitch_)*devVec->count_*sizeof(cuDoubleComplex));
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceDoubleComplex",i);
}

Loading…
Cancel
Save