Make multivectors work

psblas-bgmres
Salvatore Filippone 2 years ago
parent ee140bc8dd
commit 7d74ebf5c4

@ -182,9 +182,9 @@ int writeRemoteBuffer(void* hostSrc, void* buffer, int count)
}
// TODO
int writeRemoteBufferR2(void* hostSrc, void* buffer, int count, int pitch, int size)
int writeRemoteBufferR2(void* hostSrc, int hpitch, void* buffer, int count, int pitch, int size)
{
cudaError_t err = cudaMemcpy2D(buffer, pitch, hostSrc, count, count, size, cudaMemcpyHostToDevice);
cudaError_t err = cudaMemcpy2D(buffer, pitch, hostSrc, hpitch, size, count, cudaMemcpyHostToDevice);
if (err == cudaSuccess)
return SPGPU_SUCCESS;
@ -221,9 +221,9 @@ int readRemoteBuffer(void* hostDest, void* buffer, int count)
}
// TODO sistemare pitch e size (si possono gestire senza realloc su fortran)
int readRemoteBufferR2(void* hostDest, void* buffer, int count, int pitch, int size)
int readRemoteBufferR2(void* hostDest, int hpitch, void* buffer, int count, int pitch, int size)
{
cudaError_t err = cudaMemcpy2D(hostDest, count, buffer, pitch, count, size, cudaMemcpyDeviceToHost);
cudaError_t err = cudaMemcpy2D(hostDest, hpitch, buffer, pitch, size, count, cudaMemcpyDeviceToHost);
if (err == cudaSuccess)
return SPGPU_SUCCESS;

@ -49,9 +49,9 @@ int allocMappedMemory(void **buffer, void **dp, int size);
int registerMappedMemory(void *buffer, void **dp, int size);
int unregisterMappedMemory(void *buffer);
int writeRemoteBuffer(void* hostSrc, void* buffer, int count);
int writeRemoteBufferR2(void* hostSrc, void* buffer, int count, int pitch, int size);
int writeRemoteBufferR2(void* hostSrc, int hpitch, void* buffer, int count, int pitch, int size);
int readRemoteBuffer(void* hostDest, void* buffer, int count);
int readRemoteBufferR2(void* hostDest, void* buffer, int count, int pitch, int size);
int readRemoteBufferR2(void* hostDest, int hpitch, void* buffer, int count, int pitch, int size);
int freeRemoteBuffer(void* buffer);
int gpuInit(int dev);
int getDeviceCount();

@ -56,9 +56,12 @@ int writeMultiVecDeviceDouble(void* deviceVec, double* hostVec)
int writeMultiVecDeviceDoubleR2(void* deviceVec, double* hostVec, int ld)
{ int i;
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
i = writeRemoteBufferR2((void*) hostVec, (void *)devVec->v_, devVec->count_*sizeof(double), devVec->pitch_, devVec->size_);
double *hv, *dv;
i = writeRemoteBufferR2((void*) hostVec, ld*sizeof(double),
(void *)devVec->v_, (devVec->count_),
sizeof(double)*(devVec->pitch_), (devVec->size_)*sizeof(double));
// i = writeMultiVecDeviceDouble(deviceVec, (void *) hostVec);
fprintf(stderr,"From routine : %s : %p %p\n","writeMultiVecDeviceDoubleR2",devVec->v_,devVec->v_+devVec->pitch_);
//fprintf(stderr,"From routine : %s : %p %p\n","writeMultiVecDeviceDoubleR2",devVec->v_,devVec->v_+devVec->pitch_);
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","writeMultiVecDeviceDoubleR2",i);
}
@ -78,10 +81,14 @@ int readMultiVecDeviceDouble(void* deviceVec, double* hostVec)
int readMultiVecDeviceDoubleR2(void* deviceVec, double* hostVec, int ld)
{ int i;
double *hv, *dv;
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
i = readRemoteBufferR2((void *) hostVec, (void *)devVec->v_, devVec->count_*sizeof(double), devVec->pitch_, devVec->size_);
i = readRemoteBufferR2((void *) hostVec, ld*sizeof(double),
(void *)devVec->v_, devVec->count_,
sizeof(double)*(devVec->pitch_), (devVec->size_)*sizeof(double));
// i = readMultiVecDeviceDouble(deviceVec, hostVec);
fprintf(stderr,"From routine : %s : %p \n","readMultiVecDeviceDoubleR2",devVec->v_);
// fprintf(stderr,"From routine : %s : %p \n","readMultiVecDeviceDoubleR2",devVec->v_);
if (i != 0) {
fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceDoubleR2",i);
}
@ -242,7 +249,7 @@ int axpbyMultiVecDeviceDouble(int n,double alpha, void* devMultiVecX,
return SPGPU_UNSUPPORTED;
for(j=0;j<devVecY->count_;j++)
fprintf(stderr,"CUDA ENTERED %d %d %d %d \n",j, n, pitch, devVecY->size_);
//fprintf(stderr,"CUDA ENTERED %d %d %d %d \n",j, n, pitch, devVecY->size_);
spgpuDaxpby(handle,(double*)devVecY->v_+pitch*j, n, beta,
(double*)devVecY->v_+pitch*j, alpha,(double*) devVecX->v_+pitch*j);
return(i);

@ -234,8 +234,8 @@ int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX,
// CHECK_CUSPARSE(cusparseDestroyDnMat(vecX));
// CHECK_CUSPARSE(cusparseDestroyDnMat(vecY));
for(j=0;j<y->count_;j++) {
vX=x->v_+pitch*j;
vY=y->v_+pitch*j;
vX=x->v_+pitch*j*sizeof(TYPE);
vY=y->v_+pitch*j*sizeof(TYPE);
fprintf(stderr,"CUDA ENTERED 1 %d %p %p %d %d %d %d\n",j, vX, vY, pitch, y->size_, cMat->m, cMat->n);
CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, cMat->m, vY, CUSPARSE_BASE_TYPE) );
CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, cMat->n, vX, CUSPARSE_BASE_TYPE) );
@ -257,7 +257,7 @@ int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX,
&alpha,(*(cMat->spmvDescr)),vecX,&beta,vecY,
CUSPARSE_BASE_TYPE,CUSPARSE_SPMV_ALG_DEFAULT,
cMat->mvbuffer));
fprintf(stderr,"CUDA ENTERED 2 %d %p %p %d %d %d %d\n",j, vX, vY, *((double*)vX), *((double*)vY), pitch, y->size_);
//fprintf(stderr,"CUDA ENTERED 2 %d %p %p %d %d %d %d\n",j, vX, vY, *((double*)vX), *((double*)vY), pitch, y->size_);
CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) );
CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) );
}

@ -45,7 +45,7 @@ struct MultiVectDevice
//number of elements for a single vector
int size_;
//pithc in number of elements
//pitch in number of elements
int pitch_;
// Vectors in device memory (single allocation)

Loading…
Cancel
Save