Fix cuda interfaces for renaming

rename-cuda
sfilippone 1 year ago
parent 6fa0bf7fe7
commit 9b713c177b

@ -37,7 +37,7 @@
static int hasUVA=-1; static int hasUVA=-1;
static struct cudaDeviceProp *prop=NULL; static struct cudaDeviceProp *prop=NULL;
static spgpuHandle_t psb_gpu_handle = NULL; static spgpuHandle_t psb_cuda_handle = NULL;
static cublasHandle_t psb_cublas_handle = NULL; static cublasHandle_t psb_cublas_handle = NULL;
@ -228,7 +228,7 @@ int gpuInit(int dev)
return SPGPU_UNSPECIFIED; return SPGPU_UNSPECIFIED;
} }
if (!psb_cublas_handle) if (!psb_cublas_handle)
psb_gpuCreateCublasHandle(); psb_cudaCreateCublasHandle();
hasUVA=getDeviceHasUVA(); hasUVA=getDeviceHasUVA();
return err; return err;
@ -238,14 +238,14 @@ int gpuInit(int dev)
void gpuClose() void gpuClose()
{ {
cudaStream_t st1, st2; cudaStream_t st1, st2;
if (! psb_gpu_handle) if (! psb_cuda_handle)
st1=spgpuGetStream(psb_gpu_handle); st1=spgpuGetStream(psb_cuda_handle);
if (! psb_cublas_handle) if (! psb_cublas_handle)
cublasGetStream(psb_cublas_handle,&st2); cublasGetStream(psb_cublas_handle,&st2);
psb_gpuDestroyHandle(); psb_cudaDestroyHandle();
if (st1 != st2) if (st1 != st2)
psb_gpuDestroyCublasHandle(); psb_cudaDestroyCublasHandle();
free(prop); free(prop);
prop=NULL; prop=NULL;
hasUVA=-1; hasUVA=-1;
@ -391,49 +391,49 @@ void cudaReset()
} }
spgpuHandle_t psb_gpuGetHandle() spgpuHandle_t psb_cudaGetHandle()
{ {
return psb_gpu_handle; return psb_cuda_handle;
} }
void psb_gpuCreateHandle() void psb_cudaCreateHandle()
{ {
if (!psb_gpu_handle) if (!psb_cuda_handle)
spgpuCreate(&psb_gpu_handle, getDevice()); spgpuCreate(&psb_cuda_handle, getDevice());
} }
void psb_gpuDestroyHandle() void psb_cudaDestroyHandle()
{ {
if (!psb_gpu_handle) if (!psb_cuda_handle)
spgpuDestroy(psb_gpu_handle); spgpuDestroy(psb_cuda_handle);
psb_gpu_handle = NULL; psb_cuda_handle = NULL;
} }
cudaStream_t psb_gpuGetStream() cudaStream_t psb_cudaGetStream()
{ {
return spgpuGetStream(psb_gpu_handle); return spgpuGetStream(psb_cuda_handle);
} }
void psb_gpuSetStream(cudaStream_t stream) void psb_cudaSetStream(cudaStream_t stream)
{ {
spgpuSetStream(psb_gpu_handle, stream); spgpuSetStream(psb_cuda_handle, stream);
return ; return ;
} }
cublasHandle_t psb_gpuGetCublasHandle() cublasHandle_t psb_cudaGetCublasHandle()
{ {
if (!psb_cublas_handle) if (!psb_cublas_handle)
psb_gpuCreateCublasHandle(); psb_cudaCreateCublasHandle();
return psb_cublas_handle; return psb_cublas_handle;
} }
void psb_gpuCreateCublasHandle() void psb_cudaCreateCublasHandle()
{ if (!psb_cublas_handle) { if (!psb_cublas_handle)
cublasCreate(&psb_cublas_handle); cublasCreate(&psb_cublas_handle);
} }
void psb_gpuDestroyCublasHandle() void psb_cudaDestroyCublasHandle()
{ {
if (!psb_cublas_handle) if (!psb_cublas_handle)
cublasDestroy(psb_cublas_handle); cublasDestroy(psb_cublas_handle);

@ -71,15 +71,15 @@ void cudaReset();
void gpuClose(); void gpuClose();
spgpuHandle_t psb_gpuGetHandle(); spgpuHandle_t psb_cudaGetHandle();
void psb_gpuCreateHandle(); void psb_cudaCreateHandle();
void psb_gpuDestroyHandle(); void psb_cudaDestroyHandle();
cudaStream_t psb_gpuGetStream(); cudaStream_t psb_cudaGetStream();
void psb_gpuSetStream(cudaStream_t stream); void psb_cudaSetStream(cudaStream_t stream);
cublasHandle_t psb_gpuGetCublasHandle(); cublasHandle_t psb_cudaGetCublasHandle();
void psb_gpuCreateCublasHandle(); void psb_cudaCreateCublasHandle();
void psb_gpuDestroyCublasHandle(); void psb_cudaDestroyCublasHandle();
int allocateInt(void **, int); int allocateInt(void **, int);

@ -89,7 +89,7 @@ int setscalMultiVecDeviceFloatComplex(cuFloatComplex val, int first, int last,
{ int i=0; { int i=0;
int pitch = 0; int pitch = 0;
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
spgpuCsetscal(handle, first, last, indexBase, val, (cuFloatComplex *) devVecX->v_); spgpuCsetscal(handle, first, last, indexBase, val, (cuFloatComplex *) devVecX->v_);
@ -104,7 +104,7 @@ int geinsMultiVecDeviceFloatComplex(int n, void* devMultiVecIrl, void* devMultiV
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl;
struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
pitch = devVecIrl->pitch_; pitch = devVecIrl->pitch_;
if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) if ((n > devVecIrl->size_) || (n>devVecVal->size_ ))
return SPGPU_UNSUPPORTED; return SPGPU_UNSUPPORTED;
@ -144,7 +144,7 @@ int igathMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n,
int i, *idx =(int *) indexes;; int i, *idx =(int *) indexes;;
cuFloatComplex *hv = (cuFloatComplex *) host_values;; cuFloatComplex *hv = (cuFloatComplex *) host_values;;
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
i=0; i=0;
hv = &(hv[hfirst-indexBase]); hv = &(hv[hfirst-indexBase]);
@ -175,7 +175,7 @@ int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n,
cuFloatComplex *hv = (cuFloatComplex *) host_values; cuFloatComplex *hv = (cuFloatComplex *) host_values;
int *idx=(int *) indexes; int *idx=(int *) indexes;
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
idx = &(idx[first-indexBase]); idx = &(idx[first-indexBase]);
hv = &(hv[hfirst-indexBase]); hv = &(hv[hfirst-indexBase]);
@ -187,7 +187,7 @@ int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n,
int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA) int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
{ int i=0; { int i=0;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
spgpuCmnrm2(handle, y_res, n,(cuFloatComplex *)devVecA->v_, spgpuCmnrm2(handle, y_res, n,(cuFloatComplex *)devVecA->v_,
@ -197,7 +197,7 @@ int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiV
int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA) int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
{ int i=0; { int i=0;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
spgpuCmamax(handle, y_res, n,(cuFloatComplex *)devVecA->v_, spgpuCmamax(handle, y_res, n,(cuFloatComplex *)devVecA->v_,
@ -207,7 +207,7 @@ int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiV
int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA) int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
{ int i=0; { int i=0;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
spgpuCmasum(handle, y_res, n,(cuFloatComplex *)devVecA->v_, spgpuCmasum(handle, y_res, n,(cuFloatComplex *)devVecA->v_,
@ -218,7 +218,7 @@ int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiV
int scalMultiVecDeviceFloatComplex(cuFloatComplex alpha, void* devMultiVecA) int scalMultiVecDeviceFloatComplex(cuFloatComplex alpha, void* devMultiVecA)
{ int i=0; { int i=0;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
// Note: inner kernel can handle aliased input/output // Note: inner kernel can handle aliased input/output
spgpuCscal(handle, (cuFloatComplex *)devVecA->v_, devVecA->pitch_, spgpuCscal(handle, (cuFloatComplex *)devVecA->v_, devVecA->pitch_,
@ -231,7 +231,7 @@ int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n,
{int i=0; {int i=0;
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
spgpuCmdot(handle, y_res, n, (cuFloatComplex*)devVecA->v_, spgpuCmdot(handle, y_res, n, (cuFloatComplex*)devVecA->v_,
(cuFloatComplex*)devVecB->v_,devVecA->count_,devVecB->pitch_); (cuFloatComplex*)devVecB->v_,devVecA->count_,devVecB->pitch_);
@ -244,7 +244,7 @@ int axpbyMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha, void* devMultiVe
int pitch = 0; int pitch = 0;
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
pitch = devVecY->pitch_; pitch = devVecY->pitch_;
if ((n > devVecY->size_) || (n>devVecX->size_ )) if ((n > devVecY->size_) || (n>devVecX->size_ ))
return SPGPU_UNSUPPORTED; return SPGPU_UNSUPPORTED;
@ -261,7 +261,7 @@ int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha,
{ int i = 0; { int i = 0;
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
if ((n > devVecA->size_) || (n>devVecB->size_ )) if ((n > devVecA->size_) || (n>devVecB->size_ ))
return SPGPU_UNSUPPORTED; return SPGPU_UNSUPPORTED;
@ -279,7 +279,7 @@ int axybzMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVec
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ; struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ ))
return SPGPU_UNSUPPORTED; return SPGPU_UNSUPPORTED;
@ -297,7 +297,7 @@ int absMultiVecDeviceFloatComplex2(int n, cuFloatComplex alpha, void *deviceVecA
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
if ((n > devVecA->size_) || (n>devVecB->size_ )) if ((n > devVecA->size_) || (n>devVecB->size_ ))
return SPGPU_UNSUPPORTED; return SPGPU_UNSUPPORTED;
@ -311,7 +311,7 @@ int absMultiVecDeviceFloatComplex2(int n, cuFloatComplex alpha, void *deviceVecA
int absMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA) int absMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA)
{ int i = 0; { int i = 0;
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
if (n > devVecA->size_) if (n > devVecA->size_)
return SPGPU_UNSUPPORTED; return SPGPU_UNSUPPORTED;

@ -186,7 +186,7 @@ int spmvDiagDeviceDouble(void *deviceMat, double alpha, void* deviceX,
struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; struct DiagDevice *devMat = (struct DiagDevice *) deviceMat;
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
#ifdef HAVE_SPGPU #ifdef HAVE_SPGPU
#ifdef VERBOSE #ifdef VERBOSE
@ -268,7 +268,7 @@ int spmvDiagDeviceFloat(void *deviceMat, float alpha, void* deviceX,
struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; struct DiagDevice *devMat = (struct DiagDevice *) deviceMat;
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
#ifdef HAVE_SPGPU #ifdef HAVE_SPGPU
#ifdef VERBOSE #ifdef VERBOSE

@ -126,7 +126,7 @@ int spmvDnsDeviceFloat(char transa, int m, int n, int k, float *alpha,
int status; int status;
#ifdef HAVE_SPGPU #ifdef HAVE_SPGPU
cublasHandle_t handle=psb_gpuGetCublasHandle(); cublasHandle_t handle=psb_cudaGetCublasHandle();
cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C));
/* Note: the M,N,K choices according to TRANS have already been handled in the caller */ /* Note: the M,N,K choices according to TRANS have already been handled in the caller */
if (n == 1) { if (n == 1) {
@ -157,7 +157,7 @@ int spmvDnsDeviceDouble(char transa, int m, int n, int k, double *alpha,
int status; int status;
#ifdef HAVE_SPGPU #ifdef HAVE_SPGPU
cublasHandle_t handle=psb_gpuGetCublasHandle(); cublasHandle_t handle=psb_cudaGetCublasHandle();
cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C));
/* Note: the M,N,K choices according to TRANS have already been handled in the caller */ /* Note: the M,N,K choices according to TRANS have already been handled in the caller */
if (n == 1) { if (n == 1) {
@ -188,7 +188,7 @@ int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k, float complex *a
int status; int status;
#ifdef HAVE_SPGPU #ifdef HAVE_SPGPU
cublasHandle_t handle=psb_gpuGetCublasHandle(); cublasHandle_t handle=psb_cudaGetCublasHandle();
cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C));
/* Note: the M,N,K choices according to TRANS have already been handled in the caller */ /* Note: the M,N,K choices according to TRANS have already been handled in the caller */
if (n == 1) { if (n == 1) {
@ -219,7 +219,7 @@ int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k, double complex
int status; int status;
#ifdef HAVE_SPGPU #ifdef HAVE_SPGPU
cublasHandle_t handle=psb_gpuGetCublasHandle(); cublasHandle_t handle=psb_cudaGetCublasHandle();
cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C));
/* Note: the M,N,K choices according to TRANS have already been handled in the caller */ /* Note: the M,N,K choices according to TRANS have already been handled in the caller */
if (n == 1) { if (n == 1) {

@ -88,7 +88,7 @@ int setscalMultiVecDeviceDouble(double val, int first, int last,
{ int i=0; { int i=0;
int pitch = 0; int pitch = 0;
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
spgpuDsetscal(handle, first, last, indexBase, val, (double *) devVecX->v_); spgpuDsetscal(handle, first, last, indexBase, val, (double *) devVecX->v_);
@ -104,7 +104,7 @@ int geinsMultiVecDeviceDouble(int n, void* devMultiVecIrl, void* devMultiVecVal,
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl;
struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
pitch = devVecIrl->pitch_; pitch = devVecIrl->pitch_;
if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) if ((n > devVecIrl->size_) || (n>devVecVal->size_ ))
return SPGPU_UNSUPPORTED; return SPGPU_UNSUPPORTED;
@ -143,7 +143,7 @@ int igathMultiVecDeviceDouble(void* deviceVec, int vectorId, int n,
int i, *idx =(int *) indexes;; int i, *idx =(int *) indexes;;
double *hv = (double *) host_values;; double *hv = (double *) host_values;;
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
i=0; i=0;
hv = &(hv[hfirst-indexBase]); hv = &(hv[hfirst-indexBase]);
@ -168,7 +168,7 @@ int iscatMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, int first, v
double *hv = (double *) host_values; double *hv = (double *) host_values;
int *idx=(int *) indexes; int *idx=(int *) indexes;
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
idx = &(idx[first-indexBase]); idx = &(idx[first-indexBase]);
hv = &(hv[hfirst-indexBase]); hv = &(hv[hfirst-indexBase]);
@ -179,7 +179,7 @@ int iscatMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, int first, v
int scalMultiVecDeviceDouble(double alpha, void* devMultiVecA) int scalMultiVecDeviceDouble(double alpha, void* devMultiVecA)
{ int i=0; { int i=0;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
// Note: inner kernel can handle aliased input/output // Note: inner kernel can handle aliased input/output
spgpuDscal(handle, (double *)devVecA->v_, devVecA->pitch_, spgpuDscal(handle, (double *)devVecA->v_, devVecA->pitch_,
@ -189,7 +189,7 @@ int scalMultiVecDeviceDouble(double alpha, void* devMultiVecA)
int nrm2MultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA) int nrm2MultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA)
{ int i=0; { int i=0;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
spgpuDmnrm2(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_); spgpuDmnrm2(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_);
@ -198,7 +198,7 @@ int nrm2MultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA)
int amaxMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA) int amaxMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA)
{ int i=0; { int i=0;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
spgpuDmamax(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_); spgpuDmamax(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_);
@ -207,7 +207,7 @@ int amaxMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA)
int asumMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA) int asumMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA)
{ int i=0; { int i=0;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
spgpuDmasum(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_); spgpuDmasum(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_);
@ -219,7 +219,7 @@ int dotMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA, void* devM
{int i=0; {int i=0;
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
spgpuDmdot(handle, y_res, n, (double*)devVecA->v_, (double*)devVecB->v_,devVecA->count_,devVecB->pitch_); spgpuDmdot(handle, y_res, n, (double*)devVecA->v_, (double*)devVecB->v_,devVecA->count_,devVecB->pitch_);
return(i); return(i);
@ -231,7 +231,7 @@ int axpbyMultiVecDeviceDouble(int n,double alpha, void* devMultiVecX,
int pitch = 0; int pitch = 0;
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
pitch = devVecY->pitch_; pitch = devVecY->pitch_;
if ((n > devVecY->size_) || (n>devVecX->size_ )) if ((n > devVecY->size_) || (n>devVecX->size_ ))
return SPGPU_UNSUPPORTED; return SPGPU_UNSUPPORTED;
@ -246,7 +246,7 @@ int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceV
{ int i = 0; { int i = 0;
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
if ((n > devVecA->size_) || (n>devVecB->size_ )) if ((n > devVecA->size_) || (n>devVecB->size_ ))
return SPGPU_UNSUPPORTED; return SPGPU_UNSUPPORTED;
@ -262,7 +262,7 @@ int axybzMultiVecDeviceDouble(int n, double alpha, void *deviceVecA,
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ; struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ ))
return SPGPU_UNSUPPORTED; return SPGPU_UNSUPPORTED;
@ -278,7 +278,7 @@ int absMultiVecDeviceDouble2(int n, double alpha, void *deviceVecA,
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
if ((n > devVecA->size_) || (n>devVecB->size_ )) if ((n > devVecA->size_) || (n>devVecB->size_ ))
return SPGPU_UNSUPPORTED; return SPGPU_UNSUPPORTED;
@ -291,7 +291,7 @@ int absMultiVecDeviceDouble2(int n, double alpha, void *deviceVecA,
int absMultiVecDeviceDouble(int n, double alpha, void *deviceVecA) int absMultiVecDeviceDouble(int n, double alpha, void *deviceVecA)
{ int i = 0; { int i = 0;
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
if (n > devVecA->size_) if (n > devVecA->size_)
return SPGPU_UNSUPPORTED; return SPGPU_UNSUPPORTED;

@ -158,7 +158,7 @@ void sspmdmm_gpu(float *z,int s, int vPitch, float *y, float alpha, float* cM, i
int avgRowSize, int maxRowSize, int rows, int pitch, float *x, float beta, int firstIndex) int avgRowSize, int maxRowSize, int rows, int pitch, float *x, float beta, int firstIndex)
{ {
int i=0; int i=0;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
for (i=0; i<s; i++) for (i=0; i<s; i++)
{ {
@ -208,7 +208,7 @@ dspmdmm_gpu (double *z,int s, int vPitch, double *y, double alpha, double* cM, i
double *x, double beta, int firstIndex) double *x, double beta, int firstIndex)
{ {
int i=0; int i=0;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
for (i=0; i<s; i++) for (i=0; i<s; i++)
{ {
if (PASS_RS) { if (PASS_RS) {
@ -258,7 +258,7 @@ cspmdmm_gpu (cuFloatComplex *z, int s, int vPitch, cuFloatComplex *y,
cuFloatComplex *x, cuFloatComplex beta, int firstIndex) cuFloatComplex *x, cuFloatComplex beta, int firstIndex)
{ {
int i=0; int i=0;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
for (i=0; i<s; i++) for (i=0; i<s; i++)
{ {
if (PASS_RS) { if (PASS_RS) {
@ -300,7 +300,7 @@ zspmdmm_gpu (cuDoubleComplex *z, int s, int vPitch, cuDoubleComplex *y, cuDouble
cuDoubleComplex *x, cuDoubleComplex beta, int firstIndex) cuDoubleComplex *x, cuDoubleComplex beta, int firstIndex)
{ {
int i=0; int i=0;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
for (i=0; i<s; i++) for (i=0; i<s; i++)
{ {
if (PASS_RS) { if (PASS_RS) {
@ -520,7 +520,7 @@ int psiCopyCooToElgFloat(int nr, int nc, int nza, int hacksz, int ldv, int nzm,
float *devVal; float *devVal;
int *devIdisp, *devJa; int *devIdisp, *devJa;
spgpuHandle_t handle; spgpuHandle_t handle;
handle = psb_gpuGetHandle(); handle = psb_cudaGetHandle();
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
@ -563,7 +563,7 @@ int psiCopyCooToElgDouble(int nr, int nc, int nza, int hacksz, int ldv, int nzm,
double *devVal; double *devVal;
int *devIdisp, *devJa; int *devIdisp, *devJa;
spgpuHandle_t handle; spgpuHandle_t handle;
handle = psb_gpuGetHandle(); handle = psb_cudaGetHandle();
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
@ -605,7 +605,7 @@ int psiCopyCooToElgFloatComplex(int nr, int nc, int nza, int hacksz, int ldv, in
float complex *devVal; float complex *devVal;
int *devIdisp, *devJa; int *devIdisp, *devJa;
spgpuHandle_t handle; spgpuHandle_t handle;
handle = psb_gpuGetHandle(); handle = psb_cudaGetHandle();
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
@ -648,7 +648,7 @@ int psiCopyCooToElgDoubleComplex(int nr, int nc, int nza, int hacksz, int ldv, i
double complex *devVal; double complex *devVal;
int *devIdisp, *devJa; int *devIdisp, *devJa;
spgpuHandle_t handle; spgpuHandle_t handle;
handle = psb_gpuGetHandle(); handle = psb_cudaGetHandle();
allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
@ -690,7 +690,7 @@ int dev_csputEllDeviceFloat(void* deviceMat, int nnz, void *ia, void *ja, void *
struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia; struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia;
struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja; struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja;
float alpha=1.0; float alpha=1.0;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
if (nnz <=0) return SPGPU_SUCCESS; if (nnz <=0) return SPGPU_SUCCESS;
//fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt); //fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt);
@ -711,7 +711,7 @@ int dev_csputEllDeviceDouble(void* deviceMat, int nnz, void *ia, void *ja, void
struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia; struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia;
struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja; struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja;
double alpha=1.0; double alpha=1.0;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
if (nnz <=0) return SPGPU_SUCCESS; if (nnz <=0) return SPGPU_SUCCESS;
//fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt); //fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt);
@ -734,7 +734,7 @@ int dev_csputEllDeviceFloatComplex(void* deviceMat, int nnz,
struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia; struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia;
struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja; struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja;
cuFloatComplex alpha = make_cuFloatComplex(1.0, 0.0); cuFloatComplex alpha = make_cuFloatComplex(1.0, 0.0);
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
if (nnz <=0) return SPGPU_SUCCESS; if (nnz <=0) return SPGPU_SUCCESS;
//fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt); //fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt);
@ -756,7 +756,7 @@ int dev_csputEllDeviceDoubleComplex(void* deviceMat, int nnz,
struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia; struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia;
struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja; struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja;
cuDoubleComplex alpha = make_cuDoubleComplex(1.0, 0.0); cuDoubleComplex alpha = make_cuDoubleComplex(1.0, 0.0);
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
if (nnz <=0) return SPGPU_SUCCESS; if (nnz <=0) return SPGPU_SUCCESS;
//fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt); //fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt);

@ -264,7 +264,7 @@ int spmvHdiagDeviceDouble(void *deviceMat, double alpha, void* deviceX,
struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat;
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
#ifdef HAVE_SPGPU #ifdef HAVE_SPGPU
#ifdef VERBOSE #ifdef VERBOSE
@ -395,7 +395,7 @@ int spmvHdiagDeviceFloat(void *deviceMat, float alpha, void* deviceX,
struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat;
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
#ifdef HAVE_SPGPU #ifdef HAVE_SPGPU
#ifdef VERBOSE #ifdef VERBOSE

@ -168,7 +168,7 @@ int spmvHllDeviceFloat(void *deviceMat, float alpha, void* deviceX,
HllDevice *devMat = (HllDevice *) deviceMat; HllDevice *devMat = (HllDevice *) deviceMat;
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
#ifdef HAVE_SPGPU #ifdef HAVE_SPGPU
#ifdef VERBOSE #ifdef VERBOSE
@ -197,7 +197,7 @@ int spmvHllDeviceDouble(void *deviceMat, double alpha, void* deviceX,
HllDevice *devMat = (HllDevice *) deviceMat; HllDevice *devMat = (HllDevice *) deviceMat;
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
#ifdef HAVE_SPGPU #ifdef HAVE_SPGPU
#ifdef VERBOSE #ifdef VERBOSE
@ -225,7 +225,7 @@ int spmvHllDeviceFloatComplex(void *deviceMat, float complex alpha, void* device
HllDevice *devMat = (HllDevice *) deviceMat; HllDevice *devMat = (HllDevice *) deviceMat;
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
#ifdef HAVE_SPGPU #ifdef HAVE_SPGPU
cuFloatComplex a = make_cuFloatComplex(crealf(alpha),cimagf(alpha)); cuFloatComplex a = make_cuFloatComplex(crealf(alpha),cimagf(alpha));
@ -255,7 +255,7 @@ int spmvHllDeviceDoubleComplex(void *deviceMat, double complex alpha, void* devi
HllDevice *devMat = (HllDevice *) deviceMat; HllDevice *devMat = (HllDevice *) deviceMat;
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
#ifdef HAVE_SPGPU #ifdef HAVE_SPGPU
cuDoubleComplex a = make_cuDoubleComplex(creal(alpha),cimag(alpha)); cuDoubleComplex a = make_cuDoubleComplex(creal(alpha),cimag(alpha));
@ -454,7 +454,7 @@ int psiCopyCooToHlgFloat(int nr, int nc, int nza, int hacksz, int noffs, int isz
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
//cudaSync(); //cudaSync();
handle = psb_gpuGetHandle(); handle = psb_cudaGetHandle();
psi_cuda_s_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, psi_cuda_s_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz,
(int *) devMat->rS, (int *) devMat->hackOffs, (int *) devMat->rS, (int *) devMat->hackOffs,
devIdisp,devJa,devVal, devIdisp,devJa,devVal,
@ -502,7 +502,7 @@ int psiCopyCooToHlgDouble(int nr, int nc, int nza, int hacksz, int noffs, int is
//fprintf(stderr,"WriteRemoteBuffer idisp %d\n",i); //fprintf(stderr,"WriteRemoteBuffer idisp %d\n",i);
//cudaSync(); //cudaSync();
//fprintf(stderr," hacksz: %d \n",hacksz); //fprintf(stderr," hacksz: %d \n",hacksz);
handle = psb_gpuGetHandle(); handle = psb_cudaGetHandle();
psi_cuda_d_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, psi_cuda_d_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz,
(int *) devMat->rS, (int *) devMat->hackOffs, (int *) devMat->rS, (int *) devMat->hackOffs,
devIdisp,devJa,devVal, devIdisp,devJa,devVal,
@ -545,7 +545,7 @@ int psiCopyCooToHlgFloatComplex(int nr, int nc, int nza, int hacksz, int noffs,
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
//cudaSync(); //cudaSync();
handle = psb_gpuGetHandle(); handle = psb_cudaGetHandle();
psi_cuda_c_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, psi_cuda_c_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz,
(int *) devMat->rS, (int *) devMat->hackOffs, (int *) devMat->rS, (int *) devMat->hackOffs,
devIdisp,devJa,devVal, devIdisp,devJa,devVal,
@ -588,7 +588,7 @@ int psiCopyCooToHlgDoubleComplex(int nr, int nc, int nza, int hacksz, int noffs,
if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
//cudaSync(); //cudaSync();
handle = psb_gpuGetHandle(); handle = psb_cudaGetHandle();
psi_cuda_z_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, psi_cuda_z_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz,
(int *) devMat->rS, (int *) devMat->hackOffs, (int *) devMat->rS, (int *) devMat->hackOffs,
devIdisp,devJa,devVal, devIdisp,devJa,devVal,

@ -90,7 +90,7 @@ int setscalMultiVecDeviceInt(int val, int first, int last,
{ int i=0; { int i=0;
int pitch = 0; int pitch = 0;
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
spgpuIsetscal(handle, first, last, indexBase, val, (int *) devVecX->v_); spgpuIsetscal(handle, first, last, indexBase, val, (int *) devVecX->v_);
@ -105,7 +105,7 @@ int geinsMultiVecDeviceInt(int n, void* devMultiVecIrl, void* devMultiVecVal,
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl;
struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
pitch = devVecIrl->pitch_; pitch = devVecIrl->pitch_;
if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) if ((n > devVecIrl->size_) || (n>devVecVal->size_ ))
return SPGPU_UNSUPPORTED; return SPGPU_UNSUPPORTED;
@ -144,7 +144,7 @@ int igathMultiVecDeviceInt(void* deviceVec, int vectorId, int n,
int i, *idx =(int *) indexes;; int i, *idx =(int *) indexes;;
int *hv = (int *) host_values;; int *hv = (int *) host_values;;
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
i=0; i=0;
hv = &(hv[hfirst-indexBase]); hv = &(hv[hfirst-indexBase]);
@ -169,7 +169,7 @@ int iscatMultiVecDeviceInt(void* deviceVec, int vectorId, int n, int first, void
int *hv = (int *) host_values; int *hv = (int *) host_values;
int *idx=(int *) indexes; int *idx=(int *) indexes;
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
idx = &(idx[first-indexBase]); idx = &(idx[first-indexBase]);
hv = &(hv[hfirst-indexBase]); hv = &(hv[hfirst-indexBase]);

@ -88,7 +88,7 @@ int setscalMultiVecDeviceFloat(float val, int first, int last,
{ int i=0; { int i=0;
int pitch = 0; int pitch = 0;
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
spgpuSsetscal(handle, first, last, indexBase, val, (float *) devVecX->v_); spgpuSsetscal(handle, first, last, indexBase, val, (float *) devVecX->v_);
@ -103,7 +103,7 @@ int geinsMultiVecDeviceFloat(int n, void* devMultiVecIrl, void* devMultiVecVal,
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl;
struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
pitch = devVecIrl->pitch_; pitch = devVecIrl->pitch_;
if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) if ((n > devVecIrl->size_) || (n>devVecVal->size_ ))
return SPGPU_UNSUPPORTED; return SPGPU_UNSUPPORTED;
@ -142,7 +142,7 @@ int igathMultiVecDeviceFloat(void* deviceVec, int vectorId, int n,
int i, *idx =(int *) indexes;; int i, *idx =(int *) indexes;;
float *hv = (float *) host_values;; float *hv = (float *) host_values;;
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
i=0; i=0;
hv = &(hv[hfirst-indexBase]); hv = &(hv[hfirst-indexBase]);
@ -167,7 +167,7 @@ int iscatMultiVecDeviceFloat(void* deviceVec, int vectorId, int n, int first, vo
float *hv = (float *) host_values; float *hv = (float *) host_values;
int *idx=(int *) indexes; int *idx=(int *) indexes;
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
idx = &(idx[first-indexBase]); idx = &(idx[first-indexBase]);
hv = &(hv[hfirst-indexBase]); hv = &(hv[hfirst-indexBase]);
@ -179,7 +179,7 @@ int iscatMultiVecDeviceFloat(void* deviceVec, int vectorId, int n, int first, vo
int nrm2MultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) int nrm2MultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA)
{ int i=0; { int i=0;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
spgpuSmnrm2(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_); spgpuSmnrm2(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_);
@ -188,7 +188,7 @@ int nrm2MultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA)
int amaxMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) int amaxMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA)
{ int i=0; { int i=0;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
spgpuSmamax(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_); spgpuSmamax(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_);
@ -197,7 +197,7 @@ int amaxMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA)
int asumMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) int asumMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA)
{ int i=0; { int i=0;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
spgpuSmasum(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_); spgpuSmasum(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_);
@ -207,7 +207,7 @@ int asumMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA)
int scalMultiVecDeviceFloat(float alpha, void* devMultiVecA) int scalMultiVecDeviceFloat(float alpha, void* devMultiVecA)
{ int i=0; { int i=0;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
// Note: inner kernel can handle aliased input/output // Note: inner kernel can handle aliased input/output
spgpuSscal(handle, (float *)devVecA->v_, devVecA->pitch_, spgpuSscal(handle, (float *)devVecA->v_, devVecA->pitch_,
@ -219,7 +219,7 @@ int dotMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA, void* devMul
{int i=0; {int i=0;
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
spgpuSmdot(handle, y_res, n, (float*)devVecA->v_, (float*)devVecB->v_,devVecA->count_,devVecB->pitch_); spgpuSmdot(handle, y_res, n, (float*)devVecA->v_, (float*)devVecB->v_,devVecA->count_,devVecB->pitch_);
return(i); return(i);
@ -231,7 +231,7 @@ int axpbyMultiVecDeviceFloat(int n,float alpha, void* devMultiVecX,
int pitch = 0; int pitch = 0;
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
pitch = devVecY->pitch_; pitch = devVecY->pitch_;
if ((n > devVecY->size_) || (n>devVecX->size_ )) if ((n > devVecY->size_) || (n>devVecX->size_ ))
return SPGPU_UNSUPPORTED; return SPGPU_UNSUPPORTED;
@ -246,7 +246,7 @@ int axyMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, void *deviceVec
{ int i = 0; { int i = 0;
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
if ((n > devVecA->size_) || (n>devVecB->size_ )) if ((n > devVecA->size_) || (n>devVecB->size_ ))
return SPGPU_UNSUPPORTED; return SPGPU_UNSUPPORTED;
@ -262,7 +262,7 @@ int axybzMultiVecDeviceFloat(int n, float alpha, void *deviceVecA,
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ; struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ ))
return SPGPU_UNSUPPORTED; return SPGPU_UNSUPPORTED;
@ -278,7 +278,7 @@ int absMultiVecDeviceFloat2(int n, float alpha, void *deviceVecA,
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
if ((n > devVecA->size_) || (n>devVecB->size_ )) if ((n > devVecA->size_) || (n>devVecB->size_ ))
return SPGPU_UNSUPPORTED; return SPGPU_UNSUPPORTED;
@ -291,7 +291,7 @@ int absMultiVecDeviceFloat2(int n, float alpha, void *deviceVecA,
int absMultiVecDeviceFloat(int n, float alpha, void *deviceVecA) int absMultiVecDeviceFloat(int n, float alpha, void *deviceVecA)
{ int i = 0; { int i = 0;
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
if (n > devVecA->size_) if (n > devVecA->size_)
return SPGPU_UNSUPPORTED; return SPGPU_UNSUPPORTED;

@ -89,7 +89,7 @@ int setscalMultiVecDeviceDoubleComplex(cuDoubleComplex val, int first, int last,
{ int i=0; { int i=0;
int pitch = 0; int pitch = 0;
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
spgpuZsetscal(handle, first, last, indexBase, val, (cuDoubleComplex *) devVecX->v_); spgpuZsetscal(handle, first, last, indexBase, val, (cuDoubleComplex *) devVecX->v_);
@ -104,7 +104,7 @@ int geinsMultiVecDeviceDoubleComplex(int n, void* devMultiVecIrl, void* devMulti
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl;
struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
pitch = devVecIrl->pitch_; pitch = devVecIrl->pitch_;
if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) if ((n > devVecIrl->size_) || (n>devVecVal->size_ ))
return SPGPU_UNSUPPORTED; return SPGPU_UNSUPPORTED;
@ -144,7 +144,7 @@ int igathMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n,
int i, *idx =(int *) indexes;; int i, *idx =(int *) indexes;;
cuDoubleComplex *hv = (cuDoubleComplex *) host_values;; cuDoubleComplex *hv = (cuDoubleComplex *) host_values;;
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
i=0; i=0;
hv = &(hv[hfirst-indexBase]); hv = &(hv[hfirst-indexBase]);
@ -174,7 +174,7 @@ int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n,
cuDoubleComplex *hv = (cuDoubleComplex *) host_values; cuDoubleComplex *hv = (cuDoubleComplex *) host_values;
int *idx=(int *) indexes; int *idx=(int *) indexes;
struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
idx = &(idx[first-indexBase]); idx = &(idx[first-indexBase]);
hv = &(hv[hfirst-indexBase]); hv = &(hv[hfirst-indexBase]);
@ -186,7 +186,7 @@ int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n,
int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA) int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA)
{ int i=0; { int i=0;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
spgpuZmnrm2(handle, y_res, n,(cuDoubleComplex *)devVecA->v_, devVecA->count_, devVecA->pitch_); spgpuZmnrm2(handle, y_res, n,(cuDoubleComplex *)devVecA->v_, devVecA->count_, devVecA->pitch_);
@ -195,7 +195,7 @@ int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMult
int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA) int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA)
{ int i=0; { int i=0;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
spgpuZmamax(handle, y_res, n,(cuDoubleComplex *)devVecA->v_, spgpuZmamax(handle, y_res, n,(cuDoubleComplex *)devVecA->v_,
@ -205,7 +205,7 @@ int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMult
int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA) int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA)
{ int i=0; { int i=0;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
spgpuZmasum(handle, y_res, n,(cuDoubleComplex *)devVecA->v_, spgpuZmasum(handle, y_res, n,(cuDoubleComplex *)devVecA->v_,
@ -216,7 +216,7 @@ int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMult
int scalMultiVecDeviceDoubleComplex(cuDoubleComplex alpha, void* devMultiVecA) int scalMultiVecDeviceDoubleComplex(cuDoubleComplex alpha, void* devMultiVecA)
{ int i=0; { int i=0;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
// Note: inner kernel can handle aliased input/output // Note: inner kernel can handle aliased input/output
spgpuZscal(handle, (cuDoubleComplex *)devVecA->v_, devVecA->pitch_, spgpuZscal(handle, (cuDoubleComplex *)devVecA->v_, devVecA->pitch_,
@ -228,7 +228,7 @@ int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMulti
{int i=0; {int i=0;
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
spgpuZmdot(handle, y_res, n, (cuDoubleComplex*)devVecA->v_, spgpuZmdot(handle, y_res, n, (cuDoubleComplex*)devVecA->v_,
(cuDoubleComplex*)devVecB->v_,devVecA->count_,devVecB->pitch_); (cuDoubleComplex*)devVecB->v_,devVecA->count_,devVecB->pitch_);
@ -241,7 +241,7 @@ int axpbyMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, void* devMulti
int pitch = 0; int pitch = 0;
struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
pitch = devVecY->pitch_; pitch = devVecY->pitch_;
if ((n > devVecY->size_) || (n>devVecX->size_ )) if ((n > devVecY->size_) || (n>devVecX->size_ ))
return SPGPU_UNSUPPORTED; return SPGPU_UNSUPPORTED;
@ -258,7 +258,7 @@ int axyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha,
{ int i = 0; { int i = 0;
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
if ((n > devVecA->size_) || (n>devVecB->size_ )) if ((n > devVecA->size_) || (n>devVecB->size_ ))
return SPGPU_UNSUPPORTED; return SPGPU_UNSUPPORTED;
@ -275,7 +275,7 @@ int axybzMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceV
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ; struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ ))
return SPGPU_UNSUPPORTED; return SPGPU_UNSUPPORTED;
@ -293,7 +293,7 @@ int absMultiVecDeviceDoubleComplex2(int n, cuDoubleComplex alpha, void *deviceVe
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
if ((n > devVecA->size_) || (n>devVecB->size_ )) if ((n > devVecA->size_) || (n>devVecB->size_ ))
return SPGPU_UNSUPPORTED; return SPGPU_UNSUPPORTED;
@ -307,7 +307,7 @@ int absMultiVecDeviceDoubleComplex2(int n, cuDoubleComplex alpha, void *deviceVe
int absMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVecA) int absMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVecA)
{ int i = 0; { int i = 0;
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
spgpuHandle_t handle=psb_gpuGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
if (n > devVecA->size_) if (n > devVecA->size_)
return SPGPU_UNSUPPORTED; return SPGPU_UNSUPPORTED;

@ -573,8 +573,8 @@ program pdgenmv
! dense matrices ! dense matrices
type(psb_d_vect_type), target :: xv, bv, xg, bg type(psb_d_vect_type), target :: xv, bv, xg, bg
#ifdef HAVE_GPU #ifdef HAVE_GPU
type(psb_d_vect_gpu) :: vmold type(psb_d_vect_cuda) :: vmold
type(psb_i_vect_gpu) :: imold type(psb_i_vect_cuda) :: imold
#endif #endif
real(psb_dpk_), allocatable :: x1(:), x2(:), x0(:) real(psb_dpk_), allocatable :: x1(:), x2(:), x0(:)
! blacs parameters ! blacs parameters
@ -595,14 +595,14 @@ program pdgenmv
type(psb_d_rsb_sparse_mat), target :: arsb type(psb_d_rsb_sparse_mat), target :: arsb
#endif #endif
#ifdef HAVE_GPU #ifdef HAVE_GPU
type(psb_d_elg_sparse_mat), target :: aelg type(psb_d_cuda_elg_sparse_mat), target :: aelg
type(psb_d_csrg_sparse_mat), target :: acsrg type(psb_d_cuda_csrg_sparse_mat), target :: acsrg
#if CUDA_SHORT_VERSION <= 10 #if CUDA_SHORT_VERSION <= 10
type(psb_d_hybg_sparse_mat), target :: ahybg type(psb_d_cuda_hybg_sparse_mat), target :: ahybg
#endif #endif
type(psb_d_hlg_sparse_mat), target :: ahlg type(psb_d_cuda_hlg_sparse_mat), target :: ahlg
type(psb_d_hdiag_sparse_mat), target :: ahdiag type(psb_d_cuda_hdiag_sparse_mat), target :: ahdiag
type(psb_d_dnsg_sparse_mat), target :: adnsg type(psb_d_cuda_dnsg_sparse_mat), target :: adnsg
#endif #endif
class(psb_d_base_sparse_mat), pointer :: agmold, acmold class(psb_d_base_sparse_mat), pointer :: agmold, acmold
! other variables ! other variables

Loading…
Cancel
Save