Fix cuda interfaces for renaming

1 year ago · 9b713c177b
parent 6fa0bf7fe7
commit 9b713c177b
13 changed files with 128 additions and 128 deletions
--- a/cuda/cuda_util.c
+++ b/cuda/cuda_util.c
@ -37,7 +37,7 @@

 static int hasUVA=-1;
 static struct cudaDeviceProp *prop=NULL;
-static spgpuHandle_t psb_gpu_handle = NULL;
+static spgpuHandle_t psb_cuda_handle = NULL;
 static cublasHandle_t psb_cublas_handle = NULL;


@ -228,7 +228,7 @@ int gpuInit(int dev)
    return SPGPU_UNSPECIFIED;
  }
  if (!psb_cublas_handle)
-    psb_gpuCreateCublasHandle();
+    psb_cudaCreateCublasHandle();
  hasUVA=getDeviceHasUVA();
  
  return err;
@ -238,14 +238,14 @@ int gpuInit(int dev)
 void gpuClose()
 {
  cudaStream_t st1, st2;
-  if (! psb_gpu_handle)
-    st1=spgpuGetStream(psb_gpu_handle);
+  if (! psb_cuda_handle)
+    st1=spgpuGetStream(psb_cuda_handle);
  if (! psb_cublas_handle)
    cublasGetStream(psb_cublas_handle,&st2);

-  psb_gpuDestroyHandle();
+  psb_cudaDestroyHandle();
  if (st1 != st2) 
-    psb_gpuDestroyCublasHandle();
+    psb_cudaDestroyCublasHandle();
  free(prop);
  prop=NULL;
  hasUVA=-1;
@ -391,49 +391,49 @@ void cudaReset()
 }


-spgpuHandle_t psb_gpuGetHandle()
+spgpuHandle_t psb_cudaGetHandle()
 {
-  return psb_gpu_handle;
+  return psb_cuda_handle;
 }

-void psb_gpuCreateHandle()
+void psb_cudaCreateHandle()
 {
-  if (!psb_gpu_handle)
-    spgpuCreate(&psb_gpu_handle, getDevice());
+  if (!psb_cuda_handle)
+    spgpuCreate(&psb_cuda_handle, getDevice());
  
 }

-void psb_gpuDestroyHandle()
+void psb_cudaDestroyHandle()
 {
-  if (!psb_gpu_handle)
-    spgpuDestroy(psb_gpu_handle);
-  psb_gpu_handle = NULL; 
+  if (!psb_cuda_handle)
+    spgpuDestroy(psb_cuda_handle);
+  psb_cuda_handle = NULL; 
 }

-cudaStream_t psb_gpuGetStream()
+cudaStream_t psb_cudaGetStream()
 {
-  return spgpuGetStream(psb_gpu_handle);
+  return spgpuGetStream(psb_cuda_handle);
 }

-void  psb_gpuSetStream(cudaStream_t stream)
+void  psb_cudaSetStream(cudaStream_t stream)
 {
-  spgpuSetStream(psb_gpu_handle, stream);
+  spgpuSetStream(psb_cuda_handle, stream);
  return ;
 }



-cublasHandle_t psb_gpuGetCublasHandle()
+cublasHandle_t psb_cudaGetCublasHandle()
 {
  if (!psb_cublas_handle)
-    psb_gpuCreateCublasHandle();
+    psb_cudaCreateCublasHandle();
  return psb_cublas_handle;
 }
-void psb_gpuCreateCublasHandle()
+void psb_cudaCreateCublasHandle()
 {  if (!psb_cublas_handle)
    cublasCreate(&psb_cublas_handle);
 }
-void psb_gpuDestroyCublasHandle()
+void psb_cudaDestroyCublasHandle()
 {
  if (!psb_cublas_handle)
    cublasDestroy(psb_cublas_handle);
--- a/cuda/cuda_util.h
+++ b/cuda/cuda_util.h
@ -71,15 +71,15 @@ void cudaReset();
 void gpuClose();


-spgpuHandle_t psb_gpuGetHandle(); 
-void psb_gpuCreateHandle();
-void psb_gpuDestroyHandle();
-cudaStream_t psb_gpuGetStream();
-void  psb_gpuSetStream(cudaStream_t stream);
-
-cublasHandle_t psb_gpuGetCublasHandle(); 
-void psb_gpuCreateCublasHandle();
-void psb_gpuDestroyCublasHandle();
+spgpuHandle_t psb_cudaGetHandle(); 
+void psb_cudaCreateHandle();
+void psb_cudaDestroyHandle();
+cudaStream_t psb_cudaGetStream();
+void  psb_cudaSetStream(cudaStream_t stream);
+
+cublasHandle_t psb_cudaGetCublasHandle(); 
+void psb_cudaCreateCublasHandle();
+void psb_cudaDestroyCublasHandle();


 int allocateInt(void **, int);
--- a/cuda/cvectordev.c
+++ b/cuda/cvectordev.c
@ -89,7 +89,7 @@ int setscalMultiVecDeviceFloatComplex(cuFloatComplex val, int first, int last,
 { int i=0;
  int pitch = 0;
  struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

  spgpuCsetscal(handle, first, last, indexBase, val, (cuFloatComplex *) devVecX->v_);
  
@ -104,7 +104,7 @@ int geinsMultiVecDeviceFloatComplex(int n, void* devMultiVecIrl, void* devMultiV
  struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
  struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl;
  struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  pitch = devVecIrl->pitch_;
  if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) 
    return SPGPU_UNSUPPORTED;
@ -144,7 +144,7 @@ int igathMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n,
  int i, *idx =(int *) indexes;;
  cuFloatComplex *hv = (cuFloatComplex *) host_values;;  
  struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  
  i=0; 
  hv  = &(hv[hfirst-indexBase]);
@ -175,7 +175,7 @@ int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n,
  cuFloatComplex *hv  = (cuFloatComplex *) host_values;
  int *idx=(int *) indexes;
  struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

  idx = &(idx[first-indexBase]);
  hv  = &(hv[hfirst-indexBase]);
@ -187,7 +187,7 @@ int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n,

 int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
 { int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;

  spgpuCmnrm2(handle, y_res, n,(cuFloatComplex *)devVecA->v_, 
@ -197,7 +197,7 @@ int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiV

 int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
 { int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;

  spgpuCmamax(handle, y_res, n,(cuFloatComplex *)devVecA->v_, 
@ -207,7 +207,7 @@ int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiV

 int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
 { int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;

  spgpuCmasum(handle, y_res, n,(cuFloatComplex *)devVecA->v_, 
@ -218,7 +218,7 @@ int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiV

 int scalMultiVecDeviceFloatComplex(cuFloatComplex alpha, void* devMultiVecA)
 { int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
  // Note: inner kernel can handle aliased input/output
  spgpuCscal(handle, (cuFloatComplex *)devVecA->v_, devVecA->pitch_, 
@ -231,7 +231,7 @@ int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n,
 {int i=0;
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
  struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

  spgpuCmdot(handle, y_res, n, (cuFloatComplex*)devVecA->v_, 
 	     (cuFloatComplex*)devVecB->v_,devVecA->count_,devVecB->pitch_);
@ -244,7 +244,7 @@ int axpbyMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha, void* devMultiVe
  int pitch = 0;
  struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
  struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  pitch = devVecY->pitch_;
  if ((n > devVecY->size_) || (n>devVecX->size_ )) 
    return SPGPU_UNSUPPORTED;
@ -261,7 +261,7 @@ int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha,
 { int i = 0; 
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
  struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  if ((n > devVecA->size_) || (n>devVecB->size_ )) 
    return SPGPU_UNSUPPORTED;

@ -279,7 +279,7 @@ int axybzMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVec
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
  struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
  struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

  if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) 
    return SPGPU_UNSUPPORTED;
@ -297,7 +297,7 @@ int absMultiVecDeviceFloatComplex2(int n, cuFloatComplex alpha, void *deviceVecA
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
  struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;

-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

  if ((n > devVecA->size_) || (n>devVecB->size_ ))
    return SPGPU_UNSUPPORTED;
@ -311,7 +311,7 @@ int absMultiVecDeviceFloatComplex2(int n, cuFloatComplex alpha, void *deviceVecA
 int absMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA)
 { int i = 0; 
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  if (n > devVecA->size_)
    return SPGPU_UNSUPPORTED;

--- a/cuda/diagdev.c
+++ b/cuda/diagdev.c
@ -186,7 +186,7 @@ int spmvDiagDeviceDouble(void *deviceMat, double alpha, void* deviceX,
  struct DiagDevice *devMat = (struct DiagDevice *) deviceMat;
  struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
  struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

 #ifdef HAVE_SPGPU
 #ifdef VERBOSE
@ -268,7 +268,7 @@ int spmvDiagDeviceFloat(void *deviceMat, float alpha, void* deviceX,
  struct DiagDevice *devMat = (struct DiagDevice *) deviceMat;
  struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
  struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

 #ifdef HAVE_SPGPU
 #ifdef VERBOSE
--- a/cuda/dnsdev.c
+++ b/cuda/dnsdev.c
@ -126,7 +126,7 @@ int spmvDnsDeviceFloat(char transa, int m, int n, int k, float *alpha,
  int status;
 #ifdef HAVE_SPGPU

-  cublasHandle_t handle=psb_gpuGetCublasHandle();
+  cublasHandle_t handle=psb_cudaGetCublasHandle();
  cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C));
  /* Note: the M,N,K choices according to TRANS have already been handled in the caller */  
  if (n == 1) {
@ -157,7 +157,7 @@ int spmvDnsDeviceDouble(char transa, int m, int n, int k, double *alpha,
  int status;
 #ifdef HAVE_SPGPU

-  cublasHandle_t handle=psb_gpuGetCublasHandle();
+  cublasHandle_t handle=psb_cudaGetCublasHandle();
  cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C));
  /* Note: the M,N,K choices according to TRANS have already been handled in the caller */  
  if (n == 1) {
@ -188,7 +188,7 @@ int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k, float complex *a
  int status;
 #ifdef HAVE_SPGPU

-  cublasHandle_t handle=psb_gpuGetCublasHandle();
+  cublasHandle_t handle=psb_cudaGetCublasHandle();
  cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C));
  /* Note: the M,N,K choices according to TRANS have already been handled in the caller */  
  if (n == 1) {
@ -219,7 +219,7 @@ int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k, double complex
  int status;
 #ifdef HAVE_SPGPU

-  cublasHandle_t handle=psb_gpuGetCublasHandle();
+  cublasHandle_t handle=psb_cudaGetCublasHandle();
  cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C));
  /* Note: the M,N,K choices according to TRANS have already been handled in the caller */  
  if (n == 1) {
--- a/cuda/dvectordev.c
+++ b/cuda/dvectordev.c
@ -88,7 +88,7 @@ int setscalMultiVecDeviceDouble(double val, int first, int last,
 { int i=0;
  int pitch = 0;
  struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

  spgpuDsetscal(handle, first, last, indexBase, val, (double *) devVecX->v_);
  
@ -104,7 +104,7 @@ int geinsMultiVecDeviceDouble(int n, void* devMultiVecIrl, void* devMultiVecVal,
  struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
  struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl;
  struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  pitch = devVecIrl->pitch_;
  if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) 
    return SPGPU_UNSUPPORTED;
@ -143,7 +143,7 @@ int igathMultiVecDeviceDouble(void* deviceVec, int vectorId, int n,
  int i, *idx =(int *) indexes;;
  double *hv = (double *) host_values;;  
  struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  
  i=0; 
  hv  = &(hv[hfirst-indexBase]);
@ -168,7 +168,7 @@ int iscatMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, int first, v
  double *hv  = (double *) host_values;
  int *idx=(int *) indexes;
  struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

  idx = &(idx[first-indexBase]);
  hv  = &(hv[hfirst-indexBase]);
@ -179,7 +179,7 @@ int iscatMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, int first, v

 int scalMultiVecDeviceDouble(double alpha, void* devMultiVecA)
 { int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
  // Note: inner kernel can handle aliased input/output
  spgpuDscal(handle, (double *)devVecA->v_, devVecA->pitch_, 
@ -189,7 +189,7 @@ int scalMultiVecDeviceDouble(double alpha, void* devMultiVecA)

 int nrm2MultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA)
 { int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;

  spgpuDmnrm2(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_);
@ -198,7 +198,7 @@ int nrm2MultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA)

 int amaxMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA)
 { int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;

  spgpuDmamax(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_);
@ -207,7 +207,7 @@ int amaxMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA)

 int asumMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA)
 { int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;

  spgpuDmasum(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_);
@ -219,7 +219,7 @@ int dotMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA, void* devM
 {int i=0;
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
  struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

  spgpuDmdot(handle, y_res, n, (double*)devVecA->v_, (double*)devVecB->v_,devVecA->count_,devVecB->pitch_);
  return(i);
@ -231,7 +231,7 @@ int axpbyMultiVecDeviceDouble(int n,double alpha, void* devMultiVecX,
  int pitch = 0;
  struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
  struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  pitch = devVecY->pitch_;
  if ((n > devVecY->size_) || (n>devVecX->size_ )) 
    return SPGPU_UNSUPPORTED;
@ -246,7 +246,7 @@ int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceV
 { int i = 0; 
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
  struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  if ((n > devVecA->size_) || (n>devVecB->size_ )) 
    return SPGPU_UNSUPPORTED;

@ -262,7 +262,7 @@ int axybzMultiVecDeviceDouble(int n, double alpha, void *deviceVecA,
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
  struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
  struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

  if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) 
    return SPGPU_UNSUPPORTED;
@ -278,7 +278,7 @@ int absMultiVecDeviceDouble2(int n, double alpha, void *deviceVecA,
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
  struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;

-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

  if ((n > devVecA->size_) || (n>devVecB->size_ ))
    return SPGPU_UNSUPPORTED;
@ -291,7 +291,7 @@ int absMultiVecDeviceDouble2(int n, double alpha, void *deviceVecA,
 int absMultiVecDeviceDouble(int n, double alpha, void *deviceVecA)
 { int i = 0; 
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  if (n > devVecA->size_)
    return SPGPU_UNSUPPORTED;

--- a/cuda/elldev.c
+++ b/cuda/elldev.c
@ -158,7 +158,7 @@ void sspmdmm_gpu(float *z,int s, int vPitch, float *y, float alpha, float* cM, i
 		 int avgRowSize, int maxRowSize, int rows, int pitch, float *x, float beta, int firstIndex)
 {
  int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

  for (i=0; i<s; i++)
    {
@ -208,7 +208,7 @@ dspmdmm_gpu (double *z,int s, int vPitch, double *y, double alpha, double* cM, i
 	     double *x, double beta, int firstIndex)
 {
  int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  for (i=0; i<s; i++)
    {
      if (PASS_RS) {
@ -258,7 +258,7 @@ cspmdmm_gpu (cuFloatComplex *z, int s, int vPitch, cuFloatComplex *y,
 	     cuFloatComplex *x, cuFloatComplex beta, int firstIndex)
 {
  int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  for (i=0; i<s; i++)
    {
      if (PASS_RS) {
@ -300,7 +300,7 @@ zspmdmm_gpu (cuDoubleComplex *z, int s, int vPitch, cuDoubleComplex *y, cuDouble
 	     cuDoubleComplex *x, cuDoubleComplex beta, int firstIndex)
 {
  int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  for (i=0; i<s; i++)
    {
      if (PASS_RS) {
@ -520,7 +520,7 @@ int psiCopyCooToElgFloat(int nr, int nc, int nza, int hacksz, int ldv, int nzm,
  float *devVal;
  int *devIdisp, *devJa;
  spgpuHandle_t handle; 
-  handle = psb_gpuGetHandle();
+  handle = psb_cudaGetHandle();

  allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
  allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
@ -563,7 +563,7 @@ int psiCopyCooToElgDouble(int nr, int nc, int nza, int hacksz, int ldv, int nzm,
  double *devVal;
  int *devIdisp, *devJa;
  spgpuHandle_t handle; 
-  handle = psb_gpuGetHandle();
+  handle = psb_cudaGetHandle();

  allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
  allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
@ -605,7 +605,7 @@ int psiCopyCooToElgFloatComplex(int nr, int nc, int nza, int hacksz, int ldv, in
  float complex *devVal;
  int *devIdisp, *devJa;
  spgpuHandle_t handle; 
-  handle = psb_gpuGetHandle();
+  handle = psb_cudaGetHandle();

  allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
  allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
@ -648,7 +648,7 @@ int psiCopyCooToElgDoubleComplex(int nr, int nc, int nza, int hacksz, int ldv, i
  double complex *devVal;
  int *devIdisp, *devJa;
  spgpuHandle_t handle; 
-  handle = psb_gpuGetHandle();
+  handle = psb_cudaGetHandle();

  allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
  allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
@ -690,7 +690,7 @@ int dev_csputEllDeviceFloat(void* deviceMat, int nnz, void *ia, void *ja, void *
  struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia;
  struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja;
  float  alpha=1.0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

  if (nnz <=0) return SPGPU_SUCCESS; 
  //fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt);
@ -711,7 +711,7 @@ int dev_csputEllDeviceDouble(void* deviceMat, int nnz, void *ia, void *ja, void
  struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia;
  struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja;
  double  alpha=1.0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

  if (nnz <=0) return SPGPU_SUCCESS; 
  //fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt);
@ -734,7 +734,7 @@ int dev_csputEllDeviceFloatComplex(void* deviceMat, int nnz,
  struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia;
  struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja;
  cuFloatComplex alpha =  make_cuFloatComplex(1.0, 0.0);
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

  if (nnz <=0) return SPGPU_SUCCESS; 
  //fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt);
@ -756,7 +756,7 @@ int dev_csputEllDeviceDoubleComplex(void* deviceMat, int nnz,
  struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia;
  struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja;
  cuDoubleComplex alpha =  make_cuDoubleComplex(1.0, 0.0);
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

  if (nnz <=0) return SPGPU_SUCCESS; 
  //fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt);
--- a/cuda/hdiagdev.c
+++ b/cuda/hdiagdev.c
@ -264,7 +264,7 @@ int spmvHdiagDeviceDouble(void *deviceMat, double alpha, void* deviceX,
  struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat;
  struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
  struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

 #ifdef HAVE_SPGPU
 #ifdef VERBOSE
@ -395,7 +395,7 @@ int spmvHdiagDeviceFloat(void *deviceMat, float alpha, void* deviceX,
  struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat;
  struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
  struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

 #ifdef HAVE_SPGPU
 #ifdef VERBOSE
--- a/cuda/hlldev.c
+++ b/cuda/hlldev.c
@ -168,7 +168,7 @@ int spmvHllDeviceFloat(void *deviceMat, float alpha, void* deviceX,
  HllDevice *devMat = (HllDevice *) deviceMat;
  struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
  struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

 #ifdef HAVE_SPGPU
 #ifdef VERBOSE
@ -197,7 +197,7 @@ int spmvHllDeviceDouble(void *deviceMat, double alpha, void* deviceX,
  HllDevice *devMat = (HllDevice *) deviceMat;
  struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
  struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

 #ifdef HAVE_SPGPU
 #ifdef VERBOSE
@ -225,7 +225,7 @@ int spmvHllDeviceFloatComplex(void *deviceMat, float complex alpha, void* device
  HllDevice *devMat = (HllDevice *) deviceMat;
  struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
  struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

 #ifdef HAVE_SPGPU
  cuFloatComplex a = make_cuFloatComplex(crealf(alpha),cimagf(alpha));
@ -255,7 +255,7 @@ int spmvHllDeviceDoubleComplex(void *deviceMat, double complex alpha, void* devi
  HllDevice *devMat = (HllDevice *) deviceMat;
  struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
  struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

 #ifdef HAVE_SPGPU
  cuDoubleComplex a = make_cuDoubleComplex(creal(alpha),cimag(alpha));
@ -454,7 +454,7 @@ int psiCopyCooToHlgFloat(int nr, int nc, int nza, int hacksz, int noffs, int isz
  if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
  //cudaSync();

-  handle = psb_gpuGetHandle();
+  handle = psb_cudaGetHandle();
  psi_cuda_s_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz,
 			  (int *) devMat->rS, (int *) devMat->hackOffs,
 			  devIdisp,devJa,devVal,
@ -502,7 +502,7 @@ int psiCopyCooToHlgDouble(int nr, int nc, int nza, int hacksz, int noffs, int is
  //fprintf(stderr,"WriteRemoteBuffer   idisp  %d\n",i);
  //cudaSync();
  //fprintf(stderr," hacksz: %d \n",hacksz);
-  handle = psb_gpuGetHandle();
+  handle = psb_cudaGetHandle();
  psi_cuda_d_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz,
 			  (int *) devMat->rS, (int *) devMat->hackOffs,
 			  devIdisp,devJa,devVal,
@ -545,7 +545,7 @@ int psiCopyCooToHlgFloatComplex(int nr, int nc, int nza, int hacksz, int noffs,
  if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
  //cudaSync();

-  handle = psb_gpuGetHandle();
+  handle = psb_cudaGetHandle();
  psi_cuda_c_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz,
 			  (int *) devMat->rS, (int *) devMat->hackOffs,
 			  devIdisp,devJa,devVal,
@ -588,7 +588,7 @@ int psiCopyCooToHlgDoubleComplex(int nr, int nc, int nza, int hacksz, int noffs,
  if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
  //cudaSync();

-  handle = psb_gpuGetHandle();
+  handle = psb_cudaGetHandle();
  psi_cuda_z_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz,
 			  (int *) devMat->rS, (int *) devMat->hackOffs,
 			  devIdisp,devJa,devVal,
--- a/cuda/ivectordev.c
+++ b/cuda/ivectordev.c
@ -90,7 +90,7 @@ int setscalMultiVecDeviceInt(int val, int first, int last,
 { int i=0;
  int pitch = 0;
  struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

  spgpuIsetscal(handle, first, last, indexBase, val, (int *) devVecX->v_);
  
@ -105,7 +105,7 @@ int geinsMultiVecDeviceInt(int n, void* devMultiVecIrl, void* devMultiVecVal,
  struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
  struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl;
  struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  pitch = devVecIrl->pitch_;
  if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) 
    return SPGPU_UNSUPPORTED;
@ -144,7 +144,7 @@ int igathMultiVecDeviceInt(void* deviceVec, int vectorId, int n,
  int i, *idx =(int *) indexes;;
  int *hv = (int *) host_values;;  
  struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  
  i=0; 
  hv  = &(hv[hfirst-indexBase]);
@ -169,7 +169,7 @@ int iscatMultiVecDeviceInt(void* deviceVec, int vectorId, int n, int first, void
  int *hv  = (int *) host_values;
  int *idx=(int *) indexes;
  struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

  idx = &(idx[first-indexBase]);
  hv  = &(hv[hfirst-indexBase]);
--- a/cuda/svectordev.c
+++ b/cuda/svectordev.c
@ -88,7 +88,7 @@ int setscalMultiVecDeviceFloat(float val, int first, int last,
 { int i=0;
  int pitch = 0;
  struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

  spgpuSsetscal(handle, first, last, indexBase, val, (float *) devVecX->v_);
  
@ -103,7 +103,7 @@ int geinsMultiVecDeviceFloat(int n, void* devMultiVecIrl, void* devMultiVecVal,
  struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
  struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl;
  struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  pitch = devVecIrl->pitch_;
  if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) 
    return SPGPU_UNSUPPORTED;
@ -142,7 +142,7 @@ int igathMultiVecDeviceFloat(void* deviceVec, int vectorId, int n,
  int i, *idx =(int *) indexes;;
  float *hv = (float *) host_values;;  
  struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  
  i=0; 
  hv  = &(hv[hfirst-indexBase]);
@ -167,7 +167,7 @@ int iscatMultiVecDeviceFloat(void* deviceVec, int vectorId, int n, int first, vo
  float *hv  = (float *) host_values;
  int *idx=(int *) indexes;
  struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

  idx = &(idx[first-indexBase]);
  hv  = &(hv[hfirst-indexBase]);
@ -179,7 +179,7 @@ int iscatMultiVecDeviceFloat(void* deviceVec, int vectorId, int n, int first, vo

 int nrm2MultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA)
 { int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;

  spgpuSmnrm2(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_);
@ -188,7 +188,7 @@ int nrm2MultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA)

 int amaxMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA)
 { int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;

  spgpuSmamax(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_);
@ -197,7 +197,7 @@ int amaxMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA)

 int asumMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA)
 { int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;

  spgpuSmasum(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_);
@ -207,7 +207,7 @@ int asumMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA)

 int scalMultiVecDeviceFloat(float alpha, void* devMultiVecA)
 { int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
  // Note: inner kernel can handle aliased input/output
  spgpuSscal(handle, (float *)devVecA->v_, devVecA->pitch_, 
@ -219,7 +219,7 @@ int dotMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA, void* devMul
 {int i=0;
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
  struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

  spgpuSmdot(handle, y_res, n, (float*)devVecA->v_, (float*)devVecB->v_,devVecA->count_,devVecB->pitch_);
  return(i);
@ -231,7 +231,7 @@ int axpbyMultiVecDeviceFloat(int n,float alpha, void* devMultiVecX,
  int pitch = 0;
  struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
  struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  pitch = devVecY->pitch_;
  if ((n > devVecY->size_) || (n>devVecX->size_ )) 
    return SPGPU_UNSUPPORTED;
@ -246,7 +246,7 @@ int axyMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, void *deviceVec
 { int i = 0; 
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
  struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  if ((n > devVecA->size_) || (n>devVecB->size_ )) 
    return SPGPU_UNSUPPORTED;

@ -262,7 +262,7 @@ int axybzMultiVecDeviceFloat(int n, float alpha, void *deviceVecA,
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
  struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
  struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

  if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) 
    return SPGPU_UNSUPPORTED;
@ -278,7 +278,7 @@ int absMultiVecDeviceFloat2(int n, float alpha, void *deviceVecA,
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
  struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;

-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

  if ((n > devVecA->size_) || (n>devVecB->size_ ))
    return SPGPU_UNSUPPORTED;
@ -291,7 +291,7 @@ int absMultiVecDeviceFloat2(int n, float alpha, void *deviceVecA,
 int absMultiVecDeviceFloat(int n, float alpha, void *deviceVecA)
 { int i = 0; 
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  if (n > devVecA->size_)
    return SPGPU_UNSUPPORTED;

--- a/cuda/zvectordev.c
+++ b/cuda/zvectordev.c
@ -89,7 +89,7 @@ int setscalMultiVecDeviceDoubleComplex(cuDoubleComplex val, int first, int last,
 { int i=0;
  int pitch = 0;
  struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

  spgpuZsetscal(handle, first, last, indexBase, val, (cuDoubleComplex *) devVecX->v_);
  
@ -104,7 +104,7 @@ int geinsMultiVecDeviceDoubleComplex(int n, void* devMultiVecIrl, void* devMulti
  struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
  struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl;
  struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  pitch = devVecIrl->pitch_;
  if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) 
    return SPGPU_UNSUPPORTED;
@ -144,7 +144,7 @@ int igathMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n,
  int i, *idx =(int *) indexes;;
  cuDoubleComplex *hv = (cuDoubleComplex *) host_values;;  
  struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  
  i=0; 
  hv  = &(hv[hfirst-indexBase]);
@ -174,7 +174,7 @@ int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n,
  cuDoubleComplex *hv  = (cuDoubleComplex *) host_values;
  int *idx=(int *) indexes;
  struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

  idx = &(idx[first-indexBase]);
  hv  = &(hv[hfirst-indexBase]);
@ -186,7 +186,7 @@ int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n,

 int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA)
 { int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;

  spgpuZmnrm2(handle, y_res, n,(cuDoubleComplex *)devVecA->v_, devVecA->count_, devVecA->pitch_);
@ -195,7 +195,7 @@ int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMult

 int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA)
 { int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;

  spgpuZmamax(handle, y_res, n,(cuDoubleComplex *)devVecA->v_, 
@ -205,7 +205,7 @@ int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMult

 int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA)
 { int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;

  spgpuZmasum(handle, y_res, n,(cuDoubleComplex *)devVecA->v_, 
@ -216,7 +216,7 @@ int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMult

 int scalMultiVecDeviceDoubleComplex(cuDoubleComplex alpha, void* devMultiVecA)
 { int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
  // Note: inner kernel can handle aliased input/output
  spgpuZscal(handle, (cuDoubleComplex *)devVecA->v_, devVecA->pitch_, 
@ -228,7 +228,7 @@ int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMulti
 {int i=0;
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
  struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

  spgpuZmdot(handle, y_res, n, (cuDoubleComplex*)devVecA->v_,
 	     (cuDoubleComplex*)devVecB->v_,devVecA->count_,devVecB->pitch_);
@ -241,7 +241,7 @@ int axpbyMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, void* devMulti
  int pitch = 0;
  struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
  struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  pitch = devVecY->pitch_;
  if ((n > devVecY->size_) || (n>devVecX->size_ )) 
    return SPGPU_UNSUPPORTED;
@ -258,7 +258,7 @@ int axyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha,
 { int i = 0; 
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
  struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  if ((n > devVecA->size_) || (n>devVecB->size_ )) 
    return SPGPU_UNSUPPORTED;

@ -275,7 +275,7 @@ int axybzMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceV
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
  struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
  struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

  if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) 
    return SPGPU_UNSUPPORTED;
@ -293,7 +293,7 @@ int absMultiVecDeviceDoubleComplex2(int n, cuDoubleComplex alpha, void *deviceVe
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
  struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;

-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();

  if ((n > devVecA->size_) || (n>devVecB->size_ ))
    return SPGPU_UNSUPPORTED;
@ -307,7 +307,7 @@ int absMultiVecDeviceDoubleComplex2(int n, cuDoubleComplex alpha, void *deviceVe
 int absMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVecA)
 { int i = 0; 
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
  if (n > devVecA->size_)
    return SPGPU_UNSUPPORTED;

--- a/test/cudakern/dpdegenmv.F90
+++ b/test/cudakern/dpdegenmv.F90
@ -573,8 +573,8 @@ program pdgenmv
  ! dense matrices
  type(psb_d_vect_type), target :: xv, bv, xg, bg  
 #ifdef HAVE_GPU
-  type(psb_d_vect_gpu)  :: vmold
-  type(psb_i_vect_gpu)  :: imold
+  type(psb_d_vect_cuda)  :: vmold
+  type(psb_i_vect_cuda)  :: imold 
 #endif
  real(psb_dpk_), allocatable :: x1(:), x2(:), x0(:)
  ! blacs parameters
@ -595,14 +595,14 @@ program pdgenmv
  type(psb_d_rsb_sparse_mat), target   :: arsb
 #endif
 #ifdef HAVE_GPU
-  type(psb_d_elg_sparse_mat), target   :: aelg
-  type(psb_d_csrg_sparse_mat), target  :: acsrg
+  type(psb_d_cuda_elg_sparse_mat), target   :: aelg
+  type(psb_d_cuda_csrg_sparse_mat), target  :: acsrg
 #if CUDA_SHORT_VERSION <= 10
-  type(psb_d_hybg_sparse_mat), target  :: ahybg
+  type(psb_d_cuda_hybg_sparse_mat), target  :: ahybg
 #endif
-  type(psb_d_hlg_sparse_mat), target   :: ahlg
-  type(psb_d_hdiag_sparse_mat), target   :: ahdiag
-  type(psb_d_dnsg_sparse_mat), target   :: adnsg
+  type(psb_d_cuda_hlg_sparse_mat), target   :: ahlg
+  type(psb_d_cuda_hdiag_sparse_mat), target   :: ahdiag
+  type(psb_d_cuda_dnsg_sparse_mat), target   :: adnsg
 #endif
  class(psb_d_base_sparse_mat), pointer :: agmold, acmold
  ! other variables