From 9b713c177bf90424c634c7c29c5a773835629459 Mon Sep 17 00:00:00 2001
From: sfilippone <filippone.salvatore@gmail.com>
Date: Tue, 28 Nov 2023 19:46:20 +0100
Subject: [PATCH] Fix cuda interfaces for renaming

---
 cuda/cuda_util.c            | 46 ++++++++++++++++++-------------------
 cuda/cuda_util.h            | 18 +++++++--------
 cuda/cvectordev.c           | 28 +++++++++++-----------
 cuda/diagdev.c              |  4 ++--
 cuda/dnsdev.c               |  8 +++----
 cuda/dvectordev.c           | 28 +++++++++++-----------
 cuda/elldev.c               | 24 +++++++++----------
 cuda/hdiagdev.c             |  4 ++--
 cuda/hlldev.c               | 16 ++++++-------
 cuda/ivectordev.c           |  8 +++----
 cuda/svectordev.c           | 28 +++++++++++-----------
 cuda/zvectordev.c           | 28 +++++++++++-----------
 test/cudakern/dpdegenmv.F90 | 16 ++++++-------
 13 files changed, 128 insertions(+), 128 deletions(-)

diff --git a/cuda/cuda_util.c b/cuda/cuda_util.c
index 63c38b53..0fe4a8b7 100644
--- a/cuda/cuda_util.c
+++ b/cuda/cuda_util.c
@@ -37,7 +37,7 @@
 
 static int hasUVA=-1;
 static struct cudaDeviceProp *prop=NULL;
-static spgpuHandle_t psb_gpu_handle = NULL;
+static spgpuHandle_t psb_cuda_handle = NULL;
 static cublasHandle_t psb_cublas_handle = NULL;
 
 
@@ -228,7 +228,7 @@ int gpuInit(int dev)
     return SPGPU_UNSPECIFIED;
   }
   if (!psb_cublas_handle)
-    psb_gpuCreateCublasHandle();
+    psb_cudaCreateCublasHandle();
   hasUVA=getDeviceHasUVA();
   
   return err;
@@ -238,14 +238,14 @@ int gpuInit(int dev)
 void gpuClose()
 {
   cudaStream_t st1, st2;
-  if (! psb_gpu_handle)
-    st1=spgpuGetStream(psb_gpu_handle);
+  if (! psb_cuda_handle)
+    st1=spgpuGetStream(psb_cuda_handle);
   if (! psb_cublas_handle)
     cublasGetStream(psb_cublas_handle,&st2);
 
-  psb_gpuDestroyHandle();
+  psb_cudaDestroyHandle();
   if (st1 != st2) 
-    psb_gpuDestroyCublasHandle();
+    psb_cudaDestroyCublasHandle();
   free(prop);
   prop=NULL;
   hasUVA=-1;
@@ -391,49 +391,49 @@ void cudaReset()
 }
 
 
-spgpuHandle_t psb_gpuGetHandle()
+spgpuHandle_t psb_cudaGetHandle()
 {
-  return psb_gpu_handle;
+  return psb_cuda_handle;
 }
 
-void psb_gpuCreateHandle()
+void psb_cudaCreateHandle()
 {
-  if (!psb_gpu_handle)
-    spgpuCreate(&psb_gpu_handle, getDevice());
+  if (!psb_cuda_handle)
+    spgpuCreate(&psb_cuda_handle, getDevice());
   
 }
 
-void psb_gpuDestroyHandle()
+void psb_cudaDestroyHandle()
 {
-  if (!psb_gpu_handle)
-    spgpuDestroy(psb_gpu_handle);
-  psb_gpu_handle = NULL; 
+  if (!psb_cuda_handle)
+    spgpuDestroy(psb_cuda_handle);
+  psb_cuda_handle = NULL; 
 }
 
-cudaStream_t psb_gpuGetStream()
+cudaStream_t psb_cudaGetStream()
 {
-  return spgpuGetStream(psb_gpu_handle);
+  return spgpuGetStream(psb_cuda_handle);
 }
 
-void  psb_gpuSetStream(cudaStream_t stream)
+void  psb_cudaSetStream(cudaStream_t stream)
 {
-  spgpuSetStream(psb_gpu_handle, stream);
+  spgpuSetStream(psb_cuda_handle, stream);
   return ;
 }
 
 
 
-cublasHandle_t psb_gpuGetCublasHandle()
+cublasHandle_t psb_cudaGetCublasHandle()
 {
   if (!psb_cublas_handle)
-    psb_gpuCreateCublasHandle();
+    psb_cudaCreateCublasHandle();
   return psb_cublas_handle;
 }
-void psb_gpuCreateCublasHandle()
+void psb_cudaCreateCublasHandle()
 {  if (!psb_cublas_handle)
     cublasCreate(&psb_cublas_handle);
 }
-void psb_gpuDestroyCublasHandle()
+void psb_cudaDestroyCublasHandle()
 {
   if (!psb_cublas_handle)
     cublasDestroy(psb_cublas_handle);
diff --git a/cuda/cuda_util.h b/cuda/cuda_util.h
index 03c7b488..789c08f4 100644
--- a/cuda/cuda_util.h
+++ b/cuda/cuda_util.h
@@ -71,15 +71,15 @@ void cudaReset();
 void gpuClose();
 
 
-spgpuHandle_t psb_gpuGetHandle(); 
-void psb_gpuCreateHandle();
-void psb_gpuDestroyHandle();
-cudaStream_t psb_gpuGetStream();
-void  psb_gpuSetStream(cudaStream_t stream);
-
-cublasHandle_t psb_gpuGetCublasHandle(); 
-void psb_gpuCreateCublasHandle();
-void psb_gpuDestroyCublasHandle();
+spgpuHandle_t psb_cudaGetHandle(); 
+void psb_cudaCreateHandle();
+void psb_cudaDestroyHandle();
+cudaStream_t psb_cudaGetStream();
+void  psb_cudaSetStream(cudaStream_t stream);
+
+cublasHandle_t psb_cudaGetCublasHandle(); 
+void psb_cudaCreateCublasHandle();
+void psb_cudaDestroyCublasHandle();
 
 
 int allocateInt(void **, int);
diff --git a/cuda/cvectordev.c b/cuda/cvectordev.c
index db55caef..1dc16667 100644
--- a/cuda/cvectordev.c
+++ b/cuda/cvectordev.c
@@ -89,7 +89,7 @@ int setscalMultiVecDeviceFloatComplex(cuFloatComplex val, int first, int last,
 { int i=0;
   int pitch = 0;
   struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
   spgpuCsetscal(handle, first, last, indexBase, val, (cuFloatComplex *) devVecX->v_);
   
@@ -104,7 +104,7 @@ int geinsMultiVecDeviceFloatComplex(int n, void* devMultiVecIrl, void* devMultiV
   struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
   struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl;
   struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   pitch = devVecIrl->pitch_;
   if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) 
     return SPGPU_UNSUPPORTED;
@@ -144,7 +144,7 @@ int igathMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n,
   int i, *idx =(int *) indexes;;
   cuFloatComplex *hv = (cuFloatComplex *) host_values;;  
   struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   
   i=0; 
   hv  = &(hv[hfirst-indexBase]);
@@ -175,7 +175,7 @@ int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n,
   cuFloatComplex *hv  = (cuFloatComplex *) host_values;
   int *idx=(int *) indexes;
   struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
   idx = &(idx[first-indexBase]);
   hv  = &(hv[hfirst-indexBase]);
@@ -187,7 +187,7 @@ int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n,
 
 int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
 { int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
 
   spgpuCmnrm2(handle, y_res, n,(cuFloatComplex *)devVecA->v_, 
@@ -197,7 +197,7 @@ int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiV
 
 int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
 { int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
 
   spgpuCmamax(handle, y_res, n,(cuFloatComplex *)devVecA->v_, 
@@ -207,7 +207,7 @@ int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiV
 
 int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
 { int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
 
   spgpuCmasum(handle, y_res, n,(cuFloatComplex *)devVecA->v_, 
@@ -218,7 +218,7 @@ int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiV
 
 int scalMultiVecDeviceFloatComplex(cuFloatComplex alpha, void* devMultiVecA)
 { int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
   // Note: inner kernel can handle aliased input/output
   spgpuCscal(handle, (cuFloatComplex *)devVecA->v_, devVecA->pitch_, 
@@ -231,7 +231,7 @@ int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n,
 {int i=0;
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
   struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
   spgpuCmdot(handle, y_res, n, (cuFloatComplex*)devVecA->v_, 
 	     (cuFloatComplex*)devVecB->v_,devVecA->count_,devVecB->pitch_);
@@ -244,7 +244,7 @@ int axpbyMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha, void* devMultiVe
   int pitch = 0;
   struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
   struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   pitch = devVecY->pitch_;
   if ((n > devVecY->size_) || (n>devVecX->size_ )) 
     return SPGPU_UNSUPPORTED;
@@ -261,7 +261,7 @@ int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha,
 { int i = 0; 
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
   struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   if ((n > devVecA->size_) || (n>devVecB->size_ )) 
     return SPGPU_UNSUPPORTED;
 
@@ -279,7 +279,7 @@ int axybzMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVec
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
   struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
   struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
   if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) 
     return SPGPU_UNSUPPORTED;
@@ -297,7 +297,7 @@ int absMultiVecDeviceFloatComplex2(int n, cuFloatComplex alpha, void *deviceVecA
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
   struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
 
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
   if ((n > devVecA->size_) || (n>devVecB->size_ ))
     return SPGPU_UNSUPPORTED;
@@ -311,7 +311,7 @@ int absMultiVecDeviceFloatComplex2(int n, cuFloatComplex alpha, void *deviceVecA
 int absMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA)
 { int i = 0; 
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   if (n > devVecA->size_)
     return SPGPU_UNSUPPORTED;
 
diff --git a/cuda/diagdev.c b/cuda/diagdev.c
index 64879455..a2acf1f4 100644
--- a/cuda/diagdev.c
+++ b/cuda/diagdev.c
@@ -186,7 +186,7 @@ int spmvDiagDeviceDouble(void *deviceMat, double alpha, void* deviceX,
   struct DiagDevice *devMat = (struct DiagDevice *) deviceMat;
   struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
   struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
 #ifdef HAVE_SPGPU
 #ifdef VERBOSE
@@ -268,7 +268,7 @@ int spmvDiagDeviceFloat(void *deviceMat, float alpha, void* deviceX,
   struct DiagDevice *devMat = (struct DiagDevice *) deviceMat;
   struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
   struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
 #ifdef HAVE_SPGPU
 #ifdef VERBOSE
diff --git a/cuda/dnsdev.c b/cuda/dnsdev.c
index fb4d339c..25cddc87 100644
--- a/cuda/dnsdev.c
+++ b/cuda/dnsdev.c
@@ -126,7 +126,7 @@ int spmvDnsDeviceFloat(char transa, int m, int n, int k, float *alpha,
   int status;
 #ifdef HAVE_SPGPU
 
-  cublasHandle_t handle=psb_gpuGetCublasHandle();
+  cublasHandle_t handle=psb_cudaGetCublasHandle();
   cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C));
   /* Note: the M,N,K choices according to TRANS have already been handled in the caller */  
   if (n == 1) {
@@ -157,7 +157,7 @@ int spmvDnsDeviceDouble(char transa, int m, int n, int k, double *alpha,
   int status;
 #ifdef HAVE_SPGPU
 
-  cublasHandle_t handle=psb_gpuGetCublasHandle();
+  cublasHandle_t handle=psb_cudaGetCublasHandle();
   cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C));
   /* Note: the M,N,K choices according to TRANS have already been handled in the caller */  
   if (n == 1) {
@@ -188,7 +188,7 @@ int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k, float complex *a
   int status;
 #ifdef HAVE_SPGPU
 
-  cublasHandle_t handle=psb_gpuGetCublasHandle();
+  cublasHandle_t handle=psb_cudaGetCublasHandle();
   cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C));
   /* Note: the M,N,K choices according to TRANS have already been handled in the caller */  
   if (n == 1) {
@@ -219,7 +219,7 @@ int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k, double complex
   int status;
 #ifdef HAVE_SPGPU
 
-  cublasHandle_t handle=psb_gpuGetCublasHandle();
+  cublasHandle_t handle=psb_cudaGetCublasHandle();
   cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C));
   /* Note: the M,N,K choices according to TRANS have already been handled in the caller */  
   if (n == 1) {
diff --git a/cuda/dvectordev.c b/cuda/dvectordev.c
index 8b020c16..eae82c1e 100644
--- a/cuda/dvectordev.c
+++ b/cuda/dvectordev.c
@@ -88,7 +88,7 @@ int setscalMultiVecDeviceDouble(double val, int first, int last,
 { int i=0;
   int pitch = 0;
   struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
   spgpuDsetscal(handle, first, last, indexBase, val, (double *) devVecX->v_);
   
@@ -104,7 +104,7 @@ int geinsMultiVecDeviceDouble(int n, void* devMultiVecIrl, void* devMultiVecVal,
   struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
   struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl;
   struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   pitch = devVecIrl->pitch_;
   if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) 
     return SPGPU_UNSUPPORTED;
@@ -143,7 +143,7 @@ int igathMultiVecDeviceDouble(void* deviceVec, int vectorId, int n,
   int i, *idx =(int *) indexes;;
   double *hv = (double *) host_values;;  
   struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   
   i=0; 
   hv  = &(hv[hfirst-indexBase]);
@@ -168,7 +168,7 @@ int iscatMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, int first, v
   double *hv  = (double *) host_values;
   int *idx=(int *) indexes;
   struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
   idx = &(idx[first-indexBase]);
   hv  = &(hv[hfirst-indexBase]);
@@ -179,7 +179,7 @@ int iscatMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, int first, v
 
 int scalMultiVecDeviceDouble(double alpha, void* devMultiVecA)
 { int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
   // Note: inner kernel can handle aliased input/output
   spgpuDscal(handle, (double *)devVecA->v_, devVecA->pitch_, 
@@ -189,7 +189,7 @@ int scalMultiVecDeviceDouble(double alpha, void* devMultiVecA)
 
 int nrm2MultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA)
 { int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
 
   spgpuDmnrm2(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_);
@@ -198,7 +198,7 @@ int nrm2MultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA)
 
 int amaxMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA)
 { int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
 
   spgpuDmamax(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_);
@@ -207,7 +207,7 @@ int amaxMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA)
 
 int asumMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA)
 { int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
 
   spgpuDmasum(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_);
@@ -219,7 +219,7 @@ int dotMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA, void* devM
 {int i=0;
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
   struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
   spgpuDmdot(handle, y_res, n, (double*)devVecA->v_, (double*)devVecB->v_,devVecA->count_,devVecB->pitch_);
   return(i);
@@ -231,7 +231,7 @@ int axpbyMultiVecDeviceDouble(int n,double alpha, void* devMultiVecX,
   int pitch = 0;
   struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
   struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   pitch = devVecY->pitch_;
   if ((n > devVecY->size_) || (n>devVecX->size_ )) 
     return SPGPU_UNSUPPORTED;
@@ -246,7 +246,7 @@ int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceV
 { int i = 0; 
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
   struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   if ((n > devVecA->size_) || (n>devVecB->size_ )) 
     return SPGPU_UNSUPPORTED;
 
@@ -262,7 +262,7 @@ int axybzMultiVecDeviceDouble(int n, double alpha, void *deviceVecA,
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
   struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
   struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
   if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) 
     return SPGPU_UNSUPPORTED;
@@ -278,7 +278,7 @@ int absMultiVecDeviceDouble2(int n, double alpha, void *deviceVecA,
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
   struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
 
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
   if ((n > devVecA->size_) || (n>devVecB->size_ ))
     return SPGPU_UNSUPPORTED;
@@ -291,7 +291,7 @@ int absMultiVecDeviceDouble2(int n, double alpha, void *deviceVecA,
 int absMultiVecDeviceDouble(int n, double alpha, void *deviceVecA)
 { int i = 0; 
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   if (n > devVecA->size_)
     return SPGPU_UNSUPPORTED;
 
diff --git a/cuda/elldev.c b/cuda/elldev.c
index 8fd7aeb5..eff89efa 100644
--- a/cuda/elldev.c
+++ b/cuda/elldev.c
@@ -158,7 +158,7 @@ void sspmdmm_gpu(float *z,int s, int vPitch, float *y, float alpha, float* cM, i
 		 int avgRowSize, int maxRowSize, int rows, int pitch, float *x, float beta, int firstIndex)
 {
   int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
   for (i=0; i<s; i++)
     {
@@ -208,7 +208,7 @@ dspmdmm_gpu (double *z,int s, int vPitch, double *y, double alpha, double* cM, i
 	     double *x, double beta, int firstIndex)
 {
   int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   for (i=0; i<s; i++)
     {
       if (PASS_RS) {
@@ -258,7 +258,7 @@ cspmdmm_gpu (cuFloatComplex *z, int s, int vPitch, cuFloatComplex *y,
 	     cuFloatComplex *x, cuFloatComplex beta, int firstIndex)
 {
   int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   for (i=0; i<s; i++)
     {
       if (PASS_RS) {
@@ -300,7 +300,7 @@ zspmdmm_gpu (cuDoubleComplex *z, int s, int vPitch, cuDoubleComplex *y, cuDouble
 	     cuDoubleComplex *x, cuDoubleComplex beta, int firstIndex)
 {
   int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   for (i=0; i<s; i++)
     {
       if (PASS_RS) {
@@ -520,7 +520,7 @@ int psiCopyCooToElgFloat(int nr, int nc, int nza, int hacksz, int ldv, int nzm,
   float *devVal;
   int *devIdisp, *devJa;
   spgpuHandle_t handle; 
-  handle = psb_gpuGetHandle();
+  handle = psb_cudaGetHandle();
 
   allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
   allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
@@ -563,7 +563,7 @@ int psiCopyCooToElgDouble(int nr, int nc, int nza, int hacksz, int ldv, int nzm,
   double *devVal;
   int *devIdisp, *devJa;
   spgpuHandle_t handle; 
-  handle = psb_gpuGetHandle();
+  handle = psb_cudaGetHandle();
 
   allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
   allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
@@ -605,7 +605,7 @@ int psiCopyCooToElgFloatComplex(int nr, int nc, int nza, int hacksz, int ldv, in
   float complex *devVal;
   int *devIdisp, *devJa;
   spgpuHandle_t handle; 
-  handle = psb_gpuGetHandle();
+  handle = psb_cudaGetHandle();
 
   allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
   allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
@@ -648,7 +648,7 @@ int psiCopyCooToElgDoubleComplex(int nr, int nc, int nza, int hacksz, int ldv, i
   double complex *devVal;
   int *devIdisp, *devJa;
   spgpuHandle_t handle; 
-  handle = psb_gpuGetHandle();
+  handle = psb_cudaGetHandle();
 
   allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int));
   allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int));
@@ -690,7 +690,7 @@ int dev_csputEllDeviceFloat(void* deviceMat, int nnz, void *ia, void *ja, void *
   struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia;
   struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja;
   float  alpha=1.0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
   if (nnz <=0) return SPGPU_SUCCESS; 
   //fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt);
@@ -711,7 +711,7 @@ int dev_csputEllDeviceDouble(void* deviceMat, int nnz, void *ia, void *ja, void
   struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia;
   struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja;
   double  alpha=1.0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
   if (nnz <=0) return SPGPU_SUCCESS; 
   //fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt);
@@ -734,7 +734,7 @@ int dev_csputEllDeviceFloatComplex(void* deviceMat, int nnz,
   struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia;
   struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja;
   cuFloatComplex alpha =  make_cuFloatComplex(1.0, 0.0);
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
   if (nnz <=0) return SPGPU_SUCCESS; 
   //fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt);
@@ -756,7 +756,7 @@ int dev_csputEllDeviceDoubleComplex(void* deviceMat, int nnz,
   struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia;
   struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja;
   cuDoubleComplex alpha =  make_cuDoubleComplex(1.0, 0.0);
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
   if (nnz <=0) return SPGPU_SUCCESS; 
   //fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt);
diff --git a/cuda/hdiagdev.c b/cuda/hdiagdev.c
index 4e640278..dd5a23cd 100644
--- a/cuda/hdiagdev.c
+++ b/cuda/hdiagdev.c
@@ -264,7 +264,7 @@ int spmvHdiagDeviceDouble(void *deviceMat, double alpha, void* deviceX,
   struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat;
   struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
   struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
 #ifdef HAVE_SPGPU
 #ifdef VERBOSE
@@ -395,7 +395,7 @@ int spmvHdiagDeviceFloat(void *deviceMat, float alpha, void* deviceX,
   struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat;
   struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
   struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
 #ifdef HAVE_SPGPU
 #ifdef VERBOSE
diff --git a/cuda/hlldev.c b/cuda/hlldev.c
index afff1df1..42f396dd 100644
--- a/cuda/hlldev.c
+++ b/cuda/hlldev.c
@@ -168,7 +168,7 @@ int spmvHllDeviceFloat(void *deviceMat, float alpha, void* deviceX,
   HllDevice *devMat = (HllDevice *) deviceMat;
   struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
   struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
 #ifdef HAVE_SPGPU
 #ifdef VERBOSE
@@ -197,7 +197,7 @@ int spmvHllDeviceDouble(void *deviceMat, double alpha, void* deviceX,
   HllDevice *devMat = (HllDevice *) deviceMat;
   struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
   struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
 #ifdef HAVE_SPGPU
 #ifdef VERBOSE
@@ -225,7 +225,7 @@ int spmvHllDeviceFloatComplex(void *deviceMat, float complex alpha, void* device
   HllDevice *devMat = (HllDevice *) deviceMat;
   struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
   struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
 #ifdef HAVE_SPGPU
   cuFloatComplex a = make_cuFloatComplex(crealf(alpha),cimagf(alpha));
@@ -255,7 +255,7 @@ int spmvHllDeviceDoubleComplex(void *deviceMat, double complex alpha, void* devi
   HllDevice *devMat = (HllDevice *) deviceMat;
   struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
   struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
 #ifdef HAVE_SPGPU
   cuDoubleComplex a = make_cuDoubleComplex(creal(alpha),cimag(alpha));
@@ -454,7 +454,7 @@ int psiCopyCooToHlgFloat(int nr, int nc, int nza, int hacksz, int noffs, int isz
   if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
   //cudaSync();
 
-  handle = psb_gpuGetHandle();
+  handle = psb_cudaGetHandle();
   psi_cuda_s_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz,
 			  (int *) devMat->rS, (int *) devMat->hackOffs,
 			  devIdisp,devJa,devVal,
@@ -502,7 +502,7 @@ int psiCopyCooToHlgDouble(int nr, int nc, int nza, int hacksz, int noffs, int is
   //fprintf(stderr,"WriteRemoteBuffer   idisp  %d\n",i);
   //cudaSync();
   //fprintf(stderr," hacksz: %d \n",hacksz);
-  handle = psb_gpuGetHandle();
+  handle = psb_cudaGetHandle();
   psi_cuda_d_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz,
 			  (int *) devMat->rS, (int *) devMat->hackOffs,
 			  devIdisp,devJa,devVal,
@@ -545,7 +545,7 @@ int psiCopyCooToHlgFloatComplex(int nr, int nc, int nza, int hacksz, int noffs,
   if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
   //cudaSync();
 
-  handle = psb_gpuGetHandle();
+  handle = psb_cudaGetHandle();
   psi_cuda_c_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz,
 			  (int *) devMat->rS, (int *) devMat->hackOffs,
 			  devIdisp,devJa,devVal,
@@ -588,7 +588,7 @@ int psiCopyCooToHlgDoubleComplex(int nr, int nc, int nza, int hacksz, int noffs,
   if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int));
   //cudaSync();
 
-  handle = psb_gpuGetHandle();
+  handle = psb_cudaGetHandle();
   psi_cuda_z_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz,
 			  (int *) devMat->rS, (int *) devMat->hackOffs,
 			  devIdisp,devJa,devVal,
diff --git a/cuda/ivectordev.c b/cuda/ivectordev.c
index 93636465..71d5c472 100644
--- a/cuda/ivectordev.c
+++ b/cuda/ivectordev.c
@@ -90,7 +90,7 @@ int setscalMultiVecDeviceInt(int val, int first, int last,
 { int i=0;
   int pitch = 0;
   struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
   spgpuIsetscal(handle, first, last, indexBase, val, (int *) devVecX->v_);
   
@@ -105,7 +105,7 @@ int geinsMultiVecDeviceInt(int n, void* devMultiVecIrl, void* devMultiVecVal,
   struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
   struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl;
   struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   pitch = devVecIrl->pitch_;
   if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) 
     return SPGPU_UNSUPPORTED;
@@ -144,7 +144,7 @@ int igathMultiVecDeviceInt(void* deviceVec, int vectorId, int n,
   int i, *idx =(int *) indexes;;
   int *hv = (int *) host_values;;  
   struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   
   i=0; 
   hv  = &(hv[hfirst-indexBase]);
@@ -169,7 +169,7 @@ int iscatMultiVecDeviceInt(void* deviceVec, int vectorId, int n, int first, void
   int *hv  = (int *) host_values;
   int *idx=(int *) indexes;
   struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
   idx = &(idx[first-indexBase]);
   hv  = &(hv[hfirst-indexBase]);
diff --git a/cuda/svectordev.c b/cuda/svectordev.c
index d193a4d8..9a41ae1a 100644
--- a/cuda/svectordev.c
+++ b/cuda/svectordev.c
@@ -88,7 +88,7 @@ int setscalMultiVecDeviceFloat(float val, int first, int last,
 { int i=0;
   int pitch = 0;
   struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
   spgpuSsetscal(handle, first, last, indexBase, val, (float *) devVecX->v_);
   
@@ -103,7 +103,7 @@ int geinsMultiVecDeviceFloat(int n, void* devMultiVecIrl, void* devMultiVecVal,
   struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
   struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl;
   struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   pitch = devVecIrl->pitch_;
   if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) 
     return SPGPU_UNSUPPORTED;
@@ -142,7 +142,7 @@ int igathMultiVecDeviceFloat(void* deviceVec, int vectorId, int n,
   int i, *idx =(int *) indexes;;
   float *hv = (float *) host_values;;  
   struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   
   i=0; 
   hv  = &(hv[hfirst-indexBase]);
@@ -167,7 +167,7 @@ int iscatMultiVecDeviceFloat(void* deviceVec, int vectorId, int n, int first, vo
   float *hv  = (float *) host_values;
   int *idx=(int *) indexes;
   struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
   idx = &(idx[first-indexBase]);
   hv  = &(hv[hfirst-indexBase]);
@@ -179,7 +179,7 @@ int iscatMultiVecDeviceFloat(void* deviceVec, int vectorId, int n, int first, vo
 
 int nrm2MultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA)
 { int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
 
   spgpuSmnrm2(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_);
@@ -188,7 +188,7 @@ int nrm2MultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA)
 
 int amaxMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA)
 { int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
 
   spgpuSmamax(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_);
@@ -197,7 +197,7 @@ int amaxMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA)
 
 int asumMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA)
 { int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
 
   spgpuSmasum(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_);
@@ -207,7 +207,7 @@ int asumMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA)
 
 int scalMultiVecDeviceFloat(float alpha, void* devMultiVecA)
 { int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
   // Note: inner kernel can handle aliased input/output
   spgpuSscal(handle, (float *)devVecA->v_, devVecA->pitch_, 
@@ -219,7 +219,7 @@ int dotMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA, void* devMul
 {int i=0;
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
   struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
   spgpuSmdot(handle, y_res, n, (float*)devVecA->v_, (float*)devVecB->v_,devVecA->count_,devVecB->pitch_);
   return(i);
@@ -231,7 +231,7 @@ int axpbyMultiVecDeviceFloat(int n,float alpha, void* devMultiVecX,
   int pitch = 0;
   struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
   struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   pitch = devVecY->pitch_;
   if ((n > devVecY->size_) || (n>devVecX->size_ )) 
     return SPGPU_UNSUPPORTED;
@@ -246,7 +246,7 @@ int axyMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, void *deviceVec
 { int i = 0; 
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
   struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   if ((n > devVecA->size_) || (n>devVecB->size_ )) 
     return SPGPU_UNSUPPORTED;
 
@@ -262,7 +262,7 @@ int axybzMultiVecDeviceFloat(int n, float alpha, void *deviceVecA,
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
   struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
   struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
   if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) 
     return SPGPU_UNSUPPORTED;
@@ -278,7 +278,7 @@ int absMultiVecDeviceFloat2(int n, float alpha, void *deviceVecA,
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
   struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
 
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
   if ((n > devVecA->size_) || (n>devVecB->size_ ))
     return SPGPU_UNSUPPORTED;
@@ -291,7 +291,7 @@ int absMultiVecDeviceFloat2(int n, float alpha, void *deviceVecA,
 int absMultiVecDeviceFloat(int n, float alpha, void *deviceVecA)
 { int i = 0; 
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   if (n > devVecA->size_)
     return SPGPU_UNSUPPORTED;
 
diff --git a/cuda/zvectordev.c b/cuda/zvectordev.c
index c245719f..c3671a86 100644
--- a/cuda/zvectordev.c
+++ b/cuda/zvectordev.c
@@ -89,7 +89,7 @@ int setscalMultiVecDeviceDoubleComplex(cuDoubleComplex val, int first, int last,
 { int i=0;
   int pitch = 0;
   struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
   spgpuZsetscal(handle, first, last, indexBase, val, (cuDoubleComplex *) devVecX->v_);
   
@@ -104,7 +104,7 @@ int geinsMultiVecDeviceDoubleComplex(int n, void* devMultiVecIrl, void* devMulti
   struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
   struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl;
   struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   pitch = devVecIrl->pitch_;
   if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) 
     return SPGPU_UNSUPPORTED;
@@ -144,7 +144,7 @@ int igathMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n,
   int i, *idx =(int *) indexes;;
   cuDoubleComplex *hv = (cuDoubleComplex *) host_values;;  
   struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   
   i=0; 
   hv  = &(hv[hfirst-indexBase]);
@@ -174,7 +174,7 @@ int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n,
   cuDoubleComplex *hv  = (cuDoubleComplex *) host_values;
   int *idx=(int *) indexes;
   struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
   idx = &(idx[first-indexBase]);
   hv  = &(hv[hfirst-indexBase]);
@@ -186,7 +186,7 @@ int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n,
 
 int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA)
 { int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
 
   spgpuZmnrm2(handle, y_res, n,(cuDoubleComplex *)devVecA->v_, devVecA->count_, devVecA->pitch_);
@@ -195,7 +195,7 @@ int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMult
 
 int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA)
 { int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
 
   spgpuZmamax(handle, y_res, n,(cuDoubleComplex *)devVecA->v_, 
@@ -205,7 +205,7 @@ int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMult
 
 int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA)
 { int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
 
   spgpuZmasum(handle, y_res, n,(cuDoubleComplex *)devVecA->v_, 
@@ -216,7 +216,7 @@ int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMult
 
 int scalMultiVecDeviceDoubleComplex(cuDoubleComplex alpha, void* devMultiVecA)
 { int i=0;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
   // Note: inner kernel can handle aliased input/output
   spgpuZscal(handle, (cuDoubleComplex *)devVecA->v_, devVecA->pitch_, 
@@ -228,7 +228,7 @@ int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMulti
 {int i=0;
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
   struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
   spgpuZmdot(handle, y_res, n, (cuDoubleComplex*)devVecA->v_,
 	     (cuDoubleComplex*)devVecB->v_,devVecA->count_,devVecB->pitch_);
@@ -241,7 +241,7 @@ int axpbyMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, void* devMulti
   int pitch = 0;
   struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
   struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   pitch = devVecY->pitch_;
   if ((n > devVecY->size_) || (n>devVecX->size_ )) 
     return SPGPU_UNSUPPORTED;
@@ -258,7 +258,7 @@ int axyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha,
 { int i = 0; 
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
   struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   if ((n > devVecA->size_) || (n>devVecB->size_ )) 
     return SPGPU_UNSUPPORTED;
 
@@ -275,7 +275,7 @@ int axybzMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceV
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
   struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
   struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
   if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) 
     return SPGPU_UNSUPPORTED;
@@ -293,7 +293,7 @@ int absMultiVecDeviceDoubleComplex2(int n, cuDoubleComplex alpha, void *deviceVe
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
   struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
 
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
 
   if ((n > devVecA->size_) || (n>devVecB->size_ ))
     return SPGPU_UNSUPPORTED;
@@ -307,7 +307,7 @@ int absMultiVecDeviceDoubleComplex2(int n, cuDoubleComplex alpha, void *deviceVe
 int absMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVecA)
 { int i = 0; 
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
-  spgpuHandle_t handle=psb_gpuGetHandle();
+  spgpuHandle_t handle=psb_cudaGetHandle();
   if (n > devVecA->size_)
     return SPGPU_UNSUPPORTED;
 
diff --git a/test/cudakern/dpdegenmv.F90 b/test/cudakern/dpdegenmv.F90
index b8a2ba2c..d2cc2172 100644
--- a/test/cudakern/dpdegenmv.F90
+++ b/test/cudakern/dpdegenmv.F90
@@ -573,8 +573,8 @@ program pdgenmv
   ! dense matrices
   type(psb_d_vect_type), target :: xv, bv, xg, bg  
 #ifdef HAVE_GPU
-  type(psb_d_vect_gpu)  :: vmold
-  type(psb_i_vect_gpu)  :: imold
+  type(psb_d_vect_cuda)  :: vmold
+  type(psb_i_vect_cuda)  :: imold 
 #endif
   real(psb_dpk_), allocatable :: x1(:), x2(:), x0(:)
   ! blacs parameters
@@ -595,14 +595,14 @@ program pdgenmv
   type(psb_d_rsb_sparse_mat), target   :: arsb
 #endif
 #ifdef HAVE_GPU
-  type(psb_d_elg_sparse_mat), target   :: aelg
-  type(psb_d_csrg_sparse_mat), target  :: acsrg
+  type(psb_d_cuda_elg_sparse_mat), target   :: aelg
+  type(psb_d_cuda_csrg_sparse_mat), target  :: acsrg
 #if CUDA_SHORT_VERSION <= 10
-  type(psb_d_hybg_sparse_mat), target  :: ahybg
+  type(psb_d_cuda_hybg_sparse_mat), target  :: ahybg
 #endif
-  type(psb_d_hlg_sparse_mat), target   :: ahlg
-  type(psb_d_hdiag_sparse_mat), target   :: ahdiag
-  type(psb_d_dnsg_sparse_mat), target   :: adnsg
+  type(psb_d_cuda_hlg_sparse_mat), target   :: ahlg
+  type(psb_d_cuda_hdiag_sparse_mat), target   :: ahdiag
+  type(psb_d_cuda_dnsg_sparse_mat), target   :: adnsg
 #endif
   class(psb_d_base_sparse_mat), pointer :: agmold, acmold
   ! other variables