diff --git a/cuda/cvectordev.c b/cuda/cvectordev.c
index 65d41893..b05bca55 100644
--- a/cuda/cvectordev.c
+++ b/cuda/cvectordev.c
@@ -184,7 +184,7 @@ int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n,
 }
 
 
-int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
+int nrm2MultiVecDeviceFloatComplex(float* y_res, int n, void* devMultiVecA)
 { int i=0;
   spgpuHandle_t handle=psb_cudaGetHandle();
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
@@ -194,7 +194,7 @@ int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiV
   return(i);
 }
 
-int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
+int amaxMultiVecDeviceFloatComplex(float* y_res, int n, void* devMultiVecA)
 { int i=0;
   spgpuHandle_t handle=psb_cudaGetHandle();
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
@@ -204,7 +204,7 @@ int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiV
   return(i);
 }
 
-int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
+int asumMultiVecDeviceFloatComplex(float* y_res, int n, void* devMultiVecA)
 { int i=0;
   spgpuHandle_t handle=psb_cudaGetHandle();
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
diff --git a/cuda/cvectordev.h b/cuda/cvectordev.h
index 8c40b95d..423da33e 100644
--- a/cuda/cvectordev.h
+++ b/cuda/cvectordev.h
@@ -37,6 +37,7 @@
 #include "vectordev.h"
 #include "cuda_runtime.h"
 #include "core.h"
+#include "vector.h"
 
 int registerMappedFloatComplex(void *, void **, int, cuFloatComplex);
 int writeMultiVecDeviceFloatComplex(void* deviceMultiVec, cuFloatComplex* hostMultiVec);
@@ -63,9 +64,9 @@ int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n, int fi
 			      int hfirst, void* host_values, int indexBase, cuFloatComplex beta);
 
 int scalMultiVecDeviceFloatComplex(cuFloatComplex alpha, void* devMultiVecA);
-int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA);
-int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA);
-int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA);
+int nrm2MultiVecDeviceFloatComplex(float* y_res, int n, void* devVecA);
+int amaxMultiVecDeviceFloatComplex(float* y_res, int n, void* devVecA);
+int asumMultiVecDeviceFloatComplex(float* y_res, int n, void* devVecA);
 int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA, void* devVecB);
 
 int axpbyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void* devVecX, cuFloatComplex beta, void* devVecY);
diff --git a/cuda/dnsdev.c b/cuda/dnsdev.c
index 0a991012..3cf57976 100644
--- a/cuda/dnsdev.c
+++ b/cuda/dnsdev.c
@@ -178,12 +178,12 @@ int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k, float complex *a
   /* Note: the M,N,K choices according to TRANS have already been handled in the caller */  
   if (n == 1) {
     status = cublasCgemv(handle, trans, m,k,
-			 alpha, devMat->cM,devMat->pitch, x->v_,1,
-			 beta,  y->v_,1);
+			 (const cuComplex *) alpha, devMat->cM,devMat->pitch, x->v_,1,
+			 (const cuComplex *) beta,  y->v_,1);
   } else {
     status = cublasCgemm(handle, trans, CUBLAS_OP_N, m,n,k,
-			 alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
-			 beta,  y->v_,y->pitch_);
+			 (const cuComplex *) alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
+			 (const cuComplex *) beta,  y->v_,y->pitch_);
   }    
   
   if (status == CUBLAS_STATUS_SUCCESS)  
@@ -205,12 +205,12 @@ int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k, double complex
   /* Note: the M,N,K choices according to TRANS have already been handled in the caller */  
   if (n == 1) {
     status = cublasZgemv(handle, trans, m,k,
-			 alpha, devMat->cM,devMat->pitch, x->v_,1,
-			 beta,  y->v_,1);
+			 (const cuDoubleComplex *) alpha, devMat->cM,devMat->pitch, x->v_,1,
+			 (const cuDoubleComplex *) beta,  y->v_,1);
   } else {
     status = cublasZgemm(handle, trans, CUBLAS_OP_N, m,n,k,
-			 alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
-			 beta,  y->v_,y->pitch_);
+			 (const cuDoubleComplex *) alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
+			 (const cuDoubleComplex *) beta,  y->v_,y->pitch_);
   }    
   
   if (status == CUBLAS_STATUS_SUCCESS)  
diff --git a/cuda/dvectordev.h b/cuda/dvectordev.h
index 3834c0d3..0d2d2ab3 100644
--- a/cuda/dvectordev.h
+++ b/cuda/dvectordev.h
@@ -35,6 +35,7 @@
 #include "vectordev.h"
 #include "cuda_runtime.h"
 #include "core.h"
+#include "vector.h"
 
 int registerMappedDouble(void *, void **, int, double);
 int writeMultiVecDeviceDouble(void* deviceMultiVec, double* hostMultiVec);
diff --git a/cuda/fcusparse_fct.h b/cuda/fcusparse_fct.h
index 12be21bd..ec7166f5 100644
--- a/cuda/fcusparse_fct.h
+++ b/cuda/fcusparse_fct.h
@@ -439,7 +439,7 @@ int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type)
   T_CSRGDeviceMat *cMat= Matrix->mat;
   cusparseFillMode_t  mode=type;
 
-  CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->spmvDescr,
+  CHECK_CUSPARSE(cusparseSpMatSetAttribute((*(cMat->spmvDescr)),
 					   CUSPARSE_SPMAT_FILL_MODE,
 					   (const void*) &mode,
 					   sizeof(cusparseFillMode_t)));
@@ -450,7 +450,7 @@ int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type)
 {
   T_CSRGDeviceMat *cMat= Matrix->mat;
   cusparseDiagType_t  cutype=type;
-  CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->spmvDescr,
+  CHECK_CUSPARSE(cusparseSpMatSetAttribute((*(cMat->spmvDescr)),
 					   CUSPARSE_SPMAT_DIAG_TYPE,
 					   (const void*) &cutype,
 					   sizeof(cusparseDiagType_t)));
diff --git a/cuda/ivectordev.h b/cuda/ivectordev.h
index 2db54be4..6f3a32a0 100644
--- a/cuda/ivectordev.h
+++ b/cuda/ivectordev.h
@@ -35,6 +35,7 @@
 #include "vectordev.h"
 #include "cuda_runtime.h"
 #include "core.h"
+#include "vector.h"
 
 int registerMappedInt(void *, void **, int, int);
 int writeMultiVecDeviceInt(void* deviceMultiVec, int* hostMultiVec);
diff --git a/cuda/svectordev.h b/cuda/svectordev.h
index d5c85f78..887a7755 100644
--- a/cuda/svectordev.h
+++ b/cuda/svectordev.h
@@ -35,6 +35,7 @@
 #include "vectordev.h"
 #include "cuda_runtime.h"
 #include "core.h"
+#include "vector.h"
 
 int registerMappedFloat(void *, void **, int, float);
 int writeMultiVecDeviceFloat(void* deviceMultiVec, float* hostMultiVec);
diff --git a/cuda/vectordev.h b/cuda/vectordev.h
index df5fbd82..93cf1189 100644
--- a/cuda/vectordev.h
+++ b/cuda/vectordev.h
@@ -34,6 +34,7 @@
 #include "cuda_runtime.h"
 //#include "common.h"
 //#include "cintrf.h"
+#include "cuda_util.h"
 #include <complex.h>
 
 struct MultiVectDevice
diff --git a/cuda/zvectordev.c b/cuda/zvectordev.c
index 3a5b0738..49741582 100644
--- a/cuda/zvectordev.c
+++ b/cuda/zvectordev.c
@@ -183,7 +183,7 @@ int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n,
 }
 
 
-int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA)
+int nrm2MultiVecDeviceDoubleComplex(double* y_res, int n, void* devMultiVecA)
 { int i=0;
   spgpuHandle_t handle=psb_cudaGetHandle();
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
@@ -192,7 +192,7 @@ int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMult
   return(i);
 }
 
-int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA)
+int amaxMultiVecDeviceDoubleComplex(double* y_res, int n, void* devMultiVecA)
 { int i=0;
   spgpuHandle_t handle=psb_cudaGetHandle();
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
@@ -202,7 +202,7 @@ int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMult
   return(i);
 }
 
-int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA)
+int asumMultiVecDeviceDoubleComplex(double* y_res, int n, void* devMultiVecA)
 { int i=0;
   spgpuHandle_t handle=psb_cudaGetHandle();
   struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
diff --git a/cuda/zvectordev.h b/cuda/zvectordev.h
index e15802f0..023c7f13 100644
--- a/cuda/zvectordev.h
+++ b/cuda/zvectordev.h
@@ -37,6 +37,7 @@
 #include "vectordev.h"
 #include "cuda_runtime.h"
 #include "core.h"
+#include "vector.h"
 
 int registerMappedDoubleComplex(void *, void **, int, cuDoubleComplex);
 int writeMultiVecDeviceDoubleComplex(void* deviceMultiVec, cuDoubleComplex* hostMultiVec);
@@ -69,9 +70,9 @@ int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n,
 				     int indexBase, cuDoubleComplex beta);
 
 int scalMultiVecDeviceDoubleComplex(cuDoubleComplex alpha, void* devMultiVecA);
-int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devVecA);
-int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devVecA);
-int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devVecA);
+int nrm2MultiVecDeviceDoubleComplex(double* y_res, int n, void* devVecA);
+int amaxMultiVecDeviceDoubleComplex(double* y_res, int n, void* devVecA);
+int asumMultiVecDeviceDoubleComplex(double* y_res, int n, void* devVecA);
 int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, 
 				   void* devVecA, void* devVecB);