From 9b713c177bf90424c634c7c29c5a773835629459 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 28 Nov 2023 19:46:20 +0100 Subject: [PATCH] Fix cuda interfaces for renaming --- cuda/cuda_util.c | 46 ++++++++++++++++++------------------- cuda/cuda_util.h | 18 +++++++-------- cuda/cvectordev.c | 28 +++++++++++----------- cuda/diagdev.c | 4 ++-- cuda/dnsdev.c | 8 +++---- cuda/dvectordev.c | 28 +++++++++++----------- cuda/elldev.c | 24 +++++++++---------- cuda/hdiagdev.c | 4 ++-- cuda/hlldev.c | 16 ++++++------- cuda/ivectordev.c | 8 +++---- cuda/svectordev.c | 28 +++++++++++----------- cuda/zvectordev.c | 28 +++++++++++----------- test/cudakern/dpdegenmv.F90 | 16 ++++++------- 13 files changed, 128 insertions(+), 128 deletions(-) diff --git a/cuda/cuda_util.c b/cuda/cuda_util.c index 63c38b53..0fe4a8b7 100644 --- a/cuda/cuda_util.c +++ b/cuda/cuda_util.c @@ -37,7 +37,7 @@ static int hasUVA=-1; static struct cudaDeviceProp *prop=NULL; -static spgpuHandle_t psb_gpu_handle = NULL; +static spgpuHandle_t psb_cuda_handle = NULL; static cublasHandle_t psb_cublas_handle = NULL; @@ -228,7 +228,7 @@ int gpuInit(int dev) return SPGPU_UNSPECIFIED; } if (!psb_cublas_handle) - psb_gpuCreateCublasHandle(); + psb_cudaCreateCublasHandle(); hasUVA=getDeviceHasUVA(); return err; @@ -238,14 +238,14 @@ int gpuInit(int dev) void gpuClose() { cudaStream_t st1, st2; - if (! psb_gpu_handle) - st1=spgpuGetStream(psb_gpu_handle); + if (! psb_cuda_handle) + st1=spgpuGetStream(psb_cuda_handle); if (! psb_cublas_handle) cublasGetStream(psb_cublas_handle,&st2); - psb_gpuDestroyHandle(); + psb_cudaDestroyHandle(); if (st1 != st2) - psb_gpuDestroyCublasHandle(); + psb_cudaDestroyCublasHandle(); free(prop); prop=NULL; hasUVA=-1; @@ -391,49 +391,49 @@ void cudaReset() } -spgpuHandle_t psb_gpuGetHandle() +spgpuHandle_t psb_cudaGetHandle() { - return psb_gpu_handle; + return psb_cuda_handle; } -void psb_gpuCreateHandle() +void psb_cudaCreateHandle() { - if (!psb_gpu_handle) - spgpuCreate(&psb_gpu_handle, getDevice()); + if (!psb_cuda_handle) + spgpuCreate(&psb_cuda_handle, getDevice()); } -void psb_gpuDestroyHandle() +void psb_cudaDestroyHandle() { - if (!psb_gpu_handle) - spgpuDestroy(psb_gpu_handle); - psb_gpu_handle = NULL; + if (!psb_cuda_handle) + spgpuDestroy(psb_cuda_handle); + psb_cuda_handle = NULL; } -cudaStream_t psb_gpuGetStream() +cudaStream_t psb_cudaGetStream() { - return spgpuGetStream(psb_gpu_handle); + return spgpuGetStream(psb_cuda_handle); } -void psb_gpuSetStream(cudaStream_t stream) +void psb_cudaSetStream(cudaStream_t stream) { - spgpuSetStream(psb_gpu_handle, stream); + spgpuSetStream(psb_cuda_handle, stream); return ; } -cublasHandle_t psb_gpuGetCublasHandle() +cublasHandle_t psb_cudaGetCublasHandle() { if (!psb_cublas_handle) - psb_gpuCreateCublasHandle(); + psb_cudaCreateCublasHandle(); return psb_cublas_handle; } -void psb_gpuCreateCublasHandle() +void psb_cudaCreateCublasHandle() { if (!psb_cublas_handle) cublasCreate(&psb_cublas_handle); } -void psb_gpuDestroyCublasHandle() +void psb_cudaDestroyCublasHandle() { if (!psb_cublas_handle) cublasDestroy(psb_cublas_handle); diff --git a/cuda/cuda_util.h b/cuda/cuda_util.h index 03c7b488..789c08f4 100644 --- a/cuda/cuda_util.h +++ b/cuda/cuda_util.h @@ -71,15 +71,15 @@ void cudaReset(); void gpuClose(); -spgpuHandle_t psb_gpuGetHandle(); -void psb_gpuCreateHandle(); -void psb_gpuDestroyHandle(); -cudaStream_t psb_gpuGetStream(); -void psb_gpuSetStream(cudaStream_t stream); - -cublasHandle_t psb_gpuGetCublasHandle(); -void psb_gpuCreateCublasHandle(); -void psb_gpuDestroyCublasHandle(); +spgpuHandle_t psb_cudaGetHandle(); +void psb_cudaCreateHandle(); +void psb_cudaDestroyHandle(); +cudaStream_t psb_cudaGetStream(); +void psb_cudaSetStream(cudaStream_t stream); + +cublasHandle_t psb_cudaGetCublasHandle(); +void psb_cudaCreateCublasHandle(); +void psb_cudaDestroyCublasHandle(); int allocateInt(void **, int); diff --git a/cuda/cvectordev.c b/cuda/cvectordev.c index db55caef..1dc16667 100644 --- a/cuda/cvectordev.c +++ b/cuda/cvectordev.c @@ -89,7 +89,7 @@ int setscalMultiVecDeviceFloatComplex(cuFloatComplex val, int first, int last, { int i=0; int pitch = 0; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); spgpuCsetscal(handle, first, last, indexBase, val, (cuFloatComplex *) devVecX->v_); @@ -104,7 +104,7 @@ int geinsMultiVecDeviceFloatComplex(int n, void* devMultiVecIrl, void* devMultiV struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); pitch = devVecIrl->pitch_; if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) return SPGPU_UNSUPPORTED; @@ -144,7 +144,7 @@ int igathMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n, int i, *idx =(int *) indexes;; cuFloatComplex *hv = (cuFloatComplex *) host_values;; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); i=0; hv = &(hv[hfirst-indexBase]); @@ -175,7 +175,7 @@ int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n, cuFloatComplex *hv = (cuFloatComplex *) host_values; int *idx=(int *) indexes; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); idx = &(idx[first-indexBase]); hv = &(hv[hfirst-indexBase]); @@ -187,7 +187,7 @@ int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n, int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuCmnrm2(handle, y_res, n,(cuFloatComplex *)devVecA->v_, @@ -197,7 +197,7 @@ int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiV int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuCmamax(handle, y_res, n,(cuFloatComplex *)devVecA->v_, @@ -207,7 +207,7 @@ int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiV int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuCmasum(handle, y_res, n,(cuFloatComplex *)devVecA->v_, @@ -218,7 +218,7 @@ int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiV int scalMultiVecDeviceFloatComplex(cuFloatComplex alpha, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; // Note: inner kernel can handle aliased input/output spgpuCscal(handle, (cuFloatComplex *)devVecA->v_, devVecA->pitch_, @@ -231,7 +231,7 @@ int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, {int i=0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); spgpuCmdot(handle, y_res, n, (cuFloatComplex*)devVecA->v_, (cuFloatComplex*)devVecB->v_,devVecA->count_,devVecB->pitch_); @@ -244,7 +244,7 @@ int axpbyMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha, void* devMultiVe int pitch = 0; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); pitch = devVecY->pitch_; if ((n > devVecY->size_) || (n>devVecX->size_ )) return SPGPU_UNSUPPORTED; @@ -261,7 +261,7 @@ int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, { int i = 0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ )) return SPGPU_UNSUPPORTED; @@ -279,7 +279,7 @@ int axybzMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVec struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) return SPGPU_UNSUPPORTED; @@ -297,7 +297,7 @@ int absMultiVecDeviceFloatComplex2(int n, cuFloatComplex alpha, void *deviceVecA struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ )) return SPGPU_UNSUPPORTED; @@ -311,7 +311,7 @@ int absMultiVecDeviceFloatComplex2(int n, cuFloatComplex alpha, void *deviceVecA int absMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA) { int i = 0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if (n > devVecA->size_) return SPGPU_UNSUPPORTED; diff --git a/cuda/diagdev.c b/cuda/diagdev.c index 64879455..a2acf1f4 100644 --- a/cuda/diagdev.c +++ b/cuda/diagdev.c @@ -186,7 +186,7 @@ int spmvDiagDeviceDouble(void *deviceMat, double alpha, void* deviceX, struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); #ifdef HAVE_SPGPU #ifdef VERBOSE @@ -268,7 +268,7 @@ int spmvDiagDeviceFloat(void *deviceMat, float alpha, void* deviceX, struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); #ifdef HAVE_SPGPU #ifdef VERBOSE diff --git a/cuda/dnsdev.c b/cuda/dnsdev.c index fb4d339c..25cddc87 100644 --- a/cuda/dnsdev.c +++ b/cuda/dnsdev.c @@ -126,7 +126,7 @@ int spmvDnsDeviceFloat(char transa, int m, int n, int k, float *alpha, int status; #ifdef HAVE_SPGPU - cublasHandle_t handle=psb_gpuGetCublasHandle(); + cublasHandle_t handle=psb_cudaGetCublasHandle(); cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); /* Note: the M,N,K choices according to TRANS have already been handled in the caller */ if (n == 1) { @@ -157,7 +157,7 @@ int spmvDnsDeviceDouble(char transa, int m, int n, int k, double *alpha, int status; #ifdef HAVE_SPGPU - cublasHandle_t handle=psb_gpuGetCublasHandle(); + cublasHandle_t handle=psb_cudaGetCublasHandle(); cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); /* Note: the M,N,K choices according to TRANS have already been handled in the caller */ if (n == 1) { @@ -188,7 +188,7 @@ int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k, float complex *a int status; #ifdef HAVE_SPGPU - cublasHandle_t handle=psb_gpuGetCublasHandle(); + cublasHandle_t handle=psb_cudaGetCublasHandle(); cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); /* Note: the M,N,K choices according to TRANS have already been handled in the caller */ if (n == 1) { @@ -219,7 +219,7 @@ int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k, double complex int status; #ifdef HAVE_SPGPU - cublasHandle_t handle=psb_gpuGetCublasHandle(); + cublasHandle_t handle=psb_cudaGetCublasHandle(); cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); /* Note: the M,N,K choices according to TRANS have already been handled in the caller */ if (n == 1) { diff --git a/cuda/dvectordev.c b/cuda/dvectordev.c index 8b020c16..eae82c1e 100644 --- a/cuda/dvectordev.c +++ b/cuda/dvectordev.c @@ -88,7 +88,7 @@ int setscalMultiVecDeviceDouble(double val, int first, int last, { int i=0; int pitch = 0; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); spgpuDsetscal(handle, first, last, indexBase, val, (double *) devVecX->v_); @@ -104,7 +104,7 @@ int geinsMultiVecDeviceDouble(int n, void* devMultiVecIrl, void* devMultiVecVal, struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); pitch = devVecIrl->pitch_; if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) return SPGPU_UNSUPPORTED; @@ -143,7 +143,7 @@ int igathMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, int i, *idx =(int *) indexes;; double *hv = (double *) host_values;; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); i=0; hv = &(hv[hfirst-indexBase]); @@ -168,7 +168,7 @@ int iscatMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, int first, v double *hv = (double *) host_values; int *idx=(int *) indexes; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); idx = &(idx[first-indexBase]); hv = &(hv[hfirst-indexBase]); @@ -179,7 +179,7 @@ int iscatMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, int first, v int scalMultiVecDeviceDouble(double alpha, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; // Note: inner kernel can handle aliased input/output spgpuDscal(handle, (double *)devVecA->v_, devVecA->pitch_, @@ -189,7 +189,7 @@ int scalMultiVecDeviceDouble(double alpha, void* devMultiVecA) int nrm2MultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuDmnrm2(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_); @@ -198,7 +198,7 @@ int nrm2MultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA) int amaxMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuDmamax(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_); @@ -207,7 +207,7 @@ int amaxMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA) int asumMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuDmasum(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_); @@ -219,7 +219,7 @@ int dotMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA, void* devM {int i=0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); spgpuDmdot(handle, y_res, n, (double*)devVecA->v_, (double*)devVecB->v_,devVecA->count_,devVecB->pitch_); return(i); @@ -231,7 +231,7 @@ int axpbyMultiVecDeviceDouble(int n,double alpha, void* devMultiVecX, int pitch = 0; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); pitch = devVecY->pitch_; if ((n > devVecY->size_) || (n>devVecX->size_ )) return SPGPU_UNSUPPORTED; @@ -246,7 +246,7 @@ int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceV { int i = 0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ )) return SPGPU_UNSUPPORTED; @@ -262,7 +262,7 @@ int axybzMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) return SPGPU_UNSUPPORTED; @@ -278,7 +278,7 @@ int absMultiVecDeviceDouble2(int n, double alpha, void *deviceVecA, struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ )) return SPGPU_UNSUPPORTED; @@ -291,7 +291,7 @@ int absMultiVecDeviceDouble2(int n, double alpha, void *deviceVecA, int absMultiVecDeviceDouble(int n, double alpha, void *deviceVecA) { int i = 0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if (n > devVecA->size_) return SPGPU_UNSUPPORTED; diff --git a/cuda/elldev.c b/cuda/elldev.c index 8fd7aeb5..eff89efa 100644 --- a/cuda/elldev.c +++ b/cuda/elldev.c @@ -158,7 +158,7 @@ void sspmdmm_gpu(float *z,int s, int vPitch, float *y, float alpha, float* cM, i int avgRowSize, int maxRowSize, int rows, int pitch, float *x, float beta, int firstIndex) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); for (i=0; irows+1)*sizeof(int)); //cudaSync(); - handle = psb_gpuGetHandle(); + handle = psb_cudaGetHandle(); psi_cuda_s_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, (int *) devMat->rS, (int *) devMat->hackOffs, devIdisp,devJa,devVal, @@ -502,7 +502,7 @@ int psiCopyCooToHlgDouble(int nr, int nc, int nza, int hacksz, int noffs, int is //fprintf(stderr,"WriteRemoteBuffer idisp %d\n",i); //cudaSync(); //fprintf(stderr," hacksz: %d \n",hacksz); - handle = psb_gpuGetHandle(); + handle = psb_cudaGetHandle(); psi_cuda_d_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, (int *) devMat->rS, (int *) devMat->hackOffs, devIdisp,devJa,devVal, @@ -545,7 +545,7 @@ int psiCopyCooToHlgFloatComplex(int nr, int nc, int nza, int hacksz, int noffs, if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); //cudaSync(); - handle = psb_gpuGetHandle(); + handle = psb_cudaGetHandle(); psi_cuda_c_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, (int *) devMat->rS, (int *) devMat->hackOffs, devIdisp,devJa,devVal, @@ -588,7 +588,7 @@ int psiCopyCooToHlgDoubleComplex(int nr, int nc, int nza, int hacksz, int noffs, if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); //cudaSync(); - handle = psb_gpuGetHandle(); + handle = psb_cudaGetHandle(); psi_cuda_z_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, (int *) devMat->rS, (int *) devMat->hackOffs, devIdisp,devJa,devVal, diff --git a/cuda/ivectordev.c b/cuda/ivectordev.c index 93636465..71d5c472 100644 --- a/cuda/ivectordev.c +++ b/cuda/ivectordev.c @@ -90,7 +90,7 @@ int setscalMultiVecDeviceInt(int val, int first, int last, { int i=0; int pitch = 0; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); spgpuIsetscal(handle, first, last, indexBase, val, (int *) devVecX->v_); @@ -105,7 +105,7 @@ int geinsMultiVecDeviceInt(int n, void* devMultiVecIrl, void* devMultiVecVal, struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); pitch = devVecIrl->pitch_; if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) return SPGPU_UNSUPPORTED; @@ -144,7 +144,7 @@ int igathMultiVecDeviceInt(void* deviceVec, int vectorId, int n, int i, *idx =(int *) indexes;; int *hv = (int *) host_values;; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); i=0; hv = &(hv[hfirst-indexBase]); @@ -169,7 +169,7 @@ int iscatMultiVecDeviceInt(void* deviceVec, int vectorId, int n, int first, void int *hv = (int *) host_values; int *idx=(int *) indexes; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); idx = &(idx[first-indexBase]); hv = &(hv[hfirst-indexBase]); diff --git a/cuda/svectordev.c b/cuda/svectordev.c index d193a4d8..9a41ae1a 100644 --- a/cuda/svectordev.c +++ b/cuda/svectordev.c @@ -88,7 +88,7 @@ int setscalMultiVecDeviceFloat(float val, int first, int last, { int i=0; int pitch = 0; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); spgpuSsetscal(handle, first, last, indexBase, val, (float *) devVecX->v_); @@ -103,7 +103,7 @@ int geinsMultiVecDeviceFloat(int n, void* devMultiVecIrl, void* devMultiVecVal, struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); pitch = devVecIrl->pitch_; if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) return SPGPU_UNSUPPORTED; @@ -142,7 +142,7 @@ int igathMultiVecDeviceFloat(void* deviceVec, int vectorId, int n, int i, *idx =(int *) indexes;; float *hv = (float *) host_values;; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); i=0; hv = &(hv[hfirst-indexBase]); @@ -167,7 +167,7 @@ int iscatMultiVecDeviceFloat(void* deviceVec, int vectorId, int n, int first, vo float *hv = (float *) host_values; int *idx=(int *) indexes; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); idx = &(idx[first-indexBase]); hv = &(hv[hfirst-indexBase]); @@ -179,7 +179,7 @@ int iscatMultiVecDeviceFloat(void* deviceVec, int vectorId, int n, int first, vo int nrm2MultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuSmnrm2(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_); @@ -188,7 +188,7 @@ int nrm2MultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) int amaxMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuSmamax(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_); @@ -197,7 +197,7 @@ int amaxMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) int asumMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuSmasum(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_); @@ -207,7 +207,7 @@ int asumMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) int scalMultiVecDeviceFloat(float alpha, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; // Note: inner kernel can handle aliased input/output spgpuSscal(handle, (float *)devVecA->v_, devVecA->pitch_, @@ -219,7 +219,7 @@ int dotMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA, void* devMul {int i=0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); spgpuSmdot(handle, y_res, n, (float*)devVecA->v_, (float*)devVecB->v_,devVecA->count_,devVecB->pitch_); return(i); @@ -231,7 +231,7 @@ int axpbyMultiVecDeviceFloat(int n,float alpha, void* devMultiVecX, int pitch = 0; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); pitch = devVecY->pitch_; if ((n > devVecY->size_) || (n>devVecX->size_ )) return SPGPU_UNSUPPORTED; @@ -246,7 +246,7 @@ int axyMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, void *deviceVec { int i = 0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ )) return SPGPU_UNSUPPORTED; @@ -262,7 +262,7 @@ int axybzMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) return SPGPU_UNSUPPORTED; @@ -278,7 +278,7 @@ int absMultiVecDeviceFloat2(int n, float alpha, void *deviceVecA, struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ )) return SPGPU_UNSUPPORTED; @@ -291,7 +291,7 @@ int absMultiVecDeviceFloat2(int n, float alpha, void *deviceVecA, int absMultiVecDeviceFloat(int n, float alpha, void *deviceVecA) { int i = 0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if (n > devVecA->size_) return SPGPU_UNSUPPORTED; diff --git a/cuda/zvectordev.c b/cuda/zvectordev.c index c245719f..c3671a86 100644 --- a/cuda/zvectordev.c +++ b/cuda/zvectordev.c @@ -89,7 +89,7 @@ int setscalMultiVecDeviceDoubleComplex(cuDoubleComplex val, int first, int last, { int i=0; int pitch = 0; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); spgpuZsetscal(handle, first, last, indexBase, val, (cuDoubleComplex *) devVecX->v_); @@ -104,7 +104,7 @@ int geinsMultiVecDeviceDoubleComplex(int n, void* devMultiVecIrl, void* devMulti struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); pitch = devVecIrl->pitch_; if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) return SPGPU_UNSUPPORTED; @@ -144,7 +144,7 @@ int igathMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n, int i, *idx =(int *) indexes;; cuDoubleComplex *hv = (cuDoubleComplex *) host_values;; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); i=0; hv = &(hv[hfirst-indexBase]); @@ -174,7 +174,7 @@ int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n, cuDoubleComplex *hv = (cuDoubleComplex *) host_values; int *idx=(int *) indexes; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); idx = &(idx[first-indexBase]); hv = &(hv[hfirst-indexBase]); @@ -186,7 +186,7 @@ int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n, int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuZmnrm2(handle, y_res, n,(cuDoubleComplex *)devVecA->v_, devVecA->count_, devVecA->pitch_); @@ -195,7 +195,7 @@ int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMult int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuZmamax(handle, y_res, n,(cuDoubleComplex *)devVecA->v_, @@ -205,7 +205,7 @@ int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMult int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuZmasum(handle, y_res, n,(cuDoubleComplex *)devVecA->v_, @@ -216,7 +216,7 @@ int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMult int scalMultiVecDeviceDoubleComplex(cuDoubleComplex alpha, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; // Note: inner kernel can handle aliased input/output spgpuZscal(handle, (cuDoubleComplex *)devVecA->v_, devVecA->pitch_, @@ -228,7 +228,7 @@ int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMulti {int i=0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); spgpuZmdot(handle, y_res, n, (cuDoubleComplex*)devVecA->v_, (cuDoubleComplex*)devVecB->v_,devVecA->count_,devVecB->pitch_); @@ -241,7 +241,7 @@ int axpbyMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, void* devMulti int pitch = 0; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); pitch = devVecY->pitch_; if ((n > devVecY->size_) || (n>devVecX->size_ )) return SPGPU_UNSUPPORTED; @@ -258,7 +258,7 @@ int axyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, { int i = 0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ )) return SPGPU_UNSUPPORTED; @@ -275,7 +275,7 @@ int axybzMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceV struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) return SPGPU_UNSUPPORTED; @@ -293,7 +293,7 @@ int absMultiVecDeviceDoubleComplex2(int n, cuDoubleComplex alpha, void *deviceVe struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ )) return SPGPU_UNSUPPORTED; @@ -307,7 +307,7 @@ int absMultiVecDeviceDoubleComplex2(int n, cuDoubleComplex alpha, void *deviceVe int absMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVecA) { int i = 0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if (n > devVecA->size_) return SPGPU_UNSUPPORTED; diff --git a/test/cudakern/dpdegenmv.F90 b/test/cudakern/dpdegenmv.F90 index b8a2ba2c..d2cc2172 100644 --- a/test/cudakern/dpdegenmv.F90 +++ b/test/cudakern/dpdegenmv.F90 @@ -573,8 +573,8 @@ program pdgenmv ! dense matrices type(psb_d_vect_type), target :: xv, bv, xg, bg #ifdef HAVE_GPU - type(psb_d_vect_gpu) :: vmold - type(psb_i_vect_gpu) :: imold + type(psb_d_vect_cuda) :: vmold + type(psb_i_vect_cuda) :: imold #endif real(psb_dpk_), allocatable :: x1(:), x2(:), x0(:) ! blacs parameters @@ -595,14 +595,14 @@ program pdgenmv type(psb_d_rsb_sparse_mat), target :: arsb #endif #ifdef HAVE_GPU - type(psb_d_elg_sparse_mat), target :: aelg - type(psb_d_csrg_sparse_mat), target :: acsrg + type(psb_d_cuda_elg_sparse_mat), target :: aelg + type(psb_d_cuda_csrg_sparse_mat), target :: acsrg #if CUDA_SHORT_VERSION <= 10 - type(psb_d_hybg_sparse_mat), target :: ahybg + type(psb_d_cuda_hybg_sparse_mat), target :: ahybg #endif - type(psb_d_hlg_sparse_mat), target :: ahlg - type(psb_d_hdiag_sparse_mat), target :: ahdiag - type(psb_d_dnsg_sparse_mat), target :: adnsg + type(psb_d_cuda_hlg_sparse_mat), target :: ahlg + type(psb_d_cuda_hdiag_sparse_mat), target :: ahdiag + type(psb_d_cuda_dnsg_sparse_mat), target :: adnsg #endif class(psb_d_base_sparse_mat), pointer :: agmold, acmold ! other variables