diff --git a/cuda/spgpu/kernels/caxpby.cu b/cuda/spgpu/kernels/caxpby.cu index 3e97f75f..817fdf53 100644 --- a/cuda/spgpu/kernels/caxpby.cu +++ b/cuda/spgpu/kernels/caxpby.cu @@ -78,6 +78,7 @@ void spgpuCaxpby(spgpuHandle_t handle, #else + __global__ void spgpuCaxpby_krn(cuFloatComplex *z, int n, cuFloatComplex beta, cuFloatComplex *y, cuFloatComplex alpha, cuFloatComplex* x) { int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; @@ -94,6 +95,7 @@ __global__ void spgpuCaxpby_krn(cuFloatComplex *z, int n, cuFloatComplex beta, c } } + void spgpuCaxpby_(spgpuHandle_t handle, __device cuFloatComplex *z, int n, @@ -103,15 +105,9 @@ void spgpuCaxpby_(spgpuHandle_t handle, __device cuFloatComplex* x) { int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; - int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); - cudaDeviceProp deviceProp; - cudaGetDeviceProperties(&deviceProp, 0); - num_mp = deviceProp.multiProcessorCount; - max_threads_mp = deviceProp.maxThreadsPerMultiProcessor; - num_blocks_mp = max_threads_mp/BLOCK_SIZE; - num_blocks = num_blocks_mp*num_mp; - dim3 grid(num_blocks); + dim3 grid(msize); spgpuCaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); } diff --git a/cuda/spgpu/kernels/daxpby.cu b/cuda/spgpu/kernels/daxpby.cu index fa87d996..e4823b34 100644 --- a/cuda/spgpu/kernels/daxpby.cu +++ b/cuda/spgpu/kernels/daxpby.cu @@ -89,6 +89,7 @@ __global__ void spgpuDaxpby_krn(double *z, int n, double beta, double *y, double } } + void spgpuDaxpby_(spgpuHandle_t handle, __device double *z, int n, @@ -98,15 +99,9 @@ void spgpuDaxpby_(spgpuHandle_t handle, __device double* x) { int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; - int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); - cudaDeviceProp deviceProp; - cudaGetDeviceProperties(&deviceProp, 0); - num_mp = deviceProp.multiProcessorCount; - max_threads_mp = deviceProp.maxThreadsPerMultiProcessor; - num_blocks_mp = max_threads_mp/BLOCK_SIZE; - num_blocks = num_blocks_mp*num_mp; - dim3 grid(num_blocks); + dim3 grid(msize); spgpuDaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); } @@ -134,6 +129,7 @@ void spgpuDaxpby(spgpuHandle_t handle, cudaCheckError("CUDA error on daxpby"); } + #endif void spgpuDmaxpby(spgpuHandle_t handle, __device double *z,