Switch FOR and IF in AXPBY: hoist the beta == 0 test out of the per-element loop

11 months ago · 1ba8dfc7b7
parent f9677bc892
commit 1ba8dfc7b7
4 changed files with 60 additions and 40 deletions
--- a/cuda/spgpu/kernels/caxpby.cu
+++ b/cuda/spgpu/kernels/caxpby.cu
@@ -33,18 +33,23 @@ __global__ void spgpuCaxpby_krn(cuFloatComplex *z, int n, cuFloatComplex beta, c
 {
 	int id = threadIdx.x + BLOCK_SIZE*blockIdx.x;
 	unsigned int gridSize = blockDim.x * gridDim.x;
 	if (cuFloatComplex_isZero(beta)) {
 	  for ( ; id < n; id +=gridSize)
 	    //if (id,n) 
 	    {
 	      // Since z, x and y are accessed with the same offset by the same thread,
 	      // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing).
 		if (cuFloatComplex_isZero(beta))
 	      z[id] = cuCmulf(alpha,x[id]);
-		else
+	    }
 	} else {
 	  for ( ; id < n; id +=gridSize)
 	    //if (id,n) 
 	    {
 	      z[id] = cuCfmaf(beta, y[id], cuCmulf(alpha, x[id]));
 	    }
 	}
 }
 #if 1
 void spgpuCaxpby(spgpuHandle_t handle,
--- a/cuda/spgpu/kernels/daxpby.cu
+++ b/cuda/spgpu/kernels/daxpby.cu
@@ -33,18 +33,23 @@ __global__ void spgpuDaxpby_krn(double *z, int n, double beta, double *y, double
 {
 	int id = threadIdx.x + BLOCK_SIZE*blockIdx.x;
 	unsigned int gridSize = blockDim.x * gridDim.x;
 	if (beta == 0.0) {
 	  for ( ; id < n; id +=gridSize)
 		//if (id,n) 
 	    {
 	      // Since z, x and y are accessed with the same offset by the same thread,
 	      // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing).
 		if (beta == 0.0)
 	      z[id] = PREC_DMUL(alpha,x[id]);
-		else
+	    }
 	} else {
 	  for ( ; id < n; id +=gridSize)
 	    {
 	      // Since z, x and y are accessed with the same offset by the same thread,
 	      // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing).
 	      z[id] = PREC_DADD(PREC_DMUL(alpha, x[id]), PREC_DMUL(beta,y[id]));
 	    }
 	}
 }
 #if 1
--- a/cuda/spgpu/kernels/saxpby.cu
+++ b/cuda/spgpu/kernels/saxpby.cu
@@ -31,18 +31,23 @@ __global__ void spgpuSaxpby_krn(float *z, int n, float beta, float *y, float alp
 {
 	int id = threadIdx.x + BLOCK_SIZE*blockIdx.x;
 	unsigned int gridSize = blockDim.x * gridDim.x;
 	if (beta == 0.0f) {
 	  for ( ; id < n; id +=gridSize)
 		//if (id,n) 
 	    {
 	      // Since z, x and y are accessed with the same offset by the same thread,
 	      // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing).
 		if (beta == 0.0f)
 	      z[id] = PREC_FMUL(alpha,x[id]);
-		else
+	    }
 	} else {
 	  for ( ; id < n; id +=gridSize)
 	    {
 	      // Since z, x and y are accessed with the same offset by the same thread,
 	      // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing).
 	      z[id] = PREC_FADD(PREC_FMUL(alpha, x[id]), PREC_FMUL(beta,y[id]));
 	    }
 	}
 }
 #if 1
--- a/cuda/spgpu/kernels/zaxpby.cu
+++ b/cuda/spgpu/kernels/zaxpby.cu
@@ -34,16 +34,21 @@ __global__ void spgpuZaxpby_krn(cuDoubleComplex *z, int n, cuDoubleComplex beta,
 {
 	int id = threadIdx.x + BLOCK_SIZE*blockIdx.x;
 	unsigned int gridSize = blockDim.x * gridDim.x;
 	if (cuDoubleComplex_isZero(beta)) {
 	  for ( ; id < n; id +=gridSize)
 	    //if (id,n) 
 	    {
 	      // Since z, x and y are accessed with the same offset by the same thread,
 	      // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing).
 		if (cuDoubleComplex_isZero(beta))
 	      z[id] = cuCmul(alpha,x[id]);
-		else
+	    }
-			z[id] = cuCfma(alpha, x[id], cuCmul(beta,y[id]));
+	} else {
 	  for ( ; id < n; id +=gridSize)
 	    //if (id,n) 
 	    {
 	      z[id] = cuCfma(beta, y[id], cuCmul(alpha, x[id]));
 	    }
 	}
 }