Switch FOR and IF in AXPBY

nond-rep
Salvatore Filippone 10 months ago
parent f9677bc892
commit 1ba8dfc7b7

@ -33,16 +33,21 @@ __global__ void spgpuCaxpby_krn(cuFloatComplex *z, int n, cuFloatComplex beta, c
{
int id = threadIdx.x + BLOCK_SIZE*blockIdx.x;
unsigned int gridSize = blockDim.x * gridDim.x;
for ( ; id < n; id +=gridSize)
//if (id,n)
{
// Since z, x and y are accessed with the same offset by the same thread,
// and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing).
if (cuFloatComplex_isZero(beta))
z[id] = cuCmulf(alpha,x[id]);
else
z[id] = cuCfmaf(beta, y[id], cuCmulf(alpha, x[id]));
if (cuFloatComplex_isZero(beta)) {
for ( ; id < n; id +=gridSize)
//if (id,n)
{
// Since z, x and y are accessed with the same offset by the same thread,
// and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing).
z[id] = cuCmulf(alpha,x[id]);
}
} else {
for ( ; id < n; id +=gridSize)
//if (id,n)
{
z[id] = cuCfmaf(beta, y[id], cuCmulf(alpha, x[id]));
}
}
}

@ -33,16 +33,21 @@ __global__ void spgpuDaxpby_krn(double *z, int n, double beta, double *y, double
{
int id = threadIdx.x + BLOCK_SIZE*blockIdx.x;
unsigned int gridSize = blockDim.x * gridDim.x;
for ( ; id < n; id +=gridSize)
//if (id,n)
{
// Since z, x and y are accessed with the same offset by the same thread,
// and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing).
if (beta == 0.0)
z[id] = PREC_DMUL(alpha,x[id]);
else
z[id] = PREC_DADD(PREC_DMUL(alpha, x[id]), PREC_DMUL(beta,y[id]));
if (beta == 0.0) {
for ( ; id < n; id +=gridSize)
{
// Since z, x and y are accessed with the same offset by the same thread,
// and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing).
z[id] = PREC_DMUL(alpha,x[id]);
}
} else {
for ( ; id < n; id +=gridSize)
{
// Since z, x and y are accessed with the same offset by the same thread,
// and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing).
z[id] = PREC_DADD(PREC_DMUL(alpha, x[id]), PREC_DMUL(beta,y[id]));
}
}
}

@ -31,16 +31,21 @@ __global__ void spgpuSaxpby_krn(float *z, int n, float beta, float *y, float alp
{
int id = threadIdx.x + BLOCK_SIZE*blockIdx.x;
unsigned int gridSize = blockDim.x * gridDim.x;
for ( ; id < n; id +=gridSize)
//if (id,n)
{
// Since z, x and y are accessed with the same offset by the same thread,
// and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing).
if (beta == 0.0f)
z[id] = PREC_FMUL(alpha,x[id]);
else
z[id] = PREC_FADD(PREC_FMUL(alpha, x[id]), PREC_FMUL(beta,y[id]));
if (beta == 0.0f) {
for ( ; id < n; id +=gridSize)
{
// Since z, x and y are accessed with the same offset by the same thread,
// and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing).
z[id] = PREC_FMUL(alpha,x[id]);
}
} else {
for ( ; id < n; id +=gridSize)
{
// Since z, x and y are accessed with the same offset by the same thread,
// and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing).
z[id] = PREC_FADD(PREC_FMUL(alpha, x[id]), PREC_FMUL(beta,y[id]));
}
}
}

@ -34,16 +34,21 @@ __global__ void spgpuZaxpby_krn(cuDoubleComplex *z, int n, cuDoubleComplex beta,
{
int id = threadIdx.x + BLOCK_SIZE*blockIdx.x;
unsigned int gridSize = blockDim.x * gridDim.x;
for ( ; id < n; id +=gridSize)
//if (id,n)
{
// Since z, x and y are accessed with the same offset by the same thread,
// and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing).
if (cuDoubleComplex_isZero(beta))
z[id] = cuCmul(alpha,x[id]);
else
z[id] = cuCfma(alpha, x[id], cuCmul(beta,y[id]));
if (cuDoubleComplex_isZero(beta)) {
for ( ; id < n; id +=gridSize)
//if (id,n)
{
// Since z, x and y are accessed with the same offset by the same thread,
// and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing).
z[id] = cuCmul(alpha,x[id]);
}
} else {
for ( ; id < n; id +=gridSize)
//if (id,n)
{
z[id] = cuCfma(beta, y[id], cuCmul(alpha, x[id]));
}
}
}

Loading…
Cancel
Save