You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
psblas3/cuda/spgpu/vector.h

1327 lines
48 KiB
C

#pragma once
/*
* spGPU - Sparse matrices on GPU library.
*
* Copyright (C) 2010 - 2012
* Davide Barbieri - University of Rome Tor Vergata
* Salvatore Filippone - University of Rome Tor Vergata
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 3 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include "core.h"
/** \addtogroup vecFun Vectors and sparse vectors routines
* @{
*/
#ifdef __cplusplus
extern "C" {
#endif
/**
* \fn void spgpuIgath(spgpuHandle_t handle, __device int *xValues, int xNnz, const __device int *xIndices, int xBaseIndex, const __device int* y)
* Integer gather from y to sparse(x): the entries of y selected by xIndices are stored into xValues.
* \param handle the spgpu handle used to call this routine
* \param xValues the destination array for the gathered values
* \param xNnz the number of elements to gather
* \param xIndices the array of indices of the elements to be gathered
* \param xBaseIndex the base index used in xIndices (0 for C-style indexing, 1 for Fortran-style indexing)
* \param y the source vector (from which the elements will be gathered)
*/
void spgpuIgath(spgpuHandle_t handle,
__device int *xValues,
int xNnz,
const __device int *xIndices,
int xBaseIndex,
const __device int* y);
/**
* \fn void spgpuIscat(spgpuHandle_t handle, __device int* y, int xNnz, const __device int *xValues, const __device int *xIndices, int xBaseIndex, int beta)
* Integer scatter from sparse(x) to y: the entries of xValues are scattered into y at the positions given by xIndices.
* For each i in [0,xNnz): y[xIndices[i]] = beta*y[xIndices[i]] + xValues[i].
* Note that beta is applied only to the entries of y addressed by xIndices; the other entries of y are not scaled.
* \param handle the spgpu handle used to call this routine
* \param y the destination vector (to which the elements will be scattered)
* \param xNnz the number of elements to scatter
* \param xValues the source array from which the values will be read
* \param xIndices the array of indices of the elements to be scattered
* \param xBaseIndex the base index used in xIndices (0 for C-style indexing, 1 for Fortran-style indexing)
* \param beta the beta value
*/
void spgpuIscat(spgpuHandle_t handle,
__device int* y,
int xNnz,
const __device int *xValues,
const __device int *xIndices,
int xBaseIndex, int beta);
/**
* \fn float spgpuSdot (spgpuHandle_t handle, int n, __device float* a, __device float* b)
* Computes the single precision dot product of the vectors a and b.
* \param handle the spgpu handle used to call this routine
* \param n the vectors' length
* \param a the first input vector
* \param b the second input vector
* \return the dot product
*/
float spgpuSdot(spgpuHandle_t handle,
int n,
__device float* a,
__device float* b);
/**
* \fn void spgpuSmdot (spgpuHandle_t handle, float* y, int n, __device float* a, __device float* b, int count, int pitch)
* Computes the single precision dot products of the multivectors a and b.
* \param handle the spgpu handle used to call this routine
* \param y the array of results: one dot product for every pair of corresponding vectors in a and b.
*          NOTE(review): unlike a and b, y is not tagged __device - presumably a host array; confirm.
* \param n the vectors' length
* \param a the first input multivector
* \param b the second input multivector
* \param count the number of vectors in every multivector
* \param pitch the pitch, in number of elements, of every multivector (so the second element of the first vector in a will be a[pitch], the third a[2*pitch], etc.).
*          NOTE(review): if consecutive vectors (rather than consecutive elements) are pitch elements apart,
*          the sentence above should refer to the first element of the second vector - confirm against the implementation.
*/
void spgpuSmdot(spgpuHandle_t handle,
float* y,
int n,
__device float* a,
__device float* b,
int count,
int pitch);
/**
* \fn void spgpuSabs (spgpuHandle_t handle, __device float* y, int n, float alpha, __device float* x)
* Computes the single precision y = alpha * abs(x) for each element of x.
* \param handle the spgpu handle used to call this routine
* \param y the resulting vector (may be x itself)
* \param n the vectors' length
* \param alpha the alpha value
* \param x the input vector
*/
void spgpuSabs(spgpuHandle_t handle,
__device float* y,
int n,
float alpha,
__device float* x);
/**
* \fn float spgpuSnrm2(spgpuHandle_t handle, int n, __device float* x)
* Computes the single precision Euclidean norm of the vector x.
* \param handle the spgpu handle used to call this routine
* \param n the vector's length
* \param x the input vector
* \return the Euclidean norm of x
*/
float spgpuSnrm2(spgpuHandle_t handle,
int n,
__device float* x);
/**
* \fn void spgpuSmnrm2(spgpuHandle_t handle, float *y, int n, __device float *x, int count, int pitch)
* Computes the single precision Euclidean norm of every vector in the multivector x.
* \param handle the spgpu handle used to call this routine
* \param y the array of results (one norm per vector of x).
*          NOTE(review): unlike x, y is not tagged __device - presumably a host array; confirm.
* \param n the length of the vectors in the x multivector
* \param x the input multivector
* \param count the number of vectors in x
* \param pitch the multivector's pitch
*/
void spgpuSmnrm2(spgpuHandle_t handle,
float *y,
int n,
__device float *x,
int count,
int pitch);
/**
* \fn void spgpuSscal(spgpuHandle_t handle, __device float *y, int n, float alpha, __device float *x)
* Computes the single precision y = alpha * x. y may be x itself (same address, without offset) or another vector.
* \param handle the spgpu handle used to call this routine
* \param y the resulting vector
* \param n the vectors' length
* \param alpha the alpha value
* \param x the input vector
*/
void spgpuSscal(spgpuHandle_t handle,
__device float *y,
int n,
float alpha,
__device float *x);
/**
* \fn void spgpuSaxpby(spgpuHandle_t handle, __device float *z, int n, float beta, __device float *y, float alpha, __device float* x)
* Computes the single precision z = beta * y + alpha * x. z may be x or y itself (same address, without offset) or another vector.
* \param handle the spgpu handle used to call this routine
* \param z the resulting vector
* \param n the vectors' length
* \param beta the beta value (scales y)
* \param y the first input vector
* \param alpha the alpha value (scales x)
* \param x the second input vector
*/
void spgpuSaxpby(spgpuHandle_t handle,
__device float *z,
int n,
float beta,
__device float *y,
float alpha,
__device float* x);
/**
* \fn void spgpuSabgdxyz(spgpuHandle_t handle, int n, float alpha, float beta, float gamma, float delta, __device float* x, __device float *y, __device float *z)
* Single precision routine taking four scalar coefficients (alpha, beta, gamma, delta) and three vectors (x, y, z).
* NOTE(review): the formula is not stated in this header; the name suggests a fused update combining
* x, y and z scaled by the four coefficients - confirm against the implementation before relying on it.
* \param handle the spgpu handle used to call this routine
* \param n presumably the vectors' length, as in the other routines of this header (TODO confirm)
* \param alpha scalar coefficient
* \param beta scalar coefficient
* \param gamma scalar coefficient
* \param delta scalar coefficient
* \param x vector argument (not const-qualified; which vectors are read or written is not visible here)
* \param y vector argument (not const-qualified)
* \param z vector argument (not const-qualified)
*/
void spgpuSabgdxyz(spgpuHandle_t handle,
int n,
float alpha,
float beta,
float gamma,
float delta,
__device float* x,
__device float *y,
__device float *z)
;
/**
* \fn void spgpuSxyzw(spgpuHandle_t handle, int n, float a, float b, float c, float d, float e, float f, __device float* x, __device float *y, __device float *z, __device float *w)
* Single precision routine taking six scalar coefficients (a to f) and four vectors (x, y, z, w).
* NOTE(review): the formula is not stated in this header; the name suggests a fused update over
* x, y, z and w - confirm against the implementation before relying on it.
* \param handle the spgpu handle used to call this routine
* \param n presumably the vectors' length, as in the other routines of this header (TODO confirm)
* \param a scalar coefficient
* \param b scalar coefficient
* \param c scalar coefficient
* \param d scalar coefficient
* \param e scalar coefficient
* \param f scalar coefficient
* \param x vector argument (not const-qualified; which vectors are read or written is not visible here)
* \param y vector argument (not const-qualified)
* \param z vector argument (not const-qualified)
* \param w vector argument (not const-qualified)
*/
void spgpuSxyzw(spgpuHandle_t handle,
int n,
float a, float b,
float c, float d,
float e, float f,
__device float* x,
__device float *y,
__device float *z,
__device float *w)
;
/**
* \fn void spgpuSmaxpby(spgpuHandle_t handle, __device float *z, int n, float beta, __device float *y, float alpha, __device float* x, int count, int pitch)
* Computes the single precision z = beta * y + alpha * x for the multivectors z, x and y. z may be x or y itself (same address, without offset) or another multivector.
* \param handle the spgpu handle used to call this routine
* \param z the resulting multivector
* \param n the vectors' length
* \param beta the beta value (scales y)
* \param y the first input multivector
* \param alpha the alpha value (scales x)
* \param x the second input multivector
* \param count the number of vectors in the z, x and y multivectors
* \param pitch the multivectors' pitch
*/
void spgpuSmaxpby(spgpuHandle_t handle,
__device float *z,
int n,
float beta,
__device float *y,
float alpha,
__device float* x,
int count, int pitch);
/**
* \fn void spgpuSaxy(spgpuHandle_t handle, __device float *z, int n, float alpha, __device float *x, __device float* y)
* Computes the single precision z = alpha * x * y. z may be x or y itself (same address, without offset) or another vector.
* \param handle the spgpu handle used to call this routine
* \param z the resulting vector
* \param n the vectors' length
* \param alpha the alpha value
* \param x the first input vector
* \param y the second input vector
*/
void spgpuSaxy(spgpuHandle_t handle,
__device float *z,
int n,
float alpha,
__device float *x,
__device float *y);
/**
* \fn void spgpuSaxypbz(spgpuHandle_t handle, __device float *w, int n, float beta, __device float *z, float alpha, __device float* x, __device float *y)
* Computes the single precision w = beta * z + alpha * x * y. w may be x, y or z itself (same address, without offset) or another vector.
* \param handle the spgpu handle used to call this routine
* \param w the resulting vector
* \param n the vectors' length
* \param beta the beta value (scales z)
* \param z the first input vector
* \param alpha the alpha value (scales the x * y product)
* \param x the second input vector
* \param y the third input vector
*/
void spgpuSaxypbz(spgpuHandle_t handle,
__device float *w,
int n,
float beta,
__device float *z,
float alpha,
__device float* x,
__device float *y);
/**
* \fn void spgpuSmaxy(spgpuHandle_t handle, __device float *z, int n, float alpha, __device float *x, __device float* y, int count, int pitch)
* Computes the single precision z = alpha * x * y for the multivectors z, x and y. z may be x or y itself (same address, without offset) or another multivector.
* \param handle the spgpu handle used to call this routine
* \param z the resulting multivector
* \param n the length of the vectors in the multivectors
* \param alpha the alpha value
* \param x the first input multivector
* \param y the second input multivector
* \param count the number of vectors in the z, x and y multivectors
* \param pitch the multivectors' pitch
*/
void spgpuSmaxy(spgpuHandle_t handle,
__device float *z,
int n,
float alpha,
__device float* x,
__device float *y,
int count,
int pitch);
/**
* \fn void spgpuSmaxypbz(spgpuHandle_t handle, __device float *w, int n, float beta, __device float *z, float alpha, __device float* x, __device float *y, int count, int pitch)
* Computes the single precision w = beta * z + alpha * x * y for the multivectors w, z, x and y. w may be x, y or z itself (same address, without offset) or another multivector.
* \param handle the spgpu handle used to call this routine
* \param w the resulting multivector
* \param n the vectors' length
* \param beta the beta value (scales z)
* \param z the first input multivector
* \param alpha the alpha value (scales the x * y product)
* \param x the second input multivector
* \param y the third input multivector
* \param count the number of vectors in the w, z, x and y multivectors
* \param pitch the multivectors' pitch
*/
void spgpuSmaxypbz(spgpuHandle_t handle,
__device float *w,
int n,
float beta,
__device float *z,
float alpha,
__device float* x,
__device float *y,
int count,
int pitch);
/**
* \fn void spgpuSgath(spgpuHandle_t handle, __device float *xValues, int xNnz, const __device int *xIndices, int xBaseIndex, const __device float* y)
* Single precision gather from y to sparse(x): the entries of y selected by xIndices are stored into xValues.
* \param handle the spgpu handle used to call this routine
* \param xValues the destination array for the gathered values
* \param xNnz the number of elements to gather
* \param xIndices the array of indices of the elements to be gathered
* \param xBaseIndex the base index used in xIndices (0 for C-style indexing, 1 for Fortran-style indexing)
* \param y the source vector (from which the elements will be gathered)
*/
void spgpuSgath(spgpuHandle_t handle,
__device float *xValues,
int xNnz,
const __device int *xIndices,
int xBaseIndex,
const __device float* y);
/**
* \fn void spgpuSscat(spgpuHandle_t handle, __device float* y, int xNnz, const __device float *xValues, const __device int *xIndices, int xBaseIndex, float beta)
* Single precision scatter from sparse(x) to y: the entries of xValues are scattered into y at the positions given by xIndices.
* For each i in [0,xNnz): y[xIndices[i]] = beta*y[xIndices[i]] + xValues[i].
* Note that beta is applied only to the entries of y addressed by xIndices; the other entries of y are not scaled.
* \param handle the spgpu handle used to call this routine
* \param y the destination vector (to which the elements will be scattered)
* \param xNnz the number of elements to scatter
* \param xValues the source array from which the values will be read
* \param xIndices the array of indices of the elements to be scattered
* \param xBaseIndex the base index used in xIndices (0 for C-style indexing, 1 for Fortran-style indexing)
* \param beta the beta value
*/
void spgpuSscat(spgpuHandle_t handle,
__device float* y,
int xNnz,
const __device float *xValues,
const __device int *xIndices,
int xBaseIndex, float beta);
/**
* \fn float spgpuSasum(spgpuHandle_t handle, int n, float* x)
* Single precision reduction over the vector x returning one float.
* NOTE(review): presumably the sum of the absolute values of x (BLAS "asum" naming) - not stated in this header, confirm.
* \param handle the spgpu handle used to call this routine
* \param n presumably the vector's length (TODO confirm)
* \param x the input vector. NOTE(review): not tagged __device, unlike the vector arguments of the documented routines - confirm the memory space.
* \return the reduction result
*/
float spgpuSasum(spgpuHandle_t handle,
int n,
float* x);
/**
* \fn float spgpuSamax(spgpuHandle_t handle, int n, float* x)
* Single precision reduction over the vector x returning one float.
* NOTE(review): presumably the maximum absolute value in x (BLAS "amax" naming) - not stated in this header, confirm.
* \param handle the spgpu handle used to call this routine
* \param n presumably the vector's length (TODO confirm)
* \param x the input vector. NOTE(review): not tagged __device, unlike the vector arguments of the documented routines - confirm the memory space.
* \return the reduction result
*/
float spgpuSamax(spgpuHandle_t handle,
int n,
float* x);
/**
* \fn void spgpuSmasum(spgpuHandle_t handle, float* y, int n, float* x, int count, int pitch)
* Single precision multivector routine writing its results to y.
* NOTE(review): presumably the "asum" reduction applied to every vector of the multivector x - not stated in this header, confirm.
* \param handle the spgpu handle used to call this routine
* \param y presumably the array of results, one per vector of x (TODO confirm)
* \param n presumably the vectors' length (TODO confirm)
* \param x the input multivector. NOTE(review): neither x nor y is tagged __device - confirm the memory space of both.
* \param count presumably the number of vectors in x, as in the other multivector routines (TODO confirm)
* \param pitch presumably the multivector's pitch, as in the other multivector routines (TODO confirm)
*/
void spgpuSmasum(spgpuHandle_t handle,
float* y,
int n,
float* x,
int count,
int pitch);
/**
* \fn void spgpuSmamax(spgpuHandle_t handle, float* y, int n, float* x, int count, int pitch)
* Single precision multivector routine writing its results to y.
* NOTE(review): presumably the "amax" reduction applied to every vector of the multivector x - not stated in this header, confirm.
* \param handle the spgpu handle used to call this routine
* \param y presumably the array of results, one per vector of x (TODO confirm)
* \param n presumably the vectors' length (TODO confirm)
* \param x the input multivector. NOTE(review): neither x nor y is tagged __device - confirm the memory space of both.
* \param count presumably the number of vectors in x, as in the other multivector routines (TODO confirm)
* \param pitch presumably the multivector's pitch, as in the other multivector routines (TODO confirm)
*/
void spgpuSmamax(spgpuHandle_t handle,
float* y,
int n,
float* x,
int count,
int pitch);
/**
* \fn void spgpuDscal(spgpuHandle_t handle, __device double *y, int n, double alpha, __device double *x)
* Computes the double precision y = alpha * x. y may be x itself (same address, without offset) or another vector.
* \param handle the spgpu handle used to call this routine
* \param y the resulting vector
* \param n the vectors' length
* \param alpha the alpha value
* \param x the input vector
*/
void spgpuDscal(spgpuHandle_t handle,
__device double *y,
int n,
double alpha,
__device double *x);
/**
* \fn double spgpuDdot (spgpuHandle_t handle, int n, __device double* a, __device double* b)
* Computes the double precision dot product of the vectors a and b.
* \param handle the spgpu handle used to call this routine
* \param n the vectors' length
* \param a the first input vector
* \param b the second input vector
* \return the dot product
*/
double spgpuDdot(spgpuHandle_t handle,
int n,
__device double* a,
__device double* b);
/**
* \fn void spgpuDabs (spgpuHandle_t handle, __device double* y, int n, double alpha, __device double* x)
* Computes the double precision y = alpha * abs(x) for each element of x.
* \param handle the spgpu handle used to call this routine
* \param y the resulting vector (may be x itself)
* \param n the vectors' length
* \param alpha the alpha value
* \param x the input vector
*/
void spgpuDabs(spgpuHandle_t handle,
__device double* y,
int n,
double alpha,
__device double* x);
/**
* \fn void spgpuDmdot (spgpuHandle_t handle, double* y, int n, __device double* a, __device double* b, int count, int pitch)
* Computes the double precision dot products of the multivectors a and b.
* \param handle the spgpu handle used to call this routine
* \param y the array of results: one dot product for every pair of corresponding vectors in a and b.
*          NOTE(review): unlike a and b, y is not tagged __device - presumably a host array; confirm.
* \param n the vectors' length
* \param a the first input multivector
* \param b the second input multivector
* \param count the number of vectors in every multivector
* \param pitch the pitch, in number of elements, of every multivector (so the second element of the first vector in a will be a[pitch], the third a[2*pitch], etc.).
*          NOTE(review): if consecutive vectors (rather than consecutive elements) are pitch elements apart,
*          the sentence above should refer to the first element of the second vector - confirm against the implementation.
*/
void spgpuDmdot(spgpuHandle_t handle,
double* y,
int n,
__device double* a,
__device double* b,
int count,
int pitch);
/**
* \fn double spgpuDnrm2(spgpuHandle_t handle, int n, __device double* x)
* Computes the double precision Euclidean norm of the vector x.
* \param handle the spgpu handle used to call this routine
* \param n the vector's length
* \param x the input vector
* \return the Euclidean norm of x
*/
double spgpuDnrm2(spgpuHandle_t handle,
int n,
__device double* x);
/**
* \fn void spgpuDmnrm2(spgpuHandle_t handle, double *y, int n, __device double *x, int count, int pitch)
* Computes the double precision Euclidean norm of every vector in the multivector x.
* \param handle the spgpu handle used to call this routine
* \param y the array of results (one norm per vector of x).
*          NOTE(review): unlike x, y is not tagged __device - presumably a host array; confirm.
* \param n the length of the vectors in the x multivector
* \param x the input multivector
* \param count the number of vectors in x
* \param pitch the multivector's pitch
*/
void spgpuDmnrm2(spgpuHandle_t handle,
double *y,
int n,
__device double *x,
int count,
int pitch);
/**
* \fn void spgpuDaxpby(spgpuHandle_t handle, __device double *z, int n, double beta, __device double *y, double alpha, __device double* x)
* Computes the double precision z = beta * y + alpha * x. z may be x or y itself (same address, without offset) or another vector.
* \param handle the spgpu handle used to call this routine
* \param z the resulting vector
* \param n the vectors' length
* \param beta the beta value (scales y)
* \param y the first input vector
* \param alpha the alpha value (scales x)
* \param x the second input vector
*/
void spgpuDaxpby(spgpuHandle_t handle,
__device double *z,
int n,
double beta,
__device double *y,
double alpha,
__device double* x);
/**
* \fn void spgpuDabgdxyz(spgpuHandle_t handle, int n, double alpha, double beta, double gamma, double delta, __device double* x, __device double *y, __device double *z)
* Double precision routine taking four scalar coefficients (alpha, beta, gamma, delta) and three vectors (x, y, z).
* NOTE(review): the formula is not stated in this header; the name suggests a fused update combining
* x, y and z scaled by the four coefficients - confirm against the implementation before relying on it.
* \param handle the spgpu handle used to call this routine
* \param n presumably the vectors' length, as in the other routines of this header (TODO confirm)
* \param alpha scalar coefficient
* \param beta scalar coefficient
* \param gamma scalar coefficient
* \param delta scalar coefficient
* \param x vector argument (not const-qualified; which vectors are read or written is not visible here)
* \param y vector argument (not const-qualified)
* \param z vector argument (not const-qualified)
*/
void spgpuDabgdxyz(spgpuHandle_t handle,
int n,
double alpha,
double beta,
double gamma,
double delta,
__device double* x,
__device double *y,
__device double *z)
;
/**
* \fn void spgpuDxyzw(spgpuHandle_t handle, int n, double a, double b, double c, double d, double e, double f, __device double* x, __device double *y, __device double *z, __device double *w)
* Double precision routine taking six scalar coefficients (a to f) and four vectors (x, y, z, w).
* NOTE(review): the formula is not stated in this header; the name suggests a fused update over
* x, y, z and w - confirm against the implementation before relying on it.
* \param handle the spgpu handle used to call this routine
* \param n presumably the vectors' length, as in the other routines of this header (TODO confirm)
* \param a scalar coefficient
* \param b scalar coefficient
* \param c scalar coefficient
* \param d scalar coefficient
* \param e scalar coefficient
* \param f scalar coefficient
* \param x vector argument (not const-qualified; which vectors are read or written is not visible here)
* \param y vector argument (not const-qualified)
* \param z vector argument (not const-qualified)
* \param w vector argument (not const-qualified)
*/
void spgpuDxyzw(spgpuHandle_t handle,
int n,
double a, double b,
double c, double d,
double e, double f,
__device double* x,
__device double *y,
__device double *z,
__device double *w)
;
/**
* \fn void spgpuDmaxpby(spgpuHandle_t handle, __device double *z, int n, double beta, __device double *y, double alpha, __device double* x, int count, int pitch)
* Computes the double precision z = beta * y + alpha * x for the multivectors z, x and y. z may be x or y itself (same address, without offset) or another multivector.
* \param handle the spgpu handle used to call this routine
* \param z the resulting multivector
* \param n the vectors' length
* \param beta the beta value (scales y)
* \param y the first input multivector
* \param alpha the alpha value (scales x)
* \param x the second input multivector
* \param count the number of vectors in the z, x and y multivectors
* \param pitch the multivectors' pitch
*/
void spgpuDmaxpby(spgpuHandle_t handle,
__device double *z,
int n,
double beta,
__device double *y,
double alpha,
__device double* x,
int count, int pitch);
/**
* \fn void spgpuDaxy(spgpuHandle_t handle, __device double *z, int n, double alpha, __device double *x, __device double* y)
* Computes the double precision z = alpha * x * y. z may be x or y itself (same address, without offset) or another vector.
* \param handle the spgpu handle used to call this routine
* \param z the resulting vector
* \param n the vectors' length
* \param alpha the alpha value
* \param x the first input vector
* \param y the second input vector
*/
void spgpuDaxy(spgpuHandle_t handle,
__device double *z,
int n,
double alpha,
__device double *x,
__device double *y);
/**
* \fn void spgpuDaxypbz(spgpuHandle_t handle, __device double *w, int n, double beta, __device double *z, double alpha, __device double* x, __device double *y)
* Computes the double precision w = beta * z + alpha * x * y. w may be x, y or z itself (same address, without offset) or another vector.
* \param handle the spgpu handle used to call this routine
* \param w the resulting vector
* \param n the vectors' length
* \param beta the beta value (scales z)
* \param z the first input vector
* \param alpha the alpha value (scales the x * y product)
* \param x the second input vector
* \param y the third input vector
*/void spgpuDaxypbz(spgpuHandle_t handle,
__device double *w,
int n,
double beta,
__device double *z,
double alpha,
__device double* x,
__device double *y);
/**
* \fn void spgpuDmaxy(spgpuHandle_t handle, __device double *z, int n, double alpha, __device double *x, __device double* y, int count, int pitch)
* Computes the double precision z = alpha * x * y for the multivectors z, x and y. z may be x or y itself (same address, without offset) or another multivector.
* \param handle the spgpu handle used to call this routine
* \param z the resulting multivector
* \param n the length of the vectors in the multivectors
* \param alpha the alpha value
* \param x the first input multivector
* \param y the second input multivector
* \param count the number of vectors in the z, x and y multivectors
* \param pitch the multivectors' pitch
*/
void spgpuDmaxy(spgpuHandle_t handle,
__device double *z,
int n,
double alpha,
__device double* x,
__device double *y,
int count,
int pitch);
/**
* \fn void spgpuDmaxypbz(spgpuHandle_t handle, __device double *w, int n, double beta, __device double *z, double alpha, __device double* x, __device double *y, int count, int pitch)
* Computes the double precision w = beta * z + alpha * x * y for the multivectors w, z, x and y. w may be x, y or z itself (same address, without offset) or another multivector.
* \param handle the spgpu handle used to call this routine
* \param w the resulting multivector
* \param n the vectors' length
* \param beta the beta value (scales z)
* \param z the first input multivector
* \param alpha the alpha value (scales the x * y product)
* \param x the second input multivector
* \param y the third input multivector
* \param count the number of vectors in the w, z, x and y multivectors
* \param pitch the multivectors' pitch
*/
void spgpuDmaxypbz(spgpuHandle_t handle,
__device double *w,
int n,
double beta,
__device double *z,
double alpha,
__device double* x,
__device double *y,
int count,
int pitch);
/**
* \fn void spgpuDgath(spgpuHandle_t handle, __device double *xValues, int xNnz, const __device int *xIndices, int xBaseIndex, const __device double* y)
* Double precision gather from y to sparse(x): the entries of y selected by xIndices are stored into xValues.
* \param handle the spgpu handle used to call this routine
* \param xValues the destination array for the gathered values
* \param xNnz the number of elements to gather
* \param xIndices the array of indices of the elements to be gathered
* \param xBaseIndex the base index used in xIndices (0 for C-style indexing, 1 for Fortran-style indexing)
* \param y the source vector (from which the elements will be gathered)
*/
void spgpuDgath(spgpuHandle_t handle,
__device double *xValues,
int xNnz,
const __device int *xIndices,
int xBaseIndex,
const __device double* y);
/**
* \fn void spgpuDscat(spgpuHandle_t handle, __device double* y, int xNnz, const __device double *xValues, const __device int *xIndices, int xBaseIndex, double beta)
* Double precision scatter from sparse(x) to y: the entries of xValues are scattered into y at the positions given by xIndices.
* For each i in [0,xNnz): y[xIndices[i]] = beta*y[xIndices[i]] + xValues[i].
* Note that beta is applied only to the entries of y addressed by xIndices; the other entries of y are not scaled.
* \param handle the spgpu handle used to call this routine
* \param y the destination vector (to which the elements will be scattered)
* \param xNnz the number of elements to scatter
* \param xValues the source array from which the values will be read
* \param xIndices the array of indices of the elements to be scattered
* \param xBaseIndex the base index used in xIndices (0 for C-style indexing, 1 for Fortran-style indexing)
* \param beta the beta value
*/
void spgpuDscat(spgpuHandle_t handle,
__device double* y,
int xNnz,
const __device double *xValues,
const __device int *xIndices,
int xBaseIndex, double beta);
/**
* \fn double spgpuDasum(spgpuHandle_t handle, int n, double* x)
* Double precision reduction over the vector x returning one double.
* NOTE(review): presumably the sum of the absolute values of x (BLAS "asum" naming) - not stated in this header, confirm.
* \param handle the spgpu handle used to call this routine
* \param n presumably the vector's length (TODO confirm)
* \param x the input vector. NOTE(review): not tagged __device, unlike the vector arguments of the documented routines - confirm the memory space.
* \return the reduction result
*/
double spgpuDasum(spgpuHandle_t handle,
int n,
double* x);
/**
* \fn double spgpuDamax(spgpuHandle_t handle, int n, double* x)
* Double precision reduction over the vector x returning one double.
* NOTE(review): presumably the maximum absolute value in x (BLAS "amax" naming) - not stated in this header, confirm.
* \param handle the spgpu handle used to call this routine
* \param n presumably the vector's length (TODO confirm)
* \param x the input vector. NOTE(review): not tagged __device, unlike the vector arguments of the documented routines - confirm the memory space.
* \return the reduction result
*/
double spgpuDamax(spgpuHandle_t handle,
int n,
double* x);
/**
* \fn void spgpuDmasum(spgpuHandle_t handle, double* y, int n, double* x, int count, int pitch)
* Double precision multivector routine writing its results to y.
* NOTE(review): presumably the "asum" reduction applied to every vector of the multivector x - not stated in this header, confirm.
* \param handle the spgpu handle used to call this routine
* \param y presumably the array of results, one per vector of x (TODO confirm)
* \param n presumably the vectors' length (TODO confirm)
* \param x the input multivector. NOTE(review): neither x nor y is tagged __device - confirm the memory space of both.
* \param count presumably the number of vectors in x, as in the other multivector routines (TODO confirm)
* \param pitch presumably the multivector's pitch, as in the other multivector routines (TODO confirm)
*/
void spgpuDmasum(spgpuHandle_t handle,
double* y,
int n,
double* x,
int count,
int pitch);
/**
* \fn void spgpuDmamax(spgpuHandle_t handle, double* y, int n, double* x, int count, int pitch)
* Double precision multivector routine writing its results to y.
* NOTE(review): presumably the "amax" reduction applied to every vector of the multivector x - not stated in this header, confirm.
* \param handle the spgpu handle used to call this routine
* \param y presumably the array of results, one per vector of x (TODO confirm)
* \param n presumably the vectors' length (TODO confirm)
* \param x the input multivector. NOTE(review): neither x nor y is tagged __device - confirm the memory space of both.
* \param count presumably the number of vectors in x, as in the other multivector routines (TODO confirm)
* \param pitch presumably the multivector's pitch, as in the other multivector routines (TODO confirm)
*/
void spgpuDmamax(spgpuHandle_t handle,
double* y,
int n,
double* x,
int count,
int pitch);
/**
* \fn cuFloatComplex spgpuCdot (spgpuHandle_t handle, int n, __device cuFloatComplex* a, __device cuFloatComplex* b)
* Computes the single precision complex dot product of the vectors a and b.
* NOTE(review): whether one of the operands is conjugated is not stated in this header - confirm against the implementation.
* \param handle the spgpu handle used to call this routine
* \param n the vectors' length
* \param a the first input vector
* \param b the second input vector
* \return the dot product
*/
cuFloatComplex spgpuCdot(spgpuHandle_t handle,
int n,
__device cuFloatComplex* a,
__device cuFloatComplex* b);
/**
* \fn void spgpuCmdot (spgpuHandle_t handle, cuFloatComplex* y, int n, __device cuFloatComplex* a, __device cuFloatComplex* b, int count, int pitch)
* Computes the single precision complex dot products of the multivectors a and b.
* \param handle the spgpu handle used to call this routine
* \param y the array of results: one dot product for every pair of corresponding vectors in a and b.
*          NOTE(review): unlike a and b, y is not tagged __device - presumably a host array; confirm.
* \param n the vectors' length
* \param a the first input multivector
* \param b the second input multivector
* \param count the number of vectors in every multivector
* \param pitch the pitch, in number of elements, of every multivector (so the second element of the first vector in a will be a[pitch], the third a[2*pitch], etc.).
*          NOTE(review): if consecutive vectors (rather than consecutive elements) are pitch elements apart,
*          the sentence above should refer to the first element of the second vector - confirm against the implementation.
*/
void spgpuCmdot(spgpuHandle_t handle,
cuFloatComplex* y,
int n,
__device cuFloatComplex* a,
__device cuFloatComplex* b,
int count,
int pitch);
/**
* \fn void spgpuCabs (spgpuHandle_t handle, __device cuFloatComplex* y, int n, cuFloatComplex alpha, __device cuFloatComplex* x)
* Computes the single precision complex y = alpha * abs(x) for each element of x.
* NOTE(review): this comment previously listed y as "__device float*" and alpha as "float", whereas the
* prototype below declares both as cuFloatComplex; the \fn line now follows the prototype - confirm which
* types the implementation actually uses.
* \param handle the spgpu handle used to call this routine
* \param y the resulting vector (may be x itself)
* \param n the vectors' length
* \param alpha the alpha value
* \param x the input vector
*/
void spgpuCabs(spgpuHandle_t handle,
__device cuFloatComplex* y,
int n,
cuFloatComplex alpha,
__device cuFloatComplex* x);
/**
* \fn float spgpuCnrm2(spgpuHandle_t handle, int n, __device cuFloatComplex* x)
* Computes the single precision Euclidean norm of the complex vector x.
* \param handle the spgpu handle used to call this routine
* \param n the vector's length
* \param x the input vector
* \return the Euclidean norm of x (a real value)
*/
float spgpuCnrm2(spgpuHandle_t handle,
int n,
__device cuFloatComplex* x);
/**
* \fn void spgpuCmnrm2(spgpuHandle_t handle, float *y, int n, __device cuFloatComplex *x, int count, int pitch)
* Computes the single precision Euclidean norm of every vector in the complex multivector x.
* \param handle the spgpu handle used to call this routine
* \param y the array of results (one real-valued norm per vector of x).
*          NOTE(review): unlike x, y is not tagged __device - presumably a host array; confirm.
* \param n the length of the vectors in the x multivector
* \param x the input multivector
* \param count the number of vectors in x
* \param pitch the multivector's pitch
*/
void spgpuCmnrm2(spgpuHandle_t handle,
float *y,
int n,
__device cuFloatComplex *x,
int count,
int pitch);
/**
* \fn void spgpuCscal(spgpuHandle_t handle, __device cuFloatComplex *y, int n, cuFloatComplex alpha, __device cuFloatComplex *x)
* Computes the single precision complex y = alpha * x. y may be x itself (same address, without offset) or another vector.
* \param handle the spgpu handle used to call this routine
* \param y the resulting vector
* \param n the vectors' length
* \param alpha the alpha value
* \param x the input vector
*/
void spgpuCscal(spgpuHandle_t handle,
__device cuFloatComplex *y,
int n,
cuFloatComplex alpha,
__device cuFloatComplex *x);
/**
* \fn void spgpuCaxpby(spgpuHandle_t handle, __device cuFloatComplex *z, int n, cuFloatComplex beta, __device cuFloatComplex *y, cuFloatComplex alpha, __device cuFloatComplex* x)
* Computes the single precision complex z = beta * y + alpha * x. z may be x or y itself (same address, without offset) or another vector.
* \param handle the spgpu handle used to call this routine
* \param z the resulting vector
* \param n the vectors' length
* \param beta the beta value (scales y)
* \param y the first input vector
* \param alpha the alpha value (scales x)
* \param x the second input vector
*/
void spgpuCaxpby(spgpuHandle_t handle,
__device cuFloatComplex *z,
int n,
cuFloatComplex beta,
__device cuFloatComplex *y,
cuFloatComplex alpha,
__device cuFloatComplex* x);
/**
* \fn void spgpuCabgdxyz(spgpuHandle_t handle, int n, cuFloatComplex alpha, cuFloatComplex beta, cuFloatComplex gamma, cuFloatComplex delta, __device cuFloatComplex* x, __device cuFloatComplex *y, __device cuFloatComplex *z)
* Single precision complex routine taking four scalar coefficients (alpha, beta, gamma, delta) and three vectors (x, y, z).
* NOTE(review): the formula is not stated in this header; the name suggests a fused update combining
* x, y and z scaled by the four coefficients - confirm against the implementation before relying on it.
* \param handle the spgpu handle used to call this routine
* \param n presumably the vectors' length, as in the other routines of this header (TODO confirm)
* \param alpha scalar coefficient
* \param beta scalar coefficient
* \param gamma scalar coefficient
* \param delta scalar coefficient
* \param x vector argument (not const-qualified; which vectors are read or written is not visible here)
* \param y vector argument (not const-qualified)
* \param z vector argument (not const-qualified)
*/
void spgpuCabgdxyz(spgpuHandle_t handle,
int n,
cuFloatComplex alpha,
cuFloatComplex beta,
cuFloatComplex gamma,
cuFloatComplex delta,
__device cuFloatComplex* x,
__device cuFloatComplex *y,
__device cuFloatComplex *z)
;
/**
* \fn void spgpuCxyzw(spgpuHandle_t handle, int n, cuFloatComplex a, cuFloatComplex b, cuFloatComplex c, cuFloatComplex d, cuFloatComplex e, cuFloatComplex f, __device cuFloatComplex* x, __device cuFloatComplex *y, __device cuFloatComplex *z, __device cuFloatComplex *w)
* Single precision complex routine taking six scalar coefficients (a to f) and four vectors (x, y, z, w).
* NOTE(review): the formula is not stated in this header; the name suggests a fused update over
* x, y, z and w - confirm against the implementation before relying on it.
* \param handle the spgpu handle used to call this routine
* \param n presumably the vectors' length, as in the other routines of this header (TODO confirm)
* \param a scalar coefficient
* \param b scalar coefficient
* \param c scalar coefficient
* \param d scalar coefficient
* \param e scalar coefficient
* \param f scalar coefficient
* \param x vector argument (not const-qualified; which vectors are read or written is not visible here)
* \param y vector argument (not const-qualified)
* \param z vector argument (not const-qualified)
* \param w vector argument (not const-qualified)
*/
void spgpuCxyzw(spgpuHandle_t handle,
int n,
cuFloatComplex a, cuFloatComplex b,
cuFloatComplex c, cuFloatComplex d,
cuFloatComplex e, cuFloatComplex f,
__device cuFloatComplex* x,
__device cuFloatComplex *y,
__device cuFloatComplex *z,
__device cuFloatComplex *w)
;
/**
* \fn void spgpuCmaxpby(spgpuHandle_t handle, __device cuFloatComplex *z, int n, cuFloatComplex beta, __device cuFloatComplex *y, cuFloatComplex alpha, __device cuFloatComplex* x, int count, int pitch)
* Computes the single precision complex z = beta * y + alpha * x for the multivectors z, x and y. z may be x or y itself (same address, without offset) or another multivector.
* \param handle the spgpu handle used to call this routine
* \param z the resulting multivector
* \param n the vectors' length
* \param beta the beta value (scales y)
* \param y the first input multivector
* \param alpha the alpha value (scales x)
* \param x the second input multivector
* \param count the number of vectors in the z, x and y multivectors
* \param pitch the multivectors' pitch
*/
void spgpuCmaxpby(spgpuHandle_t handle,
__device cuFloatComplex *z,
int n,
cuFloatComplex beta,
__device cuFloatComplex *y,
cuFloatComplex alpha,
__device cuFloatComplex* x,
int count, int pitch);
/**
* \fn void spgpuCaxy(spgpuHandle_t handle, __device cuFloatComplex *z, int n, cuFloatComplex alpha, __device cuFloatComplex *x, __device cuFloatComplex* y)
* Computes the single precision complex z = alpha * x * y. z may be x or y itself (same address, without offset) or another vector.
* \param handle the spgpu handle used to call this routine
* \param z the resulting vector
* \param n the vectors' length
* \param alpha the alpha value
* \param x the first input vector
* \param y the second input vector
*/
void spgpuCaxy(spgpuHandle_t handle,
__device cuFloatComplex *z,
int n,
cuFloatComplex alpha,
__device cuFloatComplex *x,
__device cuFloatComplex *y);
/**
* \fn void spgpuCaxypbz(spgpuHandle_t handle, __device cuFloatComplex *w, int n, cuFloatComplex beta, __device cuFloatComplex *z, cuFloatComplex alpha, __device cuFloatComplex* x, __device cuFloatComplex *y)
* Computes the single precision complex w = beta * z + alpha * x * y. w may be x, y or z itself (same address, without offset) or another vector.
* \param handle the spgpu handle used to call this routine
* \param w the resulting vector
* \param n the vectors' length
* \param beta the beta value (scales z)
* \param z the first input vector
* \param alpha the alpha value (scales the x * y product)
* \param x the second input vector
* \param y the third input vector
*/
void spgpuCaxypbz(spgpuHandle_t handle,
__device cuFloatComplex *w,
int n,
cuFloatComplex beta,
__device cuFloatComplex *z,
cuFloatComplex alpha,
__device cuFloatComplex* x,
__device cuFloatComplex *y);
/**
* \fn void spgpuCmaxy(spgpuHandle_t handle, __device cuFloatComplex *z, int n, cuFloatComplex alpha, __device cuFloatComplex *x, __device cuFloatComplex* y, int count, int pitch)
* Computes the single precision complex z = alpha * x * y for the multivectors z, x and y. z may be x or y itself (same address, without offset) or another multivector.
* \param handle the spgpu handle used to call this routine
* \param z the resulting multivector
* \param n the length of the vectors in the multivectors
* \param alpha the alpha value
* \param x the first input multivector
* \param y the second input multivector
* \param count the number of vectors in the z, x and y multivectors
* \param pitch the multivectors' pitch
*/
void spgpuCmaxy(spgpuHandle_t handle,
__device cuFloatComplex *z,
int n,
cuFloatComplex alpha,
__device cuFloatComplex* x,
__device cuFloatComplex *y,
int count,
int pitch);
/**
* \fn void spgpuCmaxypbz(spgpuHandle_t handle, __device cuFloatComplex *w, int n, cuFloatComplex beta, __device cuFloatComplex *z, cuFloatComplex alpha, __device cuFloatComplex* x, __device cuFloatComplex *y, int count, int pitch)
* Computes the single precision complex w = beta * z + alpha * x * y for the multivectors w, z, x and y. w may be x, y or z itself (same address, without offset) or another multivector.
* \param handle the spgpu handle used to call this routine
* \param w the resulting multivector
* \param n the vectors' length
* \param beta the beta value (scales z)
* \param z the first input multivector
* \param alpha the alpha value (scales the x * y product)
* \param x the second input multivector
* \param y the third input multivector
* \param count the number of vectors in the w, z, x and y multivectors
* \param pitch the multivectors' pitch
*/
void spgpuCmaxypbz(spgpuHandle_t handle,
__device cuFloatComplex *w,
int n,
cuFloatComplex beta,
__device cuFloatComplex *z,
cuFloatComplex alpha,
__device cuFloatComplex* x,
__device cuFloatComplex *y,
int count,
int pitch);
/**
 * \fn void spgpuCgath(spgpuHandle_t handle, __device cuFloatComplex *xValues, int xNnz, const __device int *xIndices, int xBaseIndex, const __device cuFloatComplex *y)
 * Single precision complex gather from y into sparse(x): the entries of y selected
 * by xIndices are copied into xValues.
 * \param handle the spgpu handle used to call this routine
 * \param xValues the destination array for the gathered values
 * \param xNnz the number of elements to gather
 * \param xIndices the array of indices of the elements to be gathered
 * \param xBaseIndex the base index used in xIndices (i.e. 0 for C, 1 for Fortran)
 * \param y the source vector (from which the elements are gathered)
 */
void spgpuCgath(spgpuHandle_t handle,
    __device cuFloatComplex *xValues,
    int xNnz,
    const __device int *xIndices,
    int xBaseIndex,
    const __device cuFloatComplex *y);
/**
 * \fn void spgpuCscat(spgpuHandle_t handle, __device cuFloatComplex *y, int xNnz, const __device cuFloatComplex *xValues, const __device int *xIndices, int xBaseIndex, cuFloatComplex beta)
 * Single precision complex scatter from sparse(x) into y, reading xValues and placing them through xIndices.
 * For i in [0,xNnz) the scattered element is y[xIndices[i]] = beta*y[xIndices[i]] + xValues[i];
 * note that only the scattered entries of y are multiplied by beta.
 * \param handle the spgpu handle used to call this routine
 * \param y the destination vector (to which the elements are scattered)
 * \param xNnz the number of elements to scatter
 * \param xValues the source array from which the values are read
 * \param xIndices the array of indices of the elements to be scattered
 * \param xBaseIndex the base index used in xIndices (i.e. 0 for C, 1 for Fortran)
 * \param beta the beta value
 */
void spgpuCscat(spgpuHandle_t handle,
    __device cuFloatComplex *y,
    int xNnz,
    const __device cuFloatComplex *xValues,
    const __device int *xIndices,
    int xBaseIndex,
    cuFloatComplex beta);
/**
 * \fn float spgpuCasum(spgpuHandle_t handle, int n, cuFloatComplex *x)
 * Takes a single precision complex vector x and returns a float.
 * NOTE(review): the name follows the BLAS "asum" convention, so this presumably
 * returns the sum of the absolute values of x -- confirm against the implementation.
 * \param handle the spgpu handle used to call this routine
 * \param n presumably the vector's length (TODO confirm)
 * \param x the input vector
 * \return a single precision scalar (see the note above)
 */
float spgpuCasum(spgpuHandle_t handle,
    int n,
    cuFloatComplex *x);
/**
 * \fn float spgpuCamax(spgpuHandle_t handle, int n, cuFloatComplex *x)
 * Takes a single precision complex vector x and returns a float.
 * NOTE(review): the name follows the BLAS "amax" convention, so this presumably
 * returns the largest absolute value found in x -- confirm against the implementation.
 * \param handle the spgpu handle used to call this routine
 * \param n presumably the vector's length (TODO confirm)
 * \param x the input vector
 * \return a single precision scalar (see the note above)
 */
float spgpuCamax(spgpuHandle_t handle,
    int n,
    cuFloatComplex *x);
/**
 * \fn void spgpuCmasum(spgpuHandle_t handle, float *y, int n, cuFloatComplex *x, int count, int pitch)
 * Takes a single precision complex input x plus count/pitch arguments and a float array y.
 * NOTE(review): presumably the multivector variant of spgpuCasum, storing one
 * result per vector of x into y -- confirm against the implementation.
 * \param handle the spgpu handle used to call this routine
 * \param y presumably the array of results (TODO confirm)
 * \param n presumably the vectors' length in the x multivector (TODO confirm)
 * \param x the input (presumably a multivector)
 * \param count presumably the number of vectors in x (TODO confirm)
 * \param pitch presumably the multivector's pitch (TODO confirm)
 */
void spgpuCmasum(spgpuHandle_t handle,
    float *y,
    int n,
    cuFloatComplex *x,
    int count,
    int pitch);
/**
 * \fn void spgpuCmamax(spgpuHandle_t handle, float *y, int n, cuFloatComplex *x, int count, int pitch)
 * Takes a single precision complex input x plus count/pitch arguments and a float array y.
 * NOTE(review): presumably the multivector variant of spgpuCamax, storing one
 * result per vector of x into y -- confirm against the implementation.
 * \param handle the spgpu handle used to call this routine
 * \param y presumably the array of results (TODO confirm)
 * \param n presumably the vectors' length in the x multivector (TODO confirm)
 * \param x the input (presumably a multivector)
 * \param count presumably the number of vectors in x (TODO confirm)
 * \param pitch presumably the multivector's pitch (TODO confirm)
 */
void spgpuCmamax(spgpuHandle_t handle,
    float *y,
    int n,
    cuFloatComplex *x,
    int count,
    int pitch);
/**
 * \fn void spgpuZscal(spgpuHandle_t handle, __device cuDoubleComplex *y, int n, cuDoubleComplex alpha, __device cuDoubleComplex *x)
 * Double precision complex scaling: y = alpha * x.
 * y may be exactly x (without offset) or a distinct vector.
 * \param handle the spgpu handle used to call this routine
 * \param y the resulting vector
 * \param n the vectors' length
 * \param alpha the alpha value
 * \param x the input vector
 */
void spgpuZscal(spgpuHandle_t handle,
    __device cuDoubleComplex *y,
    int n,
    cuDoubleComplex alpha,
    __device cuDoubleComplex *x);
/**
 * \fn cuDoubleComplex spgpuZdot(spgpuHandle_t handle, int n, __device cuDoubleComplex *a, __device cuDoubleComplex *b)
 * Computes the double precision complex dot product of the a and b vectors.
 * \param handle the spgpu handle used to call this routine
 * \param n the vectors' length
 * \param a the first input vector
 * \param b the second input vector
 * \return the dot product, as a double precision complex value
 */
cuDoubleComplex spgpuZdot(spgpuHandle_t handle,
    int n,
    __device cuDoubleComplex *a,
    __device cuDoubleComplex *b);
/**
 * \fn void spgpuZmdot(spgpuHandle_t handle, cuDoubleComplex *y, int n, __device cuDoubleComplex *a, __device cuDoubleComplex *b, int count, int pitch)
 * Computes the double precision complex dot products of the a and b multivectors.
 * The results are written to y; nothing is returned by value.
 * \param handle the spgpu handle used to call this routine
 * \param y the result, made of the dot products of every couple of vectors from the multivectors a and b
 * \param n the vectors' length
 * \param a the first input multivector
 * \param b the second input multivector
 * \param count the number of vectors in every multivector
 * \param pitch the pitch, in number of elements, of every multivector (so the second element of the first vector in a will be a[pitch], the third a[2*pitch], etc.).
 */
void spgpuZmdot(spgpuHandle_t handle,
    cuDoubleComplex *y,
    int n,
    __device cuDoubleComplex *a,
    __device cuDoubleComplex *b,
    int count,
    int pitch);
/**
 * \fn void spgpuZabs(spgpuHandle_t handle, __device cuDoubleComplex *y, int n, cuDoubleComplex alpha, __device cuDoubleComplex *x)
 * Computes the double precision complex (y = alpha * abs(x)) for each element in x.
 * NOTE(review): y and alpha are declared as cuDoubleComplex in this prototype even
 * though abs() yields real magnitudes -- confirm this matches the kernel definition.
 * \param handle the spgpu handle used to call this routine
 * \param y the resulting vector (could be x)
 * \param n the vectors' length
 * \param alpha the alpha value
 * \param x the input vector
 */
void spgpuZabs(spgpuHandle_t handle,
    __device cuDoubleComplex *y,
    int n,
    cuDoubleComplex alpha,
    __device cuDoubleComplex *x);
/**
 * \fn double spgpuZnrm2(spgpuHandle_t handle, int n, __device cuDoubleComplex *x)
 * Double precision complex Euclidean vector norm of x.
 * \param handle the spgpu handle used to call this routine
 * \param n the vector's length
 * \param x the input vector
 * \return the Euclidean vector norm
 */
double spgpuZnrm2(spgpuHandle_t handle,
    int n,
    __device cuDoubleComplex *x);
/**
 * \fn void spgpuZmnrm2(spgpuHandle_t handle, double *y, int n, __device cuDoubleComplex *x, int count, int pitch)
 * Double precision complex Euclidean vector norm, computed for every vector in the multivector x.
 * \param handle the spgpu handle used to call this routine
 * \param y the array of results
 * \param n the length of the vectors in the x multivector
 * \param x the input multivector
 * \param count the number of vectors in x
 * \param pitch the multivector's pitch
 */
void spgpuZmnrm2(spgpuHandle_t handle,
    double *y,
    int n,
    __device cuDoubleComplex *x,
    int count,
    int pitch);
/**
 * \fn void spgpuZaxpby(spgpuHandle_t handle, __device cuDoubleComplex *z, int n, cuDoubleComplex beta, __device cuDoubleComplex *y, cuDoubleComplex alpha, __device cuDoubleComplex *x)
 * Double precision complex update: z = beta * y + alpha * x.
 * z may be exactly x or y (without offset) or a distinct vector.
 * \param handle the spgpu handle used to call this routine
 * \param z the resulting vector
 * \param n the vectors' length
 * \param beta the beta value
 * \param y the first input vector
 * \param alpha the alpha value
 * \param x the second input vector
 */
void spgpuZaxpby(spgpuHandle_t handle,
    __device cuDoubleComplex *z,
    int n,
    cuDoubleComplex beta,
    __device cuDoubleComplex *y,
    cuDoubleComplex alpha,
    __device cuDoubleComplex *x);
/**
 * \fn void spgpuZabgdxyz(spgpuHandle_t handle, int n, cuDoubleComplex alpha, cuDoubleComplex beta, cuDoubleComplex gamma, cuDoubleComplex delta, __device cuDoubleComplex *x, __device cuDoubleComplex *y, __device cuDoubleComplex *z)
 * Double precision complex routine taking four scalars (alpha, beta, gamma, delta) and three vectors (x, y, z).
 * NOTE(review): the operation is not documented here; the name suggests a fused
 * update combining x, y and z with the four coefficients -- confirm the exact
 * formula against the implementation.
 * \param handle the spgpu handle used to call this routine
 * \param n presumably the vectors' length (TODO confirm)
 * \param alpha the alpha value
 * \param beta the beta value
 * \param gamma the gamma value
 * \param delta the delta value
 * \param x the x vector
 * \param y the y vector
 * \param z the z vector
 */
void spgpuZabgdxyz(spgpuHandle_t handle,
    int n,
    cuDoubleComplex alpha,
    cuDoubleComplex beta,
    cuDoubleComplex gamma,
    cuDoubleComplex delta,
    __device cuDoubleComplex *x,
    __device cuDoubleComplex *y,
    __device cuDoubleComplex *z);
/**
 * \fn void spgpuZxyzw(spgpuHandle_t handle, int n, cuDoubleComplex a, cuDoubleComplex b, cuDoubleComplex c, cuDoubleComplex d, cuDoubleComplex e, cuDoubleComplex f, __device cuDoubleComplex *x, __device cuDoubleComplex *y, __device cuDoubleComplex *z, __device cuDoubleComplex *w)
 * Double precision complex routine taking six scalars (a..f) and four vectors (x, y, z, w).
 * NOTE(review): the operation is not documented here; the name and parameters
 * suggest a fused update involving x, y, z and w -- confirm the exact formula
 * against the implementation.
 * \param handle the spgpu handle used to call this routine
 * \param n presumably the vectors' length (TODO confirm)
 * \param a the first scalar coefficient
 * \param b the second scalar coefficient
 * \param c the third scalar coefficient
 * \param d the fourth scalar coefficient
 * \param e the fifth scalar coefficient
 * \param f the sixth scalar coefficient
 * \param x the x vector
 * \param y the y vector
 * \param z the z vector
 * \param w the w vector
 */
void spgpuZxyzw(spgpuHandle_t handle,
    int n,
    cuDoubleComplex a,
    cuDoubleComplex b,
    cuDoubleComplex c,
    cuDoubleComplex d,
    cuDoubleComplex e,
    cuDoubleComplex f,
    __device cuDoubleComplex *x,
    __device cuDoubleComplex *y,
    __device cuDoubleComplex *z,
    __device cuDoubleComplex *w);
/**
 * \fn void spgpuZmaxpby(spgpuHandle_t handle, __device cuDoubleComplex *z, int n, cuDoubleComplex beta, __device cuDoubleComplex *y, cuDoubleComplex alpha, __device cuDoubleComplex *x, int count, int pitch)
 * Double precision complex update z = beta * y + alpha * x, applied to the x and y multivectors.
 * z may be exactly x or y (without offset) or a distinct vector.
 * \param handle the spgpu handle used to call this routine
 * \param z the resulting multivector
 * \param n the vectors' length
 * \param beta the beta value
 * \param y the first input multivector
 * \param alpha the alpha value
 * \param x the second input multivector
 * \param count the number of vectors in the z, x and y multivectors
 * \param pitch the multivector's pitch
 */
void spgpuZmaxpby(spgpuHandle_t handle,
    __device cuDoubleComplex *z,
    int n,
    cuDoubleComplex beta,
    __device cuDoubleComplex *y,
    cuDoubleComplex alpha,
    __device cuDoubleComplex *x,
    int count,
    int pitch);
/**
 * \fn void spgpuZaxy(spgpuHandle_t handle, __device cuDoubleComplex *z, int n, cuDoubleComplex alpha, __device cuDoubleComplex *x, __device cuDoubleComplex *y)
 * Double precision complex product: z = alpha * x * y.
 * z may be exactly x or y (without offset) or a distinct vector.
 * \param handle the spgpu handle used to call this routine
 * \param z the resulting vector
 * \param n the vectors' length
 * \param alpha the alpha value
 * \param x the first input vector
 * \param y the second input vector
 */
void spgpuZaxy(spgpuHandle_t handle,
    __device cuDoubleComplex *z,
    int n,
    cuDoubleComplex alpha,
    __device cuDoubleComplex *x,
    __device cuDoubleComplex *y);
/**
 * \fn void spgpuZaxypbz(spgpuHandle_t handle, __device cuDoubleComplex *w, int n, cuDoubleComplex beta, __device cuDoubleComplex *z, cuDoubleComplex alpha, __device cuDoubleComplex *x, __device cuDoubleComplex *y)
 * Double precision complex update: w = beta * z + alpha * x * y.
 * w may be exactly x, y or z (without offset) or a distinct vector.
 * \param handle the spgpu handle used to call this routine
 * \param w the resulting vector
 * \param n the vectors' length
 * \param beta the beta value
 * \param z the first input vector
 * \param alpha the alpha value
 * \param x the second input vector
 * \param y the third input vector
 */
void spgpuZaxypbz(spgpuHandle_t handle,
    __device cuDoubleComplex *w,
    int n,
    cuDoubleComplex beta,
    __device cuDoubleComplex *z,
    cuDoubleComplex alpha,
    __device cuDoubleComplex *x,
    __device cuDoubleComplex *y);
/**
 * \fn void spgpuZmaxy(spgpuHandle_t handle, __device cuDoubleComplex *z, int n, cuDoubleComplex alpha, __device cuDoubleComplex *x, __device cuDoubleComplex *y, int count, int pitch)
 * Double precision complex multivector product: z = alpha * x * y, where z, x and y are multivectors.
 * z may be exactly x or y (without offset) or a distinct vector.
 * \param handle the spgpu handle used to call this routine
 * \param z the resulting multivector
 * \param n the length of the vectors in the multivectors
 * \param alpha the alpha value
 * \param x the first input multivector
 * \param y the second input multivector
 * \param count the number of vectors in the z, x and y multivectors
 * \param pitch the multivectors' pitch
 */
void spgpuZmaxy(spgpuHandle_t handle,
    __device cuDoubleComplex *z,
    int n,
    cuDoubleComplex alpha,
    __device cuDoubleComplex *x,
    __device cuDoubleComplex *y,
    int count,
    int pitch);
/**
 * \fn void spgpuZmaxypbz(spgpuHandle_t handle, __device cuDoubleComplex *w, int n, cuDoubleComplex beta, __device cuDoubleComplex *z, cuDoubleComplex alpha, __device cuDoubleComplex *x, __device cuDoubleComplex *y, int count, int pitch)
 * Double precision complex multivector update: w = beta * z + alpha * x * y.
 * w may be exactly x, y or z (without offset) or a distinct vector.
 * \param handle the spgpu handle used to call this routine
 * \param w the resulting multivector
 * \param n the vectors' length
 * \param beta the beta value
 * \param z the first input multivector
 * \param alpha the alpha value
 * \param x the second input multivector
 * \param y the third input multivector
 * \param count the number of vectors in the w, z, x and y multivectors
 * \param pitch the multivectors' pitch
 */
void spgpuZmaxypbz(spgpuHandle_t handle,
    __device cuDoubleComplex *w,
    int n,
    cuDoubleComplex beta,
    __device cuDoubleComplex *z,
    cuDoubleComplex alpha,
    __device cuDoubleComplex *x,
    __device cuDoubleComplex *y,
    int count,
    int pitch);
/**
 * \fn void spgpuZgath(spgpuHandle_t handle, __device cuDoubleComplex *xValues, int xNnz, const __device int *xIndices, int xBaseIndex, const __device cuDoubleComplex *y)
 * Double precision complex gather: the entries of y selected by xIndices are copied into xValues.
 * \param handle the spgpu handle used to call this routine
 * \param xValues the destination array for the gathered values
 * \param xNnz the number of elements to gather
 * \param xIndices the array of indices of the elements to be gathered
 * \param xBaseIndex the base index used in xIndices (i.e. 0 for C, 1 for Fortran)
 * \param y the source vector (from which the elements are gathered)
 */
void spgpuZgath(spgpuHandle_t handle,
    __device cuDoubleComplex *xValues,
    int xNnz,
    const __device int *xIndices,
    int xBaseIndex,
    const __device cuDoubleComplex *y);
/**
 * \fn void spgpuZscat(spgpuHandle_t handle, __device cuDoubleComplex *y, int xNnz, const __device cuDoubleComplex *xValues, const __device int *xIndices, int xBaseIndex, cuDoubleComplex beta)
 * Double precision complex scatter from xValues into y, placing the values through xIndices.
 * For i in [0,xNnz) the scattered element is y[xIndices[i]] = beta*y[xIndices[i]] + xValues[i];
 * note that only the scattered entries of y are multiplied by beta.
 * \param handle the spgpu handle used to call this routine
 * \param y the destination vector (to which the elements are scattered)
 * \param xNnz the number of elements to scatter
 * \param xValues the source array from which the values are read
 * \param xIndices the array of indices of the elements to be scattered
 * \param xBaseIndex the base index used in xIndices (i.e. 0 for C, 1 for Fortran)
 * \param beta the beta value
 */
void spgpuZscat(spgpuHandle_t handle,
    __device cuDoubleComplex *y,
    int xNnz,
    const __device cuDoubleComplex *xValues,
    const __device int *xIndices,
    int xBaseIndex,
    cuDoubleComplex beta);
/**
 * \fn double spgpuZasum(spgpuHandle_t handle, int n, cuDoubleComplex *x)
 * Takes a double precision complex vector x and returns a double.
 * NOTE(review): the name follows the BLAS "asum" convention, so this presumably
 * returns the sum of the absolute values of x -- confirm against the implementation.
 * \param handle the spgpu handle used to call this routine
 * \param n presumably the vector's length (TODO confirm)
 * \param x the input vector
 * \return a double precision scalar (see the note above)
 */
double spgpuZasum(spgpuHandle_t handle,
    int n,
    cuDoubleComplex *x);
/**
 * \fn double spgpuZamax(spgpuHandle_t handle, int n, cuDoubleComplex *x)
 * Takes a double precision complex vector x and returns a double.
 * NOTE(review): the name follows the BLAS "amax" convention, so this presumably
 * returns the largest absolute value found in x -- confirm against the implementation.
 * \param handle the spgpu handle used to call this routine
 * \param n presumably the vector's length (TODO confirm)
 * \param x the input vector
 * \return a double precision scalar (see the note above)
 */
double spgpuZamax(spgpuHandle_t handle,
    int n,
    cuDoubleComplex *x);
/**
 * \fn void spgpuZmasum(spgpuHandle_t handle, double *y, int n, cuDoubleComplex *x, int count, int pitch)
 * Takes a double precision complex input x plus count/pitch arguments and a double array y.
 * NOTE(review): presumably the multivector variant of spgpuZasum, storing one
 * result per vector of x into y -- confirm against the implementation.
 * \param handle the spgpu handle used to call this routine
 * \param y presumably the array of results (TODO confirm)
 * \param n presumably the vectors' length in the x multivector (TODO confirm)
 * \param x the input (presumably a multivector)
 * \param count presumably the number of vectors in x (TODO confirm)
 * \param pitch presumably the multivector's pitch (TODO confirm)
 */
void spgpuZmasum(spgpuHandle_t handle,
    double *y,
    int n,
    cuDoubleComplex *x,
    int count,
    int pitch);
/**
 * \fn void spgpuZmamax(spgpuHandle_t handle, double *y, int n, cuDoubleComplex *x, int count, int pitch)
 * Takes a double precision complex input x plus count/pitch arguments and a double array y.
 * NOTE(review): presumably the multivector variant of spgpuZamax, storing one
 * result per vector of x into y -- confirm against the implementation.
 * \param handle the spgpu handle used to call this routine
 * \param y presumably the array of results (TODO confirm)
 * \param n presumably the vectors' length in the x multivector (TODO confirm)
 * \param x the input (presumably a multivector)
 * \param count presumably the number of vectors in x (TODO confirm)
 * \param pitch presumably the multivector's pitch (TODO confirm)
 */
void spgpuZmamax(spgpuHandle_t handle,
    double *y,
    int n,
    cuDoubleComplex *x,
    int count,
    int pitch);
/** @}*/
/**
 * \fn void spgpuIsetscal(spgpuHandle_t handle, int first, int last, int baseIndex, int val, __device int *y)
 * Integer routine taking an index range (first, last), a base index, a scalar val and a vector y.
 * NOTE(review): presumably assigns val to the entries of y from first to last,
 * with the indices expressed in the given base -- confirm the range semantics
 * (inclusive or exclusive) against the implementation.
 * \param handle the spgpu handle used to call this routine
 * \param first presumably the first index of the range (TODO confirm)
 * \param last presumably the last index of the range (TODO confirm)
 * \param baseIndex presumably the base index of first/last (0 for C, 1 for Fortran) (TODO confirm)
 * \param val the scalar value
 * \param y the vector operated on
 */
void spgpuIsetscal(spgpuHandle_t handle,
    int first,
    int last,
    int baseIndex,
    int val,
    __device int *y);
/**
 * \fn void spgpuSsetscal(spgpuHandle_t handle, int first, int last, int baseIndex, float val, __device float *y)
 * Single precision routine taking an index range (first, last), a base index, a scalar val and a vector y.
 * NOTE(review): presumably assigns val to the entries of y from first to last,
 * with the indices expressed in the given base -- confirm the range semantics
 * (inclusive or exclusive) against the implementation.
 * \param handle the spgpu handle used to call this routine
 * \param first presumably the first index of the range (TODO confirm)
 * \param last presumably the last index of the range (TODO confirm)
 * \param baseIndex presumably the base index of first/last (0 for C, 1 for Fortran) (TODO confirm)
 * \param val the scalar value
 * \param y the vector operated on
 */
void spgpuSsetscal(spgpuHandle_t handle,
    int first,
    int last,
    int baseIndex,
    float val,
    __device float *y);
/**
 * \fn void spgpuDsetscal(spgpuHandle_t handle, int first, int last, int baseIndex, double val, __device double *y)
 * Double precision routine taking an index range (first, last), a base index, a scalar val and a vector y.
 * NOTE(review): presumably assigns val to the entries of y from first to last,
 * with the indices expressed in the given base -- confirm the range semantics
 * (inclusive or exclusive) against the implementation.
 * \param handle the spgpu handle used to call this routine
 * \param first presumably the first index of the range (TODO confirm)
 * \param last presumably the last index of the range (TODO confirm)
 * \param baseIndex presumably the base index of first/last (0 for C, 1 for Fortran) (TODO confirm)
 * \param val the scalar value
 * \param y the vector operated on
 */
void spgpuDsetscal(spgpuHandle_t handle,
    int first,
    int last,
    int baseIndex,
    double val,
    __device double *y);
/**
 * \fn void spgpuCsetscal(spgpuHandle_t handle, int first, int last, int baseIndex, cuFloatComplex val, __device cuFloatComplex *y)
 * Single precision complex routine taking an index range (first, last), a base index, a scalar val and a vector y.
 * NOTE(review): presumably assigns val to the entries of y from first to last,
 * with the indices expressed in the given base -- confirm the range semantics
 * (inclusive or exclusive) against the implementation.
 * \param handle the spgpu handle used to call this routine
 * \param first presumably the first index of the range (TODO confirm)
 * \param last presumably the last index of the range (TODO confirm)
 * \param baseIndex presumably the base index of first/last (0 for C, 1 for Fortran) (TODO confirm)
 * \param val the scalar value
 * \param y the vector operated on
 */
void spgpuCsetscal(spgpuHandle_t handle,
    int first,
    int last,
    int baseIndex,
    cuFloatComplex val,
    __device cuFloatComplex *y);
/**
 * \fn void spgpuZsetscal(spgpuHandle_t handle, int first, int last, int baseIndex, cuDoubleComplex val, __device cuDoubleComplex *y)
 * Double precision complex routine taking an index range (first, last), a base index, a scalar val and a vector y.
 * NOTE(review): presumably assigns val to the entries of y from first to last,
 * with the indices expressed in the given base -- confirm the range semantics
 * (inclusive or exclusive) against the implementation.
 * \param handle the spgpu handle used to call this routine
 * \param first presumably the first index of the range (TODO confirm)
 * \param last presumably the last index of the range (TODO confirm)
 * \param baseIndex presumably the base index of first/last (0 for C, 1 for Fortran) (TODO confirm)
 * \param val the scalar value
 * \param y the vector operated on
 */
void spgpuZsetscal(spgpuHandle_t handle,
    int first,
    int last,
    int baseIndex,
    cuDoubleComplex val,
    __device cuDoubleComplex *y);
#ifdef __cplusplus
}
#endif