#pragma once /* * spGPU - Sparse matrices on GPU library. * * Copyright (C) 2010 - 2013 * Davide Barbieri - University of Rome Tor Vergata * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * version 3 as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. */ #include "core.h" /** \addtogroup diaFun DIA/HDIA Format * @{ */ #ifdef __cplusplus extern "C" { #endif // DIA/HDIA Compressed Matrix Format routines /// This is the pitch alignment that must be fullfilled by the coefficients and the row pointers allocations. #define DIA_PITCH_ALIGN_BYTE 128 /** * \fn spgpuShdiaspmv (spgpuHandle_t handle, float* z, const float *y, float alpha, const float* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, const float *x, float beta) * Computes single precision z = alpha*A*x + beta*y, with A stored in Hacked Diagonal Format on GPU. * \param handle The spgpu handle used to call this routine * \param z The output vector of the routine. z could be y, but not y + k (i.e. an overlapping area over y, but starting from a base index different from y). * \param y The y input vector * \param alpha The alpha scalar * \param dM The stacked HDIA non zero values allocation pointer * \param offsets The stacked HDIA diagonals offsets vector * \param hackSize The constant size of every hack (must be a multiple of 32) * \param hackOffsets the array of base index offset for every hack of HDIA offsets vector, plus a last value equal to the size of the offsets vector * \param rows the rows count * \param cols the columns count * \param x the x vector * \param beta the beta scalar */ void spgpuShdiaspmv (spgpuHandle_t handle, float* z, const float *y, float alpha, const float* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, const float *x, float beta); /** * \fn spgpuShdiaspmm (int count, float *z, int zpitch, const float *y, int ypitch, float alpha, const __device float* dM, const __device int* offsets, int hackSize, const __device int* hackOffsets, int rows, int cols, const __device float *x, int xpitch, float beta) * Computes single precision z = alpha*A*x + beta*y, with A stored in Hacked Diagonal Format on GPU. * \param handle The spgpu handle used to call this routine * \param count The cols count * \param z The output vector of the routine. z could be y, but not y + k (i.e. an overlapping area over y, but starting from a base index different from y). * \param zpitch The pitch of the output vector * \param y The y input vector * \param ypitch The pitch of the y input vector * \param alpha The alpha scalar * \param dM The stacked HDIA non zero values allocation pointer * \param offsets The stacked HDIA diagonals offsets vector * \param hackSize The constant size of every hack (must be a multiple of 32) * \param hackOffsets the array of base index offset for every hack of HDIA offsets vector, plus a last value equal to the size of the offsets vector * \param rows the rows count * \param cols the columns count * \param x the x vector * \param xpitch The pitch of the x input vector * \param beta the beta scalar */ void spgpuShdiaspmm (spgpuHandle_t handle, int count, __device float *z, int zpitch, const __device float *y, int ypitch, float alpha, const __device float* dM, const __device int* offsets, int hackSize, const __device int* hackOffsets, int rows, int cols, const __device float *x, int xpitch, float beta); /** * \fn spgpuDhdiaspmv (spgpuHandle_t handle, double* z, const double *y, double alpha, const double* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, const double *x, double beta) * Computes double precision z = alpha*A*x + beta*y, with A stored in Hacked Diagonal Format on GPU. * \param handle The spgpu handle used to call this routine * \param z The output vector of the routine. z could be y, but not y + k (i.e. an overlapping area over y, but starting from a base index different from y). * \param y The y input vector * \param alpha The alpha scalar * \param dM The stacked HDIA non zero values allocation pointer * \param offsets The stacked HDIA diagonals offsets vector * \param hackSize The constant size of every hack (must be a multiple of 32) * \param hackOffsets the array of base index offset for every hack of HDIA offsets vector, plus a last value equal to the size of the offsets vector * \param rows the rows count * \param cols the columns count * \param x the x vector * \param beta the beta scalar */ void spgpuDhdiaspmv (spgpuHandle_t handle, double* z, const double *y, double alpha, const double* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, const double *x, double beta); /** * \fn spgpuDhdiaspmm (int count, double *z, int zpitch, const double *y, int ypitch, double alpha, const __device double* dM, const __device int* offsets, int hackSize, const __device int* hackOffsets, int rows, int cols, const __device float *x, int xpitch, double beta) * Computes single precision z = alpha*A*x + beta*y, with A stored in Hacked Diagonal Format on GPU. * \param handle The spgpu handle used to call this routine * \param count The cols count * \param z The output vector of the routine. z could be y, but not y + k (i.e. an overlapping area over y, but starting from a base index different from y). * \param zpitch The pitch of the output vector * \param y The y input vector * \param ypitch The pitch of the y input vector * \param alpha The alpha scalar * \param dM The stacked HDIA non zero values allocation pointer * \param offsets The stacked HDIA diagonals offsets vector * \param hackSize The constant size of every hack (must be a multiple of 32) * \param hackOffsets the array of base index offset for every hack of HDIA offsets vector, plus a last value equal to the size of the offsets vector * \param rows the rows count * \param cols the columns count * \param x the x vector * \param xpitch The pitch of the x input vector * \param beta the beta scalar */ void spgpuDhdiaspmm (spgpuHandle_t handle, int count, __device double *z, int zpitch, const __device double *y, int ypitch, double alpha, const __device double* dM, const __device int* offsets, int hackSize, const __device int* hackOffsets, int rows, int cols, const __device double *x, int xpitch, double beta); /** * \fn spgpuChdiaspmv (spgpuHandle_t handle, cuFloatComplex* z, const cuFloatComplex *y, cuFloatComplex alpha, const cuFloatComplex* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, const cuFloatComplex *x, cuFloatComplex beta) * Computes single precision complex z = alpha*A*x + beta*y, with A stored in Hacked Diagonal Format on GPU. * \param handle The spgpu handle used to call this routine * \param z The output vector of the routine. z could be y, but not y + k (i.e. an overlapping area over y, but starting from a base index different from y). * \param y The y input vector * \param alpha The alpha scalar * \param dM The stacked HDIA non zero values allocation pointer * \param offsets The stacked HDIA diagonals offsets vector * \param hackSize The constant size of every hack (must be a multiple of 32) * \param hackOffsets the array of base index offset for every hack of HDIA offsets vector, plus a last value equal to the size of the offsets vector * \param rows the rows count * \param cols the columns count * \param x the x vector * \param beta the beta scalar */ void spgpuChdiaspmv (spgpuHandle_t handle, cuFloatComplex* z, const cuFloatComplex *y, cuFloatComplex alpha, const cuFloatComplex* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, const cuFloatComplex *x, cuFloatComplex beta); /** * \fn spgpuChdiaspmm (int count, cuFloatComplex *z, int zpitch, const cuFloatComplex *y, int ypitch, cuFloatComplex alpha, const __device cuFloatComplex* dM, const __device int* offsets, int hackSize, const __device int* hackOffsets, int rows, int cols, const __device cuFloatComplex *x, int xpitch, cuFloatComplex beta) * Computes single precision z = alpha*A*x + beta*y, with A stored in Hacked Diagonal Format on GPU. * \param handle The spgpu handle used to call this routine * \param count The cols count * \param z The output vector of the routine. z could be y, but not y + k (i.e. an overlapping area over y, but starting from a base index different from y). * \param zpitch The pitch of the output vector * \param y The y input vector * \param ypitch The pitch of the y input vector * \param alpha The alpha scalar * \param dM The stacked HDIA non zero values allocation pointer * \param offsets The stacked HDIA diagonals offsets vector * \param hackSize The constant size of every hack (must be a multiple of 32) * \param hackOffsets the array of base index offset for every hack of HDIA offsets vector, plus a last value equal to the size of the offsets vector * \param rows the rows count * \param cols the columns count * \param x the x vector * \param xpitch The pitch of the x input vector * \param beta the beta scalar */ void spgpuChdiaspmm (spgpuHandle_t handle, int count, __device cuFloatComplex *z, int zpitch, const __device cuFloatComplex *y, int ypitch, cuFloatComplex alpha, const __device cuFloatComplex* dM, const __device int* offsets, int hackSize, const __device int* hackOffsets, int rows, int cols, const __device cuFloatComplex *x, int xpitch, cuFloatComplex beta); /** * \fn spgpuZhdiaspmv (spgpuHandle_t handle, cuDoubleComplex* z, const cuDoubleComplex *y, cuDoubleComplex alpha, const cuDoubleComplex* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, const cuDoubleComplex *x, cuDoubleComplex beta) * Computes double precision complex z = alpha*A*x + beta*y, with A stored in Hacked Diagonal Format on GPU. * \param handle The spgpu handle used to call this routine * \param z The output vector of the routine. z could be y, but not y + k (i.e. an overlapping area over y, but starting from a base index different from y). * \param y The y input vector * \param alpha The alpha scalar * \param dM The stacked HDIA non zero values allocation pointer * \param offsets The stacked HDIA diagonals offsets vector * \param hackSize The constant size of every hack (must be a multiple of 32) * \param hackOffsets the array of base index offset for every hack of HDIA offsets vector, plus a last value equal to the size of the offsets vector * \param rows the rows count * \param cols the columns count * \param x the x vector * \param beta the beta scalar */ void spgpuZhdiaspmv (spgpuHandle_t handle, cuDoubleComplex* z, const cuDoubleComplex *y, cuDoubleComplex alpha, const cuDoubleComplex* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, const cuDoubleComplex *x, cuDoubleComplex beta); /** * \fn spgpuZhdiaspmm (int count, cuDoubleComplex *z, int zpitch, const cuDoubleComplex *y, int ypitch, cuDoubleComplex alpha, const __device cuDoubleComplex* dM, const __device int* offsets, int hackSize, const __device int* hackOffsets, int rows, int cols, const __device cuDoubleComplex *x, int xpitch, cuDoubleComplex beta) * Computes single precision z = alpha*A*x + beta*y, with A stored in Hacked Diagonal Format on GPU. * \param handle The spgpu handle used to call this routine * \param count The cols count * \param z The output vector of the routine. z could be y, but not y + k (i.e. an overlapping area over y, but starting from a base index different from y). * \param zpitch The pitch of the output vector * \param y The y input vector * \param ypitch The pitch of the y input vector * \param alpha The alpha scalar * \param dM The stacked HDIA non zero values allocation pointer * \param offsets The stacked HDIA diagonals offsets vector * \param hackSize The constant size of every hack (must be a multiple of 32) * \param hackOffsets the array of base index offset for every hack of HDIA offsets vector, plus a last value equal to the size of the offsets vector * \param rows the rows count * \param cols the columns count * \param x the x vector * \param xpitch The pitch of the x input vector * \param beta the beta scalar */ void spgpuZhdiaspmm (spgpuHandle_t handle, int count, __device cuDoubleComplex *z, int zpitch, const __device cuDoubleComplex *y, int ypitch, cuDoubleComplex alpha, const __device cuDoubleComplex* dM, const __device int* offsets, int hackSize, const __device int* hackOffsets, int rows, int cols, const __device cuDoubleComplex *x, int xpitch, cuDoubleComplex beta); /** @}*/ #ifdef __cplusplus } #endif