You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
psblas3/cuda/spgpu/core.h

186 lines
6.4 KiB
C

#pragma once
/*
* spGPU - Sparse matrices on GPU library.
*
* Copyright (C) 2010 - 2015
* Davide Barbieri - University of Rome Tor Vergata
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 3 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
/*! \mainpage The spGPU library documentation
*
* \section intro_sec Introduction
*
* spGPU is a set of custom matrix storages and CUDA kernels for sparse linear algebra computing on GPU. It isn't a replacement for cuBLAS/cuSPARSE, which should be used for a full-featured linear algebra environment on GPU.\n
* The main matrix storage used by spGPU is a GPU-friendly ELLpack format, as well as our HELL (Hacked ELLpack) format, an enhanced version of ELLpack with some interesting memory saving properties.\n
* HELL format provides more compact memory storage than ELL (it avoids the allocation inefficiency caused by spikes in row sizes), while providing nearly the same performance for the sparse matrix-vector multiply routine.
*
* \section install_sec How to build spgpu
* \subsection linuxbuild Linux (and other unix systems)
* cd spgpu/build/cmake\n
* sh configure.sh\n
* make
* \section cr_sec Copyright
* Copyright (C) 2010 - 2015\n
* Davide Barbieri - University of Rome Tor Vergata\n
* Valeria Cardellini - University of Rome Tor Vergata\n
* Salvatore Filippone - University of Rome Tor Vergata
*
* This program is free software; you can redistribute it and/or\n
* modify it under the terms of the GNU General Public License\n
* version 3 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,\n
* but WITHOUT ANY WARRANTY; without even the implied warranty of\n
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n
* GNU General Public License for more details.
*/
#include "driver_types.h"
#include "cuComplex.h"
/** \addtogroup coreFun Core Routines
* @{
*/
#ifdef __cplusplus
extern "C" {
#endif
/// __host marks pointers that reference host allocations. The macro expands to
/// nothing: it is only a placeholder annotation for readers and has no effect
/// on the compiled code.
#define __host
/// __device marks pointers that reference device allocations. The macro expands
/// to nothing: it is only a placeholder annotation for readers and has no
/// effect on the compiled code.
#define __device
/// The return code for synchronous functions.
/// It is a plain int; the SPGPU_* values below are the status codes defined by
/// this header.
/// NOTE(review): the per-code descriptions below are inferred from the macro
/// names only - confirm against the implementation.
typedef int spgpuStatus_t;
/// Status code 0 (presumably: the call completed successfully).
#define SPGPU_SUCCESS 0
/// Status code 1 (presumably: the requested operation is not supported).
#define SPGPU_UNSUPPORTED 1
/// Status code 2 (presumably: an unspecified failure).
#define SPGPU_UNSPECIFIED 2
/// Status code 3 (presumably: a memory allocation failed).
#define SPGPU_OUTOFMEMORY 3
/// Code to identify a primitive type.
/// It is a plain int; the SPGPU_TYPE_* values below are the type codes defined
/// by this header (see spgpuSizeOf() for the size of each type).
typedef int spgpuType_t;
/// Type code for integer elements.
#define SPGPU_TYPE_INT 0
/// Type code for single precision floating point elements.
#define SPGPU_TYPE_FLOAT 1
/// Type code for double precision floating point elements.
#define SPGPU_TYPE_DOUBLE 2
/// Type code for single precision complex elements (presumably cuFloatComplex - confirm).
#define SPGPU_TYPE_COMPLEX_FLOAT 3
/// Type code for double precision complex elements (presumably cuDoubleComplex - confirm).
#define SPGPU_TYPE_COMPLEX_DOUBLE 4
/// Stream state and device properties stored behind a spgpu handle.
/// This struct should be modified only internally by spgpu.
typedef struct spgpuHandleStruct {
/// the current stream used by every call to spgpu routines (passing this handle)
cudaStream_t currentStream;
/// the default stream, created during the handle creation.
cudaStream_t defaultStream;
/// the device associated to this handle
int device;
/// the warp size for this device
int warpSize;
/// the max threads per block count for this device
int maxThreadsPerBlock;
/// the max size for the X coordinate of the grid dimensions
int maxGridSizeX;
/// the max size for the Y coordinate of the grid dimensions
int maxGridSizeY;
/// the max size for the Z coordinate of the grid dimensions
int maxGridSizeZ;
/// Number of SM
int multiProcessorCount;
/// the major number of the compute capability
int capabilityMajor;
/// the minor number of the compute capability
int capabilityMinor;
} SpgpuHandleStruct;
/// A spGPU handle represents a single CUDA device on your platform.
/// It is a pointer to a const SpgpuHandleStruct, so code holding a handle
/// cannot modify the struct fields through it without a cast.
typedef const SpgpuHandleStruct* spgpuHandle_t;
/**
* \fn spgpuStatus_t spgpuCreate(spgpuHandle_t* pHandle, int device)
* Create a spgpu context for a GPU device. Every call to spgpu routines using this
* handle will execute on the same GPU. This is re-entrant, so it will work if used by multiple host threads.
* \param pHandle outputs the handle
* \param device id of the device to be used by this context
* \return a spgpuStatus_t status code (see the SPGPU_* status defines)
*/
spgpuStatus_t spgpuCreate(spgpuHandle_t* pHandle, int device);
/**
* \fn void spgpuDestroy(spgpuHandle_t pHandle)
* Destroy the spgpu context for pHandle.
* No status is reported: the function returns void.
* \param pHandle the handle previously created with spgpuCreate().
*/
void spgpuDestroy(spgpuHandle_t pHandle);
/**
* \fn void spgpuStreamCreate(spgpuHandle_t pHandle, cudaStream_t* stream)
* Create a cuda stream according to the device of the spgpu handle.
* No status is reported: the function returns void.
* \param pHandle the handle used to obtain the device id for the stream
* \param stream outputs the new stream
*/
void spgpuStreamCreate(spgpuHandle_t pHandle, cudaStream_t* stream);
/**
* \fn void spgpuStreamDestroy(cudaStream_t stream)
* Destroy a stream, previously created with spgpuStreamCreate().
* No status is reported: the function returns void.
* \param stream the stream to destroy
*/
void spgpuStreamDestroy(cudaStream_t stream);
/**
* \fn void spgpuSetStream(spgpuHandle_t pHandle, cudaStream_t stream)
* Change the current stream for the handle pHandle.
* \param pHandle the handle to configure.
* \param stream the stream to use for the next spgpu routine calls. Use 0 to reset to the default stream.
*/
void spgpuSetStream(spgpuHandle_t pHandle, cudaStream_t stream);
/**
* \fn cudaStream_t spgpuGetStream(spgpuHandle_t pHandle)
* Get the current stream from the handle pHandle.
* \param pHandle the handle from which to get the stream.
* \return the current stream of pHandle
*/
cudaStream_t spgpuGetStream(spgpuHandle_t pHandle);
/**
* \fn size_t spgpuSizeOf(spgpuType_t typeCode)
* Returns the size, in bytes, of the type denoted by typeCode (e.g. 4 for SPGPU_TYPE_FLOAT, 8 for SPGPU_TYPE_DOUBLE).
* \param typeCode the code of the type whose size is requested (a spgpuType_t value, see the SPGPU_TYPE_* defines)
* \return the size in bytes of the denoted type; the result for codes other than the SPGPU_TYPE_* values is not specified in this header
*/
size_t spgpuSizeOf(spgpuType_t typeCode);
/*
typedef struct {
spgpuMatrix
spgpuMatrixType_t MatrixType;
spgpuFillMode_t FillMode;
spgpuDiagType_t DiagType;
int baseIndex;
} spgpuMatrixDesc_t
*/
/// Zero / non-zero tests for complex values exposing .x and .y members
/// (real and imaginary parts in cuComplex.h - confirm).
/// - *_isZero(a) is true when both members compare equal to zero.
/// - *_isNotZero(a) is true when at least one member differs from zero.
/// The argument is wrapped in parentheses inside each expansion so that any
/// expression (e.g. *ptr or a conditional expression) can be passed safely.
/// The argument is evaluated twice, so it must not have side effects.
#define cuFloatComplex_isZero(a) ((a).x == 0.0f && (a).y == 0.0f)
#define cuDoubleComplex_isZero(a) ((a).x == 0.0 && (a).y == 0.0)
#define cuFloatComplex_isNotZero(a) ((a).x != 0.0f || (a).y != 0.0f)
#define cuDoubleComplex_isNotZero(a) ((a).x != 0.0 || (a).y != 0.0)
#ifdef __cplusplus
}
#endif
/** @}*/