You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
800 lines
19 KiB
C
800 lines
19 KiB
C
/* Parallel Sparse BLAS GPU plugin */
|
|
/* (C) Copyright 2013 */
|
|
|
|
/* Salvatore Filippone */
|
|
/* Alessandro Fanfarillo */
|
|
|
|
/* Redistribution and use in source and binary forms, with or without */
|
|
/* modification, are permitted provided that the following conditions */
|
|
/* are met: */
|
|
/* 1. Redistributions of source code must retain the above copyright */
|
|
/* notice, this list of conditions and the following disclaimer. */
|
|
/* 2. Redistributions in binary form must reproduce the above copyright */
|
|
/* notice, this list of conditions, and the following disclaimer in the */
|
|
/* documentation and/or other materials provided with the distribution. */
|
|
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
|
/* not be used to endorse or promote products derived from this */
|
|
/* software without specific written permission. */
|
|
|
|
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
|
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
|
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
|
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
|
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
|
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
|
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
|
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
|
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
|
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
|
|
|
|
|
|
|
#include "cuda_util.h"
|
|
|
|
|
|
static int hasUVA=-1;
|
|
static struct cudaDeviceProp *prop=NULL;
|
|
static spgpuHandle_t psb_cuda_handle = NULL;
|
|
static cublasHandle_t psb_cublas_handle = NULL;
|
|
|
|
|
|
int allocRemoteBuffer(void** buffer, int count)
|
|
{
|
|
cudaError_t err = cudaMalloc(buffer, count);
|
|
if (err == cudaSuccess)
|
|
{
|
|
return SPGPU_SUCCESS;
|
|
}
|
|
else
|
|
{
|
|
fprintf(stderr,"CUDA allocRemoteBuffer for %d bytes Error: %s \n",
|
|
count, cudaGetErrorString(err));
|
|
if(err == cudaErrorMemoryAllocation)
|
|
return SPGPU_OUTOFMEMORY;
|
|
else
|
|
return SPGPU_UNSPECIFIED;
|
|
}
|
|
}
|
|
|
|
int hostRegisterMapped(void *pointer, long size)
|
|
{
|
|
cudaError_t err = cudaHostRegister(pointer, size, cudaHostRegisterMapped);
|
|
|
|
if (err == cudaSuccess)
|
|
{
|
|
return SPGPU_SUCCESS;
|
|
}
|
|
else
|
|
{
|
|
fprintf(stderr,"CUDA hostRegisterMapped Error: %s\n", cudaGetErrorString(err));
|
|
if(err == cudaErrorMemoryAllocation)
|
|
return SPGPU_OUTOFMEMORY;
|
|
else
|
|
return SPGPU_UNSPECIFIED;
|
|
}
|
|
}
|
|
|
|
int getDevicePointer(void **d_p, void * h_p)
|
|
{
|
|
cudaError_t err = cudaHostGetDevicePointer(d_p,h_p,0);
|
|
|
|
if (err == cudaSuccess)
|
|
{
|
|
return SPGPU_SUCCESS;
|
|
}
|
|
else
|
|
{
|
|
fprintf(stderr,"CUDA getDevicePointer Error: %s\n", cudaGetErrorString(err));
|
|
if(err == cudaErrorMemoryAllocation)
|
|
return SPGPU_OUTOFMEMORY;
|
|
else
|
|
return SPGPU_UNSPECIFIED;
|
|
}
|
|
}
|
|
|
|
int registerMappedMemory(void *buffer, void **dp, int size)
|
|
{
|
|
//cudaError_t err = cudaHostAlloc(buffer,size,cudaHostAllocMapped);
|
|
cudaError_t err = cudaHostRegister(buffer, size, cudaHostRegisterMapped);
|
|
if (err == cudaSuccess) err = cudaHostGetDevicePointer(dp,buffer,0);
|
|
|
|
if (err == cudaSuccess)
|
|
{
|
|
err = cudaHostGetDevicePointer(dp,buffer,0);
|
|
if (err == cudaSuccess)
|
|
{
|
|
return SPGPU_SUCCESS;
|
|
}
|
|
else
|
|
{
|
|
fprintf(stderr,"CUDA registerMappedMemory Error: %s\n", cudaGetErrorString(err));
|
|
return SPGPU_UNSPECIFIED;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
fprintf(stderr,"CUDA registerMappedMemory Error: %s\n", cudaGetErrorString(err));
|
|
if(err == cudaErrorMemoryAllocation)
|
|
return SPGPU_OUTOFMEMORY;
|
|
else
|
|
return SPGPU_UNSPECIFIED;
|
|
}
|
|
}
|
|
|
|
int allocMappedMemory(void **buffer, void **dp, int size)
|
|
{
|
|
cudaError_t err = cudaHostAlloc(buffer,size,cudaHostAllocMapped);
|
|
if (err == 0) err = cudaHostGetDevicePointer(dp,*buffer,0);
|
|
|
|
if (err == cudaSuccess)
|
|
{
|
|
return SPGPU_SUCCESS;
|
|
}
|
|
else
|
|
{
|
|
fprintf(stderr,"CUDA allocMappedMemory Error: %s\n", cudaGetErrorString(err));
|
|
if(err == cudaErrorMemoryAllocation)
|
|
return SPGPU_OUTOFMEMORY;
|
|
else
|
|
return SPGPU_UNSPECIFIED;
|
|
}
|
|
}
|
|
|
|
int unregisterMappedMemory(void *buffer)
|
|
{
|
|
//cudaError_t err = cudaHostAlloc(buffer,size,cudaHostAllocMapped);
|
|
cudaError_t err = cudaHostUnregister(buffer);
|
|
|
|
if (err == cudaSuccess)
|
|
{
|
|
return SPGPU_SUCCESS;
|
|
}
|
|
else
|
|
{
|
|
fprintf(stderr,"CUDA unregisterMappedMemory Error: %s\n", cudaGetErrorString(err));
|
|
if(err == cudaErrorMemoryAllocation)
|
|
return SPGPU_OUTOFMEMORY;
|
|
else
|
|
return SPGPU_UNSPECIFIED;
|
|
}
|
|
}
|
|
|
|
int writeRemoteBuffer(void* hostSrc, void* buffer, int count)
|
|
{
|
|
cudaError_t err = cudaMemcpy(buffer, hostSrc, count, cudaMemcpyHostToDevice);
|
|
|
|
if (err == cudaSuccess)
|
|
return SPGPU_SUCCESS;
|
|
else {
|
|
fprintf(stderr,"CUDA Error writeRemoteBuffer: %s %p %p %d\n",
|
|
cudaGetErrorString(err),buffer, hostSrc, count);
|
|
return SPGPU_UNSPECIFIED;
|
|
}
|
|
}
|
|
|
|
int readRemoteBuffer(void* hostDest, void* buffer, int count)
|
|
{
|
|
|
|
|
|
cudaError_t err1;
|
|
cudaError_t err;
|
|
#if 0
|
|
{
|
|
err1 =cudaGetLastError();
|
|
fprintf(stderr,"CUDA Error prior to readRemoteBuffer: %s %d\n",
|
|
cudaGetErrorString(err1),err1);
|
|
}
|
|
|
|
#endif
|
|
err = cudaMemcpy(hostDest, buffer, count, cudaMemcpyDeviceToHost);
|
|
|
|
if (err == cudaSuccess)
|
|
return SPGPU_SUCCESS;
|
|
else {
|
|
fprintf(stderr,"CUDA Error readRemoteBuffer: %s %p %p %d %d\n",
|
|
cudaGetErrorString(err),hostDest,buffer,count,err);
|
|
return SPGPU_UNSPECIFIED;
|
|
}
|
|
}
|
|
|
|
int freeRemoteBuffer(void* buffer)
|
|
{
|
|
cudaError_t err = cudaFree(buffer);
|
|
if (err == cudaSuccess)
|
|
return SPGPU_SUCCESS;
|
|
else {
|
|
fprintf(stderr,"CUDA Error freeRemoteBuffer: %s %p\n", cudaGetErrorString(err),buffer);
|
|
return SPGPU_UNSPECIFIED;
|
|
}
|
|
}
|
|
|
|
int gpuInit(int dev)
|
|
{
|
|
|
|
int count,err;
|
|
|
|
if ((err=cudaSetDeviceFlags(cudaDeviceMapHost))!=cudaSuccess)
|
|
fprintf(stderr,"Error On SetDeviceFlags: %d '%s'\n",err,cudaGetErrorString(err));
|
|
if ((prop=(struct cudaDeviceProp *) malloc(sizeof(struct cudaDeviceProp)))==NULL) {
|
|
fprintf(stderr,"CUDA Error gpuInit3: not malloced prop\n");
|
|
return SPGPU_UNSPECIFIED;
|
|
}
|
|
err = setDevice(dev);
|
|
if (err != cudaSuccess) {
|
|
fprintf(stderr,"CUDA Error gpuInit2: %s\n", cudaGetErrorString(err));
|
|
return SPGPU_UNSPECIFIED;
|
|
}
|
|
if (!psb_cublas_handle)
|
|
psb_cudaCreateCublasHandle();
|
|
hasUVA=getDeviceHasUVA();
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
void gpuClose()
|
|
{
|
|
cudaStream_t st1, st2;
|
|
if (! psb_cuda_handle)
|
|
st1=spgpuGetStream(psb_cuda_handle);
|
|
if (! psb_cublas_handle)
|
|
cublasGetStream(psb_cublas_handle,&st2);
|
|
|
|
psb_cudaDestroyHandle();
|
|
if (st1 != st2)
|
|
psb_cudaDestroyCublasHandle();
|
|
free(prop);
|
|
prop=NULL;
|
|
hasUVA=-1;
|
|
}
|
|
|
|
|
|
int setDevice(int dev)
|
|
{
|
|
int count,err,idev;
|
|
|
|
err = cudaGetDeviceCount(&count);
|
|
if (err != cudaSuccess) {
|
|
fprintf(stderr,"CUDA Error setDevice: %s\n", cudaGetErrorString(err));
|
|
return SPGPU_UNSPECIFIED;
|
|
}
|
|
|
|
if ((0<=dev)&&(dev<count))
|
|
idev = dev;
|
|
else
|
|
idev = 0;
|
|
err = cudaSetDevice(idev);
|
|
if (err != cudaSuccess) {
|
|
fprintf(stderr,"CUDA Error gpuInit2: %s\n", cudaGetErrorString(err));
|
|
return SPGPU_UNSPECIFIED;
|
|
}
|
|
err = cudaGetDeviceProperties(prop,idev);
|
|
if (err != cudaSuccess) {
|
|
fprintf(stderr,"CUDA Error gpuInit4: %s\n", cudaGetErrorString(err));
|
|
return SPGPU_UNSPECIFIED;
|
|
}
|
|
|
|
return SPGPU_SUCCESS;
|
|
}
|
|
|
|
int getDevice()
|
|
{ int count;
|
|
|
|
cudaGetDevice(&count);
|
|
return(count);
|
|
}
|
|
|
|
int getDeviceHasUVA()
|
|
{ int count=0;
|
|
if (prop!=NULL)
|
|
count = prop->unifiedAddressing;
|
|
return(count);
|
|
}
|
|
|
|
int getGPUMultiProcessors()
|
|
{ int count=0;
|
|
if (prop!=NULL)
|
|
count = prop->multiProcessorCount;
|
|
return(count);
|
|
}
|
|
|
|
|
|
int getGPUMemoryBusWidth()
|
|
{ int count=0;
|
|
#if CUDART_VERSION >= 5000
|
|
if (prop!=NULL)
|
|
count = prop->memoryBusWidth;
|
|
#endif
|
|
return(count);
|
|
}
|
|
int getGPUMemoryClockRate()
|
|
{ int count=0;
|
|
#if CUDART_VERSION >= 5000
|
|
if (prop!=NULL)
|
|
count = prop->memoryClockRate;
|
|
#endif
|
|
return(count);
|
|
}
|
|
int getGPUWarpSize()
|
|
{ int count=0;
|
|
if (prop!=NULL)
|
|
count = prop->warpSize;
|
|
return(count);
|
|
}
|
|
int getGPUMaxThreadsPerBlock()
|
|
{ int count=0;
|
|
if (prop!=NULL)
|
|
count = prop->maxThreadsPerBlock;
|
|
return(count);
|
|
}
|
|
int getGPUMaxThreadsPerMP()
|
|
{ int count=0;
|
|
if (prop!=NULL)
|
|
count = prop->maxThreadsPerMultiProcessor;
|
|
return(count);
|
|
}
|
|
int getGPUMaxRegistersPerBlock()
|
|
{ int count=0;
|
|
if (prop!=NULL)
|
|
count = prop->regsPerBlock;
|
|
return(count);
|
|
}
|
|
|
|
void cpyGPUNameString(char *cstring)
|
|
{
|
|
*cstring='\0';
|
|
if (prop!=NULL)
|
|
strcpy(cstring,prop->name);
|
|
|
|
}
|
|
|
|
int DeviceHasUVA()
|
|
{
|
|
return(hasUVA == 1);
|
|
}
|
|
|
|
|
|
int getDeviceCount()
|
|
{ int count;
|
|
cudaError_t err;
|
|
err = cudaGetDeviceCount(&count);
|
|
if (err != cudaSuccess) {
|
|
fprintf(stderr,"CUDA Error getDeviceCount: %s\n", cudaGetErrorString(err));
|
|
return SPGPU_UNSPECIFIED;
|
|
}
|
|
return(count);
|
|
}
|
|
|
|
void cudaSync()
|
|
{
|
|
cudaError_t err;
|
|
err = cudaDeviceSynchronize();
|
|
if (err == cudaSuccess)
|
|
return SPGPU_SUCCESS;
|
|
else {
|
|
fprintf(stderr,"CUDA Error cudaSync: %s\n", cudaGetErrorString(err));
|
|
return SPGPU_UNSPECIFIED;
|
|
}
|
|
}
|
|
|
|
void cudaReset()
|
|
{
|
|
cudaError_t err;
|
|
err = cudaDeviceReset();
|
|
if (err != cudaSuccess) {
|
|
fprintf(stderr,"CUDA Error Reset: %s\n", cudaGetErrorString(err));
|
|
return SPGPU_UNSPECIFIED;
|
|
}
|
|
}
|
|
|
|
|
|
spgpuHandle_t psb_cudaGetHandle()
|
|
{
|
|
return psb_cuda_handle;
|
|
}
|
|
|
|
void psb_cudaCreateHandle()
|
|
{
|
|
if (!psb_cuda_handle)
|
|
spgpuCreate(&psb_cuda_handle, getDevice());
|
|
|
|
}
|
|
|
|
void psb_cudaDestroyHandle()
|
|
{
|
|
if (!psb_cuda_handle)
|
|
spgpuDestroy(psb_cuda_handle);
|
|
psb_cuda_handle = NULL;
|
|
}
|
|
|
|
cudaStream_t psb_cudaGetStream()
|
|
{
|
|
return spgpuGetStream(psb_cuda_handle);
|
|
}
|
|
|
|
void psb_cudaSetStream(cudaStream_t stream)
|
|
{
|
|
spgpuSetStream(psb_cuda_handle, stream);
|
|
return ;
|
|
}
|
|
|
|
|
|
|
|
cublasHandle_t psb_cudaGetCublasHandle()
|
|
{
|
|
if (!psb_cublas_handle)
|
|
psb_cudaCreateCublasHandle();
|
|
return psb_cublas_handle;
|
|
}
|
|
void psb_cudaCreateCublasHandle()
|
|
{ if (!psb_cublas_handle)
|
|
cublasCreate(&psb_cublas_handle);
|
|
}
|
|
void psb_cudaDestroyCublasHandle()
|
|
{
|
|
if (!psb_cublas_handle)
|
|
cublasDestroy(psb_cublas_handle);
|
|
psb_cublas_handle=NULL;
|
|
}
|
|
|
|
/* Simple memory tools */
|
|
|
|
int allocateInt(void **d_int, int n)
|
|
{
|
|
return allocRemoteBuffer((void **)(d_int), n*sizeof(int));
|
|
}
|
|
|
|
int writeInt(void *d_int, int* h_int, int n)
|
|
{
|
|
int i,j;
|
|
int *di;
|
|
i = writeRemoteBuffer((void*)h_int, (void*)d_int, n*sizeof(int));
|
|
return i;
|
|
}
|
|
|
|
int readInt(void* d_int, int* h_int, int n)
|
|
{ int i;
|
|
i = readRemoteBuffer((void *) h_int, (void *) d_int, n*sizeof(int));
|
|
//cudaSync();
|
|
return(i);
|
|
}
|
|
|
|
int writeIntFirst(int first, void *d_int, int* h_int, int n, int IndexBase)
|
|
{
|
|
int i,j;
|
|
int *di=(int *) d_int;
|
|
di = &(di[first-IndexBase]);
|
|
i = writeRemoteBuffer((void*)h_int, (void*)di, n*sizeof(int));
|
|
return i;
|
|
}
|
|
|
|
int readIntFirst(int first,void* d_int, int* h_int, int n, int IndexBase)
|
|
{ int i;
|
|
int *di=(int *) d_int;
|
|
di = &(di[first-IndexBase]);
|
|
i = readRemoteBuffer((void *) h_int, (void *) di, n*sizeof(int));
|
|
//cudaSync();
|
|
return(i);
|
|
}
|
|
|
|
int allocateMultiInt(void **d_int, int m, int n)
|
|
{
|
|
return allocRemoteBuffer((void **)(d_int), m*n*sizeof(int));
|
|
}
|
|
|
|
int writeMultiInt(void *d_int, int* h_int, int m, int n)
|
|
{
|
|
int i,j;
|
|
int *di;
|
|
i = writeRemoteBuffer((void*)h_int, (void*)d_int, m*n*sizeof(int));
|
|
return i;
|
|
}
|
|
|
|
int readMultiInt(void* d_int, int* h_int, int m, int n)
|
|
{ int i;
|
|
i = readRemoteBuffer((void *) h_int, (void *) d_int, m*n*sizeof(int));
|
|
//cudaSync();
|
|
return(i);
|
|
}
|
|
|
|
void freeInt(void *d_int)
|
|
{
|
|
//printf("Before freeInt\n");
|
|
freeRemoteBuffer(d_int);
|
|
}
|
|
|
|
|
|
|
|
|
|
int allocateFloat(void **d_float, int n)
|
|
{
|
|
return allocRemoteBuffer((void **)(d_float), n*sizeof(float));
|
|
}
|
|
|
|
int writeFloat(void *d_float, float* h_float, int n)
|
|
{
|
|
int i;
|
|
|
|
i = writeRemoteBuffer((void*)h_float, (void*)d_float, n*sizeof(float));
|
|
|
|
return i;
|
|
}
|
|
|
|
int readFloat(void* d_float, float* h_float, int n)
|
|
{ int i;
|
|
i = readRemoteBuffer((void *) h_float, (void *) d_float, n*sizeof(float));
|
|
|
|
return(i);
|
|
}
|
|
|
|
int writeFloatFirst(int df, void *d_float, float* h_float, int n, int IndexBase)
|
|
{
|
|
int i;
|
|
|
|
float *dv=(float *) d_float;
|
|
dv = &dv[df-IndexBase];
|
|
i = writeRemoteBuffer((void*)h_float, (void*)dv, n*sizeof(float));
|
|
|
|
return i;
|
|
}
|
|
|
|
int readFloatFirst(int df, void* d_float, float* h_float, int n, int IndexBase)
|
|
{ int i;
|
|
float *dv=(float *) d_float;
|
|
dv = &dv[df-IndexBase];
|
|
//fprintf(stderr,"readFloatFirst: %d %p %p %p %d \n",df,d_float,dv,h_float,n);
|
|
i = readRemoteBuffer((void *) h_float, (void *) dv, n*sizeof(float));
|
|
|
|
return(i);
|
|
}
|
|
|
|
|
|
int allocateMultiFloat(void **d_float, int m, int n)
|
|
{
|
|
return allocRemoteBuffer((void **)(d_float), m*n*sizeof(float));
|
|
}
|
|
|
|
int writeMultiFloat(void *d_float, float* h_float, int m, int n)
|
|
{
|
|
int i,j;
|
|
i = writeRemoteBuffer((void*)h_float, (void*)d_float, m*n*sizeof(float));
|
|
return i;
|
|
}
|
|
|
|
int readMultiFloat(void* d_float, float* h_float, int m, int n)
|
|
{ int i;
|
|
i = readRemoteBuffer((void *) h_float, (void *) d_float, m*n*sizeof(float));
|
|
//cudaSync();
|
|
return(i);
|
|
}
|
|
|
|
void freeFloat(void *d_float)
|
|
{
|
|
freeRemoteBuffer(d_float);
|
|
}
|
|
|
|
|
|
|
|
int allocateDouble(void **d_double, int n)
|
|
{
|
|
return allocRemoteBuffer((void **)(d_double), n*sizeof(double));
|
|
}
|
|
|
|
int writeDouble(void *d_double, double* h_double, int n)
|
|
{
|
|
int i;
|
|
|
|
i = writeRemoteBuffer((void*)h_double, (void*)d_double, n*sizeof(double));
|
|
|
|
return i;
|
|
}
|
|
|
|
int readDouble(void* d_double, double* h_double, int n)
|
|
{ int i;
|
|
i = readRemoteBuffer((void *) h_double, (void *) d_double, n*sizeof(double));
|
|
|
|
return(i);
|
|
}
|
|
|
|
int writeDoubleFirst(int df, void *d_double, double* h_double, int n, int IndexBase)
|
|
{
|
|
int i;
|
|
|
|
double *dv=(double *) d_double;
|
|
dv = &dv[df-IndexBase];
|
|
i = writeRemoteBuffer((void*)h_double, (void*)dv, n*sizeof(double));
|
|
|
|
return i;
|
|
}
|
|
|
|
int readDoubleFirst(int df, void* d_double, double* h_double, int n, int IndexBase)
|
|
{ int i;
|
|
double *dv=(double *) d_double;
|
|
dv = &dv[df-IndexBase];
|
|
//fprintf(stderr,"readDoubleFirst: %d %p %p %p %d \n",df,d_double,dv,h_double,n);
|
|
i = readRemoteBuffer((void *) h_double, (void *) dv, n*sizeof(double));
|
|
|
|
return(i);
|
|
}
|
|
|
|
int allocateMultiDouble(void **d_double, int m, int n)
|
|
{
|
|
return allocRemoteBuffer((void **)(d_double), m*n*sizeof(double));
|
|
}
|
|
|
|
int writeMultiDouble(void *d_double, double* h_double, int m, int n)
|
|
{
|
|
int i,j;
|
|
i = writeRemoteBuffer((void*)h_double, (void*)d_double, m*n*sizeof(double));
|
|
return i;
|
|
}
|
|
|
|
int readMultiDouble(void* d_double, double* h_double, int m, int n)
|
|
{ int i;
|
|
i = readRemoteBuffer((void *) h_double, (void *) d_double, m*n*sizeof(double));
|
|
//cudaSync();
|
|
return(i);
|
|
}
|
|
|
|
void freeDouble(void *d_double)
|
|
{
|
|
freeRemoteBuffer(d_double);
|
|
}
|
|
|
|
|
|
|
|
int allocateFloatComplex(void **d_FloatComplex, int n)
|
|
{
|
|
return allocRemoteBuffer((void **)(d_FloatComplex), n*sizeof(cuFloatComplex));
|
|
}
|
|
|
|
int writeFloatComplex(void *d_FloatComplex, cuFloatComplex* h_FloatComplex, int n)
|
|
{
|
|
int i;
|
|
|
|
i = writeRemoteBuffer((void*)h_FloatComplex, (void*)d_FloatComplex, n*sizeof(cuFloatComplex));
|
|
|
|
return i;
|
|
}
|
|
|
|
int readFloatComplex(void* d_FloatComplex, cuFloatComplex* h_FloatComplex, int n)
|
|
{ int i;
|
|
i = readRemoteBuffer((void *) h_FloatComplex, (void *) d_FloatComplex, n*sizeof(cuFloatComplex));
|
|
|
|
return(i);
|
|
}
|
|
|
|
int allocateMultiFloatComplex(void **d_FloatComplex, int m, int n)
|
|
{
|
|
return allocRemoteBuffer((void **)(d_FloatComplex), m*n*sizeof(cuFloatComplex));
|
|
}
|
|
|
|
int writeMultiFloatComplex(void *d_FloatComplex, cuFloatComplex* h_FloatComplex, int m, int n)
|
|
{
|
|
int i,j;
|
|
i = writeRemoteBuffer((void*)h_FloatComplex, (void*)d_FloatComplex, m*n*sizeof(cuFloatComplex));
|
|
return i;
|
|
}
|
|
|
|
int readMultiFloatComplex(void* d_FloatComplex, cuFloatComplex* h_FloatComplex, int m, int n)
|
|
{ int i;
|
|
i = readRemoteBuffer((void *) h_FloatComplex, (void *) d_FloatComplex, m*n*sizeof(cuFloatComplex));
|
|
//cudaSync();
|
|
return(i);
|
|
}
|
|
|
|
int writeFloatComplexFirst(int df, void *d_floatComplex,
|
|
cuFloatComplex* h_floatComplex, int n, int IndexBase)
|
|
{
|
|
int i;
|
|
|
|
cuFloatComplex *dv=(cuFloatComplex *) d_floatComplex;
|
|
dv = &dv[df-IndexBase];
|
|
i = writeRemoteBuffer((void*)h_floatComplex, (void*)dv, n*sizeof(cuFloatComplex));
|
|
|
|
return i;
|
|
}
|
|
|
|
int readFloatComplexFirst(int df, void* d_floatComplex, cuFloatComplex* h_floatComplex,
|
|
int n, int IndexBase)
|
|
{ int i;
|
|
cuFloatComplex *dv=(cuFloatComplex *) d_floatComplex;
|
|
dv = &dv[df-IndexBase];
|
|
i = readRemoteBuffer((void *) h_floatComplex, (void *) dv, n*sizeof(cuFloatComplex));
|
|
|
|
return(i);
|
|
}
|
|
|
|
void freeFloatComplex(void *d_FloatComplex)
|
|
{
|
|
freeRemoteBuffer(d_FloatComplex);
|
|
}
|
|
|
|
|
|
|
|
|
|
int allocateDoubleComplex(void **d_DoubleComplex, int n)
|
|
{
|
|
return allocRemoteBuffer((void **)(d_DoubleComplex), n*sizeof(cuDoubleComplex));
|
|
}
|
|
|
|
int writeDoubleComplex(void *d_DoubleComplex, cuDoubleComplex* h_DoubleComplex, int n)
|
|
{
|
|
int i;
|
|
|
|
i = writeRemoteBuffer((void*)h_DoubleComplex, (void*)d_DoubleComplex, n*sizeof(cuDoubleComplex));
|
|
|
|
return i;
|
|
}
|
|
|
|
int readDoubleComplex(void* d_DoubleComplex, cuDoubleComplex* h_DoubleComplex, int n)
|
|
{ int i;
|
|
i = readRemoteBuffer((void *) h_DoubleComplex, (void *) d_DoubleComplex, n*sizeof(cuDoubleComplex));
|
|
|
|
return(i);
|
|
}
|
|
|
|
int writeDoubleComplexFirst(int df, void *d_doubleComplex,
|
|
cuDoubleComplex* h_doubleComplex, int n, int IndexBase)
|
|
{
|
|
int i;
|
|
|
|
cuDoubleComplex *dv=(cuDoubleComplex *) d_doubleComplex;
|
|
dv = &dv[df-IndexBase];
|
|
i = writeRemoteBuffer((void*)h_doubleComplex, (void*)dv, n*sizeof(cuDoubleComplex));
|
|
|
|
return i;
|
|
}
|
|
|
|
int readDoubleComplexFirst(int df, void* d_doubleComplex, cuDoubleComplex* h_doubleComplex,
|
|
int n, int IndexBase)
|
|
{ int i;
|
|
cuDoubleComplex *dv=(cuDoubleComplex *) d_doubleComplex;
|
|
dv = &dv[df-IndexBase];
|
|
i = readRemoteBuffer((void *) h_doubleComplex, (void *) dv, n*sizeof(cuDoubleComplex));
|
|
|
|
return(i);
|
|
}
|
|
|
|
int allocateMultiDoubleComplex(void **d_DoubleComplex, int m, int n)
|
|
{
|
|
return allocRemoteBuffer((void **)(d_DoubleComplex), m*n*sizeof(cuDoubleComplex));
|
|
}
|
|
|
|
int writeMultiDoubleComplex(void *d_DoubleComplex, cuDoubleComplex* h_DoubleComplex, int m, int n)
|
|
{
|
|
int i,j;
|
|
i = writeRemoteBuffer((void*)h_DoubleComplex, (void*)d_DoubleComplex, m*n*sizeof(cuDoubleComplex));
|
|
return i;
|
|
}
|
|
|
|
int readMultiDoubleComplex(void* d_DoubleComplex, cuDoubleComplex* h_DoubleComplex, int m, int n)
|
|
{ int i;
|
|
i = readRemoteBuffer((void *) h_DoubleComplex, (void *) d_DoubleComplex, m*n*sizeof(cuDoubleComplex));
|
|
//cudaSync();
|
|
return(i);
|
|
}
|
|
|
|
void freeDoubleComplex(void *d_DoubleComplex)
|
|
{
|
|
freeRemoteBuffer(d_DoubleComplex);
|
|
}
|
|
|
|
|
|
|
|
double etime()
|
|
{
|
|
struct timeval tt;
|
|
struct timezone tz;
|
|
double temp;
|
|
if (gettimeofday(&tt,&tz) != 0) {
|
|
fprintf(stderr,"Fatal error for gettimeofday ??? \n");
|
|
exit(-1);
|
|
}
|
|
temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6;
|
|
return(temp);
|
|
}
|
|
|
|
|