Patch summary (review notes; this text precedes the first "diff --git" line
and is not part of any hunk).

  * cuda/cuda_util.c: when TRACK_CUDA_MALLOC is defined, allocRemoteBuffer()
    adds each request to the file-static running total total_cuda_mem and
    logs the request size, the running total and the returned address to
    stderr.
  * cuda/fcusparse_fct.h: the mvbuffer / svbuffer allocations now go through
    allocRemoteBuffer() instead of calling cudaMalloc() directly, so they
    are covered by that tracking.

Changes relative to the patch as originally written:
  * The tracking fprintf() used %ld for both `count` (an int) and
    `total_cuda_mem` (a long long); the conversions are now %d and %lld so
    they match the argument types.
  * total_cuda_mem is increased only when cudaMalloc() returned cudaSuccess,
    so failed requests no longer inflate the total.

NOTE(review): the allocRemoteBuffer() calls added in fcusparse_fct.h ignore
  the return value, whereas the lines they replace were wrapped in
  CHECK_CUDA -- confirm that dropping the check is intended.
NOTE(review): allocRemoteBuffer() takes an int byte count; confirm that bfsz
  always fits in an int.
NOTE(review): line breaks and indentation below were reconstructed from the
  hunk line counts; a whitespace-insensitive apply may be needed.

diff --git a/cuda/cuda_util.c b/cuda/cuda_util.c
index c0e5c6e5..3fe61cc0 100644
--- a/cuda/cuda_util.c
+++ b/cuda/cuda_util.c
@@ -37,11 +37,19 @@ static int hasUVA=-1;
 static struct cudaDeviceProp *prop=NULL;
 static spgpuHandle_t psb_cuda_handle = NULL;
 static cublasHandle_t psb_cublas_handle = NULL;
-
+#if defined(TRACK_CUDA_MALLOC)
+static long long total_cuda_mem = 0;
+#endif
 
 int allocRemoteBuffer(void** buffer, int count)
 {
   cudaError_t err = cudaMalloc(buffer, count);
+#if defined(TRACK_CUDA_MALLOC)
+  if (err == cudaSuccess) total_cuda_mem += count;
+  fprintf(stderr,"Tracking CUDA allocRemoteBuffer for %d bytes total %lld address %p\n",
+          count, total_cuda_mem, *buffer);
+#endif
+
   if (err == cudaSuccess)
     {
       return SPGPU_SUCCESS;
diff --git a/cuda/fcusparse_fct.h b/cuda/fcusparse_fct.h
index 06facdc0..689bdc93 100644
--- a/cuda/fcusparse_fct.h
+++ b/cuda/fcusparse_fct.h
@@ -178,7 +178,8 @@ int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX,
         CHECK_CUDA(cudaFree(cMat->mvbuffer));
         cMat->mvbuffer = NULL;
       }
-      CHECK_CUDA(cudaMalloc((void **) &(cMat->mvbuffer), bfsz));
+      //CHECK_CUDA(cudaMalloc((void **) &(cMat->mvbuffer), bfsz));
+      allocRemoteBuffer((void **) &(cMat->mvbuffer), bfsz);
       cMat->mvbsize = bfsz;
     }
     CHECK_CUSPARSE(cusparseCsrmvEx(*my_handle,
@@ -215,7 +216,9 @@ int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX,
         CHECK_CUDA(cudaFree(cMat->mvbuffer));
         cMat->mvbuffer = NULL;
       }
-      CHECK_CUDA(cudaMalloc((void **) &(cMat->mvbuffer), bfsz));
+      //CHECK_CUDA(cudaMalloc((void **) &(cMat->mvbuffer), bfsz));
+      allocRemoteBuffer((void **) &(cMat->mvbuffer), bfsz);
+
       cMat->mvbsize = bfsz;
     }
     CHECK_CUSPARSE(cusparseSpMV(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
@@ -287,7 +290,9 @@ int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX,
         CHECK_CUDA(cudaFree(cMat->svbuffer));
         cMat->svbuffer = NULL;
       }
-      CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz));
+      //CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz));
+      allocRemoteBuffer((void **) &(cMat->svbuffer), bfsz);
+
       cMat->svbsize=bfsz;
       CHECK_CUSPARSE(cusparseSpSV_analysis(*my_handle,
                                            CUSPARSE_OPERATION_NON_TRANSPOSE,
@@ -382,7 +387,9 @@ int T_CSRGDeviceAlloc(T_Cmat *Matrix,int nr, int nc, int nz)
   /*   cMat->svbuffer = NULL; */
   /* } */
   if (bfsz > 0) {
-    CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz));
+    //CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz));
+    allocRemoteBuffer((void **) &(cMat->svbuffer), bfsz);
+
   } else {
     cMat->svbuffer=NULL;
   }