/* Parallel Sparse BLAS GPU plugin */ /* (C) Copyright 2013 */ /* Salvatore Filippone */ /* Alessandro Fanfarillo */ /* Redistribution and use in source and binary forms, with or without */ /* modification, are permitted provided that the following conditions */ /* are met: */ /* 1. Redistributions of source code must retain the above copyright */ /* notice, this list of conditions and the following disclaimer. */ /* 2. Redistributions in binary form must reproduce the above copyright */ /* notice, this list of conditions, and the following disclaimer in the */ /* documentation and/or other materials provided with the distribution. */ /* 3. The name of the PSBLAS group or the names of its contributors may */ /* not be used to endorse or promote products derived from this */ /* software without specific written permission. */ /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ /* POSSIBILITY OF SUCH DAMAGE. */ typedef struct T_CSRGDeviceMat { #if CUDA_SHORT_VERSION <= 10 cusparseMatDescr_t descr; cusparseSolveAnalysisInfo_t triang; #elif CUDA_VERSION < 11030 cusparseMatDescr_t descr; csrsv2Info_t triang; size_t mvbsize, svbsize; void *mvbuffer, *svbuffer; #else cusparseSpMatDescr_t descr; cusparseSpSVDescr_t *spsvDescr; size_t mvbsize, svbsize; void *mvbuffer, *svbuffer; #endif int m, n, nz; TYPE *val; int *irp; int *ja; } T_CSRGDeviceMat; /* Interoperability: type coming from Fortran side to distinguish D/S/C/Z. */ typedef struct T_Cmat { T_CSRGDeviceMat *mat; } T_Cmat; #if CUDA_SHORT_VERSION <= 10 typedef struct T_HYBGDeviceMat { cusparseMatDescr_t descr; cusparseSolveAnalysisInfo_t triang; cusparseHybMat_t hybA; int m, n, nz; TYPE *val; int *irp; int *ja; } T_HYBGDeviceMat; /* Interoperability: type coming from Fortran side to distinguish D/S/C/Z. */ typedef struct T_Hmat { T_HYBGDeviceMat *mat; } T_Hmat; #endif int T_spmvCSRGDevice(T_Cmat *Mat, TYPE alpha, void *deviceX, TYPE beta, void *deviceY); int T_spsvCSRGDevice(T_Cmat *Mat, TYPE alpha, void *deviceX, TYPE beta, void *deviceY); int T_CSRGDeviceAlloc(T_Cmat *Mat,int nr, int nc, int nz); int T_CSRGDeviceFree(T_Cmat *Mat); int T_CSRGHost2Device(T_Cmat *Mat, int m, int n, int nz, int *irp, int *ja, TYPE *val); int T_CSRGDevice2Host(T_Cmat *Mat, int m, int n, int nz, int *irp, int *ja, TYPE *val); int T_CSRGDeviceGetParms(T_Cmat *Mat,int *nr, int *nc, int *nz); #if CUDA_SHORT_VERSION <= 10 int T_CSRGDeviceSetMatType(T_Cmat *Mat, int type); int T_CSRGDeviceSetMatFillMode(T_Cmat *Mat, int type); int T_CSRGDeviceSetMatDiagType(T_Cmat *Mat, int type); int T_CSRGDeviceSetMatIndexBase(T_Cmat *Mat, int type); int T_CSRGDeviceCsrsmAnalysis(T_Cmat *Mat); #elif CUDA_VERSION < 11030 int T_CSRGDeviceSetMatType(T_Cmat *Mat, int type); int T_CSRGDeviceSetMatFillMode(T_Cmat *Mat, int type); int T_CSRGDeviceSetMatDiagType(T_Cmat *Mat, int type); int T_CSRGDeviceSetMatIndexBase(T_Cmat *Mat, int type); #endif #if CUDA_SHORT_VERSION <= 10 int T_HYBGDeviceFree(T_Hmat *Matrix); int T_spmvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX, TYPE beta, void *deviceY); int T_HYBGDeviceAlloc(T_Hmat *Matrix,int nr, int nc, int nz); int T_HYBGDeviceSetMatDiagType(T_Hmat *Matrix, int type); int T_HYBGDeviceSetMatIndexBase(T_Hmat *Matrix, int type); int T_HYBGDeviceSetMatType(T_Hmat *Matrix, int type); int T_HYBGDeviceSetMatFillMode(T_Hmat *Matrix, int type); int T_HYBGDeviceHybsmAnalysis(T_Hmat *Matrix); int T_spsvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX, TYPE beta, void *deviceY); int T_HYBGHost2Device(T_Hmat *Matrix, int m, int n, int nz, int *irp, int *ja, TYPE *val); #endif int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, TYPE beta, void *deviceY) { T_CSRGDeviceMat *cMat=Matrix->mat; struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; void *vX, *vY; int r,n; cusparseHandle_t *my_handle=getHandle(); TYPE ealpha=alpha, ebeta=beta; #if CUDA_SHORT_VERSION <= 10 /*getAddrMultiVecDevice(deviceX, &vX); getAddrMultiVecDevice(deviceY, &vY); */ vX=x->v_; vY=y->v_; return cusparseTcsrmv(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, cMat->m,cMat->n,cMat->nz,(const TYPE *) &alpha,cMat->descr, cMat->val, cMat->irp, cMat->ja, (const TYPE *) vX, (const TYPE *) &beta, (TYPE *) vY); #elif CUDA_VERSION < 11030 size_t bfsz; vX=x->v_; vY=y->v_; #if 1 CHECK_CUSPARSE(cusparseCsrmvEx_bufferSize(*my_handle,CUSPARSE_ALG_MERGE_PATH, CUSPARSE_OPERATION_NON_TRANSPOSE, cMat->m,cMat->n,cMat->nz, (const void *) &ealpha,CUSPARSE_BASE_TYPE, cMat->descr, (const void *) cMat->val, CUSPARSE_BASE_TYPE, (const int *) cMat->irp, (const int *) cMat->ja, (const void *) vX, CUSPARSE_BASE_TYPE, (const void *) &ebeta, CUSPARSE_BASE_TYPE, (void *) vY, CUSPARSE_BASE_TYPE, CUSPARSE_BASE_TYPE, &bfsz)); #else bfsz=cMat->nz; #endif if (bfsz > cMat->mvbsize) { if (cMat->mvbuffer != NULL) { CHECK_CUDA(cudaFree(cMat->mvbuffer)); cMat->mvbuffer = NULL; } CHECK_CUDA(cudaMalloc((void **) &(cMat->mvbuffer), bfsz)); cMat->mvbsize = bfsz; } CHECK_CUSPARSE(cusparseCsrmvEx(*my_handle, CUSPARSE_ALG_MERGE_PATH, CUSPARSE_OPERATION_NON_TRANSPOSE, cMat->m,cMat->n,cMat->nz, (const void *) &ealpha,CUSPARSE_BASE_TYPE, cMat->descr, (const void *) cMat->val, CUSPARSE_BASE_TYPE, (const int *) cMat->irp, (const int *) cMat->ja, (const void *) vX, CUSPARSE_BASE_TYPE, (const void *) &ebeta, CUSPARSE_BASE_TYPE, (void *) vY, CUSPARSE_BASE_TYPE, CUSPARSE_BASE_TYPE, (void *) cMat->mvbuffer)); #elif CUDA_VERSION <= 12030 cusparseDnVecDescr_t vecX, vecY; size_t bfsz; vX=x->v_; vY=y->v_; CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, cMat->m, vY, CUSPARSE_BASE_TYPE) ); CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, cMat->n, vX, CUSPARSE_BASE_TYPE) ); CHECK_CUSPARSE(cusparseSpMV_bufferSize(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha,cMat->descr,vecX,&beta,vecY, CUSPARSE_BASE_TYPE,CUSPARSE_SPMV_ALG_DEFAULT, &bfsz)); if (bfsz > cMat->mvbsize) { if (cMat->mvbuffer != NULL) { CHECK_CUDA(cudaFree(cMat->mvbuffer)); cMat->mvbuffer = NULL; } CHECK_CUDA(cudaMalloc((void **) &(cMat->mvbuffer), bfsz)); cMat->mvbsize = bfsz; } CHECK_CUSPARSE(cusparseSpMV(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha,cMat->descr,vecX,&beta,vecY, CUSPARSE_BASE_TYPE,CUSPARSE_SPMV_ALG_DEFAULT, cMat->mvbuffer)); CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) ); CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) ); #else fprintf(stderr,"Unsupported CUSPARSE version\n"); #endif } int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, TYPE beta, void *deviceY) { T_CSRGDeviceMat *cMat=Matrix->mat; struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; void *vX, *vY; int r,n; cusparseHandle_t *my_handle=getHandle(); #if CUDA_SHORT_VERSION <= 10 vX=x->v_; vY=y->v_; return cusparseTcsrsv_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, cMat->m,(const TYPE *) &alpha,cMat->descr, cMat->val, cMat->irp, cMat->ja, cMat->triang, (const TYPE *) vX, (TYPE *) vY); #elif CUDA_VERSION < 11030 vX=x->v_; vY=y->v_; CHECK_CUSPARSE(cusparseTcsrsv2_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, cMat->m,cMat->nz, (const TYPE *) &alpha, cMat->descr, cMat->val, cMat->irp, cMat->ja, cMat->triang, (const TYPE *) vX, (TYPE *) vY, CUSPARSE_SOLVE_POLICY_USE_LEVEL, (void *) cMat->svbuffer)); #elif CUDA_VERSION <= 12030 cusparseDnVecDescr_t vecX, vecY; size_t bfsz; vX=x->v_; vY=y->v_; cMat->spsvDescr=(cusparseSpSVDescr_t *) malloc(sizeof(cusparseSpSVDescr_t *)); CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, cMat->m, vY, CUSPARSE_BASE_TYPE) ); CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, cMat->n, vX, CUSPARSE_BASE_TYPE) ); CHECK_CUSPARSE(cusparseSpSV_bufferSize(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha,cMat->descr,vecX,vecY, CUSPARSE_BASE_TYPE, CUSPARSE_SPSV_ALG_DEFAULT, *(cMat->spsvDescr), &bfsz)); if (bfsz > cMat->svbsize) { if (cMat->svbuffer != NULL) { CHECK_CUDA(cudaFree(cMat->svbuffer)); cMat->svbuffer = NULL; } CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz)); cMat->svbsize=bfsz; } if (cMat->spsvDescr==NULL) { CHECK_CUSPARSE(cusparseSpSV_analysis(*my_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, cMat->descr, vecX, vecY, CUSPARSE_BASE_TYPE, CUSPARSE_SPSV_ALG_DEFAULT, *(cMat->spsvDescr), cMat->svbuffer)); } CHECK_CUSPARSE(cusparseSpSV_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha,cMat->descr,vecX,vecY, CUSPARSE_BASE_TYPE, CUSPARSE_SPSV_ALG_DEFAULT, *(cMat->spsvDescr))); CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) ); CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) ); #else fprintf(stderr,"Unsupported CUSPARSE version\n"); #endif } int T_CSRGDeviceAlloc(T_Cmat *Matrix,int nr, int nc, int nz) { T_CSRGDeviceMat *cMat; int nr1=nr, nz1=nz, rc; cusparseHandle_t *my_handle=getHandle(); int bfsz; if ((nr<0)||(nc<0)||(nz<0)) return((int) CUSPARSE_STATUS_INVALID_VALUE); if ((cMat=(T_CSRGDeviceMat *) malloc(sizeof(T_CSRGDeviceMat)))==NULL) return((int) CUSPARSE_STATUS_ALLOC_FAILED); cMat->m = nr; cMat->n = nc; cMat->nz = nz; if (nr1 == 0) nr1 = 1; if (nz1 == 0) nz1 = 1; if ((rc= allocRemoteBuffer(((void **) &(cMat->irp)), ((nr1+1)*sizeof(int)))) != 0) return(rc); if ((rc= allocRemoteBuffer(((void **) &(cMat->ja)), ((nz1)*sizeof(int)))) != 0) return(rc); if ((rc= allocRemoteBuffer(((void **) &(cMat->val)), ((nz1)*sizeof(TYPE)))) != 0) return(rc); #if CUDA_SHORT_VERSION <= 10 if ((rc= cusparseCreateMatDescr(&(cMat->descr))) !=0) return(rc); if ((rc= cusparseCreateSolveAnalysisInfo(&(cMat->triang))) !=0) return(rc); #elif CUDA_VERSION < 11030 if ((rc= cusparseCreateMatDescr(&(cMat->descr))) !=0) return(rc); CHECK_CUSPARSE(cusparseSetMatType(cMat->descr,CUSPARSE_MATRIX_TYPE_GENERAL)); CHECK_CUSPARSE(cusparseSetMatDiagType(cMat->descr,CUSPARSE_DIAG_TYPE_NON_UNIT)); CHECK_CUSPARSE(cusparseSetMatIndexBase(cMat->descr,CUSPARSE_INDEX_BASE_ONE)); CHECK_CUSPARSE(cusparseCreateCsrsv2Info(&(cMat->triang))); if (cMat->nz > 0) { CHECK_CUSPARSE(cusparseTcsrsv2_bufferSize(*my_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, cMat->m,cMat->nz, cMat->descr, cMat->val, cMat->irp, cMat->ja, cMat->triang, &bfsz)); } else { bfsz = 0; } /* if (cMat->svbuffer != NULL) { */ /* fprintf(stderr,"Calling cudaFree\n"); */ /* CHECK_CUDA(cudaFree(cMat->svbuffer)); */ /* cMat->svbuffer = NULL; */ /* } */ if (bfsz > 0) { CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz)); } else { cMat->svbuffer=NULL; } cMat->svbsize=bfsz; cMat->mvbuffer=NULL; cMat->mvbsize = 0; #else int64_t rows=nr, cols=nc, nnz=nz; CHECK_CUSPARSE(cusparseCreateCsr(&(cMat->descr), rows, cols, nnz, (void *) cMat->irp, (void *) cMat->ja, (void *) cMat->val, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ONE, CUSPARSE_BASE_TYPE) ); cMat->spsvDescr=NULL; cMat->mvbuffer=NULL; cMat->svbuffer=NULL; cMat->mvbsize=0; cMat->svbsize=0; #endif Matrix->mat = cMat; return(CUSPARSE_STATUS_SUCCESS); } int T_CSRGDeviceFree(T_Cmat *Matrix) { T_CSRGDeviceMat *cMat= Matrix->mat; if (cMat!=NULL) { freeRemoteBuffer(cMat->irp); freeRemoteBuffer(cMat->ja); freeRemoteBuffer(cMat->val); #if CUDA_SHORT_VERSION <= 10 cusparseDestroyMatDescr(cMat->descr); cusparseDestroySolveAnalysisInfo(cMat->triang); #elif CUDA_VERSION < 11030 cusparseDestroyMatDescr(cMat->descr); cusparseDestroyCsrsv2Info(cMat->triang); #else cusparseDestroySpMat(cMat->descr); if (cMat->spsvDescr!=NULL) { CHECK_CUSPARSE( cusparseSpSV_destroyDescr(*(cMat->spsvDescr))); free(cMat->spsvDescr); cMat->spsvDescr=NULL; } if (cMat->mvbuffer!=NULL) CHECK_CUDA( cudaFree(cMat->mvbuffer)); if (cMat->svbuffer!=NULL) CHECK_CUDA( cudaFree(cMat->svbuffer)); cMat->spsvDescr=NULL; cMat->mvbuffer=NULL; cMat->svbuffer=NULL; cMat->mvbsize=0; cMat->svbsize=0; #endif free(cMat); Matrix->mat = NULL; } return(CUSPARSE_STATUS_SUCCESS); } int T_CSRGDeviceGetParms(T_Cmat *Matrix,int *nr, int *nc, int *nz) { T_CSRGDeviceMat *cMat= Matrix->mat; if (cMat!=NULL) { *nr = cMat->m ; *nc = cMat->n ; *nz = cMat->nz ; return(CUSPARSE_STATUS_SUCCESS); } else { return((int) CUSPARSE_STATUS_ALLOC_FAILED); } } #if CUDA_SHORT_VERSION <= 10 int T_CSRGDeviceSetMatType(T_Cmat *Matrix, int type) { T_CSRGDeviceMat *cMat= Matrix->mat; return ((int) cusparseSetMatType(cMat->descr,type)); } int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type) { T_CSRGDeviceMat *cMat= Matrix->mat; return ((int) cusparseSetMatFillMode(cMat->descr,type)); } int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type) { T_CSRGDeviceMat *cMat= Matrix->mat; return ((int) cusparseSetMatDiagType(cMat->descr,type)); } int T_CSRGDeviceSetMatIndexBase(T_Cmat *Matrix, int type) { T_CSRGDeviceMat *cMat= Matrix->mat; return ((int) cusparseSetMatIndexBase(cMat->descr,type)); } int T_CSRGDeviceCsrsmAnalysis(T_Cmat *Matrix) { T_CSRGDeviceMat *cMat= Matrix->mat; int rc, buffersize; cusparseHandle_t *my_handle=getHandle(); cusparseSolveAnalysisInfo_t info; rc= (int) cusparseTcsrsv_analysis(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, cMat->m,cMat->nz,cMat->descr, cMat->val, cMat->irp, cMat->ja, cMat->triang); if (rc !=0) { fprintf(stderr,"From csrsv_analysis: %d\n",rc); } return(rc); } #elif CUDA_VERSION < 11030 int T_CSRGDeviceSetMatType(T_Cmat *Matrix, int type) { T_CSRGDeviceMat *cMat= Matrix->mat; return ((int) cusparseSetMatType(cMat->descr,type)); } int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type) { T_CSRGDeviceMat *cMat= Matrix->mat; return ((int) cusparseSetMatFillMode(cMat->descr,type)); } int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type) { T_CSRGDeviceMat *cMat= Matrix->mat; return ((int) cusparseSetMatDiagType(cMat->descr,type)); } int T_CSRGDeviceSetMatIndexBase(T_Cmat *Matrix, int type) { T_CSRGDeviceMat *cMat= Matrix->mat; return ((int) cusparseSetMatIndexBase(cMat->descr,type)); } #else int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type) { T_CSRGDeviceMat *cMat= Matrix->mat; cusparseFillMode_t mode=type; CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->descr, CUSPARSE_SPMAT_FILL_MODE, (const void*) &mode, sizeof(cusparseFillMode_t))); return(0); } int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type) { T_CSRGDeviceMat *cMat= Matrix->mat; cusparseDiagType_t cutype=type; CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->descr, CUSPARSE_SPMAT_DIAG_TYPE, (const void*) &cutype, sizeof(cusparseDiagType_t))); return(0); } #endif int T_CSRGHost2Device(T_Cmat *Matrix, int m, int n, int nz, int *irp, int *ja, TYPE *val) { int rc; T_CSRGDeviceMat *cMat= Matrix->mat; cusparseHandle_t *my_handle=getHandle(); if ((rc=writeRemoteBuffer((void *) irp, (void *) cMat->irp, (m+1)*sizeof(int))) != SPGPU_SUCCESS) return(rc); if ((rc=writeRemoteBuffer((void *) ja,(void *) cMat->ja, (nz)*sizeof(int))) != SPGPU_SUCCESS) return(rc); if ((rc=writeRemoteBuffer((void *) val, (void *) cMat->val, (nz)*sizeof(TYPE))) != SPGPU_SUCCESS) return(rc); #if (CUDA_SHORT_VERSION > 10 ) && (CUDA_VERSION < 11030) if (cusparseGetMatType(cMat->descr)== CUSPARSE_MATRIX_TYPE_TRIANGULAR) { // Why do we need to set TYPE_GENERAL??? cuSPARSE can be misterious sometimes. cusparseSetMatType(cMat->descr,CUSPARSE_MATRIX_TYPE_GENERAL); CHECK_CUSPARSE(cusparseTcsrsv2_analysis(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, cMat->m,cMat->nz, cMat->descr, cMat->val, cMat->irp, cMat->ja, cMat->triang, CUSPARSE_SOLVE_POLICY_USE_LEVEL, cMat->svbuffer)); } #endif return(CUSPARSE_STATUS_SUCCESS); } int T_CSRGDevice2Host(T_Cmat *Matrix, int m, int n, int nz, int *irp, int *ja, TYPE *val) { int rc; T_CSRGDeviceMat *cMat = Matrix->mat; if ((rc=readRemoteBuffer((void *) irp, (void *) cMat->irp, (m+1)*sizeof(int))) != SPGPU_SUCCESS) return(rc); if ((rc=readRemoteBuffer((void *) ja, (void *) cMat->ja, (nz)*sizeof(int))) != SPGPU_SUCCESS) return(rc); if ((rc=readRemoteBuffer((void *) val, (void *) cMat->val, (nz)*sizeof(TYPE))) != SPGPU_SUCCESS) return(rc); return(CUSPARSE_STATUS_SUCCESS); } #if CUDA_SHORT_VERSION <= 10 int T_HYBGDeviceFree(T_Hmat *Matrix) { T_HYBGDeviceMat *hMat= Matrix->mat; if (hMat != NULL) { cusparseDestroyMatDescr(hMat->descr); cusparseDestroySolveAnalysisInfo(hMat->triang); cusparseDestroyHybMat(hMat->hybA); free(hMat); } Matrix->mat = NULL; return(CUSPARSE_STATUS_SUCCESS); } int T_spmvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX, TYPE beta, void *deviceY) { T_HYBGDeviceMat *hMat=Matrix->mat; struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; void *vX, *vY; int r,n,rc; cusparseMatrixType_t type; cusparseHandle_t *my_handle=getHandle(); /*getAddrMultiVecDevice(deviceX, &vX); getAddrMultiVecDevice(deviceY, &vY); */ vX=x->v_; vY=y->v_; /* rc = (int) cusparseGetMatType(hMat->descr); */ /* fprintf(stderr,"Spmv MatType: %d\n",rc); */ /* rc = (int) cusparseGetMatDiagType(hMat->descr); */ /* fprintf(stderr,"Spmv DiagType: %d\n",rc); */ /* rc = (int) cusparseGetMatFillMode(hMat->descr); */ /* fprintf(stderr,"Spmv FillMode: %d\n",rc); */ /* Dirty trick: apparently hybmv does not accept a triangular matrix even though it should not make a difference. So we claim it's general anyway */ type = cusparseGetMatType(hMat->descr); rc = cusparseSetMatType(hMat->descr,CUSPARSE_MATRIX_TYPE_GENERAL); if (rc == 0) rc = (int) cusparseThybmv(*my_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, (const TYPE *) &alpha, hMat->descr, hMat->hybA, (const TYPE *) vX, (const TYPE *) &beta, (TYPE *) vY); if (rc == 0) rc = cusparseSetMatType(hMat->descr,type); return(rc); } int T_HYBGDeviceAlloc(T_Hmat *Matrix,int nr, int nc, int nz) { T_HYBGDeviceMat *hMat; int nr1=nr, nz1=nz, rc; if ((nr<0)||(nc<0)||(nz<0)) return((int) CUSPARSE_STATUS_INVALID_VALUE); if ((hMat=(T_HYBGDeviceMat *) malloc(sizeof(T_HYBGDeviceMat)))==NULL) return((int) CUSPARSE_STATUS_ALLOC_FAILED); hMat->m = nr; hMat->n = nc; hMat->nz = nz; if ((rc= cusparseCreateMatDescr(&(hMat->descr))) !=0) return(rc); if ((rc= cusparseCreateSolveAnalysisInfo(&(hMat->triang))) !=0) return(rc); if((rc = cusparseCreateHybMat(&(hMat->hybA))) != 0) return(rc); Matrix->mat = hMat; return(CUSPARSE_STATUS_SUCCESS); } int T_HYBGDeviceSetMatDiagType(T_Hmat *Matrix, int type) { T_HYBGDeviceMat *hMat= Matrix->mat; return ((int) cusparseSetMatDiagType(hMat->descr,type)); } int T_HYBGDeviceSetMatIndexBase(T_Hmat *Matrix, int type) { T_HYBGDeviceMat *hMat= Matrix->mat; return ((int) cusparseSetMatIndexBase(hMat->descr,type)); } int T_HYBGDeviceSetMatType(T_Hmat *Matrix, int type) { T_HYBGDeviceMat *hMat= Matrix->mat; return ((int) cusparseSetMatType(hMat->descr,type)); } int T_HYBGDeviceSetMatFillMode(T_Hmat *Matrix, int type) { T_HYBGDeviceMat *hMat= Matrix->mat; return ((int) cusparseSetMatFillMode(hMat->descr,type)); } int T_spsvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX, TYPE beta, void *deviceY) { //beta?? T_HYBGDeviceMat *hMat=Matrix->mat; struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; void *vX, *vY; int r,n; cusparseHandle_t *my_handle=getHandle(); /*getAddrMultiVecDevice(deviceX, &vX); getAddrMultiVecDevice(deviceY, &vY); */ vX=x->v_; vY=y->v_; return cusparseThybsv_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, (const TYPE *) &alpha, hMat->descr, hMat->hybA, hMat->triang, (const TYPE *) vX, (TYPE *) vY); } int T_HYBGDeviceHybsmAnalysis(T_Hmat *Matrix) { T_HYBGDeviceMat *hMat= Matrix->mat; cusparseSolveAnalysisInfo_t info; int rc; cusparseHandle_t *my_handle=getHandle(); /* rc = (int) cusparseGetMatType(hMat->descr); */ /* fprintf(stderr,"Analysis MatType: %d\n",rc); */ /* rc = (int) cusparseGetMatDiagType(hMat->descr); */ /* fprintf(stderr,"Analysis DiagType: %d\n",rc); */ /* rc = (int) cusparseGetMatFillMode(hMat->descr); */ /* fprintf(stderr,"Analysis FillMode: %d\n",rc); */ rc = (int) cusparseThybsv_analysis(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, hMat->descr, hMat->hybA, hMat->triang); if (rc !=0) { fprintf(stderr,"From csrsv_analysis: %d\n",rc); } return(rc); } int T_HYBGHost2Device(T_Hmat *Matrix, int m, int n, int nz, int *irp, int *ja, TYPE *val) { int rc; double t1,t2; int nr1=m, nz1=nz; T_HYBGDeviceMat *hMat= Matrix->mat; cusparseHandle_t *my_handle=getHandle(); if (nr1 == 0) nr1 = 1; if (nz1 == 0) nz1 = 1; if ((rc= allocRemoteBuffer(((void **) &(hMat->irp)), ((nr1+1)*sizeof(int)))) != 0) return(rc); if ((rc= allocRemoteBuffer(((void **) &(hMat->ja)), ((nz1)*sizeof(int)))) != 0) return(rc); if ((rc= allocRemoteBuffer(((void **) &(hMat->val)), ((nz1)*sizeof(TYPE)))) != 0) return(rc); if ((rc=writeRemoteBuffer((void *) irp, (void *) hMat->irp, (m+1)*sizeof(int))) != SPGPU_SUCCESS) return(rc); if ((rc=writeRemoteBuffer((void *) ja,(void *) hMat->ja, (nz)*sizeof(int))) != SPGPU_SUCCESS) return(rc); if ((rc=writeRemoteBuffer((void *) val, (void *) hMat->val, (nz)*sizeof(TYPE))) != SPGPU_SUCCESS) return(rc); /* rc = (int) cusparseGetMatType(hMat->descr); */ /* fprintf(stderr,"Conversion MatType: %d\n",rc); */ /* rc = (int) cusparseGetMatDiagType(hMat->descr); */ /* fprintf(stderr,"Conversion DiagType: %d\n",rc); */ /* rc = (int) cusparseGetMatFillMode(hMat->descr); */ /* fprintf(stderr,"Conversion FillMode: %d\n",rc); */ //t1=etime(); rc = (int) cusparseTcsr2hyb(*my_handle, m, n, hMat->descr, (const TYPE *)hMat->val, (const int *)hMat->irp, (const int *)hMat->ja, hMat->hybA,0, CUSPARSE_HYB_PARTITION_AUTO); freeRemoteBuffer(hMat->irp); hMat->irp = NULL; freeRemoteBuffer(hMat->ja); hMat->ja = NULL; freeRemoteBuffer(hMat->val); hMat->val = NULL; //cudaSync(); //t2 = etime(); //fprintf(stderr,"Inner call to cusparseTcsr2hyb: %lf\n",(t2-t1)); if (rc != 0) { fprintf(stderr,"From csr2hyb: %d\n",rc); } return(rc); } #endif