diff --git a/cuda/c_cusparse_mod.F90 b/cuda/c_cusparse_mod.F90 index 59f37732..07e57a55 100644 --- a/cuda/c_cusparse_mod.F90 +++ b/cuda/c_cusparse_mod.F90 @@ -97,6 +97,7 @@ module c_cusparse_mod end function c_CSRGDeviceSetMatIndexBase end interface +#if CUDA_SHORT_VERSION <= 10 interface CSRGDeviceCsrsmAnalysis function c_CSRGDeviceCsrsmAnalysis(Mat) & & bind(c,name="c_CSRGDeviceCsrsmAnalysis") result(res) @@ -106,7 +107,18 @@ module c_cusparse_mod integer(c_int) :: res end function c_CSRGDeviceCsrsmAnalysis end interface - +#else + interface CSRGIsNullSvBuffer + function c_CSRGIsNullSvBuffer(Mat) & + & bind(c,name="c_CSRGIsNullSvBuffer") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int) :: res + end function c_CSRGIsNullSvBuffer + end interface +#endif + interface CSRGDeviceAlloc function c_CSRGDeviceAlloc(Mat,nr,nc,nz) & & bind(c,name="c_CSRGDeviceAlloc") result(res) diff --git a/cuda/ccusparse.c b/cuda/ccusparse.c index c5430306..bab6ede0 100644 --- a/cuda/ccusparse.c +++ b/cuda/ccusparse.c @@ -38,8 +38,9 @@ #include "cintrf.h" #include "fcusparse.h" -/* Single precision complex */ -#define TYPE float complex + +/* Double precision real */ +#define TYPE float complex #define CUSPARSE_BASE_TYPE CUDA_C_32F #define T_CSRGDeviceMat c_CSRGDeviceMat #define T_Cmat c_Cmat @@ -54,25 +55,12 @@ #define T_CSRGDeviceGetParms c_CSRGDeviceGetParms #if CUDA_SHORT_VERSION <= 10 - #define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType #define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase #define T_CSRGDeviceCsrsmAnalysis c_CSRGDeviceCsrsmAnalysis #define cusparseTcsrmv cusparseCcsrmv #define cusparseTcsrsv_solve cusparseCcsrsv_solve #define cusparseTcsrsv_analysis cusparseCcsrsv_analysis - -#elif CUDA_VERSION < 11030 - -#define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType -#define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase -#define T_CSRGDeviceCsrsv2Analysis c_CSRGDeviceCsrsv2Analysis -#define cusparseTcsrsv2_bufferSize cusparseCcsrsv2_bufferSize -#define cusparseTcsrsv2_analysis cusparseCcsrsv2_analysis -#define cusparseTcsrsv2_solve cusparseCcsrsv2_solve - -#else - #define T_HYBGDeviceMat c_HYBGDeviceMat #define T_Hmat c_Hmat #define T_HYBGDeviceFree c_HYBGDeviceFree @@ -89,6 +77,22 @@ #define cusparseThybsv_solve cusparseChybsv_solve #define cusparseThybsv_analysis cusparseChybsv_analysis #define cusparseTcsr2hyb cusparseCcsr2hyb + +#elif CUDA_VERSION < 11030 + +#define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsv2Analysis c_CSRGDeviceCsrsv2Analysis +#define cusparseTcsrsv2_bufferSize cusparseCcsrsv2_bufferSize +#define cusparseTcsrsv2_analysis cusparseCcsrsv2_analysis +#define cusparseTcsrsv2_solve cusparseCcsrsv2_solve +#else + +#define T_CSRGIsNullSvBuffer c_CSRGIsNullSvBuffer +#define T_CSRGIsNullSvDescr c_CSRGIsNullSvDescr +#define T_CSRGIsNullMvDescr c_CSRGIsNullMvDescr +#define T_CSRGCreateSpMVDescr c_CSRGCreateSpMVDescr + #endif #include "fcusparse_fct.h" diff --git a/cuda/cuda_util.c b/cuda/cuda_util.c index 09265410..c0e5c6e5 100644 --- a/cuda/cuda_util.c +++ b/cuda/cuda_util.c @@ -228,7 +228,7 @@ int gpuInit(int dev) if (!psb_cublas_handle) psb_cudaCreateCublasHandle(); hasUVA=getDeviceHasUVA(); - + FcusparseCreate(); return err; } @@ -240,7 +240,7 @@ void gpuClose() st1=spgpuGetStream(psb_cuda_handle); if (! psb_cublas_handle) cublasGetStream(psb_cublas_handle,&st2); - + FcusparseDestroy(); psb_cudaDestroyHandle(); if (st1 != st2) psb_cudaDestroyCublasHandle(); diff --git a/cuda/d_cusparse_mod.F90 b/cuda/d_cusparse_mod.F90 index ae9bcceb..399ac085 100644 --- a/cuda/d_cusparse_mod.F90 +++ b/cuda/d_cusparse_mod.F90 @@ -107,8 +107,18 @@ module d_cusparse_mod integer(c_int) :: res end function d_CSRGDeviceCsrsmAnalysis end interface +#else + interface CSRGIsNullSvBuffer + function d_CSRGIsNullSvBuffer(Mat) & + & bind(c,name="d_CSRGIsNullSvBuffer") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int) :: res + end function d_CSRGIsNullSvBuffer + end interface #endif - + interface CSRGDeviceAlloc function d_CSRGDeviceAlloc(Mat,nr,nc,nz) & & bind(c,name="d_CSRGDeviceAlloc") result(res) diff --git a/cuda/dcusparse.c b/cuda/dcusparse.c index f14e787c..657ca5be 100644 --- a/cuda/dcusparse.c +++ b/cuda/dcusparse.c @@ -86,6 +86,12 @@ #define cusparseTcsrsv2_bufferSize cusparseDcsrsv2_bufferSize #define cusparseTcsrsv2_analysis cusparseDcsrsv2_analysis #define cusparseTcsrsv2_solve cusparseDcsrsv2_solve +#else + +#define T_CSRGIsNullSvBuffer d_CSRGIsNullSvBuffer +#define T_CSRGIsNullSvDescr d_CSRGIsNullSvDescr +#define T_CSRGIsNullMvDescr d_CSRGIsNullMvDescr +#define T_CSRGCreateSpMVDescr d_CSRGCreateSpMVDescr #endif diff --git a/cuda/fcusparse.c b/cuda/fcusparse.c index c1b661ab..1b37272c 100644 --- a/cuda/fcusparse.c +++ b/cuda/fcusparse.c @@ -53,14 +53,17 @@ int FcusparseCreate() if (ret == CUSPARSE_STATUS_SUCCESS) cusparse_handle = handle; } + fprintf(stderr,"Created cusparses_handle\n"); return (ret); } int FcusparseDestroy() { int val; - val = (int) cusparseDestroy(*cusparse_handle); - free(cusparse_handle); + if (cusparse_handle!=NULL){ + val = (int) cusparseDestroy(*cusparse_handle); + free(cusparse_handle); + } cusparse_handle=NULL; return(val); } diff --git a/cuda/fcusparse_fct.h b/cuda/fcusparse_fct.h index 5afe410d..578c8b51 100644 --- a/cuda/fcusparse_fct.h +++ b/cuda/fcusparse_fct.h @@ -39,7 +39,7 @@ typedef struct T_CSRGDeviceMat size_t mvbsize, svbsize; void *mvbuffer, *svbuffer; #else - cusparseSpMatDescr_t descr; + cusparseSpMatDescr_t *spmvDescr; cusparseSpSVDescr_t *spsvDescr; size_t mvbsize, svbsize; void *mvbuffer, *svbuffer; @@ -102,6 +102,12 @@ int T_CSRGDeviceSetMatType(T_Cmat *Mat, int type); int T_CSRGDeviceSetMatFillMode(T_Cmat *Mat, int type); int T_CSRGDeviceSetMatDiagType(T_Cmat *Mat, int type); int T_CSRGDeviceSetMatIndexBase(T_Cmat *Mat, int type); +#else + +int T_CSRGCreateSpMVDescr(T_CSRGDeviceMat *cMat); +int T_CSRGIsNullSvBuffer(T_CSRGDeviceMat *cMat); +int T_CSRGIsNullSvDescr(T_CSRGDeviceMat *cMat); +int T_CSRGIsNullMvDescr(T_CSRGDeviceMat *cMat); #endif @@ -187,15 +193,20 @@ int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, (void *) vY, CUSPARSE_BASE_TYPE, CUSPARSE_BASE_TYPE, (void *) cMat->mvbuffer)); -#elif CUDA_VERSION <= 12030 +#else cusparseDnVecDescr_t vecX, vecY; size_t bfsz; + + if (T_CSRGIsNullMvDescr(cMat)) { + cMat->spmvDescr = (cusparseSpMatDescr_t *) malloc(sizeof(cusparseSpMatDescr_t *)); + } + T_CSRGCreateSpMVDescr(cMat); vX=x->v_; vY=y->v_; CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, cMat->m, vY, CUSPARSE_BASE_TYPE) ); CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, cMat->n, vX, CUSPARSE_BASE_TYPE) ); CHECK_CUSPARSE(cusparseSpMV_bufferSize(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, - &alpha,cMat->descr,vecX,&beta,vecY, + &alpha,(*(cMat->spmvDescr)),vecX,&beta,vecY, CUSPARSE_BASE_TYPE,CUSPARSE_SPMV_ALG_DEFAULT, &bfsz)); if (bfsz > cMat->mvbsize) { @@ -207,13 +218,12 @@ int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, cMat->mvbsize = bfsz; } CHECK_CUSPARSE(cusparseSpMV(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, - &alpha,cMat->descr,vecX,&beta,vecY, + &alpha,(*(cMat->spmvDescr)),vecX,&beta,vecY, CUSPARSE_BASE_TYPE,CUSPARSE_SPMV_ALG_DEFAULT, cMat->mvbuffer)); CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) ); CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) ); -#else - fprintf(stderr,"Unsupported CUSPARSE version\n"); + CHECK_CUSPARSE(cusparseDestroySpMat(*(cMat->spmvDescr))); #endif } @@ -246,16 +256,24 @@ int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, (const TYPE *) vX, (TYPE *) vY, CUSPARSE_SOLVE_POLICY_USE_LEVEL, (void *) cMat->svbuffer)); -#elif CUDA_VERSION <= 12030 +#else cusparseDnVecDescr_t vecX, vecY; size_t bfsz; vX=x->v_; vY=y->v_; - cMat->spsvDescr=(cusparseSpSVDescr_t *) malloc(sizeof(cusparseSpSVDescr_t *)); CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, cMat->m, vY, CUSPARSE_BASE_TYPE) ); CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, cMat->n, vX, CUSPARSE_BASE_TYPE) ); + // fprintf(stderr,"Entry to SpSVDevice: %d %p\n", + // T_CSRGIsNullSvDescr(cMat),cMat->spsvDescr); + if (T_CSRGIsNullSvDescr(cMat)) { + cMat->spsvDescr=(cusparseSpSVDescr_t *) malloc(sizeof(cusparseSpSVDescr_t *)); + cMat->svbsize=0; + CHECK_CUSPARSE( cusparseSpSV_createDescr(cMat->spsvDescr) ); + } + //fprintf(stderr,"Entry to SpSVDevice: %d %p %d\n", + // T_CSRGIsNullSvDescr(cMat),cMat->spsvDescr,cMat->svbsize); CHECK_CUSPARSE(cusparseSpSV_bufferSize(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, - &alpha,cMat->descr,vecX,vecY, + &alpha,*(cMat->spmvDescr),vecX,vecY, CUSPARSE_BASE_TYPE, CUSPARSE_SPSV_ALG_DEFAULT, *(cMat->spsvDescr), @@ -267,31 +285,49 @@ int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, } CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz)); cMat->svbsize=bfsz; - } - if (cMat->spsvDescr==NULL) { CHECK_CUSPARSE(cusparseSpSV_analysis(*my_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, - cMat->descr, + *(cMat->spmvDescr), vecX, vecY, CUSPARSE_BASE_TYPE, CUSPARSE_SPSV_ALG_DEFAULT, *(cMat->spsvDescr), cMat->svbuffer)); } - + if (T_CSRGIsNullSvBuffer(cMat)) { + fprintf(stderr,"SpSV_SOLVE NULL spsv-buffer\n"); + } + T_CSRGCreateSpMVDescr(cMat); CHECK_CUSPARSE(cusparseSpSV_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, - &alpha,cMat->descr,vecX,vecY, + &alpha,*(cMat->spmvDescr),vecX,vecY, CUSPARSE_BASE_TYPE, CUSPARSE_SPSV_ALG_DEFAULT, *(cMat->spsvDescr))); CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) ); CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) ); -#else - fprintf(stderr,"Unsupported CUSPARSE version\n"); + CHECK_CUSPARSE(cusparseDestroySpMat(*(cMat->spmvDescr))); #endif } + +T_CSRGCreateSpMVDescr(T_CSRGDeviceMat *cMat) +{ + int64_t tr,tc,tz; + tr = cMat->m; + tc = cMat->n; + tz = cMat->nz; + CHECK_CUSPARSE(cusparseCreateCsr(cMat->spmvDescr, + tr,tc,tz, + (void *) cMat->irp, + (void *) cMat->ja, + (void *) cMat->val, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ONE, + CUSPARSE_BASE_TYPE) ); +} + int T_CSRGDeviceAlloc(T_Cmat *Matrix,int nr, int nc, int nz) { T_CSRGDeviceMat *cMat; @@ -353,17 +389,8 @@ int T_CSRGDeviceAlloc(T_Cmat *Matrix,int nr, int nc, int nz) #else - int64_t rows=nr, cols=nc, nnz=nz; - CHECK_CUSPARSE(cusparseCreateCsr(&(cMat->descr), - rows, cols, nnz, - (void *) cMat->irp, - (void *) cMat->ja, - (void *) cMat->val, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_BASE_ONE, - CUSPARSE_BASE_TYPE) ); + cMat->spmvDescr=NULL; cMat->spsvDescr=NULL; cMat->mvbuffer=NULL; cMat->svbuffer=NULL; @@ -389,20 +416,23 @@ int T_CSRGDeviceFree(T_Cmat *Matrix) cusparseDestroyMatDescr(cMat->descr); cusparseDestroyCsrsv2Info(cMat->triang); #else - cusparseDestroySpMat(cMat->descr); - if (cMat->spsvDescr!=NULL) { - CHECK_CUSPARSE( cusparseSpSV_destroyDescr(*(cMat->spsvDescr))); - free(cMat->spsvDescr); - cMat->spsvDescr=NULL; + if (!T_CSRGIsNullMvDescr(cMat)) { + // already destroyed spmvDescr, just free the pointer + free(cMat->spmvDescr); + cMat->spmvDescr=NULL; } if (cMat->mvbuffer!=NULL) CHECK_CUDA( cudaFree(cMat->mvbuffer)); + cMat->mvbuffer=NULL; + cMat->mvbsize=0; + if (!T_CSRGIsNullSvDescr(cMat)) { + CHECK_CUSPARSE(cusparseSpSV_destroyDescr(*(cMat->spsvDescr))); + free(cMat->spsvDescr); + cMat->spsvDescr=NULL; + } if (cMat->svbuffer!=NULL) CHECK_CUDA( cudaFree(cMat->svbuffer)); - cMat->spsvDescr=NULL; - cMat->mvbuffer=NULL; cMat->svbuffer=NULL; - cMat->mvbsize=0; cMat->svbsize=0; #endif free(cMat); @@ -500,7 +530,7 @@ int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type) T_CSRGDeviceMat *cMat= Matrix->mat; cusparseFillMode_t mode=type; - CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->descr, + CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->spmvDescr, CUSPARSE_SPMAT_FILL_MODE, (const void*) &mode, sizeof(cusparseFillMode_t))); @@ -511,13 +541,27 @@ int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type) { T_CSRGDeviceMat *cMat= Matrix->mat; cusparseDiagType_t cutype=type; - CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->descr, + CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->spmvDescr, CUSPARSE_SPMAT_DIAG_TYPE, (const void*) &cutype, sizeof(cusparseDiagType_t))); return(0); } +int T_CSRGIsNullMvDescr(T_CSRGDeviceMat *cMat) +{ + return(cMat->spmvDescr == NULL); +} + +int T_CSRGIsNullSvBuffer(T_CSRGDeviceMat *cMat) +{ + return(cMat->svbuffer == NULL); +} +int T_CSRGIsNullSvDescr(T_CSRGDeviceMat *cMat) +{ + return(cMat->spsvDescr == NULL); +} + #endif int T_CSRGHost2Device(T_Cmat *Matrix, int m, int n, int nz, @@ -550,6 +594,8 @@ int T_CSRGHost2Device(T_Cmat *Matrix, int m, int n, int nz, cMat->triang, CUSPARSE_SOLVE_POLICY_USE_LEVEL, cMat->svbuffer)); } +#else + //cusparseSetMatType(*(cMat->spmvDescr),CUSPARSE_MATRIX_TYPE_GENERAL); #endif return(CUSPARSE_STATUS_SUCCESS); } diff --git a/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 index aebb07e4..8e7d25a9 100644 --- a/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 @@ -227,7 +227,7 @@ subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm) endif -#else +#elif 0 if (a%is_unit()) then ! @@ -308,7 +308,66 @@ subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm) !!$ if ((info == 0) .and. a%is_triangle()) then !!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) !!$ end if - + +#else + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + if ((info == 0) .and. a%is_triangle()) then + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = cone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = cone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + #endif call a%set_sync() diff --git a/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 index 16cb541d..4ecb0bbc 100644 --- a/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 @@ -308,6 +308,7 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm) !!$ if ((info == 0) .and. a%is_triangle()) then !!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) !!$ end if + #else if (a%is_unit()) then @@ -325,9 +326,7 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm) end if end if !!! We are explicitly adding the diagonal - !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) if ((info == 0) .and. a%is_triangle()) then -!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) if ((info == 0).and.a%is_upper()) then info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) else @@ -366,24 +365,6 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm) else if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) - !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general) -!!$ if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) -!!$ if (a%is_triangle()) then -!!$ if (info == 0) then -!!$ if (a%is_unit()) then -!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) -!!$ else -!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) -!!$ end if -!!$ end if -!!$ if ((info == 0) )then -!!$ if ((info == 0).and.a%is_upper()) then -!!$ info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) -!!$ else -!!$ info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) -!!$ end if -!!$ end if -!!$ end if if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) endif diff --git a/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 index cf052e13..246e780d 100644 --- a/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 @@ -227,7 +227,7 @@ subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm) endif -#else +#elif 0 if (a%is_unit()) then ! @@ -308,7 +308,66 @@ subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm) !!$ if ((info == 0) .and. a%is_triangle()) then !!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) !!$ end if - + +#else + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + if ((info == 0) .and. a%is_triangle()) then + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = sone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = sone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + #endif call a%set_sync() diff --git a/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 index f7e65627..41c96f68 100644 --- a/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 @@ -227,7 +227,7 @@ subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm) endif -#else +#elif 0 if (a%is_unit()) then ! @@ -308,7 +308,66 @@ subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm) !!$ if ((info == 0) .and. a%is_triangle()) then !!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) !!$ end if - + +#else + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + if ((info == 0) .and. a%is_triangle()) then + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = zone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = zone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + #endif call a%set_sync() diff --git a/cuda/s_cusparse_mod.F90 b/cuda/s_cusparse_mod.F90 index ab322129..a0214110 100644 --- a/cuda/s_cusparse_mod.F90 +++ b/cuda/s_cusparse_mod.F90 @@ -97,6 +97,7 @@ module s_cusparse_mod end function s_CSRGDeviceSetMatIndexBase end interface +#if CUDA_SHORT_VERSION <= 10 interface CSRGDeviceCsrsmAnalysis function s_CSRGDeviceCsrsmAnalysis(Mat) & & bind(c,name="s_CSRGDeviceCsrsmAnalysis") result(res) @@ -106,7 +107,18 @@ module s_cusparse_mod integer(c_int) :: res end function s_CSRGDeviceCsrsmAnalysis end interface - +#else + interface CSRGIsNullSvBuffer + function s_CSRGIsNullSvBuffer(Mat) & + & bind(c,name="s_CSRGIsNullSvBuffer") result(res) + use iso_c_binding + import s_Cmat + type(s_Cmat) :: Mat + integer(c_int) :: res + end function s_CSRGIsNullSvBuffer + end interface +#endif + interface CSRGDeviceAlloc function s_CSRGDeviceAlloc(Mat,nr,nc,nz) & & bind(c,name="s_CSRGDeviceAlloc") result(res) diff --git a/cuda/scusparse.c b/cuda/scusparse.c index 2ad2e2dc..d4db9b7c 100644 --- a/cuda/scusparse.c +++ b/cuda/scusparse.c @@ -38,8 +38,9 @@ #include "cintrf.h" #include "fcusparse.h" -/* Single precision real */ -#define TYPE float + +/* Double precision real */ +#define TYPE float #define CUSPARSE_BASE_TYPE CUDA_R_32F #define T_CSRGDeviceMat s_CSRGDeviceMat #define T_Cmat s_Cmat @@ -60,7 +61,6 @@ #define cusparseTcsrmv cusparseScsrmv #define cusparseTcsrsv_solve cusparseScsrsv_solve #define cusparseTcsrsv_analysis cusparseScsrsv_analysis - #define T_HYBGDeviceMat s_HYBGDeviceMat #define T_Hmat s_Hmat #define T_HYBGDeviceFree s_HYBGDeviceFree @@ -78,7 +78,6 @@ #define cusparseThybsv_analysis cusparseShybsv_analysis #define cusparseTcsr2hyb cusparseScsr2hyb - #elif CUDA_VERSION < 11030 #define T_CSRGDeviceSetMatType s_CSRGDeviceSetMatType @@ -87,6 +86,13 @@ #define cusparseTcsrsv2_bufferSize cusparseScsrsv2_bufferSize #define cusparseTcsrsv2_analysis cusparseScsrsv2_analysis #define cusparseTcsrsv2_solve cusparseScsrsv2_solve +#else + +#define T_CSRGIsNullSvBuffer s_CSRGIsNullSvBuffer +#define T_CSRGIsNullSvDescr s_CSRGIsNullSvDescr +#define T_CSRGIsNullMvDescr s_CSRGIsNullMvDescr +#define T_CSRGCreateSpMVDescr s_CSRGCreateSpMVDescr + #endif #include "fcusparse_fct.h" diff --git a/cuda/z_cusparse_mod.F90 b/cuda/z_cusparse_mod.F90 index c3f21c0c..91a6fcbd 100644 --- a/cuda/z_cusparse_mod.F90 +++ b/cuda/z_cusparse_mod.F90 @@ -97,6 +97,7 @@ module z_cusparse_mod end function z_CSRGDeviceSetMatIndexBase end interface +#if CUDA_SHORT_VERSION <= 10 interface CSRGDeviceCsrsmAnalysis function z_CSRGDeviceCsrsmAnalysis(Mat) & & bind(c,name="z_CSRGDeviceCsrsmAnalysis") result(res) @@ -106,7 +107,18 @@ module z_cusparse_mod integer(c_int) :: res end function z_CSRGDeviceCsrsmAnalysis end interface - +#else + interface CSRGIsNullSvBuffer + function z_CSRGIsNullSvBuffer(Mat) & + & bind(c,name="z_CSRGIsNullSvBuffer") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int) :: res + end function z_CSRGIsNullSvBuffer + end interface +#endif + interface CSRGDeviceAlloc function z_CSRGDeviceAlloc(Mat,nr,nc,nz) & & bind(c,name="z_CSRGDeviceAlloc") result(res) diff --git a/cuda/zcusparse.c b/cuda/zcusparse.c index 050c0ccd..a70a6573 100644 --- a/cuda/zcusparse.c +++ b/cuda/zcusparse.c @@ -38,8 +38,9 @@ #include "cintrf.h" #include "fcusparse.h" -/* Double precision complex */ -#define TYPE double complex + +/* Double precision real */ +#define TYPE double complex #define CUSPARSE_BASE_TYPE CUDA_C_64F #define T_CSRGDeviceMat z_CSRGDeviceMat #define T_Cmat z_Cmat @@ -85,8 +86,14 @@ #define cusparseTcsrsv2_bufferSize cusparseZcsrsv2_bufferSize #define cusparseTcsrsv2_analysis cusparseZcsrsv2_analysis #define cusparseTcsrsv2_solve cusparseZcsrsv2_solve +#else + +#define T_CSRGIsNullSvBuffer z_CSRGIsNullSvBuffer +#define T_CSRGIsNullSvDescr z_CSRGIsNullSvDescr +#define T_CSRGIsNullMvDescr z_CSRGIsNullMvDescr +#define T_CSRGCreateSpMVDescr z_CSRGCreateSpMVDescr + #endif #include "fcusparse_fct.h" -