Modified CSRG to work with latest versions; cusparse docs are unclear

repack-nvid
Salvatore Filippone 1 year ago
parent 6b65199afb
commit d28ea462d9

@ -97,6 +97,7 @@ module c_cusparse_mod
end function c_CSRGDeviceSetMatIndexBase end function c_CSRGDeviceSetMatIndexBase
end interface end interface
#if CUDA_SHORT_VERSION <= 10
interface CSRGDeviceCsrsmAnalysis interface CSRGDeviceCsrsmAnalysis
function c_CSRGDeviceCsrsmAnalysis(Mat) & function c_CSRGDeviceCsrsmAnalysis(Mat) &
& bind(c,name="c_CSRGDeviceCsrsmAnalysis") result(res) & bind(c,name="c_CSRGDeviceCsrsmAnalysis") result(res)
@ -106,6 +107,17 @@ module c_cusparse_mod
integer(c_int) :: res integer(c_int) :: res
end function c_CSRGDeviceCsrsmAnalysis end function c_CSRGDeviceCsrsmAnalysis
end interface end interface
#else
interface CSRGIsNullSvBuffer
function c_CSRGIsNullSvBuffer(Mat) &
& bind(c,name="c_CSRGIsNullSvBuffer") result(res)
use iso_c_binding
import c_Cmat
type(c_Cmat) :: Mat
integer(c_int) :: res
end function c_CSRGIsNullSvBuffer
end interface
#endif
interface CSRGDeviceAlloc interface CSRGDeviceAlloc
function c_CSRGDeviceAlloc(Mat,nr,nc,nz) & function c_CSRGDeviceAlloc(Mat,nr,nc,nz) &

@ -38,7 +38,8 @@
#include "cintrf.h" #include "cintrf.h"
#include "fcusparse.h" #include "fcusparse.h"
/* Single precision complex */
/* Double precision real */
#define TYPE float complex #define TYPE float complex
#define CUSPARSE_BASE_TYPE CUDA_C_32F #define CUSPARSE_BASE_TYPE CUDA_C_32F
#define T_CSRGDeviceMat c_CSRGDeviceMat #define T_CSRGDeviceMat c_CSRGDeviceMat
@ -54,25 +55,12 @@
#define T_CSRGDeviceGetParms c_CSRGDeviceGetParms #define T_CSRGDeviceGetParms c_CSRGDeviceGetParms
#if CUDA_SHORT_VERSION <= 10 #if CUDA_SHORT_VERSION <= 10
#define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType #define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType
#define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase #define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase
#define T_CSRGDeviceCsrsmAnalysis c_CSRGDeviceCsrsmAnalysis #define T_CSRGDeviceCsrsmAnalysis c_CSRGDeviceCsrsmAnalysis
#define cusparseTcsrmv cusparseCcsrmv #define cusparseTcsrmv cusparseCcsrmv
#define cusparseTcsrsv_solve cusparseCcsrsv_solve #define cusparseTcsrsv_solve cusparseCcsrsv_solve
#define cusparseTcsrsv_analysis cusparseCcsrsv_analysis #define cusparseTcsrsv_analysis cusparseCcsrsv_analysis
#elif CUDA_VERSION < 11030
#define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType
#define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase
#define T_CSRGDeviceCsrsv2Analysis c_CSRGDeviceCsrsv2Analysis
#define cusparseTcsrsv2_bufferSize cusparseCcsrsv2_bufferSize
#define cusparseTcsrsv2_analysis cusparseCcsrsv2_analysis
#define cusparseTcsrsv2_solve cusparseCcsrsv2_solve
#else
#define T_HYBGDeviceMat c_HYBGDeviceMat #define T_HYBGDeviceMat c_HYBGDeviceMat
#define T_Hmat c_Hmat #define T_Hmat c_Hmat
#define T_HYBGDeviceFree c_HYBGDeviceFree #define T_HYBGDeviceFree c_HYBGDeviceFree
@ -89,6 +77,22 @@
#define cusparseThybsv_solve cusparseChybsv_solve #define cusparseThybsv_solve cusparseChybsv_solve
#define cusparseThybsv_analysis cusparseChybsv_analysis #define cusparseThybsv_analysis cusparseChybsv_analysis
#define cusparseTcsr2hyb cusparseCcsr2hyb #define cusparseTcsr2hyb cusparseCcsr2hyb
#elif CUDA_VERSION < 11030
#define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType
#define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase
#define T_CSRGDeviceCsrsv2Analysis c_CSRGDeviceCsrsv2Analysis
#define cusparseTcsrsv2_bufferSize cusparseCcsrsv2_bufferSize
#define cusparseTcsrsv2_analysis cusparseCcsrsv2_analysis
#define cusparseTcsrsv2_solve cusparseCcsrsv2_solve
#else
#define T_CSRGIsNullSvBuffer c_CSRGIsNullSvBuffer
#define T_CSRGIsNullSvDescr c_CSRGIsNullSvDescr
#define T_CSRGIsNullMvDescr c_CSRGIsNullMvDescr
#define T_CSRGCreateSpMVDescr c_CSRGCreateSpMVDescr
#endif #endif
#include "fcusparse_fct.h" #include "fcusparse_fct.h"

@ -228,7 +228,7 @@ int gpuInit(int dev)
if (!psb_cublas_handle) if (!psb_cublas_handle)
psb_cudaCreateCublasHandle(); psb_cudaCreateCublasHandle();
hasUVA=getDeviceHasUVA(); hasUVA=getDeviceHasUVA();
FcusparseCreate();
return err; return err;
} }
@ -240,7 +240,7 @@ void gpuClose()
st1=spgpuGetStream(psb_cuda_handle); st1=spgpuGetStream(psb_cuda_handle);
if (! psb_cublas_handle) if (! psb_cublas_handle)
cublasGetStream(psb_cublas_handle,&st2); cublasGetStream(psb_cublas_handle,&st2);
FcusparseDestroy();
psb_cudaDestroyHandle(); psb_cudaDestroyHandle();
if (st1 != st2) if (st1 != st2)
psb_cudaDestroyCublasHandle(); psb_cudaDestroyCublasHandle();

@ -107,6 +107,16 @@ module d_cusparse_mod
integer(c_int) :: res integer(c_int) :: res
end function d_CSRGDeviceCsrsmAnalysis end function d_CSRGDeviceCsrsmAnalysis
end interface end interface
#else
interface CSRGIsNullSvBuffer
function d_CSRGIsNullSvBuffer(Mat) &
& bind(c,name="d_CSRGIsNullSvBuffer") result(res)
use iso_c_binding
import d_Cmat
type(d_Cmat) :: Mat
integer(c_int) :: res
end function d_CSRGIsNullSvBuffer
end interface
#endif #endif
interface CSRGDeviceAlloc interface CSRGDeviceAlloc

@ -86,6 +86,12 @@
#define cusparseTcsrsv2_bufferSize cusparseDcsrsv2_bufferSize #define cusparseTcsrsv2_bufferSize cusparseDcsrsv2_bufferSize
#define cusparseTcsrsv2_analysis cusparseDcsrsv2_analysis #define cusparseTcsrsv2_analysis cusparseDcsrsv2_analysis
#define cusparseTcsrsv2_solve cusparseDcsrsv2_solve #define cusparseTcsrsv2_solve cusparseDcsrsv2_solve
#else
#define T_CSRGIsNullSvBuffer d_CSRGIsNullSvBuffer
#define T_CSRGIsNullSvDescr d_CSRGIsNullSvDescr
#define T_CSRGIsNullMvDescr d_CSRGIsNullMvDescr
#define T_CSRGCreateSpMVDescr d_CSRGCreateSpMVDescr
#endif #endif

@ -53,14 +53,17 @@ int FcusparseCreate()
if (ret == CUSPARSE_STATUS_SUCCESS) if (ret == CUSPARSE_STATUS_SUCCESS)
cusparse_handle = handle; cusparse_handle = handle;
} }
fprintf(stderr,"Created cusparses_handle\n");
return (ret); return (ret);
} }
int FcusparseDestroy() int FcusparseDestroy()
{ {
int val; int val;
val = (int) cusparseDestroy(*cusparse_handle); if (cusparse_handle!=NULL){
free(cusparse_handle); val = (int) cusparseDestroy(*cusparse_handle);
free(cusparse_handle);
}
cusparse_handle=NULL; cusparse_handle=NULL;
return(val); return(val);
} }

@ -39,7 +39,7 @@ typedef struct T_CSRGDeviceMat
size_t mvbsize, svbsize; size_t mvbsize, svbsize;
void *mvbuffer, *svbuffer; void *mvbuffer, *svbuffer;
#else #else
cusparseSpMatDescr_t descr; cusparseSpMatDescr_t *spmvDescr;
cusparseSpSVDescr_t *spsvDescr; cusparseSpSVDescr_t *spsvDescr;
size_t mvbsize, svbsize; size_t mvbsize, svbsize;
void *mvbuffer, *svbuffer; void *mvbuffer, *svbuffer;
@ -102,6 +102,12 @@ int T_CSRGDeviceSetMatType(T_Cmat *Mat, int type);
int T_CSRGDeviceSetMatFillMode(T_Cmat *Mat, int type); int T_CSRGDeviceSetMatFillMode(T_Cmat *Mat, int type);
int T_CSRGDeviceSetMatDiagType(T_Cmat *Mat, int type); int T_CSRGDeviceSetMatDiagType(T_Cmat *Mat, int type);
int T_CSRGDeviceSetMatIndexBase(T_Cmat *Mat, int type); int T_CSRGDeviceSetMatIndexBase(T_Cmat *Mat, int type);
#else
int T_CSRGCreateSpMVDescr(T_CSRGDeviceMat *cMat);
int T_CSRGIsNullSvBuffer(T_CSRGDeviceMat *cMat);
int T_CSRGIsNullSvDescr(T_CSRGDeviceMat *cMat);
int T_CSRGIsNullMvDescr(T_CSRGDeviceMat *cMat);
#endif #endif
@ -187,15 +193,20 @@ int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX,
(void *) vY, CUSPARSE_BASE_TYPE, (void *) vY, CUSPARSE_BASE_TYPE,
CUSPARSE_BASE_TYPE, (void *) cMat->mvbuffer)); CUSPARSE_BASE_TYPE, (void *) cMat->mvbuffer));
#elif CUDA_VERSION <= 12030 #else
cusparseDnVecDescr_t vecX, vecY; cusparseDnVecDescr_t vecX, vecY;
size_t bfsz; size_t bfsz;
if (T_CSRGIsNullMvDescr(cMat)) {
cMat->spmvDescr = (cusparseSpMatDescr_t *) malloc(sizeof(cusparseSpMatDescr_t *));
}
T_CSRGCreateSpMVDescr(cMat);
vX=x->v_; vX=x->v_;
vY=y->v_; vY=y->v_;
CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, cMat->m, vY, CUSPARSE_BASE_TYPE) ); CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, cMat->m, vY, CUSPARSE_BASE_TYPE) );
CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, cMat->n, vX, CUSPARSE_BASE_TYPE) ); CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, cMat->n, vX, CUSPARSE_BASE_TYPE) );
CHECK_CUSPARSE(cusparseSpMV_bufferSize(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, CHECK_CUSPARSE(cusparseSpMV_bufferSize(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha,cMat->descr,vecX,&beta,vecY, &alpha,(*(cMat->spmvDescr)),vecX,&beta,vecY,
CUSPARSE_BASE_TYPE,CUSPARSE_SPMV_ALG_DEFAULT, CUSPARSE_BASE_TYPE,CUSPARSE_SPMV_ALG_DEFAULT,
&bfsz)); &bfsz));
if (bfsz > cMat->mvbsize) { if (bfsz > cMat->mvbsize) {
@ -207,13 +218,12 @@ int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX,
cMat->mvbsize = bfsz; cMat->mvbsize = bfsz;
} }
CHECK_CUSPARSE(cusparseSpMV(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, CHECK_CUSPARSE(cusparseSpMV(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha,cMat->descr,vecX,&beta,vecY, &alpha,(*(cMat->spmvDescr)),vecX,&beta,vecY,
CUSPARSE_BASE_TYPE,CUSPARSE_SPMV_ALG_DEFAULT, CUSPARSE_BASE_TYPE,CUSPARSE_SPMV_ALG_DEFAULT,
cMat->mvbuffer)); cMat->mvbuffer));
CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) ); CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) );
CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) ); CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) );
#else CHECK_CUSPARSE(cusparseDestroySpMat(*(cMat->spmvDescr)));
fprintf(stderr,"Unsupported CUSPARSE version\n");
#endif #endif
} }
@ -246,16 +256,24 @@ int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX,
(const TYPE *) vX, (TYPE *) vY, (const TYPE *) vX, (TYPE *) vY,
CUSPARSE_SOLVE_POLICY_USE_LEVEL, CUSPARSE_SOLVE_POLICY_USE_LEVEL,
(void *) cMat->svbuffer)); (void *) cMat->svbuffer));
#elif CUDA_VERSION <= 12030 #else
cusparseDnVecDescr_t vecX, vecY; cusparseDnVecDescr_t vecX, vecY;
size_t bfsz; size_t bfsz;
vX=x->v_; vX=x->v_;
vY=y->v_; vY=y->v_;
cMat->spsvDescr=(cusparseSpSVDescr_t *) malloc(sizeof(cusparseSpSVDescr_t *));
CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, cMat->m, vY, CUSPARSE_BASE_TYPE) ); CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, cMat->m, vY, CUSPARSE_BASE_TYPE) );
CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, cMat->n, vX, CUSPARSE_BASE_TYPE) ); CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, cMat->n, vX, CUSPARSE_BASE_TYPE) );
// fprintf(stderr,"Entry to SpSVDevice: %d %p\n",
// T_CSRGIsNullSvDescr(cMat),cMat->spsvDescr);
if (T_CSRGIsNullSvDescr(cMat)) {
cMat->spsvDescr=(cusparseSpSVDescr_t *) malloc(sizeof(cusparseSpSVDescr_t *));
cMat->svbsize=0;
CHECK_CUSPARSE( cusparseSpSV_createDescr(cMat->spsvDescr) );
}
//fprintf(stderr,"Entry to SpSVDevice: %d %p %d\n",
// T_CSRGIsNullSvDescr(cMat),cMat->spsvDescr,cMat->svbsize);
CHECK_CUSPARSE(cusparseSpSV_bufferSize(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, CHECK_CUSPARSE(cusparseSpSV_bufferSize(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha,cMat->descr,vecX,vecY, &alpha,*(cMat->spmvDescr),vecX,vecY,
CUSPARSE_BASE_TYPE, CUSPARSE_BASE_TYPE,
CUSPARSE_SPSV_ALG_DEFAULT, CUSPARSE_SPSV_ALG_DEFAULT,
*(cMat->spsvDescr), *(cMat->spsvDescr),
@ -267,31 +285,49 @@ int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX,
} }
CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz)); CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz));
cMat->svbsize=bfsz; cMat->svbsize=bfsz;
}
if (cMat->spsvDescr==NULL) {
CHECK_CUSPARSE(cusparseSpSV_analysis(*my_handle, CHECK_CUSPARSE(cusparseSpSV_analysis(*my_handle,
CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha, &alpha,
cMat->descr, *(cMat->spmvDescr),
vecX, vecY, vecX, vecY,
CUSPARSE_BASE_TYPE, CUSPARSE_BASE_TYPE,
CUSPARSE_SPSV_ALG_DEFAULT, CUSPARSE_SPSV_ALG_DEFAULT,
*(cMat->spsvDescr), *(cMat->spsvDescr),
cMat->svbuffer)); cMat->svbuffer));
} }
if (T_CSRGIsNullSvBuffer(cMat)) {
fprintf(stderr,"SpSV_SOLVE NULL spsv-buffer\n");
}
T_CSRGCreateSpMVDescr(cMat);
CHECK_CUSPARSE(cusparseSpSV_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, CHECK_CUSPARSE(cusparseSpSV_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha,cMat->descr,vecX,vecY, &alpha,*(cMat->spmvDescr),vecX,vecY,
CUSPARSE_BASE_TYPE, CUSPARSE_BASE_TYPE,
CUSPARSE_SPSV_ALG_DEFAULT, CUSPARSE_SPSV_ALG_DEFAULT,
*(cMat->spsvDescr))); *(cMat->spsvDescr)));
CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) ); CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) );
CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) ); CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) );
#else CHECK_CUSPARSE(cusparseDestroySpMat(*(cMat->spmvDescr)));
fprintf(stderr,"Unsupported CUSPARSE version\n");
#endif #endif
} }
T_CSRGCreateSpMVDescr(T_CSRGDeviceMat *cMat)
{
int64_t tr,tc,tz;
tr = cMat->m;
tc = cMat->n;
tz = cMat->nz;
CHECK_CUSPARSE(cusparseCreateCsr(cMat->spmvDescr,
tr,tc,tz,
(void *) cMat->irp,
(void *) cMat->ja,
(void *) cMat->val,
CUSPARSE_INDEX_32I,
CUSPARSE_INDEX_32I,
CUSPARSE_INDEX_BASE_ONE,
CUSPARSE_BASE_TYPE) );
}
int T_CSRGDeviceAlloc(T_Cmat *Matrix,int nr, int nc, int nz) int T_CSRGDeviceAlloc(T_Cmat *Matrix,int nr, int nc, int nz)
{ {
T_CSRGDeviceMat *cMat; T_CSRGDeviceMat *cMat;
@ -353,17 +389,8 @@ int T_CSRGDeviceAlloc(T_Cmat *Matrix,int nr, int nc, int nz)
#else #else
int64_t rows=nr, cols=nc, nnz=nz;
CHECK_CUSPARSE(cusparseCreateCsr(&(cMat->descr), cMat->spmvDescr=NULL;
rows, cols, nnz,
(void *) cMat->irp,
(void *) cMat->ja,
(void *) cMat->val,
CUSPARSE_INDEX_32I,
CUSPARSE_INDEX_32I,
CUSPARSE_INDEX_BASE_ONE,
CUSPARSE_BASE_TYPE) );
cMat->spsvDescr=NULL; cMat->spsvDescr=NULL;
cMat->mvbuffer=NULL; cMat->mvbuffer=NULL;
cMat->svbuffer=NULL; cMat->svbuffer=NULL;
@ -389,20 +416,23 @@ int T_CSRGDeviceFree(T_Cmat *Matrix)
cusparseDestroyMatDescr(cMat->descr); cusparseDestroyMatDescr(cMat->descr);
cusparseDestroyCsrsv2Info(cMat->triang); cusparseDestroyCsrsv2Info(cMat->triang);
#else #else
cusparseDestroySpMat(cMat->descr); if (!T_CSRGIsNullMvDescr(cMat)) {
if (cMat->spsvDescr!=NULL) { // already destroyed spmvDescr, just free the pointer
CHECK_CUSPARSE( cusparseSpSV_destroyDescr(*(cMat->spsvDescr))); free(cMat->spmvDescr);
free(cMat->spsvDescr); cMat->spmvDescr=NULL;
cMat->spsvDescr=NULL;
} }
if (cMat->mvbuffer!=NULL) if (cMat->mvbuffer!=NULL)
CHECK_CUDA( cudaFree(cMat->mvbuffer)); CHECK_CUDA( cudaFree(cMat->mvbuffer));
cMat->mvbuffer=NULL;
cMat->mvbsize=0;
if (!T_CSRGIsNullSvDescr(cMat)) {
CHECK_CUSPARSE(cusparseSpSV_destroyDescr(*(cMat->spsvDescr)));
free(cMat->spsvDescr);
cMat->spsvDescr=NULL;
}
if (cMat->svbuffer!=NULL) if (cMat->svbuffer!=NULL)
CHECK_CUDA( cudaFree(cMat->svbuffer)); CHECK_CUDA( cudaFree(cMat->svbuffer));
cMat->spsvDescr=NULL;
cMat->mvbuffer=NULL;
cMat->svbuffer=NULL; cMat->svbuffer=NULL;
cMat->mvbsize=0;
cMat->svbsize=0; cMat->svbsize=0;
#endif #endif
free(cMat); free(cMat);
@ -500,7 +530,7 @@ int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type)
T_CSRGDeviceMat *cMat= Matrix->mat; T_CSRGDeviceMat *cMat= Matrix->mat;
cusparseFillMode_t mode=type; cusparseFillMode_t mode=type;
CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->descr, CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->spmvDescr,
CUSPARSE_SPMAT_FILL_MODE, CUSPARSE_SPMAT_FILL_MODE,
(const void*) &mode, (const void*) &mode,
sizeof(cusparseFillMode_t))); sizeof(cusparseFillMode_t)));
@ -511,13 +541,27 @@ int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type)
{ {
T_CSRGDeviceMat *cMat= Matrix->mat; T_CSRGDeviceMat *cMat= Matrix->mat;
cusparseDiagType_t cutype=type; cusparseDiagType_t cutype=type;
CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->descr, CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->spmvDescr,
CUSPARSE_SPMAT_DIAG_TYPE, CUSPARSE_SPMAT_DIAG_TYPE,
(const void*) &cutype, (const void*) &cutype,
sizeof(cusparseDiagType_t))); sizeof(cusparseDiagType_t)));
return(0); return(0);
} }
int T_CSRGIsNullMvDescr(T_CSRGDeviceMat *cMat)
{
return(cMat->spmvDescr == NULL);
}
int T_CSRGIsNullSvBuffer(T_CSRGDeviceMat *cMat)
{
return(cMat->svbuffer == NULL);
}
int T_CSRGIsNullSvDescr(T_CSRGDeviceMat *cMat)
{
return(cMat->spsvDescr == NULL);
}
#endif #endif
int T_CSRGHost2Device(T_Cmat *Matrix, int m, int n, int nz, int T_CSRGHost2Device(T_Cmat *Matrix, int m, int n, int nz,
@ -550,6 +594,8 @@ int T_CSRGHost2Device(T_Cmat *Matrix, int m, int n, int nz,
cMat->triang, CUSPARSE_SOLVE_POLICY_USE_LEVEL, cMat->triang, CUSPARSE_SOLVE_POLICY_USE_LEVEL,
cMat->svbuffer)); cMat->svbuffer));
} }
#else
//cusparseSetMatType(*(cMat->spmvDescr),CUSPARSE_MATRIX_TYPE_GENERAL);
#endif #endif
return(CUSPARSE_STATUS_SUCCESS); return(CUSPARSE_STATUS_SUCCESS);
} }

@ -227,7 +227,7 @@ subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm)
endif endif
#else #elif 0
if (a%is_unit()) then if (a%is_unit()) then
! !
@ -309,6 +309,65 @@ subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm)
!!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) !!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat)
!!$ end if !!$ end if
#else
if (a%is_unit()) then
!
! CUSPARSE has the habit of storing the diagonal and then ignoring,
! whereas we do not store it. Hence this adapter code.
!
nzdi = nz + m
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi)
if (info == 0) then
if (a%is_unit()) then
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
else
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
end if
end if
!!! We are explicitly adding the diagonal
if ((info == 0) .and. a%is_triangle()) then
if ((info == 0).and.a%is_upper()) then
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
else
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
end if
end if
if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info)
if (info == 0) then
irpdi(1) = 1
if (a%is_triangle().and.a%is_upper()) then
do i=1,m
j = irpdi(i)
jadi(j) = i
valdi(j) = cone
nrz = a%irp(i+1)-a%irp(i)
jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)
valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1)
irpdi(i+1) = j + nrz + 1
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
end do
else
do i=1,m
j = irpdi(i)
nrz = a%irp(i+1)-a%irp(i)
jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)
valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1)
jadi(j+nrz) = i
valdi(j+nrz) = cone
irpdi(i+1) = j + nrz + 1
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
end do
end if
end if
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi)
else
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz)
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val)
endif
#endif #endif
call a%set_sync() call a%set_sync()

@ -308,6 +308,7 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm)
!!$ if ((info == 0) .and. a%is_triangle()) then !!$ if ((info == 0) .and. a%is_triangle()) then
!!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) !!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat)
!!$ end if !!$ end if
#else #else
if (a%is_unit()) then if (a%is_unit()) then
@ -325,9 +326,7 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm)
end if end if
end if end if
!!! We are explicitly adding the diagonal !!! We are explicitly adding the diagonal
!! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
if ((info == 0) .and. a%is_triangle()) then if ((info == 0) .and. a%is_triangle()) then
!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular)
if ((info == 0).and.a%is_upper()) then if ((info == 0).and.a%is_upper()) then
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
else else
@ -366,24 +365,6 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm)
else else
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz)
!info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general)
!!$ if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one)
!!$ if (a%is_triangle()) then
!!$ if (info == 0) then
!!$ if (a%is_unit()) then
!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
!!$ else
!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
!!$ end if
!!$ end if
!!$ if ((info == 0) )then
!!$ if ((info == 0).and.a%is_upper()) then
!!$ info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
!!$ else
!!$ info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
!!$ end if
!!$ end if
!!$ end if
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val)
endif endif

@ -227,7 +227,7 @@ subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm)
endif endif
#else #elif 0
if (a%is_unit()) then if (a%is_unit()) then
! !
@ -309,6 +309,65 @@ subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm)
!!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) !!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat)
!!$ end if !!$ end if
#else
if (a%is_unit()) then
!
! CUSPARSE has the habit of storing the diagonal and then ignoring,
! whereas we do not store it. Hence this adapter code.
!
nzdi = nz + m
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi)
if (info == 0) then
if (a%is_unit()) then
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
else
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
end if
end if
!!! We are explicitly adding the diagonal
if ((info == 0) .and. a%is_triangle()) then
if ((info == 0).and.a%is_upper()) then
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
else
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
end if
end if
if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info)
if (info == 0) then
irpdi(1) = 1
if (a%is_triangle().and.a%is_upper()) then
do i=1,m
j = irpdi(i)
jadi(j) = i
valdi(j) = sone
nrz = a%irp(i+1)-a%irp(i)
jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)
valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1)
irpdi(i+1) = j + nrz + 1
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
end do
else
do i=1,m
j = irpdi(i)
nrz = a%irp(i+1)-a%irp(i)
jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)
valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1)
jadi(j+nrz) = i
valdi(j+nrz) = sone
irpdi(i+1) = j + nrz + 1
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
end do
end if
end if
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi)
else
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz)
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val)
endif
#endif #endif
call a%set_sync() call a%set_sync()

@ -227,7 +227,7 @@ subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm)
endif endif
#else #elif 0
if (a%is_unit()) then if (a%is_unit()) then
! !
@ -309,6 +309,65 @@ subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm)
!!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) !!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat)
!!$ end if !!$ end if
#else
if (a%is_unit()) then
!
! CUSPARSE has the habit of storing the diagonal and then ignoring,
! whereas we do not store it. Hence this adapter code.
!
nzdi = nz + m
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi)
if (info == 0) then
if (a%is_unit()) then
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
else
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
end if
end if
!!! We are explicitly adding the diagonal
if ((info == 0) .and. a%is_triangle()) then
if ((info == 0).and.a%is_upper()) then
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
else
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
end if
end if
if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info)
if (info == 0) then
irpdi(1) = 1
if (a%is_triangle().and.a%is_upper()) then
do i=1,m
j = irpdi(i)
jadi(j) = i
valdi(j) = zone
nrz = a%irp(i+1)-a%irp(i)
jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)
valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1)
irpdi(i+1) = j + nrz + 1
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
end do
else
do i=1,m
j = irpdi(i)
nrz = a%irp(i+1)-a%irp(i)
jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)
valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1)
jadi(j+nrz) = i
valdi(j+nrz) = zone
irpdi(i+1) = j + nrz + 1
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
end do
end if
end if
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi)
else
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz)
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val)
endif
#endif #endif
call a%set_sync() call a%set_sync()

@ -97,6 +97,7 @@ module s_cusparse_mod
end function s_CSRGDeviceSetMatIndexBase end function s_CSRGDeviceSetMatIndexBase
end interface end interface
#if CUDA_SHORT_VERSION <= 10
interface CSRGDeviceCsrsmAnalysis interface CSRGDeviceCsrsmAnalysis
function s_CSRGDeviceCsrsmAnalysis(Mat) & function s_CSRGDeviceCsrsmAnalysis(Mat) &
& bind(c,name="s_CSRGDeviceCsrsmAnalysis") result(res) & bind(c,name="s_CSRGDeviceCsrsmAnalysis") result(res)
@ -106,6 +107,17 @@ module s_cusparse_mod
integer(c_int) :: res integer(c_int) :: res
end function s_CSRGDeviceCsrsmAnalysis end function s_CSRGDeviceCsrsmAnalysis
end interface end interface
#else
interface CSRGIsNullSvBuffer
function s_CSRGIsNullSvBuffer(Mat) &
& bind(c,name="s_CSRGIsNullSvBuffer") result(res)
use iso_c_binding
import s_Cmat
type(s_Cmat) :: Mat
integer(c_int) :: res
end function s_CSRGIsNullSvBuffer
end interface
#endif
interface CSRGDeviceAlloc interface CSRGDeviceAlloc
function s_CSRGDeviceAlloc(Mat,nr,nc,nz) & function s_CSRGDeviceAlloc(Mat,nr,nc,nz) &

@ -38,8 +38,9 @@
#include "cintrf.h" #include "cintrf.h"
#include "fcusparse.h" #include "fcusparse.h"
/* Single precision real */
#define TYPE float /* Double precision real */
#define TYPE float
#define CUSPARSE_BASE_TYPE CUDA_R_32F #define CUSPARSE_BASE_TYPE CUDA_R_32F
#define T_CSRGDeviceMat s_CSRGDeviceMat #define T_CSRGDeviceMat s_CSRGDeviceMat
#define T_Cmat s_Cmat #define T_Cmat s_Cmat
@ -60,7 +61,6 @@
#define cusparseTcsrmv cusparseScsrmv #define cusparseTcsrmv cusparseScsrmv
#define cusparseTcsrsv_solve cusparseScsrsv_solve #define cusparseTcsrsv_solve cusparseScsrsv_solve
#define cusparseTcsrsv_analysis cusparseScsrsv_analysis #define cusparseTcsrsv_analysis cusparseScsrsv_analysis
#define T_HYBGDeviceMat s_HYBGDeviceMat #define T_HYBGDeviceMat s_HYBGDeviceMat
#define T_Hmat s_Hmat #define T_Hmat s_Hmat
#define T_HYBGDeviceFree s_HYBGDeviceFree #define T_HYBGDeviceFree s_HYBGDeviceFree
@ -78,7 +78,6 @@
#define cusparseThybsv_analysis cusparseShybsv_analysis #define cusparseThybsv_analysis cusparseShybsv_analysis
#define cusparseTcsr2hyb cusparseScsr2hyb #define cusparseTcsr2hyb cusparseScsr2hyb
#elif CUDA_VERSION < 11030 #elif CUDA_VERSION < 11030
#define T_CSRGDeviceSetMatType s_CSRGDeviceSetMatType #define T_CSRGDeviceSetMatType s_CSRGDeviceSetMatType
@ -87,6 +86,13 @@
#define cusparseTcsrsv2_bufferSize cusparseScsrsv2_bufferSize #define cusparseTcsrsv2_bufferSize cusparseScsrsv2_bufferSize
#define cusparseTcsrsv2_analysis cusparseScsrsv2_analysis #define cusparseTcsrsv2_analysis cusparseScsrsv2_analysis
#define cusparseTcsrsv2_solve cusparseScsrsv2_solve #define cusparseTcsrsv2_solve cusparseScsrsv2_solve
#else
#define T_CSRGIsNullSvBuffer s_CSRGIsNullSvBuffer
#define T_CSRGIsNullSvDescr s_CSRGIsNullSvDescr
#define T_CSRGIsNullMvDescr s_CSRGIsNullMvDescr
#define T_CSRGCreateSpMVDescr s_CSRGCreateSpMVDescr
#endif #endif
#include "fcusparse_fct.h" #include "fcusparse_fct.h"

@ -97,6 +97,7 @@ module z_cusparse_mod
end function z_CSRGDeviceSetMatIndexBase end function z_CSRGDeviceSetMatIndexBase
end interface end interface
#if CUDA_SHORT_VERSION <= 10
interface CSRGDeviceCsrsmAnalysis interface CSRGDeviceCsrsmAnalysis
function z_CSRGDeviceCsrsmAnalysis(Mat) & function z_CSRGDeviceCsrsmAnalysis(Mat) &
& bind(c,name="z_CSRGDeviceCsrsmAnalysis") result(res) & bind(c,name="z_CSRGDeviceCsrsmAnalysis") result(res)
@ -106,6 +107,17 @@ module z_cusparse_mod
integer(c_int) :: res integer(c_int) :: res
end function z_CSRGDeviceCsrsmAnalysis end function z_CSRGDeviceCsrsmAnalysis
end interface end interface
#else
interface CSRGIsNullSvBuffer
function z_CSRGIsNullSvBuffer(Mat) &
& bind(c,name="z_CSRGIsNullSvBuffer") result(res)
use iso_c_binding
import z_Cmat
type(z_Cmat) :: Mat
integer(c_int) :: res
end function z_CSRGIsNullSvBuffer
end interface
#endif
interface CSRGDeviceAlloc interface CSRGDeviceAlloc
function z_CSRGDeviceAlloc(Mat,nr,nc,nz) & function z_CSRGDeviceAlloc(Mat,nr,nc,nz) &

@ -38,7 +38,8 @@
#include "cintrf.h" #include "cintrf.h"
#include "fcusparse.h" #include "fcusparse.h"
/* Double precision complex */
/* Double precision real */
#define TYPE double complex #define TYPE double complex
#define CUSPARSE_BASE_TYPE CUDA_C_64F #define CUSPARSE_BASE_TYPE CUDA_C_64F
#define T_CSRGDeviceMat z_CSRGDeviceMat #define T_CSRGDeviceMat z_CSRGDeviceMat
@ -85,8 +86,14 @@
#define cusparseTcsrsv2_bufferSize cusparseZcsrsv2_bufferSize #define cusparseTcsrsv2_bufferSize cusparseZcsrsv2_bufferSize
#define cusparseTcsrsv2_analysis cusparseZcsrsv2_analysis #define cusparseTcsrsv2_analysis cusparseZcsrsv2_analysis
#define cusparseTcsrsv2_solve cusparseZcsrsv2_solve #define cusparseTcsrsv2_solve cusparseZcsrsv2_solve
#else
#define T_CSRGIsNullSvBuffer z_CSRGIsNullSvBuffer
#define T_CSRGIsNullSvDescr z_CSRGIsNullSvDescr
#define T_CSRGIsNullMvDescr z_CSRGIsNullMvDescr
#define T_CSRGCreateSpMVDescr z_CSRGCreateSpMVDescr
#endif #endif
#include "fcusparse_fct.h" #include "fcusparse_fct.h"

Loading…
Cancel
Save