Modified CSRG to work with latest versions; cusparse docs are unclear

repack-nvid
Salvatore Filippone 1 year ago
parent 6b65199afb
commit d28ea462d9

@ -97,6 +97,7 @@ module c_cusparse_mod
end function c_CSRGDeviceSetMatIndexBase
end interface
#if CUDA_SHORT_VERSION <= 10
interface CSRGDeviceCsrsmAnalysis
function c_CSRGDeviceCsrsmAnalysis(Mat) &
& bind(c,name="c_CSRGDeviceCsrsmAnalysis") result(res)
@ -106,6 +107,17 @@ module c_cusparse_mod
integer(c_int) :: res
end function c_CSRGDeviceCsrsmAnalysis
end interface
#else
interface CSRGIsNullSvBuffer
function c_CSRGIsNullSvBuffer(Mat) &
& bind(c,name="c_CSRGIsNullSvBuffer") result(res)
use iso_c_binding
import c_Cmat
type(c_Cmat) :: Mat
integer(c_int) :: res
end function c_CSRGIsNullSvBuffer
end interface
#endif
interface CSRGDeviceAlloc
function c_CSRGDeviceAlloc(Mat,nr,nc,nz) &

@ -38,7 +38,8 @@
#include "cintrf.h"
#include "fcusparse.h"
/* Single precision complex */
/* Double precision real */
#define TYPE float complex
#define CUSPARSE_BASE_TYPE CUDA_C_32F
#define T_CSRGDeviceMat c_CSRGDeviceMat
@ -54,25 +55,12 @@
#define T_CSRGDeviceGetParms c_CSRGDeviceGetParms
#if CUDA_SHORT_VERSION <= 10
#define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType
#define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase
#define T_CSRGDeviceCsrsmAnalysis c_CSRGDeviceCsrsmAnalysis
#define cusparseTcsrmv cusparseCcsrmv
#define cusparseTcsrsv_solve cusparseCcsrsv_solve
#define cusparseTcsrsv_analysis cusparseCcsrsv_analysis
#elif CUDA_VERSION < 11030
#define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType
#define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase
#define T_CSRGDeviceCsrsv2Analysis c_CSRGDeviceCsrsv2Analysis
#define cusparseTcsrsv2_bufferSize cusparseCcsrsv2_bufferSize
#define cusparseTcsrsv2_analysis cusparseCcsrsv2_analysis
#define cusparseTcsrsv2_solve cusparseCcsrsv2_solve
#else
#define T_HYBGDeviceMat c_HYBGDeviceMat
#define T_Hmat c_Hmat
#define T_HYBGDeviceFree c_HYBGDeviceFree
@ -89,6 +77,22 @@
#define cusparseThybsv_solve cusparseChybsv_solve
#define cusparseThybsv_analysis cusparseChybsv_analysis
#define cusparseTcsr2hyb cusparseCcsr2hyb
#elif CUDA_VERSION < 11030
#define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType
#define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase
#define T_CSRGDeviceCsrsv2Analysis c_CSRGDeviceCsrsv2Analysis
#define cusparseTcsrsv2_bufferSize cusparseCcsrsv2_bufferSize
#define cusparseTcsrsv2_analysis cusparseCcsrsv2_analysis
#define cusparseTcsrsv2_solve cusparseCcsrsv2_solve
#else
#define T_CSRGIsNullSvBuffer c_CSRGIsNullSvBuffer
#define T_CSRGIsNullSvDescr c_CSRGIsNullSvDescr
#define T_CSRGIsNullMvDescr c_CSRGIsNullMvDescr
#define T_CSRGCreateSpMVDescr c_CSRGCreateSpMVDescr
#endif
#include "fcusparse_fct.h"

@ -228,7 +228,7 @@ int gpuInit(int dev)
if (!psb_cublas_handle)
psb_cudaCreateCublasHandle();
hasUVA=getDeviceHasUVA();
FcusparseCreate();
return err;
}
@ -240,7 +240,7 @@ void gpuClose()
st1=spgpuGetStream(psb_cuda_handle);
if (! psb_cublas_handle)
cublasGetStream(psb_cublas_handle,&st2);
FcusparseDestroy();
psb_cudaDestroyHandle();
if (st1 != st2)
psb_cudaDestroyCublasHandle();

@ -107,6 +107,16 @@ module d_cusparse_mod
integer(c_int) :: res
end function d_CSRGDeviceCsrsmAnalysis
end interface
#else
interface CSRGIsNullSvBuffer
function d_CSRGIsNullSvBuffer(Mat) &
& bind(c,name="d_CSRGIsNullSvBuffer") result(res)
use iso_c_binding
import d_Cmat
type(d_Cmat) :: Mat
integer(c_int) :: res
end function d_CSRGIsNullSvBuffer
end interface
#endif
interface CSRGDeviceAlloc

@ -86,6 +86,12 @@
#define cusparseTcsrsv2_bufferSize cusparseDcsrsv2_bufferSize
#define cusparseTcsrsv2_analysis cusparseDcsrsv2_analysis
#define cusparseTcsrsv2_solve cusparseDcsrsv2_solve
#else
#define T_CSRGIsNullSvBuffer d_CSRGIsNullSvBuffer
#define T_CSRGIsNullSvDescr d_CSRGIsNullSvDescr
#define T_CSRGIsNullMvDescr d_CSRGIsNullMvDescr
#define T_CSRGCreateSpMVDescr d_CSRGCreateSpMVDescr
#endif

@ -53,14 +53,17 @@ int FcusparseCreate()
if (ret == CUSPARSE_STATUS_SUCCESS)
cusparse_handle = handle;
}
fprintf(stderr,"Created cusparses_handle\n");
return (ret);
}
int FcusparseDestroy()
{
int val;
if (cusparse_handle!=NULL){
val = (int) cusparseDestroy(*cusparse_handle);
free(cusparse_handle);
}
cusparse_handle=NULL;
return(val);
}

@ -39,7 +39,7 @@ typedef struct T_CSRGDeviceMat
size_t mvbsize, svbsize;
void *mvbuffer, *svbuffer;
#else
cusparseSpMatDescr_t descr;
cusparseSpMatDescr_t *spmvDescr;
cusparseSpSVDescr_t *spsvDescr;
size_t mvbsize, svbsize;
void *mvbuffer, *svbuffer;
@ -102,6 +102,12 @@ int T_CSRGDeviceSetMatType(T_Cmat *Mat, int type);
int T_CSRGDeviceSetMatFillMode(T_Cmat *Mat, int type);
int T_CSRGDeviceSetMatDiagType(T_Cmat *Mat, int type);
int T_CSRGDeviceSetMatIndexBase(T_Cmat *Mat, int type);
#else
int T_CSRGCreateSpMVDescr(T_CSRGDeviceMat *cMat);
int T_CSRGIsNullSvBuffer(T_CSRGDeviceMat *cMat);
int T_CSRGIsNullSvDescr(T_CSRGDeviceMat *cMat);
int T_CSRGIsNullMvDescr(T_CSRGDeviceMat *cMat);
#endif
@ -187,15 +193,20 @@ int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX,
(void *) vY, CUSPARSE_BASE_TYPE,
CUSPARSE_BASE_TYPE, (void *) cMat->mvbuffer));
#elif CUDA_VERSION <= 12030
#else
cusparseDnVecDescr_t vecX, vecY;
size_t bfsz;
if (T_CSRGIsNullMvDescr(cMat)) {
cMat->spmvDescr = (cusparseSpMatDescr_t *) malloc(sizeof(cusparseSpMatDescr_t *));
}
T_CSRGCreateSpMVDescr(cMat);
vX=x->v_;
vY=y->v_;
CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, cMat->m, vY, CUSPARSE_BASE_TYPE) );
CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, cMat->n, vX, CUSPARSE_BASE_TYPE) );
CHECK_CUSPARSE(cusparseSpMV_bufferSize(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha,cMat->descr,vecX,&beta,vecY,
&alpha,(*(cMat->spmvDescr)),vecX,&beta,vecY,
CUSPARSE_BASE_TYPE,CUSPARSE_SPMV_ALG_DEFAULT,
&bfsz));
if (bfsz > cMat->mvbsize) {
@ -207,13 +218,12 @@ int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX,
cMat->mvbsize = bfsz;
}
CHECK_CUSPARSE(cusparseSpMV(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha,cMat->descr,vecX,&beta,vecY,
&alpha,(*(cMat->spmvDescr)),vecX,&beta,vecY,
CUSPARSE_BASE_TYPE,CUSPARSE_SPMV_ALG_DEFAULT,
cMat->mvbuffer));
CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) );
CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) );
#else
fprintf(stderr,"Unsupported CUSPARSE version\n");
CHECK_CUSPARSE(cusparseDestroySpMat(*(cMat->spmvDescr)));
#endif
}
@ -246,16 +256,24 @@ int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX,
(const TYPE *) vX, (TYPE *) vY,
CUSPARSE_SOLVE_POLICY_USE_LEVEL,
(void *) cMat->svbuffer));
#elif CUDA_VERSION <= 12030
#else
cusparseDnVecDescr_t vecX, vecY;
size_t bfsz;
vX=x->v_;
vY=y->v_;
cMat->spsvDescr=(cusparseSpSVDescr_t *) malloc(sizeof(cusparseSpSVDescr_t *));
CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, cMat->m, vY, CUSPARSE_BASE_TYPE) );
CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, cMat->n, vX, CUSPARSE_BASE_TYPE) );
// fprintf(stderr,"Entry to SpSVDevice: %d %p\n",
// T_CSRGIsNullSvDescr(cMat),cMat->spsvDescr);
if (T_CSRGIsNullSvDescr(cMat)) {
cMat->spsvDescr=(cusparseSpSVDescr_t *) malloc(sizeof(cusparseSpSVDescr_t *));
cMat->svbsize=0;
CHECK_CUSPARSE( cusparseSpSV_createDescr(cMat->spsvDescr) );
}
//fprintf(stderr,"Entry to SpSVDevice: %d %p %d\n",
// T_CSRGIsNullSvDescr(cMat),cMat->spsvDescr,cMat->svbsize);
CHECK_CUSPARSE(cusparseSpSV_bufferSize(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha,cMat->descr,vecX,vecY,
&alpha,*(cMat->spmvDescr),vecX,vecY,
CUSPARSE_BASE_TYPE,
CUSPARSE_SPSV_ALG_DEFAULT,
*(cMat->spsvDescr),
@ -267,31 +285,49 @@ int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX,
}
CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz));
cMat->svbsize=bfsz;
}
if (cMat->spsvDescr==NULL) {
CHECK_CUSPARSE(cusparseSpSV_analysis(*my_handle,
CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha,
cMat->descr,
*(cMat->spmvDescr),
vecX, vecY,
CUSPARSE_BASE_TYPE,
CUSPARSE_SPSV_ALG_DEFAULT,
*(cMat->spsvDescr),
cMat->svbuffer));
}
if (T_CSRGIsNullSvBuffer(cMat)) {
fprintf(stderr,"SpSV_SOLVE NULL spsv-buffer\n");
}
T_CSRGCreateSpMVDescr(cMat);
CHECK_CUSPARSE(cusparseSpSV_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha,cMat->descr,vecX,vecY,
&alpha,*(cMat->spmvDescr),vecX,vecY,
CUSPARSE_BASE_TYPE,
CUSPARSE_SPSV_ALG_DEFAULT,
*(cMat->spsvDescr)));
CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) );
CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) );
#else
fprintf(stderr,"Unsupported CUSPARSE version\n");
CHECK_CUSPARSE(cusparseDestroySpMat(*(cMat->spmvDescr)));
#endif
}
T_CSRGCreateSpMVDescr(T_CSRGDeviceMat *cMat)
{
int64_t tr,tc,tz;
tr = cMat->m;
tc = cMat->n;
tz = cMat->nz;
CHECK_CUSPARSE(cusparseCreateCsr(cMat->spmvDescr,
tr,tc,tz,
(void *) cMat->irp,
(void *) cMat->ja,
(void *) cMat->val,
CUSPARSE_INDEX_32I,
CUSPARSE_INDEX_32I,
CUSPARSE_INDEX_BASE_ONE,
CUSPARSE_BASE_TYPE) );
}
int T_CSRGDeviceAlloc(T_Cmat *Matrix,int nr, int nc, int nz)
{
T_CSRGDeviceMat *cMat;
@ -353,17 +389,8 @@ int T_CSRGDeviceAlloc(T_Cmat *Matrix,int nr, int nc, int nz)
#else
int64_t rows=nr, cols=nc, nnz=nz;
CHECK_CUSPARSE(cusparseCreateCsr(&(cMat->descr),
rows, cols, nnz,
(void *) cMat->irp,
(void *) cMat->ja,
(void *) cMat->val,
CUSPARSE_INDEX_32I,
CUSPARSE_INDEX_32I,
CUSPARSE_INDEX_BASE_ONE,
CUSPARSE_BASE_TYPE) );
cMat->spmvDescr=NULL;
cMat->spsvDescr=NULL;
cMat->mvbuffer=NULL;
cMat->svbuffer=NULL;
@ -389,20 +416,23 @@ int T_CSRGDeviceFree(T_Cmat *Matrix)
cusparseDestroyMatDescr(cMat->descr);
cusparseDestroyCsrsv2Info(cMat->triang);
#else
cusparseDestroySpMat(cMat->descr);
if (cMat->spsvDescr!=NULL) {
CHECK_CUSPARSE( cusparseSpSV_destroyDescr(*(cMat->spsvDescr)));
free(cMat->spsvDescr);
cMat->spsvDescr=NULL;
if (!T_CSRGIsNullMvDescr(cMat)) {
// already destroyed spmvDescr, just free the pointer
free(cMat->spmvDescr);
cMat->spmvDescr=NULL;
}
if (cMat->mvbuffer!=NULL)
CHECK_CUDA( cudaFree(cMat->mvbuffer));
cMat->mvbuffer=NULL;
cMat->mvbsize=0;
if (!T_CSRGIsNullSvDescr(cMat)) {
CHECK_CUSPARSE(cusparseSpSV_destroyDescr(*(cMat->spsvDescr)));
free(cMat->spsvDescr);
cMat->spsvDescr=NULL;
}
if (cMat->svbuffer!=NULL)
CHECK_CUDA( cudaFree(cMat->svbuffer));
cMat->spsvDescr=NULL;
cMat->mvbuffer=NULL;
cMat->svbuffer=NULL;
cMat->mvbsize=0;
cMat->svbsize=0;
#endif
free(cMat);
@ -500,7 +530,7 @@ int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type)
T_CSRGDeviceMat *cMat= Matrix->mat;
cusparseFillMode_t mode=type;
CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->descr,
CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->spmvDescr,
CUSPARSE_SPMAT_FILL_MODE,
(const void*) &mode,
sizeof(cusparseFillMode_t)));
@ -511,13 +541,27 @@ int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type)
{
T_CSRGDeviceMat *cMat= Matrix->mat;
cusparseDiagType_t cutype=type;
CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->descr,
CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->spmvDescr,
CUSPARSE_SPMAT_DIAG_TYPE,
(const void*) &cutype,
sizeof(cusparseDiagType_t)));
return(0);
}
int T_CSRGIsNullMvDescr(T_CSRGDeviceMat *cMat)
{
return(cMat->spmvDescr == NULL);
}
int T_CSRGIsNullSvBuffer(T_CSRGDeviceMat *cMat)
{
return(cMat->svbuffer == NULL);
}
int T_CSRGIsNullSvDescr(T_CSRGDeviceMat *cMat)
{
return(cMat->spsvDescr == NULL);
}
#endif
int T_CSRGHost2Device(T_Cmat *Matrix, int m, int n, int nz,
@ -550,6 +594,8 @@ int T_CSRGHost2Device(T_Cmat *Matrix, int m, int n, int nz,
cMat->triang, CUSPARSE_SOLVE_POLICY_USE_LEVEL,
cMat->svbuffer));
}
#else
//cusparseSetMatType(*(cMat->spmvDescr),CUSPARSE_MATRIX_TYPE_GENERAL);
#endif
return(CUSPARSE_STATUS_SUCCESS);
}

@ -227,7 +227,7 @@ subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm)
endif
#else
#elif 0
if (a%is_unit()) then
!
@ -309,6 +309,65 @@ subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm)
!!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat)
!!$ end if
#else
if (a%is_unit()) then
!
! CUSPARSE has the habit of storing the diagonal and then ignoring,
! whereas we do not store it. Hence this adapter code.
!
nzdi = nz + m
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi)
if (info == 0) then
if (a%is_unit()) then
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
else
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
end if
end if
!!! We are explicitly adding the diagonal
if ((info == 0) .and. a%is_triangle()) then
if ((info == 0).and.a%is_upper()) then
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
else
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
end if
end if
if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info)
if (info == 0) then
irpdi(1) = 1
if (a%is_triangle().and.a%is_upper()) then
do i=1,m
j = irpdi(i)
jadi(j) = i
valdi(j) = cone
nrz = a%irp(i+1)-a%irp(i)
jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)
valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1)
irpdi(i+1) = j + nrz + 1
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
end do
else
do i=1,m
j = irpdi(i)
nrz = a%irp(i+1)-a%irp(i)
jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)
valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1)
jadi(j+nrz) = i
valdi(j+nrz) = cone
irpdi(i+1) = j + nrz + 1
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
end do
end if
end if
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi)
else
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz)
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val)
endif
#endif
call a%set_sync()

@ -308,6 +308,7 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm)
!!$ if ((info == 0) .and. a%is_triangle()) then
!!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat)
!!$ end if
#else
if (a%is_unit()) then
@ -325,9 +326,7 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm)
end if
end if
!!! We are explicitly adding the diagonal
!! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
if ((info == 0) .and. a%is_triangle()) then
!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular)
if ((info == 0).and.a%is_upper()) then
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
else
@ -366,24 +365,6 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm)
else
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz)
!info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general)
!!$ if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one)
!!$ if (a%is_triangle()) then
!!$ if (info == 0) then
!!$ if (a%is_unit()) then
!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
!!$ else
!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
!!$ end if
!!$ end if
!!$ if ((info == 0) )then
!!$ if ((info == 0).and.a%is_upper()) then
!!$ info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
!!$ else
!!$ info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
!!$ end if
!!$ end if
!!$ end if
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val)
endif

@ -227,7 +227,7 @@ subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm)
endif
#else
#elif 0
if (a%is_unit()) then
!
@ -309,6 +309,65 @@ subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm)
!!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat)
!!$ end if
#else
if (a%is_unit()) then
!
! CUSPARSE has the habit of storing the diagonal and then ignoring,
! whereas we do not store it. Hence this adapter code.
!
nzdi = nz + m
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi)
if (info == 0) then
if (a%is_unit()) then
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
else
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
end if
end if
!!! We are explicitly adding the diagonal
if ((info == 0) .and. a%is_triangle()) then
if ((info == 0).and.a%is_upper()) then
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
else
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
end if
end if
if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info)
if (info == 0) then
irpdi(1) = 1
if (a%is_triangle().and.a%is_upper()) then
do i=1,m
j = irpdi(i)
jadi(j) = i
valdi(j) = sone
nrz = a%irp(i+1)-a%irp(i)
jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)
valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1)
irpdi(i+1) = j + nrz + 1
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
end do
else
do i=1,m
j = irpdi(i)
nrz = a%irp(i+1)-a%irp(i)
jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)
valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1)
jadi(j+nrz) = i
valdi(j+nrz) = sone
irpdi(i+1) = j + nrz + 1
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
end do
end if
end if
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi)
else
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz)
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val)
endif
#endif
call a%set_sync()

@ -227,7 +227,7 @@ subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm)
endif
#else
#elif 0
if (a%is_unit()) then
!
@ -309,6 +309,65 @@ subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm)
!!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat)
!!$ end if
#else
if (a%is_unit()) then
!
! CUSPARSE has the habit of storing the diagonal and then ignoring,
! whereas we do not store it. Hence this adapter code.
!
nzdi = nz + m
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi)
if (info == 0) then
if (a%is_unit()) then
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
else
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
end if
end if
!!! We are explicitly adding the diagonal
if ((info == 0) .and. a%is_triangle()) then
if ((info == 0).and.a%is_upper()) then
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
else
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
end if
end if
if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info)
if (info == 0) then
irpdi(1) = 1
if (a%is_triangle().and.a%is_upper()) then
do i=1,m
j = irpdi(i)
jadi(j) = i
valdi(j) = zone
nrz = a%irp(i+1)-a%irp(i)
jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)
valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1)
irpdi(i+1) = j + nrz + 1
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
end do
else
do i=1,m
j = irpdi(i)
nrz = a%irp(i+1)-a%irp(i)
jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)
valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1)
jadi(j+nrz) = i
valdi(j+nrz) = zone
irpdi(i+1) = j + nrz + 1
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
end do
end if
end if
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi)
else
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz)
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val)
endif
#endif
call a%set_sync()

@ -97,6 +97,7 @@ module s_cusparse_mod
end function s_CSRGDeviceSetMatIndexBase
end interface
#if CUDA_SHORT_VERSION <= 10
interface CSRGDeviceCsrsmAnalysis
function s_CSRGDeviceCsrsmAnalysis(Mat) &
& bind(c,name="s_CSRGDeviceCsrsmAnalysis") result(res)
@ -106,6 +107,17 @@ module s_cusparse_mod
integer(c_int) :: res
end function s_CSRGDeviceCsrsmAnalysis
end interface
#else
interface CSRGIsNullSvBuffer
function s_CSRGIsNullSvBuffer(Mat) &
& bind(c,name="s_CSRGIsNullSvBuffer") result(res)
use iso_c_binding
import s_Cmat
type(s_Cmat) :: Mat
integer(c_int) :: res
end function s_CSRGIsNullSvBuffer
end interface
#endif
interface CSRGDeviceAlloc
function s_CSRGDeviceAlloc(Mat,nr,nc,nz) &

@ -38,7 +38,8 @@
#include "cintrf.h"
#include "fcusparse.h"
/* Single precision real */
/* Double precision real */
#define TYPE float
#define CUSPARSE_BASE_TYPE CUDA_R_32F
#define T_CSRGDeviceMat s_CSRGDeviceMat
@ -60,7 +61,6 @@
#define cusparseTcsrmv cusparseScsrmv
#define cusparseTcsrsv_solve cusparseScsrsv_solve
#define cusparseTcsrsv_analysis cusparseScsrsv_analysis
#define T_HYBGDeviceMat s_HYBGDeviceMat
#define T_Hmat s_Hmat
#define T_HYBGDeviceFree s_HYBGDeviceFree
@ -78,7 +78,6 @@
#define cusparseThybsv_analysis cusparseShybsv_analysis
#define cusparseTcsr2hyb cusparseScsr2hyb
#elif CUDA_VERSION < 11030
#define T_CSRGDeviceSetMatType s_CSRGDeviceSetMatType
@ -87,6 +86,13 @@
#define cusparseTcsrsv2_bufferSize cusparseScsrsv2_bufferSize
#define cusparseTcsrsv2_analysis cusparseScsrsv2_analysis
#define cusparseTcsrsv2_solve cusparseScsrsv2_solve
#else
#define T_CSRGIsNullSvBuffer s_CSRGIsNullSvBuffer
#define T_CSRGIsNullSvDescr s_CSRGIsNullSvDescr
#define T_CSRGIsNullMvDescr s_CSRGIsNullMvDescr
#define T_CSRGCreateSpMVDescr s_CSRGCreateSpMVDescr
#endif
#include "fcusparse_fct.h"

@ -97,6 +97,7 @@ module z_cusparse_mod
end function z_CSRGDeviceSetMatIndexBase
end interface
#if CUDA_SHORT_VERSION <= 10
interface CSRGDeviceCsrsmAnalysis
function z_CSRGDeviceCsrsmAnalysis(Mat) &
& bind(c,name="z_CSRGDeviceCsrsmAnalysis") result(res)
@ -106,6 +107,17 @@ module z_cusparse_mod
integer(c_int) :: res
end function z_CSRGDeviceCsrsmAnalysis
end interface
#else
interface CSRGIsNullSvBuffer
function z_CSRGIsNullSvBuffer(Mat) &
& bind(c,name="z_CSRGIsNullSvBuffer") result(res)
use iso_c_binding
import z_Cmat
type(z_Cmat) :: Mat
integer(c_int) :: res
end function z_CSRGIsNullSvBuffer
end interface
#endif
interface CSRGDeviceAlloc
function z_CSRGDeviceAlloc(Mat,nr,nc,nz) &

@ -38,7 +38,8 @@
#include "cintrf.h"
#include "fcusparse.h"
/* Double precision complex */
/* Double precision real */
#define TYPE double complex
#define CUSPARSE_BASE_TYPE CUDA_C_64F
#define T_CSRGDeviceMat z_CSRGDeviceMat
@ -85,8 +86,14 @@
#define cusparseTcsrsv2_bufferSize cusparseZcsrsv2_bufferSize
#define cusparseTcsrsv2_analysis cusparseZcsrsv2_analysis
#define cusparseTcsrsv2_solve cusparseZcsrsv2_solve
#else
#define T_CSRGIsNullSvBuffer z_CSRGIsNullSvBuffer
#define T_CSRGIsNullSvDescr z_CSRGIsNullSvDescr
#define T_CSRGIsNullMvDescr z_CSRGIsNullMvDescr
#define T_CSRGCreateSpMVDescr z_CSRGCreateSpMVDescr
#endif
#include "fcusparse_fct.h"

Loading…
Cancel
Save