From 20a01d4d71df474faea3e5fed5b518c1de8bef9e Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 19 Dec 2023 11:54:42 -0500 Subject: [PATCH] Attempt at fixing CSRG in CUDA 10.2. Not complete yet. --- cuda/fcusparse_fct.h | 13 +++++++------ cuda/impl/psb_c_cuda_csrg_to_gpu.F90 | 20 ++++++++++---------- cuda/impl/psb_d_cuda_csrg_to_gpu.F90 | 20 ++++++++++---------- cuda/impl/psb_s_cuda_csrg_to_gpu.F90 | 20 ++++++++++---------- cuda/impl/psb_z_cuda_csrg_to_gpu.F90 | 20 ++++++++++---------- 5 files changed, 47 insertions(+), 46 deletions(-) diff --git a/cuda/fcusparse_fct.h b/cuda/fcusparse_fct.h index 52df7b78..06facdc0 100644 --- a/cuda/fcusparse_fct.h +++ b/cuda/fcusparse_fct.h @@ -141,15 +141,16 @@ int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, cusparseHandle_t *my_handle=getHandle(); TYPE ealpha=alpha, ebeta=beta; #if CUDA_SHORT_VERSION <= 10 - /*getAddrMultiVecDevice(deviceX, &vX); - getAddrMultiVecDevice(deviceY, &vY); */ + /* getAddrMultiVecDevice(deviceX, &vX); */ + /* getAddrMultiVecDevice(deviceY, &vY); */ vX=x->v_; vY=y->v_; - return cusparseTcsrmv(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, - cMat->m,cMat->n,cMat->nz,(const TYPE *) &alpha,cMat->descr, - cMat->val, cMat->irp, cMat->ja, - (const TYPE *) vX, (const TYPE *) &beta, (TYPE *) vY); + CHECK_CUSPARSE(cusparseTcsrmv(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,cMat->n,cMat->nz,(const TYPE *) &alpha,cMat->descr, + cMat->val, cMat->irp, cMat->ja, + (const TYPE *) vX, (const TYPE *) &beta, (TYPE *) vY)); + #elif CUDA_VERSION < 11030 size_t bfsz; vX=x->v_; diff --git a/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 index 8e7d25a9..cc3fbaaf 100644 --- a/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 @@ -55,7 +55,7 @@ subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm) if (c_associated(a%deviceMat%Mat)) then info = CSRGDeviceFree(a%deviceMat) end if -#if CUDA_SHORT_VERSION <= 10 +#if (CUDA_SHORT_VERSION <= 10 ) if (a%is_unit()) then ! ! CUSPARSE has the habit of storing the diagonal and then ignoring, @@ -74,7 +74,7 @@ subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm) !!! We are explicitly adding the diagonal !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) if ((info == 0) .and. a%is_triangle()) then - info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) if ((info == 0).and.a%is_upper()) then info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) else @@ -114,15 +114,15 @@ subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm) if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) - if (info == 0) then - if (a%is_unit()) then - info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) - else - info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) - end if - end if +!!$ if (info == 0) then +!!$ if (a%is_unit()) then +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) +!!$ else +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) +!!$ end if +!!$ end if if ((info == 0) .and. a%is_triangle()) then - info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) if ((info == 0).and.a%is_upper()) then info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) else diff --git a/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 index 4ecb0bbc..d7a1b1e7 100644 --- a/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 @@ -55,7 +55,7 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm) if (c_associated(a%deviceMat%Mat)) then info = CSRGDeviceFree(a%deviceMat) end if -#if CUDA_SHORT_VERSION <= 10 +#if (CUDA_SHORT_VERSION <= 10 ) if (a%is_unit()) then ! ! CUSPARSE has the habit of storing the diagonal and then ignoring, @@ -74,7 +74,7 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm) !!! We are explicitly adding the diagonal !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) if ((info == 0) .and. a%is_triangle()) then - info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) if ((info == 0).and.a%is_upper()) then info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) else @@ -114,15 +114,15 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm) if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) - if (info == 0) then - if (a%is_unit()) then - info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) - else - info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) - end if - end if +!!$ if (info == 0) then +!!$ if (a%is_unit()) then +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) +!!$ else +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) +!!$ end if +!!$ end if if ((info == 0) .and. a%is_triangle()) then - info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) if ((info == 0).and.a%is_upper()) then info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) else diff --git a/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 index 246e780d..cc5b9c8d 100644 --- a/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 @@ -55,7 +55,7 @@ subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm) if (c_associated(a%deviceMat%Mat)) then info = CSRGDeviceFree(a%deviceMat) end if -#if CUDA_SHORT_VERSION <= 10 +#if (CUDA_SHORT_VERSION <= 10 ) if (a%is_unit()) then ! ! CUSPARSE has the habit of storing the diagonal and then ignoring, @@ -74,7 +74,7 @@ subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm) !!! We are explicitly adding the diagonal !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) if ((info == 0) .and. a%is_triangle()) then - info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) if ((info == 0).and.a%is_upper()) then info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) else @@ -114,15 +114,15 @@ subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm) if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) - if (info == 0) then - if (a%is_unit()) then - info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) - else - info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) - end if - end if +!!$ if (info == 0) then +!!$ if (a%is_unit()) then +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) +!!$ else +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) +!!$ end if +!!$ end if if ((info == 0) .and. a%is_triangle()) then - info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) if ((info == 0).and.a%is_upper()) then info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) else diff --git a/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 index 41c96f68..56943f37 100644 --- a/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 @@ -55,7 +55,7 @@ subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm) if (c_associated(a%deviceMat%Mat)) then info = CSRGDeviceFree(a%deviceMat) end if -#if CUDA_SHORT_VERSION <= 10 +#if (CUDA_SHORT_VERSION <= 10 ) if (a%is_unit()) then ! ! CUSPARSE has the habit of storing the diagonal and then ignoring, @@ -74,7 +74,7 @@ subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm) !!! We are explicitly adding the diagonal !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) if ((info == 0) .and. a%is_triangle()) then - info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) if ((info == 0).and.a%is_upper()) then info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) else @@ -114,15 +114,15 @@ subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm) if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) - if (info == 0) then - if (a%is_unit()) then - info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) - else - info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) - end if - end if +!!$ if (info == 0) then +!!$ if (a%is_unit()) then +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) +!!$ else +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) +!!$ end if +!!$ end if if ((info == 0) .and. a%is_triangle()) then - info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) if ((info == 0).and.a%is_upper()) then info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) else