Identified problems with CSRG. Will fix in a branch.

repack-nvid
sfilippone 1 year ago
parent 41491f7b9c
commit 0230fbb7af

@ -89,7 +89,7 @@ GEN_PSI_FUNC_NAME(TYPE_SYMBOL)
(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, int hacksz, int ldv, int nzm, (spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, int hacksz, int ldv, int nzm,
int *rS,int *devIdisp, int *devJa, VALUE_TYPE *devVal, int *rS,int *devIdisp, int *devJa, VALUE_TYPE *devVal,
int *idiag, int *rP, VALUE_TYPE *cM) int *idiag, int *rP, VALUE_TYPE *cM)
{ int i,j,k, nrws; { int i,j, nrws;
//int maxNForACall = THREAD_BLOCK*handle->maxGridSizeX; //int maxNForACall = THREAD_BLOCK*handle->maxGridSizeX;
int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX);

@ -97,6 +97,7 @@ module d_cusparse_mod
end function d_CSRGDeviceSetMatIndexBase end function d_CSRGDeviceSetMatIndexBase
end interface end interface
#if CUDA_SHORT_VERSION <= 10
interface CSRGDeviceCsrsmAnalysis interface CSRGDeviceCsrsmAnalysis
function d_CSRGDeviceCsrsmAnalysis(Mat) & function d_CSRGDeviceCsrsmAnalysis(Mat) &
& bind(c,name="d_CSRGDeviceCsrsmAnalysis") result(res) & bind(c,name="d_CSRGDeviceCsrsmAnalysis") result(res)
@ -106,6 +107,7 @@ module d_cusparse_mod
integer(c_int) :: res integer(c_int) :: res
end function d_CSRGDeviceCsrsmAnalysis end function d_CSRGDeviceCsrsmAnalysis
end interface end interface
#endif
interface CSRGDeviceAlloc interface CSRGDeviceAlloc
function d_CSRGDeviceAlloc(Mat,nr,nc,nz) & function d_CSRGDeviceAlloc(Mat,nr,nc,nz) &

@ -187,7 +187,7 @@ int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX,
(void *) vY, CUSPARSE_BASE_TYPE, (void *) vY, CUSPARSE_BASE_TYPE,
CUSPARSE_BASE_TYPE, (void *) cMat->mvbuffer)); CUSPARSE_BASE_TYPE, (void *) cMat->mvbuffer));
#else #elif CUDA_VERSION <= 12030
cusparseDnVecDescr_t vecX, vecY; cusparseDnVecDescr_t vecX, vecY;
size_t bfsz; size_t bfsz;
vX=x->v_; vX=x->v_;
@ -212,6 +212,8 @@ int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX,
cMat->mvbuffer)); cMat->mvbuffer));
CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) ); CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) );
CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) ); CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) );
#else
fprintf(stderr,"Unsupported CUSPARSE version\n");
#endif #endif
} }
@ -244,7 +246,7 @@ int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX,
(const TYPE *) vX, (TYPE *) vY, (const TYPE *) vX, (TYPE *) vY,
CUSPARSE_SOLVE_POLICY_USE_LEVEL, CUSPARSE_SOLVE_POLICY_USE_LEVEL,
(void *) cMat->svbuffer)); (void *) cMat->svbuffer));
#else #elif CUDA_VERSION <= 12030
cusparseDnVecDescr_t vecX, vecY; cusparseDnVecDescr_t vecX, vecY;
size_t bfsz; size_t bfsz;
vX=x->v_; vX=x->v_;
@ -285,6 +287,8 @@ int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX,
*(cMat->spsvDescr))); *(cMat->spsvDescr)));
CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) ); CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) );
CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) ); CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) );
#else
fprintf(stderr,"Unsupported CUSPARSE version\n");
#endif #endif
} }

@ -38,7 +38,7 @@ subroutine psb_c_cuda_cp_csrg_from_fmt(a,b,info)
implicit none implicit none
class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
class(psb_c_base_sparse_mat), intent(inout) :: b class(psb_c_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info

@ -38,7 +38,7 @@ subroutine psb_d_cuda_cp_csrg_from_fmt(a,b,info)
implicit none implicit none
class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a
class(psb_d_base_sparse_mat), intent(inout) :: b class(psb_d_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info

@ -227,7 +227,7 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm)
endif endif
#else #elif 0
if (a%is_unit()) then if (a%is_unit()) then
! !
@ -308,6 +308,84 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm)
!!$ if ((info == 0) .and. a%is_triangle()) then !!$ if ((info == 0) .and. a%is_triangle()) then
!!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) !!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat)
!!$ end if !!$ end if
#else
if (a%is_unit()) then
!
! CUSPARSE has the habit of storing the diagonal and then ignoring,
! whereas we do not store it. Hence this adapter code.
!
nzdi = nz + m
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi)
if (info == 0) then
if (a%is_unit()) then
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
else
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
end if
end if
!!! We are explicitly adding the diagonal
!! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
if ((info == 0) .and. a%is_triangle()) then
!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular)
if ((info == 0).and.a%is_upper()) then
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
else
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
end if
end if
if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info)
if (info == 0) then
irpdi(1) = 1
if (a%is_triangle().and.a%is_upper()) then
do i=1,m
j = irpdi(i)
jadi(j) = i
valdi(j) = done
nrz = a%irp(i+1)-a%irp(i)
jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)
valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1)
irpdi(i+1) = j + nrz + 1
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
end do
else
do i=1,m
j = irpdi(i)
nrz = a%irp(i+1)-a%irp(i)
jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)
valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1)
jadi(j+nrz) = i
valdi(j+nrz) = done
irpdi(i+1) = j + nrz + 1
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
end do
end if
end if
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi)
else
if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz)
!info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general)
!!$ if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one)
!!$ if (a%is_triangle()) then
!!$ if (info == 0) then
!!$ if (a%is_unit()) then
!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
!!$ else
!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
!!$ end if
!!$ end if
!!$ if ((info == 0) )then
!!$ if ((info == 0).and.a%is_upper()) then
!!$ info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
!!$ else
!!$ info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
!!$ end if
!!$ end if
!!$ end if
if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val)
endif
#endif #endif
call a%set_sync() call a%set_sync()

@ -38,7 +38,7 @@ subroutine psb_s_cuda_cp_csrg_from_fmt(a,b,info)
implicit none implicit none
class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a
class(psb_s_base_sparse_mat), intent(inout) :: b class(psb_s_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info

@ -38,7 +38,7 @@ subroutine psb_z_cuda_cp_csrg_from_fmt(a,b,info)
implicit none implicit none
class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a
class(psb_z_base_sparse_mat), intent(inout) :: b class(psb_z_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info

Loading…
Cancel
Save