Updated tests

psblas-bgmres
gabrielequatrana 10 months ago
parent 5389ff245e
commit cf315660e1

@ -227,7 +227,11 @@ int dotMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA, void* devM
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
spgpuHandle_t handle=psb_cudaGetHandle();
spgpuDmdot(handle, y_res, n, (double*)devVecA->v_, (double*)devVecB->v_,devVecA->count_,devVecB->pitch_);
for (int j=0; j<devVecA->count_; j++) {
spgpuDmdot(handle, y_res+devVecA->count_*j, n,
((double*)devVecA->v_)+devVecA->pitch_*j,(double*)devVecB->v_,
devVecB->count_,devVecB->pitch_);
}
return(i);
}
@ -243,7 +247,6 @@ int axpbyMultiVecDeviceDouble(int n,double alpha, void* devMultiVecX,
return SPGPU_UNSUPPORTED;
for(j=0;j<devVecY->count_;j++)
fprintf(stderr,"CUDA ENTERED %d %p %d %d %d \n",j, (((double *)(devVecX->v_))+(pitch)*j), n, pitch, devVecY->size_);
spgpuDaxpby(handle,(((double *)(devVecY->v_))+(pitch)*j), n, beta,
(((double *)(devVecY->v_))+(pitch)*j), alpha,(((double *)(devVecX->v_))+(pitch)*j));
return(i);

@ -1616,7 +1616,6 @@ contains
real(psb_dpk_), external :: ddot
integer(psb_ipk_) :: info
res = dzero
!
! Note: this is the gpu implementation.
! When we get here, we are sure that X is of
@ -1626,7 +1625,8 @@ contains
type is (psb_d_multivect_cuda)
if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync()
info = dotMultiVecDevice(res,nr,x%deviceVect,yy%deviceVect,x%get_ncols())
allocate(res(size(x%v,2),size(y%v,2)))
info = dotMultiVecDevice(res,nr,x%deviceVect,yy%deviceVect,size(x%v,2))
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,'d_cuda_multi_dot_v')
@ -1878,6 +1878,7 @@ contains
end function d_cuda_multi_nrm2
! TODO CUDA
function d_cuda_multi_amax(nr,x) result(res)
implicit none
class(psb_d_multivect_cuda), intent(inout) :: x
@ -1894,6 +1895,7 @@ contains
end function d_cuda_multi_amax
! TODO CUDA
function d_cuda_multi_asum(nr,x) result(res)
implicit none
class(psb_d_multivect_cuda), intent(inout) :: x

@ -154,7 +154,7 @@ void spgpuDmdot(spgpuHandle_t handle, double* y, int n, __device double* a, __de
for (int i=0; i<count; ++i)
{
y[i] = spgpuDdot(handle, n, a, b);
a += pitch;
//a += pitch;
b += pitch;
}
}

@ -279,8 +279,10 @@ subroutine psb_dbgmres_multivect(a, prec, b, x, eps, desc_a, info, itmax, iter,
! Compute i index for H operations
idx_i = (i-1)*nrhs+1
! TODO
! STEP 6: Compute H(i,j) = (V(i)**T)*W
h(idx_i:idx_i+n_add,idx_j:idx_j+n_add) = psb_gedot(v(i),w,desc_a,info)
h(idx_i:idx_i+n_add,idx_j:idx_j+n_add) = psb_geprod(v(i),w,desc_a,info,trans=.true.)
!h(idx_i:idx_i+n_add,idx_j:idx_j+n_add) = psb_gedot(v(i),w,desc_a,info)
if (info /= psb_success_) then
info=psb_err_from_subroutine_non_
call psb_errpush(info,name)
@ -362,12 +364,9 @@ subroutine psb_dbgmres_multivect(a, prec, b, x, eps, desc_a, info, itmax, iter,
errnum = rmn2
errden = r0n2
! do col=1,nrhs
! write(*,*) rmn2(col), r0n2(col)
! end do
end if
! Check convergence (max o media)
! TODO Check convergence (max o media)
if (maxval(errnum).le.(eps*maxval(errden))) then
! Exit algorithm

@ -33,7 +33,7 @@ program psb_dbf_sample
! molds
type(psb_d_cuda_csrg_sparse_mat), target :: acsrg
type(psb_d_cuda_hlg_sparse_mat), target :: ahlg
type(psb_d_cuda_hlg_sparse_mat), target :: ahlg
type(psb_d_cuda_elg_sparse_mat), target :: aelg
class(psb_d_base_sparse_mat), pointer :: agmold, acmold
@ -60,7 +60,7 @@ program psb_dbf_sample
real(psb_dpk_), allocatable :: resmx(:), res(:,:)
real(psb_dpk_) :: resmxp
integer(psb_ipk_), allocatable :: ivg(:)
logical :: print_matrix = .true.
logical :: print_matrix = .false.
call psb_init(ctxt)
call psb_info(ctxt,iam,np)

@ -526,14 +526,14 @@ program dpdegen
character(len=40) :: kmethd = "GMRES"
character(len=40) :: ptype = "NONE"
character(len=5) :: agfmt = "CSRG"
integer(psb_ipk_) :: nrhs = 50
integer(psb_ipk_) :: nrhs = 5
integer(psb_ipk_) :: istopbg = 1
integer(psb_ipk_) :: istoprg = 2
integer(psb_ipk_) :: itmax = 500
integer(psb_ipk_) :: itrace = -1
integer(psb_ipk_) :: itrs = 100
real(psb_dpk_) :: eps = 1.d-7
integer(psb_ipk_) :: idim = 10
integer(psb_ipk_) :: idim = 20
logical :: tnd = .false.

@ -613,7 +613,6 @@ program pdegenmm
type(psb_d_cuda_hybg_sparse_mat), target :: ahybg
#endif
type(psb_d_cuda_hlg_sparse_mat), target :: ahlg
! TODO HDIAG E DNSG non hanno nemmeno CSMM
type(psb_d_cuda_hdiag_sparse_mat), target :: ahdiag
type(psb_d_cuda_dnsg_sparse_mat), target :: adnsg
#endif
@ -804,43 +803,6 @@ program pdegenmm
x1 = b_mv%get_vect()
x2 = b_mv_g%get_vect()
! TODO test AXPBY
call psb_geall(xg,desc_a,info)
call psb_geasb(xg,desc_a,info,mold=tmold)
call xg%set(done)
!call xg%sync()
call psb_geall(bg,desc_a,info)
call psb_geasb(bg,desc_a,info,mold=tmold)
!call bg%set(done+done)
do i=1,8
write(*,*) xg%v%v(i)
end do
! ! TODO: Non funziona spgpuDaxpby (axpbyMultiVecDeviceDouble)
call psb_geaxpby(done,xg,dzero,bg,desc_a,info)
call psb_cuda_DeviceSync()
write(*,*) 'BG ', bg%is_dev(), bg%is_host(), bg%is_sync()
call bg%sync()
write(*,*) 'BG ', bg%is_dev(), bg%is_host(), bg%is_sync()
do i=1,8
write(*,*) bg%v%v(i)
end do
return
! call x_mv_g%set(done)
! call x_mv_g%sync()
! call psb_geaxpby(done,x_mv_g,dzero,b_mv_g,desc_a,info)
! call b_mv_g%sync()
! do i=1,size(b_mv_g%v%v,1)
! write(*,*) b_mv_g%v%v(i,:)
! end do
! return
call psb_barrier(ctxt)
tt1 = psb_wtime()
do i=1,ntests

Loading…
Cancel
Save