|
|
@ -596,7 +596,7 @@ program pdegenmm
|
|
|
|
! solver parameters
|
|
|
|
! solver parameters
|
|
|
|
integer(psb_epk_) :: amatsize, precsize, descsize, annz, nbytes
|
|
|
|
integer(psb_epk_) :: amatsize, precsize, descsize, annz, nbytes
|
|
|
|
real(psb_dpk_) :: err, eps
|
|
|
|
real(psb_dpk_) :: err, eps
|
|
|
|
integer, parameter :: ntests=1, ngpu=50, ncnv=20
|
|
|
|
integer, parameter :: ntests=50, ngpu=50, ncnv=20
|
|
|
|
type(psb_d_coo_sparse_mat), target :: acoo
|
|
|
|
type(psb_d_coo_sparse_mat), target :: acoo
|
|
|
|
type(psb_d_csr_sparse_mat), target :: acsr
|
|
|
|
type(psb_d_csr_sparse_mat), target :: acsr
|
|
|
|
type(psb_d_ell_sparse_mat), target :: aell
|
|
|
|
type(psb_d_ell_sparse_mat), target :: aell
|
|
|
@ -612,7 +612,9 @@ program pdegenmm
|
|
|
|
#if CUDA_SHORT_VERSION <= 10
|
|
|
|
#if CUDA_SHORT_VERSION <= 10
|
|
|
|
type(psb_d_cuda_hybg_sparse_mat), target :: ahybg
|
|
|
|
type(psb_d_cuda_hybg_sparse_mat), target :: ahybg
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
|
|
|
|
! TODO: HLG still to be done (complex)
|
|
|
|
type(psb_d_cuda_hlg_sparse_mat), target :: ahlg
|
|
|
|
type(psb_d_cuda_hlg_sparse_mat), target :: ahlg
|
|
|
|
|
|
|
|
! TODO: HDIAG and DNSG do not even have CSMM
|
|
|
|
type(psb_d_cuda_hdiag_sparse_mat), target :: ahdiag
|
|
|
|
type(psb_d_cuda_hdiag_sparse_mat), target :: ahdiag
|
|
|
|
type(psb_d_cuda_dnsg_sparse_mat), target :: adnsg
|
|
|
|
type(psb_d_cuda_dnsg_sparse_mat), target :: adnsg
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
@ -624,9 +626,7 @@ program pdegenmm
|
|
|
|
character(len=20) :: name,ch_err
|
|
|
|
character(len=20) :: name,ch_err
|
|
|
|
character(len=40) :: fname
|
|
|
|
character(len=40) :: fname
|
|
|
|
|
|
|
|
|
|
|
|
real(psb_dpk_), allocatable :: test(:,:), test1(:,:), test2(:)
|
|
|
|
real(psb_dpk_) :: random_value
|
|
|
|
|
|
|
|
|
|
|
|
type(c_ptr) :: gpx, gpy
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
info=psb_success_
|
|
|
|
info=psb_success_
|
|
|
|
|
|
|
|
|
|
|
@ -804,30 +804,37 @@ program pdegenmm
|
|
|
|
! FIXME: cache flush needed here
|
|
|
|
! FIXME: cache flush needed here
|
|
|
|
x1 = b_mv%get_vect()
|
|
|
|
x1 = b_mv%get_vect()
|
|
|
|
x2 = b_mv_g%get_vect()
|
|
|
|
x2 = b_mv_g%get_vect()
|
|
|
|
write(*,*)
|
|
|
|
|
|
|
|
|
|
|
|
! ! TODO test AXPBY
|
|
|
|
|
|
|
|
call psb_geall(xg,desc_a,info)
|
|
|
|
|
|
|
|
call psb_geasb(xg,desc_a,info,mold=tmold)
|
|
|
|
|
|
|
|
call xg%set(done)
|
|
|
|
|
|
|
|
call xg%sync()
|
|
|
|
|
|
|
|
call psb_geall(bg,desc_a,info)
|
|
|
|
|
|
|
|
call psb_geasb(bg,desc_a,info,mold=tmold)
|
|
|
|
|
|
|
|
!call bg%set(done+done)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
! ! TODO: spgpuDaxpby (axpbyMultiVecDeviceDouble) does not work
|
|
|
|
|
|
|
|
call psb_geaxpby(done,xg,dzero,bg,desc_a,info)
|
|
|
|
|
|
|
|
call psb_cuda_DeviceSync()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
write(*,*) 'BG ', bg%is_dev(), bg%is_host(), bg%is_sync()
|
|
|
|
|
|
|
|
call bg%sync()
|
|
|
|
|
|
|
|
write(*,*) 'BG ', bg%is_dev(), bg%is_host(), bg%is_sync()
|
|
|
|
do i=1,8
|
|
|
|
do i=1,8
|
|
|
|
write(*,*) x1(i,:)
|
|
|
|
write(*,*) bg%v%v(i)
|
|
|
|
end do
|
|
|
|
end do
|
|
|
|
|
|
|
|
|
|
|
|
! TODO: test AXPBY and SPMM
|
|
|
|
return
|
|
|
|
! call psb_geall(xg,desc_a,info)
|
|
|
|
|
|
|
|
! call psb_geasb(xg,desc_a,info,mold=tmold)
|
|
|
|
|
|
|
|
! call xg%set(done)
|
|
|
|
|
|
|
|
! call xg%sync()
|
|
|
|
|
|
|
|
! call psb_geall(bg,desc_a,info)
|
|
|
|
|
|
|
|
! call psb_geasb(bg,desc_a,info,mold=tmold)
|
|
|
|
|
|
|
|
! call bg%set(done+done)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
! ! TODO: spgpuDaxpby (axpbyMultiVecDeviceDouble) does not work
|
|
|
|
! call x_mv_g%set(done)
|
|
|
|
! call psb_geaxpby(done,xg,dzero,bg,desc_a,info)
|
|
|
|
! call x_mv_g%sync()
|
|
|
|
! call psb_cuda_DeviceSync()
|
|
|
|
|
|
|
|
! call psb_barrier(ctxt)
|
|
|
|
! call psb_geaxpby(done,x_mv_g,dzero,b_mv_g,desc_a,info)
|
|
|
|
|
|
|
|
|
|
|
|
! write(*,*) 'BG ', bg%is_dev(), bg%is_host(), bg%is_sync()
|
|
|
|
! call b_mv_g%sync()
|
|
|
|
! call bg%sync()
|
|
|
|
! do i=1,size(b_mv_g%v%v,1)
|
|
|
|
! write(*,*) 'BG ', bg%is_dev(), bg%is_host(), bg%is_sync()
|
|
|
|
! write(*,*) b_mv_g%v%v(i,:)
|
|
|
|
! do i=1,8
|
|
|
|
|
|
|
|
! write(*,*) bg%v%v(i)
|
|
|
|
|
|
|
|
! end do
|
|
|
|
! end do
|
|
|
|
|
|
|
|
|
|
|
|
! return
|
|
|
|
! return
|
|
|
@ -848,10 +855,6 @@ program pdegenmm
|
|
|
|
call psb_amx(ctxt,tt2)
|
|
|
|
call psb_amx(ctxt,tt2)
|
|
|
|
x1 = b_mv%get_vect()
|
|
|
|
x1 = b_mv%get_vect()
|
|
|
|
x2 = b_mv_g%get_vect()
|
|
|
|
x2 = b_mv_g%get_vect()
|
|
|
|
write(*,*)
|
|
|
|
|
|
|
|
do i=1,size(x2,1)
|
|
|
|
|
|
|
|
write(*,*) x2(i,:)
|
|
|
|
|
|
|
|
end do
|
|
|
|
|
|
|
|
nr = desc_a%get_local_rows()
|
|
|
|
nr = desc_a%get_local_rows()
|
|
|
|
eps = maxval(abs(x1(1:nr,1:nrhs)-x2(1:nr,1:nrhs)))
|
|
|
|
eps = maxval(abs(x1(1:nr,1:nrhs)-x2(1:nr,1:nrhs)))
|
|
|
|
call psb_amx(ctxt,eps)
|
|
|
|
call psb_amx(ctxt,eps)
|
|
|
@ -879,10 +882,6 @@ program pdegenmm
|
|
|
|
call b_mv_g%sync()
|
|
|
|
call b_mv_g%sync()
|
|
|
|
x1 = b_mv%get_vect()
|
|
|
|
x1 = b_mv%get_vect()
|
|
|
|
x2 = b_mv_g%get_vect()
|
|
|
|
x2 = b_mv_g%get_vect()
|
|
|
|
write(*,*)
|
|
|
|
|
|
|
|
do i=1,size(x2,1)
|
|
|
|
|
|
|
|
write(*,*) x2(i,:)
|
|
|
|
|
|
|
|
end do
|
|
|
|
|
|
|
|
call psb_geaxpby(-done,b_mv_g,+done,b_mv,desc_a,info)
|
|
|
|
call psb_geaxpby(-done,b_mv_g,+done,b_mv,desc_a,info)
|
|
|
|
eps = psb_geamax(b_mv,desc_a,info)
|
|
|
|
eps = psb_geamax(b_mv,desc_a,info)
|
|
|
|
|
|
|
|
|
|
|
|