|
|
@ -1357,17 +1357,18 @@ module psb_d_cuda_multivect_mod
|
|
|
|
procedure, pass(x) :: get_nrows => d_cuda_multi_get_nrows
|
|
|
|
procedure, pass(x) :: get_nrows => d_cuda_multi_get_nrows
|
|
|
|
procedure, pass(x) :: get_ncols => d_cuda_multi_get_ncols
|
|
|
|
procedure, pass(x) :: get_ncols => d_cuda_multi_get_ncols
|
|
|
|
procedure, nopass :: get_fmt => d_cuda_multi_get_fmt
|
|
|
|
procedure, nopass :: get_fmt => d_cuda_multi_get_fmt
|
|
|
|
|
|
|
|
! TODO
|
|
|
|
!!$ procedure, pass(x) :: dot_v => d_cuda_multi_dot_v
|
|
|
|
!!$ procedure, pass(x) :: dot_v => d_cuda_multi_dot_v
|
|
|
|
!!$ procedure, pass(x) :: dot_a => d_cuda_multi_dot_a
|
|
|
|
!!$ procedure, pass(x) :: dot_a => d_cuda_multi_dot_a
|
|
|
|
!!$ procedure, pass(y) :: axpby_v => d_cuda_multi_axpby_v
|
|
|
|
procedure, pass(y) :: axpby_v => d_cuda_multi_axpby_v
|
|
|
|
!!$ procedure, pass(y) :: axpby_a => d_cuda_multi_axpby_a
|
|
|
|
procedure, pass(y) :: axpby_a => d_cuda_multi_axpby_a
|
|
|
|
!!$ procedure, pass(y) :: mlt_v => d_cuda_multi_mlt_v
|
|
|
|
!!$ procedure, pass(y) :: mlt_v => d_cuda_multi_mlt_v
|
|
|
|
!!$ procedure, pass(y) :: mlt_a => d_cuda_multi_mlt_a
|
|
|
|
!!$ procedure, pass(y) :: mlt_a => d_cuda_multi_mlt_a
|
|
|
|
!!$ procedure, pass(z) :: mlt_a_2 => d_cuda_multi_mlt_a_2
|
|
|
|
!!$ procedure, pass(z) :: mlt_a_2 => d_cuda_multi_mlt_a_2
|
|
|
|
!!$ procedure, pass(z) :: mlt_v_2 => d_cuda_multi_mlt_v_2
|
|
|
|
!!$ procedure, pass(z) :: mlt_v_2 => d_cuda_multi_mlt_v_2
|
|
|
|
!!$ procedure, pass(x) :: scal => d_cuda_multi_scal
|
|
|
|
!!$ procedure, pass(x) :: scal => d_cuda_multi_scal
|
|
|
|
!!$ procedure, pass(x) :: nrm2 => d_cuda_multi_nrm2
|
|
|
|
procedure, pass(x) :: nrm2 => d_cuda_multi_nrm2
|
|
|
|
!!$ procedure, pass(x) :: amax => d_cuda_multi_amax
|
|
|
|
procedure, pass(x) :: amax => d_cuda_multi_amax
|
|
|
|
!!$ procedure, pass(x) :: asum => d_cuda_multi_asum
|
|
|
|
!!$ procedure, pass(x) :: asum => d_cuda_multi_asum
|
|
|
|
procedure, pass(x) :: all => d_cuda_multi_all
|
|
|
|
procedure, pass(x) :: all => d_cuda_multi_all
|
|
|
|
procedure, pass(x) :: zero => d_cuda_multi_zero
|
|
|
|
procedure, pass(x) :: zero => d_cuda_multi_zero
|
|
|
@ -1607,108 +1608,109 @@ contains
|
|
|
|
res = 'dGPU'
|
|
|
|
res = 'dGPU'
|
|
|
|
end function d_cuda_multi_get_fmt
|
|
|
|
end function d_cuda_multi_get_fmt
|
|
|
|
|
|
|
|
|
|
|
|
!!$ function d_cuda_multi_dot_v(n,x,y) result(res)
|
|
|
|
function d_cuda_multi_dot_v(n,x,y) result(res)
|
|
|
|
!!$ implicit none
|
|
|
|
implicit none
|
|
|
|
!!$ class(psb_d_multivect_cuda), intent(inout) :: x
|
|
|
|
class(psb_d_multivect_cuda), intent(inout) :: x
|
|
|
|
!!$ class(psb_d_base_multivect_type), intent(inout) :: y
|
|
|
|
class(psb_d_base_multivect_type), intent(inout) :: y
|
|
|
|
!!$ integer(psb_ipk_), intent(in) :: n
|
|
|
|
integer(psb_ipk_), intent(in) :: n
|
|
|
|
!!$ real(psb_dpk_) :: res
|
|
|
|
real(psb_dpk_), allocatable :: res(:,:)
|
|
|
|
!!$ real(psb_dpk_), external :: ddot
|
|
|
|
real(psb_dpk_), external :: ddot
|
|
|
|
!!$ integer(psb_ipk_) :: info
|
|
|
|
integer(psb_ipk_) :: info
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!!$ res = dzero
|
|
|
|
res = dzero
|
|
|
|
!!$ !
|
|
|
|
!
|
|
|
|
!!$ ! Note: this is the gpu implementation.
|
|
|
|
! Note: this is the gpu implementation.
|
|
|
|
!!$ ! When we get here, we are sure that X is of
|
|
|
|
! When we get here, we are sure that X is of
|
|
|
|
!!$ ! TYPE psb_d_vect
|
|
|
|
! TYPE psb_d_vect
|
|
|
|
!!$ !
|
|
|
|
!
|
|
|
|
!!$ select type(yy => y)
|
|
|
|
! TODO tra
|
|
|
|
!!$ type is (psb_d_base_multivect_type)
|
|
|
|
select type(yy => y)
|
|
|
|
!!$ if (x%is_dev()) call x%sync()
|
|
|
|
type is (psb_d_multivect_cuda)
|
|
|
|
!!$ res = ddot(n,x%v,1,yy%v,1)
|
|
|
|
if (x%is_host()) call x%sync()
|
|
|
|
!!$ type is (psb_d_multivect_cuda)
|
|
|
|
if (yy%is_host()) call yy%sync()
|
|
|
|
!!$ if (x%is_host()) call x%sync()
|
|
|
|
info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect,x%get_ncols())
|
|
|
|
!!$ if (yy%is_host()) call yy%sync()
|
|
|
|
if (info /= 0) then
|
|
|
|
!!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect)
|
|
|
|
info = psb_err_internal_error_
|
|
|
|
!!$ if (info /= 0) then
|
|
|
|
call psb_errpush(info,'d_cuda_multi_dot_v')
|
|
|
|
!!$ info = psb_err_internal_error_
|
|
|
|
end if
|
|
|
|
!!$ call psb_errpush(info,'d_cuda_multi_dot_v')
|
|
|
|
|
|
|
|
!!$ end if
|
|
|
|
! TODO
|
|
|
|
!!$
|
|
|
|
class default
|
|
|
|
!!$ class default
|
|
|
|
! y%sync is done in dot_a
|
|
|
|
!!$ ! y%sync is done in dot_a
|
|
|
|
call x%sync()
|
|
|
|
!!$ call x%sync()
|
|
|
|
res = y%dot(n,x%v)
|
|
|
|
!!$ res = y%dot(n,x%v)
|
|
|
|
end select
|
|
|
|
!!$ end select
|
|
|
|
|
|
|
|
!!$
|
|
|
|
end function d_cuda_multi_dot_v
|
|
|
|
!!$ end function d_cuda_multi_dot_v
|
|
|
|
|
|
|
|
!!$
|
|
|
|
! TODO
|
|
|
|
!!$ function d_cuda_multi_dot_a(n,x,y) result(res)
|
|
|
|
function d_cuda_multi_dot_a(n,x,y) result(res)
|
|
|
|
!!$ implicit none
|
|
|
|
implicit none
|
|
|
|
!!$ class(psb_d_multivect_cuda), intent(inout) :: x
|
|
|
|
class(psb_d_multivect_cuda), intent(inout) :: x
|
|
|
|
!!$ real(psb_dpk_), intent(in) :: y(:)
|
|
|
|
real(psb_dpk_), intent(in) :: y(:)
|
|
|
|
!!$ integer(psb_ipk_), intent(in) :: n
|
|
|
|
integer(psb_ipk_), intent(in) :: n
|
|
|
|
!!$ real(psb_dpk_) :: res
|
|
|
|
real(psb_dpk_), allocatable :: res(:,:)
|
|
|
|
!!$ real(psb_dpk_), external :: ddot
|
|
|
|
real(psb_dpk_), external :: ddot
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!!$ if (x%is_dev()) call x%sync()
|
|
|
|
if (x%is_dev()) call x%sync()
|
|
|
|
!!$ res = ddot(n,y,1,x%v,1)
|
|
|
|
allocate(res(2,2))
|
|
|
|
!!$
|
|
|
|
res = ddot(n,y,1,x%v,1)
|
|
|
|
!!$ end function d_cuda_multi_dot_a
|
|
|
|
|
|
|
|
!!$
|
|
|
|
end function d_cuda_multi_dot_a
|
|
|
|
!!$ subroutine d_cuda_multi_axpby_v(m,alpha, x, beta, y, info)
|
|
|
|
|
|
|
|
!!$ use psi_serial_mod
|
|
|
|
subroutine d_cuda_multi_axpby_v(m,alpha, x, beta, y, info, n)
|
|
|
|
!!$ implicit none
|
|
|
|
use psi_serial_mod
|
|
|
|
!!$ integer(psb_ipk_), intent(in) :: m
|
|
|
|
implicit none
|
|
|
|
!!$ class(psb_d_base_multivect_type), intent(inout) :: x
|
|
|
|
integer(psb_ipk_), intent(in) :: m
|
|
|
|
!!$ class(psb_d_multivect_cuda), intent(inout) :: y
|
|
|
|
class(psb_d_base_multivect_type), intent(inout) :: x
|
|
|
|
!!$ real(psb_dpk_), intent (in) :: alpha, beta
|
|
|
|
class(psb_d_multivect_cuda), intent(inout) :: y
|
|
|
|
!!$ integer(psb_ipk_), intent(out) :: info
|
|
|
|
real(psb_dpk_), intent (in) :: alpha, beta
|
|
|
|
!!$ integer(psb_ipk_) :: nx, ny
|
|
|
|
integer(psb_ipk_), intent(out) :: info
|
|
|
|
!!$
|
|
|
|
integer(psb_ipk_), intent(in), optional :: n
|
|
|
|
!!$ info = psb_success_
|
|
|
|
integer(psb_ipk_) :: nc, nx, ny
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!!$ select type(xx => x)
|
|
|
|
info = psb_success_
|
|
|
|
!!$ type is (psb_d_base_multivect_type)
|
|
|
|
select type(xx => x)
|
|
|
|
!!$ if ((beta /= dzero).and.(y%is_dev()))&
|
|
|
|
type is (psb_d_multivect_cuda)
|
|
|
|
!!$ & call y%sync()
|
|
|
|
if ((beta /= dzero).and.(y%is_host())) call y%sync()
|
|
|
|
!!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info)
|
|
|
|
if (xx%is_host()) call xx%sync()
|
|
|
|
!!$ call y%set_host()
|
|
|
|
nx = getMultiVecDeviceSize(xx%deviceVect)
|
|
|
|
!!$ type is (psb_d_multivect_cuda)
|
|
|
|
ny = getMultiVecDeviceSize(y%deviceVect)
|
|
|
|
!!$ ! Do something different here
|
|
|
|
if ((nx<m).or.(ny<m)) then
|
|
|
|
!!$ if ((beta /= dzero).and.y%is_host())&
|
|
|
|
info = psb_err_internal_error_
|
|
|
|
!!$ & call y%sync()
|
|
|
|
else
|
|
|
|
!!$ if (xx%is_host()) call xx%sync()
|
|
|
|
info = axpbyMultiVecDevice(m,alpha,xx%deviceVect,beta,y%deviceVect)
|
|
|
|
!!$ nx = getMultiVecDeviceSize(xx%deviceVect)
|
|
|
|
end if
|
|
|
|
!!$ ny = getMultiVecDeviceSize(y%deviceVect)
|
|
|
|
call y%set_dev()
|
|
|
|
!!$ if ((nx<m).or.(ny<m)) then
|
|
|
|
class default
|
|
|
|
!!$ info = psb_err_internal_error_
|
|
|
|
! Do it on the host side
|
|
|
|
!!$ info = psb_err_internal_error_
|
|
|
|
if ((alpha /= dzero).and.(x%is_dev())) call x%sync()
|
|
|
|
!!$ else
|
|
|
|
call y%axpby(m,alpha,x%v,beta,info,n=n)
|
|
|
|
!!$ info = axpbyMultiVecDevice(m,alpha,xx%deviceVect,beta,y%deviceVect)
|
|
|
|
end select
|
|
|
|
!!$ end if
|
|
|
|
|
|
|
|
!!$ call y%set_dev()
|
|
|
|
end subroutine d_cuda_multi_axpby_v
|
|
|
|
!!$ class default
|
|
|
|
|
|
|
|
!!$ call x%sync()
|
|
|
|
subroutine d_cuda_multi_axpby_a(m,alpha, x, beta, y, info, n)
|
|
|
|
!!$ call y%axpby(m,alpha,x%v,beta,info)
|
|
|
|
use psi_serial_mod
|
|
|
|
!!$ end select
|
|
|
|
implicit none
|
|
|
|
!!$
|
|
|
|
integer(psb_ipk_), intent(in) :: m
|
|
|
|
!!$ end subroutine d_cuda_multi_axpby_v
|
|
|
|
real(psb_dpk_), intent(in) :: x(:,:)
|
|
|
|
!!$
|
|
|
|
class(psb_d_multivect_cuda), intent(inout) :: y
|
|
|
|
!!$ subroutine d_cuda_multi_axpby_a(m,alpha, x, beta, y, info)
|
|
|
|
real(psb_dpk_), intent (in) :: alpha, beta
|
|
|
|
!!$ use psi_serial_mod
|
|
|
|
integer(psb_ipk_), intent(out) :: info
|
|
|
|
!!$ implicit none
|
|
|
|
integer(psb_ipk_), intent(in), optional :: n
|
|
|
|
!!$ integer(psb_ipk_), intent(in) :: m
|
|
|
|
integer(psb_ipk_) :: nc
|
|
|
|
!!$ real(psb_dpk_), intent(in) :: x(:)
|
|
|
|
|
|
|
|
!!$ class(psb_d_multivect_cuda), intent(inout) :: y
|
|
|
|
if (present(n)) then
|
|
|
|
!!$ real(psb_dpk_), intent (in) :: alpha, beta
|
|
|
|
nc = n
|
|
|
|
!!$ integer(psb_ipk_), intent(out) :: info
|
|
|
|
else
|
|
|
|
!!$
|
|
|
|
nc = min(size(x,2),size(y%v,2))
|
|
|
|
!!$ if (y%is_dev()) call y%sync()
|
|
|
|
end if
|
|
|
|
!!$ call psb_geaxpby(m,alpha,x,beta,y%v,info)
|
|
|
|
if ((beta /= dzero).and.(y%is_dev())) call y%sync()
|
|
|
|
!!$ call y%set_host()
|
|
|
|
call psb_geaxpby(m,nc,alpha,x,beta,y%v,info)
|
|
|
|
!!$ end subroutine d_cuda_multi_axpby_a
|
|
|
|
call y%set_host()
|
|
|
|
!!$
|
|
|
|
end subroutine d_cuda_multi_axpby_a
|
|
|
|
|
|
|
|
|
|
|
|
!!$ subroutine d_cuda_multi_mlt_v(x, y, info)
|
|
|
|
!!$ subroutine d_cuda_multi_mlt_v(x, y, info)
|
|
|
|
!!$ use psi_serial_mod
|
|
|
|
!!$ use psi_serial_mod
|
|
|
|
!!$ implicit none
|
|
|
|
!!$ implicit none
|
|
|
@ -1860,41 +1862,50 @@ contains
|
|
|
|
!!$ call x%psb_d_base_multivect_type%scal(alpha)
|
|
|
|
!!$ call x%psb_d_base_multivect_type%scal(alpha)
|
|
|
|
!!$ call x%set_host()
|
|
|
|
!!$ call x%set_host()
|
|
|
|
!!$ end subroutine d_cuda_multi_scal
|
|
|
|
!!$ end subroutine d_cuda_multi_scal
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!!$
|
|
|
|
function d_cuda_multi_nrm2(nr,x) result(res)
|
|
|
|
!!$ function d_cuda_multi_nrm2(n,x) result(res)
|
|
|
|
implicit none
|
|
|
|
!!$ implicit none
|
|
|
|
class(psb_d_multivect_cuda), intent(inout) :: x
|
|
|
|
!!$ class(psb_d_multivect_cuda), intent(inout) :: x
|
|
|
|
integer(psb_ipk_), intent(in) :: nr
|
|
|
|
!!$ integer(psb_ipk_), intent(in) :: n
|
|
|
|
real(psb_dpk_), allocatable :: res(:)
|
|
|
|
!!$ real(psb_dpk_) :: res
|
|
|
|
integer(psb_ipk_) :: info
|
|
|
|
!!$ integer(psb_ipk_) :: info
|
|
|
|
! WARNING: this should be changed.
|
|
|
|
!!$ ! WARNING: this should be changed.
|
|
|
|
if (x%is_host()) call x%sync()
|
|
|
|
!!$ if (x%is_host()) call x%sync()
|
|
|
|
allocate(res(x%get_ncols()))
|
|
|
|
!!$ info = nrm2MultiVecDevice(res,n,x%deviceVect)
|
|
|
|
info = nrm2MultiVecDevice(res,nr,x%deviceVect)
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!!$ end function d_cuda_multi_nrm2
|
|
|
|
end function d_cuda_multi_nrm2
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!!$ function d_cuda_multi_amax(n,x) result(res)
|
|
|
|
function d_cuda_multi_amax(nr,x) result(res)
|
|
|
|
!!$ implicit none
|
|
|
|
implicit none
|
|
|
|
!!$ class(psb_d_multivect_cuda), intent(inout) :: x
|
|
|
|
class(psb_d_multivect_cuda), intent(inout) :: x
|
|
|
|
!!$ integer(psb_ipk_), intent(in) :: n
|
|
|
|
integer(psb_ipk_), intent(in) :: nr
|
|
|
|
!!$ real(psb_dpk_) :: res
|
|
|
|
real(psb_dpk_) :: res
|
|
|
|
!!$
|
|
|
|
integer(psb_ipk_) :: i, nc
|
|
|
|
!!$ if (x%is_dev()) call x%sync()
|
|
|
|
|
|
|
|
!!$ res = maxval(abs(x%v(1:n)))
|
|
|
|
if (x%is_dev()) call x%sync()
|
|
|
|
!!$
|
|
|
|
nc = x%get_ncols()
|
|
|
|
!!$ end function d_cuda_multi_amax
|
|
|
|
res = 0
|
|
|
|
!!$
|
|
|
|
do i=1,nr
|
|
|
|
!!$ function d_cuda_multi_asum(n,x) result(res)
|
|
|
|
res = max(res,sum(abs(x%v(i,1:nc))))
|
|
|
|
!!$ implicit none
|
|
|
|
end do
|
|
|
|
!!$ class(psb_d_multivect_cuda), intent(inout) :: x
|
|
|
|
|
|
|
|
!!$ integer(psb_ipk_), intent(in) :: n
|
|
|
|
end function d_cuda_multi_amax
|
|
|
|
!!$ real(psb_dpk_) :: res
|
|
|
|
|
|
|
|
!!$
|
|
|
|
function d_cuda_multi_asum(nr,x) result(res)
|
|
|
|
!!$ if (x%is_dev()) call x%sync()
|
|
|
|
implicit none
|
|
|
|
!!$ res = sum(abs(x%v(1:n)))
|
|
|
|
class(psb_d_multivect_cuda), intent(inout) :: x
|
|
|
|
!!$
|
|
|
|
integer(psb_ipk_), intent(in) :: nr
|
|
|
|
!!$ end function d_cuda_multi_asum
|
|
|
|
real(psb_dpk_) :: res
|
|
|
|
|
|
|
|
integer(psb_ipk_) :: j
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (x%is_dev()) call x%sync()
|
|
|
|
|
|
|
|
res = 0
|
|
|
|
|
|
|
|
do j=1,x%get_ncols()
|
|
|
|
|
|
|
|
res = max(res,sum(abs(x%v(1:nr,j))))
|
|
|
|
|
|
|
|
end do
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
end function d_cuda_multi_asum
|
|
|
|
|
|
|
|
|
|
|
|
subroutine d_cuda_multi_all(m,n, x, info)
|
|
|
|
subroutine d_cuda_multi_all(m,n, x, info)
|
|
|
|
use psi_serial_mod
|
|
|
|
use psi_serial_mod
|
|
|
|