You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
psblas3/openacc/psb_d_oacc_vect_mod.F90

965 lines
29 KiB
Fortran

module psb_d_oacc_vect_mod
use iso_c_binding
use psb_const_mod
use psb_error_mod
use psb_d_vect_mod
use psb_i_vect_mod
use psb_i_oacc_vect_mod
integer(psb_ipk_), parameter, private :: is_host = -1
integer(psb_ipk_), parameter, private :: is_sync = 0
integer(psb_ipk_), parameter, private :: is_dev = 1
type, extends(psb_d_base_vect_type) :: psb_d_vect_oacc
integer :: state = is_host
contains
!!$ procedure, pass(x) :: get_nrows => d_oacc_get_nrows
!!$ procedure, nopass :: get_fmt => d_oacc_get_fmt
!!$
!!$ procedure, pass(x) :: all => d_oacc_vect_all
!!$ procedure, pass(x) :: zero => d_oacc_zero
!!$ procedure, pass(x) :: asb_m => d_oacc_asb_m
procedure, pass(x) :: sync => d_oacc_sync
procedure, pass(x) :: sync_space => d_oacc_sync_space
!!$ procedure, pass(x) :: bld_x => d_oacc_bld_x
!!$ procedure, pass(x) :: bld_mn => d_oacc_bld_mn
!!$ procedure, pass(x) :: free => d_oacc_vect_free
!!$ procedure, pass(x) :: ins_a => d_oacc_ins_a
!!$ procedure, pass(x) :: ins_v => d_oacc_ins_v
procedure, pass(x) :: is_host => d_oacc_is_host
procedure, pass(x) :: is_dev => d_oacc_is_dev
procedure, pass(x) :: is_sync => d_oacc_is_sync
procedure, pass(x) :: set_host => d_oacc_set_host
procedure, pass(x) :: set_dev => d_oacc_set_dev
procedure, pass(x) :: set_sync => d_oacc_set_sync
!!$ procedure, pass(x) :: set_scal => d_oacc_set_scal
!!$
!!$ procedure, pass(x) :: gthzv_x => d_oacc_gthzv_x
!!$ procedure, pass(x) :: gthzbuf_x => d_oacc_gthzbuf
!!$ procedure, pass(y) :: sctb => d_oacc_sctb
!!$ procedure, pass(y) :: sctb_x => d_oacc_sctb_x
!!$ procedure, pass(y) :: sctb_buf => d_oacc_sctb_buf
!!$
!!$ procedure, pass(x) :: get_size => oacc_get_size
!!$ procedure, pass(x) :: dot_v => d_oacc_vect_dot
!!$ procedure, pass(x) :: dot_a => d_oacc_dot_a
!!$ procedure, pass(y) :: axpby_v => d_oacc_axpby_v
!!$ procedure, pass(y) :: axpby_a => d_oacc_axpby_a
!!$ procedure, pass(z) :: abgdxyz => d_oacc_abgdxyz
!!$ procedure, pass(y) :: mlt_v => d_oacc_mlt_v
!!$ procedure, pass(y) :: mlt_a => d_oacc_mlt_a
!!$ procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2
!!$ procedure, pass(z) :: mlt_v_2 => d_oacc_mlt_v_2
!!$ procedure, pass(x) :: scal => d_oacc_scal
!!$ procedure, pass(x) :: nrm2 => d_oacc_nrm2
!!$ procedure, pass(x) :: amax => d_oacc_amax
!!$ procedure, pass(x) :: asum => d_oacc_asum
procedure, pass(x) :: absval1 => d_oacc_absval1
!!$ procedure, pass(x) :: absval2 => d_oacc_absval2
end type psb_d_vect_oacc
real(psb_dpk_), allocatable :: v1(:),v2(:),p(:)
contains
subroutine d_oacc_absval1(x)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: n, i
if (x%is_host()) call x%sync_space()
n = size(x%v)
!$acc parallel loop
do i = 1, n
x%v(i) = abs(x%v(i))
end do
call x%set_dev()
end subroutine d_oacc_absval1
!!$ subroutine d_oacc_absval2(x, y)
!!$ implicit none
!!$ class(psb_d_vect_oacc), intent(inout) :: x
!!$ class(psb_d_base_vect_type), intent(inout) :: y
!!$ integer(psb_ipk_) :: n
!!$ integer(psb_ipk_) :: i
!!$
!!$ n = min(size(x%v), size(y%v))
!!$ select type (yy => y)
!!$ class is (psb_d_vect_oacc)
!!$ if (x%is_host()) call x%sync()
!!$ if (yy%is_host()) call yy%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ yy%v(i) = abs(x%v(i))
!!$ end do
!!$ class default
!!$ if (x%is_dev()) call x%sync()
!!$ if (y%is_dev()) call y%sync()
!!$ call x%psb_d_base_vect_type%absval(y)
!!$ end select
!!$ end subroutine d_oacc_absval2
!!$
!!$ subroutine d_oacc_scal(alpha, x)
!!$ implicit none
!!$ class(psb_d_vect_oacc), intent(inout) :: x
!!$ real(psb_dpk_), intent(in) :: alpha
!!$ integer(psb_ipk_) :: info
!!$ integer(psb_ipk_) :: i
!!$
!!$ if (x%is_host()) call x%sync_space()
!!$ !$acc parallel loop
!!$ do i = 1, size(x%v)
!!$ x%v(i) = alpha * x%v(i)
!!$ end do
!!$ call x%set_dev()
!!$ end subroutine d_oacc_scal
!!$
!!$ function d_oacc_nrm2(n, x) result(res)
!!$ implicit none
!!$ class(psb_d_vect_oacc), intent(inout) :: x
!!$ integer(psb_ipk_), intent(in) :: n
!!$ real(psb_dpk_) :: res
!!$ integer(psb_ipk_) :: info
!!$ real(psb_dpk_) :: sum
!!$ integer(psb_ipk_) :: i
!!$
!!$ if (x%is_host()) call x%sync_space()
!!$ sum = 0.0
!!$ !$acc parallel loop reduction(+:sum)
!!$ do i = 1, n
!!$ sum = sum + x%v(i) * x%v(i)
!!$ end do
!!$ res = sqrt(sum)
!!$ end function d_oacc_nrm2
!!$
!!$ function d_oacc_amax(n, x) result(res)
!!$ implicit none
!!$ class(psb_d_vect_oacc), intent(inout) :: x
!!$ integer(psb_ipk_), intent(in) :: n
!!$ real(psb_dpk_) :: res
!!$ integer(psb_ipk_) :: info
!!$ real(psb_dpk_) :: max_val
!!$ integer(psb_ipk_) :: i
!!$
!!$ if (x%is_host()) call x%sync_space()
!!$ max_val = -huge(0.0)
!!$ !$acc parallel loop reduction(max:max_val)
!!$ do i = 1, n
!!$ if (x%v(i) > max_val) max_val = x%v(i)
!!$ end do
!!$ res = max_val
!!$ end function d_oacc_amax
!!$
!!$ function d_oacc_asum(n, x) result(res)
!!$ implicit none
!!$ class(psb_d_vect_oacc), intent(inout) :: x
!!$ integer(psb_ipk_), intent(in) :: n
!!$ real(psb_dpk_) :: res
!!$ integer(psb_ipk_) :: info
!!$ real(psb_dpk_) :: sum
!!$ integer(psb_ipk_) :: i
!!$
!!$ if (x%is_host()) call x%sync_space()
!!$ sum = 0.0
!!$ !$acc parallel loop reduction(+:sum)
!!$ do i = 1, n
!!$ sum = sum + abs(x%v(i))
!!$ end do
!!$ res = sum
!!$ end function d_oacc_asum
!!$
!!$
!!$ subroutine d_oacc_mlt_v(x, y, info)
!!$ use psi_serial_mod
!!$ implicit none
!!$ class(psb_d_base_vect_type), intent(inout) :: x
!!$ class(psb_d_vect_oacc), intent(inout) :: y
!!$ integer(psb_ipk_), intent(out) :: info
!!$
!!$ integer(psb_ipk_) :: i, n
!!$
!!$ info = 0
!!$ n = min(x%get_nrows(), y%get_nrows())
!!$ select type(xx => x)
!!$ type is (psb_d_base_vect_type)
!!$ if (y%is_dev()) call y%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ y%v(i) = y%v(i) * xx%v(i)
!!$ end do
!!$ call y%set_host()
!!$ class default
!!$ if (xx%is_dev()) call xx%sync()
!!$ if (y%is_dev()) call y%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ y%v(i) = y%v(i) * xx%v(i)
!!$ end do
!!$ call y%set_host()
!!$ end select
!!$ end subroutine d_oacc_mlt_v
!!$
!!$ subroutine d_oacc_mlt_a(x, y, info)
!!$ use psi_serial_mod
!!$ implicit none
!!$ real(psb_dpk_), intent(in) :: x(:)
!!$ class(psb_d_vect_oacc), intent(inout) :: y
!!$ integer(psb_ipk_), intent(out) :: info
!!$ integer(psb_ipk_) :: i, n
!!$
!!$ info = 0
!!$ if (y%is_dev()) call y%sync_space()
!!$ !$acc parallel loop
!!$ do i = 1, size(x)
!!$ y%v(i) = y%v(i) * x(i)
!!$ end do
!!$ call y%set_host()
!!$ end subroutine d_oacc_mlt_a
!!$
!!$ subroutine d_oacc_mlt_a_2(alpha, x, y, beta, z, info)
!!$ use psi_serial_mod
!!$ implicit none
!!$ real(psb_dpk_), intent(in) :: alpha, beta
!!$ real(psb_dpk_), intent(in) :: x(:)
!!$ real(psb_dpk_), intent(in) :: y(:)
!!$ class(psb_d_vect_oacc), intent(inout) :: z
!!$ integer(psb_ipk_), intent(out) :: info
!!$ integer(psb_ipk_) :: i, n
!!$
!!$ info = 0
!!$ if (z%is_dev()) call z%sync_space()
!!$ !$acc parallel loop
!!$ do i = 1, size(x)
!!$ z%v(i) = alpha * x(i) * y(i) + beta * z%v(i)
!!$ end do
!!$ call z%set_host()
!!$ end subroutine d_oacc_mlt_a_2
!!$
!!$ subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
!!$ use psi_serial_mod
!!$ use psb_string_mod
!!$ implicit none
!!$ real(psb_dpk_), intent(in) :: alpha, beta
!!$ class(psb_d_base_vect_type), intent(inout) :: x
!!$ class(psb_d_base_vect_type), intent(inout) :: y
!!$ class(psb_d_vect_oacc), intent(inout) :: z
!!$ integer(psb_ipk_), intent(out) :: info
!!$ character(len=1), intent(in), optional :: conjgx, conjgy
!!$ integer(psb_ipk_) :: i, n
!!$ logical :: conjgx_, conjgy_
!!$
!!$ conjgx_ = .false.
!!$ conjgy_ = .false.
!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
!!$
!!$ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
!!$
!!$ info = 0
!!$ select type(xx => x)
!!$ class is (psb_d_vect_oacc)
!!$ select type (yy => y)
!!$ class is (psb_d_vect_oacc)
!!$ if (xx%is_host()) call xx%sync_space()
!!$ if (yy%is_host()) call yy%sync_space()
!!$ if ((beta /= dzero) .and. (z%is_host())) call z%sync_space()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
!!$ end do
!!$ call z%set_dev()
!!$ class default
!!$ if (xx%is_dev()) call xx%sync_space()
!!$ if (yy%is_dev()) call yy%sync()
!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
!!$ end do
!!$ call z%set_host()
!!$ end select
!!$ class default
!!$ if (x%is_dev()) call x%sync()
!!$ if (y%is_dev()) call y%sync()
!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i)
!!$ end do
!!$ call z%set_host()
!!$ end select
!!$ end subroutine d_oacc_mlt_v_2
!!$
!!$
!!$ subroutine d_oacc_axpby_v(m, alpha, x, beta, y, info)
!!$ use psi_serial_mod
!!$ implicit none
!!$ integer(psb_ipk_), intent(in) :: m
!!$ class(psb_d_base_vect_type), intent(inout) :: x
!!$ class(psb_d_vect_oacc), intent(inout) :: y
!!$ real(psb_dpk_), intent(in) :: alpha, beta
!!$ integer(psb_ipk_), intent(out) :: info
!!$ integer(psb_ipk_) :: nx, ny, i
!!$
!!$ info = psb_success_
!!$
!!$ select type(xx => x)
!!$ type is (psb_d_vect_oacc)
!!$ if ((beta /= dzero) .and. y%is_host()) call y%sync_space()
!!$ if (xx%is_host()) call xx%sync_space()
!!$ nx = size(xx%v)
!!$ ny = size(y%v)
!!$ if ((nx < m) .or. (ny < m)) then
!!$ info = psb_err_internal_error_
!!$ else
!!$ !$acc parallel loop
!!$ do i = 1, m
!!$ y%v(i) = alpha * xx%v(i) + beta * y%v(i)
!!$ end do
!!$ end if
!!$ call y%set_dev()
!!$ class default
!!$ if ((alpha /= dzero) .and. (x%is_dev())) call x%sync()
!!$ call y%axpby(m, alpha, x%v, beta, info)
!!$ end select
!!$ end subroutine d_oacc_axpby_v
!!$
!!$ subroutine d_oacc_axpby_a(m, alpha, x, beta, y, info)
!!$ use psi_serial_mod
!!$ implicit none
!!$ integer(psb_ipk_), intent(in) :: m
!!$ real(psb_dpk_), intent(in) :: x(:)
!!$ class(psb_d_vect_oacc), intent(inout) :: y
!!$ real(psb_dpk_), intent(in) :: alpha, beta
!!$ integer(psb_ipk_), intent(out) :: info
!!$ integer(psb_ipk_) :: i
!!$
!!$ if ((beta /= dzero) .and. (y%is_dev())) call y%sync_space()
!!$ !$acc parallel loop
!!$ do i = 1, m
!!$ y%v(i) = alpha * x(i) + beta * y%v(i)
!!$ end do
!!$ call y%set_host()
!!$ end subroutine d_oacc_axpby_a
!!$
!!$ subroutine d_oacc_abgdxyz(m, alpha, beta, gamma, delta, x, y, z, info)
!!$ use psi_serial_mod
!!$ implicit none
!!$ integer(psb_ipk_), intent(in) :: m
!!$ class(psb_d_base_vect_type), intent(inout) :: x
!!$ class(psb_d_base_vect_type), intent(inout) :: y
!!$ class(psb_d_vect_oacc), intent(inout) :: z
!!$ real(psb_dpk_), intent(in) :: alpha, beta, gamma, delta
!!$ integer(psb_ipk_), intent(out) :: info
!!$ integer(psb_ipk_) :: nx, ny, nz, i
!!$ logical :: gpu_done
!!$
!!$ info = psb_success_
!!$ gpu_done = .false.
!!$
!!$ select type(xx => x)
!!$ class is (psb_d_vect_oacc)
!!$ select type(yy => y)
!!$ class is (psb_d_vect_oacc)
!!$ select type(zz => z)
!!$ class is (psb_d_vect_oacc)
!!$ if ((beta /= dzero) .and. yy%is_host()) call yy%sync_space()
!!$ if ((delta /= dzero) .and. zz%is_host()) call zz%sync_space()
!!$ if (xx%is_host()) call xx%sync_space()
!!$ nx = size(xx%v)
!!$ ny = size(yy%v)
!!$ nz = size(zz%v)
!!$ if ((nx < m) .or. (ny < m) .or. (nz < m)) then
!!$ info = psb_err_internal_error_
!!$ else
!!$ !$acc parallel loop
!!$ do i = 1, m
!!$ yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
!!$ zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
!!$ end do
!!$ end if
!!$ call yy%set_dev()
!!$ call zz%set_dev()
!!$ gpu_done = .true.
!!$ end select
!!$ end select
!!$ end select
!!$
!!$ if (.not. gpu_done) then
!!$ if (x%is_host()) call x%sync()
!!$ if (y%is_host()) call y%sync()
!!$ if (z%is_host()) call z%sync()
!!$ call y%axpby(m, alpha, x, beta, info)
!!$ call z%axpby(m, gamma, y, delta, info)
!!$ end if
!!$ end subroutine d_oacc_abgdxyz
!!$
!!$
!!$ subroutine d_oacc_sctb_buf(i, n, idx, beta, y)
!!$ use psb_base_mod
!!$ implicit none
!!$ integer(psb_ipk_) :: i, n
!!$ class(psb_i_base_vect_type) :: idx
!!$ real(psb_dpk_) :: beta
!!$ class(psb_d_vect_oacc) :: y
!!$ integer(psb_ipk_) :: info
!!$
!!$ if (.not.allocated(y%combuf)) then
!!$ call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
!!$ return
!!$ end if
!!$
!!$ select type(ii => idx)
!!$ class is (psb_i_vect_oacc)
!!$ if (ii%is_host()) call ii%sync_space(info)
!!$ if (y%is_host()) call y%sync_space()
!!$
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i)
!!$ end do
!!$
!!$ class default
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i)
!!$ end do
!!$ end select
!!$ end subroutine d_oacc_sctb_buf
!!$
!!$ subroutine d_oacc_sctb_x(i, n, idx, x, beta, y)
!!$ use psb_base_mod
!!$ implicit none
!!$ integer(psb_ipk_):: i, n
!!$ class(psb_i_base_vect_type) :: idx
!!$ real(psb_dpk_) :: beta, x(:)
!!$ class(psb_d_vect_oacc) :: y
!!$ integer(psb_ipk_) :: info, ni
!!$
!!$ select type(ii => idx)
!!$ class is (psb_i_vect_oacc)
!!$ if (ii%is_host()) call ii%sync_space(info)
!!$ class default
!!$ call psb_errpush(info, 'd_oacc_sctb_x')
!!$ return
!!$ end select
!!$
!!$ if (y%is_host()) call y%sync_space()
!!$
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i)
!!$ end do
!!$
!!$ call y%set_dev()
!!$ end subroutine d_oacc_sctb_x
!!$
!!$
!!$
!!$ subroutine d_oacc_sctb(n, idx, x, beta, y)
!!$ use psb_base_mod
!!$ implicit none
!!$ integer(psb_ipk_) :: n
!!$ integer(psb_ipk_) :: idx(:)
!!$ real(psb_dpk_) :: beta, x(:)
!!$ class(psb_d_vect_oacc) :: y
!!$ integer(psb_ipk_) :: info
!!$ integer(psb_ipk_) :: i
!!$
!!$ if (n == 0) return
!!$ if (y%is_dev()) call y%sync_space()
!!$
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ y%v(idx(i)) = beta * y%v(idx(i)) + x(i)
!!$ end do
!!$
!!$ call y%set_host()
!!$ end subroutine d_oacc_sctb
!!$
!!$
!!$ subroutine d_oacc_gthzbuf(i, n, idx, x)
!!$ use psb_base_mod
!!$ implicit none
!!$ integer(psb_ipk_) :: i, n
!!$ class(psb_i_base_vect_type) :: idx
!!$ class(psb_d_vect_oacc) :: x
!!$ integer(psb_ipk_) :: info
!!$
!!$ info = 0
!!$ if (.not.allocated(x%combuf)) then
!!$ call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
!!$ return
!!$ end if
!!$
!!$ select type(ii => idx)
!!$ class is (psb_i_vect_oacc)
!!$ if (ii%is_host()) call ii%sync_space(info)
!!$ class default
!!$ call psb_errpush(info, 'd_oacc_gthzbuf')
!!$ return
!!$ end select
!!$
!!$ if (x%is_host()) call x%sync_space()
!!$
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ x%combuf(i) = x%v(idx%v(i))
!!$ end do
!!$ end subroutine d_oacc_gthzbuf
!!$
!!$ subroutine d_oacc_gthzv_x(i, n, idx, x, y)
!!$ use psb_base_mod
!!$ implicit none
!!$ integer(psb_ipk_) :: i, n
!!$ class(psb_i_base_vect_type):: idx
!!$ real(psb_dpk_) :: y(:)
!!$ class(psb_d_vect_oacc):: x
!!$ integer(psb_ipk_) :: info
!!$
!!$ info = 0
!!$
!!$ select type(ii => idx)
!!$ class is (psb_i_vect_oacc)
!!$ if (ii%is_host()) call ii%sync_space(info)
!!$ class default
!!$ call psb_errpush(info, 'd_oacc_gthzv_x')
!!$ return
!!$ end select
!!$
!!$ if (x%is_host()) call x%sync_space()
!!$
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ y(i) = x%v(idx%v(i))
!!$ end do
!!$ end subroutine d_oacc_gthzv_x
!!$
!!$ subroutine d_oacc_ins_v(n, irl, val, dupl, x, info)
!!$ use psi_serial_mod
!!$ implicit none
!!$ class(psb_d_vect_oacc), intent(inout) :: x
!!$ integer(psb_ipk_), intent(in) :: n, dupl
!!$ class(psb_i_base_vect_type), intent(inout) :: irl
!!$ class(psb_d_base_vect_type), intent(inout) :: val
!!$ integer(psb_ipk_), intent(out) :: info
!!$
!!$ integer(psb_ipk_) :: i, isz
!!$ logical :: done_oacc
!!$
!!$ info = 0
!!$ if (psb_errstatus_fatal()) return
!!$
!!$ done_oacc = .false.
!!$ select type(virl => irl)
!!$ type is (psb_i_vect_oacc)
!!$ select type(vval => val)
!!$ type is (psb_d_vect_oacc)
!!$ if (vval%is_host()) call vval%sync_space()
!!$ if (virl%is_host()) call virl%sync_space(info)
!!$ if (x%is_host()) call x%sync_space()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ x%v(virl%v(i)) = vval%v(i)
!!$ end do
!!$ call x%set_dev()
!!$ done_oacc = .true.
!!$ end select
!!$ end select
!!$
!!$ if (.not.done_oacc) then
!!$ select type(virl => irl)
!!$ type is (psb_i_vect_oacc)
!!$ if (virl%is_dev()) call virl%sync_space(info)
!!$ end select
!!$ select type(vval => val)
!!$ type is (psb_d_vect_oacc)
!!$ if (vval%is_dev()) call vval%sync_space()
!!$ end select
!!$ call x%ins(n, irl%v, val%v, dupl, info)
!!$ end if
!!$
!!$ if (info /= 0) then
!!$ call psb_errpush(info, 'oacc_vect_ins')
!!$ return
!!$ end if
!!$
!!$ end subroutine d_oacc_ins_v
!!$
!!$
!!$
!!$ subroutine d_oacc_ins_a(n, irl, val, dupl, x, info)
!!$ use psi_serial_mod
!!$ implicit none
!!$ class(psb_d_vect_oacc), intent(inout) :: x
!!$ integer(psb_ipk_), intent(in) :: n, dupl
!!$ integer(psb_ipk_), intent(in) :: irl(:)
!!$ real(psb_dpk_), intent(in) :: val(:)
!!$ integer(psb_ipk_), intent(out) :: info
!!$
!!$ integer(psb_ipk_) :: i
!!$
!!$ info = 0
!!$ if (x%is_dev()) call x%sync_space()
!!$ call x%psb_d_base_vect_type%ins(n, irl, val, dupl, info)
!!$ call x%set_host()
!!$ !$acc update device(x%v)
!!$
!!$ end subroutine d_oacc_ins_a
!!$
!!$
!!$
!!$ subroutine d_oacc_bld_mn(x, n)
!!$ use psb_base_mod
!!$ implicit none
!!$ integer(psb_mpk_), intent(in) :: n
!!$ class(psb_d_vect_oacc), intent(inout) :: x
!!$ integer(psb_ipk_) :: info
!!$
!!$ call x%all(n, info)
!!$ if (info /= 0) then
!!$ call psb_errpush(info, 'd_oacc_bld_mn', i_err=(/n, n, n, n, n/))
!!$ end if
!!$ call x%set_host()
!!$ !$acc update device(x%v)
!!$
!!$ end subroutine d_oacc_bld_mn
!!$
!!$
!!$ subroutine d_oacc_bld_x(x, this)
!!$ use psb_base_mod
!!$ implicit none
!!$ real(psb_dpk_), intent(in) :: this(:)
!!$ class(psb_d_vect_oacc), intent(inout) :: x
!!$ integer(psb_ipk_) :: info
!!$
!!$ call psb_realloc(size(this), x%v, info)
!!$ if (info /= 0) then
!!$ info = psb_err_alloc_request_
!!$ call psb_errpush(info, 'd_oacc_bld_x', &
!!$ i_err=(/size(this), izero, izero, izero, izero/))
!!$ return
!!$ end if
!!$
!!$ x%v(:) = this(:)
!!$ call x%set_host()
!!$ !$acc update device(x%v)
!!$
!!$ end subroutine d_oacc_bld_x
!!$
!!$
!!$ subroutine d_oacc_asb_m(n, x, info)
!!$ use psb_base_mod
!!$ implicit none
!!$ integer(psb_mpk_), intent(in) :: n
!!$ class(psb_d_vect_oacc), intent(inout) :: x
!!$ integer(psb_ipk_), intent(out) :: info
!!$ integer(psb_mpk_) :: nd
!!$
!!$ info = psb_success_
!!$
!!$ if (x%is_dev()) then
!!$ nd = size(x%v)
!!$ if (nd < n) then
!!$ call x%sync()
!!$ call x%psb_d_base_vect_type%asb(n, info)
!!$ if (info == psb_success_) call x%sync_space()
!!$ call x%set_host()
!!$ end if
!!$ else
!!$ if (size(x%v) < n) then
!!$ call x%psb_d_base_vect_type%asb(n, info)
!!$ if (info == psb_success_) call x%sync_space()
!!$ call x%set_host()
!!$ end if
!!$ end if
!!$ end subroutine d_oacc_asb_m
!!$
!!$
!!$
!!$ subroutine d_oacc_set_scal(x, val, first, last)
!!$ class(psb_d_vect_oacc), intent(inout) :: x
!!$ real(psb_dpk_), intent(in) :: val
!!$ integer(psb_ipk_), optional :: first, last
!!$
!!$ integer(psb_ipk_) :: first_, last_
!!$ first_ = 1
!!$ last_ = x%get_nrows()
!!$ if (present(first)) first_ = max(1, first)
!!$ if (present(last)) last_ = min(last, last_)
!!$
!!$ !$acc parallel loop
!!$ do i = first_, last_
!!$ x%v(i) = val
!!$ end do
!!$ !$acc end parallel loop
!!$
!!$ call x%set_dev()
!!$ end subroutine d_oacc_set_scal
!!$
!!$
!!$
!!$ subroutine d_oacc_zero(x)
!!$ use psi_serial_mod
!!$ implicit none
!!$ class(psb_d_vect_oacc), intent(inout) :: x
!!$ call x%set_dev()
!!$ call x%set_scal(dzero)
!!$ end subroutine d_oacc_zero
!!$
!!$ function d_oacc_get_nrows(x) result(res)
!!$ implicit none
!!$ class(psb_d_vect_oacc), intent(in) :: x
!!$ integer(psb_ipk_) :: res
!!$
!!$ if (allocated(x%v)) res = size(x%v)
!!$ end function d_oacc_get_nrows
!!$
!!$ function d_oacc_get_fmt() result(res)
!!$ implicit none
!!$ character(len=5) :: res
!!$ res = "dOACC"
!!$
!!$ end function d_oacc_get_fmt
!!$
!!$ function d_oacc_vect_dot(n, x, y) result(res)
!!$ implicit none
!!$ class(psb_d_vect_oacc), intent(inout) :: x
!!$ class(psb_d_base_vect_type), intent(inout) :: y
!!$ integer(psb_ipk_), intent(in) :: n
!!$ real(psb_dpk_) :: res
!!$ real(psb_dpk_), external :: ddot
!!$ integer(psb_ipk_) :: info
!!$ integer(psb_ipk_) :: i
!!$
!!$ res = dzero
!!$
!!$ select type(yy => y)
!!$ type is (psb_d_base_vect_type)
!!$ if (x%is_dev()) call x%sync()
!!$ res = ddot(n, x%v, 1, yy%v, 1)
!!$ type is (psb_d_vect_oacc)
!!$ if (x%is_host()) call x%sync()
!!$ if (yy%is_host()) call yy%sync()
!!$
!!$ !$acc parallel loop reduction(+:res) present(x%v, yy%v)
!!$ do i = 1, n
!!$ res = res + x%v(i) * yy%v(i)
!!$ end do
!!$ !$acc end parallel loop
!!$
!!$ class default
!!$ call x%sync()
!!$ res = y%dot(n, x%v)
!!$ end select
!!$
!!$ end function d_oacc_vect_dot
!!$
!!$
!!$
!!$
!!$ function d_oacc_dot_a(n, x, y) result(res)
!!$ implicit none
!!$ class(psb_d_vect_oacc), intent(inout) :: x
!!$ real(psb_dpk_), intent(in) :: y(:)
!!$ integer(psb_ipk_), intent(in) :: n
!!$ real(psb_dpk_) :: res
!!$ real(psb_dpk_), external :: ddot
!!$
!!$ if (x%is_dev()) call x%sync()
!!$ res = ddot(n, y, 1, x%v, 1)
!!$
!!$ end function d_oacc_dot_a
!!$
!!$ ! subroutine d_oacc_set_vect(x,y)
!!$ ! implicit none
!!$ ! class(psb_d_vect_oacc), intent(inout) :: x
!!$ ! real(psb_dpk_), intent(in) :: y(:)
!!$ ! integer(psb_ipk_) :: info
!!$
!!$ ! if (size(x%v) /= size(y)) then
!!$ ! call x%free(info)
!!$ ! call x%all(size(y),info)
!!$ ! end if
!!$ ! x%v(:) = y(:)
!!$ ! call x%set_host()
!!$ ! end subroutine d_oacc_set_vect
!!$
!!$ subroutine d_oacc_to_dev(v)
!!$ implicit none
!!$ real(psb_dpk_) :: v(:)
!!$ !$acc update device(v)
!!$ end subroutine d_oacc_to_dev
!!$
!!$ subroutine d_oacc_to_host(v)
!!$ implicit none
!!$ real(psb_dpk_) :: v(:)
!!$ !$acc update self(v)
!!$ end subroutine d_oacc_to_host
!!$
subroutine d_oacc_sync_space(x)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
if (allocated(x%v)) then
call d_oacc_create_dev(x%v)
end if
contains
subroutine d_oacc_create_dev(v)
implicit none
real(psb_dpk_) :: v(:)
!$acc enter data copyin(v)
end subroutine d_oacc_create_dev
end subroutine d_oacc_sync_space
subroutine d_oacc_sync(x)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
if (x%is_dev()) then
call d_oacc_to_host(x%v)
end if
if (x%is_host()) then
call d_oacc_to_dev(x%v)
end if
call x%set_sync()
end subroutine d_oacc_sync
subroutine d_oacc_set_host(x)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
x%state = is_host
end subroutine d_oacc_set_host
subroutine d_oacc_set_dev(x)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
x%state = is_dev
end subroutine d_oacc_set_dev
subroutine d_oacc_set_sync(x)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
x%state = is_sync
end subroutine d_oacc_set_sync
function d_oacc_is_dev(x) result(res)
implicit none
class(psb_d_vect_oacc), intent(in) :: x
logical :: res
res = (x%state == is_dev)
end function d_oacc_is_dev
function d_oacc_is_host(x) result(res)
implicit none
class(psb_d_vect_oacc), intent(in) :: x
logical :: res
res = (x%state == is_host)
end function d_oacc_is_host
function d_oacc_is_sync(x) result(res)
implicit none
class(psb_d_vect_oacc), intent(in) :: x
logical :: res
res = (x%state == is_sync)
end function d_oacc_is_sync
!!$
!!$ subroutine d_oacc_vect_all(n, x, info)
!!$ use psi_serial_mod
!!$ use psb_realloc_mod
!!$ implicit none
!!$ integer(psb_ipk_), intent(in) :: n
!!$ class(psb_d_vect_oacc), intent(out) :: x
!!$ integer(psb_ipk_), intent(out) :: info
!!$
!!$ call psb_realloc(n, x%v, info)
!!$ if (info == 0) then
!!$ call x%set_host()
!!$ !$acc enter data create(x%v)
!!$ call x%sync_space()
!!$ end if
!!$ if (info /= 0) then
!!$ info = psb_err_alloc_request_
!!$ call psb_errpush(info, 'd_oacc_all', &
!!$ i_err=(/n, n, n, n, n/))
!!$ end if
!!$ end subroutine d_oacc_vect_all
!!$
!!$
!!$ subroutine d_oacc_vect_free(x, info)
!!$ implicit none
!!$ class(psb_d_vect_oacc), intent(inout) :: x
!!$ integer(psb_ipk_), intent(out) :: info
!!$ info = 0
!!$ if (allocated(x%v)) then
!!$ !$acc exit data delete(x%v) finalize
!!$ deallocate(x%v, stat=info)
!!$ end if
!!$
!!$ end subroutine d_oacc_vect_free
!!$
!!$ function oacc_get_size(x) result(res)
!!$ implicit none
!!$ class(psb_d_vect_oacc), intent(inout) :: x
!!$ integer(psb_ipk_) :: res
!!$
!!$ if (x%is_dev()) call x%sync()
!!$ res = size(x%v)
!!$ end function oacc_get_size
!!$
!!$
!!$ subroutine initialize(N)
!!$ integer(psb_ipk_) :: N
!!$ integer(psb_ipk_) :: i
!!$ allocate(v1(N),v2(N),p(N))
!!$ !$acc enter data create(v1,v2,p)
!!$ !$acc parallel
!!$ !$acc loop
!!$ do i=1,n
!!$ v1(i) = i
!!$ v2(i) = n+i
!!$ end do
!!$ !$acc end parallel
!!$ end subroutine initialize
!!$ subroutine finalize_dev()
!!$ !$acc exit data delete(v1,v2,p)
!!$ end subroutine finalize_dev
!!$ subroutine finalize_host()
!!$ deallocate(v1,v2,p)
!!$ end subroutine finalize_host
!!$ subroutine to_dev()
!!$ !$acc update device(v1,v2)
!!$ end subroutine to_dev
!!$ subroutine to_host()
!!$ !$acc update self(v1,v2)
!!$ end subroutine to_host
!!$ function d_dot(N) result(res)
!!$ real(kind(1.d0)) :: res
!!$ integer(psb_ipk_) :: i,N
!!$ real(kind(1.d0)) :: t1,t2,t3
!!$ res = 0.0d0
!!$ !$acc parallel
!!$ !$acc loop reduction(+:res)
!!$ do i=1,N
!!$ res = res + v1(i) * v2(i)
!!$ end do
!!$ !$acc end parallel
!!$
!!$ end function d_dot
!!$ function h_dot(N) result(res)
!!$ integer(psb_ipk_) :: i,N
!!$ real(kind(1.d0)) :: t1,t2,t3,res
!!$ res = 0.0d0
!!$ do i=1,N
!!$ res = res + v1(i) * v2(i)
!!$ end do
!!$ end function h_dot
!!$
end module psb_d_oacc_vect_mod