|
|
@ -3,8 +3,8 @@ module psb_d_oacc_vect_mod
|
|
|
|
use psb_const_mod
|
|
|
|
use psb_const_mod
|
|
|
|
use psb_error_mod
|
|
|
|
use psb_error_mod
|
|
|
|
use psb_d_vect_mod
|
|
|
|
use psb_d_vect_mod
|
|
|
|
use psb_i_vect_mod
|
|
|
|
|
|
|
|
use psb_i_oacc_vect_mod
|
|
|
|
use psb_i_oacc_vect_mod
|
|
|
|
|
|
|
|
use psb_i_vect_mod
|
|
|
|
|
|
|
|
|
|
|
|
integer(psb_ipk_), parameter, private :: is_host = -1
|
|
|
|
integer(psb_ipk_), parameter, private :: is_host = -1
|
|
|
|
integer(psb_ipk_), parameter, private :: is_sync = 0
|
|
|
|
integer(psb_ipk_), parameter, private :: is_sync = 0
|
|
|
@ -47,9 +47,9 @@ module psb_d_oacc_vect_mod
|
|
|
|
procedure, pass(y) :: axpby_v => d_oacc_axpby_v
|
|
|
|
procedure, pass(y) :: axpby_v => d_oacc_axpby_v
|
|
|
|
procedure, pass(y) :: axpby_a => d_oacc_axpby_a
|
|
|
|
procedure, pass(y) :: axpby_a => d_oacc_axpby_a
|
|
|
|
procedure, pass(z) :: abgdxyz => d_oacc_abgdxyz
|
|
|
|
procedure, pass(z) :: abgdxyz => d_oacc_abgdxyz
|
|
|
|
|
|
|
|
procedure, pass(y) :: mlt_v => d_oacc_mlt_v
|
|
|
|
procedure, pass(y) :: mlt_a => d_oacc_mlt_a
|
|
|
|
procedure, pass(y) :: mlt_a => d_oacc_mlt_a
|
|
|
|
procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2
|
|
|
|
procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2
|
|
|
|
procedure, pass(y) :: mlt_v => d_oacc_mlt_v
|
|
|
|
|
|
|
|
procedure, pass(z) :: mlt_v_2 => d_oacc_mlt_v_2
|
|
|
|
procedure, pass(z) :: mlt_v_2 => d_oacc_mlt_v_2
|
|
|
|
procedure, pass(x) :: scal => d_oacc_scal
|
|
|
|
procedure, pass(x) :: scal => d_oacc_scal
|
|
|
|
procedure, pass(x) :: nrm2 => d_oacc_nrm2
|
|
|
|
procedure, pass(x) :: nrm2 => d_oacc_nrm2
|
|
|
@ -62,30 +62,6 @@ module psb_d_oacc_vect_mod
|
|
|
|
|
|
|
|
|
|
|
|
real(psb_dpk_), allocatable :: v1(:),v2(:),p(:)
|
|
|
|
real(psb_dpk_), allocatable :: v1(:),v2(:),p(:)
|
|
|
|
|
|
|
|
|
|
|
|
! Explicit interface for d_oacc_mlt_v, the routine the vector type binds as
! mlt_v with y as the passed-object argument.
interface
  subroutine d_oacc_mlt_v(x, y, info)
    ! Only the host entities named in the declarations below are imported.
    import :: psb_d_base_vect_type, psb_d_vect_oacc, psb_ipk_
    implicit none
    class(psb_d_base_vect_type), intent(inout) :: x
    class(psb_d_vect_oacc), intent(inout) :: y
    integer(psb_ipk_), intent(out) :: info
  end subroutine d_oacc_mlt_v
end interface
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
! Explicit interface for d_oacc_mlt_v_2, the routine the vector type binds as
! mlt_v_2 with z as the passed-object argument.
interface
  subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
    ! Only the host entities named in the declarations below are imported.
    import :: psb_d_base_vect_type, psb_d_vect_oacc, psb_dpk_, psb_ipk_
    implicit none
    real(psb_dpk_), intent(in) :: alpha, beta
    class(psb_d_base_vect_type), intent(inout) :: x
    class(psb_d_base_vect_type), intent(inout) :: y
    class(psb_d_vect_oacc), intent(inout) :: z
    integer(psb_ipk_), intent(out) :: info
    character(len=1), intent(in), optional :: conjgx, conjgy
  end subroutine d_oacc_mlt_v_2
end interface
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
contains
|
|
|
|
contains
|
|
|
|
|
|
|
|
|
|
|
|
subroutine d_oacc_absval1(x)
|
|
|
|
subroutine d_oacc_absval1(x)
|
|
|
@ -125,6 +101,8 @@ contains
|
|
|
|
end select
|
|
|
|
end select
|
|
|
|
end subroutine d_oacc_absval2
|
|
|
|
end subroutine d_oacc_absval2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
subroutine d_oacc_scal(alpha, x)
|
|
|
|
subroutine d_oacc_scal(alpha, x)
|
|
|
|
implicit none
|
|
|
|
implicit none
|
|
|
|
class(psb_d_vect_oacc), intent(inout) :: x
|
|
|
|
class(psb_d_vect_oacc), intent(inout) :: x
|
|
|
@ -195,7 +173,38 @@ contains
|
|
|
|
end function d_oacc_asum
|
|
|
|
end function d_oacc_asum
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
!> Element-wise in-place product y(k) = y(k) * x(k) over the first
!> min(x%get_nrows(), y%get_nrows()) entries.
!>
!> x     vector supplying the multipliers
!> y     vector updated in place; flagged with set_host() before returning
!> info  always set to 0 by this routine
subroutine d_oacc_mlt_v(x, y, info)
  use psi_serial_mod
  implicit none
  class(psb_d_base_vect_type), intent(inout) :: x
  class(psb_d_vect_oacc), intent(inout) :: y
  integer(psb_ipk_), intent(out) :: info

  integer(psb_ipk_) :: k, nloc

  nloc = min(x%get_nrows(), y%get_nrows())
  info = 0

  select type (xv => x)
  type is (psb_d_base_vect_type)
    ! x is exactly the base type: only y is checked with is_dev() here.
    if (y%is_dev()) call y%sync()
    !$acc parallel loop
    do k = 1, nloc
      y%v(k) = y%v(k) * xv%v(k)
    end do
    call y%set_host()

  class default
    ! Any other dynamic type of x: both operands are checked with is_dev()
    ! and synchronised when it returns true.
    if (xv%is_dev()) call xv%sync()
    if (y%is_dev()) call y%sync()
    !$acc parallel loop
    do k = 1, nloc
      y%v(k) = y%v(k) * xv%v(k)
    end do
    call y%set_host()
  end select
end subroutine d_oacc_mlt_v
|
|
|
|
|
|
|
|
|
|
|
|
subroutine d_oacc_mlt_a(x, y, info)
|
|
|
|
subroutine d_oacc_mlt_a(x, y, info)
|
|
|
|
|
|
|
|
use psi_serial_mod
|
|
|
|
implicit none
|
|
|
|
implicit none
|
|
|
|
real(psb_dpk_), intent(in) :: x(:)
|
|
|
|
real(psb_dpk_), intent(in) :: x(:)
|
|
|
|
class(psb_d_vect_oacc), intent(inout) :: y
|
|
|
|
class(psb_d_vect_oacc), intent(inout) :: y
|
|
|
@ -212,6 +221,7 @@ contains
|
|
|
|
end subroutine d_oacc_mlt_a
|
|
|
|
end subroutine d_oacc_mlt_a
|
|
|
|
|
|
|
|
|
|
|
|
subroutine d_oacc_mlt_a_2(alpha, x, y, beta, z, info)
|
|
|
|
subroutine d_oacc_mlt_a_2(alpha, x, y, beta, z, info)
|
|
|
|
|
|
|
|
use psi_serial_mod
|
|
|
|
implicit none
|
|
|
|
implicit none
|
|
|
|
real(psb_dpk_), intent(in) :: alpha, beta
|
|
|
|
real(psb_dpk_), intent(in) :: alpha, beta
|
|
|
|
real(psb_dpk_), intent(in) :: x(:)
|
|
|
|
real(psb_dpk_), intent(in) :: x(:)
|
|
|
@ -229,94 +239,64 @@ contains
|
|
|
|
call z%set_host()
|
|
|
|
call z%set_host()
|
|
|
|
end subroutine d_oacc_mlt_a_2
|
|
|
|
end subroutine d_oacc_mlt_a_2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
!> z(k) = alpha * x(k) * y(k) + beta * z(k) over the first
!> min(x%get_nrows(), y%get_nrows(), z%get_nrows()) entries.
!>
!> alpha, beta     scalar coefficients
!> x, y            operand vectors; the branch taken depends on their
!>                 dynamic types
!> z               result vector, updated in place
!> info            always set to 0 by this routine
!> conjgx, conjgy  optional flags, upper-cased and compared against 'C';
!>                 the resulting logicals are not consulted afterwards
subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
  use psi_serial_mod
  use psb_string_mod
  implicit none
  real(psb_dpk_), intent(in) :: alpha, beta
  class(psb_d_base_vect_type), intent(inout) :: x
  class(psb_d_base_vect_type), intent(inout) :: y
  class(psb_d_vect_oacc), intent(inout) :: z
  integer(psb_ipk_), intent(out) :: info
  character(len=1), intent(in), optional :: conjgx, conjgy

  integer(psb_ipk_) :: k, nloc
  logical :: conj_x, conj_y

  info = 0

  ! Decode the optional conjugation flags (absent means .false.).
  if (present(conjgx)) then
    conj_x = (psb_toupper(conjgx) == 'C')
  else
    conj_x = .false.
  end if
  if (present(conjgy)) then
    conj_y = (psb_toupper(conjgy) == 'C')
  else
    conj_y = .false.
  end if

  nloc = min(x%get_nrows(), y%get_nrows(), z%get_nrows())

  select type (xo => x)
  class is (psb_d_vect_oacc)
    select type (yo => y)
    class is (psb_d_vect_oacc)
      ! x, y and z are all psb_d_vect_oacc: the result is flagged with
      ! set_dev(). z is only touched beforehand when beta is non-zero.
      if (xo%is_host()) call xo%sync_space()
      if (yo%is_host()) call yo%sync_space()
      if ((beta /= dzero) .and. (z%is_host())) call z%sync_space()
      !$acc parallel loop
      do k = 1, nloc
        z%v(k) = alpha * xo%v(k) * yo%v(k) + beta * z%v(k)
      end do
      call z%set_dev()

    class default
      ! x is psb_d_vect_oacc but y is not: the result is flagged with
      ! set_host().
      ! NOTE(review): x and z use sync_space() here while y uses sync();
      ! confirm the asymmetry is intended.
      if (xo%is_dev()) call xo%sync_space()
      if (yo%is_dev()) call yo%sync()
      if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space()
      !$acc parallel loop
      do k = 1, nloc
        z%v(k) = alpha * xo%v(k) * yo%v(k) + beta * z%v(k)
      end do
      call z%set_host()
    end select

  class default
    ! x is not psb_d_vect_oacc: the result is flagged with set_host().
    ! NOTE(review): x and y use sync() while z uses sync_space(); confirm.
    if (x%is_dev()) call x%sync()
    if (y%is_dev()) call y%sync()
    if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space()
    !$acc parallel loop
    do k = 1, nloc
      z%v(k) = alpha * x%v(k) * y%v(k) + beta * z%v(k)
    end do
    call z%set_host()
  end select
end subroutine d_oacc_mlt_v_2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
subroutine d_oacc_axpby_v(m, alpha, x, beta, y, info)
|
|
|
|
subroutine d_oacc_axpby_v(m, alpha, x, beta, y, info)
|
|
|
|
!use psi_serial_mod
|
|
|
|
use psi_serial_mod
|
|
|
|
implicit none
|
|
|
|
implicit none
|
|
|
|
integer(psb_ipk_), intent(in) :: m
|
|
|
|
integer(psb_ipk_), intent(in) :: m
|
|
|
|
class(psb_d_base_vect_type), intent(inout) :: x
|
|
|
|
class(psb_d_base_vect_type), intent(inout) :: x
|
|
|
@ -349,7 +329,7 @@ contains
|
|
|
|
end subroutine d_oacc_axpby_v
|
|
|
|
end subroutine d_oacc_axpby_v
|
|
|
|
|
|
|
|
|
|
|
|
subroutine d_oacc_axpby_a(m, alpha, x, beta, y, info)
|
|
|
|
subroutine d_oacc_axpby_a(m, alpha, x, beta, y, info)
|
|
|
|
!use psi_serial_mod
|
|
|
|
use psi_serial_mod
|
|
|
|
implicit none
|
|
|
|
implicit none
|
|
|
|
integer(psb_ipk_), intent(in) :: m
|
|
|
|
integer(psb_ipk_), intent(in) :: m
|
|
|
|
real(psb_dpk_), intent(in) :: x(:)
|
|
|
|
real(psb_dpk_), intent(in) :: x(:)
|
|
|
@ -418,6 +398,7 @@ contains
|
|
|
|
end if
|
|
|
|
end if
|
|
|
|
end subroutine d_oacc_abgdxyz
|
|
|
|
end subroutine d_oacc_abgdxyz
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
subroutine d_oacc_sctb_buf(i, n, idx, beta, y)
|
|
|
|
subroutine d_oacc_sctb_buf(i, n, idx, beta, y)
|
|
|
|
use psb_base_mod
|
|
|
|
use psb_base_mod
|
|
|
|
implicit none
|
|
|
|
implicit none
|
|
|
@ -934,32 +915,52 @@ contains
|
|
|
|
res = size(x%v)
|
|
|
|
res = size(x%v)
|
|
|
|
end function oacc_get_size
|
|
|
|
end function oacc_get_size
|
|
|
|
|
|
|
|
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!> Allocate the module work arrays v1, v2 and p with N entries each, create
!> them in the OpenACC data environment, and fill v1 and v2 inside an
!> OpenACC parallel region (v1(i) = i, v2(i) = N + i). p is allocated and
!> created but not assigned here.
!>
!> N  number of entries to allocate; read only
subroutine initialize(N)
  implicit none
  integer(psb_ipk_), intent(in) :: N
  integer(psb_ipk_) :: i

  allocate(v1(N), v2(N), p(N))
  !$acc enter data create(v1,v2,p)
  !$acc parallel
  !$acc loop
  do i = 1, N
    v1(i) = i
    v2(i) = N + i
  end do
  !$acc end parallel
end subroutine initialize
|
|
|
|
!> Issue OpenACC "exit data delete" for the module work arrays v1, v2 and p.
!> No deallocate statement is executed here.
subroutine finalize_dev()
  !$acc exit data delete(v1)
  !$acc exit data delete(v2)
  !$acc exit data delete(p)
end subroutine finalize_dev
|
|
|
|
!> Deallocate the module work arrays v1, v2 and p.
subroutine finalize_host()
  deallocate(v1)
  deallocate(v2)
  deallocate(p)
end subroutine finalize_host
|
|
|
|
!> Issue OpenACC "update device" for the module arrays v1 and v2.
subroutine to_dev()
  !$acc update device(v1)
  !$acc update device(v2)
end subroutine to_dev
|
|
|
|
!> Issue OpenACC "update self" for the module arrays v1 and v2.
subroutine to_host()
  !$acc update self(v1)
  !$acc update self(v2)
end subroutine to_host
|
|
|
|
!!$
|
|
|
|
!> Dot product of the first N entries of the module arrays v1 and v2,
!> accumulated inside an OpenACC parallel region with a sum reduction.
!>
!> N    number of leading entries to include; read only
!> res  the accumulated sum (0.0d0 when the loop does not execute)
function d_dot(N) result(res)
  implicit none
  integer(psb_ipk_), intent(in) :: N
  real(kind(1.d0)) :: res
  integer(psb_ipk_) :: i

  res = 0.0d0
  !$acc parallel
  !$acc loop reduction(+:res)
  do i = 1, N
    res = res + v1(i) * v2(i)
  end do
  !$acc end parallel
end function d_dot
|
|
|
|
|
|
|
|
!> Dot product of the first N entries of the module arrays v1 and v2,
!> accumulated by a plain sequential loop (no OpenACC directives).
!>
!> N    number of leading entries to include; read only
!> res  the accumulated sum (0.0d0 when the loop does not execute)
function h_dot(N) result(res)
  implicit none
  integer(psb_ipk_), intent(in) :: N
  real(kind(1.d0)) :: res
  integer(psb_ipk_) :: i

  ! An explicit loop is kept so the summation order stays exactly i = 1..N.
  res = 0.0d0
  do i = 1, N
    res = res + v1(i) * v2(i)
  end do
end function h_dot
|
|
|
|
|
|
|
|
|
|
|
|
end module psb_d_oacc_vect_mod
|
|
|
|
end module psb_d_oacc_vect_mod
|