Merge branch 'oacc_loloum' into repackage

repack-newsolve
sfilippone 4 months ago
commit 744f14d2f5

@ -257,9 +257,9 @@ contains
! but with size 0, then CREATE,UPDATE and DELETE
! will fail
!
if (psb_size(a%val)>0) call acc_create(a%val)
if (psb_size(a%ja)>0) call acc_create(a%ja)
if (psb_size(a%irp)>0) call acc_create(a%irp)
if (psb_size(a%val)>0) call acc_copyin(a%val)
if (psb_size(a%ja)>0) call acc_copyin(a%ja)
if (psb_size(a%irp)>0) call acc_copyin(a%irp)
end subroutine c_oacc_csr_sync_dev_space
subroutine c_oacc_csr_sync(a)

@ -186,10 +186,10 @@ contains
! but with size 0, then CREATE,UPDATE and DELETE
! will fail
!
if (psb_size(a%val)>0) call acc_create(a%val)
if (psb_size(a%ja)>0) call acc_create(a%ja)
if (psb_size(a%irn)>0) call acc_create(a%irn)
if (psb_size(a%idiag)>0) call acc_create(a%idiag)
if (psb_size(a%val)>0) call acc_copyin(a%val)
if (psb_size(a%ja)>0) call acc_copyin(a%ja)
if (psb_size(a%irn)>0) call acc_copyin(a%irn)
if (psb_size(a%idiag)>0) call acc_copyin(a%idiag)
end subroutine c_oacc_ell_sync_dev_space
function c_oacc_ell_is_host(a) result(res)

@ -240,11 +240,11 @@ contains
! but with size 0, then CREATE,UPDATE and DELETE
! will fail
!
if (psb_size(a%val)>0) call acc_create(a%val)
if (psb_size(a%ja)>0) call acc_create(a%ja)
if (psb_size(a%irn)>0) call acc_create(a%irn)
if (psb_size(a%idiag)>0) call acc_create(a%idiag)
if (psb_size(a%hkoffs)>0) call acc_create(a%hkoffs)
if (psb_size(a%val)>0) call acc_copyin(a%val)
if (psb_size(a%ja)>0) call acc_copyin(a%ja)
if (psb_size(a%irn)>0) call acc_copyin(a%irn)
if (psb_size(a%idiag)>0) call acc_copyin(a%idiag)
if (psb_size(a%hkoffs)>0) call acc_copyin(a%hkoffs)
end subroutine c_oacc_hll_sync_dev_space

@ -113,7 +113,7 @@ contains
complex(psb_spk_), intent(inout) :: x(:)
integer(psb_ipk_) :: n
integer(psb_ipk_) :: i
!$acc parallel loop
!$acc parallel loop present(x)
do i = 1, n
x(i) = abs(x(i))
end do
@ -144,7 +144,7 @@ contains
complex(psb_spk_), intent(inout) :: x(:),y(:)
integer(psb_ipk_) :: n
integer(psb_ipk_) :: i
!$acc parallel loop
!$acc parallel loop present(x,y)
do i = 1, n
y(i) = abs(x(i))
end do
@ -164,7 +164,7 @@ contains
complex(psb_spk_), intent(in) :: alpha
complex(psb_spk_), intent(inout) :: x(:)
integer(psb_ipk_) :: i
!$acc parallel loop
!$acc parallel loop present(x)
do i = 1, size(x)
x(i) = alpha * x(i)
end do
@ -189,7 +189,7 @@ contains
real(psb_spk_) :: sum, mx
integer(psb_ipk_) :: i
mx = szero
!$acc parallel loop reduction(max:mx)
!$acc parallel loop reduction(max:mx) present(x)
do i = 1, n
if (abs(x(i)) > mx) mx = abs(x(i))
end do
@ -197,7 +197,7 @@ contains
res = mx
else
sum = szero
!$acc parallel loop reduction(+:sum)
!$acc parallel loop reduction(+:sum) present(x)
do i = 1, n
sum = sum + abs(x(i)/mx)**2
end do
@ -223,7 +223,7 @@ contains
real(psb_spk_) :: max_val
integer(psb_ipk_) :: i
max_val = szero
!$acc parallel loop reduction(max:max_val)
!$acc parallel loop reduction(max:max_val) present(x)
do i = 1, n
if (abs(x(i)) > max_val) max_val = abs(x(i))
end do
@ -248,7 +248,7 @@ contains
real(psb_spk_) :: res
integer(psb_ipk_) :: i
res = szero
!$acc parallel loop reduction(+:res)
!$acc parallel loop reduction(+:res) present(x)
do i = 1, n
res = res + abs(x(i))
end do
@ -265,7 +265,7 @@ contains
info = 0
if (y%is_dev()) call y%sync()
!$acc parallel loop
!$acc parallel loop present(x,y)
do i = 1, size(x)
y%v(i) = y%v(i) * x(i)
end do
@ -283,7 +283,7 @@ contains
info = 0
if (z%is_dev()) call z%sync()
!$acc parallel loop
!$acc parallel loop present(x,y,z%v)
do i = 1, size(x)
z%v(i) = alpha * x(i) * y(i) + beta * z%v(i)
end do
@ -327,7 +327,7 @@ contains
complex(psb_spk_), intent(inout) :: y(:)
complex(psb_spk_), intent(in) :: alpha, beta
integer(psb_ipk_), intent(out) :: info
!$acc parallel
!$acc parallel present(x,y)
!$acc loop
do i = 1, m
y(i) = alpha * x(i) + beta * y(i)
@ -384,7 +384,7 @@ contains
if ((nx < m) .or. (ny < m) .or. (nz < m)) then
info = psb_err_internal_error_
else
!$acc parallel loop
!$acc parallel loop present(xx%v,yy%v,zz%v)
do i = 1, m
yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
@ -416,6 +416,7 @@ contains
integer(psb_ipk_) :: info, k
logical :: acc_done
if (.not.allocated(y%combuf)) then
write(0,*) 'allocation error for y%combuf '
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
return
end if
@ -443,8 +444,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
complex(psb_spk_) :: beta,x(:), y(:)
integer(psb_ipk_) :: k
!$acc update device(x(1:n)) async
!$acc parallel loop
!$acc update device(x(1:n))
!$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@ -488,8 +489,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
complex(psb_spk_) :: beta, x(:), y(:)
integer(psb_ipk_) :: k
!$acc update device(x(1:n)) async
!$acc parallel loop
!$acc update device(x(1:n))
!$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@ -531,6 +532,7 @@ contains
acc_done = .false.
if (.not.allocated(x%combuf)) then
write(0,*) 'oacc allocation error combuf gthzbuf '
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
return
end if
@ -556,13 +558,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
complex(psb_spk_) :: x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop present(y)
!
!$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
!$acc update self(y(1:n)) async
!$acc update self(y(1:n))
end subroutine inner_gth
end subroutine c_oacc_gthzbuf
@ -600,13 +602,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
complex(psb_spk_) :: x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop present(y)
!
!$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
!$acc update self(y(1:n)) async
!$acc update self(y(1:n))
end subroutine inner_gth
end subroutine c_oacc_gthzv_x
@ -633,7 +635,7 @@ contains
if (vval%is_host()) call vval%sync()
if (virl%is_host()) call virl%sync()
if (x%is_host()) call x%sync()
!$acc parallel loop
!$acc parallel loop present(x%v,virl%v,vval%v)
do i = 1, n
x%v(virl%v(i)) = vval%v(i)
end do
@ -757,7 +759,7 @@ contains
if (present(first)) first_ = max(1, first)
if (present(last)) last_ = min(last, last_)
!$acc parallel loop
!$acc parallel loop present(x%v)
do i = first_, last_
x%v(i) = val
end do
@ -796,22 +798,18 @@ contains
class(psb_c_base_vect_type), intent(inout) :: y
integer(psb_ipk_), intent(in) :: n
complex(psb_spk_) :: res
complex(psb_spk_), external :: ddot
integer(psb_ipk_) :: info
res = czero
!!$ write(0,*) 'oacc_dot_v'
select type(yy => y)
type is (psb_c_base_vect_type)
if (x%is_dev()) call x%sync()
res = ddot(n, x%v, 1, yy%v, 1)
type is (psb_c_vect_oacc)
if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync()
res = c_inner_oacc_dot(n, x%v, yy%v)
class default
call x%sync()
res = y%dot(n, x%v)
if (x%is_dev()) call x%sync()
res = y%dot(n, x%v)
end select
contains
function c_inner_oacc_dot(n, x, y) result(res)
@ -836,10 +834,10 @@ contains
complex(psb_spk_), intent(in) :: y(:)
integer(psb_ipk_), intent(in) :: n
complex(psb_spk_) :: res
complex(psb_spk_), external :: ddot
complex(psb_spk_), external :: cdot
if (x%is_dev()) call x%sync()
res = ddot(n, y, 1, x%v, 1)
res = cdot(n, y, 1, x%v, 1)
end function c_oacc_dot_a
@ -849,26 +847,36 @@ contains
class(psb_c_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
integer(psb_ipk_), intent(out) :: info
if (n /= psb_size(x%combuf)) then
!write(0,*) 'oacc new_buffer',n,psb_size(x%combuf)
if (n > psb_size(x%combuf)) then
!write(0,*) 'oacc new_buffer: reallocating '
if (allocated(x%combuf)) then
!if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
!$acc exit data delete(x%combuf)
end if
call x%psb_c_base_vect_type%new_buffer(n,info)
!$acc enter data copyin(x%combuf)
! call acc_copyin(x%combuf)
end if
end subroutine c_oacc_new_buffer
subroutine c_oacc_sync_dev_space(x)
implicit none
class(psb_c_vect_oacc), intent(inout) :: x
if (allocated(x%v)) call acc_create(x%v)
!!$ write(0,*) 'oacc sync_dev_space'
if (psb_size(x%v)>0) call acc_copyin(x%v)
end subroutine c_oacc_sync_dev_space
subroutine c_oacc_sync(x)
implicit none
class(psb_c_vect_oacc), intent(inout) :: x
if (x%is_dev()) then
call acc_update_self(x%v)
if (psb_size(x%v)>0) call acc_update_self(x%v)
end if
if (x%is_host()) then
call acc_update_device(x%v)
if (.not.acc_is_present(x%v)) call c_oacc_sync_dev_space(x)
if (psb_size(x%v)>0) call acc_update_device(x%v)
end if
call x%set_sync()
end subroutine c_oacc_sync
@ -941,6 +949,8 @@ contains
type(psb_c_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
info = 0
!!$ write(0,*) 'oacc final_vect_free'
call x%free_buffer(info)
if (allocated(x%v)) then
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
@ -953,8 +963,9 @@ contains
class(psb_c_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
!!$ write(0,*) 'oacc vect_free'
call x%free_buffer(info)
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_c_base_vect_type%free(info)
end subroutine c_oacc_vect_free
@ -964,8 +975,10 @@ contains
integer(psb_ipk_), intent(out) :: info
info = 0
if (psb_oacc_get_maybe_free_buffer())&
& call x%free_buffer(info)
if (psb_oacc_get_maybe_free_buffer()) then
!write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer()
call x%free_buffer(info)
end if
end subroutine c_oacc_vect_maybe_free_buffer
@ -973,7 +986,7 @@ contains
implicit none
class(psb_c_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
! write(0,*) 'oacc free_buffer'
info = 0
if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_c_base_vect_type%free_buffer(info)
@ -985,7 +998,6 @@ contains
class(psb_c_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: res
if (x%is_dev()) call x%sync()
res = size(x%v)
end function c_oacc_get_size

@ -257,9 +257,9 @@ contains
! but with size 0, then CREATE,UPDATE and DELETE
! will fail
!
if (psb_size(a%val)>0) call acc_create(a%val)
if (psb_size(a%ja)>0) call acc_create(a%ja)
if (psb_size(a%irp)>0) call acc_create(a%irp)
if (psb_size(a%val)>0) call acc_copyin(a%val)
if (psb_size(a%ja)>0) call acc_copyin(a%ja)
if (psb_size(a%irp)>0) call acc_copyin(a%irp)
end subroutine d_oacc_csr_sync_dev_space
subroutine d_oacc_csr_sync(a)

@ -186,10 +186,10 @@ contains
! but with size 0, then CREATE,UPDATE and DELETE
! will fail
!
if (psb_size(a%val)>0) call acc_create(a%val)
if (psb_size(a%ja)>0) call acc_create(a%ja)
if (psb_size(a%irn)>0) call acc_create(a%irn)
if (psb_size(a%idiag)>0) call acc_create(a%idiag)
if (psb_size(a%val)>0) call acc_copyin(a%val)
if (psb_size(a%ja)>0) call acc_copyin(a%ja)
if (psb_size(a%irn)>0) call acc_copyin(a%irn)
if (psb_size(a%idiag)>0) call acc_copyin(a%idiag)
end subroutine d_oacc_ell_sync_dev_space
function d_oacc_ell_is_host(a) result(res)

@ -240,11 +240,11 @@ contains
! but with size 0, then CREATE,UPDATE and DELETE
! will fail
!
if (psb_size(a%val)>0) call acc_create(a%val)
if (psb_size(a%ja)>0) call acc_create(a%ja)
if (psb_size(a%irn)>0) call acc_create(a%irn)
if (psb_size(a%idiag)>0) call acc_create(a%idiag)
if (psb_size(a%hkoffs)>0) call acc_create(a%hkoffs)
if (psb_size(a%val)>0) call acc_copyin(a%val)
if (psb_size(a%ja)>0) call acc_copyin(a%ja)
if (psb_size(a%irn)>0) call acc_copyin(a%irn)
if (psb_size(a%idiag)>0) call acc_copyin(a%idiag)
if (psb_size(a%hkoffs)>0) call acc_copyin(a%hkoffs)
end subroutine d_oacc_hll_sync_dev_space

@ -113,7 +113,7 @@ contains
real(psb_dpk_), intent(inout) :: x(:)
integer(psb_ipk_) :: n
integer(psb_ipk_) :: i
!$acc parallel loop
!$acc parallel loop present(x)
do i = 1, n
x(i) = abs(x(i))
end do
@ -144,7 +144,7 @@ contains
real(psb_dpk_), intent(inout) :: x(:),y(:)
integer(psb_ipk_) :: n
integer(psb_ipk_) :: i
!$acc parallel loop
!$acc parallel loop present(x,y)
do i = 1, n
y(i) = abs(x(i))
end do
@ -164,7 +164,7 @@ contains
real(psb_dpk_), intent(in) :: alpha
real(psb_dpk_), intent(inout) :: x(:)
integer(psb_ipk_) :: i
!$acc parallel loop
!$acc parallel loop present(x)
do i = 1, size(x)
x(i) = alpha * x(i)
end do
@ -189,7 +189,7 @@ contains
real(psb_dpk_) :: sum, mx
integer(psb_ipk_) :: i
mx = dzero
!$acc parallel loop reduction(max:mx)
!$acc parallel loop reduction(max:mx) present(x)
do i = 1, n
if (abs(x(i)) > mx) mx = abs(x(i))
end do
@ -197,7 +197,7 @@ contains
res = mx
else
sum = dzero
!$acc parallel loop reduction(+:sum)
!$acc parallel loop reduction(+:sum) present(x)
do i = 1, n
sum = sum + abs(x(i)/mx)**2
end do
@ -223,7 +223,7 @@ contains
real(psb_dpk_) :: max_val
integer(psb_ipk_) :: i
max_val = dzero
!$acc parallel loop reduction(max:max_val)
!$acc parallel loop reduction(max:max_val) present(x)
do i = 1, n
if (abs(x(i)) > max_val) max_val = abs(x(i))
end do
@ -248,7 +248,7 @@ contains
real(psb_dpk_) :: res
integer(psb_ipk_) :: i
res = dzero
!$acc parallel loop reduction(+:res)
!$acc parallel loop reduction(+:res) present(x)
do i = 1, n
res = res + abs(x(i))
end do
@ -265,7 +265,7 @@ contains
info = 0
if (y%is_dev()) call y%sync()
!$acc parallel loop
!$acc parallel loop present(x,y)
do i = 1, size(x)
y%v(i) = y%v(i) * x(i)
end do
@ -283,7 +283,7 @@ contains
info = 0
if (z%is_dev()) call z%sync()
!$acc parallel loop
!$acc parallel loop present(x,y,z%v)
do i = 1, size(x)
z%v(i) = alpha * x(i) * y(i) + beta * z%v(i)
end do
@ -327,7 +327,7 @@ contains
real(psb_dpk_), intent(inout) :: y(:)
real(psb_dpk_), intent(in) :: alpha, beta
integer(psb_ipk_), intent(out) :: info
!$acc parallel
!$acc parallel present(x,y)
!$acc loop
do i = 1, m
y(i) = alpha * x(i) + beta * y(i)
@ -384,7 +384,7 @@ contains
if ((nx < m) .or. (ny < m) .or. (nz < m)) then
info = psb_err_internal_error_
else
!$acc parallel loop
!$acc parallel loop present(xx%v,yy%v,zz%v)
do i = 1, m
yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
@ -416,6 +416,7 @@ contains
integer(psb_ipk_) :: info, k
logical :: acc_done
if (.not.allocated(y%combuf)) then
write(0,*) 'allocation error for y%combuf '
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
return
end if
@ -443,8 +444,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
real(psb_dpk_) :: beta,x(:), y(:)
integer(psb_ipk_) :: k
!$acc update device(x(1:n)) async
!$acc parallel loop
!$acc update device(x(1:n))
!$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@ -488,8 +489,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
real(psb_dpk_) :: beta, x(:), y(:)
integer(psb_ipk_) :: k
!$acc update device(x(1:n)) async
!$acc parallel loop
!$acc update device(x(1:n))
!$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@ -531,6 +532,7 @@ contains
acc_done = .false.
if (.not.allocated(x%combuf)) then
write(0,*) 'oacc allocation error combuf gthzbuf '
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
return
end if
@ -556,13 +558,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
real(psb_dpk_) :: x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop present(y)
!
!$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
!$acc update self(y(1:n)) async
!$acc update self(y(1:n))
end subroutine inner_gth
end subroutine d_oacc_gthzbuf
@ -600,13 +602,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
real(psb_dpk_) :: x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop present(y)
!
!$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
!$acc update self(y(1:n)) async
!$acc update self(y(1:n))
end subroutine inner_gth
end subroutine d_oacc_gthzv_x
@ -633,7 +635,7 @@ contains
if (vval%is_host()) call vval%sync()
if (virl%is_host()) call virl%sync()
if (x%is_host()) call x%sync()
!$acc parallel loop
!$acc parallel loop present(x%v,virl%v,vval%v)
do i = 1, n
x%v(virl%v(i)) = vval%v(i)
end do
@ -757,7 +759,7 @@ contains
if (present(first)) first_ = max(1, first)
if (present(last)) last_ = min(last, last_)
!$acc parallel loop
!$acc parallel loop present(x%v)
do i = first_, last_
x%v(i) = val
end do
@ -796,22 +798,18 @@ contains
class(psb_d_base_vect_type), intent(inout) :: y
integer(psb_ipk_), intent(in) :: n
real(psb_dpk_) :: res
real(psb_dpk_), external :: ddot
integer(psb_ipk_) :: info
res = dzero
!!$ write(0,*) 'oacc_dot_v'
select type(yy => y)
type is (psb_d_base_vect_type)
if (x%is_dev()) call x%sync()
res = ddot(n, x%v, 1, yy%v, 1)
type is (psb_d_vect_oacc)
if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync()
res = d_inner_oacc_dot(n, x%v, yy%v)
class default
call x%sync()
res = y%dot(n, x%v)
if (x%is_dev()) call x%sync()
res = y%dot(n, x%v)
end select
contains
function d_inner_oacc_dot(n, x, y) result(res)
@ -849,26 +847,36 @@ contains
class(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
integer(psb_ipk_), intent(out) :: info
if (n /= psb_size(x%combuf)) then
!write(0,*) 'oacc new_buffer',n,psb_size(x%combuf)
if (n > psb_size(x%combuf)) then
!write(0,*) 'oacc new_buffer: reallocating '
if (allocated(x%combuf)) then
!if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
!$acc exit data delete(x%combuf)
end if
call x%psb_d_base_vect_type%new_buffer(n,info)
!$acc enter data copyin(x%combuf)
! call acc_copyin(x%combuf)
end if
end subroutine d_oacc_new_buffer
subroutine d_oacc_sync_dev_space(x)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
if (allocated(x%v)) call acc_create(x%v)
!!$ write(0,*) 'oacc sync_dev_space'
if (psb_size(x%v)>0) call acc_copyin(x%v)
end subroutine d_oacc_sync_dev_space
subroutine d_oacc_sync(x)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
if (x%is_dev()) then
call acc_update_self(x%v)
if (psb_size(x%v)>0) call acc_update_self(x%v)
end if
if (x%is_host()) then
call acc_update_device(x%v)
if (.not.acc_is_present(x%v)) call d_oacc_sync_dev_space(x)
if (psb_size(x%v)>0) call acc_update_device(x%v)
end if
call x%set_sync()
end subroutine d_oacc_sync
@ -941,6 +949,8 @@ contains
type(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
info = 0
!!$ write(0,*) 'oacc final_vect_free'
call x%free_buffer(info)
if (allocated(x%v)) then
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
@ -953,8 +963,9 @@ contains
class(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
!!$ write(0,*) 'oacc vect_free'
call x%free_buffer(info)
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_d_base_vect_type%free(info)
end subroutine d_oacc_vect_free
@ -964,8 +975,10 @@ contains
integer(psb_ipk_), intent(out) :: info
info = 0
if (psb_oacc_get_maybe_free_buffer())&
& call x%free_buffer(info)
if (psb_oacc_get_maybe_free_buffer()) then
!write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer()
call x%free_buffer(info)
end if
end subroutine d_oacc_vect_maybe_free_buffer
@ -973,7 +986,7 @@ contains
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
! write(0,*) 'oacc free_buffer'
info = 0
if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_d_base_vect_type%free_buffer(info)
@ -985,7 +998,6 @@ contains
class(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: res
if (x%is_dev()) call x%sync()
res = size(x%v)
end function d_oacc_get_size

@ -70,6 +70,7 @@ contains
integer(psb_ipk_) :: info, k
logical :: acc_done
if (.not.allocated(y%combuf)) then
write(0,*) 'allocation error for y%combuf '
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
return
end if
@ -97,8 +98,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
integer(psb_ipk_) :: beta,x(:), y(:)
integer(psb_ipk_) :: k
!$acc update device(x(1:n)) async
!$acc parallel loop
!$acc update device(x(1:n))
!$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@ -142,8 +143,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
integer(psb_ipk_) :: beta, x(:), y(:)
integer(psb_ipk_) :: k
!$acc update device(x(1:n)) async
!$acc parallel loop
!$acc update device(x(1:n))
!$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@ -185,6 +186,7 @@ contains
acc_done = .false.
if (.not.allocated(x%combuf)) then
write(0,*) 'oacc allocation error combuf gthzbuf '
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
return
end if
@ -210,13 +212,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
integer(psb_ipk_) :: x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop present(y)
!
!$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
!$acc update self(y(1:n)) async
!$acc update self(y(1:n))
end subroutine inner_gth
end subroutine i_oacc_gthzbuf
@ -254,13 +256,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
integer(psb_ipk_) :: x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop present(y)
!
!$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
!$acc update self(y(1:n)) async
!$acc update self(y(1:n))
end subroutine inner_gth
end subroutine i_oacc_gthzv_x
@ -287,7 +289,7 @@ contains
if (vval%is_host()) call vval%sync()
if (virl%is_host()) call virl%sync()
if (x%is_host()) call x%sync()
!$acc parallel loop
!$acc parallel loop present(x%v,virl%v,vval%v)
do i = 1, n
x%v(virl%v(i)) = vval%v(i)
end do
@ -411,7 +413,7 @@ contains
if (present(first)) first_ = max(1, first)
if (present(last)) last_ = min(last, last_)
!$acc parallel loop
!$acc parallel loop present(x%v)
do i = first_, last_
x%v(i) = val
end do
@ -449,26 +451,36 @@ contains
class(psb_i_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
integer(psb_ipk_), intent(out) :: info
if (n /= psb_size(x%combuf)) then
!write(0,*) 'oacc new_buffer',n,psb_size(x%combuf)
if (n > psb_size(x%combuf)) then
!write(0,*) 'oacc new_buffer: reallocating '
if (allocated(x%combuf)) then
!if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
!$acc exit data delete(x%combuf)
end if
call x%psb_i_base_vect_type%new_buffer(n,info)
!$acc enter data copyin(x%combuf)
! call acc_copyin(x%combuf)
end if
end subroutine i_oacc_new_buffer
subroutine i_oacc_sync_dev_space(x)
implicit none
class(psb_i_vect_oacc), intent(inout) :: x
if (allocated(x%v)) call acc_create(x%v)
!!$ write(0,*) 'oacc sync_dev_space'
if (psb_size(x%v)>0) call acc_copyin(x%v)
end subroutine i_oacc_sync_dev_space
subroutine i_oacc_sync(x)
implicit none
class(psb_i_vect_oacc), intent(inout) :: x
if (x%is_dev()) then
call acc_update_self(x%v)
if (psb_size(x%v)>0) call acc_update_self(x%v)
end if
if (x%is_host()) then
call acc_update_device(x%v)
if (.not.acc_is_present(x%v)) call i_oacc_sync_dev_space(x)
if (psb_size(x%v)>0) call acc_update_device(x%v)
end if
call x%set_sync()
end subroutine i_oacc_sync
@ -541,6 +553,8 @@ contains
type(psb_i_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
info = 0
!!$ write(0,*) 'oacc final_vect_free'
call x%free_buffer(info)
if (allocated(x%v)) then
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
@ -553,8 +567,9 @@ contains
class(psb_i_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
!!$ write(0,*) 'oacc vect_free'
call x%free_buffer(info)
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_i_base_vect_type%free(info)
end subroutine i_oacc_vect_free
@ -564,8 +579,10 @@ contains
integer(psb_ipk_), intent(out) :: info
info = 0
if (psb_oacc_get_maybe_free_buffer())&
& call x%free_buffer(info)
if (psb_oacc_get_maybe_free_buffer()) then
!write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer()
call x%free_buffer(info)
end if
end subroutine i_oacc_vect_maybe_free_buffer
@ -573,7 +590,7 @@ contains
implicit none
class(psb_i_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
! write(0,*) 'oacc free_buffer'
info = 0
if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_i_base_vect_type%free_buffer(info)
@ -585,7 +602,6 @@ contains
class(psb_i_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: res
if (x%is_dev()) call x%sync()
res = size(x%v)
end function i_oacc_get_size

@ -72,6 +72,7 @@ contains
integer(psb_ipk_) :: info, k
logical :: acc_done
if (.not.allocated(y%combuf)) then
write(0,*) 'allocation error for y%combuf '
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
return
end if
@ -99,8 +100,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
integer(psb_lpk_) :: beta,x(:), y(:)
integer(psb_ipk_) :: k
!$acc update device(x(1:n)) async
!$acc parallel loop
!$acc update device(x(1:n))
!$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@ -144,8 +145,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
integer(psb_lpk_) :: beta, x(:), y(:)
integer(psb_ipk_) :: k
!$acc update device(x(1:n)) async
!$acc parallel loop
!$acc update device(x(1:n))
!$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@ -187,6 +188,7 @@ contains
acc_done = .false.
if (.not.allocated(x%combuf)) then
write(0,*) 'oacc allocation error combuf gthzbuf '
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
return
end if
@ -212,13 +214,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
integer(psb_lpk_) :: x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop present(y)
!
!$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
!$acc update self(y(1:n)) async
!$acc update self(y(1:n))
end subroutine inner_gth
end subroutine l_oacc_gthzbuf
@ -256,13 +258,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
integer(psb_lpk_) :: x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop present(y)
!
!$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
!$acc update self(y(1:n)) async
!$acc update self(y(1:n))
end subroutine inner_gth
end subroutine l_oacc_gthzv_x
@ -289,7 +291,7 @@ contains
if (vval%is_host()) call vval%sync()
if (virl%is_host()) call virl%sync()
if (x%is_host()) call x%sync()
!$acc parallel loop
!$acc parallel loop present(x%v,virl%v,vval%v)
do i = 1, n
x%v(virl%v(i)) = vval%v(i)
end do
@ -413,7 +415,7 @@ contains
if (present(first)) first_ = max(1, first)
if (present(last)) last_ = min(last, last_)
!$acc parallel loop
!$acc parallel loop present(x%v)
do i = first_, last_
x%v(i) = val
end do
@ -451,26 +453,36 @@ contains
class(psb_l_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
integer(psb_ipk_), intent(out) :: info
if (n /= psb_size(x%combuf)) then
!write(0,*) 'oacc new_buffer',n,psb_size(x%combuf)
if (n > psb_size(x%combuf)) then
!write(0,*) 'oacc new_buffer: reallocating '
if (allocated(x%combuf)) then
!if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
!$acc exit data delete(x%combuf)
end if
call x%psb_l_base_vect_type%new_buffer(n,info)
!$acc enter data copyin(x%combuf)
! call acc_copyin(x%combuf)
end if
end subroutine l_oacc_new_buffer
subroutine l_oacc_sync_dev_space(x)
implicit none
class(psb_l_vect_oacc), intent(inout) :: x
if (allocated(x%v)) call acc_create(x%v)
!!$ write(0,*) 'oacc sync_dev_space'
if (psb_size(x%v)>0) call acc_copyin(x%v)
end subroutine l_oacc_sync_dev_space
subroutine l_oacc_sync(x)
implicit none
class(psb_l_vect_oacc), intent(inout) :: x
if (x%is_dev()) then
call acc_update_self(x%v)
if (psb_size(x%v)>0) call acc_update_self(x%v)
end if
if (x%is_host()) then
call acc_update_device(x%v)
if (.not.acc_is_present(x%v)) call l_oacc_sync_dev_space(x)
if (psb_size(x%v)>0) call acc_update_device(x%v)
end if
call x%set_sync()
end subroutine l_oacc_sync
@ -543,6 +555,8 @@ contains
type(psb_l_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
info = 0
!!$ write(0,*) 'oacc final_vect_free'
call x%free_buffer(info)
if (allocated(x%v)) then
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
@ -555,8 +569,9 @@ contains
class(psb_l_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
!!$ write(0,*) 'oacc vect_free'
call x%free_buffer(info)
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_l_base_vect_type%free(info)
end subroutine l_oacc_vect_free
@ -566,8 +581,10 @@ contains
integer(psb_ipk_), intent(out) :: info
info = 0
if (psb_oacc_get_maybe_free_buffer())&
& call x%free_buffer(info)
if (psb_oacc_get_maybe_free_buffer()) then
!write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer()
call x%free_buffer(info)
end if
end subroutine l_oacc_vect_maybe_free_buffer
@ -575,7 +592,7 @@ contains
implicit none
class(psb_l_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
! write(0,*) 'oacc free_buffer'
info = 0
if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_l_base_vect_type%free_buffer(info)
@ -587,7 +604,6 @@ contains
class(psb_l_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: res
if (x%is_dev()) call x%sync()
res = size(x%v)
end function l_oacc_get_size

@ -257,9 +257,9 @@ contains
! but with size 0, then CREATE,UPDATE and DELETE
! will fail
!
if (psb_size(a%val)>0) call acc_create(a%val)
if (psb_size(a%ja)>0) call acc_create(a%ja)
if (psb_size(a%irp)>0) call acc_create(a%irp)
if (psb_size(a%val)>0) call acc_copyin(a%val)
if (psb_size(a%ja)>0) call acc_copyin(a%ja)
if (psb_size(a%irp)>0) call acc_copyin(a%irp)
end subroutine s_oacc_csr_sync_dev_space
subroutine s_oacc_csr_sync(a)

@ -186,10 +186,10 @@ contains
! but with size 0, then CREATE,UPDATE and DELETE
! will fail
!
if (psb_size(a%val)>0) call acc_create(a%val)
if (psb_size(a%ja)>0) call acc_create(a%ja)
if (psb_size(a%irn)>0) call acc_create(a%irn)
if (psb_size(a%idiag)>0) call acc_create(a%idiag)
if (psb_size(a%val)>0) call acc_copyin(a%val)
if (psb_size(a%ja)>0) call acc_copyin(a%ja)
if (psb_size(a%irn)>0) call acc_copyin(a%irn)
if (psb_size(a%idiag)>0) call acc_copyin(a%idiag)
end subroutine s_oacc_ell_sync_dev_space
function s_oacc_ell_is_host(a) result(res)

@ -240,11 +240,11 @@ contains
! but with size 0, then CREATE,UPDATE and DELETE
! will fail
!
if (psb_size(a%val)>0) call acc_create(a%val)
if (psb_size(a%ja)>0) call acc_create(a%ja)
if (psb_size(a%irn)>0) call acc_create(a%irn)
if (psb_size(a%idiag)>0) call acc_create(a%idiag)
if (psb_size(a%hkoffs)>0) call acc_create(a%hkoffs)
if (psb_size(a%val)>0) call acc_copyin(a%val)
if (psb_size(a%ja)>0) call acc_copyin(a%ja)
if (psb_size(a%irn)>0) call acc_copyin(a%irn)
if (psb_size(a%idiag)>0) call acc_copyin(a%idiag)
if (psb_size(a%hkoffs)>0) call acc_copyin(a%hkoffs)
end subroutine s_oacc_hll_sync_dev_space

@ -113,7 +113,7 @@ contains
real(psb_spk_), intent(inout) :: x(:)
integer(psb_ipk_) :: n
integer(psb_ipk_) :: i
!$acc parallel loop
!$acc parallel loop present(x)
do i = 1, n
x(i) = abs(x(i))
end do
@ -144,7 +144,7 @@ contains
real(psb_spk_), intent(inout) :: x(:),y(:)
integer(psb_ipk_) :: n
integer(psb_ipk_) :: i
!$acc parallel loop
!$acc parallel loop present(x,y)
do i = 1, n
y(i) = abs(x(i))
end do
@ -164,7 +164,7 @@ contains
real(psb_spk_), intent(in) :: alpha
real(psb_spk_), intent(inout) :: x(:)
integer(psb_ipk_) :: i
!$acc parallel loop
!$acc parallel loop present(x)
do i = 1, size(x)
x(i) = alpha * x(i)
end do
@ -189,7 +189,7 @@ contains
real(psb_spk_) :: sum, mx
integer(psb_ipk_) :: i
mx = szero
!$acc parallel loop reduction(max:mx)
!$acc parallel loop reduction(max:mx) present(x)
do i = 1, n
if (abs(x(i)) > mx) mx = abs(x(i))
end do
@ -197,7 +197,7 @@ contains
res = mx
else
sum = szero
!$acc parallel loop reduction(+:sum)
!$acc parallel loop reduction(+:sum) present(x)
do i = 1, n
sum = sum + abs(x(i)/mx)**2
end do
@ -223,7 +223,7 @@ contains
real(psb_spk_) :: max_val
integer(psb_ipk_) :: i
max_val = szero
!$acc parallel loop reduction(max:max_val)
!$acc parallel loop reduction(max:max_val) present(x)
do i = 1, n
if (abs(x(i)) > max_val) max_val = abs(x(i))
end do
@ -248,7 +248,7 @@ contains
real(psb_spk_) :: res
integer(psb_ipk_) :: i
res = szero
!$acc parallel loop reduction(+:res)
!$acc parallel loop reduction(+:res) present(x)
do i = 1, n
res = res + abs(x(i))
end do
@ -265,7 +265,7 @@ contains
info = 0
if (y%is_dev()) call y%sync()
!$acc parallel loop
!$acc parallel loop present(x,y)
do i = 1, size(x)
y%v(i) = y%v(i) * x(i)
end do
@ -283,7 +283,7 @@ contains
info = 0
if (z%is_dev()) call z%sync()
!$acc parallel loop
!$acc parallel loop present(x,y,z%v)
do i = 1, size(x)
z%v(i) = alpha * x(i) * y(i) + beta * z%v(i)
end do
@ -327,7 +327,7 @@ contains
real(psb_spk_), intent(inout) :: y(:)
real(psb_spk_), intent(in) :: alpha, beta
integer(psb_ipk_), intent(out) :: info
!$acc parallel
!$acc parallel present(x,y)
!$acc loop
do i = 1, m
y(i) = alpha * x(i) + beta * y(i)
@ -384,7 +384,7 @@ contains
if ((nx < m) .or. (ny < m) .or. (nz < m)) then
info = psb_err_internal_error_
else
!$acc parallel loop
!$acc parallel loop present(xx%v,yy%v,zz%v)
do i = 1, m
yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
@ -416,6 +416,7 @@ contains
integer(psb_ipk_) :: info, k
logical :: acc_done
if (.not.allocated(y%combuf)) then
write(0,*) 'allocation error for y%combuf '
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
return
end if
@ -443,8 +444,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
real(psb_spk_) :: beta,x(:), y(:)
integer(psb_ipk_) :: k
!$acc update device(x(1:n)) async
!$acc parallel loop
!$acc update device(x(1:n))
!$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@ -488,8 +489,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
real(psb_spk_) :: beta, x(:), y(:)
integer(psb_ipk_) :: k
!$acc update device(x(1:n)) async
!$acc parallel loop
!$acc update device(x(1:n))
!$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@ -531,6 +532,7 @@ contains
acc_done = .false.
if (.not.allocated(x%combuf)) then
write(0,*) 'oacc allocation error combuf gthzbuf '
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
return
end if
@ -556,13 +558,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
real(psb_spk_) :: x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop present(y)
!
!$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
!$acc update self(y(1:n)) async
!$acc update self(y(1:n))
end subroutine inner_gth
end subroutine s_oacc_gthzbuf
@ -600,13 +602,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
real(psb_spk_) :: x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop present(y)
!
!$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
!$acc update self(y(1:n)) async
!$acc update self(y(1:n))
end subroutine inner_gth
end subroutine s_oacc_gthzv_x
@ -633,7 +635,7 @@ contains
if (vval%is_host()) call vval%sync()
if (virl%is_host()) call virl%sync()
if (x%is_host()) call x%sync()
!$acc parallel loop
!$acc parallel loop present(x%v,virl%v,vval%v)
do i = 1, n
x%v(virl%v(i)) = vval%v(i)
end do
@ -757,7 +759,7 @@ contains
if (present(first)) first_ = max(1, first)
if (present(last)) last_ = min(last, last_)
!$acc parallel loop
!$acc parallel loop present(x%v)
do i = first_, last_
x%v(i) = val
end do
@ -796,22 +798,18 @@ contains
class(psb_s_base_vect_type), intent(inout) :: y
integer(psb_ipk_), intent(in) :: n
real(psb_spk_) :: res
real(psb_spk_), external :: ddot
integer(psb_ipk_) :: info
res = szero
!!$ write(0,*) 'oacc_dot_v'
select type(yy => y)
type is (psb_s_base_vect_type)
if (x%is_dev()) call x%sync()
res = ddot(n, x%v, 1, yy%v, 1)
type is (psb_s_vect_oacc)
if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync()
res = s_inner_oacc_dot(n, x%v, yy%v)
class default
call x%sync()
res = y%dot(n, x%v)
if (x%is_dev()) call x%sync()
res = y%dot(n, x%v)
end select
contains
function s_inner_oacc_dot(n, x, y) result(res)
@ -836,10 +834,10 @@ contains
real(psb_spk_), intent(in) :: y(:)
integer(psb_ipk_), intent(in) :: n
real(psb_spk_) :: res
real(psb_spk_), external :: ddot
real(psb_spk_), external :: sdot
if (x%is_dev()) call x%sync()
res = ddot(n, y, 1, x%v, 1)
res = sdot(n, y, 1, x%v, 1)
end function s_oacc_dot_a
@ -849,26 +847,36 @@ contains
class(psb_s_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
integer(psb_ipk_), intent(out) :: info
if (n /= psb_size(x%combuf)) then
!write(0,*) 'oacc new_buffer',n,psb_size(x%combuf)
if (n > psb_size(x%combuf)) then
!write(0,*) 'oacc new_buffer: reallocating '
if (allocated(x%combuf)) then
!if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
!$acc exit data delete(x%combuf)
end if
call x%psb_s_base_vect_type%new_buffer(n,info)
!$acc enter data copyin(x%combuf)
! call acc_copyin(x%combuf)
end if
end subroutine s_oacc_new_buffer
subroutine s_oacc_sync_dev_space(x)
implicit none
class(psb_s_vect_oacc), intent(inout) :: x
if (allocated(x%v)) call acc_create(x%v)
!!$ write(0,*) 'oacc sync_dev_space'
if (psb_size(x%v)>0) call acc_copyin(x%v)
end subroutine s_oacc_sync_dev_space
subroutine s_oacc_sync(x)
implicit none
class(psb_s_vect_oacc), intent(inout) :: x
if (x%is_dev()) then
call acc_update_self(x%v)
if (psb_size(x%v)>0) call acc_update_self(x%v)
end if
if (x%is_host()) then
call acc_update_device(x%v)
if (.not.acc_is_present(x%v)) call s_oacc_sync_dev_space(x)
if (psb_size(x%v)>0) call acc_update_device(x%v)
end if
call x%set_sync()
end subroutine s_oacc_sync
@ -941,6 +949,8 @@ contains
type(psb_s_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
info = 0
!!$ write(0,*) 'oacc final_vect_free'
call x%free_buffer(info)
if (allocated(x%v)) then
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
@ -953,8 +963,9 @@ contains
class(psb_s_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
!!$ write(0,*) 'oacc vect_free'
call x%free_buffer(info)
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_s_base_vect_type%free(info)
end subroutine s_oacc_vect_free
@ -964,8 +975,10 @@ contains
integer(psb_ipk_), intent(out) :: info
info = 0
if (psb_oacc_get_maybe_free_buffer())&
& call x%free_buffer(info)
if (psb_oacc_get_maybe_free_buffer()) then
!write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer()
call x%free_buffer(info)
end if
end subroutine s_oacc_vect_maybe_free_buffer
@ -973,7 +986,7 @@ contains
implicit none
class(psb_s_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
! write(0,*) 'oacc free_buffer'
info = 0
if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_s_base_vect_type%free_buffer(info)
@ -985,7 +998,6 @@ contains
class(psb_s_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: res
if (x%is_dev()) call x%sync()
res = size(x%v)
end function s_oacc_get_size

@ -257,9 +257,9 @@ contains
! but with size 0, then CREATE,UPDATE and DELETE
! will fail
!
if (psb_size(a%val)>0) call acc_create(a%val)
if (psb_size(a%ja)>0) call acc_create(a%ja)
if (psb_size(a%irp)>0) call acc_create(a%irp)
if (psb_size(a%val)>0) call acc_copyin(a%val)
if (psb_size(a%ja)>0) call acc_copyin(a%ja)
if (psb_size(a%irp)>0) call acc_copyin(a%irp)
end subroutine z_oacc_csr_sync_dev_space
subroutine z_oacc_csr_sync(a)

@ -186,10 +186,10 @@ contains
! but with size 0, then CREATE,UPDATE and DELETE
! will fail
!
if (psb_size(a%val)>0) call acc_create(a%val)
if (psb_size(a%ja)>0) call acc_create(a%ja)
if (psb_size(a%irn)>0) call acc_create(a%irn)
if (psb_size(a%idiag)>0) call acc_create(a%idiag)
if (psb_size(a%val)>0) call acc_copyin(a%val)
if (psb_size(a%ja)>0) call acc_copyin(a%ja)
if (psb_size(a%irn)>0) call acc_copyin(a%irn)
if (psb_size(a%idiag)>0) call acc_copyin(a%idiag)
end subroutine z_oacc_ell_sync_dev_space
function z_oacc_ell_is_host(a) result(res)

@ -240,11 +240,11 @@ contains
! but with size 0, then CREATE,UPDATE and DELETE
! will fail
!
if (psb_size(a%val)>0) call acc_create(a%val)
if (psb_size(a%ja)>0) call acc_create(a%ja)
if (psb_size(a%irn)>0) call acc_create(a%irn)
if (psb_size(a%idiag)>0) call acc_create(a%idiag)
if (psb_size(a%hkoffs)>0) call acc_create(a%hkoffs)
if (psb_size(a%val)>0) call acc_copyin(a%val)
if (psb_size(a%ja)>0) call acc_copyin(a%ja)
if (psb_size(a%irn)>0) call acc_copyin(a%irn)
if (psb_size(a%idiag)>0) call acc_copyin(a%idiag)
if (psb_size(a%hkoffs)>0) call acc_copyin(a%hkoffs)
end subroutine z_oacc_hll_sync_dev_space

@ -113,7 +113,7 @@ contains
complex(psb_dpk_), intent(inout) :: x(:)
integer(psb_ipk_) :: n
integer(psb_ipk_) :: i
!$acc parallel loop
!$acc parallel loop present(x)
do i = 1, n
x(i) = abs(x(i))
end do
@ -144,7 +144,7 @@ contains
complex(psb_dpk_), intent(inout) :: x(:),y(:)
integer(psb_ipk_) :: n
integer(psb_ipk_) :: i
!$acc parallel loop
!$acc parallel loop present(x,y)
do i = 1, n
y(i) = abs(x(i))
end do
@ -164,7 +164,7 @@ contains
complex(psb_dpk_), intent(in) :: alpha
complex(psb_dpk_), intent(inout) :: x(:)
integer(psb_ipk_) :: i
!$acc parallel loop
!$acc parallel loop present(x)
do i = 1, size(x)
x(i) = alpha * x(i)
end do
@ -189,7 +189,7 @@ contains
real(psb_dpk_) :: sum, mx
integer(psb_ipk_) :: i
mx = dzero
!$acc parallel loop reduction(max:mx)
!$acc parallel loop reduction(max:mx) present(x)
do i = 1, n
if (abs(x(i)) > mx) mx = abs(x(i))
end do
@ -197,7 +197,7 @@ contains
res = mx
else
sum = dzero
!$acc parallel loop reduction(+:sum)
!$acc parallel loop reduction(+:sum) present(x)
do i = 1, n
sum = sum + abs(x(i)/mx)**2
end do
@ -223,7 +223,7 @@ contains
real(psb_dpk_) :: max_val
integer(psb_ipk_) :: i
max_val = dzero
!$acc parallel loop reduction(max:max_val)
!$acc parallel loop reduction(max:max_val) present(x)
do i = 1, n
if (abs(x(i)) > max_val) max_val = abs(x(i))
end do
@ -248,7 +248,7 @@ contains
real(psb_dpk_) :: res
integer(psb_ipk_) :: i
res = dzero
!$acc parallel loop reduction(+:res)
!$acc parallel loop reduction(+:res) present(x)
do i = 1, n
res = res + abs(x(i))
end do
@ -265,7 +265,7 @@ contains
info = 0
if (y%is_dev()) call y%sync()
!$acc parallel loop
!$acc parallel loop present(x,y)
do i = 1, size(x)
y%v(i) = y%v(i) * x(i)
end do
@ -283,7 +283,7 @@ contains
info = 0
if (z%is_dev()) call z%sync()
!$acc parallel loop
!$acc parallel loop present(x,y,z%v)
do i = 1, size(x)
z%v(i) = alpha * x(i) * y(i) + beta * z%v(i)
end do
@ -327,7 +327,7 @@ contains
complex(psb_dpk_), intent(inout) :: y(:)
complex(psb_dpk_), intent(in) :: alpha, beta
integer(psb_ipk_), intent(out) :: info
!$acc parallel
!$acc parallel present(x,y)
!$acc loop
do i = 1, m
y(i) = alpha * x(i) + beta * y(i)
@ -384,7 +384,7 @@ contains
if ((nx < m) .or. (ny < m) .or. (nz < m)) then
info = psb_err_internal_error_
else
!$acc parallel loop
!$acc parallel loop present(xx%v,yy%v,zz%v)
do i = 1, m
yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
@ -416,6 +416,7 @@ contains
integer(psb_ipk_) :: info, k
logical :: acc_done
if (.not.allocated(y%combuf)) then
write(0,*) 'allocation error for y%combuf '
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
return
end if
@ -443,8 +444,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
complex(psb_dpk_) :: beta,x(:), y(:)
integer(psb_ipk_) :: k
!$acc update device(x(1:n)) async
!$acc parallel loop
!$acc update device(x(1:n))
!$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@ -488,8 +489,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
complex(psb_dpk_) :: beta, x(:), y(:)
integer(psb_ipk_) :: k
!$acc update device(x(1:n)) async
!$acc parallel loop
!$acc update device(x(1:n))
!$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@ -531,6 +532,7 @@ contains
acc_done = .false.
if (.not.allocated(x%combuf)) then
write(0,*) 'oacc allocation error combuf gthzbuf '
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
return
end if
@ -556,13 +558,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
complex(psb_dpk_) :: x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop present(y)
!
!$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
!$acc update self(y(1:n)) async
!$acc update self(y(1:n))
end subroutine inner_gth
end subroutine z_oacc_gthzbuf
@ -600,13 +602,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
complex(psb_dpk_) :: x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop present(y)
!
!$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
!$acc update self(y(1:n)) async
!$acc update self(y(1:n))
end subroutine inner_gth
end subroutine z_oacc_gthzv_x
@ -633,7 +635,7 @@ contains
if (vval%is_host()) call vval%sync()
if (virl%is_host()) call virl%sync()
if (x%is_host()) call x%sync()
!$acc parallel loop
!$acc parallel loop present(x%v,virl%v,vval%v)
do i = 1, n
x%v(virl%v(i)) = vval%v(i)
end do
@ -757,7 +759,7 @@ contains
if (present(first)) first_ = max(1, first)
if (present(last)) last_ = min(last, last_)
!$acc parallel loop
!$acc parallel loop present(x%v)
do i = first_, last_
x%v(i) = val
end do
@ -796,22 +798,18 @@ contains
class(psb_z_base_vect_type), intent(inout) :: y
integer(psb_ipk_), intent(in) :: n
complex(psb_dpk_) :: res
complex(psb_dpk_), external :: ddot
integer(psb_ipk_) :: info
res = zzero
!!$ write(0,*) 'oacc_dot_v'
select type(yy => y)
type is (psb_z_base_vect_type)
if (x%is_dev()) call x%sync()
res = ddot(n, x%v, 1, yy%v, 1)
type is (psb_z_vect_oacc)
if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync()
res = z_inner_oacc_dot(n, x%v, yy%v)
class default
call x%sync()
res = y%dot(n, x%v)
if (x%is_dev()) call x%sync()
res = y%dot(n, x%v)
end select
contains
function z_inner_oacc_dot(n, x, y) result(res)
@ -836,10 +834,10 @@ contains
complex(psb_dpk_), intent(in) :: y(:)
integer(psb_ipk_), intent(in) :: n
complex(psb_dpk_) :: res
complex(psb_dpk_), external :: ddot
complex(psb_dpk_), external :: zdot
if (x%is_dev()) call x%sync()
res = ddot(n, y, 1, x%v, 1)
res = zdot(n, y, 1, x%v, 1)
end function z_oacc_dot_a
@ -849,26 +847,36 @@ contains
class(psb_z_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
integer(psb_ipk_), intent(out) :: info
if (n /= psb_size(x%combuf)) then
!write(0,*) 'oacc new_buffer',n,psb_size(x%combuf)
if (n > psb_size(x%combuf)) then
!write(0,*) 'oacc new_buffer: reallocating '
if (allocated(x%combuf)) then
!if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
!$acc exit data delete(x%combuf)
end if
call x%psb_z_base_vect_type%new_buffer(n,info)
!$acc enter data copyin(x%combuf)
! call acc_copyin(x%combuf)
end if
end subroutine z_oacc_new_buffer
subroutine z_oacc_sync_dev_space(x)
implicit none
class(psb_z_vect_oacc), intent(inout) :: x
if (allocated(x%v)) call acc_create(x%v)
!!$ write(0,*) 'oacc sync_dev_space'
if (psb_size(x%v)>0) call acc_copyin(x%v)
end subroutine z_oacc_sync_dev_space
subroutine z_oacc_sync(x)
implicit none
class(psb_z_vect_oacc), intent(inout) :: x
if (x%is_dev()) then
call acc_update_self(x%v)
if (psb_size(x%v)>0) call acc_update_self(x%v)
end if
if (x%is_host()) then
call acc_update_device(x%v)
if (.not.acc_is_present(x%v)) call z_oacc_sync_dev_space(x)
if (psb_size(x%v)>0) call acc_update_device(x%v)
end if
call x%set_sync()
end subroutine z_oacc_sync
@ -941,6 +949,8 @@ contains
type(psb_z_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
info = 0
!!$ write(0,*) 'oacc final_vect_free'
call x%free_buffer(info)
if (allocated(x%v)) then
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
@ -953,8 +963,9 @@ contains
class(psb_z_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
!!$ write(0,*) 'oacc vect_free'
call x%free_buffer(info)
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_z_base_vect_type%free(info)
end subroutine z_oacc_vect_free
@ -964,8 +975,10 @@ contains
integer(psb_ipk_), intent(out) :: info
info = 0
if (psb_oacc_get_maybe_free_buffer())&
& call x%free_buffer(info)
if (psb_oacc_get_maybe_free_buffer()) then
!write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer()
call x%free_buffer(info)
end if
end subroutine z_oacc_vect_maybe_free_buffer
@ -973,7 +986,7 @@ contains
implicit none
class(psb_z_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
! write(0,*) 'oacc free_buffer'
info = 0
if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_z_base_vect_type%free_buffer(info)
@ -985,7 +998,6 @@ contains
class(psb_z_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: res
if (x%is_dev()) call x%sync()
res = size(x%v)
end function z_oacc_get_size

Loading…
Cancel
Save