Backporting fixes from version 4

oacc_loloum
sfilippone 5 months ago
parent 6236f3489c
commit bcbe0c89c7

@ -58,9 +58,10 @@ psb_oacc_mod.o : psb_i_oacc_vect_mod.o psb_l_oacc_vect_mod.o \
psb_z_oacc_ell_mat_mod.o psb_z_oacc_hll_mat_mod.o \ psb_z_oacc_ell_mat_mod.o psb_z_oacc_hll_mat_mod.o \
psb_oacc_env_mod.o psb_oacc_env_mod.o
psb_s_oacc_vect_mod.o psb_d_oacc_vect_mod.o\ psb_s_oacc_vect_mod.o psb_d_oacc_vect_mod.o \
psb_c_oacc_vect_mod.o psb_z_oacc_vect_mod.o: psb_i_oacc_vect_mod.o psb_l_oacc_vect_mod.o psb_c_oacc_vect_mod.o psb_z_oacc_vect_mod.o: psb_i_oacc_vect_mod.o psb_l_oacc_vect_mod.o psb_oacc_env_mod.o
psb_l_oacc_vect_mod.o: psb_i_oacc_vect_mod.o psb_l_oacc_vect_mod.o: psb_i_oacc_vect_mod.o psb_oacc_env_mod.o
psb_i_oacc_vect_mod.o: psb_oacc_env_mod.o
psb_s_oacc_csr_mat_mod.o psb_s_oacc_ell_mat_mod.o psb_s_oacc_hll_mat_mod.o: psb_s_oacc_vect_mod.o psb_s_oacc_csr_mat_mod.o psb_s_oacc_ell_mat_mod.o psb_s_oacc_hll_mat_mod.o: psb_s_oacc_vect_mod.o

@ -18,7 +18,7 @@ contains
m = a%get_nrows() m = a%get_nrows()
n = a%get_ncols() n = a%get_ncols()
if ((n /= size(x%v)) .or. (m /= size(y%v))) then if ((n > size(x%v)) .or. (m > size(y%v))) then
write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v) write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_ info = psb_err_invalid_mat_state_
return return

@ -19,7 +19,7 @@ contains
n = a%get_ncols() n = a%get_ncols()
nzt = a%nzt nzt = a%nzt
nc = size(a%ja,2) nc = size(a%ja,2)
if ((n /= size(x%v)) .or. (m /= size(y%v))) then if ((n > size(x%v)) .or. (m > size(y%v))) then
write(0,*) 'oellmv Size error ', m, n, size(x%v), size(y%v) write(0,*) 'oellmv Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_ info = psb_err_invalid_mat_state_
return return

@ -20,7 +20,7 @@ contains
nhacks = size(a%hkoffs) - 1 nhacks = size(a%hkoffs) - 1
hksz = a%hksz hksz = a%hksz
if ((n /= size(x%v)) .or. (m /= size(y%v))) then if ((n > size(x%v)) .or. (m > size(y%v))) then
write(0,*) 'Size error ', m, n, size(x%v), size(y%v) write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_ info = psb_err_invalid_mat_state_
return return

@ -18,7 +18,7 @@ contains
m = a%get_nrows() m = a%get_nrows()
n = a%get_ncols() n = a%get_ncols()
if ((n /= size(x%v)) .or. (m /= size(y%v))) then if ((n > size(x%v)) .or. (m > size(y%v))) then
write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v) write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_ info = psb_err_invalid_mat_state_
return return

@ -19,7 +19,7 @@ contains
n = a%get_ncols() n = a%get_ncols()
nzt = a%nzt nzt = a%nzt
nc = size(a%ja,2) nc = size(a%ja,2)
if ((n /= size(x%v)) .or. (m /= size(y%v))) then if ((n > size(x%v)) .or. (m > size(y%v))) then
write(0,*) 'oellmv Size error ', m, n, size(x%v), size(y%v) write(0,*) 'oellmv Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_ info = psb_err_invalid_mat_state_
return return

@ -20,7 +20,7 @@ contains
nhacks = size(a%hkoffs) - 1 nhacks = size(a%hkoffs) - 1
hksz = a%hksz hksz = a%hksz
if ((n /= size(x%v)) .or. (m /= size(y%v))) then if ((n > size(x%v)) .or. (m > size(y%v))) then
write(0,*) 'Size error ', m, n, size(x%v), size(y%v) write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_ info = psb_err_invalid_mat_state_
return return

@ -18,7 +18,7 @@ contains
m = a%get_nrows() m = a%get_nrows()
n = a%get_ncols() n = a%get_ncols()
if ((n /= size(x%v)) .or. (m /= size(y%v))) then if ((n > size(x%v)) .or. (m > size(y%v))) then
write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v) write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_ info = psb_err_invalid_mat_state_
return return

@ -19,7 +19,7 @@ contains
n = a%get_ncols() n = a%get_ncols()
nzt = a%nzt nzt = a%nzt
nc = size(a%ja,2) nc = size(a%ja,2)
if ((n /= size(x%v)) .or. (m /= size(y%v))) then if ((n > size(x%v)) .or. (m > size(y%v))) then
write(0,*) 'oellmv Size error ', m, n, size(x%v), size(y%v) write(0,*) 'oellmv Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_ info = psb_err_invalid_mat_state_
return return

@ -20,7 +20,7 @@ contains
nhacks = size(a%hkoffs) - 1 nhacks = size(a%hkoffs) - 1
hksz = a%hksz hksz = a%hksz
if ((n /= size(x%v)) .or. (m /= size(y%v))) then if ((n > size(x%v)) .or. (m > size(y%v))) then
write(0,*) 'Size error ', m, n, size(x%v), size(y%v) write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_ info = psb_err_invalid_mat_state_
return return

@ -18,7 +18,7 @@ contains
m = a%get_nrows() m = a%get_nrows()
n = a%get_ncols() n = a%get_ncols()
if ((n /= size(x%v)) .or. (m /= size(y%v))) then if ((n > size(x%v)) .or. (m > size(y%v))) then
write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v) write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_ info = psb_err_invalid_mat_state_
return return

@ -19,7 +19,7 @@ contains
n = a%get_ncols() n = a%get_ncols()
nzt = a%nzt nzt = a%nzt
nc = size(a%ja,2) nc = size(a%ja,2)
if ((n /= size(x%v)) .or. (m /= size(y%v))) then if ((n > size(x%v)) .or. (m > size(y%v))) then
write(0,*) 'oellmv Size error ', m, n, size(x%v), size(y%v) write(0,*) 'oellmv Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_ info = psb_err_invalid_mat_state_
return return

@ -20,7 +20,7 @@ contains
nhacks = size(a%hkoffs) - 1 nhacks = size(a%hkoffs) - 1
hksz = a%hksz hksz = a%hksz
if ((n /= size(x%v)) .or. (m /= size(y%v))) then if ((n > size(x%v)) .or. (m > size(y%v))) then
write(0,*) 'Size error ', m, n, size(x%v), size(y%v) write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_ info = psb_err_invalid_mat_state_
return return

@ -3,6 +3,8 @@ module psb_c_oacc_vect_mod
use openacc use openacc
use psb_const_mod use psb_const_mod
use psb_error_mod use psb_error_mod
use psb_realloc_mod
use psb_oacc_env_mod
use psb_c_vect_mod use psb_c_vect_mod
use psb_i_vect_mod use psb_i_vect_mod
use psb_i_oacc_vect_mod use psb_i_oacc_vect_mod
@ -26,6 +28,8 @@ module psb_c_oacc_vect_mod
procedure, pass(x) :: bld_x => c_oacc_bld_x procedure, pass(x) :: bld_x => c_oacc_bld_x
procedure, pass(x) :: bld_mn => c_oacc_bld_mn procedure, pass(x) :: bld_mn => c_oacc_bld_mn
procedure, pass(x) :: free => c_oacc_vect_free procedure, pass(x) :: free => c_oacc_vect_free
procedure, pass(x) :: free_buffer => c_oacc_vect_free_buffer
procedure, pass(x) :: maybe_free_buffer => c_oacc_vect_maybe_free_buffer
procedure, pass(x) :: ins_a => c_oacc_ins_a procedure, pass(x) :: ins_a => c_oacc_ins_a
procedure, pass(x) :: ins_v => c_oacc_ins_v procedure, pass(x) :: ins_v => c_oacc_ins_v
procedure, pass(x) :: is_host => c_oacc_is_host procedure, pass(x) :: is_host => c_oacc_is_host
@ -36,11 +40,13 @@ module psb_c_oacc_vect_mod
procedure, pass(x) :: set_sync => c_oacc_set_sync procedure, pass(x) :: set_sync => c_oacc_set_sync
procedure, pass(x) :: set_scal => c_oacc_set_scal procedure, pass(x) :: set_scal => c_oacc_set_scal
procedure, pass(x) :: new_buffer => c_oacc_new_buffer
procedure, pass(x) :: gthzv_x => c_oacc_gthzv_x procedure, pass(x) :: gthzv_x => c_oacc_gthzv_x
procedure, pass(x) :: gthzbuf_x => c_oacc_gthzbuf procedure, pass(x) :: gthzbuf => c_oacc_gthzbuf
procedure, pass(y) :: sctb => c_oacc_sctb procedure, pass(y) :: sctb => c_oacc_sctb
procedure, pass(y) :: sctb_x => c_oacc_sctb_x procedure, pass(y) :: sctb_x => c_oacc_sctb_x
procedure, pass(y) :: sctb_buf => c_oacc_sctb_buf procedure, pass(y) :: sctb_buf => c_oacc_sctb_buf
procedure, nopass :: device_wait => c_oacc_device_wait
procedure, pass(x) :: get_size => c_oacc_get_size procedure, pass(x) :: get_size => c_oacc_get_size
@ -87,6 +93,11 @@ module psb_c_oacc_vect_mod
contains contains
subroutine c_oacc_device_wait()
implicit none
call acc_wait_all()
end subroutine c_oacc_device_wait
subroutine c_oacc_absval1(x) subroutine c_oacc_absval1(x)
implicit none implicit none
class(psb_c_vect_oacc), intent(inout) :: x class(psb_c_vect_oacc), intent(inout) :: x
@ -182,12 +193,16 @@ contains
do i = 1, n do i = 1, n
if (abs(x(i)) > mx) mx = abs(x(i)) if (abs(x(i)) > mx) mx = abs(x(i))
end do end do
if (mx == szero) then
res = mx
else
sum = szero sum = szero
!$acc parallel loop reduction(+:sum) !$acc parallel loop reduction(+:sum)
do i = 1, n do i = 1, n
sum = sum + abs(x(i)/mx)**2 sum = sum + abs(x(i)/mx)**2
end do end do
res = mx*sqrt(sum) res = mx*sqrt(sum)
end if
end function c_inner_oacc_nrm2 end function c_inner_oacc_nrm2
end function c_oacc_nrm2 end function c_oacc_nrm2
@ -398,29 +413,44 @@ contains
class(psb_i_base_vect_type) :: idx class(psb_i_base_vect_type) :: idx
complex(psb_spk_) :: beta complex(psb_spk_) :: beta
class(psb_c_vect_oacc) :: y class(psb_c_vect_oacc) :: y
integer(psb_ipk_) :: info integer(psb_ipk_) :: info, k
logical :: acc_done
if (.not.allocated(y%combuf)) then if (.not.allocated(y%combuf)) then
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
return return
end if end if
acc_done = .false.
select type(ii => idx) select type(ii => idx)
class is (psb_i_vect_oacc) class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync() if (ii%is_host()) call ii%sync()
if (y%is_host()) call y%sync() if (y%is_host()) call y%sync()
!$acc update device(y%combuf)
call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1))
call y%set_dev()
acc_done = .true.
end select
!$acc parallel loop if (.not.acc_done) then
do i = 1, n if (idx%is_dev()) call idx%sync()
y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i) if (y%is_dev()) call y%sync()
do k = 1, n
y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + y%combuf(k)
end do end do
end if
class default contains
subroutine inner_sctb(n,x,beta,y,idx)
integer(psb_ipk_) :: n, idx(:)
complex(psb_spk_) :: beta,x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop !$acc parallel loop
do i = 1, n do k = 1, n
y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i) y(idx(k)) = x(k) + beta *y(idx(k))
end do end do
end select !$acc end parallel loop
end subroutine inner_sctb
end subroutine c_oacc_sctb_buf end subroutine c_oacc_sctb_buf
subroutine c_oacc_sctb_x(i, n, idx, x, beta, y) subroutine c_oacc_sctb_x(i, n, idx, x, beta, y)
@ -430,24 +460,41 @@ contains
class(psb_i_base_vect_type) :: idx class(psb_i_base_vect_type) :: idx
complex(psb_spk_) :: beta, x(:) complex(psb_spk_) :: beta, x(:)
class(psb_c_vect_oacc) :: y class(psb_c_vect_oacc) :: y
integer(psb_ipk_) :: info, ni integer(psb_ipk_) :: info, ni, k
logical :: acc_done
acc_done = .false.
select type(ii => idx) select type(ii => idx)
class is (psb_i_vect_oacc) class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync() if (ii%is_host()) call ii%sync()
class default
call psb_errpush(info, 'c_oacc_sctb_x')
return
end select
if (y%is_host()) call y%sync() if (y%is_host()) call y%sync()
if (acc_is_present(x)) then
call inner_sctb(n,x(i:i+n-1),beta,y%v,idx%v(i:i+n-1))
acc_done = .true.
call y%set_dev()
end if
end select
if (.not.acc_done) then
if (idx%is_dev()) call idx%sync()
if (y%is_dev()) call y%sync()
do k = 1, n
y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + x(k+i-1)
end do
call y%set_host()
end if
contains
subroutine inner_sctb(n,x,beta,y,idx)
integer(psb_ipk_) :: n, idx(:)
complex(psb_spk_) :: beta, x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop !$acc parallel loop
do i = 1, n do k = 1, n
y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i) y(idx(k)) = x(k) + beta *y(idx(k))
end do end do
!$acc end parallel loop
end subroutine inner_sctb
call y%set_dev()
end subroutine c_oacc_sctb_x end subroutine c_oacc_sctb_x
subroutine c_oacc_sctb(n, idx, x, beta, y) subroutine c_oacc_sctb(n, idx, x, beta, y)
@ -463,7 +510,6 @@ contains
if (n == 0) return if (n == 0) return
if (y%is_dev()) call y%sync() if (y%is_dev()) call y%sync()
!$acc parallel loop
do i = 1, n do i = 1, n
y%v(idx(i)) = beta * y%v(idx(i)) + x(i) y%v(idx(i)) = beta * y%v(idx(i)) + x(i)
end do end do
@ -477,28 +523,46 @@ contains
integer(psb_ipk_) :: i, n integer(psb_ipk_) :: i, n
class(psb_i_base_vect_type) :: idx class(psb_i_base_vect_type) :: idx
class(psb_c_vect_oacc) :: x class(psb_c_vect_oacc) :: x
integer(psb_ipk_) :: info integer(psb_ipk_) :: info,k
logical :: acc_done
info = 0 info = 0
acc_done = .false.
if (.not.allocated(x%combuf)) then if (.not.allocated(x%combuf)) then
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
return return
end if end if
select type(ii => idx) select type (ii => idx)
class is (psb_i_vect_oacc) class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync() if (ii%is_host()) call ii%sync()
class default if (x%is_host()) call x%sync()
call psb_errpush(info, 'c_oacc_gthzbuf') call inner_gth(n,x%v,x%combuf(i:i+n-1),ii%v(i:i+n-1))
return acc_done = .true.
end select end select
if (x%is_host()) call x%sync() if (.not.acc_done) then
if (idx%is_dev()) call idx%sync()
if (x%is_dev()) call x%sync()
do k = 1, n
x%combuf(k+i-1) = x%v(idx%v(k+i-1))
end do
end if
!$acc parallel loop contains
do i = 1, n subroutine inner_gth(n,x,y,idx)
x%combuf(i) = x%v(idx%v(i)) integer(psb_ipk_) :: n, idx(:)
complex(psb_spk_) :: x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop present(y)
do k = 1, n
y(k) = x(idx(k))
end do end do
!$acc end parallel loop
!$acc update self(y)
end subroutine inner_gth
end subroutine c_oacc_gthzbuf end subroutine c_oacc_gthzbuf
subroutine c_oacc_gthzv_x(i, n, idx, x, y) subroutine c_oacc_gthzv_x(i, n, idx, x, y)
@ -508,24 +572,41 @@ contains
class(psb_i_base_vect_type):: idx class(psb_i_base_vect_type):: idx
complex(psb_spk_) :: y(:) complex(psb_spk_) :: y(:)
class(psb_c_vect_oacc):: x class(psb_c_vect_oacc):: x
integer(psb_ipk_) :: info integer(psb_ipk_) :: info, k
logical :: acc_done
info = 0 info = 0
acc_done = .false.
select type(ii => idx) select type (ii => idx)
class is (psb_i_vect_oacc) class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync() if (ii%is_host()) call ii%sync()
class default
call psb_errpush(info, 'c_oacc_gthzv_x')
return
end select
if (x%is_host()) call x%sync() if (x%is_host()) call x%sync()
if (acc_is_present(y)) then
!$acc parallel loop call inner_gth(n,x%v,y(i:),ii%v(i:))
do i = 1, n acc_done=.true.
y(i) = x%v(idx%v(i)) end if
end select
if (.not.acc_done) then
if (x%is_dev()) call x%sync()
if (idx%is_dev()) call idx%sync()
do k = 1, n
y(k+i-1) = x%v(idx%v(k+i-1))
!write(0,*) 'oa gthzv ',k+i-1,idx%v(k+i-1),k,y(k)
end do end do
end if
contains
subroutine inner_gth(n,x,y,idx)
integer(psb_ipk_) :: n, idx(:)
complex(psb_spk_) :: x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop present(y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
!$acc update self(y)
end subroutine inner_gth
end subroutine c_oacc_gthzv_x end subroutine c_oacc_gthzv_x
subroutine c_oacc_ins_v(n, irl, val, dupl, x, info) subroutine c_oacc_ins_v(n, irl, val, dupl, x, info)
@ -718,7 +799,7 @@ contains
integer(psb_ipk_) :: info integer(psb_ipk_) :: info
res = czero res = czero
!write(0,*) 'dot_v' !!$ write(0,*) 'oacc_dot_v'
select type(yy => y) select type(yy => y)
type is (psb_c_base_vect_type) type is (psb_c_base_vect_type)
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
@ -762,6 +843,17 @@ contains
end function c_oacc_dot_a end function c_oacc_dot_a
subroutine c_oacc_new_buffer(n,x,info)
implicit none
class(psb_c_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
integer(psb_ipk_), intent(out) :: info
if (n /= psb_size(x%combuf)) then
call x%psb_c_base_vect_type%new_buffer(n,info)
!$acc enter data copyin(x%combuf)
end if
end subroutine c_oacc_new_buffer
subroutine c_oacc_sync_dev_space(x) subroutine c_oacc_sync_dev_space(x)
implicit none implicit none
class(psb_c_vect_oacc), intent(inout) :: x class(psb_c_vect_oacc), intent(inout) :: x
@ -860,12 +952,33 @@ contains
class(psb_c_vect_oacc), intent(inout) :: x class(psb_c_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info
info = 0 info = 0
if (allocated(x%v)) then
if (acc_is_present(x%v)) call acc_delete_finalize(x%v) if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info) if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
end if call x%psb_c_base_vect_type%free(info)
end subroutine c_oacc_vect_free end subroutine c_oacc_vect_free
subroutine c_oacc_vect_maybe_free_buffer(x,info)
implicit none
class(psb_c_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
if (psb_oacc_get_maybe_free_buffer())&
& call x%free_buffer(info)
end subroutine c_oacc_vect_maybe_free_buffer
subroutine c_oacc_vect_free_buffer(x,info)
implicit none
class(psb_c_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_c_base_vect_type%free_buffer(info)
end subroutine c_oacc_vect_free_buffer
function c_oacc_get_size(x) result(res) function c_oacc_get_size(x) result(res)
implicit none implicit none
class(psb_c_vect_oacc), intent(inout) :: x class(psb_c_vect_oacc), intent(inout) :: x

@ -3,6 +3,8 @@ module psb_d_oacc_vect_mod
use openacc use openacc
use psb_const_mod use psb_const_mod
use psb_error_mod use psb_error_mod
use psb_realloc_mod
use psb_oacc_env_mod
use psb_d_vect_mod use psb_d_vect_mod
use psb_i_vect_mod use psb_i_vect_mod
use psb_i_oacc_vect_mod use psb_i_oacc_vect_mod
@ -26,6 +28,8 @@ module psb_d_oacc_vect_mod
procedure, pass(x) :: bld_x => d_oacc_bld_x procedure, pass(x) :: bld_x => d_oacc_bld_x
procedure, pass(x) :: bld_mn => d_oacc_bld_mn procedure, pass(x) :: bld_mn => d_oacc_bld_mn
procedure, pass(x) :: free => d_oacc_vect_free procedure, pass(x) :: free => d_oacc_vect_free
procedure, pass(x) :: free_buffer => d_oacc_vect_free_buffer
procedure, pass(x) :: maybe_free_buffer => d_oacc_vect_maybe_free_buffer
procedure, pass(x) :: ins_a => d_oacc_ins_a procedure, pass(x) :: ins_a => d_oacc_ins_a
procedure, pass(x) :: ins_v => d_oacc_ins_v procedure, pass(x) :: ins_v => d_oacc_ins_v
procedure, pass(x) :: is_host => d_oacc_is_host procedure, pass(x) :: is_host => d_oacc_is_host
@ -36,11 +40,13 @@ module psb_d_oacc_vect_mod
procedure, pass(x) :: set_sync => d_oacc_set_sync procedure, pass(x) :: set_sync => d_oacc_set_sync
procedure, pass(x) :: set_scal => d_oacc_set_scal procedure, pass(x) :: set_scal => d_oacc_set_scal
procedure, pass(x) :: new_buffer => d_oacc_new_buffer
procedure, pass(x) :: gthzv_x => d_oacc_gthzv_x procedure, pass(x) :: gthzv_x => d_oacc_gthzv_x
procedure, pass(x) :: gthzbuf_x => d_oacc_gthzbuf procedure, pass(x) :: gthzbuf => d_oacc_gthzbuf
procedure, pass(y) :: sctb => d_oacc_sctb procedure, pass(y) :: sctb => d_oacc_sctb
procedure, pass(y) :: sctb_x => d_oacc_sctb_x procedure, pass(y) :: sctb_x => d_oacc_sctb_x
procedure, pass(y) :: sctb_buf => d_oacc_sctb_buf procedure, pass(y) :: sctb_buf => d_oacc_sctb_buf
procedure, nopass :: device_wait => d_oacc_device_wait
procedure, pass(x) :: get_size => d_oacc_get_size procedure, pass(x) :: get_size => d_oacc_get_size
@ -87,6 +93,11 @@ module psb_d_oacc_vect_mod
contains contains
subroutine d_oacc_device_wait()
implicit none
call acc_wait_all()
end subroutine d_oacc_device_wait
subroutine d_oacc_absval1(x) subroutine d_oacc_absval1(x)
implicit none implicit none
class(psb_d_vect_oacc), intent(inout) :: x class(psb_d_vect_oacc), intent(inout) :: x
@ -182,12 +193,16 @@ contains
do i = 1, n do i = 1, n
if (abs(x(i)) > mx) mx = abs(x(i)) if (abs(x(i)) > mx) mx = abs(x(i))
end do end do
if (mx == dzero) then
res = mx
else
sum = dzero sum = dzero
!$acc parallel loop reduction(+:sum) !$acc parallel loop reduction(+:sum)
do i = 1, n do i = 1, n
sum = sum + abs(x(i)/mx)**2 sum = sum + abs(x(i)/mx)**2
end do end do
res = mx*sqrt(sum) res = mx*sqrt(sum)
end if
end function d_inner_oacc_nrm2 end function d_inner_oacc_nrm2
end function d_oacc_nrm2 end function d_oacc_nrm2
@ -398,29 +413,44 @@ contains
class(psb_i_base_vect_type) :: idx class(psb_i_base_vect_type) :: idx
real(psb_dpk_) :: beta real(psb_dpk_) :: beta
class(psb_d_vect_oacc) :: y class(psb_d_vect_oacc) :: y
integer(psb_ipk_) :: info integer(psb_ipk_) :: info, k
logical :: acc_done
if (.not.allocated(y%combuf)) then if (.not.allocated(y%combuf)) then
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
return return
end if end if
acc_done = .false.
select type(ii => idx) select type(ii => idx)
class is (psb_i_vect_oacc) class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync() if (ii%is_host()) call ii%sync()
if (y%is_host()) call y%sync() if (y%is_host()) call y%sync()
!$acc update device(y%combuf)
call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1))
call y%set_dev()
acc_done = .true.
end select
!$acc parallel loop if (.not.acc_done) then
do i = 1, n if (idx%is_dev()) call idx%sync()
y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i) if (y%is_dev()) call y%sync()
do k = 1, n
y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + y%combuf(k)
end do end do
end if
class default contains
subroutine inner_sctb(n,x,beta,y,idx)
integer(psb_ipk_) :: n, idx(:)
real(psb_dpk_) :: beta,x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop !$acc parallel loop
do i = 1, n do k = 1, n
y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i) y(idx(k)) = x(k) + beta *y(idx(k))
end do end do
end select !$acc end parallel loop
end subroutine inner_sctb
end subroutine d_oacc_sctb_buf end subroutine d_oacc_sctb_buf
subroutine d_oacc_sctb_x(i, n, idx, x, beta, y) subroutine d_oacc_sctb_x(i, n, idx, x, beta, y)
@ -430,24 +460,41 @@ contains
class(psb_i_base_vect_type) :: idx class(psb_i_base_vect_type) :: idx
real(psb_dpk_) :: beta, x(:) real(psb_dpk_) :: beta, x(:)
class(psb_d_vect_oacc) :: y class(psb_d_vect_oacc) :: y
integer(psb_ipk_) :: info, ni integer(psb_ipk_) :: info, ni, k
logical :: acc_done
acc_done = .false.
select type(ii => idx) select type(ii => idx)
class is (psb_i_vect_oacc) class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync() if (ii%is_host()) call ii%sync()
class default
call psb_errpush(info, 'd_oacc_sctb_x')
return
end select
if (y%is_host()) call y%sync() if (y%is_host()) call y%sync()
if (acc_is_present(x)) then
call inner_sctb(n,x(i:i+n-1),beta,y%v,idx%v(i:i+n-1))
acc_done = .true.
call y%set_dev()
end if
end select
if (.not.acc_done) then
if (idx%is_dev()) call idx%sync()
if (y%is_dev()) call y%sync()
do k = 1, n
y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + x(k+i-1)
end do
call y%set_host()
end if
contains
subroutine inner_sctb(n,x,beta,y,idx)
integer(psb_ipk_) :: n, idx(:)
real(psb_dpk_) :: beta, x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop !$acc parallel loop
do i = 1, n do k = 1, n
y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i) y(idx(k)) = x(k) + beta *y(idx(k))
end do end do
!$acc end parallel loop
end subroutine inner_sctb
call y%set_dev()
end subroutine d_oacc_sctb_x end subroutine d_oacc_sctb_x
subroutine d_oacc_sctb(n, idx, x, beta, y) subroutine d_oacc_sctb(n, idx, x, beta, y)
@ -463,7 +510,6 @@ contains
if (n == 0) return if (n == 0) return
if (y%is_dev()) call y%sync() if (y%is_dev()) call y%sync()
!$acc parallel loop
do i = 1, n do i = 1, n
y%v(idx(i)) = beta * y%v(idx(i)) + x(i) y%v(idx(i)) = beta * y%v(idx(i)) + x(i)
end do end do
@ -477,28 +523,46 @@ contains
integer(psb_ipk_) :: i, n integer(psb_ipk_) :: i, n
class(psb_i_base_vect_type) :: idx class(psb_i_base_vect_type) :: idx
class(psb_d_vect_oacc) :: x class(psb_d_vect_oacc) :: x
integer(psb_ipk_) :: info integer(psb_ipk_) :: info,k
logical :: acc_done
info = 0 info = 0
acc_done = .false.
if (.not.allocated(x%combuf)) then if (.not.allocated(x%combuf)) then
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
return return
end if end if
select type(ii => idx) select type (ii => idx)
class is (psb_i_vect_oacc) class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync() if (ii%is_host()) call ii%sync()
class default if (x%is_host()) call x%sync()
call psb_errpush(info, 'd_oacc_gthzbuf') call inner_gth(n,x%v,x%combuf(i:i+n-1),ii%v(i:i+n-1))
return acc_done = .true.
end select end select
if (x%is_host()) call x%sync() if (.not.acc_done) then
if (idx%is_dev()) call idx%sync()
if (x%is_dev()) call x%sync()
do k = 1, n
x%combuf(k+i-1) = x%v(idx%v(k+i-1))
end do
end if
!$acc parallel loop contains
do i = 1, n subroutine inner_gth(n,x,y,idx)
x%combuf(i) = x%v(idx%v(i)) integer(psb_ipk_) :: n, idx(:)
real(psb_dpk_) :: x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop present(y)
do k = 1, n
y(k) = x(idx(k))
end do end do
!$acc end parallel loop
!$acc update self(y)
end subroutine inner_gth
end subroutine d_oacc_gthzbuf end subroutine d_oacc_gthzbuf
subroutine d_oacc_gthzv_x(i, n, idx, x, y) subroutine d_oacc_gthzv_x(i, n, idx, x, y)
@ -508,24 +572,41 @@ contains
class(psb_i_base_vect_type):: idx class(psb_i_base_vect_type):: idx
real(psb_dpk_) :: y(:) real(psb_dpk_) :: y(:)
class(psb_d_vect_oacc):: x class(psb_d_vect_oacc):: x
integer(psb_ipk_) :: info integer(psb_ipk_) :: info, k
logical :: acc_done
info = 0 info = 0
acc_done = .false.
select type(ii => idx) select type (ii => idx)
class is (psb_i_vect_oacc) class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync() if (ii%is_host()) call ii%sync()
class default
call psb_errpush(info, 'd_oacc_gthzv_x')
return
end select
if (x%is_host()) call x%sync() if (x%is_host()) call x%sync()
if (acc_is_present(y)) then
!$acc parallel loop call inner_gth(n,x%v,y(i:),ii%v(i:))
do i = 1, n acc_done=.true.
y(i) = x%v(idx%v(i)) end if
end select
if (.not.acc_done) then
if (x%is_dev()) call x%sync()
if (idx%is_dev()) call idx%sync()
do k = 1, n
y(k+i-1) = x%v(idx%v(k+i-1))
!write(0,*) 'oa gthzv ',k+i-1,idx%v(k+i-1),k,y(k)
end do end do
end if
contains
subroutine inner_gth(n,x,y,idx)
integer(psb_ipk_) :: n, idx(:)
real(psb_dpk_) :: x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop present(y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
!$acc update self(y)
end subroutine inner_gth
end subroutine d_oacc_gthzv_x end subroutine d_oacc_gthzv_x
subroutine d_oacc_ins_v(n, irl, val, dupl, x, info) subroutine d_oacc_ins_v(n, irl, val, dupl, x, info)
@ -718,7 +799,7 @@ contains
integer(psb_ipk_) :: info integer(psb_ipk_) :: info
res = dzero res = dzero
!write(0,*) 'dot_v' !!$ write(0,*) 'oacc_dot_v'
select type(yy => y) select type(yy => y)
type is (psb_d_base_vect_type) type is (psb_d_base_vect_type)
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
@ -762,6 +843,17 @@ contains
end function d_oacc_dot_a end function d_oacc_dot_a
subroutine d_oacc_new_buffer(n,x,info)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
integer(psb_ipk_), intent(out) :: info
if (n /= psb_size(x%combuf)) then
call x%psb_d_base_vect_type%new_buffer(n,info)
!$acc enter data copyin(x%combuf)
end if
end subroutine d_oacc_new_buffer
subroutine d_oacc_sync_dev_space(x) subroutine d_oacc_sync_dev_space(x)
implicit none implicit none
class(psb_d_vect_oacc), intent(inout) :: x class(psb_d_vect_oacc), intent(inout) :: x
@ -860,12 +952,33 @@ contains
class(psb_d_vect_oacc), intent(inout) :: x class(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info
info = 0 info = 0
if (allocated(x%v)) then
if (acc_is_present(x%v)) call acc_delete_finalize(x%v) if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info) if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
end if call x%psb_d_base_vect_type%free(info)
end subroutine d_oacc_vect_free end subroutine d_oacc_vect_free
subroutine d_oacc_vect_maybe_free_buffer(x,info)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
if (psb_oacc_get_maybe_free_buffer())&
& call x%free_buffer(info)
end subroutine d_oacc_vect_maybe_free_buffer
subroutine d_oacc_vect_free_buffer(x,info)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_d_base_vect_type%free_buffer(info)
end subroutine d_oacc_vect_free_buffer
function d_oacc_get_size(x) result(res) function d_oacc_get_size(x) result(res)
implicit none implicit none
class(psb_d_vect_oacc), intent(inout) :: x class(psb_d_vect_oacc), intent(inout) :: x

@ -3,6 +3,8 @@ module psb_i_oacc_vect_mod
use openacc use openacc
use psb_const_mod use psb_const_mod
use psb_error_mod use psb_error_mod
use psb_realloc_mod
use psb_oacc_env_mod
use psb_i_vect_mod use psb_i_vect_mod
integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_host = -1
@ -24,6 +26,8 @@ module psb_i_oacc_vect_mod
procedure, pass(x) :: bld_x => i_oacc_bld_x procedure, pass(x) :: bld_x => i_oacc_bld_x
procedure, pass(x) :: bld_mn => i_oacc_bld_mn procedure, pass(x) :: bld_mn => i_oacc_bld_mn
procedure, pass(x) :: free => i_oacc_vect_free procedure, pass(x) :: free => i_oacc_vect_free
procedure, pass(x) :: free_buffer => i_oacc_vect_free_buffer
procedure, pass(x) :: maybe_free_buffer => i_oacc_vect_maybe_free_buffer
procedure, pass(x) :: ins_a => i_oacc_ins_a procedure, pass(x) :: ins_a => i_oacc_ins_a
procedure, pass(x) :: ins_v => i_oacc_ins_v procedure, pass(x) :: ins_v => i_oacc_ins_v
procedure, pass(x) :: is_host => i_oacc_is_host procedure, pass(x) :: is_host => i_oacc_is_host
@ -34,11 +38,13 @@ module psb_i_oacc_vect_mod
procedure, pass(x) :: set_sync => i_oacc_set_sync procedure, pass(x) :: set_sync => i_oacc_set_sync
procedure, pass(x) :: set_scal => i_oacc_set_scal procedure, pass(x) :: set_scal => i_oacc_set_scal
procedure, pass(x) :: new_buffer => i_oacc_new_buffer
procedure, pass(x) :: gthzv_x => i_oacc_gthzv_x procedure, pass(x) :: gthzv_x => i_oacc_gthzv_x
procedure, pass(x) :: gthzbuf_x => i_oacc_gthzbuf procedure, pass(x) :: gthzbuf => i_oacc_gthzbuf
procedure, pass(y) :: sctb => i_oacc_sctb procedure, pass(y) :: sctb => i_oacc_sctb
procedure, pass(y) :: sctb_x => i_oacc_sctb_x procedure, pass(y) :: sctb_x => i_oacc_sctb_x
procedure, pass(y) :: sctb_buf => i_oacc_sctb_buf procedure, pass(y) :: sctb_buf => i_oacc_sctb_buf
procedure, nopass :: device_wait => i_oacc_device_wait
procedure, pass(x) :: get_size => i_oacc_get_size procedure, pass(x) :: get_size => i_oacc_get_size
@ -48,6 +54,11 @@ module psb_i_oacc_vect_mod
contains contains
subroutine i_oacc_device_wait()
implicit none
call acc_wait_all()
end subroutine i_oacc_device_wait
subroutine i_oacc_sctb_buf(i, n, idx, beta, y) subroutine i_oacc_sctb_buf(i, n, idx, beta, y)
use psb_base_mod use psb_base_mod
@ -56,29 +67,44 @@ contains
class(psb_i_base_vect_type) :: idx class(psb_i_base_vect_type) :: idx
integer(psb_ipk_) :: beta integer(psb_ipk_) :: beta
class(psb_i_vect_oacc) :: y class(psb_i_vect_oacc) :: y
integer(psb_ipk_) :: info integer(psb_ipk_) :: info, k
logical :: acc_done
if (.not.allocated(y%combuf)) then if (.not.allocated(y%combuf)) then
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
return return
end if end if
acc_done = .false.
select type(ii => idx) select type(ii => idx)
class is (psb_i_vect_oacc) class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync() if (ii%is_host()) call ii%sync()
if (y%is_host()) call y%sync() if (y%is_host()) call y%sync()
!$acc update device(y%combuf)
call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1))
call y%set_dev()
acc_done = .true.
end select
!$acc parallel loop if (.not.acc_done) then
do i = 1, n if (idx%is_dev()) call idx%sync()
y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i) if (y%is_dev()) call y%sync()
do k = 1, n
y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + y%combuf(k)
end do end do
end if
class default contains
subroutine inner_sctb(n,x,beta,y,idx)
integer(psb_ipk_) :: n, idx(:)
integer(psb_ipk_) :: beta,x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop !$acc parallel loop
do i = 1, n do k = 1, n
y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i) y(idx(k)) = x(k) + beta *y(idx(k))
end do end do
end select !$acc end parallel loop
end subroutine inner_sctb
end subroutine i_oacc_sctb_buf end subroutine i_oacc_sctb_buf
subroutine i_oacc_sctb_x(i, n, idx, x, beta, y) subroutine i_oacc_sctb_x(i, n, idx, x, beta, y)
@ -88,24 +114,41 @@ contains
class(psb_i_base_vect_type) :: idx class(psb_i_base_vect_type) :: idx
integer(psb_ipk_) :: beta, x(:) integer(psb_ipk_) :: beta, x(:)
class(psb_i_vect_oacc) :: y class(psb_i_vect_oacc) :: y
integer(psb_ipk_) :: info, ni integer(psb_ipk_) :: info, ni, k
logical :: acc_done
acc_done = .false.
select type(ii => idx) select type(ii => idx)
class is (psb_i_vect_oacc) class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync() if (ii%is_host()) call ii%sync()
class default
call psb_errpush(info, 'i_oacc_sctb_x')
return
end select
if (y%is_host()) call y%sync() if (y%is_host()) call y%sync()
if (acc_is_present(x)) then
call inner_sctb(n,x(i:i+n-1),beta,y%v,idx%v(i:i+n-1))
acc_done = .true.
call y%set_dev()
end if
end select
if (.not.acc_done) then
if (idx%is_dev()) call idx%sync()
if (y%is_dev()) call y%sync()
do k = 1, n
y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + x(k+i-1)
end do
call y%set_host()
end if
contains
subroutine inner_sctb(n,x,beta,y,idx)
integer(psb_ipk_) :: n, idx(:)
integer(psb_ipk_) :: beta, x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop !$acc parallel loop
do i = 1, n do k = 1, n
y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i) y(idx(k)) = x(k) + beta *y(idx(k))
end do end do
!$acc end parallel loop
end subroutine inner_sctb
call y%set_dev()
end subroutine i_oacc_sctb_x end subroutine i_oacc_sctb_x
subroutine i_oacc_sctb(n, idx, x, beta, y) subroutine i_oacc_sctb(n, idx, x, beta, y)
@ -121,7 +164,6 @@ contains
if (n == 0) return if (n == 0) return
if (y%is_dev()) call y%sync() if (y%is_dev()) call y%sync()
!$acc parallel loop
do i = 1, n do i = 1, n
y%v(idx(i)) = beta * y%v(idx(i)) + x(i) y%v(idx(i)) = beta * y%v(idx(i)) + x(i)
end do end do
@ -135,28 +177,46 @@ contains
integer(psb_ipk_) :: i, n integer(psb_ipk_) :: i, n
class(psb_i_base_vect_type) :: idx class(psb_i_base_vect_type) :: idx
class(psb_i_vect_oacc) :: x class(psb_i_vect_oacc) :: x
integer(psb_ipk_) :: info integer(psb_ipk_) :: info,k
logical :: acc_done
info = 0 info = 0
acc_done = .false.
if (.not.allocated(x%combuf)) then if (.not.allocated(x%combuf)) then
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
return return
end if end if
select type(ii => idx) select type (ii => idx)
class is (psb_i_vect_oacc) class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync() if (ii%is_host()) call ii%sync()
class default if (x%is_host()) call x%sync()
call psb_errpush(info, 'i_oacc_gthzbuf') call inner_gth(n,x%v,x%combuf(i:i+n-1),ii%v(i:i+n-1))
return acc_done = .true.
end select end select
if (x%is_host()) call x%sync() if (.not.acc_done) then
if (idx%is_dev()) call idx%sync()
if (x%is_dev()) call x%sync()
do k = 1, n
x%combuf(k+i-1) = x%v(idx%v(k+i-1))
end do
end if
!$acc parallel loop contains
do i = 1, n subroutine inner_gth(n,x,y,idx)
x%combuf(i) = x%v(idx%v(i)) integer(psb_ipk_) :: n, idx(:)
integer(psb_ipk_) :: x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop present(y)
do k = 1, n
y(k) = x(idx(k))
end do end do
!$acc end parallel loop
!$acc update self(y)
end subroutine inner_gth
end subroutine i_oacc_gthzbuf end subroutine i_oacc_gthzbuf
subroutine i_oacc_gthzv_x(i, n, idx, x, y) subroutine i_oacc_gthzv_x(i, n, idx, x, y)
@ -166,24 +226,41 @@ contains
class(psb_i_base_vect_type):: idx class(psb_i_base_vect_type):: idx
integer(psb_ipk_) :: y(:) integer(psb_ipk_) :: y(:)
class(psb_i_vect_oacc):: x class(psb_i_vect_oacc):: x
integer(psb_ipk_) :: info integer(psb_ipk_) :: info, k
logical :: acc_done
info = 0 info = 0
acc_done = .false.
select type(ii => idx) select type (ii => idx)
class is (psb_i_vect_oacc) class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync() if (ii%is_host()) call ii%sync()
class default
call psb_errpush(info, 'i_oacc_gthzv_x')
return
end select
if (x%is_host()) call x%sync() if (x%is_host()) call x%sync()
if (acc_is_present(y)) then
!$acc parallel loop call inner_gth(n,x%v,y(i:),ii%v(i:))
do i = 1, n acc_done=.true.
y(i) = x%v(idx%v(i)) end if
end select
if (.not.acc_done) then
if (x%is_dev()) call x%sync()
if (idx%is_dev()) call idx%sync()
do k = 1, n
y(k+i-1) = x%v(idx%v(k+i-1))
!write(0,*) 'oa gthzv ',k+i-1,idx%v(k+i-1),k,y(k)
end do
end if
contains
subroutine inner_gth(n,x,y,idx)
integer(psb_ipk_) :: n, idx(:)
integer(psb_ipk_) :: x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop present(y)
do k = 1, n
y(k) = x(idx(k))
end do end do
!$acc end parallel loop
!$acc update self(y)
end subroutine inner_gth
end subroutine i_oacc_gthzv_x end subroutine i_oacc_gthzv_x
subroutine i_oacc_ins_v(n, irl, val, dupl, x, info) subroutine i_oacc_ins_v(n, irl, val, dupl, x, info)
@ -366,6 +443,17 @@ contains
end function i_oacc_get_fmt end function i_oacc_get_fmt
subroutine i_oacc_new_buffer(n,x,info)
implicit none
class(psb_i_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
integer(psb_ipk_), intent(out) :: info
if (n /= psb_size(x%combuf)) then
call x%psb_i_base_vect_type%new_buffer(n,info)
!$acc enter data copyin(x%combuf)
end if
end subroutine i_oacc_new_buffer
subroutine i_oacc_sync_dev_space(x) subroutine i_oacc_sync_dev_space(x)
implicit none implicit none
class(psb_i_vect_oacc), intent(inout) :: x class(psb_i_vect_oacc), intent(inout) :: x
@ -464,12 +552,33 @@ contains
class(psb_i_vect_oacc), intent(inout) :: x class(psb_i_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info
info = 0 info = 0
if (allocated(x%v)) then
if (acc_is_present(x%v)) call acc_delete_finalize(x%v) if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info) if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
end if call x%psb_i_base_vect_type%free(info)
end subroutine i_oacc_vect_free end subroutine i_oacc_vect_free
subroutine i_oacc_vect_maybe_free_buffer(x,info)
implicit none
class(psb_i_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
if (psb_oacc_get_maybe_free_buffer())&
& call x%free_buffer(info)
end subroutine i_oacc_vect_maybe_free_buffer
subroutine i_oacc_vect_free_buffer(x,info)
implicit none
class(psb_i_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_i_base_vect_type%free_buffer(info)
end subroutine i_oacc_vect_free_buffer
function i_oacc_get_size(x) result(res) function i_oacc_get_size(x) result(res)
implicit none implicit none
class(psb_i_vect_oacc), intent(inout) :: x class(psb_i_vect_oacc), intent(inout) :: x

@ -3,6 +3,8 @@ module psb_l_oacc_vect_mod
use openacc use openacc
use psb_const_mod use psb_const_mod
use psb_error_mod use psb_error_mod
use psb_realloc_mod
use psb_oacc_env_mod
use psb_l_vect_mod use psb_l_vect_mod
use psb_i_vect_mod use psb_i_vect_mod
use psb_i_oacc_vect_mod use psb_i_oacc_vect_mod
@ -26,6 +28,8 @@ module psb_l_oacc_vect_mod
procedure, pass(x) :: bld_x => l_oacc_bld_x procedure, pass(x) :: bld_x => l_oacc_bld_x
procedure, pass(x) :: bld_mn => l_oacc_bld_mn procedure, pass(x) :: bld_mn => l_oacc_bld_mn
procedure, pass(x) :: free => l_oacc_vect_free procedure, pass(x) :: free => l_oacc_vect_free
procedure, pass(x) :: free_buffer => l_oacc_vect_free_buffer
procedure, pass(x) :: maybe_free_buffer => l_oacc_vect_maybe_free_buffer
procedure, pass(x) :: ins_a => l_oacc_ins_a procedure, pass(x) :: ins_a => l_oacc_ins_a
procedure, pass(x) :: ins_v => l_oacc_ins_v procedure, pass(x) :: ins_v => l_oacc_ins_v
procedure, pass(x) :: is_host => l_oacc_is_host procedure, pass(x) :: is_host => l_oacc_is_host
@ -36,11 +40,13 @@ module psb_l_oacc_vect_mod
procedure, pass(x) :: set_sync => l_oacc_set_sync procedure, pass(x) :: set_sync => l_oacc_set_sync
procedure, pass(x) :: set_scal => l_oacc_set_scal procedure, pass(x) :: set_scal => l_oacc_set_scal
procedure, pass(x) :: new_buffer => l_oacc_new_buffer
procedure, pass(x) :: gthzv_x => l_oacc_gthzv_x procedure, pass(x) :: gthzv_x => l_oacc_gthzv_x
procedure, pass(x) :: gthzbuf_x => l_oacc_gthzbuf procedure, pass(x) :: gthzbuf => l_oacc_gthzbuf
procedure, pass(y) :: sctb => l_oacc_sctb procedure, pass(y) :: sctb => l_oacc_sctb
procedure, pass(y) :: sctb_x => l_oacc_sctb_x procedure, pass(y) :: sctb_x => l_oacc_sctb_x
procedure, pass(y) :: sctb_buf => l_oacc_sctb_buf procedure, pass(y) :: sctb_buf => l_oacc_sctb_buf
procedure, nopass :: device_wait => l_oacc_device_wait
procedure, pass(x) :: get_size => l_oacc_get_size procedure, pass(x) :: get_size => l_oacc_get_size
@ -50,6 +56,11 @@ module psb_l_oacc_vect_mod
contains contains
subroutine l_oacc_device_wait()
implicit none
call acc_wait_all()
end subroutine l_oacc_device_wait
subroutine l_oacc_sctb_buf(i, n, idx, beta, y) subroutine l_oacc_sctb_buf(i, n, idx, beta, y)
use psb_base_mod use psb_base_mod
@ -58,29 +69,44 @@ contains
class(psb_i_base_vect_type) :: idx class(psb_i_base_vect_type) :: idx
integer(psb_lpk_) :: beta integer(psb_lpk_) :: beta
class(psb_l_vect_oacc) :: y class(psb_l_vect_oacc) :: y
integer(psb_ipk_) :: info integer(psb_ipk_) :: info, k
logical :: acc_done
if (.not.allocated(y%combuf)) then if (.not.allocated(y%combuf)) then
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
return return
end if end if
acc_done = .false.
select type(ii => idx) select type(ii => idx)
class is (psb_i_vect_oacc) class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync() if (ii%is_host()) call ii%sync()
if (y%is_host()) call y%sync() if (y%is_host()) call y%sync()
!$acc update device(y%combuf)
call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1))
call y%set_dev()
acc_done = .true.
end select
!$acc parallel loop if (.not.acc_done) then
do i = 1, n if (idx%is_dev()) call idx%sync()
y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i) if (y%is_dev()) call y%sync()
do k = 1, n
y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + y%combuf(k)
end do end do
end if
class default contains
subroutine inner_sctb(n,x,beta,y,idx)
integer(psb_ipk_) :: n, idx(:)
integer(psb_lpk_) :: beta,x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop !$acc parallel loop
do i = 1, n do k = 1, n
y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i) y(idx(k)) = x(k) + beta *y(idx(k))
end do end do
end select !$acc end parallel loop
end subroutine inner_sctb
end subroutine l_oacc_sctb_buf end subroutine l_oacc_sctb_buf
subroutine l_oacc_sctb_x(i, n, idx, x, beta, y) subroutine l_oacc_sctb_x(i, n, idx, x, beta, y)
@ -90,24 +116,41 @@ contains
class(psb_i_base_vect_type) :: idx class(psb_i_base_vect_type) :: idx
integer(psb_lpk_) :: beta, x(:) integer(psb_lpk_) :: beta, x(:)
class(psb_l_vect_oacc) :: y class(psb_l_vect_oacc) :: y
integer(psb_ipk_) :: info, ni integer(psb_ipk_) :: info, ni, k
logical :: acc_done
acc_done = .false.
select type(ii => idx) select type(ii => idx)
class is (psb_i_vect_oacc) class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync() if (ii%is_host()) call ii%sync()
class default
call psb_errpush(info, 'l_oacc_sctb_x')
return
end select
if (y%is_host()) call y%sync() if (y%is_host()) call y%sync()
if (acc_is_present(x)) then
call inner_sctb(n,x(i:i+n-1),beta,y%v,idx%v(i:i+n-1))
acc_done = .true.
call y%set_dev()
end if
end select
if (.not.acc_done) then
if (idx%is_dev()) call idx%sync()
if (y%is_dev()) call y%sync()
do k = 1, n
y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + x(k+i-1)
end do
call y%set_host()
end if
contains
subroutine inner_sctb(n,x,beta,y,idx)
integer(psb_ipk_) :: n, idx(:)
integer(psb_lpk_) :: beta, x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop !$acc parallel loop
do i = 1, n do k = 1, n
y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i) y(idx(k)) = x(k) + beta *y(idx(k))
end do end do
!$acc end parallel loop
end subroutine inner_sctb
call y%set_dev()
end subroutine l_oacc_sctb_x end subroutine l_oacc_sctb_x
subroutine l_oacc_sctb(n, idx, x, beta, y) subroutine l_oacc_sctb(n, idx, x, beta, y)
@ -123,7 +166,6 @@ contains
if (n == 0) return if (n == 0) return
if (y%is_dev()) call y%sync() if (y%is_dev()) call y%sync()
!$acc parallel loop
do i = 1, n do i = 1, n
y%v(idx(i)) = beta * y%v(idx(i)) + x(i) y%v(idx(i)) = beta * y%v(idx(i)) + x(i)
end do end do
@ -137,28 +179,46 @@ contains
integer(psb_ipk_) :: i, n integer(psb_ipk_) :: i, n
class(psb_i_base_vect_type) :: idx class(psb_i_base_vect_type) :: idx
class(psb_l_vect_oacc) :: x class(psb_l_vect_oacc) :: x
integer(psb_ipk_) :: info integer(psb_ipk_) :: info,k
logical :: acc_done
info = 0 info = 0
acc_done = .false.
if (.not.allocated(x%combuf)) then if (.not.allocated(x%combuf)) then
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
return return
end if end if
select type(ii => idx) select type (ii => idx)
class is (psb_i_vect_oacc) class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync() if (ii%is_host()) call ii%sync()
class default if (x%is_host()) call x%sync()
call psb_errpush(info, 'l_oacc_gthzbuf') call inner_gth(n,x%v,x%combuf(i:i+n-1),ii%v(i:i+n-1))
return acc_done = .true.
end select end select
if (x%is_host()) call x%sync() if (.not.acc_done) then
if (idx%is_dev()) call idx%sync()
if (x%is_dev()) call x%sync()
do k = 1, n
x%combuf(k+i-1) = x%v(idx%v(k+i-1))
end do
end if
!$acc parallel loop contains
do i = 1, n subroutine inner_gth(n,x,y,idx)
x%combuf(i) = x%v(idx%v(i)) integer(psb_ipk_) :: n, idx(:)
integer(psb_lpk_) :: x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop present(y)
do k = 1, n
y(k) = x(idx(k))
end do end do
!$acc end parallel loop
!$acc update self(y)
end subroutine inner_gth
end subroutine l_oacc_gthzbuf end subroutine l_oacc_gthzbuf
subroutine l_oacc_gthzv_x(i, n, idx, x, y) subroutine l_oacc_gthzv_x(i, n, idx, x, y)
@ -168,24 +228,41 @@ contains
class(psb_i_base_vect_type):: idx class(psb_i_base_vect_type):: idx
integer(psb_lpk_) :: y(:) integer(psb_lpk_) :: y(:)
class(psb_l_vect_oacc):: x class(psb_l_vect_oacc):: x
integer(psb_ipk_) :: info integer(psb_ipk_) :: info, k
logical :: acc_done
info = 0 info = 0
acc_done = .false.
select type(ii => idx) select type (ii => idx)
class is (psb_i_vect_oacc) class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync() if (ii%is_host()) call ii%sync()
class default
call psb_errpush(info, 'l_oacc_gthzv_x')
return
end select
if (x%is_host()) call x%sync() if (x%is_host()) call x%sync()
if (acc_is_present(y)) then
!$acc parallel loop call inner_gth(n,x%v,y(i:),ii%v(i:))
do i = 1, n acc_done=.true.
y(i) = x%v(idx%v(i)) end if
end select
if (.not.acc_done) then
if (x%is_dev()) call x%sync()
if (idx%is_dev()) call idx%sync()
do k = 1, n
y(k+i-1) = x%v(idx%v(k+i-1))
!write(0,*) 'oa gthzv ',k+i-1,idx%v(k+i-1),k,y(k)
end do
end if
contains
subroutine inner_gth(n,x,y,idx)
integer(psb_ipk_) :: n, idx(:)
integer(psb_lpk_) :: x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop present(y)
do k = 1, n
y(k) = x(idx(k))
end do end do
!$acc end parallel loop
!$acc update self(y)
end subroutine inner_gth
end subroutine l_oacc_gthzv_x end subroutine l_oacc_gthzv_x
subroutine l_oacc_ins_v(n, irl, val, dupl, x, info) subroutine l_oacc_ins_v(n, irl, val, dupl, x, info)
@ -368,6 +445,17 @@ contains
end function l_oacc_get_fmt end function l_oacc_get_fmt
subroutine l_oacc_new_buffer(n,x,info)
implicit none
class(psb_l_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
integer(psb_ipk_), intent(out) :: info
if (n /= psb_size(x%combuf)) then
call x%psb_l_base_vect_type%new_buffer(n,info)
!$acc enter data copyin(x%combuf)
end if
end subroutine l_oacc_new_buffer
subroutine l_oacc_sync_dev_space(x) subroutine l_oacc_sync_dev_space(x)
implicit none implicit none
class(psb_l_vect_oacc), intent(inout) :: x class(psb_l_vect_oacc), intent(inout) :: x
@ -466,12 +554,33 @@ contains
class(psb_l_vect_oacc), intent(inout) :: x class(psb_l_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info
info = 0 info = 0
if (allocated(x%v)) then
if (acc_is_present(x%v)) call acc_delete_finalize(x%v) if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info) if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
end if call x%psb_l_base_vect_type%free(info)
end subroutine l_oacc_vect_free end subroutine l_oacc_vect_free
subroutine l_oacc_vect_maybe_free_buffer(x,info)
implicit none
class(psb_l_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
if (psb_oacc_get_maybe_free_buffer())&
& call x%free_buffer(info)
end subroutine l_oacc_vect_maybe_free_buffer
subroutine l_oacc_vect_free_buffer(x,info)
implicit none
class(psb_l_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_l_base_vect_type%free_buffer(info)
end subroutine l_oacc_vect_free_buffer
function l_oacc_get_size(x) result(res) function l_oacc_get_size(x) result(res)
implicit none implicit none
class(psb_l_vect_oacc), intent(inout) :: x class(psb_l_vect_oacc), intent(inout) :: x

@ -1,10 +1,21 @@
module psb_oacc_env_mod module psb_oacc_env_mod
contains
subroutine psb_oacc_init(ctxt, dev)
use psb_penv_mod use psb_penv_mod
use psb_const_mod use psb_const_mod
use psb_error_mod use psb_error_mod
logical, private :: oacc_do_maybe_free_buffer = .false.
contains
function psb_oacc_get_maybe_free_buffer() result(res)
logical :: res
res = oacc_do_maybe_free_buffer
end function psb_oacc_get_maybe_free_buffer
subroutine psb_oacc_set_maybe_free_buffer(val)
logical, intent(in) :: val
oacc_do_maybe_free_buffer = val
end subroutine psb_oacc_set_maybe_free_buffer
subroutine psb_oacc_init(ctxt, dev)
type(psb_ctxt_type), intent(in) :: ctxt type(psb_ctxt_type), intent(in) :: ctxt
integer, intent(in), optional :: dev integer, intent(in), optional :: dev

@ -3,6 +3,8 @@ module psb_s_oacc_vect_mod
use openacc use openacc
use psb_const_mod use psb_const_mod
use psb_error_mod use psb_error_mod
use psb_realloc_mod
use psb_oacc_env_mod
use psb_s_vect_mod use psb_s_vect_mod
use psb_i_vect_mod use psb_i_vect_mod
use psb_i_oacc_vect_mod use psb_i_oacc_vect_mod
@ -26,6 +28,8 @@ module psb_s_oacc_vect_mod
procedure, pass(x) :: bld_x => s_oacc_bld_x procedure, pass(x) :: bld_x => s_oacc_bld_x
procedure, pass(x) :: bld_mn => s_oacc_bld_mn procedure, pass(x) :: bld_mn => s_oacc_bld_mn
procedure, pass(x) :: free => s_oacc_vect_free procedure, pass(x) :: free => s_oacc_vect_free
procedure, pass(x) :: free_buffer => s_oacc_vect_free_buffer
procedure, pass(x) :: maybe_free_buffer => s_oacc_vect_maybe_free_buffer
procedure, pass(x) :: ins_a => s_oacc_ins_a procedure, pass(x) :: ins_a => s_oacc_ins_a
procedure, pass(x) :: ins_v => s_oacc_ins_v procedure, pass(x) :: ins_v => s_oacc_ins_v
procedure, pass(x) :: is_host => s_oacc_is_host procedure, pass(x) :: is_host => s_oacc_is_host
@ -36,11 +40,13 @@ module psb_s_oacc_vect_mod
procedure, pass(x) :: set_sync => s_oacc_set_sync procedure, pass(x) :: set_sync => s_oacc_set_sync
procedure, pass(x) :: set_scal => s_oacc_set_scal procedure, pass(x) :: set_scal => s_oacc_set_scal
procedure, pass(x) :: new_buffer => s_oacc_new_buffer
procedure, pass(x) :: gthzv_x => s_oacc_gthzv_x procedure, pass(x) :: gthzv_x => s_oacc_gthzv_x
procedure, pass(x) :: gthzbuf_x => s_oacc_gthzbuf procedure, pass(x) :: gthzbuf => s_oacc_gthzbuf
procedure, pass(y) :: sctb => s_oacc_sctb procedure, pass(y) :: sctb => s_oacc_sctb
procedure, pass(y) :: sctb_x => s_oacc_sctb_x procedure, pass(y) :: sctb_x => s_oacc_sctb_x
procedure, pass(y) :: sctb_buf => s_oacc_sctb_buf procedure, pass(y) :: sctb_buf => s_oacc_sctb_buf
procedure, nopass :: device_wait => s_oacc_device_wait
procedure, pass(x) :: get_size => s_oacc_get_size procedure, pass(x) :: get_size => s_oacc_get_size
@ -87,6 +93,11 @@ module psb_s_oacc_vect_mod
contains contains
subroutine s_oacc_device_wait()
implicit none
call acc_wait_all()
end subroutine s_oacc_device_wait
subroutine s_oacc_absval1(x) subroutine s_oacc_absval1(x)
implicit none implicit none
class(psb_s_vect_oacc), intent(inout) :: x class(psb_s_vect_oacc), intent(inout) :: x
@ -182,12 +193,16 @@ contains
do i = 1, n do i = 1, n
if (abs(x(i)) > mx) mx = abs(x(i)) if (abs(x(i)) > mx) mx = abs(x(i))
end do end do
if (mx == szero) then
res = mx
else
sum = szero sum = szero
!$acc parallel loop reduction(+:sum) !$acc parallel loop reduction(+:sum)
do i = 1, n do i = 1, n
sum = sum + abs(x(i)/mx)**2 sum = sum + abs(x(i)/mx)**2
end do end do
res = mx*sqrt(sum) res = mx*sqrt(sum)
end if
end function s_inner_oacc_nrm2 end function s_inner_oacc_nrm2
end function s_oacc_nrm2 end function s_oacc_nrm2
@ -398,29 +413,44 @@ contains
class(psb_i_base_vect_type) :: idx class(psb_i_base_vect_type) :: idx
real(psb_spk_) :: beta real(psb_spk_) :: beta
class(psb_s_vect_oacc) :: y class(psb_s_vect_oacc) :: y
integer(psb_ipk_) :: info integer(psb_ipk_) :: info, k
logical :: acc_done
if (.not.allocated(y%combuf)) then if (.not.allocated(y%combuf)) then
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
return return
end if end if
acc_done = .false.
select type(ii => idx) select type(ii => idx)
class is (psb_i_vect_oacc) class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync() if (ii%is_host()) call ii%sync()
if (y%is_host()) call y%sync() if (y%is_host()) call y%sync()
!$acc update device(y%combuf)
call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1))
call y%set_dev()
acc_done = .true.
end select
!$acc parallel loop if (.not.acc_done) then
do i = 1, n if (idx%is_dev()) call idx%sync()
y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i) if (y%is_dev()) call y%sync()
do k = 1, n
y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + y%combuf(k)
end do end do
end if
class default contains
subroutine inner_sctb(n,x,beta,y,idx)
integer(psb_ipk_) :: n, idx(:)
real(psb_spk_) :: beta,x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop !$acc parallel loop
do i = 1, n do k = 1, n
y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i) y(idx(k)) = x(k) + beta *y(idx(k))
end do end do
end select !$acc end parallel loop
end subroutine inner_sctb
end subroutine s_oacc_sctb_buf end subroutine s_oacc_sctb_buf
subroutine s_oacc_sctb_x(i, n, idx, x, beta, y) subroutine s_oacc_sctb_x(i, n, idx, x, beta, y)
@ -430,24 +460,41 @@ contains
class(psb_i_base_vect_type) :: idx class(psb_i_base_vect_type) :: idx
real(psb_spk_) :: beta, x(:) real(psb_spk_) :: beta, x(:)
class(psb_s_vect_oacc) :: y class(psb_s_vect_oacc) :: y
integer(psb_ipk_) :: info, ni integer(psb_ipk_) :: info, ni, k
logical :: acc_done
acc_done = .false.
select type(ii => idx) select type(ii => idx)
class is (psb_i_vect_oacc) class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync() if (ii%is_host()) call ii%sync()
class default
call psb_errpush(info, 's_oacc_sctb_x')
return
end select
if (y%is_host()) call y%sync() if (y%is_host()) call y%sync()
if (acc_is_present(x)) then
call inner_sctb(n,x(i:i+n-1),beta,y%v,idx%v(i:i+n-1))
acc_done = .true.
call y%set_dev()
end if
end select
if (.not.acc_done) then
if (idx%is_dev()) call idx%sync()
if (y%is_dev()) call y%sync()
do k = 1, n
y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + x(k+i-1)
end do
call y%set_host()
end if
contains
subroutine inner_sctb(n,x,beta,y,idx)
integer(psb_ipk_) :: n, idx(:)
real(psb_spk_) :: beta, x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop !$acc parallel loop
do i = 1, n do k = 1, n
y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i) y(idx(k)) = x(k) + beta *y(idx(k))
end do end do
!$acc end parallel loop
end subroutine inner_sctb
call y%set_dev()
end subroutine s_oacc_sctb_x end subroutine s_oacc_sctb_x
subroutine s_oacc_sctb(n, idx, x, beta, y) subroutine s_oacc_sctb(n, idx, x, beta, y)
@ -463,7 +510,6 @@ contains
if (n == 0) return if (n == 0) return
if (y%is_dev()) call y%sync() if (y%is_dev()) call y%sync()
!$acc parallel loop
do i = 1, n do i = 1, n
y%v(idx(i)) = beta * y%v(idx(i)) + x(i) y%v(idx(i)) = beta * y%v(idx(i)) + x(i)
end do end do
@ -477,28 +523,46 @@ contains
integer(psb_ipk_) :: i, n integer(psb_ipk_) :: i, n
class(psb_i_base_vect_type) :: idx class(psb_i_base_vect_type) :: idx
class(psb_s_vect_oacc) :: x class(psb_s_vect_oacc) :: x
integer(psb_ipk_) :: info integer(psb_ipk_) :: info,k
logical :: acc_done
info = 0 info = 0
acc_done = .false.
if (.not.allocated(x%combuf)) then if (.not.allocated(x%combuf)) then
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
return return
end if end if
select type(ii => idx) select type (ii => idx)
class is (psb_i_vect_oacc) class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync() if (ii%is_host()) call ii%sync()
class default if (x%is_host()) call x%sync()
call psb_errpush(info, 's_oacc_gthzbuf') call inner_gth(n,x%v,x%combuf(i:i+n-1),ii%v(i:i+n-1))
return acc_done = .true.
end select end select
if (x%is_host()) call x%sync() if (.not.acc_done) then
if (idx%is_dev()) call idx%sync()
if (x%is_dev()) call x%sync()
do k = 1, n
x%combuf(k+i-1) = x%v(idx%v(k+i-1))
end do
end if
!$acc parallel loop contains
do i = 1, n subroutine inner_gth(n,x,y,idx)
x%combuf(i) = x%v(idx%v(i)) integer(psb_ipk_) :: n, idx(:)
real(psb_spk_) :: x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop present(y)
do k = 1, n
y(k) = x(idx(k))
end do end do
!$acc end parallel loop
!$acc update self(y)
end subroutine inner_gth
end subroutine s_oacc_gthzbuf end subroutine s_oacc_gthzbuf
subroutine s_oacc_gthzv_x(i, n, idx, x, y) subroutine s_oacc_gthzv_x(i, n, idx, x, y)
@ -508,24 +572,41 @@ contains
class(psb_i_base_vect_type):: idx class(psb_i_base_vect_type):: idx
real(psb_spk_) :: y(:) real(psb_spk_) :: y(:)
class(psb_s_vect_oacc):: x class(psb_s_vect_oacc):: x
integer(psb_ipk_) :: info integer(psb_ipk_) :: info, k
logical :: acc_done
info = 0 info = 0
acc_done = .false.
select type(ii => idx) select type (ii => idx)
class is (psb_i_vect_oacc) class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync() if (ii%is_host()) call ii%sync()
class default
call psb_errpush(info, 's_oacc_gthzv_x')
return
end select
if (x%is_host()) call x%sync() if (x%is_host()) call x%sync()
if (acc_is_present(y)) then
!$acc parallel loop call inner_gth(n,x%v,y(i:),ii%v(i:))
do i = 1, n acc_done=.true.
y(i) = x%v(idx%v(i)) end if
end select
if (.not.acc_done) then
if (x%is_dev()) call x%sync()
if (idx%is_dev()) call idx%sync()
do k = 1, n
y(k+i-1) = x%v(idx%v(k+i-1))
!write(0,*) 'oa gthzv ',k+i-1,idx%v(k+i-1),k,y(k)
end do end do
end if
contains
subroutine inner_gth(n,x,y,idx)
integer(psb_ipk_) :: n, idx(:)
real(psb_spk_) :: x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop present(y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
!$acc update self(y)
end subroutine inner_gth
end subroutine s_oacc_gthzv_x end subroutine s_oacc_gthzv_x
subroutine s_oacc_ins_v(n, irl, val, dupl, x, info) subroutine s_oacc_ins_v(n, irl, val, dupl, x, info)
@ -718,7 +799,7 @@ contains
integer(psb_ipk_) :: info integer(psb_ipk_) :: info
res = szero res = szero
!write(0,*) 'dot_v' !!$ write(0,*) 'oacc_dot_v'
select type(yy => y) select type(yy => y)
type is (psb_s_base_vect_type) type is (psb_s_base_vect_type)
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
@ -762,6 +843,17 @@ contains
end function s_oacc_dot_a end function s_oacc_dot_a
subroutine s_oacc_new_buffer(n,x,info)
implicit none
class(psb_s_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
integer(psb_ipk_), intent(out) :: info
if (n /= psb_size(x%combuf)) then
call x%psb_s_base_vect_type%new_buffer(n,info)
!$acc enter data copyin(x%combuf)
end if
end subroutine s_oacc_new_buffer
subroutine s_oacc_sync_dev_space(x) subroutine s_oacc_sync_dev_space(x)
implicit none implicit none
class(psb_s_vect_oacc), intent(inout) :: x class(psb_s_vect_oacc), intent(inout) :: x
@ -860,12 +952,33 @@ contains
class(psb_s_vect_oacc), intent(inout) :: x class(psb_s_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info
info = 0 info = 0
if (allocated(x%v)) then
if (acc_is_present(x%v)) call acc_delete_finalize(x%v) if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info) if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
end if call x%psb_s_base_vect_type%free(info)
end subroutine s_oacc_vect_free end subroutine s_oacc_vect_free
subroutine s_oacc_vect_maybe_free_buffer(x,info)
implicit none
class(psb_s_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
if (psb_oacc_get_maybe_free_buffer())&
& call x%free_buffer(info)
end subroutine s_oacc_vect_maybe_free_buffer
subroutine s_oacc_vect_free_buffer(x,info)
implicit none
class(psb_s_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_s_base_vect_type%free_buffer(info)
end subroutine s_oacc_vect_free_buffer
function s_oacc_get_size(x) result(res) function s_oacc_get_size(x) result(res)
implicit none implicit none
class(psb_s_vect_oacc), intent(inout) :: x class(psb_s_vect_oacc), intent(inout) :: x

@ -3,6 +3,8 @@ module psb_z_oacc_vect_mod
use openacc use openacc
use psb_const_mod use psb_const_mod
use psb_error_mod use psb_error_mod
use psb_realloc_mod
use psb_oacc_env_mod
use psb_z_vect_mod use psb_z_vect_mod
use psb_i_vect_mod use psb_i_vect_mod
use psb_i_oacc_vect_mod use psb_i_oacc_vect_mod
@ -26,6 +28,8 @@ module psb_z_oacc_vect_mod
procedure, pass(x) :: bld_x => z_oacc_bld_x procedure, pass(x) :: bld_x => z_oacc_bld_x
procedure, pass(x) :: bld_mn => z_oacc_bld_mn procedure, pass(x) :: bld_mn => z_oacc_bld_mn
procedure, pass(x) :: free => z_oacc_vect_free procedure, pass(x) :: free => z_oacc_vect_free
procedure, pass(x) :: free_buffer => z_oacc_vect_free_buffer
procedure, pass(x) :: maybe_free_buffer => z_oacc_vect_maybe_free_buffer
procedure, pass(x) :: ins_a => z_oacc_ins_a procedure, pass(x) :: ins_a => z_oacc_ins_a
procedure, pass(x) :: ins_v => z_oacc_ins_v procedure, pass(x) :: ins_v => z_oacc_ins_v
procedure, pass(x) :: is_host => z_oacc_is_host procedure, pass(x) :: is_host => z_oacc_is_host
@ -36,11 +40,13 @@ module psb_z_oacc_vect_mod
procedure, pass(x) :: set_sync => z_oacc_set_sync procedure, pass(x) :: set_sync => z_oacc_set_sync
procedure, pass(x) :: set_scal => z_oacc_set_scal procedure, pass(x) :: set_scal => z_oacc_set_scal
procedure, pass(x) :: new_buffer => z_oacc_new_buffer
procedure, pass(x) :: gthzv_x => z_oacc_gthzv_x procedure, pass(x) :: gthzv_x => z_oacc_gthzv_x
procedure, pass(x) :: gthzbuf_x => z_oacc_gthzbuf procedure, pass(x) :: gthzbuf => z_oacc_gthzbuf
procedure, pass(y) :: sctb => z_oacc_sctb procedure, pass(y) :: sctb => z_oacc_sctb
procedure, pass(y) :: sctb_x => z_oacc_sctb_x procedure, pass(y) :: sctb_x => z_oacc_sctb_x
procedure, pass(y) :: sctb_buf => z_oacc_sctb_buf procedure, pass(y) :: sctb_buf => z_oacc_sctb_buf
procedure, nopass :: device_wait => z_oacc_device_wait
procedure, pass(x) :: get_size => z_oacc_get_size procedure, pass(x) :: get_size => z_oacc_get_size
@ -87,6 +93,11 @@ module psb_z_oacc_vect_mod
contains contains
subroutine z_oacc_device_wait()
implicit none
call acc_wait_all()
end subroutine z_oacc_device_wait
subroutine z_oacc_absval1(x) subroutine z_oacc_absval1(x)
implicit none implicit none
class(psb_z_vect_oacc), intent(inout) :: x class(psb_z_vect_oacc), intent(inout) :: x
@ -182,12 +193,16 @@ contains
do i = 1, n do i = 1, n
if (abs(x(i)) > mx) mx = abs(x(i)) if (abs(x(i)) > mx) mx = abs(x(i))
end do end do
if (mx == dzero) then
res = mx
else
sum = dzero sum = dzero
!$acc parallel loop reduction(+:sum) !$acc parallel loop reduction(+:sum)
do i = 1, n do i = 1, n
sum = sum + abs(x(i)/mx)**2 sum = sum + abs(x(i)/mx)**2
end do end do
res = mx*sqrt(sum) res = mx*sqrt(sum)
end if
end function z_inner_oacc_nrm2 end function z_inner_oacc_nrm2
end function z_oacc_nrm2 end function z_oacc_nrm2
@ -398,29 +413,44 @@ contains
class(psb_i_base_vect_type) :: idx class(psb_i_base_vect_type) :: idx
complex(psb_dpk_) :: beta complex(psb_dpk_) :: beta
class(psb_z_vect_oacc) :: y class(psb_z_vect_oacc) :: y
integer(psb_ipk_) :: info integer(psb_ipk_) :: info, k
logical :: acc_done
if (.not.allocated(y%combuf)) then if (.not.allocated(y%combuf)) then
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
return return
end if end if
acc_done = .false.
select type(ii => idx) select type(ii => idx)
class is (psb_i_vect_oacc) class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync() if (ii%is_host()) call ii%sync()
if (y%is_host()) call y%sync() if (y%is_host()) call y%sync()
!$acc update device(y%combuf)
call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1))
call y%set_dev()
acc_done = .true.
end select
!$acc parallel loop if (.not.acc_done) then
do i = 1, n if (idx%is_dev()) call idx%sync()
y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i) if (y%is_dev()) call y%sync()
do k = 1, n
y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + y%combuf(k)
end do end do
end if
class default contains
subroutine inner_sctb(n,x,beta,y,idx)
integer(psb_ipk_) :: n, idx(:)
complex(psb_dpk_) :: beta,x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop !$acc parallel loop
do i = 1, n do k = 1, n
y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i) y(idx(k)) = x(k) + beta *y(idx(k))
end do end do
end select !$acc end parallel loop
end subroutine inner_sctb
end subroutine z_oacc_sctb_buf end subroutine z_oacc_sctb_buf
subroutine z_oacc_sctb_x(i, n, idx, x, beta, y) subroutine z_oacc_sctb_x(i, n, idx, x, beta, y)
@ -430,24 +460,41 @@ contains
class(psb_i_base_vect_type) :: idx class(psb_i_base_vect_type) :: idx
complex(psb_dpk_) :: beta, x(:) complex(psb_dpk_) :: beta, x(:)
class(psb_z_vect_oacc) :: y class(psb_z_vect_oacc) :: y
integer(psb_ipk_) :: info, ni integer(psb_ipk_) :: info, ni, k
logical :: acc_done
acc_done = .false.
select type(ii => idx) select type(ii => idx)
class is (psb_i_vect_oacc) class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync() if (ii%is_host()) call ii%sync()
class default
call psb_errpush(info, 'z_oacc_sctb_x')
return
end select
if (y%is_host()) call y%sync() if (y%is_host()) call y%sync()
if (acc_is_present(x)) then
call inner_sctb(n,x(i:i+n-1),beta,y%v,idx%v(i:i+n-1))
acc_done = .true.
call y%set_dev()
end if
end select
if (.not.acc_done) then
if (idx%is_dev()) call idx%sync()
if (y%is_dev()) call y%sync()
do k = 1, n
y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + x(k+i-1)
end do
call y%set_host()
end if
contains
subroutine inner_sctb(n,x,beta,y,idx)
integer(psb_ipk_) :: n, idx(:)
complex(psb_dpk_) :: beta, x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop !$acc parallel loop
do i = 1, n do k = 1, n
y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i) y(idx(k)) = x(k) + beta *y(idx(k))
end do end do
!$acc end parallel loop
end subroutine inner_sctb
call y%set_dev()
end subroutine z_oacc_sctb_x end subroutine z_oacc_sctb_x
subroutine z_oacc_sctb(n, idx, x, beta, y) subroutine z_oacc_sctb(n, idx, x, beta, y)
@ -463,7 +510,6 @@ contains
if (n == 0) return if (n == 0) return
if (y%is_dev()) call y%sync() if (y%is_dev()) call y%sync()
!$acc parallel loop
do i = 1, n do i = 1, n
y%v(idx(i)) = beta * y%v(idx(i)) + x(i) y%v(idx(i)) = beta * y%v(idx(i)) + x(i)
end do end do
@ -477,28 +523,46 @@ contains
integer(psb_ipk_) :: i, n integer(psb_ipk_) :: i, n
class(psb_i_base_vect_type) :: idx class(psb_i_base_vect_type) :: idx
class(psb_z_vect_oacc) :: x class(psb_z_vect_oacc) :: x
integer(psb_ipk_) :: info integer(psb_ipk_) :: info,k
logical :: acc_done
info = 0 info = 0
acc_done = .false.
if (.not.allocated(x%combuf)) then if (.not.allocated(x%combuf)) then
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
return return
end if end if
select type(ii => idx) select type (ii => idx)
class is (psb_i_vect_oacc) class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync() if (ii%is_host()) call ii%sync()
class default if (x%is_host()) call x%sync()
call psb_errpush(info, 'z_oacc_gthzbuf') call inner_gth(n,x%v,x%combuf(i:i+n-1),ii%v(i:i+n-1))
return acc_done = .true.
end select end select
if (x%is_host()) call x%sync() if (.not.acc_done) then
if (idx%is_dev()) call idx%sync()
if (x%is_dev()) call x%sync()
do k = 1, n
x%combuf(k+i-1) = x%v(idx%v(k+i-1))
end do
end if
!$acc parallel loop contains
do i = 1, n subroutine inner_gth(n,x,y,idx)
x%combuf(i) = x%v(idx%v(i)) integer(psb_ipk_) :: n, idx(:)
complex(psb_dpk_) :: x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop present(y)
do k = 1, n
y(k) = x(idx(k))
end do end do
!$acc end parallel loop
!$acc update self(y)
end subroutine inner_gth
end subroutine z_oacc_gthzbuf end subroutine z_oacc_gthzbuf
subroutine z_oacc_gthzv_x(i, n, idx, x, y) subroutine z_oacc_gthzv_x(i, n, idx, x, y)
@ -508,24 +572,41 @@ contains
class(psb_i_base_vect_type):: idx class(psb_i_base_vect_type):: idx
complex(psb_dpk_) :: y(:) complex(psb_dpk_) :: y(:)
class(psb_z_vect_oacc):: x class(psb_z_vect_oacc):: x
integer(psb_ipk_) :: info integer(psb_ipk_) :: info, k
logical :: acc_done
info = 0 info = 0
acc_done = .false.
select type(ii => idx) select type (ii => idx)
class is (psb_i_vect_oacc) class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync() if (ii%is_host()) call ii%sync()
class default
call psb_errpush(info, 'z_oacc_gthzv_x')
return
end select
if (x%is_host()) call x%sync() if (x%is_host()) call x%sync()
if (acc_is_present(y)) then
!$acc parallel loop call inner_gth(n,x%v,y(i:),ii%v(i:))
do i = 1, n acc_done=.true.
y(i) = x%v(idx%v(i)) end if
end select
if (.not.acc_done) then
if (x%is_dev()) call x%sync()
if (idx%is_dev()) call idx%sync()
do k = 1, n
y(k+i-1) = x%v(idx%v(k+i-1))
!write(0,*) 'oa gthzv ',k+i-1,idx%v(k+i-1),k,y(k)
end do end do
end if
contains
subroutine inner_gth(n,x,y,idx)
integer(psb_ipk_) :: n, idx(:)
complex(psb_dpk_) :: x(:), y(:)
integer(psb_ipk_) :: k
!$acc parallel loop present(y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
!$acc update self(y)
end subroutine inner_gth
end subroutine z_oacc_gthzv_x end subroutine z_oacc_gthzv_x
subroutine z_oacc_ins_v(n, irl, val, dupl, x, info) subroutine z_oacc_ins_v(n, irl, val, dupl, x, info)
@ -718,7 +799,7 @@ contains
integer(psb_ipk_) :: info integer(psb_ipk_) :: info
res = zzero res = zzero
!write(0,*) 'dot_v' !!$ write(0,*) 'oacc_dot_v'
select type(yy => y) select type(yy => y)
type is (psb_z_base_vect_type) type is (psb_z_base_vect_type)
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
@ -762,6 +843,17 @@ contains
end function z_oacc_dot_a end function z_oacc_dot_a
subroutine z_oacc_new_buffer(n,x,info)
implicit none
class(psb_z_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
integer(psb_ipk_), intent(out) :: info
if (n /= psb_size(x%combuf)) then
call x%psb_z_base_vect_type%new_buffer(n,info)
!$acc enter data copyin(x%combuf)
end if
end subroutine z_oacc_new_buffer
subroutine z_oacc_sync_dev_space(x) subroutine z_oacc_sync_dev_space(x)
implicit none implicit none
class(psb_z_vect_oacc), intent(inout) :: x class(psb_z_vect_oacc), intent(inout) :: x
@ -860,12 +952,33 @@ contains
class(psb_z_vect_oacc), intent(inout) :: x class(psb_z_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info
info = 0 info = 0
if (allocated(x%v)) then
if (acc_is_present(x%v)) call acc_delete_finalize(x%v) if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info) if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
end if call x%psb_z_base_vect_type%free(info)
end subroutine z_oacc_vect_free end subroutine z_oacc_vect_free
subroutine z_oacc_vect_maybe_free_buffer(x,info)
implicit none
class(psb_z_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
if (psb_oacc_get_maybe_free_buffer())&
& call x%free_buffer(info)
end subroutine z_oacc_vect_maybe_free_buffer
subroutine z_oacc_vect_free_buffer(x,info)
implicit none
class(psb_z_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_z_base_vect_type%free_buffer(info)
end subroutine z_oacc_vect_free_buffer
function z_oacc_get_size(x) result(res) function z_oacc_get_size(x) result(res)
implicit none implicit none
class(psb_z_vect_oacc), intent(inout) :: x class(psb_z_vect_oacc), intent(inout) :: x

Loading…
Cancel
Save