Merge some changes from V4

oacc_loloum
sfilippone 6 months ago
parent 95c546aadd
commit 479135c62d

@ -10,11 +10,12 @@ contains
info = psb_success_
call a%free_space()
call a%psb_c_csr_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
call a%set_dev()
if (info /= 0) goto 9999
call a%sync_space()
call a%set_host()
call a%sync()
return

@ -14,10 +14,12 @@ contains
type is (psb_c_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
call a%free_space()
call a%psb_c_csr_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irp)
call a%sync_space()
call a%set_host()
call a%sync()
end select
end subroutine psb_c_oacc_csr_cp_from_fmt

@ -10,10 +10,12 @@ contains
info = psb_success_
call a%free_space()
call a%psb_c_csr_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
!$acc update device(a%val, a%ja, a%irp)
call a%sync_space()
call a%set_host()
call a%sync()
return

@ -14,10 +14,12 @@ contains
type is (psb_c_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
call a%free_space()
call a%psb_c_csr_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irp)
call a%sync_space()
call a%set_host()
call a%sync()
end select
end subroutine psb_c_oacc_csr_mv_from_fmt

@ -16,8 +16,8 @@ contains
m = a%get_nrows()
n = a%get_ncols()
if ((n /= size(x%v)) .or. (n /= size(y%v))) then
write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
if ((n /= size(x%v)) .or. (m /= size(y%v))) then
write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
return
end if

@ -8,65 +8,14 @@ contains
class(psb_c_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, j, k, row, col, nz_per_row
complex(psb_spk_) :: value
integer(psb_ipk_), allocatable :: row_counts(:)
integer(psb_ipk_) :: hacksize, nza
info = psb_success_
hacksize = 1
call a%set_nrows(b%get_nrows())
call a%set_ncols(b%get_ncols())
nz_per_row = a%nzt
if (.not.allocated(a%val)) then
allocate(a%val(a%get_nrows(), nz_per_row))
allocate(a%ja(a%get_nrows(), nz_per_row))
allocate(a%irn(a%get_nrows()))
allocate(a%idiag(a%get_nrows()))
end if
a%val = czero
a%ja = -1
a%irn = 0
a%idiag = 0
allocate(row_counts(a%get_nrows()))
row_counts = 0
nza = b%get_nzeros()
!$acc parallel loop present(b, a, row_counts)
do k = 1, nza
row = b%ia(k)
col = b%ja(k)
value = b%val(k)
if (row_counts(row) < nz_per_row) then
a%val(row, row_counts(row) + 1) = value
a%ja(row, row_counts(row) + 1) = col
row_counts(row) = row_counts(row) + 1
else
info = psb_err_invalid_mat_state_
!goto 9999
end if
end do
a%irn = row_counts
!$acc parallel loop present(a)
do i = 1, a%get_nrows()
do j = 1, nz_per_row
if (a%ja(i, j) == i) then
a%idiag(i) = j
exit
end if
end do
end do
deallocate(row_counts)
call a%set_dev()
call a%free_space()
call a%psb_c_ell_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
call a%sync_space()
call a%set_host()
call a%sync()
return

@ -14,10 +14,12 @@ contains
type is (psb_c_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
call a%free_space()
call a%psb_c_ell_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irn, a%idiag)
call a%sync_space()
call a%set_host()
call a%sync()
end select
end subroutine psb_c_oacc_ell_cp_from_fmt

@ -9,11 +9,13 @@ contains
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%free_space()
call a%psb_c_ell_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
!$acc update device(a%val, a%ja, a%irn, a%idiag)
call a%sync_space()
call a%set_host()
call a%sync()
return

@ -14,10 +14,12 @@ contains
type is (psb_c_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
call a%free_space()
call a%psb_c_ell_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irn, a%idiag)
call a%sync_space()
call a%set_host()
call a%sync()
end select
end subroutine psb_c_oacc_ell_mv_from_fmt

@ -2,78 +2,20 @@ submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_cp_from_coo_impl
use psb_base_mod
contains
module subroutine psb_c_oacc_hll_cp_from_coo(a, b, info)
implicit none
implicit none
class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
class(psb_c_coo_sparse_mat), intent(in) :: b
class(psb_c_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, j, k, row, col, nz_per_row
complex(psb_spk_) :: value
integer(psb_ipk_), allocatable :: row_counts(:)
integer(psb_ipk_) :: hacksize, nza
info = psb_success_
hacksize = 32 ! Assuming a default hack size of 32
call a%set_nrows(b%get_nrows())
call a%set_ncols(b%get_ncols())
nz_per_row = a%nzt
if (.not.allocated(a%val)) then
allocate(a%val(nz_per_row * a%get_nrows()))
allocate(a%ja(nz_per_row * a%get_nrows()))
allocate(a%irn(a%get_nrows()))
allocate(a%idiag(a%get_nrows()))
allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize))
end if
a%val = czero
a%ja = -1
a%irn = 0
a%idiag = 0
allocate(row_counts(a%get_nrows()))
row_counts = 0
nza = b%get_nzeros()
!$acc parallel loop present(b, a, row_counts)
do k = 1, nza
row = b%ia(k)
col = b%ja(k)
value = b%val(k)
if (row_counts(row) < nz_per_row) then
a%val(row_counts(row) + 1 + (row - 1) * nz_per_row) = value
a%ja(row_counts(row) + 1 + (row - 1) * nz_per_row) = col
row_counts(row) = row_counts(row) + 1
else
info = psb_err_invalid_mat_state_
!goto 9999
end if
end do
a%irn = row_counts
!$acc parallel loop present(a)
do i = 1, a%get_nrows()
do j = 1, nz_per_row
if (a%ja(j + (i - 1) * nz_per_row) == i) then
a%idiag(i) = j
exit
end if
end do
end do
! Calculate hkoffs for HLL format
!$acc parallel loop present(a)
do i = 1, size(a%hkoffs)
a%hkoffs(i) = (i - 1) * hacksize
end do
deallocate(row_counts)
call a%set_dev()
call a%free_space()
call a%psb_c_hll_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
call a%sync_space()
call a%set_host()
call a%sync()
return

@ -14,10 +14,12 @@ contains
type is (psb_c_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
call a%free_space()
call a%psb_c_hll_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
call a%sync_space()
call a%set_host()
call a%sync()
end select
end subroutine psb_c_oacc_hll_cp_from_fmt

@ -10,10 +10,12 @@ contains
info = psb_success_
call a%free_space()
call a%psb_c_hll_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
!$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
call a%sync_space()
call a%set_host()
call a%sync()
return

@ -14,11 +14,12 @@ contains
type is (psb_c_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
call a%free_space()
call a%psb_c_hll_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
call a%sync_space()
call a%set_host()
call a%sync()
end select
end subroutine psb_c_oacc_hll_mv_from_fmt
end submodule psb_c_oacc_hll_mv_from_fmt_impl

@ -40,28 +40,28 @@ contains
complex(psb_spk_) :: val(:), x(:), y(:)
integer(psb_ipk_) :: ja(:), hkoffs(:)
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, j, idx, k
integer(psb_ipk_) :: i, j, idx, k, ipnt,ir,nr,nlc,isz,ii
complex(psb_spk_) :: tmp
info = 0
!$acc parallel loop present(val, ja, hkoffs, x, y)
!$acc parallel loop private(nlc, isz,ir,nr)
do i = 1, nhacks
do k = 0, hksz - 1
idx = hkoffs(i) + k
if (idx <= hkoffs(i + 1) - 1) then
tmp = 0.0_psb_dpk_
!$acc loop seq
do j = hkoffs(i) + k, hkoffs(i + 1) - 1, hksz
if (ja(j) > 0) then
tmp = tmp + val(j) * x(ja(j))
end if
end do
y(k + 1 + (i - 1) * hksz) = alpha * tmp + beta * y(k + 1 + (i - 1) * hksz)
end if
isz = hkoffs(i + 1) - hkoffs(i)
nlc = isz/hksz
ir = (i-1)*hksz
nr = min(hksz,m-ir)
!$acc loop independent private(tmp,ii,ipnt)
do ii = 1, nr
ipnt = hkoffs(i) + ii
tmp = czero
!$acc loop seq
do j = 1, nlc
tmp = tmp + val(ipnt) * x(ja(ipnt))
ipnt = ipnt + hksz
end do
y(ii+ir) = alpha * tmp + beta * y(ii+ir)
end do
end do
end subroutine inner_spmv
end subroutine psb_c_oacc_hll_vect_mv
end submodule psb_c_oacc_hll_vect_mv_impl

@ -10,11 +10,12 @@ contains
info = psb_success_
call a%free_space()
call a%psb_d_csr_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
call a%set_dev()
if (info /= 0) goto 9999
call a%sync_space()
call a%set_host()
call a%sync()
return

@ -14,10 +14,12 @@ contains
type is (psb_d_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
call a%free_space()
call a%psb_d_csr_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irp)
call a%sync_space()
call a%set_host()
call a%sync()
end select
end subroutine psb_d_oacc_csr_cp_from_fmt

@ -10,10 +10,12 @@ contains
info = psb_success_
call a%free_space()
call a%psb_d_csr_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
!$acc update device(a%val, a%ja, a%irp)
call a%sync_space()
call a%set_host()
call a%sync()
return

@ -14,10 +14,12 @@ contains
type is (psb_d_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
call a%free_space()
call a%psb_d_csr_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irp)
call a%sync_space()
call a%set_host()
call a%sync()
end select
end subroutine psb_d_oacc_csr_mv_from_fmt

@ -16,8 +16,8 @@ contains
m = a%get_nrows()
n = a%get_ncols()
if ((n /= size(x%v)) .or. (n /= size(y%v))) then
write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
if ((n /= size(x%v)) .or. (m /= size(y%v))) then
write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
return
end if

@ -8,65 +8,14 @@ contains
class(psb_d_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, j, k, row, col, nz_per_row
real(psb_dpk_) :: value
integer(psb_ipk_), allocatable :: row_counts(:)
integer(psb_ipk_) :: hacksize, nza
info = psb_success_
hacksize = 1
call a%set_nrows(b%get_nrows())
call a%set_ncols(b%get_ncols())
nz_per_row = a%nzt
if (.not.allocated(a%val)) then
allocate(a%val(a%get_nrows(), nz_per_row))
allocate(a%ja(a%get_nrows(), nz_per_row))
allocate(a%irn(a%get_nrows()))
allocate(a%idiag(a%get_nrows()))
end if
a%val = dzero
a%ja = -1
a%irn = 0
a%idiag = 0
allocate(row_counts(a%get_nrows()))
row_counts = 0
nza = b%get_nzeros()
!$acc parallel loop present(b, a, row_counts)
do k = 1, nza
row = b%ia(k)
col = b%ja(k)
value = b%val(k)
if (row_counts(row) < nz_per_row) then
a%val(row, row_counts(row) + 1) = value
a%ja(row, row_counts(row) + 1) = col
row_counts(row) = row_counts(row) + 1
else
info = psb_err_invalid_mat_state_
!goto 9999
end if
end do
a%irn = row_counts
!$acc parallel loop present(a)
do i = 1, a%get_nrows()
do j = 1, nz_per_row
if (a%ja(i, j) == i) then
a%idiag(i) = j
exit
end if
end do
end do
deallocate(row_counts)
call a%set_dev()
call a%free_space()
call a%psb_d_ell_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
call a%sync_space()
call a%set_host()
call a%sync()
return

@ -14,10 +14,12 @@ contains
type is (psb_d_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
call a%free_space()
call a%psb_d_ell_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irn, a%idiag)
call a%sync_space()
call a%set_host()
call a%sync()
end select
end subroutine psb_d_oacc_ell_cp_from_fmt

@ -9,11 +9,13 @@ contains
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%free_space()
call a%psb_d_ell_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
!$acc update device(a%val, a%ja, a%irn, a%idiag)
call a%sync_space()
call a%set_host()
call a%sync()
return

@ -14,10 +14,12 @@ contains
type is (psb_d_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
call a%free_space()
call a%psb_d_ell_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irn, a%idiag)
call a%sync_space()
call a%set_host()
call a%sync()
end select
end subroutine psb_d_oacc_ell_mv_from_fmt

@ -2,78 +2,20 @@ submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_cp_from_coo_impl
use psb_base_mod
contains
module subroutine psb_d_oacc_hll_cp_from_coo(a, b, info)
implicit none
implicit none
class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
class(psb_d_coo_sparse_mat), intent(in) :: b
class(psb_d_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, j, k, row, col, nz_per_row
real(psb_dpk_) :: value
integer(psb_ipk_), allocatable :: row_counts(:)
integer(psb_ipk_) :: hacksize, nza
info = psb_success_
hacksize = 32 ! Assuming a default hack size of 32
call a%set_nrows(b%get_nrows())
call a%set_ncols(b%get_ncols())
nz_per_row = a%nzt
if (.not.allocated(a%val)) then
allocate(a%val(nz_per_row * a%get_nrows()))
allocate(a%ja(nz_per_row * a%get_nrows()))
allocate(a%irn(a%get_nrows()))
allocate(a%idiag(a%get_nrows()))
allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize))
end if
a%val = dzero
a%ja = -1
a%irn = 0
a%idiag = 0
allocate(row_counts(a%get_nrows()))
row_counts = 0
nza = b%get_nzeros()
!$acc parallel loop present(b, a, row_counts)
do k = 1, nza
row = b%ia(k)
col = b%ja(k)
value = b%val(k)
if (row_counts(row) < nz_per_row) then
a%val(row_counts(row) + 1 + (row - 1) * nz_per_row) = value
a%ja(row_counts(row) + 1 + (row - 1) * nz_per_row) = col
row_counts(row) = row_counts(row) + 1
else
info = psb_err_invalid_mat_state_
!goto 9999
end if
end do
a%irn = row_counts
!$acc parallel loop present(a)
do i = 1, a%get_nrows()
do j = 1, nz_per_row
if (a%ja(j + (i - 1) * nz_per_row) == i) then
a%idiag(i) = j
exit
end if
end do
end do
! Calculate hkoffs for HLL format
!$acc parallel loop present(a)
do i = 1, size(a%hkoffs)
a%hkoffs(i) = (i - 1) * hacksize
end do
deallocate(row_counts)
call a%set_dev()
call a%free_space()
call a%psb_d_hll_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
call a%sync_space()
call a%set_host()
call a%sync()
return

@ -14,10 +14,12 @@ contains
type is (psb_d_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
call a%free_space()
call a%psb_d_hll_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
call a%sync_space()
call a%set_host()
call a%sync()
end select
end subroutine psb_d_oacc_hll_cp_from_fmt

@ -10,10 +10,12 @@ contains
info = psb_success_
call a%free_space()
call a%psb_d_hll_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
!$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
call a%sync_space()
call a%set_host()
call a%sync()
return

@ -14,11 +14,12 @@ contains
type is (psb_d_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
call a%free_space()
call a%psb_d_hll_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
call a%sync_space()
call a%set_host()
call a%sync()
end select
end subroutine psb_d_oacc_hll_mv_from_fmt
end submodule psb_d_oacc_hll_mv_from_fmt_impl

@ -40,28 +40,28 @@ contains
real(psb_dpk_) :: val(:), x(:), y(:)
integer(psb_ipk_) :: ja(:), hkoffs(:)
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, j, idx, k
integer(psb_ipk_) :: i, j, idx, k, ipnt,ir,nr,nlc,isz,ii
real(psb_dpk_) :: tmp
info = 0
!$acc parallel loop present(val, ja, hkoffs, x, y)
!$acc parallel loop private(nlc, isz,ir,nr)
do i = 1, nhacks
do k = 0, hksz - 1
idx = hkoffs(i) + k
if (idx <= hkoffs(i + 1) - 1) then
tmp = 0.0_psb_dpk_
!$acc loop seq
do j = hkoffs(i) + k, hkoffs(i + 1) - 1, hksz
if (ja(j) > 0) then
tmp = tmp + val(j) * x(ja(j))
end if
end do
y(k + 1 + (i - 1) * hksz) = alpha * tmp + beta * y(k + 1 + (i - 1) * hksz)
end if
isz = hkoffs(i + 1) - hkoffs(i)
nlc = isz/hksz
ir = (i-1)*hksz
nr = min(hksz,m-ir)
!$acc loop independent private(tmp,ii,ipnt)
do ii = 1, nr
ipnt = hkoffs(i) + ii
tmp = dzero
!$acc loop seq
do j = 1, nlc
tmp = tmp + val(ipnt) * x(ja(ipnt))
ipnt = ipnt + hksz
end do
y(ii+ir) = alpha * tmp + beta * y(ii+ir)
end do
end do
end subroutine inner_spmv
end subroutine psb_d_oacc_hll_vect_mv
end submodule psb_d_oacc_hll_vect_mv_impl

@ -10,11 +10,12 @@ contains
info = psb_success_
call a%free_space()
call a%psb_s_csr_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
call a%set_dev()
if (info /= 0) goto 9999
call a%sync_space()
call a%set_host()
call a%sync()
return

@ -14,10 +14,12 @@ contains
type is (psb_s_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
call a%free_space()
call a%psb_s_csr_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irp)
call a%sync_space()
call a%set_host()
call a%sync()
end select
end subroutine psb_s_oacc_csr_cp_from_fmt

@ -10,10 +10,12 @@ contains
info = psb_success_
call a%free_space()
call a%psb_s_csr_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
!$acc update device(a%val, a%ja, a%irp)
call a%sync_space()
call a%set_host()
call a%sync()
return

@ -14,10 +14,12 @@ contains
type is (psb_s_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
call a%free_space()
call a%psb_s_csr_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irp)
call a%sync_space()
call a%set_host()
call a%sync()
end select
end subroutine psb_s_oacc_csr_mv_from_fmt

@ -16,8 +16,8 @@ contains
m = a%get_nrows()
n = a%get_ncols()
if ((n /= size(x%v)) .or. (n /= size(y%v))) then
write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
if ((n /= size(x%v)) .or. (m /= size(y%v))) then
write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
return
end if

@ -8,65 +8,14 @@ contains
class(psb_s_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, j, k, row, col, nz_per_row
real(psb_spk_) :: value
integer(psb_ipk_), allocatable :: row_counts(:)
integer(psb_ipk_) :: hacksize, nza
info = psb_success_
hacksize = 1
call a%set_nrows(b%get_nrows())
call a%set_ncols(b%get_ncols())
nz_per_row = a%nzt
if (.not.allocated(a%val)) then
allocate(a%val(a%get_nrows(), nz_per_row))
allocate(a%ja(a%get_nrows(), nz_per_row))
allocate(a%irn(a%get_nrows()))
allocate(a%idiag(a%get_nrows()))
end if
a%val = szero
a%ja = -1
a%irn = 0
a%idiag = 0
allocate(row_counts(a%get_nrows()))
row_counts = 0
nza = b%get_nzeros()
!$acc parallel loop present(b, a, row_counts)
do k = 1, nza
row = b%ia(k)
col = b%ja(k)
value = b%val(k)
if (row_counts(row) < nz_per_row) then
a%val(row, row_counts(row) + 1) = value
a%ja(row, row_counts(row) + 1) = col
row_counts(row) = row_counts(row) + 1
else
info = psb_err_invalid_mat_state_
!goto 9999
end if
end do
a%irn = row_counts
!$acc parallel loop present(a)
do i = 1, a%get_nrows()
do j = 1, nz_per_row
if (a%ja(i, j) == i) then
a%idiag(i) = j
exit
end if
end do
end do
deallocate(row_counts)
call a%set_dev()
call a%free_space()
call a%psb_s_ell_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
call a%sync_space()
call a%set_host()
call a%sync()
return

@ -14,10 +14,12 @@ contains
type is (psb_s_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
call a%free_space()
call a%psb_s_ell_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irn, a%idiag)
call a%sync_space()
call a%set_host()
call a%sync()
end select
end subroutine psb_s_oacc_ell_cp_from_fmt

@ -9,11 +9,13 @@ contains
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%free_space()
call a%psb_s_ell_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
!$acc update device(a%val, a%ja, a%irn, a%idiag)
call a%sync_space()
call a%set_host()
call a%sync()
return

@ -14,10 +14,12 @@ contains
type is (psb_s_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
call a%free_space()
call a%psb_s_ell_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irn, a%idiag)
call a%sync_space()
call a%set_host()
call a%sync()
end select
end subroutine psb_s_oacc_ell_mv_from_fmt

@ -2,78 +2,20 @@ submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_cp_from_coo_impl
use psb_base_mod
contains
module subroutine psb_s_oacc_hll_cp_from_coo(a, b, info)
implicit none
implicit none
class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
class(psb_s_coo_sparse_mat), intent(in) :: b
class(psb_s_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, j, k, row, col, nz_per_row
real(psb_spk_) :: value
integer(psb_ipk_), allocatable :: row_counts(:)
integer(psb_ipk_) :: hacksize, nza
info = psb_success_
hacksize = 32 ! Assuming a default hack size of 32
call a%set_nrows(b%get_nrows())
call a%set_ncols(b%get_ncols())
nz_per_row = a%nzt
if (.not.allocated(a%val)) then
allocate(a%val(nz_per_row * a%get_nrows()))
allocate(a%ja(nz_per_row * a%get_nrows()))
allocate(a%irn(a%get_nrows()))
allocate(a%idiag(a%get_nrows()))
allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize))
end if
a%val = szero
a%ja = -1
a%irn = 0
a%idiag = 0
allocate(row_counts(a%get_nrows()))
row_counts = 0
nza = b%get_nzeros()
!$acc parallel loop present(b, a, row_counts)
do k = 1, nza
row = b%ia(k)
col = b%ja(k)
value = b%val(k)
if (row_counts(row) < nz_per_row) then
a%val(row_counts(row) + 1 + (row - 1) * nz_per_row) = value
a%ja(row_counts(row) + 1 + (row - 1) * nz_per_row) = col
row_counts(row) = row_counts(row) + 1
else
info = psb_err_invalid_mat_state_
!goto 9999
end if
end do
a%irn = row_counts
!$acc parallel loop present(a)
do i = 1, a%get_nrows()
do j = 1, nz_per_row
if (a%ja(j + (i - 1) * nz_per_row) == i) then
a%idiag(i) = j
exit
end if
end do
end do
! Calculate hkoffs for HLL format
!$acc parallel loop present(a)
do i = 1, size(a%hkoffs)
a%hkoffs(i) = (i - 1) * hacksize
end do
deallocate(row_counts)
call a%set_dev()
call a%free_space()
call a%psb_s_hll_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
call a%sync_space()
call a%set_host()
call a%sync()
return

@ -14,10 +14,12 @@ contains
type is (psb_s_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
call a%free_space()
call a%psb_s_hll_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
call a%sync_space()
call a%set_host()
call a%sync()
end select
end subroutine psb_s_oacc_hll_cp_from_fmt

@ -10,10 +10,12 @@ contains
info = psb_success_
call a%free_space()
call a%psb_s_hll_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
!$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
call a%sync_space()
call a%set_host()
call a%sync()
return

@ -14,11 +14,12 @@ contains
type is (psb_s_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
call a%free_space()
call a%psb_s_hll_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
call a%sync_space()
call a%set_host()
call a%sync()
end select
end subroutine psb_s_oacc_hll_mv_from_fmt
end submodule psb_s_oacc_hll_mv_from_fmt_impl

@ -40,28 +40,28 @@ contains
real(psb_spk_) :: val(:), x(:), y(:)
integer(psb_ipk_) :: ja(:), hkoffs(:)
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, j, idx, k
integer(psb_ipk_) :: i, j, idx, k, ipnt,ir,nr,nlc,isz,ii
real(psb_spk_) :: tmp
info = 0
!$acc parallel loop present(val, ja, hkoffs, x, y)
!$acc parallel loop private(nlc, isz,ir,nr)
do i = 1, nhacks
do k = 0, hksz - 1
idx = hkoffs(i) + k
if (idx <= hkoffs(i + 1) - 1) then
tmp = 0.0_psb_dpk_
!$acc loop seq
do j = hkoffs(i) + k, hkoffs(i + 1) - 1, hksz
if (ja(j) > 0) then
tmp = tmp + val(j) * x(ja(j))
end if
end do
y(k + 1 + (i - 1) * hksz) = alpha * tmp + beta * y(k + 1 + (i - 1) * hksz)
end if
isz = hkoffs(i + 1) - hkoffs(i)
nlc = isz/hksz
ir = (i-1)*hksz
nr = min(hksz,m-ir)
!$acc loop independent private(tmp,ii,ipnt)
do ii = 1, nr
ipnt = hkoffs(i) + ii
tmp = szero
!$acc loop seq
do j = 1, nlc
tmp = tmp + val(ipnt) * x(ja(ipnt))
ipnt = ipnt + hksz
end do
y(ii+ir) = alpha * tmp + beta * y(ii+ir)
end do
end do
end subroutine inner_spmv
end subroutine psb_s_oacc_hll_vect_mv
end submodule psb_s_oacc_hll_vect_mv_impl

@ -10,11 +10,12 @@ contains
info = psb_success_
call a%free_space()
call a%psb_z_csr_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
call a%set_dev()
if (info /= 0) goto 9999
call a%sync_space()
call a%set_host()
call a%sync()
return

@ -14,10 +14,12 @@ contains
type is (psb_z_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
call a%free_space()
call a%psb_z_csr_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irp)
call a%sync_space()
call a%set_host()
call a%sync()
end select
end subroutine psb_z_oacc_csr_cp_from_fmt

@ -10,10 +10,12 @@ contains
info = psb_success_
call a%free_space()
call a%psb_z_csr_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
!$acc update device(a%val, a%ja, a%irp)
call a%sync_space()
call a%set_host()
call a%sync()
return

@ -14,10 +14,12 @@ contains
type is (psb_z_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
call a%free_space()
call a%psb_z_csr_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irp)
call a%sync_space()
call a%set_host()
call a%sync()
end select
end subroutine psb_z_oacc_csr_mv_from_fmt

@ -16,8 +16,8 @@ contains
m = a%get_nrows()
n = a%get_ncols()
if ((n /= size(x%v)) .or. (n /= size(y%v))) then
write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
if ((n /= size(x%v)) .or. (m /= size(y%v))) then
write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
return
end if

@ -8,65 +8,14 @@ contains
class(psb_z_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, j, k, row, col, nz_per_row
complex(psb_dpk_) :: value
integer(psb_ipk_), allocatable :: row_counts(:)
integer(psb_ipk_) :: hacksize, nza
info = psb_success_
hacksize = 1
call a%set_nrows(b%get_nrows())
call a%set_ncols(b%get_ncols())
nz_per_row = a%nzt
if (.not.allocated(a%val)) then
allocate(a%val(a%get_nrows(), nz_per_row))
allocate(a%ja(a%get_nrows(), nz_per_row))
allocate(a%irn(a%get_nrows()))
allocate(a%idiag(a%get_nrows()))
end if
a%val = zzero
a%ja = -1
a%irn = 0
a%idiag = 0
allocate(row_counts(a%get_nrows()))
row_counts = 0
nza = b%get_nzeros()
!$acc parallel loop present(b, a, row_counts)
do k = 1, nza
row = b%ia(k)
col = b%ja(k)
value = b%val(k)
if (row_counts(row) < nz_per_row) then
a%val(row, row_counts(row) + 1) = value
a%ja(row, row_counts(row) + 1) = col
row_counts(row) = row_counts(row) + 1
else
info = psb_err_invalid_mat_state_
!goto 9999
end if
end do
a%irn = row_counts
!$acc parallel loop present(a)
do i = 1, a%get_nrows()
do j = 1, nz_per_row
if (a%ja(i, j) == i) then
a%idiag(i) = j
exit
end if
end do
end do
deallocate(row_counts)
call a%set_dev()
call a%free_space()
call a%psb_z_ell_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
call a%sync_space()
call a%set_host()
call a%sync()
return

@ -14,10 +14,12 @@ contains
type is (psb_z_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
call a%free_space()
call a%psb_z_ell_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irn, a%idiag)
call a%sync_space()
call a%set_host()
call a%sync()
end select
end subroutine psb_z_oacc_ell_cp_from_fmt

@ -9,11 +9,13 @@ contains
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%free_space()
call a%psb_z_ell_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
!$acc update device(a%val, a%ja, a%irn, a%idiag)
call a%sync_space()
call a%set_host()
call a%sync()
return

@ -14,10 +14,12 @@ contains
type is (psb_z_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
call a%free_space()
call a%psb_z_ell_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irn, a%idiag)
call a%sync_space()
call a%set_host()
call a%sync()
end select
end subroutine psb_z_oacc_ell_mv_from_fmt

@ -2,78 +2,20 @@ submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_cp_from_coo_impl
use psb_base_mod
contains
module subroutine psb_z_oacc_hll_cp_from_coo(a, b, info)
implicit none
implicit none
class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
class(psb_z_coo_sparse_mat), intent(in) :: b
class(psb_z_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, j, k, row, col, nz_per_row
complex(psb_dpk_) :: value
integer(psb_ipk_), allocatable :: row_counts(:)
integer(psb_ipk_) :: hacksize, nza
info = psb_success_
hacksize = 32 ! Assuming a default hack size of 32
call a%set_nrows(b%get_nrows())
call a%set_ncols(b%get_ncols())
nz_per_row = a%nzt
if (.not.allocated(a%val)) then
allocate(a%val(nz_per_row * a%get_nrows()))
allocate(a%ja(nz_per_row * a%get_nrows()))
allocate(a%irn(a%get_nrows()))
allocate(a%idiag(a%get_nrows()))
allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize))
end if
a%val = zzero
a%ja = -1
a%irn = 0
a%idiag = 0
allocate(row_counts(a%get_nrows()))
row_counts = 0
nza = b%get_nzeros()
!$acc parallel loop present(b, a, row_counts)
do k = 1, nza
row = b%ia(k)
col = b%ja(k)
value = b%val(k)
if (row_counts(row) < nz_per_row) then
a%val(row_counts(row) + 1 + (row - 1) * nz_per_row) = value
a%ja(row_counts(row) + 1 + (row - 1) * nz_per_row) = col
row_counts(row) = row_counts(row) + 1
else
info = psb_err_invalid_mat_state_
!goto 9999
end if
end do
a%irn = row_counts
!$acc parallel loop present(a)
do i = 1, a%get_nrows()
do j = 1, nz_per_row
if (a%ja(j + (i - 1) * nz_per_row) == i) then
a%idiag(i) = j
exit
end if
end do
end do
! Calculate hkoffs for HLL format
!$acc parallel loop present(a)
do i = 1, size(a%hkoffs)
a%hkoffs(i) = (i - 1) * hacksize
end do
deallocate(row_counts)
call a%set_dev()
call a%free_space()
call a%psb_z_hll_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
call a%sync_space()
call a%set_host()
call a%sync()
return

@ -14,10 +14,12 @@ contains
type is (psb_z_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
call a%free_space()
call a%psb_z_hll_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
call a%sync_space()
call a%set_host()
call a%sync()
end select
end subroutine psb_z_oacc_hll_cp_from_fmt

@ -10,10 +10,12 @@ contains
info = psb_success_
call a%free_space()
call a%psb_z_hll_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
!$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
call a%sync_space()
call a%set_host()
call a%sync()
return

@ -14,11 +14,12 @@ contains
type is (psb_z_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
call a%free_space()
call a%psb_z_hll_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
call a%sync_space()
call a%set_host()
call a%sync()
end select
end subroutine psb_z_oacc_hll_mv_from_fmt
end submodule psb_z_oacc_hll_mv_from_fmt_impl

@ -40,28 +40,28 @@ contains
complex(psb_dpk_) :: val(:), x(:), y(:)
integer(psb_ipk_) :: ja(:), hkoffs(:)
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, j, idx, k
integer(psb_ipk_) :: i, j, idx, k, ipnt,ir,nr,nlc,isz,ii
complex(psb_dpk_) :: tmp
info = 0
!$acc parallel loop present(val, ja, hkoffs, x, y)
!$acc parallel loop private(nlc, isz,ir,nr)
do i = 1, nhacks
do k = 0, hksz - 1
idx = hkoffs(i) + k
if (idx <= hkoffs(i + 1) - 1) then
tmp = 0.0_psb_dpk_
!$acc loop seq
do j = hkoffs(i) + k, hkoffs(i + 1) - 1, hksz
if (ja(j) > 0) then
tmp = tmp + val(j) * x(ja(j))
end if
end do
y(k + 1 + (i - 1) * hksz) = alpha * tmp + beta * y(k + 1 + (i - 1) * hksz)
end if
isz = hkoffs(i + 1) - hkoffs(i)
nlc = isz/hksz
ir = (i-1)*hksz
nr = min(hksz,m-ir)
!$acc loop independent private(tmp,ii,ipnt)
do ii = 1, nr
ipnt = hkoffs(i) + ii
tmp = zzero
!$acc loop seq
do j = 1, nlc
tmp = tmp + val(ipnt) * x(ja(ipnt))
ipnt = ipnt + hksz
end do
y(ii+ir) = alpha * tmp + beta * y(ii+ir)
end do
end do
end subroutine inner_spmv
end subroutine psb_z_oacc_hll_vect_mv
end submodule psb_z_oacc_hll_vect_mv_impl

@ -1,6 +1,7 @@
module psb_c_oacc_csr_mat_mod
use iso_c_binding
use openacc
use psb_c_mat_mod
use psb_c_oacc_vect_mod
!use oaccsparse_mod
@ -35,6 +36,7 @@ module psb_c_oacc_csr_mat_mod
procedure, pass(a) :: set_host => c_oacc_csr_set_host
procedure, pass(a) :: set_sync => c_oacc_csr_set_sync
procedure, pass(a) :: set_dev => c_oacc_csr_set_dev
procedure, pass(a) :: free_space => c_oacc_csr_free_space
procedure, pass(a) :: sync_space => c_oacc_csr_sync_space
procedure, pass(a) :: sync => c_oacc_csr_sync
end type psb_c_oacc_csr_sparse_mat
@ -154,22 +156,26 @@ module psb_c_oacc_csr_mat_mod
contains
subroutine c_oacc_csr_free(a)
subroutine c_oacc_csr_free_space(a)
use psb_base_mod
implicit none
class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
if (allocated(a%val)) then
!$acc exit data delete(a%val)
end if
if (allocated(a%ja)) then
!$acc exit data delete(a%ja)
end if
if (allocated(a%irp)) then
!$acc exit data delete(a%irp)
end if
if (allocated(a%val)) call acc_delete_finalize(a%val)
if (allocated(a%ja)) call acc_delete_finalize(a%ja)
if (allocated(a%irp)) call acc_delete_finalize(a%irp)
return
end subroutine c_oacc_csr_free_space
subroutine c_oacc_csr_free(a)
use psb_base_mod
implicit none
class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
call a%free_space()
call a%psb_c_csr_sparse_mat%free()
return
@ -193,7 +199,7 @@ contains
function c_oacc_csr_get_fmt() result(res)
implicit none
character(len=5) :: res
res = 'CSR_oacc'
res = 'CSROA'
end function c_oacc_csr_get_fmt
subroutine c_oacc_csr_all(m, n, nz, a, info)
@ -202,19 +208,8 @@ contains
class(psb_c_oacc_csr_sparse_mat), intent(out) :: a
integer(psb_ipk_), intent(out) :: info
info = 0
if (allocated(a%val)) then
!$acc exit data delete(a%val) finalize
deallocate(a%val, stat=info)
end if
if (allocated(a%ja)) then
!$acc exit data delete(a%ja) finalize
deallocate(a%ja, stat=info)
end if
if (allocated(a%irp)) then
!$acc exit data delete(a%irp) finalize
deallocate(a%irp, stat=info)
end if
info = 0
call a%free()
call a%set_nrows(m)
call a%set_ncols(n)
@ -274,26 +269,9 @@ contains
subroutine c_oacc_csr_sync_space(a)
implicit none
class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
if (allocated(a%val)) then
call c_oacc_create_dev(a%val)
end if
if (allocated(a%ja)) then
call i_oacc_create_dev(a%ja)
end if
if (allocated(a%irp)) then
call i_oacc_create_dev(a%irp)
end if
contains
subroutine c_oacc_create_dev(v)
implicit none
complex(psb_spk_), intent(in) :: v(:)
!$acc enter data copyin(v)
end subroutine c_oacc_create_dev
subroutine i_oacc_create_dev(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc enter data copyin(v)
end subroutine i_oacc_create_dev
if (allocated(a%val)) call acc_create(a%val)
if (allocated(a%ja)) call acc_create(a%ja)
if (allocated(a%irp)) call acc_create(a%irp)
end subroutine c_oacc_csr_sync_space
subroutine c_oacc_csr_sync(a)
@ -304,40 +282,16 @@ contains
tmpa => a
if (a%is_dev()) then
call c_oacc_csr_to_host(a%val)
call i_oacc_csr_to_host(a%ja)
call i_oacc_csr_to_host(a%irp)
call acc_update_self(a%val)
call acc_update_self(a%ja)
call acc_update_self(a%irp)
else if (a%is_host()) then
call c_oacc_csr_to_dev(a%val)
call i_oacc_csr_to_dev(a%ja)
call i_oacc_csr_to_dev(a%irp)
call acc_update_device(a%val)
call acc_update_device(a%ja)
call acc_update_device(a%irp)
end if
call tmpa%set_sync()
end subroutine c_oacc_csr_sync
subroutine c_oacc_csr_to_dev(v)
implicit none
complex(psb_spk_), intent(in) :: v(:)
!$acc update device(v)
end subroutine c_oacc_csr_to_dev
subroutine c_oacc_csr_to_host(v)
implicit none
complex(psb_spk_), intent(in) :: v(:)
!$acc update self(v)
end subroutine c_oacc_csr_to_host
subroutine i_oacc_csr_to_dev(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc update device(v)
end subroutine i_oacc_csr_to_dev
subroutine i_oacc_csr_to_host(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc update self(v)
end subroutine i_oacc_csr_to_host
end module psb_c_oacc_csr_mat_mod

@ -1,5 +1,6 @@
module psb_c_oacc_ell_mat_mod
use iso_c_binding
use openacc
use psb_c_mat_mod
use psb_c_ell_mat_mod
use psb_c_oacc_vect_mod
@ -21,6 +22,7 @@ module psb_c_oacc_ell_mat_mod
procedure, pass(a) :: set_dev => c_oacc_ell_set_dev
procedure, pass(a) :: sync_space => c_oacc_ell_sync_space
procedure, pass(a) :: sync => c_oacc_ell_sync
procedure, pass(a) :: free_space => c_oacc_ell_free_space
procedure, pass(a) :: free => c_oacc_ell_free
procedure, pass(a) :: vect_mv => psb_c_oacc_ell_vect_mv
procedure, pass(a) :: in_vect_sv => psb_c_oacc_ell_inner_vect_sv
@ -152,31 +154,32 @@ module psb_c_oacc_ell_mat_mod
contains
subroutine c_oacc_ell_free(a)
subroutine c_oacc_ell_free_space(a)
use psb_base_mod
implicit none
class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
if (allocated(a%val)) then
!$acc exit data delete(a%val)
end if
if (allocated(a%ja)) then
!$acc exit data delete(a%ja)
end if
if (allocated(a%irn)) then
!$acc exit data delete(a%irn)
end if
if (allocated(a%idiag)) then
!$acc exit data delete(a%idiag)
end if
if (allocated(a%val)) call acc_delete_finalize(a%val)
if (allocated(a%ja)) call acc_delete_finalize(a%ja)
if (allocated(a%irn)) call acc_delete_finalize(a%irn)
if (allocated(a%idiag)) call acc_delete_finalize(a%idiag)
return
end subroutine c_oacc_ell_free_space
subroutine c_oacc_ell_free(a)
use psb_base_mod
implicit none
class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
call a%free_space()
call a%psb_c_ell_sparse_mat%free()
return
end subroutine c_oacc_ell_free
function c_oacc_ell_sizeof(a) result(res)
implicit none
class(psb_c_oacc_ell_sparse_mat), intent(in) :: a
@ -196,41 +199,12 @@ contains
implicit none
class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
if (allocated(a%val)) then
call c_oacc_create_dev(a%val)
end if
if (allocated(a%ja)) then
call i_oacc_create_dev(a%ja)
end if
if (allocated(a%irn)) then
call i_oacc_create_dev_scalar(a%irn)
end if
if (allocated(a%idiag)) then
call i_oacc_create_dev_scalar(a%idiag)
end if
contains
subroutine c_oacc_create_dev(v)
implicit none
complex(psb_spk_), intent(in) :: v(:,:)
!$acc enter data copyin(v)
end subroutine c_oacc_create_dev
subroutine i_oacc_create_dev(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:,:)
!$acc enter data copyin(v)
end subroutine i_oacc_create_dev
subroutine i_oacc_create_dev_scalar(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc enter data copyin(v)
end subroutine i_oacc_create_dev_scalar
if (allocated(a%val)) call acc_create(a%val)
if (allocated(a%ja)) call acc_create(a%ja)
if (allocated(a%irn)) call acc_create(a%irn)
if (allocated(a%idiag)) call acc_create(a%idiag)
end subroutine c_oacc_ell_sync_space
function c_oacc_ell_is_host(a) result(res)
implicit none
class(psb_c_oacc_ell_sparse_mat), intent(in) :: a
@ -279,7 +253,7 @@ contains
function c_oacc_ell_get_fmt() result(res)
implicit none
character(len=5) :: res
res = 'ELL_oacc'
res = 'ELLOA'
end function c_oacc_ell_get_fmt
subroutine c_oacc_ell_sync(a)
@ -290,64 +264,17 @@ contains
tmpa => a
if (a%is_dev()) then
call c_oacc_ell_to_host(a%val)
call i_oacc_ell_to_host(a%ja)
call i_oacc_ell_to_host_scalar(a%irn)
call i_oacc_ell_to_host_scalar(a%idiag)
call acc_update_self(a%val)
call acc_update_self(a%ja)
call acc_update_self(a%irn)
call acc_update_self(a%idiag)
else if (a%is_host()) then
call c_oacc_ell_to_dev(a%val)
call i_oacc_ell_to_dev(a%ja)
call i_oacc_ell_to_dev_scalar(a%irn)
call i_oacc_ell_to_dev_scalar(a%idiag)
call acc_update_device(a%val)
call acc_update_device(a%ja)
call acc_update_device(a%irn)
call acc_update_device(a%idiag)
end if
call tmpa%set_sync()
end subroutine c_oacc_ell_sync
subroutine c_oacc_ell_to_dev_scalar(v)
implicit none
complex(psb_spk_), intent(in) :: v(:)
!$acc update device(v)
end subroutine c_oacc_ell_to_dev_scalar
subroutine c_oacc_ell_to_dev(v)
implicit none
complex(psb_spk_), intent(in) :: v(:,:)
!$acc update device(v)
end subroutine c_oacc_ell_to_dev
subroutine c_oacc_ell_to_host(v)
implicit none
complex(psb_spk_), intent(in) :: v(:,:)
!$acc update self(v)
end subroutine c_oacc_ell_to_host
subroutine c_oacc_ell_to_host_scalar(v)
implicit none
complex(psb_spk_), intent(in) :: v(:)
!$acc update self(v)
end subroutine c_oacc_ell_to_host_scalar
subroutine i_oacc_ell_to_dev(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:,:)
!$acc update device(v)
end subroutine i_oacc_ell_to_dev
subroutine i_oacc_ell_to_dev_scalar(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc update device(v)
end subroutine i_oacc_ell_to_dev_scalar
subroutine i_oacc_ell_to_host(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:,:)
!$acc update self(v)
end subroutine i_oacc_ell_to_host
subroutine i_oacc_ell_to_host_scalar(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc update self(v)
end subroutine i_oacc_ell_to_host_scalar
end module psb_c_oacc_ell_mat_mod

@ -1,5 +1,6 @@
module psb_c_oacc_hll_mat_mod
use iso_c_binding
use openacc
use psb_c_mat_mod
use psb_c_hll_mat_mod
use psb_c_oacc_vect_mod
@ -21,6 +22,7 @@ module psb_c_oacc_hll_mat_mod
procedure, pass(a) :: set_dev => c_oacc_hll_set_dev
procedure, pass(a) :: sync_space => c_oacc_hll_sync_space
procedure, pass(a) :: sync => c_oacc_hll_sync
procedure, pass(a) :: free_space => c_oacc_hll_free_space
procedure, pass(a) :: free => c_oacc_hll_free
procedure, pass(a) :: vect_mv => psb_c_oacc_hll_vect_mv
procedure, pass(a) :: in_vect_sv => psb_c_oacc_hll_inner_vect_sv
@ -152,28 +154,28 @@ module psb_c_oacc_hll_mat_mod
contains
subroutine c_oacc_hll_free(a)
subroutine c_oacc_hll_free_space(a)
use psb_base_mod
implicit none
class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
if (allocated(a%val)) then
!$acc exit data delete(a%val)
end if
if (allocated(a%ja)) then
!$acc exit data delete(a%ja)
end if
if (allocated(a%irn)) then
!$acc exit data delete(a%irn)
end if
if (allocated(a%idiag)) then
!$acc exit data delete(a%idiag)
end if
if (allocated(a%hkoffs)) then
!$acc exit data delete(a%hkoffs)
end if
if (allocated(a%val)) call acc_delete_finalize(a%val)
if (allocated(a%ja)) call acc_delete_finalize(a%ja)
if (allocated(a%irn)) call acc_delete_finalize(a%irn)
if (allocated(a%idiag)) call acc_delete_finalize(a%idiag)
if (allocated(a%hkoffs)) call acc_delete_finalize(a%hkoffs)
return
end subroutine c_oacc_hll_free_space
subroutine c_oacc_hll_free(a)
use psb_base_mod
implicit none
class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
call a%free_space()
call a%psb_c_hll_sparse_mat%free()
return
@ -244,48 +246,18 @@ contains
function c_oacc_hll_get_fmt() result(res)
implicit none
character(len=5) :: res
res = 'HLL_oacc'
res = 'HLLOA'
end function c_oacc_hll_get_fmt
subroutine c_oacc_hll_sync_space(a)
implicit none
class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
if (allocated(a%val)) then
call c_oacc_create_dev(a%val)
end if
if (allocated(a%ja)) then
call i_oacc_create_dev(a%ja)
end if
if (allocated(a%irn)) then
call i_oacc_create_dev_scalar(a%irn)
end if
if (allocated(a%idiag)) then
call i_oacc_create_dev_scalar(a%idiag)
end if
if (allocated(a%hkoffs)) then
call i_oacc_create_dev_scalar(a%hkoffs)
end if
contains
subroutine c_oacc_create_dev(v)
implicit none
complex(psb_spk_), intent(in) :: v(:)
!$acc enter data copyin(v)
end subroutine c_oacc_create_dev
subroutine i_oacc_create_dev(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc enter data copyin(v)
end subroutine i_oacc_create_dev
subroutine i_oacc_create_dev_scalar(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc enter data copyin(v)
end subroutine i_oacc_create_dev_scalar
if (allocated(a%val)) call acc_create(a%val)
if (allocated(a%ja)) call acc_create(a%ja)
if (allocated(a%irn)) call acc_create(a%irn)
if (allocated(a%idiag)) call acc_create(a%idiag)
if (allocated(a%hkoffs)) call acc_create(a%hkoffs)
end subroutine c_oacc_hll_sync_space
@ -297,56 +269,19 @@ contains
tmpa => a
if (a%is_dev()) then
call c_oacc_hll_to_host(a%val)
call i_oacc_hll_to_host(a%ja)
call i_oacc_hll_to_host_scalar(a%irn)
call i_oacc_hll_to_host_scalar(a%idiag)
call i_oacc_hll_to_host_scalar(a%hkoffs)
call acc_update_self(a%val)
call acc_update_self(a%ja)
call acc_update_self(a%irn)
call acc_update_self(a%idiag)
call acc_update_self(a%hkoffs)
else if (a%is_host()) then
call c_oacc_hll_to_dev(a%val)
call i_oacc_hll_to_dev(a%ja)
call i_oacc_hll_to_dev_scalar(a%irn)
call i_oacc_hll_to_dev_scalar(a%idiag)
call i_oacc_hll_to_dev_scalar(a%hkoffs)
call acc_update_device(a%val)
call acc_update_device(a%ja)
call acc_update_device(a%irn)
call acc_update_device(a%idiag)
call acc_update_device(a%hkoffs)
end if
call tmpa%set_sync()
end subroutine c_oacc_hll_sync
subroutine c_oacc_hll_to_host(v)
implicit none
complex(psb_spk_), intent(in) :: v(:)
!$acc update self(v)
end subroutine c_oacc_hll_to_host
subroutine c_oacc_hll_to_dev(v)
implicit none
complex(psb_spk_), intent(in) :: v(:)
!$acc update device(v)
end subroutine c_oacc_hll_to_dev
subroutine i_oacc_hll_to_host(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc update self(v)
end subroutine i_oacc_hll_to_host
subroutine i_oacc_hll_to_dev(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc update device(v)
end subroutine i_oacc_hll_to_dev
subroutine i_oacc_hll_to_host_scalar(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc update self(v)
end subroutine i_oacc_hll_to_host_scalar
subroutine i_oacc_hll_to_dev_scalar(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc update device(v)
end subroutine i_oacc_hll_to_dev_scalar
end module psb_c_oacc_hll_mat_mod

@ -59,7 +59,7 @@ module psb_c_oacc_vect_mod
procedure, pass(x) :: asum => c_oacc_asum
procedure, pass(x) :: absval1 => c_oacc_absval1
procedure, pass(x) :: absval2 => c_oacc_absval2
final :: c_oacc_final_vect_free
end type psb_c_vect_oacc
interface
@ -164,21 +164,25 @@ contains
implicit none
class(psb_c_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
real(psb_spk_) :: res
real(psb_spk_) :: mx
real(psb_spk_) :: res
integer(psb_ipk_) :: info
if (x%is_host()) call x%sync()
mx = c_oacc_amax(n,x)
res = c_inner_oacc_nrm2(n, mx, x%v)
!!$ write(0,*)'oacc_nrm2'
res = c_inner_oacc_nrm2(n, x%v)
contains
function c_inner_oacc_nrm2(n, mx,x) result(res)
function c_inner_oacc_nrm2(n, x) result(res)
integer(psb_ipk_) :: n
complex(psb_spk_) :: x(:)
real(psb_spk_) :: mx, res
real(psb_spk_) :: sum
real(psb_spk_) :: res
real(psb_spk_) :: sum, mx
integer(psb_ipk_) :: i
sum = 0.0
mx = szero
!$acc parallel loop reduction(max:mx)
do i = 1, n
if (abs(x(i)) > mx) mx = abs(x(i))
end do
sum = szero
!$acc parallel loop reduction(+:sum)
do i = 1, n
sum = sum + abs(x(i)/mx)**2
@ -203,7 +207,7 @@ contains
real(psb_spk_) :: res
real(psb_spk_) :: max_val
integer(psb_ipk_) :: i
max_val = -huge(0.0)
max_val = szero
!$acc parallel loop reduction(max:max_val)
do i = 1, n
if (abs(x(i)) > max_val) max_val = abs(x(i))
@ -228,7 +232,7 @@ contains
complex(psb_spk_) :: x(:)
real(psb_spk_) :: res
integer(psb_ipk_) :: i
res = 0.0
res = szero
!$acc parallel loop reduction(+:res)
do i = 1, n
res = res + abs(x(i))
@ -271,92 +275,6 @@ contains
call z%set_host()
end subroutine c_oacc_mlt_a_2
!!$ subroutine c_oacc_mlt_v(x, y, info)
!!$ implicit none
!!$ class(psb_c_base_vect_type), intent(inout) :: x
!!$ class(psb_c_vect_oacc), intent(inout) :: y
!!$ integer(psb_ipk_), intent(out) :: info
!!$
!!$ integer(psb_ipk_) :: i, n
!!$
!!$ info = 0
!!$ n = min(x%get_nrows(), y%get_nrows())
!!$ select type(xx => x)
!!$ type is (psb_c_base_vect_type)
!!$ if (y%is_dev()) call y%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ y%v(i) = y%v(i) * xx%v(i)
!!$ end do
!!$ call y%set_host()
!!$ class default
!!$ if (xx%is_dev()) call xx%sync()
!!$ if (y%is_dev()) call y%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ y%v(i) = y%v(i) * xx%v(i)
!!$ end do
!!$ call y%set_host()
!!$ end select
!!$ end subroutine c_oacc_mlt_v
!!$
!!$ subroutine c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
!!$ use psi_serial_mod
!!$ use psb_string_mod
!!$ implicit none
!!$ complex(psb_spk_), intent(in) :: alpha, beta
!!$ class(psb_c_base_vect_type), intent(inout) :: x
!!$ class(psb_c_base_vect_type), intent(inout) :: y
!!$ class(psb_c_vect_oacc), intent(inout) :: z
!!$ integer(psb_ipk_), intent(out) :: info
!!$ character(len=1), intent(in), optional :: conjgx, conjgy
!!$ integer(psb_ipk_) :: i, n
!!$ logical :: conjgx_, conjgy_
!!$
!!$ conjgx_ = .false.
!!$ conjgy_ = .false.
!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
!!$
!!$ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
!!$
!!$ info = 0
!!$ select type(xx => x)
!!$ class is (psb_c_vect_oacc)
!!$ select type (yy => y)
!!$ class is (psb_c_vect_oacc)
!!$ if (xx%is_host()) call xx%sync()
!!$ if (yy%is_host()) call yy%sync()
!!$ if ((beta /= czero) .and. (z%is_host())) call z%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
!!$ end do
!!$ call z%set_dev()
!!$ class default
!!$ if (xx%is_dev()) call xx%sync()
!!$ if (yy%is_dev()) call yy%sync()
!!$ if ((beta /= czero) .and. (z%is_dev())) call z%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
!!$ end do
!!$ call z%set_host()
!!$ end select
!!$ class default
!!$ if (x%is_dev()) call x%sync()
!!$ if (y%is_dev()) call y%sync()
!!$ if ((beta /= czero) .and. (z%is_dev())) call z%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i)
!!$ end do
!!$ call z%set_host()
!!$ end select
!!$ end subroutine c_oacc_mlt_v_2
subroutine c_oacc_axpby_v(m, alpha, x, beta, y, info)
!use psi_serial_mod
implicit none
@ -414,7 +332,7 @@ contains
integer(psb_ipk_) :: i
if ((beta /= czero) .and. (y%is_dev())) call y%sync()
!$acc parallel loop
do i = 1, m
y%v(i) = alpha * x(i) + beta * y%v(i)
end do
@ -432,44 +350,44 @@ contains
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nx, ny, nz, i
logical :: gpu_done
write(0,*)'upd_xyz'
info = psb_success_
gpu_done = .false.
select type(xx => x)
class is (psb_c_vect_oacc)
select type(yy => y)
select type(yy => y)
class is (psb_c_vect_oacc)
select type(zz => z)
class is (psb_c_vect_oacc)
select type(zz => z)
class is (psb_c_vect_oacc)
if ((beta /= czero) .and. yy%is_host()) call yy%sync()
if ((delta /= czero) .and. zz%is_host()) call zz%sync()
if (xx%is_host()) call xx%sync()
nx = size(xx%v)
ny = size(yy%v)
nz = size(zz%v)
if ((nx < m) .or. (ny < m) .or. (nz < m)) then
info = psb_err_internal_error_
else
!$acc parallel loop
do i = 1, m
yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
end do
end if
call yy%set_dev()
call zz%set_dev()
gpu_done = .true.
end select
if ((beta /= czero) .and. yy%is_host()) call yy%sync()
if ((delta /= czero) .and. zz%is_host()) call zz%sync()
if (xx%is_host()) call xx%sync()
nx = size(xx%v)
ny = size(yy%v)
nz = size(zz%v)
if ((nx < m) .or. (ny < m) .or. (nz < m)) then
info = psb_err_internal_error_
else
!$acc parallel loop
do i = 1, m
yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
end do
end if
call yy%set_dev()
call zz%set_dev()
gpu_done = .true.
end select
end select
end select
if (.not. gpu_done) then
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
if (z%is_host()) call z%sync()
call y%axpby(m, alpha, x, beta, info)
call z%axpby(m, gamma, y, delta, info)
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
if (z%is_host()) call z%sync()
call y%axpby(m, alpha, x, beta, info)
call z%axpby(m, gamma, y, delta, info)
end if
end subroutine c_oacc_upd_xyz
@ -676,7 +594,7 @@ contains
if (x%is_dev()) call x%sync()
call x%psb_c_base_vect_type%ins(n, irl, val, dupl, info)
call x%set_host()
!$acc update device(x%v)
end subroutine c_oacc_ins_a
@ -687,16 +605,14 @@ contains
class(psb_c_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
call x%free(info)
call x%all(n, info)
if (info /= 0) then
call psb_errpush(info, 'c_oacc_bld_mn', i_err=(/n, n, n, n, n/))
end if
call x%set_host()
if (acc_is_present(x%v)) then
!$acc exit data delete(x%v) finalize
end if
!$acc enter data copyin(x%v)
call x%sync_space()
end subroutine c_oacc_bld_mn
@ -707,6 +623,7 @@ contains
class(psb_c_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
call x%free(info)
call psb_realloc(size(this), x%v, info)
if (info /= 0) then
info = psb_err_alloc_request_
@ -714,13 +631,9 @@ contains
i_err=(/size(this), izero, izero, izero, izero/))
return
end if
x%v(:) = this(:)
call x%set_host()
if (acc_is_present(x%v)) then
!$acc exit data delete(x%v) finalize
end if
!$acc enter data copyin(x%v)
call x%sync_space()
end subroutine c_oacc_bld_x
@ -848,54 +761,21 @@ contains
end function c_oacc_dot_a
! subroutine c_oacc_set_vect(x,y)
! implicit none
! class(psb_c_vect_oacc), intent(inout) :: x
! complex(psb_spk_), intent(in) :: y(:)
! integer(psb_ipk_) :: info
! if (size(x%v) /= size(y)) then
! call x%free(info)
! call x%all(size(y),info)
! end if
! x%v(:) = y(:)
! call x%set_host()
! end subroutine c_oacc_set_vect
subroutine c_oacc_to_dev(v)
implicit none
complex(psb_spk_) :: v(:)
!$acc update device(v)
end subroutine c_oacc_to_dev
subroutine c_oacc_to_host(v)
implicit none
complex(psb_spk_) :: v(:)
!$acc update self(v)
end subroutine c_oacc_to_host
subroutine c_oacc_sync_space(x)
implicit none
class(psb_c_vect_oacc), intent(inout) :: x
if (allocated(x%v)) then
if (.not.acc_is_present(x%v)) call c_oacc_create_dev(x%v)
end if
contains
subroutine c_oacc_create_dev(v)
implicit none
complex(psb_spk_) :: v(:)
!$acc enter data copyin(v)
end subroutine c_oacc_create_dev
if (allocated(x%v)) call acc_create(x%v)
end subroutine c_oacc_sync_space
subroutine c_oacc_sync(x)
implicit none
class(psb_c_vect_oacc), intent(inout) :: x
if (x%is_dev()) then
call c_oacc_to_host(x%v)
call acc_update_self(x%v)
end if
if (x%is_host()) then
call c_oacc_to_dev(x%v)
call acc_update_device(x%v)
end if
call x%set_sync()
end subroutine c_oacc_sync
@ -954,33 +834,36 @@ contains
integer(psb_ipk_), intent(out) :: info
call psb_realloc(n, x%v, info)
if (info == 0) then
call x%set_host()
if (acc_is_present(x%v)) then
!$acc exit data delete(x%v) finalize
end if
!$acc enter data create(x%v)
call x%sync_space()
end if
if (info /= 0) then
info = psb_err_alloc_request_
call psb_errpush(info, 'c_oacc_all', &
i_err=(/n, n, n, n, n/))
end if
call x%set_host()
call x%sync_space()
end subroutine c_oacc_vect_all
subroutine c_oacc_final_vect_free(x)
implicit none
type(psb_c_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
info = 0
if (allocated(x%v)) then
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
end if
end subroutine c_oacc_final_vect_free
subroutine c_oacc_vect_free(x, info)
implicit none
class(psb_c_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
if (allocated(x%v)) then
if (acc_is_present(x%v)) then
!$acc exit data delete(x%v) finalize
end if
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
end if
end subroutine c_oacc_vect_free
function c_oacc_get_size(x) result(res)

@ -1,6 +1,7 @@
module psb_d_oacc_csr_mat_mod
use iso_c_binding
use openacc
use psb_d_mat_mod
use psb_d_oacc_vect_mod
!use oaccsparse_mod
@ -35,6 +36,7 @@ module psb_d_oacc_csr_mat_mod
procedure, pass(a) :: set_host => d_oacc_csr_set_host
procedure, pass(a) :: set_sync => d_oacc_csr_set_sync
procedure, pass(a) :: set_dev => d_oacc_csr_set_dev
procedure, pass(a) :: free_space => d_oacc_csr_free_space
procedure, pass(a) :: sync_space => d_oacc_csr_sync_space
procedure, pass(a) :: sync => d_oacc_csr_sync
end type psb_d_oacc_csr_sparse_mat
@ -154,22 +156,26 @@ module psb_d_oacc_csr_mat_mod
contains
subroutine d_oacc_csr_free(a)
subroutine d_oacc_csr_free_space(a)
use psb_base_mod
implicit none
class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
if (allocated(a%val)) then
!$acc exit data delete(a%val)
end if
if (allocated(a%ja)) then
!$acc exit data delete(a%ja)
end if
if (allocated(a%irp)) then
!$acc exit data delete(a%irp)
end if
if (allocated(a%val)) call acc_delete_finalize(a%val)
if (allocated(a%ja)) call acc_delete_finalize(a%ja)
if (allocated(a%irp)) call acc_delete_finalize(a%irp)
return
end subroutine d_oacc_csr_free_space
subroutine d_oacc_csr_free(a)
use psb_base_mod
implicit none
class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
call a%free_space()
call a%psb_d_csr_sparse_mat%free()
return
@ -193,7 +199,7 @@ contains
function d_oacc_csr_get_fmt() result(res)
implicit none
character(len=5) :: res
res = 'CSR_oacc'
res = 'CSROA'
end function d_oacc_csr_get_fmt
subroutine d_oacc_csr_all(m, n, nz, a, info)
@ -202,19 +208,8 @@ contains
class(psb_d_oacc_csr_sparse_mat), intent(out) :: a
integer(psb_ipk_), intent(out) :: info
info = 0
if (allocated(a%val)) then
!$acc exit data delete(a%val) finalize
deallocate(a%val, stat=info)
end if
if (allocated(a%ja)) then
!$acc exit data delete(a%ja) finalize
deallocate(a%ja, stat=info)
end if
if (allocated(a%irp)) then
!$acc exit data delete(a%irp) finalize
deallocate(a%irp, stat=info)
end if
info = 0
call a%free()
call a%set_nrows(m)
call a%set_ncols(n)
@ -274,26 +269,9 @@ contains
subroutine d_oacc_csr_sync_space(a)
implicit none
class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
if (allocated(a%val)) then
call d_oacc_create_dev(a%val)
end if
if (allocated(a%ja)) then
call i_oacc_create_dev(a%ja)
end if
if (allocated(a%irp)) then
call i_oacc_create_dev(a%irp)
end if
contains
subroutine d_oacc_create_dev(v)
implicit none
real(psb_dpk_), intent(in) :: v(:)
!$acc enter data copyin(v)
end subroutine d_oacc_create_dev
subroutine i_oacc_create_dev(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc enter data copyin(v)
end subroutine i_oacc_create_dev
if (allocated(a%val)) call acc_create(a%val)
if (allocated(a%ja)) call acc_create(a%ja)
if (allocated(a%irp)) call acc_create(a%irp)
end subroutine d_oacc_csr_sync_space
subroutine d_oacc_csr_sync(a)
@ -304,40 +282,16 @@ contains
tmpa => a
if (a%is_dev()) then
call d_oacc_csr_to_host(a%val)
call i_oacc_csr_to_host(a%ja)
call i_oacc_csr_to_host(a%irp)
call acc_update_self(a%val)
call acc_update_self(a%ja)
call acc_update_self(a%irp)
else if (a%is_host()) then
call d_oacc_csr_to_dev(a%val)
call i_oacc_csr_to_dev(a%ja)
call i_oacc_csr_to_dev(a%irp)
call acc_update_device(a%val)
call acc_update_device(a%ja)
call acc_update_device(a%irp)
end if
call tmpa%set_sync()
end subroutine d_oacc_csr_sync
subroutine d_oacc_csr_to_dev(v)
implicit none
real(psb_dpk_), intent(in) :: v(:)
!$acc update device(v)
end subroutine d_oacc_csr_to_dev
subroutine d_oacc_csr_to_host(v)
implicit none
real(psb_dpk_), intent(in) :: v(:)
!$acc update self(v)
end subroutine d_oacc_csr_to_host
subroutine i_oacc_csr_to_dev(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc update device(v)
end subroutine i_oacc_csr_to_dev
subroutine i_oacc_csr_to_host(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc update self(v)
end subroutine i_oacc_csr_to_host
end module psb_d_oacc_csr_mat_mod

@ -1,5 +1,6 @@
module psb_d_oacc_ell_mat_mod
use iso_c_binding
use openacc
use psb_d_mat_mod
use psb_d_ell_mat_mod
use psb_d_oacc_vect_mod
@ -21,6 +22,7 @@ module psb_d_oacc_ell_mat_mod
procedure, pass(a) :: set_dev => d_oacc_ell_set_dev
procedure, pass(a) :: sync_space => d_oacc_ell_sync_space
procedure, pass(a) :: sync => d_oacc_ell_sync
procedure, pass(a) :: free_space => d_oacc_ell_free_space
procedure, pass(a) :: free => d_oacc_ell_free
procedure, pass(a) :: vect_mv => psb_d_oacc_ell_vect_mv
procedure, pass(a) :: in_vect_sv => psb_d_oacc_ell_inner_vect_sv
@ -152,31 +154,32 @@ module psb_d_oacc_ell_mat_mod
contains
subroutine d_oacc_ell_free(a)
subroutine d_oacc_ell_free_space(a)
use psb_base_mod
implicit none
class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
if (allocated(a%val)) then
!$acc exit data delete(a%val)
end if
if (allocated(a%ja)) then
!$acc exit data delete(a%ja)
end if
if (allocated(a%irn)) then
!$acc exit data delete(a%irn)
end if
if (allocated(a%idiag)) then
!$acc exit data delete(a%idiag)
end if
if (allocated(a%val)) call acc_delete_finalize(a%val)
if (allocated(a%ja)) call acc_delete_finalize(a%ja)
if (allocated(a%irn)) call acc_delete_finalize(a%irn)
if (allocated(a%idiag)) call acc_delete_finalize(a%idiag)
return
end subroutine d_oacc_ell_free_space
subroutine d_oacc_ell_free(a)
use psb_base_mod
implicit none
class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
call a%free_space()
call a%psb_d_ell_sparse_mat%free()
return
end subroutine d_oacc_ell_free
function d_oacc_ell_sizeof(a) result(res)
implicit none
class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
@ -196,41 +199,12 @@ contains
implicit none
class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
if (allocated(a%val)) then
call d_oacc_create_dev(a%val)
end if
if (allocated(a%ja)) then
call i_oacc_create_dev(a%ja)
end if
if (allocated(a%irn)) then
call i_oacc_create_dev_scalar(a%irn)
end if
if (allocated(a%idiag)) then
call i_oacc_create_dev_scalar(a%idiag)
end if
contains
subroutine d_oacc_create_dev(v)
implicit none
real(psb_dpk_), intent(in) :: v(:,:)
!$acc enter data copyin(v)
end subroutine d_oacc_create_dev
subroutine i_oacc_create_dev(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:,:)
!$acc enter data copyin(v)
end subroutine i_oacc_create_dev
subroutine i_oacc_create_dev_scalar(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc enter data copyin(v)
end subroutine i_oacc_create_dev_scalar
if (allocated(a%val)) call acc_create(a%val)
if (allocated(a%ja)) call acc_create(a%ja)
if (allocated(a%irn)) call acc_create(a%irn)
if (allocated(a%idiag)) call acc_create(a%idiag)
end subroutine d_oacc_ell_sync_space
function d_oacc_ell_is_host(a) result(res)
implicit none
class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
@ -279,7 +253,7 @@ contains
function d_oacc_ell_get_fmt() result(res)
implicit none
character(len=5) :: res
res = 'ELL_oacc'
res = 'ELLOA'
end function d_oacc_ell_get_fmt
subroutine d_oacc_ell_sync(a)
@ -290,64 +264,17 @@ contains
tmpa => a
if (a%is_dev()) then
call d_oacc_ell_to_host(a%val)
call i_oacc_ell_to_host(a%ja)
call i_oacc_ell_to_host_scalar(a%irn)
call i_oacc_ell_to_host_scalar(a%idiag)
call acc_update_self(a%val)
call acc_update_self(a%ja)
call acc_update_self(a%irn)
call acc_update_self(a%idiag)
else if (a%is_host()) then
call d_oacc_ell_to_dev(a%val)
call i_oacc_ell_to_dev(a%ja)
call i_oacc_ell_to_dev_scalar(a%irn)
call i_oacc_ell_to_dev_scalar(a%idiag)
call acc_update_device(a%val)
call acc_update_device(a%ja)
call acc_update_device(a%irn)
call acc_update_device(a%idiag)
end if
call tmpa%set_sync()
end subroutine d_oacc_ell_sync
subroutine d_oacc_ell_to_dev_scalar(v)
implicit none
real(psb_dpk_), intent(in) :: v(:)
!$acc update device(v)
end subroutine d_oacc_ell_to_dev_scalar
subroutine d_oacc_ell_to_dev(v)
implicit none
real(psb_dpk_), intent(in) :: v(:,:)
!$acc update device(v)
end subroutine d_oacc_ell_to_dev
subroutine d_oacc_ell_to_host(v)
implicit none
real(psb_dpk_), intent(in) :: v(:,:)
!$acc update self(v)
end subroutine d_oacc_ell_to_host
subroutine d_oacc_ell_to_host_scalar(v)
implicit none
real(psb_dpk_), intent(in) :: v(:)
!$acc update self(v)
end subroutine d_oacc_ell_to_host_scalar
subroutine i_oacc_ell_to_dev(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:,:)
!$acc update device(v)
end subroutine i_oacc_ell_to_dev
subroutine i_oacc_ell_to_dev_scalar(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc update device(v)
end subroutine i_oacc_ell_to_dev_scalar
subroutine i_oacc_ell_to_host(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:,:)
!$acc update self(v)
end subroutine i_oacc_ell_to_host
subroutine i_oacc_ell_to_host_scalar(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc update self(v)
end subroutine i_oacc_ell_to_host_scalar
end module psb_d_oacc_ell_mat_mod

@ -1,5 +1,6 @@
module psb_d_oacc_hll_mat_mod
use iso_c_binding
use openacc
use psb_d_mat_mod
use psb_d_hll_mat_mod
use psb_d_oacc_vect_mod
@ -21,6 +22,7 @@ module psb_d_oacc_hll_mat_mod
procedure, pass(a) :: set_dev => d_oacc_hll_set_dev
procedure, pass(a) :: sync_space => d_oacc_hll_sync_space
procedure, pass(a) :: sync => d_oacc_hll_sync
procedure, pass(a) :: free_space => d_oacc_hll_free_space
procedure, pass(a) :: free => d_oacc_hll_free
procedure, pass(a) :: vect_mv => psb_d_oacc_hll_vect_mv
procedure, pass(a) :: in_vect_sv => psb_d_oacc_hll_inner_vect_sv
@ -152,28 +154,28 @@ module psb_d_oacc_hll_mat_mod
contains
subroutine d_oacc_hll_free(a)
subroutine d_oacc_hll_free_space(a)
use psb_base_mod
implicit none
class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
if (allocated(a%val)) then
!$acc exit data delete(a%val)
end if
if (allocated(a%ja)) then
!$acc exit data delete(a%ja)
end if
if (allocated(a%irn)) then
!$acc exit data delete(a%irn)
end if
if (allocated(a%idiag)) then
!$acc exit data delete(a%idiag)
end if
if (allocated(a%hkoffs)) then
!$acc exit data delete(a%hkoffs)
end if
if (allocated(a%val)) call acc_delete_finalize(a%val)
if (allocated(a%ja)) call acc_delete_finalize(a%ja)
if (allocated(a%irn)) call acc_delete_finalize(a%irn)
if (allocated(a%idiag)) call acc_delete_finalize(a%idiag)
if (allocated(a%hkoffs)) call acc_delete_finalize(a%hkoffs)
return
end subroutine d_oacc_hll_free_space
subroutine d_oacc_hll_free(a)
use psb_base_mod
implicit none
class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
call a%free_space()
call a%psb_d_hll_sparse_mat%free()
return
@ -244,48 +246,18 @@ contains
function d_oacc_hll_get_fmt() result(res)
implicit none
character(len=5) :: res
res = 'HLL_oacc'
res = 'HLLOA'
end function d_oacc_hll_get_fmt
subroutine d_oacc_hll_sync_space(a)
implicit none
class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
if (allocated(a%val)) then
call d_oacc_create_dev(a%val)
end if
if (allocated(a%ja)) then
call i_oacc_create_dev(a%ja)
end if
if (allocated(a%irn)) then
call i_oacc_create_dev_scalar(a%irn)
end if
if (allocated(a%idiag)) then
call i_oacc_create_dev_scalar(a%idiag)
end if
if (allocated(a%hkoffs)) then
call i_oacc_create_dev_scalar(a%hkoffs)
end if
contains
subroutine d_oacc_create_dev(v)
implicit none
real(psb_dpk_), intent(in) :: v(:)
!$acc enter data copyin(v)
end subroutine d_oacc_create_dev
subroutine i_oacc_create_dev(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc enter data copyin(v)
end subroutine i_oacc_create_dev
subroutine i_oacc_create_dev_scalar(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc enter data copyin(v)
end subroutine i_oacc_create_dev_scalar
if (allocated(a%val)) call acc_create(a%val)
if (allocated(a%ja)) call acc_create(a%ja)
if (allocated(a%irn)) call acc_create(a%irn)
if (allocated(a%idiag)) call acc_create(a%idiag)
if (allocated(a%hkoffs)) call acc_create(a%hkoffs)
end subroutine d_oacc_hll_sync_space
@ -297,56 +269,19 @@ contains
tmpa => a
if (a%is_dev()) then
call d_oacc_hll_to_host(a%val)
call i_oacc_hll_to_host(a%ja)
call i_oacc_hll_to_host_scalar(a%irn)
call i_oacc_hll_to_host_scalar(a%idiag)
call i_oacc_hll_to_host_scalar(a%hkoffs)
call acc_update_self(a%val)
call acc_update_self(a%ja)
call acc_update_self(a%irn)
call acc_update_self(a%idiag)
call acc_update_self(a%hkoffs)
else if (a%is_host()) then
call d_oacc_hll_to_dev(a%val)
call i_oacc_hll_to_dev(a%ja)
call i_oacc_hll_to_dev_scalar(a%irn)
call i_oacc_hll_to_dev_scalar(a%idiag)
call i_oacc_hll_to_dev_scalar(a%hkoffs)
call acc_update_device(a%val)
call acc_update_device(a%ja)
call acc_update_device(a%irn)
call acc_update_device(a%idiag)
call acc_update_device(a%hkoffs)
end if
call tmpa%set_sync()
end subroutine d_oacc_hll_sync
subroutine d_oacc_hll_to_host(v)
implicit none
real(psb_dpk_), intent(in) :: v(:)
!$acc update self(v)
end subroutine d_oacc_hll_to_host
subroutine d_oacc_hll_to_dev(v)
implicit none
real(psb_dpk_), intent(in) :: v(:)
!$acc update device(v)
end subroutine d_oacc_hll_to_dev
subroutine i_oacc_hll_to_host(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc update self(v)
end subroutine i_oacc_hll_to_host
subroutine i_oacc_hll_to_dev(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc update device(v)
end subroutine i_oacc_hll_to_dev
subroutine i_oacc_hll_to_host_scalar(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc update self(v)
end subroutine i_oacc_hll_to_host_scalar
subroutine i_oacc_hll_to_dev_scalar(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc update device(v)
end subroutine i_oacc_hll_to_dev_scalar
end module psb_d_oacc_hll_mat_mod

@ -59,7 +59,7 @@ module psb_d_oacc_vect_mod
procedure, pass(x) :: asum => d_oacc_asum
procedure, pass(x) :: absval1 => d_oacc_absval1
procedure, pass(x) :: absval2 => d_oacc_absval2
final :: d_oacc_final_vect_free
end type psb_d_vect_oacc
interface
@ -164,21 +164,25 @@ contains
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
real(psb_dpk_) :: res
real(psb_dpk_) :: mx
real(psb_dpk_) :: res
integer(psb_ipk_) :: info
if (x%is_host()) call x%sync()
mx = d_oacc_amax(n,x)
res = d_inner_oacc_nrm2(n, mx, x%v)
!!$ write(0,*)'oacc_nrm2'
res = d_inner_oacc_nrm2(n, x%v)
contains
function d_inner_oacc_nrm2(n, mx,x) result(res)
function d_inner_oacc_nrm2(n, x) result(res)
integer(psb_ipk_) :: n
real(psb_dpk_) :: x(:)
real(psb_dpk_) :: mx, res
real(psb_dpk_) :: sum
real(psb_dpk_) :: res
real(psb_dpk_) :: sum, mx
integer(psb_ipk_) :: i
sum = 0.0
mx = dzero
!$acc parallel loop reduction(max:mx)
do i = 1, n
if (abs(x(i)) > mx) mx = abs(x(i))
end do
sum = dzero
!$acc parallel loop reduction(+:sum)
do i = 1, n
sum = sum + abs(x(i)/mx)**2
@ -203,7 +207,7 @@ contains
real(psb_dpk_) :: res
real(psb_dpk_) :: max_val
integer(psb_ipk_) :: i
max_val = -huge(0.0)
max_val = dzero
!$acc parallel loop reduction(max:max_val)
do i = 1, n
if (abs(x(i)) > max_val) max_val = abs(x(i))
@ -228,7 +232,7 @@ contains
real(psb_dpk_) :: x(:)
real(psb_dpk_) :: res
integer(psb_ipk_) :: i
res = 0.0
res = dzero
!$acc parallel loop reduction(+:res)
do i = 1, n
res = res + abs(x(i))
@ -271,92 +275,6 @@ contains
call z%set_host()
end subroutine d_oacc_mlt_a_2
!!$ subroutine d_oacc_mlt_v(x, y, info)
!!$ implicit none
!!$ class(psb_d_base_vect_type), intent(inout) :: x
!!$ class(psb_d_vect_oacc), intent(inout) :: y
!!$ integer(psb_ipk_), intent(out) :: info
!!$
!!$ integer(psb_ipk_) :: i, n
!!$
!!$ info = 0
!!$ n = min(x%get_nrows(), y%get_nrows())
!!$ select type(xx => x)
!!$ type is (psb_d_base_vect_type)
!!$ if (y%is_dev()) call y%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ y%v(i) = y%v(i) * xx%v(i)
!!$ end do
!!$ call y%set_host()
!!$ class default
!!$ if (xx%is_dev()) call xx%sync()
!!$ if (y%is_dev()) call y%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ y%v(i) = y%v(i) * xx%v(i)
!!$ end do
!!$ call y%set_host()
!!$ end select
!!$ end subroutine d_oacc_mlt_v
!!$
!!$ subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
!!$ use psi_serial_mod
!!$ use psb_string_mod
!!$ implicit none
!!$ real(psb_dpk_), intent(in) :: alpha, beta
!!$ class(psb_d_base_vect_type), intent(inout) :: x
!!$ class(psb_d_base_vect_type), intent(inout) :: y
!!$ class(psb_d_vect_oacc), intent(inout) :: z
!!$ integer(psb_ipk_), intent(out) :: info
!!$ character(len=1), intent(in), optional :: conjgx, conjgy
!!$ integer(psb_ipk_) :: i, n
!!$ logical :: conjgx_, conjgy_
!!$
!!$ conjgx_ = .false.
!!$ conjgy_ = .false.
!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
!!$
!!$ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
!!$
!!$ info = 0
!!$ select type(xx => x)
!!$ class is (psb_d_vect_oacc)
!!$ select type (yy => y)
!!$ class is (psb_d_vect_oacc)
!!$ if (xx%is_host()) call xx%sync()
!!$ if (yy%is_host()) call yy%sync()
!!$ if ((beta /= dzero) .and. (z%is_host())) call z%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
!!$ end do
!!$ call z%set_dev()
!!$ class default
!!$ if (xx%is_dev()) call xx%sync()
!!$ if (yy%is_dev()) call yy%sync()
!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
!!$ end do
!!$ call z%set_host()
!!$ end select
!!$ class default
!!$ if (x%is_dev()) call x%sync()
!!$ if (y%is_dev()) call y%sync()
!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i)
!!$ end do
!!$ call z%set_host()
!!$ end select
!!$ end subroutine d_oacc_mlt_v_2
subroutine d_oacc_axpby_v(m, alpha, x, beta, y, info)
!use psi_serial_mod
implicit none
@ -414,7 +332,7 @@ contains
integer(psb_ipk_) :: i
if ((beta /= dzero) .and. (y%is_dev())) call y%sync()
!$acc parallel loop
do i = 1, m
y%v(i) = alpha * x(i) + beta * y%v(i)
end do
@ -432,44 +350,44 @@ contains
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nx, ny, nz, i
logical :: gpu_done
write(0,*)'upd_xyz'
info = psb_success_
gpu_done = .false.
select type(xx => x)
class is (psb_d_vect_oacc)
select type(yy => y)
select type(yy => y)
class is (psb_d_vect_oacc)
select type(zz => z)
class is (psb_d_vect_oacc)
select type(zz => z)
class is (psb_d_vect_oacc)
if ((beta /= dzero) .and. yy%is_host()) call yy%sync()
if ((delta /= dzero) .and. zz%is_host()) call zz%sync()
if (xx%is_host()) call xx%sync()
nx = size(xx%v)
ny = size(yy%v)
nz = size(zz%v)
if ((nx < m) .or. (ny < m) .or. (nz < m)) then
info = psb_err_internal_error_
else
!$acc parallel loop
do i = 1, m
yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
end do
end if
call yy%set_dev()
call zz%set_dev()
gpu_done = .true.
end select
if ((beta /= dzero) .and. yy%is_host()) call yy%sync()
if ((delta /= dzero) .and. zz%is_host()) call zz%sync()
if (xx%is_host()) call xx%sync()
nx = size(xx%v)
ny = size(yy%v)
nz = size(zz%v)
if ((nx < m) .or. (ny < m) .or. (nz < m)) then
info = psb_err_internal_error_
else
!$acc parallel loop
do i = 1, m
yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
end do
end if
call yy%set_dev()
call zz%set_dev()
gpu_done = .true.
end select
end select
end select
if (.not. gpu_done) then
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
if (z%is_host()) call z%sync()
call y%axpby(m, alpha, x, beta, info)
call z%axpby(m, gamma, y, delta, info)
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
if (z%is_host()) call z%sync()
call y%axpby(m, alpha, x, beta, info)
call z%axpby(m, gamma, y, delta, info)
end if
end subroutine d_oacc_upd_xyz
@ -676,7 +594,7 @@ contains
if (x%is_dev()) call x%sync()
call x%psb_d_base_vect_type%ins(n, irl, val, dupl, info)
call x%set_host()
!$acc update device(x%v)
end subroutine d_oacc_ins_a
@ -687,16 +605,14 @@ contains
class(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
call x%free(info)
call x%all(n, info)
if (info /= 0) then
call psb_errpush(info, 'd_oacc_bld_mn', i_err=(/n, n, n, n, n/))
end if
call x%set_host()
if (acc_is_present(x%v)) then
!$acc exit data delete(x%v) finalize
end if
!$acc enter data copyin(x%v)
call x%sync_space()
end subroutine d_oacc_bld_mn
@ -707,6 +623,7 @@ contains
class(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
call x%free(info)
call psb_realloc(size(this), x%v, info)
if (info /= 0) then
info = psb_err_alloc_request_
@ -714,13 +631,9 @@ contains
i_err=(/size(this), izero, izero, izero, izero/))
return
end if
x%v(:) = this(:)
call x%set_host()
if (acc_is_present(x%v)) then
!$acc exit data delete(x%v) finalize
end if
!$acc enter data copyin(x%v)
call x%sync_space()
end subroutine d_oacc_bld_x
@ -848,54 +761,21 @@ contains
end function d_oacc_dot_a
! subroutine d_oacc_set_vect(x,y)
! implicit none
! class(psb_d_vect_oacc), intent(inout) :: x
! real(psb_dpk_), intent(in) :: y(:)
! integer(psb_ipk_) :: info
! if (size(x%v) /= size(y)) then
! call x%free(info)
! call x%all(size(y),info)
! end if
! x%v(:) = y(:)
! call x%set_host()
! end subroutine d_oacc_set_vect
subroutine d_oacc_to_dev(v)
implicit none
real(psb_dpk_) :: v(:)
!$acc update device(v)
end subroutine d_oacc_to_dev
subroutine d_oacc_to_host(v)
implicit none
real(psb_dpk_) :: v(:)
!$acc update self(v)
end subroutine d_oacc_to_host
subroutine d_oacc_sync_space(x)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
if (allocated(x%v)) then
if (.not.acc_is_present(x%v)) call d_oacc_create_dev(x%v)
end if
contains
subroutine d_oacc_create_dev(v)
implicit none
real(psb_dpk_) :: v(:)
!$acc enter data copyin(v)
end subroutine d_oacc_create_dev
if (allocated(x%v)) call acc_create(x%v)
end subroutine d_oacc_sync_space
subroutine d_oacc_sync(x)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
if (x%is_dev()) then
call d_oacc_to_host(x%v)
call acc_update_self(x%v)
end if
if (x%is_host()) then
call d_oacc_to_dev(x%v)
call acc_update_device(x%v)
end if
call x%set_sync()
end subroutine d_oacc_sync
@ -954,33 +834,36 @@ contains
integer(psb_ipk_), intent(out) :: info
call psb_realloc(n, x%v, info)
if (info == 0) then
call x%set_host()
if (acc_is_present(x%v)) then
!$acc exit data delete(x%v) finalize
end if
!$acc enter data create(x%v)
call x%sync_space()
end if
if (info /= 0) then
info = psb_err_alloc_request_
call psb_errpush(info, 'd_oacc_all', &
i_err=(/n, n, n, n, n/))
end if
call x%set_host()
call x%sync_space()
end subroutine d_oacc_vect_all
subroutine d_oacc_final_vect_free(x)
implicit none
type(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
info = 0
if (allocated(x%v)) then
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
end if
end subroutine d_oacc_final_vect_free
subroutine d_oacc_vect_free(x, info)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
if (allocated(x%v)) then
if (acc_is_present(x%v)) then
!$acc exit data delete(x%v) finalize
end if
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
end if
end subroutine d_oacc_vect_free
function d_oacc_get_size(x) result(res)

@ -42,7 +42,7 @@ module psb_i_oacc_vect_mod
procedure, pass(x) :: get_size => i_oacc_get_size
final :: i_oacc_final_vect_free
end type psb_i_vect_oacc
@ -252,7 +252,7 @@ contains
if (x%is_dev()) call x%sync()
call x%psb_i_base_vect_type%ins(n, irl, val, dupl, info)
call x%set_host()
!$acc update device(x%v)
end subroutine i_oacc_ins_a
@ -263,16 +263,14 @@ contains
class(psb_i_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
call x%free(info)
call x%all(n, info)
if (info /= 0) then
call psb_errpush(info, 'i_oacc_bld_mn', i_err=(/n, n, n, n, n/))
end if
call x%set_host()
if (acc_is_present(x%v)) then
!$acc exit data delete(x%v) finalize
end if
!$acc enter data copyin(x%v)
call x%sync_space()
end subroutine i_oacc_bld_mn
@ -283,6 +281,7 @@ contains
class(psb_i_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
call x%free(info)
call psb_realloc(size(this), x%v, info)
if (info /= 0) then
info = psb_err_alloc_request_
@ -290,13 +289,9 @@ contains
i_err=(/size(this), izero, izero, izero, izero/))
return
end if
x%v(:) = this(:)
call x%set_host()
if (acc_is_present(x%v)) then
!$acc exit data delete(x%v) finalize
end if
!$acc enter data copyin(x%v)
call x%sync_space()
end subroutine i_oacc_bld_x
@ -370,54 +365,21 @@ contains
end function i_oacc_get_fmt
! subroutine i_oacc_set_vect(x,y)
! implicit none
! class(psb_i_vect_oacc), intent(inout) :: x
! integer(psb_ipk_), intent(in) :: y(:)
! integer(psb_ipk_) :: info
! if (size(x%v) /= size(y)) then
! call x%free(info)
! call x%all(size(y),info)
! end if
! x%v(:) = y(:)
! call x%set_host()
! end subroutine i_oacc_set_vect
subroutine i_oacc_to_dev(v)
implicit none
integer(psb_ipk_) :: v(:)
!$acc update device(v)
end subroutine i_oacc_to_dev
subroutine i_oacc_to_host(v)
implicit none
integer(psb_ipk_) :: v(:)
!$acc update self(v)
end subroutine i_oacc_to_host
subroutine i_oacc_sync_space(x)
implicit none
class(psb_i_vect_oacc), intent(inout) :: x
if (allocated(x%v)) then
if (.not.acc_is_present(x%v)) call i_oacc_create_dev(x%v)
end if
contains
subroutine i_oacc_create_dev(v)
implicit none
integer(psb_ipk_) :: v(:)
!$acc enter data copyin(v)
end subroutine i_oacc_create_dev
if (allocated(x%v)) call acc_create(x%v)
end subroutine i_oacc_sync_space
subroutine i_oacc_sync(x)
implicit none
class(psb_i_vect_oacc), intent(inout) :: x
if (x%is_dev()) then
call i_oacc_to_host(x%v)
call acc_update_self(x%v)
end if
if (x%is_host()) then
call i_oacc_to_dev(x%v)
call acc_update_device(x%v)
end if
call x%set_sync()
end subroutine i_oacc_sync
@ -476,33 +438,36 @@ contains
integer(psb_ipk_), intent(out) :: info
call psb_realloc(n, x%v, info)
if (info == 0) then
call x%set_host()
if (acc_is_present(x%v)) then
!$acc exit data delete(x%v) finalize
end if
!$acc enter data create(x%v)
call x%sync_space()
end if
if (info /= 0) then
info = psb_err_alloc_request_
call psb_errpush(info, 'i_oacc_all', &
i_err=(/n, n, n, n, n/))
end if
call x%set_host()
call x%sync_space()
end subroutine i_oacc_vect_all
subroutine i_oacc_final_vect_free(x)
implicit none
type(psb_i_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
info = 0
if (allocated(x%v)) then
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
end if
end subroutine i_oacc_final_vect_free
subroutine i_oacc_vect_free(x, info)
implicit none
class(psb_i_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
if (allocated(x%v)) then
if (acc_is_present(x%v)) then
!$acc exit data delete(x%v) finalize
end if
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
end if
end subroutine i_oacc_vect_free
function i_oacc_get_size(x) result(res)

@ -44,7 +44,7 @@ module psb_l_oacc_vect_mod
procedure, pass(x) :: get_size => l_oacc_get_size
final :: l_oacc_final_vect_free
end type psb_l_vect_oacc
@ -254,7 +254,7 @@ contains
if (x%is_dev()) call x%sync()
call x%psb_l_base_vect_type%ins(n, irl, val, dupl, info)
call x%set_host()
!$acc update device(x%v)
end subroutine l_oacc_ins_a
@ -265,16 +265,14 @@ contains
class(psb_l_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
call x%free(info)
call x%all(n, info)
if (info /= 0) then
call psb_errpush(info, 'l_oacc_bld_mn', i_err=(/n, n, n, n, n/))
end if
call x%set_host()
if (acc_is_present(x%v)) then
!$acc exit data delete(x%v) finalize
end if
!$acc enter data copyin(x%v)
call x%sync_space()
end subroutine l_oacc_bld_mn
@ -285,6 +283,7 @@ contains
class(psb_l_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
call x%free(info)
call psb_realloc(size(this), x%v, info)
if (info /= 0) then
info = psb_err_alloc_request_
@ -292,13 +291,9 @@ contains
i_err=(/size(this), izero, izero, izero, izero/))
return
end if
x%v(:) = this(:)
call x%set_host()
if (acc_is_present(x%v)) then
!$acc exit data delete(x%v) finalize
end if
!$acc enter data copyin(x%v)
call x%sync_space()
end subroutine l_oacc_bld_x
@ -372,54 +367,21 @@ contains
end function l_oacc_get_fmt
! subroutine l_oacc_set_vect(x,y)
! implicit none
! class(psb_l_vect_oacc), intent(inout) :: x
! integer(psb_lpk_), intent(in) :: y(:)
! integer(psb_ipk_) :: info
! if (size(x%v) /= size(y)) then
! call x%free(info)
! call x%all(size(y),info)
! end if
! x%v(:) = y(:)
! call x%set_host()
! end subroutine l_oacc_set_vect
subroutine l_oacc_to_dev(v)
implicit none
integer(psb_lpk_) :: v(:)
!$acc update device(v)
end subroutine l_oacc_to_dev
subroutine l_oacc_to_host(v)
implicit none
integer(psb_lpk_) :: v(:)
!$acc update self(v)
end subroutine l_oacc_to_host
subroutine l_oacc_sync_space(x)
implicit none
class(psb_l_vect_oacc), intent(inout) :: x
if (allocated(x%v)) then
if (.not.acc_is_present(x%v)) call l_oacc_create_dev(x%v)
end if
contains
subroutine l_oacc_create_dev(v)
implicit none
integer(psb_lpk_) :: v(:)
!$acc enter data copyin(v)
end subroutine l_oacc_create_dev
if (allocated(x%v)) call acc_create(x%v)
end subroutine l_oacc_sync_space
subroutine l_oacc_sync(x)
implicit none
class(psb_l_vect_oacc), intent(inout) :: x
if (x%is_dev()) then
call l_oacc_to_host(x%v)
call acc_update_self(x%v)
end if
if (x%is_host()) then
call l_oacc_to_dev(x%v)
call acc_update_device(x%v)
end if
call x%set_sync()
end subroutine l_oacc_sync
@ -478,33 +440,36 @@ contains
integer(psb_ipk_), intent(out) :: info
call psb_realloc(n, x%v, info)
if (info == 0) then
call x%set_host()
if (acc_is_present(x%v)) then
!$acc exit data delete(x%v) finalize
end if
!$acc enter data create(x%v)
call x%sync_space()
end if
if (info /= 0) then
info = psb_err_alloc_request_
call psb_errpush(info, 'l_oacc_all', &
i_err=(/n, n, n, n, n/))
end if
call x%set_host()
call x%sync_space()
end subroutine l_oacc_vect_all
subroutine l_oacc_final_vect_free(x)
implicit none
type(psb_l_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
info = 0
if (allocated(x%v)) then
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
end if
end subroutine l_oacc_final_vect_free
subroutine l_oacc_vect_free(x, info)
implicit none
class(psb_l_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
if (allocated(x%v)) then
if (acc_is_present(x%v)) then
!$acc exit data delete(x%v) finalize
end if
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
end if
end subroutine l_oacc_vect_free
function l_oacc_get_size(x) result(res)

@ -1,6 +1,7 @@
module psb_s_oacc_csr_mat_mod
use iso_c_binding
use openacc
use psb_s_mat_mod
use psb_s_oacc_vect_mod
!use oaccsparse_mod
@ -35,6 +36,7 @@ module psb_s_oacc_csr_mat_mod
procedure, pass(a) :: set_host => s_oacc_csr_set_host
procedure, pass(a) :: set_sync => s_oacc_csr_set_sync
procedure, pass(a) :: set_dev => s_oacc_csr_set_dev
procedure, pass(a) :: free_space => s_oacc_csr_free_space
procedure, pass(a) :: sync_space => s_oacc_csr_sync_space
procedure, pass(a) :: sync => s_oacc_csr_sync
end type psb_s_oacc_csr_sparse_mat
@ -154,22 +156,26 @@ module psb_s_oacc_csr_mat_mod
contains
subroutine s_oacc_csr_free(a)
subroutine s_oacc_csr_free_space(a)
use psb_base_mod
implicit none
class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
if (allocated(a%val)) then
!$acc exit data delete(a%val)
end if
if (allocated(a%ja)) then
!$acc exit data delete(a%ja)
end if
if (allocated(a%irp)) then
!$acc exit data delete(a%irp)
end if
if (allocated(a%val)) call acc_delete_finalize(a%val)
if (allocated(a%ja)) call acc_delete_finalize(a%ja)
if (allocated(a%irp)) call acc_delete_finalize(a%irp)
return
end subroutine s_oacc_csr_free_space
subroutine s_oacc_csr_free(a)
use psb_base_mod
implicit none
class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
call a%free_space()
call a%psb_s_csr_sparse_mat%free()
return
@ -193,7 +199,7 @@ contains
function s_oacc_csr_get_fmt() result(res)
implicit none
character(len=5) :: res
res = 'CSR_oacc'
res = 'CSROA'
end function s_oacc_csr_get_fmt
subroutine s_oacc_csr_all(m, n, nz, a, info)
@ -202,19 +208,8 @@ contains
class(psb_s_oacc_csr_sparse_mat), intent(out) :: a
integer(psb_ipk_), intent(out) :: info
info = 0
if (allocated(a%val)) then
!$acc exit data delete(a%val) finalize
deallocate(a%val, stat=info)
end if
if (allocated(a%ja)) then
!$acc exit data delete(a%ja) finalize
deallocate(a%ja, stat=info)
end if
if (allocated(a%irp)) then
!$acc exit data delete(a%irp) finalize
deallocate(a%irp, stat=info)
end if
info = 0
call a%free()
call a%set_nrows(m)
call a%set_ncols(n)
@ -274,26 +269,9 @@ contains
subroutine s_oacc_csr_sync_space(a)
implicit none
class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
if (allocated(a%val)) then
call s_oacc_create_dev(a%val)
end if
if (allocated(a%ja)) then
call i_oacc_create_dev(a%ja)
end if
if (allocated(a%irp)) then
call i_oacc_create_dev(a%irp)
end if
contains
subroutine s_oacc_create_dev(v)
implicit none
real(psb_spk_), intent(in) :: v(:)
!$acc enter data copyin(v)
end subroutine s_oacc_create_dev
subroutine i_oacc_create_dev(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc enter data copyin(v)
end subroutine i_oacc_create_dev
if (allocated(a%val)) call acc_create(a%val)
if (allocated(a%ja)) call acc_create(a%ja)
if (allocated(a%irp)) call acc_create(a%irp)
end subroutine s_oacc_csr_sync_space
subroutine s_oacc_csr_sync(a)
@ -304,40 +282,16 @@ contains
tmpa => a
if (a%is_dev()) then
call s_oacc_csr_to_host(a%val)
call i_oacc_csr_to_host(a%ja)
call i_oacc_csr_to_host(a%irp)
call acc_update_self(a%val)
call acc_update_self(a%ja)
call acc_update_self(a%irp)
else if (a%is_host()) then
call s_oacc_csr_to_dev(a%val)
call i_oacc_csr_to_dev(a%ja)
call i_oacc_csr_to_dev(a%irp)
call acc_update_device(a%val)
call acc_update_device(a%ja)
call acc_update_device(a%irp)
end if
call tmpa%set_sync()
end subroutine s_oacc_csr_sync
subroutine s_oacc_csr_to_dev(v)
implicit none
real(psb_spk_), intent(in) :: v(:)
!$acc update device(v)
end subroutine s_oacc_csr_to_dev
subroutine s_oacc_csr_to_host(v)
implicit none
real(psb_spk_), intent(in) :: v(:)
!$acc update self(v)
end subroutine s_oacc_csr_to_host
subroutine i_oacc_csr_to_dev(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc update device(v)
end subroutine i_oacc_csr_to_dev
subroutine i_oacc_csr_to_host(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc update self(v)
end subroutine i_oacc_csr_to_host
end module psb_s_oacc_csr_mat_mod

@ -1,5 +1,6 @@
module psb_s_oacc_ell_mat_mod
use iso_c_binding
use openacc
use psb_s_mat_mod
use psb_s_ell_mat_mod
use psb_s_oacc_vect_mod
@ -21,6 +22,7 @@ module psb_s_oacc_ell_mat_mod
procedure, pass(a) :: set_dev => s_oacc_ell_set_dev
procedure, pass(a) :: sync_space => s_oacc_ell_sync_space
procedure, pass(a) :: sync => s_oacc_ell_sync
procedure, pass(a) :: free_space => s_oacc_ell_free_space
procedure, pass(a) :: free => s_oacc_ell_free
procedure, pass(a) :: vect_mv => psb_s_oacc_ell_vect_mv
procedure, pass(a) :: in_vect_sv => psb_s_oacc_ell_inner_vect_sv
@ -152,31 +154,32 @@ module psb_s_oacc_ell_mat_mod
contains
subroutine s_oacc_ell_free(a)
subroutine s_oacc_ell_free_space(a)
use psb_base_mod
implicit none
class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
if (allocated(a%val)) then
!$acc exit data delete(a%val)
end if
if (allocated(a%ja)) then
!$acc exit data delete(a%ja)
end if
if (allocated(a%irn)) then
!$acc exit data delete(a%irn)
end if
if (allocated(a%idiag)) then
!$acc exit data delete(a%idiag)
end if
if (allocated(a%val)) call acc_delete_finalize(a%val)
if (allocated(a%ja)) call acc_delete_finalize(a%ja)
if (allocated(a%irn)) call acc_delete_finalize(a%irn)
if (allocated(a%idiag)) call acc_delete_finalize(a%idiag)
return
end subroutine s_oacc_ell_free_space
subroutine s_oacc_ell_free(a)
use psb_base_mod
implicit none
class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
call a%free_space()
call a%psb_s_ell_sparse_mat%free()
return
end subroutine s_oacc_ell_free
function s_oacc_ell_sizeof(a) result(res)
implicit none
class(psb_s_oacc_ell_sparse_mat), intent(in) :: a
@ -196,41 +199,12 @@ contains
implicit none
class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
if (allocated(a%val)) then
call s_oacc_create_dev(a%val)
end if
if (allocated(a%ja)) then
call i_oacc_create_dev(a%ja)
end if
if (allocated(a%irn)) then
call i_oacc_create_dev_scalar(a%irn)
end if
if (allocated(a%idiag)) then
call i_oacc_create_dev_scalar(a%idiag)
end if
contains
subroutine s_oacc_create_dev(v)
implicit none
real(psb_spk_), intent(in) :: v(:,:)
!$acc enter data copyin(v)
end subroutine s_oacc_create_dev
subroutine i_oacc_create_dev(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:,:)
!$acc enter data copyin(v)
end subroutine i_oacc_create_dev
subroutine i_oacc_create_dev_scalar(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc enter data copyin(v)
end subroutine i_oacc_create_dev_scalar
if (allocated(a%val)) call acc_create(a%val)
if (allocated(a%ja)) call acc_create(a%ja)
if (allocated(a%irn)) call acc_create(a%irn)
if (allocated(a%idiag)) call acc_create(a%idiag)
end subroutine s_oacc_ell_sync_space
function s_oacc_ell_is_host(a) result(res)
implicit none
class(psb_s_oacc_ell_sparse_mat), intent(in) :: a
@ -279,7 +253,7 @@ contains
function s_oacc_ell_get_fmt() result(res)
implicit none
character(len=5) :: res
res = 'ELL_oacc'
res = 'ELLOA'
end function s_oacc_ell_get_fmt
subroutine s_oacc_ell_sync(a)
@ -290,64 +264,17 @@ contains
tmpa => a
if (a%is_dev()) then
call s_oacc_ell_to_host(a%val)
call i_oacc_ell_to_host(a%ja)
call i_oacc_ell_to_host_scalar(a%irn)
call i_oacc_ell_to_host_scalar(a%idiag)
call acc_update_self(a%val)
call acc_update_self(a%ja)
call acc_update_self(a%irn)
call acc_update_self(a%idiag)
else if (a%is_host()) then
call s_oacc_ell_to_dev(a%val)
call i_oacc_ell_to_dev(a%ja)
call i_oacc_ell_to_dev_scalar(a%irn)
call i_oacc_ell_to_dev_scalar(a%idiag)
call acc_update_device(a%val)
call acc_update_device(a%ja)
call acc_update_device(a%irn)
call acc_update_device(a%idiag)
end if
call tmpa%set_sync()
end subroutine s_oacc_ell_sync
subroutine s_oacc_ell_to_dev_scalar(v)
implicit none
real(psb_spk_), intent(in) :: v(:)
!$acc update device(v)
end subroutine s_oacc_ell_to_dev_scalar
subroutine s_oacc_ell_to_dev(v)
implicit none
real(psb_spk_), intent(in) :: v(:,:)
!$acc update device(v)
end subroutine s_oacc_ell_to_dev
subroutine s_oacc_ell_to_host(v)
implicit none
real(psb_spk_), intent(in) :: v(:,:)
!$acc update self(v)
end subroutine s_oacc_ell_to_host
subroutine s_oacc_ell_to_host_scalar(v)
implicit none
real(psb_spk_), intent(in) :: v(:)
!$acc update self(v)
end subroutine s_oacc_ell_to_host_scalar
subroutine i_oacc_ell_to_dev(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:,:)
!$acc update device(v)
end subroutine i_oacc_ell_to_dev
subroutine i_oacc_ell_to_dev_scalar(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc update device(v)
end subroutine i_oacc_ell_to_dev_scalar
subroutine i_oacc_ell_to_host(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:,:)
!$acc update self(v)
end subroutine i_oacc_ell_to_host
subroutine i_oacc_ell_to_host_scalar(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc update self(v)
end subroutine i_oacc_ell_to_host_scalar
end module psb_s_oacc_ell_mat_mod

@ -1,5 +1,6 @@
module psb_s_oacc_hll_mat_mod
use iso_c_binding
use openacc
use psb_s_mat_mod
use psb_s_hll_mat_mod
use psb_s_oacc_vect_mod
@ -21,6 +22,7 @@ module psb_s_oacc_hll_mat_mod
procedure, pass(a) :: set_dev => s_oacc_hll_set_dev
procedure, pass(a) :: sync_space => s_oacc_hll_sync_space
procedure, pass(a) :: sync => s_oacc_hll_sync
procedure, pass(a) :: free_space => s_oacc_hll_free_space
procedure, pass(a) :: free => s_oacc_hll_free
procedure, pass(a) :: vect_mv => psb_s_oacc_hll_vect_mv
procedure, pass(a) :: in_vect_sv => psb_s_oacc_hll_inner_vect_sv
@ -152,28 +154,28 @@ module psb_s_oacc_hll_mat_mod
contains
subroutine s_oacc_hll_free(a)
subroutine s_oacc_hll_free_space(a)
use psb_base_mod
implicit none
class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
if (allocated(a%val)) then
!$acc exit data delete(a%val)
end if
if (allocated(a%ja)) then
!$acc exit data delete(a%ja)
end if
if (allocated(a%irn)) then
!$acc exit data delete(a%irn)
end if
if (allocated(a%idiag)) then
!$acc exit data delete(a%idiag)
end if
if (allocated(a%hkoffs)) then
!$acc exit data delete(a%hkoffs)
end if
if (allocated(a%val)) call acc_delete_finalize(a%val)
if (allocated(a%ja)) call acc_delete_finalize(a%ja)
if (allocated(a%irn)) call acc_delete_finalize(a%irn)
if (allocated(a%idiag)) call acc_delete_finalize(a%idiag)
if (allocated(a%hkoffs)) call acc_delete_finalize(a%hkoffs)
return
end subroutine s_oacc_hll_free_space
subroutine s_oacc_hll_free(a)
use psb_base_mod
implicit none
class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
call a%free_space()
call a%psb_s_hll_sparse_mat%free()
return
@ -244,48 +246,18 @@ contains
function s_oacc_hll_get_fmt() result(res)
implicit none
character(len=5) :: res
res = 'HLL_oacc'
res = 'HLLOA'
end function s_oacc_hll_get_fmt
subroutine s_oacc_hll_sync_space(a)
implicit none
class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
if (allocated(a%val)) then
call s_oacc_create_dev(a%val)
end if
if (allocated(a%ja)) then
call i_oacc_create_dev(a%ja)
end if
if (allocated(a%irn)) then
call i_oacc_create_dev_scalar(a%irn)
end if
if (allocated(a%idiag)) then
call i_oacc_create_dev_scalar(a%idiag)
end if
if (allocated(a%hkoffs)) then
call i_oacc_create_dev_scalar(a%hkoffs)
end if
contains
subroutine s_oacc_create_dev(v)
implicit none
real(psb_spk_), intent(in) :: v(:)
!$acc enter data copyin(v)
end subroutine s_oacc_create_dev
subroutine i_oacc_create_dev(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc enter data copyin(v)
end subroutine i_oacc_create_dev
subroutine i_oacc_create_dev_scalar(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc enter data copyin(v)
end subroutine i_oacc_create_dev_scalar
if (allocated(a%val)) call acc_create(a%val)
if (allocated(a%ja)) call acc_create(a%ja)
if (allocated(a%irn)) call acc_create(a%irn)
if (allocated(a%idiag)) call acc_create(a%idiag)
if (allocated(a%hkoffs)) call acc_create(a%hkoffs)
end subroutine s_oacc_hll_sync_space
@ -297,56 +269,19 @@ contains
tmpa => a
if (a%is_dev()) then
call s_oacc_hll_to_host(a%val)
call i_oacc_hll_to_host(a%ja)
call i_oacc_hll_to_host_scalar(a%irn)
call i_oacc_hll_to_host_scalar(a%idiag)
call i_oacc_hll_to_host_scalar(a%hkoffs)
call acc_update_self(a%val)
call acc_update_self(a%ja)
call acc_update_self(a%irn)
call acc_update_self(a%idiag)
call acc_update_self(a%hkoffs)
else if (a%is_host()) then
call s_oacc_hll_to_dev(a%val)
call i_oacc_hll_to_dev(a%ja)
call i_oacc_hll_to_dev_scalar(a%irn)
call i_oacc_hll_to_dev_scalar(a%idiag)
call i_oacc_hll_to_dev_scalar(a%hkoffs)
call acc_update_device(a%val)
call acc_update_device(a%ja)
call acc_update_device(a%irn)
call acc_update_device(a%idiag)
call acc_update_device(a%hkoffs)
end if
call tmpa%set_sync()
end subroutine s_oacc_hll_sync
subroutine s_oacc_hll_to_host(v)
implicit none
real(psb_spk_), intent(in) :: v(:)
!$acc update self(v)
end subroutine s_oacc_hll_to_host
subroutine s_oacc_hll_to_dev(v)
implicit none
real(psb_spk_), intent(in) :: v(:)
!$acc update device(v)
end subroutine s_oacc_hll_to_dev
subroutine i_oacc_hll_to_host(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc update self(v)
end subroutine i_oacc_hll_to_host
subroutine i_oacc_hll_to_dev(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc update device(v)
end subroutine i_oacc_hll_to_dev
subroutine i_oacc_hll_to_host_scalar(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc update self(v)
end subroutine i_oacc_hll_to_host_scalar
subroutine i_oacc_hll_to_dev_scalar(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc update device(v)
end subroutine i_oacc_hll_to_dev_scalar
end module psb_s_oacc_hll_mat_mod

@ -59,7 +59,7 @@ module psb_s_oacc_vect_mod
procedure, pass(x) :: asum => s_oacc_asum
procedure, pass(x) :: absval1 => s_oacc_absval1
procedure, pass(x) :: absval2 => s_oacc_absval2
final :: s_oacc_final_vect_free
end type psb_s_vect_oacc
interface
@ -164,21 +164,25 @@ contains
implicit none
class(psb_s_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
real(psb_spk_) :: res
real(psb_spk_) :: mx
real(psb_spk_) :: res
integer(psb_ipk_) :: info
if (x%is_host()) call x%sync()
mx = s_oacc_amax(n,x)
res = s_inner_oacc_nrm2(n, mx, x%v)
!!$ write(0,*)'oacc_nrm2'
res = s_inner_oacc_nrm2(n, x%v)
contains
function s_inner_oacc_nrm2(n, mx,x) result(res)
function s_inner_oacc_nrm2(n, x) result(res)
integer(psb_ipk_) :: n
real(psb_spk_) :: x(:)
real(psb_spk_) :: mx, res
real(psb_spk_) :: sum
real(psb_spk_) :: res
real(psb_spk_) :: sum, mx
integer(psb_ipk_) :: i
sum = 0.0
mx = szero
!$acc parallel loop reduction(max:mx)
do i = 1, n
if (abs(x(i)) > mx) mx = abs(x(i))
end do
sum = szero
!$acc parallel loop reduction(+:sum)
do i = 1, n
sum = sum + abs(x(i)/mx)**2
@ -203,7 +207,7 @@ contains
real(psb_spk_) :: res
real(psb_spk_) :: max_val
integer(psb_ipk_) :: i
max_val = -huge(0.0)
max_val = szero
!$acc parallel loop reduction(max:max_val)
do i = 1, n
if (abs(x(i)) > max_val) max_val = abs(x(i))
@ -228,7 +232,7 @@ contains
real(psb_spk_) :: x(:)
real(psb_spk_) :: res
integer(psb_ipk_) :: i
res = 0.0
res = szero
!$acc parallel loop reduction(+:res)
do i = 1, n
res = res + abs(x(i))
@ -271,92 +275,6 @@ contains
call z%set_host()
end subroutine s_oacc_mlt_a_2
!!$ subroutine s_oacc_mlt_v(x, y, info)
!!$ implicit none
!!$ class(psb_s_base_vect_type), intent(inout) :: x
!!$ class(psb_s_vect_oacc), intent(inout) :: y
!!$ integer(psb_ipk_), intent(out) :: info
!!$
!!$ integer(psb_ipk_) :: i, n
!!$
!!$ info = 0
!!$ n = min(x%get_nrows(), y%get_nrows())
!!$ select type(xx => x)
!!$ type is (psb_s_base_vect_type)
!!$ if (y%is_dev()) call y%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ y%v(i) = y%v(i) * xx%v(i)
!!$ end do
!!$ call y%set_host()
!!$ class default
!!$ if (xx%is_dev()) call xx%sync()
!!$ if (y%is_dev()) call y%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ y%v(i) = y%v(i) * xx%v(i)
!!$ end do
!!$ call y%set_host()
!!$ end select
!!$ end subroutine s_oacc_mlt_v
!!$
!!$ subroutine s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
!!$ use psi_serial_mod
!!$ use psb_string_mod
!!$ implicit none
!!$ real(psb_spk_), intent(in) :: alpha, beta
!!$ class(psb_s_base_vect_type), intent(inout) :: x
!!$ class(psb_s_base_vect_type), intent(inout) :: y
!!$ class(psb_s_vect_oacc), intent(inout) :: z
!!$ integer(psb_ipk_), intent(out) :: info
!!$ character(len=1), intent(in), optional :: conjgx, conjgy
!!$ integer(psb_ipk_) :: i, n
!!$ logical :: conjgx_, conjgy_
!!$
!!$ conjgx_ = .false.
!!$ conjgy_ = .false.
!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
!!$
!!$ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
!!$
!!$ info = 0
!!$ select type(xx => x)
!!$ class is (psb_s_vect_oacc)
!!$ select type (yy => y)
!!$ class is (psb_s_vect_oacc)
!!$ if (xx%is_host()) call xx%sync()
!!$ if (yy%is_host()) call yy%sync()
!!$ if ((beta /= szero) .and. (z%is_host())) call z%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
!!$ end do
!!$ call z%set_dev()
!!$ class default
!!$ if (xx%is_dev()) call xx%sync()
!!$ if (yy%is_dev()) call yy%sync()
!!$ if ((beta /= szero) .and. (z%is_dev())) call z%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
!!$ end do
!!$ call z%set_host()
!!$ end select
!!$ class default
!!$ if (x%is_dev()) call x%sync()
!!$ if (y%is_dev()) call y%sync()
!!$ if ((beta /= szero) .and. (z%is_dev())) call z%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i)
!!$ end do
!!$ call z%set_host()
!!$ end select
!!$ end subroutine s_oacc_mlt_v_2
subroutine s_oacc_axpby_v(m, alpha, x, beta, y, info)
!use psi_serial_mod
implicit none
@ -414,7 +332,7 @@ contains
integer(psb_ipk_) :: i
if ((beta /= szero) .and. (y%is_dev())) call y%sync()
!$acc parallel loop
do i = 1, m
y%v(i) = alpha * x(i) + beta * y%v(i)
end do
@ -432,44 +350,44 @@ contains
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nx, ny, nz, i
logical :: gpu_done
write(0,*)'upd_xyz'
info = psb_success_
gpu_done = .false.
select type(xx => x)
class is (psb_s_vect_oacc)
select type(yy => y)
select type(yy => y)
class is (psb_s_vect_oacc)
select type(zz => z)
class is (psb_s_vect_oacc)
select type(zz => z)
class is (psb_s_vect_oacc)
if ((beta /= szero) .and. yy%is_host()) call yy%sync()
if ((delta /= szero) .and. zz%is_host()) call zz%sync()
if (xx%is_host()) call xx%sync()
nx = size(xx%v)
ny = size(yy%v)
nz = size(zz%v)
if ((nx < m) .or. (ny < m) .or. (nz < m)) then
info = psb_err_internal_error_
else
!$acc parallel loop
do i = 1, m
yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
end do
end if
call yy%set_dev()
call zz%set_dev()
gpu_done = .true.
end select
if ((beta /= szero) .and. yy%is_host()) call yy%sync()
if ((delta /= szero) .and. zz%is_host()) call zz%sync()
if (xx%is_host()) call xx%sync()
nx = size(xx%v)
ny = size(yy%v)
nz = size(zz%v)
if ((nx < m) .or. (ny < m) .or. (nz < m)) then
info = psb_err_internal_error_
else
!$acc parallel loop
do i = 1, m
yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
end do
end if
call yy%set_dev()
call zz%set_dev()
gpu_done = .true.
end select
end select
end select
if (.not. gpu_done) then
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
if (z%is_host()) call z%sync()
call y%axpby(m, alpha, x, beta, info)
call z%axpby(m, gamma, y, delta, info)
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
if (z%is_host()) call z%sync()
call y%axpby(m, alpha, x, beta, info)
call z%axpby(m, gamma, y, delta, info)
end if
end subroutine s_oacc_upd_xyz
@ -676,7 +594,7 @@ contains
if (x%is_dev()) call x%sync()
call x%psb_s_base_vect_type%ins(n, irl, val, dupl, info)
call x%set_host()
!$acc update device(x%v)
end subroutine s_oacc_ins_a
@ -687,16 +605,14 @@ contains
class(psb_s_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
call x%free(info)
call x%all(n, info)
if (info /= 0) then
call psb_errpush(info, 's_oacc_bld_mn', i_err=(/n, n, n, n, n/))
end if
call x%set_host()
if (acc_is_present(x%v)) then
!$acc exit data delete(x%v) finalize
end if
!$acc enter data copyin(x%v)
call x%sync_space()
end subroutine s_oacc_bld_mn
@ -707,6 +623,7 @@ contains
class(psb_s_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
call x%free(info)
call psb_realloc(size(this), x%v, info)
if (info /= 0) then
info = psb_err_alloc_request_
@ -714,13 +631,9 @@ contains
i_err=(/size(this), izero, izero, izero, izero/))
return
end if
x%v(:) = this(:)
call x%set_host()
if (acc_is_present(x%v)) then
!$acc exit data delete(x%v) finalize
end if
!$acc enter data copyin(x%v)
call x%sync_space()
end subroutine s_oacc_bld_x
@ -848,54 +761,21 @@ contains
end function s_oacc_dot_a
! subroutine s_oacc_set_vect(x,y)
! implicit none
! class(psb_s_vect_oacc), intent(inout) :: x
! real(psb_spk_), intent(in) :: y(:)
! integer(psb_ipk_) :: info
! if (size(x%v) /= size(y)) then
! call x%free(info)
! call x%all(size(y),info)
! end if
! x%v(:) = y(:)
! call x%set_host()
! end subroutine s_oacc_set_vect
subroutine s_oacc_to_dev(v)
implicit none
real(psb_spk_) :: v(:)
!$acc update device(v)
end subroutine s_oacc_to_dev
subroutine s_oacc_to_host(v)
implicit none
real(psb_spk_) :: v(:)
!$acc update self(v)
end subroutine s_oacc_to_host
subroutine s_oacc_sync_space(x)
implicit none
class(psb_s_vect_oacc), intent(inout) :: x
if (allocated(x%v)) then
if (.not.acc_is_present(x%v)) call s_oacc_create_dev(x%v)
end if
contains
subroutine s_oacc_create_dev(v)
implicit none
real(psb_spk_) :: v(:)
!$acc enter data copyin(v)
end subroutine s_oacc_create_dev
if (allocated(x%v)) call acc_create(x%v)
end subroutine s_oacc_sync_space
subroutine s_oacc_sync(x)
implicit none
class(psb_s_vect_oacc), intent(inout) :: x
if (x%is_dev()) then
call s_oacc_to_host(x%v)
call acc_update_self(x%v)
end if
if (x%is_host()) then
call s_oacc_to_dev(x%v)
call acc_update_device(x%v)
end if
call x%set_sync()
end subroutine s_oacc_sync
@ -954,33 +834,36 @@ contains
integer(psb_ipk_), intent(out) :: info
call psb_realloc(n, x%v, info)
if (info == 0) then
call x%set_host()
if (acc_is_present(x%v)) then
!$acc exit data delete(x%v) finalize
end if
!$acc enter data create(x%v)
call x%sync_space()
end if
if (info /= 0) then
info = psb_err_alloc_request_
call psb_errpush(info, 's_oacc_all', &
i_err=(/n, n, n, n, n/))
end if
call x%set_host()
call x%sync_space()
end subroutine s_oacc_vect_all
subroutine s_oacc_final_vect_free(x)
implicit none
type(psb_s_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
info = 0
if (allocated(x%v)) then
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
end if
end subroutine s_oacc_final_vect_free
subroutine s_oacc_vect_free(x, info)
implicit none
class(psb_s_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
if (allocated(x%v)) then
if (acc_is_present(x%v)) then
!$acc exit data delete(x%v) finalize
end if
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
end if
end subroutine s_oacc_vect_free
function s_oacc_get_size(x) result(res)

@ -1,6 +1,7 @@
module psb_z_oacc_csr_mat_mod
use iso_c_binding
use openacc
use psb_z_mat_mod
use psb_z_oacc_vect_mod
!use oaccsparse_mod
@ -35,6 +36,7 @@ module psb_z_oacc_csr_mat_mod
procedure, pass(a) :: set_host => z_oacc_csr_set_host
procedure, pass(a) :: set_sync => z_oacc_csr_set_sync
procedure, pass(a) :: set_dev => z_oacc_csr_set_dev
procedure, pass(a) :: free_space => z_oacc_csr_free_space
procedure, pass(a) :: sync_space => z_oacc_csr_sync_space
procedure, pass(a) :: sync => z_oacc_csr_sync
end type psb_z_oacc_csr_sparse_mat
@ -154,22 +156,26 @@ module psb_z_oacc_csr_mat_mod
contains
subroutine z_oacc_csr_free(a)
subroutine z_oacc_csr_free_space(a)
use psb_base_mod
implicit none
class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
if (allocated(a%val)) then
!$acc exit data delete(a%val)
end if
if (allocated(a%ja)) then
!$acc exit data delete(a%ja)
end if
if (allocated(a%irp)) then
!$acc exit data delete(a%irp)
end if
if (allocated(a%val)) call acc_delete_finalize(a%val)
if (allocated(a%ja)) call acc_delete_finalize(a%ja)
if (allocated(a%irp)) call acc_delete_finalize(a%irp)
return
end subroutine z_oacc_csr_free_space
subroutine z_oacc_csr_free(a)
use psb_base_mod
implicit none
class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
call a%free_space()
call a%psb_z_csr_sparse_mat%free()
return
@ -193,7 +199,7 @@ contains
function z_oacc_csr_get_fmt() result(res)
implicit none
character(len=5) :: res
res = 'CSR_oacc'
res = 'CSROA'
end function z_oacc_csr_get_fmt
subroutine z_oacc_csr_all(m, n, nz, a, info)
@ -202,19 +208,8 @@ contains
class(psb_z_oacc_csr_sparse_mat), intent(out) :: a
integer(psb_ipk_), intent(out) :: info
info = 0
if (allocated(a%val)) then
!$acc exit data delete(a%val) finalize
deallocate(a%val, stat=info)
end if
if (allocated(a%ja)) then
!$acc exit data delete(a%ja) finalize
deallocate(a%ja, stat=info)
end if
if (allocated(a%irp)) then
!$acc exit data delete(a%irp) finalize
deallocate(a%irp, stat=info)
end if
info = 0
call a%free()
call a%set_nrows(m)
call a%set_ncols(n)
@ -274,26 +269,9 @@ contains
subroutine z_oacc_csr_sync_space(a)
implicit none
class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
if (allocated(a%val)) then
call z_oacc_create_dev(a%val)
end if
if (allocated(a%ja)) then
call i_oacc_create_dev(a%ja)
end if
if (allocated(a%irp)) then
call i_oacc_create_dev(a%irp)
end if
contains
subroutine z_oacc_create_dev(v)
implicit none
complex(psb_dpk_), intent(in) :: v(:)
!$acc enter data copyin(v)
end subroutine z_oacc_create_dev
subroutine i_oacc_create_dev(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc enter data copyin(v)
end subroutine i_oacc_create_dev
if (allocated(a%val)) call acc_create(a%val)
if (allocated(a%ja)) call acc_create(a%ja)
if (allocated(a%irp)) call acc_create(a%irp)
end subroutine z_oacc_csr_sync_space
subroutine z_oacc_csr_sync(a)
@ -304,40 +282,16 @@ contains
tmpa => a
if (a%is_dev()) then
call z_oacc_csr_to_host(a%val)
call i_oacc_csr_to_host(a%ja)
call i_oacc_csr_to_host(a%irp)
call acc_update_self(a%val)
call acc_update_self(a%ja)
call acc_update_self(a%irp)
else if (a%is_host()) then
call z_oacc_csr_to_dev(a%val)
call i_oacc_csr_to_dev(a%ja)
call i_oacc_csr_to_dev(a%irp)
call acc_update_device(a%val)
call acc_update_device(a%ja)
call acc_update_device(a%irp)
end if
call tmpa%set_sync()
end subroutine z_oacc_csr_sync
subroutine z_oacc_csr_to_dev(v)
implicit none
complex(psb_dpk_), intent(in) :: v(:)
!$acc update device(v)
end subroutine z_oacc_csr_to_dev
subroutine z_oacc_csr_to_host(v)
implicit none
complex(psb_dpk_), intent(in) :: v(:)
!$acc update self(v)
end subroutine z_oacc_csr_to_host
subroutine i_oacc_csr_to_dev(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc update device(v)
end subroutine i_oacc_csr_to_dev
subroutine i_oacc_csr_to_host(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc update self(v)
end subroutine i_oacc_csr_to_host
end module psb_z_oacc_csr_mat_mod

@ -1,5 +1,6 @@
module psb_z_oacc_ell_mat_mod
use iso_c_binding
use openacc
use psb_z_mat_mod
use psb_z_ell_mat_mod
use psb_z_oacc_vect_mod
@ -21,6 +22,7 @@ module psb_z_oacc_ell_mat_mod
procedure, pass(a) :: set_dev => z_oacc_ell_set_dev
procedure, pass(a) :: sync_space => z_oacc_ell_sync_space
procedure, pass(a) :: sync => z_oacc_ell_sync
procedure, pass(a) :: free_space => z_oacc_ell_free_space
procedure, pass(a) :: free => z_oacc_ell_free
procedure, pass(a) :: vect_mv => psb_z_oacc_ell_vect_mv
procedure, pass(a) :: in_vect_sv => psb_z_oacc_ell_inner_vect_sv
@ -152,31 +154,32 @@ module psb_z_oacc_ell_mat_mod
contains
subroutine z_oacc_ell_free(a)
subroutine z_oacc_ell_free_space(a)
use psb_base_mod
implicit none
class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
if (allocated(a%val)) then
!$acc exit data delete(a%val)
end if
if (allocated(a%ja)) then
!$acc exit data delete(a%ja)
end if
if (allocated(a%irn)) then
!$acc exit data delete(a%irn)
end if
if (allocated(a%idiag)) then
!$acc exit data delete(a%idiag)
end if
if (allocated(a%val)) call acc_delete_finalize(a%val)
if (allocated(a%ja)) call acc_delete_finalize(a%ja)
if (allocated(a%irn)) call acc_delete_finalize(a%irn)
if (allocated(a%idiag)) call acc_delete_finalize(a%idiag)
return
end subroutine z_oacc_ell_free_space
subroutine z_oacc_ell_free(a)
use psb_base_mod
implicit none
class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
call a%free_space()
call a%psb_z_ell_sparse_mat%free()
return
end subroutine z_oacc_ell_free
function z_oacc_ell_sizeof(a) result(res)
implicit none
class(psb_z_oacc_ell_sparse_mat), intent(in) :: a
@ -196,41 +199,12 @@ contains
implicit none
class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
if (allocated(a%val)) then
call z_oacc_create_dev(a%val)
end if
if (allocated(a%ja)) then
call i_oacc_create_dev(a%ja)
end if
if (allocated(a%irn)) then
call i_oacc_create_dev_scalar(a%irn)
end if
if (allocated(a%idiag)) then
call i_oacc_create_dev_scalar(a%idiag)
end if
contains
subroutine z_oacc_create_dev(v)
implicit none
complex(psb_dpk_), intent(in) :: v(:,:)
!$acc enter data copyin(v)
end subroutine z_oacc_create_dev
subroutine i_oacc_create_dev(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:,:)
!$acc enter data copyin(v)
end subroutine i_oacc_create_dev
subroutine i_oacc_create_dev_scalar(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc enter data copyin(v)
end subroutine i_oacc_create_dev_scalar
if (allocated(a%val)) call acc_create(a%val)
if (allocated(a%ja)) call acc_create(a%ja)
if (allocated(a%irn)) call acc_create(a%irn)
if (allocated(a%idiag)) call acc_create(a%idiag)
end subroutine z_oacc_ell_sync_space
function z_oacc_ell_is_host(a) result(res)
implicit none
class(psb_z_oacc_ell_sparse_mat), intent(in) :: a
@ -279,7 +253,7 @@ contains
function z_oacc_ell_get_fmt() result(res)
implicit none
character(len=5) :: res
res = 'ELL_oacc'
res = 'ELLOA'
end function z_oacc_ell_get_fmt
subroutine z_oacc_ell_sync(a)
@ -290,64 +264,17 @@ contains
tmpa => a
if (a%is_dev()) then
call z_oacc_ell_to_host(a%val)
call i_oacc_ell_to_host(a%ja)
call i_oacc_ell_to_host_scalar(a%irn)
call i_oacc_ell_to_host_scalar(a%idiag)
call acc_update_self(a%val)
call acc_update_self(a%ja)
call acc_update_self(a%irn)
call acc_update_self(a%idiag)
else if (a%is_host()) then
call z_oacc_ell_to_dev(a%val)
call i_oacc_ell_to_dev(a%ja)
call i_oacc_ell_to_dev_scalar(a%irn)
call i_oacc_ell_to_dev_scalar(a%idiag)
call acc_update_device(a%val)
call acc_update_device(a%ja)
call acc_update_device(a%irn)
call acc_update_device(a%idiag)
end if
call tmpa%set_sync()
end subroutine z_oacc_ell_sync
subroutine z_oacc_ell_to_dev_scalar(v)
implicit none
complex(psb_dpk_), intent(in) :: v(:)
!$acc update device(v)
end subroutine z_oacc_ell_to_dev_scalar
subroutine z_oacc_ell_to_dev(v)
implicit none
complex(psb_dpk_), intent(in) :: v(:,:)
!$acc update device(v)
end subroutine z_oacc_ell_to_dev
subroutine z_oacc_ell_to_host(v)
implicit none
complex(psb_dpk_), intent(in) :: v(:,:)
!$acc update self(v)
end subroutine z_oacc_ell_to_host
subroutine z_oacc_ell_to_host_scalar(v)
implicit none
complex(psb_dpk_), intent(in) :: v(:)
!$acc update self(v)
end subroutine z_oacc_ell_to_host_scalar
subroutine i_oacc_ell_to_dev(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:,:)
!$acc update device(v)
end subroutine i_oacc_ell_to_dev
subroutine i_oacc_ell_to_dev_scalar(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc update device(v)
end subroutine i_oacc_ell_to_dev_scalar
subroutine i_oacc_ell_to_host(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:,:)
!$acc update self(v)
end subroutine i_oacc_ell_to_host
subroutine i_oacc_ell_to_host_scalar(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc update self(v)
end subroutine i_oacc_ell_to_host_scalar
end module psb_z_oacc_ell_mat_mod

@ -1,5 +1,6 @@
module psb_z_oacc_hll_mat_mod
use iso_c_binding
use openacc
use psb_z_mat_mod
use psb_z_hll_mat_mod
use psb_z_oacc_vect_mod
@ -21,6 +22,7 @@ module psb_z_oacc_hll_mat_mod
procedure, pass(a) :: set_dev => z_oacc_hll_set_dev
procedure, pass(a) :: sync_space => z_oacc_hll_sync_space
procedure, pass(a) :: sync => z_oacc_hll_sync
procedure, pass(a) :: free_space => z_oacc_hll_free_space
procedure, pass(a) :: free => z_oacc_hll_free
procedure, pass(a) :: vect_mv => psb_z_oacc_hll_vect_mv
procedure, pass(a) :: in_vect_sv => psb_z_oacc_hll_inner_vect_sv
@ -152,28 +154,28 @@ module psb_z_oacc_hll_mat_mod
contains
subroutine z_oacc_hll_free(a)
subroutine z_oacc_hll_free_space(a)
use psb_base_mod
implicit none
class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
if (allocated(a%val)) then
!$acc exit data delete(a%val)
end if
if (allocated(a%ja)) then
!$acc exit data delete(a%ja)
end if
if (allocated(a%irn)) then
!$acc exit data delete(a%irn)
end if
if (allocated(a%idiag)) then
!$acc exit data delete(a%idiag)
end if
if (allocated(a%hkoffs)) then
!$acc exit data delete(a%hkoffs)
end if
if (allocated(a%val)) call acc_delete_finalize(a%val)
if (allocated(a%ja)) call acc_delete_finalize(a%ja)
if (allocated(a%irn)) call acc_delete_finalize(a%irn)
if (allocated(a%idiag)) call acc_delete_finalize(a%idiag)
if (allocated(a%hkoffs)) call acc_delete_finalize(a%hkoffs)
return
end subroutine z_oacc_hll_free_space
subroutine z_oacc_hll_free(a)
use psb_base_mod
implicit none
class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
call a%free_space()
call a%psb_z_hll_sparse_mat%free()
return
@ -244,48 +246,18 @@ contains
function z_oacc_hll_get_fmt() result(res)
implicit none
character(len=5) :: res
res = 'HLL_oacc'
res = 'HLLOA'
end function z_oacc_hll_get_fmt
subroutine z_oacc_hll_sync_space(a)
implicit none
class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
if (allocated(a%val)) then
call z_oacc_create_dev(a%val)
end if
if (allocated(a%ja)) then
call i_oacc_create_dev(a%ja)
end if
if (allocated(a%irn)) then
call i_oacc_create_dev_scalar(a%irn)
end if
if (allocated(a%idiag)) then
call i_oacc_create_dev_scalar(a%idiag)
end if
if (allocated(a%hkoffs)) then
call i_oacc_create_dev_scalar(a%hkoffs)
end if
contains
subroutine z_oacc_create_dev(v)
implicit none
complex(psb_dpk_), intent(in) :: v(:)
!$acc enter data copyin(v)
end subroutine z_oacc_create_dev
subroutine i_oacc_create_dev(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc enter data copyin(v)
end subroutine i_oacc_create_dev
subroutine i_oacc_create_dev_scalar(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc enter data copyin(v)
end subroutine i_oacc_create_dev_scalar
if (allocated(a%val)) call acc_create(a%val)
if (allocated(a%ja)) call acc_create(a%ja)
if (allocated(a%irn)) call acc_create(a%irn)
if (allocated(a%idiag)) call acc_create(a%idiag)
if (allocated(a%hkoffs)) call acc_create(a%hkoffs)
end subroutine z_oacc_hll_sync_space
@ -297,56 +269,19 @@ contains
tmpa => a
if (a%is_dev()) then
call z_oacc_hll_to_host(a%val)
call i_oacc_hll_to_host(a%ja)
call i_oacc_hll_to_host_scalar(a%irn)
call i_oacc_hll_to_host_scalar(a%idiag)
call i_oacc_hll_to_host_scalar(a%hkoffs)
call acc_update_self(a%val)
call acc_update_self(a%ja)
call acc_update_self(a%irn)
call acc_update_self(a%idiag)
call acc_update_self(a%hkoffs)
else if (a%is_host()) then
call z_oacc_hll_to_dev(a%val)
call i_oacc_hll_to_dev(a%ja)
call i_oacc_hll_to_dev_scalar(a%irn)
call i_oacc_hll_to_dev_scalar(a%idiag)
call i_oacc_hll_to_dev_scalar(a%hkoffs)
call acc_update_device(a%val)
call acc_update_device(a%ja)
call acc_update_device(a%irn)
call acc_update_device(a%idiag)
call acc_update_device(a%hkoffs)
end if
call tmpa%set_sync()
end subroutine z_oacc_hll_sync
subroutine z_oacc_hll_to_host(v)
implicit none
complex(psb_dpk_), intent(in) :: v(:)
!$acc update self(v)
end subroutine z_oacc_hll_to_host
subroutine z_oacc_hll_to_dev(v)
implicit none
complex(psb_dpk_), intent(in) :: v(:)
!$acc update device(v)
end subroutine z_oacc_hll_to_dev
subroutine i_oacc_hll_to_host(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc update self(v)
end subroutine i_oacc_hll_to_host
subroutine i_oacc_hll_to_dev(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc update device(v)
end subroutine i_oacc_hll_to_dev
subroutine i_oacc_hll_to_host_scalar(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc update self(v)
end subroutine i_oacc_hll_to_host_scalar
subroutine i_oacc_hll_to_dev_scalar(v)
implicit none
integer(psb_ipk_), intent(in) :: v(:)
!$acc update device(v)
end subroutine i_oacc_hll_to_dev_scalar
end module psb_z_oacc_hll_mat_mod

@ -59,7 +59,7 @@ module psb_z_oacc_vect_mod
procedure, pass(x) :: asum => z_oacc_asum
procedure, pass(x) :: absval1 => z_oacc_absval1
procedure, pass(x) :: absval2 => z_oacc_absval2
final :: z_oacc_final_vect_free
end type psb_z_vect_oacc
interface
@ -164,21 +164,25 @@ contains
implicit none
class(psb_z_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
real(psb_dpk_) :: res
real(psb_dpk_) :: mx
real(psb_dpk_) :: res
integer(psb_ipk_) :: info
if (x%is_host()) call x%sync()
mx = z_oacc_amax(n,x)
res = z_inner_oacc_nrm2(n, mx, x%v)
!!$ write(0,*)'oacc_nrm2'
res = z_inner_oacc_nrm2(n, x%v)
contains
function z_inner_oacc_nrm2(n, mx,x) result(res)
function z_inner_oacc_nrm2(n, x) result(res)
integer(psb_ipk_) :: n
complex(psb_dpk_) :: x(:)
real(psb_dpk_) :: mx, res
real(psb_dpk_) :: sum
real(psb_dpk_) :: res
real(psb_dpk_) :: sum, mx
integer(psb_ipk_) :: i
sum = 0.0
mx = dzero
!$acc parallel loop reduction(max:mx)
do i = 1, n
if (abs(x(i)) > mx) mx = abs(x(i))
end do
sum = dzero
!$acc parallel loop reduction(+:sum)
do i = 1, n
sum = sum + abs(x(i)/mx)**2
@ -203,7 +207,7 @@ contains
real(psb_dpk_) :: res
real(psb_dpk_) :: max_val
integer(psb_ipk_) :: i
max_val = -huge(0.0)
max_val = dzero
!$acc parallel loop reduction(max:max_val)
do i = 1, n
if (abs(x(i)) > max_val) max_val = abs(x(i))
@ -228,7 +232,7 @@ contains
complex(psb_dpk_) :: x(:)
real(psb_dpk_) :: res
integer(psb_ipk_) :: i
res = 0.0
res = dzero
!$acc parallel loop reduction(+:res)
do i = 1, n
res = res + abs(x(i))
@ -271,92 +275,6 @@ contains
call z%set_host()
end subroutine z_oacc_mlt_a_2
!!$ subroutine z_oacc_mlt_v(x, y, info)
!!$ implicit none
!!$ class(psb_z_base_vect_type), intent(inout) :: x
!!$ class(psb_z_vect_oacc), intent(inout) :: y
!!$ integer(psb_ipk_), intent(out) :: info
!!$
!!$ integer(psb_ipk_) :: i, n
!!$
!!$ info = 0
!!$ n = min(x%get_nrows(), y%get_nrows())
!!$ select type(xx => x)
!!$ type is (psb_z_base_vect_type)
!!$ if (y%is_dev()) call y%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ y%v(i) = y%v(i) * xx%v(i)
!!$ end do
!!$ call y%set_host()
!!$ class default
!!$ if (xx%is_dev()) call xx%sync()
!!$ if (y%is_dev()) call y%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ y%v(i) = y%v(i) * xx%v(i)
!!$ end do
!!$ call y%set_host()
!!$ end select
!!$ end subroutine z_oacc_mlt_v
!!$
!!$ subroutine z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
!!$ use psi_serial_mod
!!$ use psb_string_mod
!!$ implicit none
!!$ complex(psb_dpk_), intent(in) :: alpha, beta
!!$ class(psb_z_base_vect_type), intent(inout) :: x
!!$ class(psb_z_base_vect_type), intent(inout) :: y
!!$ class(psb_z_vect_oacc), intent(inout) :: z
!!$ integer(psb_ipk_), intent(out) :: info
!!$ character(len=1), intent(in), optional :: conjgx, conjgy
!!$ integer(psb_ipk_) :: i, n
!!$ logical :: conjgx_, conjgy_
!!$
!!$ conjgx_ = .false.
!!$ conjgy_ = .false.
!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
!!$
!!$ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
!!$
!!$ info = 0
!!$ select type(xx => x)
!!$ class is (psb_z_vect_oacc)
!!$ select type (yy => y)
!!$ class is (psb_z_vect_oacc)
!!$ if (xx%is_host()) call xx%sync()
!!$ if (yy%is_host()) call yy%sync()
!!$ if ((beta /= zzero) .and. (z%is_host())) call z%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
!!$ end do
!!$ call z%set_dev()
!!$ class default
!!$ if (xx%is_dev()) call xx%sync()
!!$ if (yy%is_dev()) call yy%sync()
!!$ if ((beta /= zzero) .and. (z%is_dev())) call z%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
!!$ end do
!!$ call z%set_host()
!!$ end select
!!$ class default
!!$ if (x%is_dev()) call x%sync()
!!$ if (y%is_dev()) call y%sync()
!!$ if ((beta /= zzero) .and. (z%is_dev())) call z%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i)
!!$ end do
!!$ call z%set_host()
!!$ end select
!!$ end subroutine z_oacc_mlt_v_2
subroutine z_oacc_axpby_v(m, alpha, x, beta, y, info)
!use psi_serial_mod
implicit none
@ -414,7 +332,7 @@ contains
integer(psb_ipk_) :: i
if ((beta /= zzero) .and. (y%is_dev())) call y%sync()
!$acc parallel loop
do i = 1, m
y%v(i) = alpha * x(i) + beta * y%v(i)
end do
@ -432,44 +350,44 @@ contains
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nx, ny, nz, i
logical :: gpu_done
write(0,*)'upd_xyz'
info = psb_success_
gpu_done = .false.
select type(xx => x)
class is (psb_z_vect_oacc)
select type(yy => y)
select type(yy => y)
class is (psb_z_vect_oacc)
select type(zz => z)
class is (psb_z_vect_oacc)
select type(zz => z)
class is (psb_z_vect_oacc)
if ((beta /= zzero) .and. yy%is_host()) call yy%sync()
if ((delta /= zzero) .and. zz%is_host()) call zz%sync()
if (xx%is_host()) call xx%sync()
nx = size(xx%v)
ny = size(yy%v)
nz = size(zz%v)
if ((nx < m) .or. (ny < m) .or. (nz < m)) then
info = psb_err_internal_error_
else
!$acc parallel loop
do i = 1, m
yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
end do
end if
call yy%set_dev()
call zz%set_dev()
gpu_done = .true.
end select
if ((beta /= zzero) .and. yy%is_host()) call yy%sync()
if ((delta /= zzero) .and. zz%is_host()) call zz%sync()
if (xx%is_host()) call xx%sync()
nx = size(xx%v)
ny = size(yy%v)
nz = size(zz%v)
if ((nx < m) .or. (ny < m) .or. (nz < m)) then
info = psb_err_internal_error_
else
!$acc parallel loop
do i = 1, m
yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
end do
end if
call yy%set_dev()
call zz%set_dev()
gpu_done = .true.
end select
end select
end select
if (.not. gpu_done) then
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
if (z%is_host()) call z%sync()
call y%axpby(m, alpha, x, beta, info)
call z%axpby(m, gamma, y, delta, info)
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
if (z%is_host()) call z%sync()
call y%axpby(m, alpha, x, beta, info)
call z%axpby(m, gamma, y, delta, info)
end if
end subroutine z_oacc_upd_xyz
@ -676,7 +594,7 @@ contains
if (x%is_dev()) call x%sync()
call x%psb_z_base_vect_type%ins(n, irl, val, dupl, info)
call x%set_host()
!$acc update device(x%v)
end subroutine z_oacc_ins_a
@ -687,16 +605,14 @@ contains
class(psb_z_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
call x%free(info)
call x%all(n, info)
if (info /= 0) then
call psb_errpush(info, 'z_oacc_bld_mn', i_err=(/n, n, n, n, n/))
end if
call x%set_host()
if (acc_is_present(x%v)) then
!$acc exit data delete(x%v) finalize
end if
!$acc enter data copyin(x%v)
call x%sync_space()
end subroutine z_oacc_bld_mn
@ -707,6 +623,7 @@ contains
class(psb_z_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
call x%free(info)
call psb_realloc(size(this), x%v, info)
if (info /= 0) then
info = psb_err_alloc_request_
@ -714,13 +631,9 @@ contains
i_err=(/size(this), izero, izero, izero, izero/))
return
end if
x%v(:) = this(:)
call x%set_host()
if (acc_is_present(x%v)) then
!$acc exit data delete(x%v) finalize
end if
!$acc enter data copyin(x%v)
call x%sync_space()
end subroutine z_oacc_bld_x
@ -848,54 +761,21 @@ contains
end function z_oacc_dot_a
! subroutine z_oacc_set_vect(x,y)
! implicit none
! class(psb_z_vect_oacc), intent(inout) :: x
! complex(psb_dpk_), intent(in) :: y(:)
! integer(psb_ipk_) :: info
! if (size(x%v) /= size(y)) then
! call x%free(info)
! call x%all(size(y),info)
! end if
! x%v(:) = y(:)
! call x%set_host()
! end subroutine z_oacc_set_vect
subroutine z_oacc_to_dev(v)
implicit none
complex(psb_dpk_) :: v(:)
!$acc update device(v)
end subroutine z_oacc_to_dev
subroutine z_oacc_to_host(v)
implicit none
complex(psb_dpk_) :: v(:)
!$acc update self(v)
end subroutine z_oacc_to_host
subroutine z_oacc_sync_space(x)
implicit none
class(psb_z_vect_oacc), intent(inout) :: x
if (allocated(x%v)) then
if (.not.acc_is_present(x%v)) call z_oacc_create_dev(x%v)
end if
contains
subroutine z_oacc_create_dev(v)
implicit none
complex(psb_dpk_) :: v(:)
!$acc enter data copyin(v)
end subroutine z_oacc_create_dev
if (allocated(x%v)) call acc_create(x%v)
end subroutine z_oacc_sync_space
subroutine z_oacc_sync(x)
implicit none
class(psb_z_vect_oacc), intent(inout) :: x
if (x%is_dev()) then
call z_oacc_to_host(x%v)
call acc_update_self(x%v)
end if
if (x%is_host()) then
call z_oacc_to_dev(x%v)
call acc_update_device(x%v)
end if
call x%set_sync()
end subroutine z_oacc_sync
@ -954,33 +834,36 @@ contains
integer(psb_ipk_), intent(out) :: info
call psb_realloc(n, x%v, info)
if (info == 0) then
call x%set_host()
if (acc_is_present(x%v)) then
!$acc exit data delete(x%v) finalize
end if
!$acc enter data create(x%v)
call x%sync_space()
end if
if (info /= 0) then
info = psb_err_alloc_request_
call psb_errpush(info, 'z_oacc_all', &
i_err=(/n, n, n, n, n/))
end if
call x%set_host()
call x%sync_space()
end subroutine z_oacc_vect_all
subroutine z_oacc_final_vect_free(x)
implicit none
type(psb_z_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
info = 0
if (allocated(x%v)) then
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
end if
end subroutine z_oacc_final_vect_free
subroutine z_oacc_vect_free(x, info)
implicit none
class(psb_z_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
if (allocated(x%v)) then
if (acc_is_present(x%v)) then
!$acc exit data delete(x%v) finalize
end if
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
end if
end subroutine z_oacc_vect_free
function z_oacc_get_size(x) result(res)

Loading…
Cancel
Save