diff --git a/openacc/impl/psb_c_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_c_oacc_csr_cp_from_coo.F90 index 70380c95..a411cc6a 100644 --- a/openacc/impl/psb_c_oacc_csr_cp_from_coo.F90 +++ b/openacc/impl/psb_c_oacc_csr_cp_from_coo.F90 @@ -10,11 +10,12 @@ contains info = psb_success_ + call a%free_space() call a%psb_c_csr_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 - - call a%set_dev() - if (info /= 0) goto 9999 + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90 index 7e664791..a8cd93a0 100644 --- a/openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90 +++ b/openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_c_coo_sparse_mat) call a%cp_from_coo(b, info) class default + call a%free_space() call a%psb_c_csr_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irp) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_c_oacc_csr_cp_from_fmt diff --git a/openacc/impl/psb_c_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_c_oacc_csr_mv_from_coo.F90 index f8c5c39d..30691030 100644 --- a/openacc/impl/psb_c_oacc_csr_mv_from_coo.F90 +++ b/openacc/impl/psb_c_oacc_csr_mv_from_coo.F90 @@ -10,10 +10,12 @@ contains info = psb_success_ + call a%free_space() call a%psb_c_csr_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - - !$acc update device(a%val, a%ja, a%irp) + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90 index 7ba971b4..b37011c0 100644 --- a/openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90 +++ b/openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_c_coo_sparse_mat) call a%mv_from_coo(b, info) class default + call a%free_space() call a%psb_c_csr_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irp) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_c_oacc_csr_mv_from_fmt diff --git a/openacc/impl/psb_c_oacc_csr_vect_mv.F90 b/openacc/impl/psb_c_oacc_csr_vect_mv.F90 index 0fd1ed35..db56d9fc 100644 --- a/openacc/impl/psb_c_oacc_csr_vect_mv.F90 +++ b/openacc/impl/psb_c_oacc_csr_vect_mv.F90 @@ -16,8 +16,8 @@ contains m = a%get_nrows() n = a%get_ncols() - if ((n /= size(x%v)) .or. (n /= size(y%v))) then - write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + if ((n /= size(x%v)) .or. (m /= size(y%v))) then + write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v) info = psb_err_invalid_mat_state_ return end if diff --git a/openacc/impl/psb_c_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_c_oacc_ell_cp_from_coo.F90 index 7bd17478..836874fe 100644 --- a/openacc/impl/psb_c_oacc_ell_cp_from_coo.F90 +++ b/openacc/impl/psb_c_oacc_ell_cp_from_coo.F90 @@ -8,65 +8,14 @@ contains class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, j, k, row, col, nz_per_row - complex(psb_spk_) :: value - integer(psb_ipk_), allocatable :: row_counts(:) - integer(psb_ipk_) :: hacksize, nza info = psb_success_ - hacksize = 1 - - call a%set_nrows(b%get_nrows()) - call a%set_ncols(b%get_ncols()) - nz_per_row = a%nzt - - if (.not.allocated(a%val)) then - allocate(a%val(a%get_nrows(), nz_per_row)) - allocate(a%ja(a%get_nrows(), nz_per_row)) - allocate(a%irn(a%get_nrows())) - allocate(a%idiag(a%get_nrows())) - end if - a%val = czero - a%ja = -1 - a%irn = 0 - a%idiag = 0 - - allocate(row_counts(a%get_nrows())) - row_counts = 0 - - nza = b%get_nzeros() - - !$acc parallel loop present(b, a, row_counts) - do k = 1, nza - row = b%ia(k) - col = b%ja(k) - value = b%val(k) - if (row_counts(row) < nz_per_row) then - a%val(row, row_counts(row) + 1) = value - a%ja(row, row_counts(row) + 1) = col - row_counts(row) = row_counts(row) + 1 - else - info = psb_err_invalid_mat_state_ - !goto 9999 - end if - end do - - a%irn = row_counts - - !$acc parallel loop present(a) - do i = 1, a%get_nrows() - do j = 1, nz_per_row - if (a%ja(i, j) == i) then - a%idiag(i) = j - exit - end if - end do - end do - - deallocate(row_counts) - - call a%set_dev() + call a%free_space() + call a%psb_c_ell_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90 index 7ee231c5..31d6c4b4 100644 --- a/openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90 +++ b/openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_c_coo_sparse_mat) call a%cp_from_coo(b, info) class default + call a%free_space() call a%psb_c_ell_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irn, a%idiag) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_c_oacc_ell_cp_from_fmt diff --git a/openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 index 7e703aa2..1ca43435 100644 --- a/openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 +++ b/openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 @@ -9,11 +9,13 @@ contains integer(psb_ipk_), intent(out) :: info info = psb_success_ - + + call a%free_space() call a%psb_c_ell_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - - !$acc update device(a%val, a%ja, a%irn, a%idiag) + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 index 7d1f790d..95798429 100644 --- a/openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 +++ b/openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_c_coo_sparse_mat) call a%mv_from_coo(b, info) class default + call a%free_space() call a%psb_c_ell_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irn, a%idiag) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_c_oacc_ell_mv_from_fmt diff --git a/openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 index 4c12cdf8..32391cc8 100644 --- a/openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 +++ b/openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 @@ -2,78 +2,20 @@ submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_cp_from_coo_impl use psb_base_mod contains module subroutine psb_c_oacc_hll_cp_from_coo(a, b, info) - implicit none + implicit none class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a - class(psb_c_coo_sparse_mat), intent(in) :: b + class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, j, k, row, col, nz_per_row - complex(psb_spk_) :: value - integer(psb_ipk_), allocatable :: row_counts(:) - integer(psb_ipk_) :: hacksize, nza - info = psb_success_ - hacksize = 32 ! Assuming a default hack size of 32 - - call a%set_nrows(b%get_nrows()) - call a%set_ncols(b%get_ncols()) - nz_per_row = a%nzt - - if (.not.allocated(a%val)) then - allocate(a%val(nz_per_row * a%get_nrows())) - allocate(a%ja(nz_per_row * a%get_nrows())) - allocate(a%irn(a%get_nrows())) - allocate(a%idiag(a%get_nrows())) - allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize)) - end if - a%val = czero - a%ja = -1 - a%irn = 0 - a%idiag = 0 - - allocate(row_counts(a%get_nrows())) - row_counts = 0 - - nza = b%get_nzeros() - - !$acc parallel loop present(b, a, row_counts) - do k = 1, nza - row = b%ia(k) - col = b%ja(k) - value = b%val(k) - if (row_counts(row) < nz_per_row) then - a%val(row_counts(row) + 1 + (row - 1) * nz_per_row) = value - a%ja(row_counts(row) + 1 + (row - 1) * nz_per_row) = col - row_counts(row) = row_counts(row) + 1 - else - info = psb_err_invalid_mat_state_ - !goto 9999 - end if - end do - - a%irn = row_counts - - !$acc parallel loop present(a) - do i = 1, a%get_nrows() - do j = 1, nz_per_row - if (a%ja(j + (i - 1) * nz_per_row) == i) then - a%idiag(i) = j - exit - end if - end do - end do - - ! Calculate hkoffs for HLL format - !$acc parallel loop present(a) - do i = 1, size(a%hkoffs) - a%hkoffs(i) = (i - 1) * hacksize - end do - - deallocate(row_counts) - call a%set_dev() + call a%free_space() + call a%psb_c_hll_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 index af6cc1d5..e442b668 100644 --- a/openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 +++ b/openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_c_coo_sparse_mat) call a%cp_from_coo(b, info) class default + call a%free_space() call a%psb_c_hll_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_c_oacc_hll_cp_from_fmt diff --git a/openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 index dec52d40..30d723fe 100644 --- a/openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 +++ b/openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 @@ -10,10 +10,12 @@ contains info = psb_success_ + call a%free_space() call a%psb_c_hll_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - - !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 index f2a064cb..0ac69af8 100644 --- a/openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 +++ b/openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 @@ -14,11 +14,12 @@ contains type is (psb_c_coo_sparse_mat) call a%mv_from_coo(b, info) class default + call a%free_space() call a%psb_c_hll_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + call a%sync_space() + call a%set_host() + call a%sync() end select - end subroutine psb_c_oacc_hll_mv_from_fmt end submodule psb_c_oacc_hll_mv_from_fmt_impl diff --git a/openacc/impl/psb_c_oacc_hll_vect_mv.F90 b/openacc/impl/psb_c_oacc_hll_vect_mv.F90 index 3b74d11a..68141e42 100644 --- a/openacc/impl/psb_c_oacc_hll_vect_mv.F90 +++ b/openacc/impl/psb_c_oacc_hll_vect_mv.F90 @@ -40,28 +40,28 @@ contains complex(psb_spk_) :: val(:), x(:), y(:) integer(psb_ipk_) :: ja(:), hkoffs(:) integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, j, idx, k + integer(psb_ipk_) :: i, j, idx, k, ipnt,ir,nr,nlc,isz,ii complex(psb_spk_) :: tmp info = 0 - - !$acc parallel loop present(val, ja, hkoffs, x, y) + !$acc parallel loop private(nlc, isz,ir,nr) do i = 1, nhacks - do k = 0, hksz - 1 - idx = hkoffs(i) + k - if (idx <= hkoffs(i + 1) - 1) then - tmp = 0.0_psb_dpk_ - !$acc loop seq - do j = hkoffs(i) + k, hkoffs(i + 1) - 1, hksz - if (ja(j) > 0) then - tmp = tmp + val(j) * x(ja(j)) - end if - end do - y(k + 1 + (i - 1) * hksz) = alpha * tmp + beta * y(k + 1 + (i - 1) * hksz) - end if + isz = hkoffs(i + 1) - hkoffs(i) + nlc = isz/hksz + ir = (i-1)*hksz + nr = min(hksz,m-ir) + !$acc loop independent private(tmp,ii,ipnt) + do ii = 1, nr + ipnt = hkoffs(i) + ii + tmp = czero + !$acc loop seq + do j = 1, nlc + tmp = tmp + val(ipnt) * x(ja(ipnt)) + ipnt = ipnt + hksz + end do + y(ii+ir) = alpha * tmp + beta * y(ii+ir) end do end do end subroutine inner_spmv - end subroutine psb_c_oacc_hll_vect_mv end submodule psb_c_oacc_hll_vect_mv_impl diff --git a/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 index ec92f618..50da7692 100644 --- a/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 +++ b/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 @@ -10,11 +10,12 @@ contains info = psb_success_ + call a%free_space() call a%psb_d_csr_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 - - call a%set_dev() - if (info /= 0) goto 9999 + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 index 37541ea9..0d35e247 100644 --- a/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 +++ b/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_d_coo_sparse_mat) call a%cp_from_coo(b, info) class default + call a%free_space() call a%psb_d_csr_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irp) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_d_oacc_csr_cp_from_fmt diff --git a/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 index 2ed9b032..97fa07d1 100644 --- a/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 +++ b/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 @@ -10,10 +10,12 @@ contains info = psb_success_ + call a%free_space() call a%psb_d_csr_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - - !$acc update device(a%val, a%ja, a%irp) + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 index 197ed911..e992f41a 100644 --- a/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 +++ b/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_d_coo_sparse_mat) call a%mv_from_coo(b, info) class default + call a%free_space() call a%psb_d_csr_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irp) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_d_oacc_csr_mv_from_fmt diff --git a/openacc/impl/psb_d_oacc_csr_vect_mv.F90 b/openacc/impl/psb_d_oacc_csr_vect_mv.F90 index 1dca2ba2..0001cc76 100644 --- a/openacc/impl/psb_d_oacc_csr_vect_mv.F90 +++ b/openacc/impl/psb_d_oacc_csr_vect_mv.F90 @@ -16,8 +16,8 @@ contains m = a%get_nrows() n = a%get_ncols() - if ((n /= size(x%v)) .or. (n /= size(y%v))) then - write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + if ((n /= size(x%v)) .or. (m /= size(y%v))) then + write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v) info = psb_err_invalid_mat_state_ return end if diff --git a/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 index c13d1edd..6c24098e 100644 --- a/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 +++ b/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 @@ -8,65 +8,14 @@ contains class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, j, k, row, col, nz_per_row - real(psb_dpk_) :: value - integer(psb_ipk_), allocatable :: row_counts(:) - integer(psb_ipk_) :: hacksize, nza info = psb_success_ - hacksize = 1 - - call a%set_nrows(b%get_nrows()) - call a%set_ncols(b%get_ncols()) - nz_per_row = a%nzt - - if (.not.allocated(a%val)) then - allocate(a%val(a%get_nrows(), nz_per_row)) - allocate(a%ja(a%get_nrows(), nz_per_row)) - allocate(a%irn(a%get_nrows())) - allocate(a%idiag(a%get_nrows())) - end if - a%val = dzero - a%ja = -1 - a%irn = 0 - a%idiag = 0 - - allocate(row_counts(a%get_nrows())) - row_counts = 0 - - nza = b%get_nzeros() - - !$acc parallel loop present(b, a, row_counts) - do k = 1, nza - row = b%ia(k) - col = b%ja(k) - value = b%val(k) - if (row_counts(row) < nz_per_row) then - a%val(row, row_counts(row) + 1) = value - a%ja(row, row_counts(row) + 1) = col - row_counts(row) = row_counts(row) + 1 - else - info = psb_err_invalid_mat_state_ - !goto 9999 - end if - end do - - a%irn = row_counts - - !$acc parallel loop present(a) - do i = 1, a%get_nrows() - do j = 1, nz_per_row - if (a%ja(i, j) == i) then - a%idiag(i) = j - exit - end if - end do - end do - - deallocate(row_counts) - - call a%set_dev() + call a%free_space() + call a%psb_d_ell_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 index b62a40d5..991681e9 100644 --- a/openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 +++ b/openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_d_coo_sparse_mat) call a%cp_from_coo(b, info) class default + call a%free_space() call a%psb_d_ell_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irn, a%idiag) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_d_oacc_ell_cp_from_fmt diff --git a/openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 index 688d182b..9214ba3f 100644 --- a/openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 +++ b/openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 @@ -9,11 +9,13 @@ contains integer(psb_ipk_), intent(out) :: info info = psb_success_ - + + call a%free_space() call a%psb_d_ell_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - - !$acc update device(a%val, a%ja, a%irn, a%idiag) + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 index 8bda6e6e..292165fc 100644 --- a/openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 +++ b/openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_d_coo_sparse_mat) call a%mv_from_coo(b, info) class default + call a%free_space() call a%psb_d_ell_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irn, a%idiag) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_d_oacc_ell_mv_from_fmt diff --git a/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 index 18bd768b..e39a29b7 100644 --- a/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 +++ b/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 @@ -2,78 +2,20 @@ submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_cp_from_coo_impl use psb_base_mod contains module subroutine psb_d_oacc_hll_cp_from_coo(a, b, info) - implicit none + implicit none class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a - class(psb_d_coo_sparse_mat), intent(in) :: b + class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, j, k, row, col, nz_per_row - real(psb_dpk_) :: value - integer(psb_ipk_), allocatable :: row_counts(:) - integer(psb_ipk_) :: hacksize, nza - info = psb_success_ - hacksize = 32 ! Assuming a default hack size of 32 - - call a%set_nrows(b%get_nrows()) - call a%set_ncols(b%get_ncols()) - nz_per_row = a%nzt - - if (.not.allocated(a%val)) then - allocate(a%val(nz_per_row * a%get_nrows())) - allocate(a%ja(nz_per_row * a%get_nrows())) - allocate(a%irn(a%get_nrows())) - allocate(a%idiag(a%get_nrows())) - allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize)) - end if - a%val = dzero - a%ja = -1 - a%irn = 0 - a%idiag = 0 - - allocate(row_counts(a%get_nrows())) - row_counts = 0 - - nza = b%get_nzeros() - - !$acc parallel loop present(b, a, row_counts) - do k = 1, nza - row = b%ia(k) - col = b%ja(k) - value = b%val(k) - if (row_counts(row) < nz_per_row) then - a%val(row_counts(row) + 1 + (row - 1) * nz_per_row) = value - a%ja(row_counts(row) + 1 + (row - 1) * nz_per_row) = col - row_counts(row) = row_counts(row) + 1 - else - info = psb_err_invalid_mat_state_ - !goto 9999 - end if - end do - - a%irn = row_counts - - !$acc parallel loop present(a) - do i = 1, a%get_nrows() - do j = 1, nz_per_row - if (a%ja(j + (i - 1) * nz_per_row) == i) then - a%idiag(i) = j - exit - end if - end do - end do - - ! Calculate hkoffs for HLL format - !$acc parallel loop present(a) - do i = 1, size(a%hkoffs) - a%hkoffs(i) = (i - 1) * hacksize - end do - - deallocate(row_counts) - call a%set_dev() + call a%free_space() + call a%psb_d_hll_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 index fb99737c..a838e31e 100644 --- a/openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 +++ b/openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_d_coo_sparse_mat) call a%cp_from_coo(b, info) class default + call a%free_space() call a%psb_d_hll_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_d_oacc_hll_cp_from_fmt diff --git a/openacc/impl/psb_d_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_d_oacc_hll_mv_from_coo.F90 index 7bf22c13..29494a39 100644 --- a/openacc/impl/psb_d_oacc_hll_mv_from_coo.F90 +++ b/openacc/impl/psb_d_oacc_hll_mv_from_coo.F90 @@ -10,10 +10,12 @@ contains info = psb_success_ + call a%free_space() call a%psb_d_hll_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - - !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90 index e6615365..e03e2f30 100644 --- a/openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90 +++ b/openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90 @@ -14,11 +14,12 @@ contains type is (psb_d_coo_sparse_mat) call a%mv_from_coo(b, info) class default + call a%free_space() call a%psb_d_hll_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + call a%sync_space() + call a%set_host() + call a%sync() end select - end subroutine psb_d_oacc_hll_mv_from_fmt end submodule psb_d_oacc_hll_mv_from_fmt_impl diff --git a/openacc/impl/psb_d_oacc_hll_vect_mv.F90 b/openacc/impl/psb_d_oacc_hll_vect_mv.F90 index 875b646f..e7c47b7a 100644 --- a/openacc/impl/psb_d_oacc_hll_vect_mv.F90 +++ b/openacc/impl/psb_d_oacc_hll_vect_mv.F90 @@ -40,28 +40,28 @@ contains real(psb_dpk_) :: val(:), x(:), y(:) integer(psb_ipk_) :: ja(:), hkoffs(:) integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, j, idx, k + integer(psb_ipk_) :: i, j, idx, k, ipnt,ir,nr,nlc,isz,ii real(psb_dpk_) :: tmp info = 0 - - !$acc parallel loop present(val, ja, hkoffs, x, y) + !$acc parallel loop private(nlc, isz,ir,nr) do i = 1, nhacks - do k = 0, hksz - 1 - idx = hkoffs(i) + k - if (idx <= hkoffs(i + 1) - 1) then - tmp = 0.0_psb_dpk_ - !$acc loop seq - do j = hkoffs(i) + k, hkoffs(i + 1) - 1, hksz - if (ja(j) > 0) then - tmp = tmp + val(j) * x(ja(j)) - end if - end do - y(k + 1 + (i - 1) * hksz) = alpha * tmp + beta * y(k + 1 + (i - 1) * hksz) - end if + isz = hkoffs(i + 1) - hkoffs(i) + nlc = isz/hksz + ir = (i-1)*hksz + nr = min(hksz,m-ir) + !$acc loop independent private(tmp,ii,ipnt) + do ii = 1, nr + ipnt = hkoffs(i) + ii + tmp = dzero + !$acc loop seq + do j = 1, nlc + tmp = tmp + val(ipnt) * x(ja(ipnt)) + ipnt = ipnt + hksz + end do + y(ii+ir) = alpha * tmp + beta * y(ii+ir) end do end do end subroutine inner_spmv - end subroutine psb_d_oacc_hll_vect_mv end submodule psb_d_oacc_hll_vect_mv_impl diff --git a/openacc/impl/psb_s_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_s_oacc_csr_cp_from_coo.F90 index 94ef67b3..3c2fb1cb 100644 --- a/openacc/impl/psb_s_oacc_csr_cp_from_coo.F90 +++ b/openacc/impl/psb_s_oacc_csr_cp_from_coo.F90 @@ -10,11 +10,12 @@ contains info = psb_success_ + call a%free_space() call a%psb_s_csr_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 - - call a%set_dev() - if (info /= 0) goto 9999 + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90 index 2c64b5fe..e47959f8 100644 --- a/openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90 +++ b/openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_s_coo_sparse_mat) call a%cp_from_coo(b, info) class default + call a%free_space() call a%psb_s_csr_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irp) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_s_oacc_csr_cp_from_fmt diff --git a/openacc/impl/psb_s_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_s_oacc_csr_mv_from_coo.F90 index e531d309..fdbf3a0c 100644 --- a/openacc/impl/psb_s_oacc_csr_mv_from_coo.F90 +++ b/openacc/impl/psb_s_oacc_csr_mv_from_coo.F90 @@ -10,10 +10,12 @@ contains info = psb_success_ + call a%free_space() call a%psb_s_csr_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - - !$acc update device(a%val, a%ja, a%irp) + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90 index a9dc0c70..a7a581b8 100644 --- a/openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90 +++ b/openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_s_coo_sparse_mat) call a%mv_from_coo(b, info) class default + call a%free_space() call a%psb_s_csr_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irp) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_s_oacc_csr_mv_from_fmt diff --git a/openacc/impl/psb_s_oacc_csr_vect_mv.F90 b/openacc/impl/psb_s_oacc_csr_vect_mv.F90 index c2bbd6b1..13ee1651 100644 --- a/openacc/impl/psb_s_oacc_csr_vect_mv.F90 +++ b/openacc/impl/psb_s_oacc_csr_vect_mv.F90 @@ -16,8 +16,8 @@ contains m = a%get_nrows() n = a%get_ncols() - if ((n /= size(x%v)) .or. (n /= size(y%v))) then - write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + if ((n /= size(x%v)) .or. (m /= size(y%v))) then + write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v) info = psb_err_invalid_mat_state_ return end if diff --git a/openacc/impl/psb_s_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_s_oacc_ell_cp_from_coo.F90 index 9aaaff73..0f6fbc48 100644 --- a/openacc/impl/psb_s_oacc_ell_cp_from_coo.F90 +++ b/openacc/impl/psb_s_oacc_ell_cp_from_coo.F90 @@ -8,65 +8,14 @@ contains class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, j, k, row, col, nz_per_row - real(psb_spk_) :: value - integer(psb_ipk_), allocatable :: row_counts(:) - integer(psb_ipk_) :: hacksize, nza info = psb_success_ - hacksize = 1 - - call a%set_nrows(b%get_nrows()) - call a%set_ncols(b%get_ncols()) - nz_per_row = a%nzt - - if (.not.allocated(a%val)) then - allocate(a%val(a%get_nrows(), nz_per_row)) - allocate(a%ja(a%get_nrows(), nz_per_row)) - allocate(a%irn(a%get_nrows())) - allocate(a%idiag(a%get_nrows())) - end if - a%val = szero - a%ja = -1 - a%irn = 0 - a%idiag = 0 - - allocate(row_counts(a%get_nrows())) - row_counts = 0 - - nza = b%get_nzeros() - - !$acc parallel loop present(b, a, row_counts) - do k = 1, nza - row = b%ia(k) - col = b%ja(k) - value = b%val(k) - if (row_counts(row) < nz_per_row) then - a%val(row, row_counts(row) + 1) = value - a%ja(row, row_counts(row) + 1) = col - row_counts(row) = row_counts(row) + 1 - else - info = psb_err_invalid_mat_state_ - !goto 9999 - end if - end do - - a%irn = row_counts - - !$acc parallel loop present(a) - do i = 1, a%get_nrows() - do j = 1, nz_per_row - if (a%ja(i, j) == i) then - a%idiag(i) = j - exit - end if - end do - end do - - deallocate(row_counts) - - call a%set_dev() + call a%free_space() + call a%psb_s_ell_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90 index d4c1a233..793c2779 100644 --- a/openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90 +++ b/openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_s_coo_sparse_mat) call a%cp_from_coo(b, info) class default + call a%free_space() call a%psb_s_ell_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irn, a%idiag) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_s_oacc_ell_cp_from_fmt diff --git a/openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 index d6bbec13..ba82049f 100644 --- a/openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 +++ b/openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 @@ -9,11 +9,13 @@ contains integer(psb_ipk_), intent(out) :: info info = psb_success_ - + + call a%free_space() call a%psb_s_ell_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - - !$acc update device(a%val, a%ja, a%irn, a%idiag) + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 index ebb82901..df789664 100644 --- a/openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 +++ b/openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_s_coo_sparse_mat) call a%mv_from_coo(b, info) class default + call a%free_space() call a%psb_s_ell_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irn, a%idiag) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_s_oacc_ell_mv_from_fmt diff --git a/openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 index 34a0b5d5..dfba3c6c 100644 --- a/openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 +++ b/openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 @@ -2,78 +2,20 @@ submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_cp_from_coo_impl use psb_base_mod contains module subroutine psb_s_oacc_hll_cp_from_coo(a, b, info) - implicit none + implicit none class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a - class(psb_s_coo_sparse_mat), intent(in) :: b + class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, j, k, row, col, nz_per_row - real(psb_spk_) :: value - integer(psb_ipk_), allocatable :: row_counts(:) - integer(psb_ipk_) :: hacksize, nza - info = psb_success_ - hacksize = 32 ! Assuming a default hack size of 32 - - call a%set_nrows(b%get_nrows()) - call a%set_ncols(b%get_ncols()) - nz_per_row = a%nzt - - if (.not.allocated(a%val)) then - allocate(a%val(nz_per_row * a%get_nrows())) - allocate(a%ja(nz_per_row * a%get_nrows())) - allocate(a%irn(a%get_nrows())) - allocate(a%idiag(a%get_nrows())) - allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize)) - end if - a%val = szero - a%ja = -1 - a%irn = 0 - a%idiag = 0 - - allocate(row_counts(a%get_nrows())) - row_counts = 0 - - nza = b%get_nzeros() - - !$acc parallel loop present(b, a, row_counts) - do k = 1, nza - row = b%ia(k) - col = b%ja(k) - value = b%val(k) - if (row_counts(row) < nz_per_row) then - a%val(row_counts(row) + 1 + (row - 1) * nz_per_row) = value - a%ja(row_counts(row) + 1 + (row - 1) * nz_per_row) = col - row_counts(row) = row_counts(row) + 1 - else - info = psb_err_invalid_mat_state_ - !goto 9999 - end if - end do - - a%irn = row_counts - - !$acc parallel loop present(a) - do i = 1, a%get_nrows() - do j = 1, nz_per_row - if (a%ja(j + (i - 1) * nz_per_row) == i) then - a%idiag(i) = j - exit - end if - end do - end do - - ! Calculate hkoffs for HLL format - !$acc parallel loop present(a) - do i = 1, size(a%hkoffs) - a%hkoffs(i) = (i - 1) * hacksize - end do - - deallocate(row_counts) - call a%set_dev() + call a%free_space() + call a%psb_s_hll_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 index 4d023f8b..849e03b7 100644 --- a/openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 +++ b/openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_s_coo_sparse_mat) call a%cp_from_coo(b, info) class default + call a%free_space() call a%psb_s_hll_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_s_oacc_hll_cp_from_fmt diff --git a/openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 index 08b553b7..c22818fa 100644 --- a/openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 +++ b/openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 @@ -10,10 +10,12 @@ contains info = psb_success_ + call a%free_space() call a%psb_s_hll_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - - !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 index d5867289..992b1c7b 100644 --- a/openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 +++ b/openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 @@ -14,11 +14,12 @@ contains type is (psb_s_coo_sparse_mat) call a%mv_from_coo(b, info) class default + call a%free_space() call a%psb_s_hll_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + call a%sync_space() + call a%set_host() + call a%sync() end select - end subroutine psb_s_oacc_hll_mv_from_fmt end submodule psb_s_oacc_hll_mv_from_fmt_impl diff --git a/openacc/impl/psb_s_oacc_hll_vect_mv.F90 b/openacc/impl/psb_s_oacc_hll_vect_mv.F90 index 9d9e9197..efe9a9ca 100644 --- a/openacc/impl/psb_s_oacc_hll_vect_mv.F90 +++ b/openacc/impl/psb_s_oacc_hll_vect_mv.F90 @@ -40,28 +40,28 @@ contains real(psb_spk_) :: val(:), x(:), y(:) integer(psb_ipk_) :: ja(:), hkoffs(:) integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, j, idx, k + integer(psb_ipk_) :: i, j, idx, k, ipnt,ir,nr,nlc,isz,ii real(psb_spk_) :: tmp info = 0 - - !$acc parallel loop present(val, ja, hkoffs, x, y) + !$acc parallel loop private(nlc, isz,ir,nr) do i = 1, nhacks - do k = 0, hksz - 1 - idx = hkoffs(i) + k - if (idx <= hkoffs(i + 1) - 1) then - tmp = 0.0_psb_dpk_ - !$acc loop seq - do j = hkoffs(i) + k, hkoffs(i + 1) - 1, hksz - if (ja(j) > 0) then - tmp = tmp + val(j) * x(ja(j)) - end if - end do - y(k + 1 + (i - 1) * hksz) = alpha * tmp + beta * y(k + 1 + (i - 1) * hksz) - end if + isz = hkoffs(i + 1) - hkoffs(i) + nlc = isz/hksz + ir = (i-1)*hksz + nr = min(hksz,m-ir) + !$acc loop independent private(tmp,ii,ipnt) + do ii = 1, nr + ipnt = hkoffs(i) + ii + tmp = szero + !$acc loop seq + do j = 1, nlc + tmp = tmp + val(ipnt) * x(ja(ipnt)) + ipnt = ipnt + hksz + end do + y(ii+ir) = alpha * tmp + beta * y(ii+ir) end do end do end subroutine inner_spmv - end subroutine psb_s_oacc_hll_vect_mv end submodule psb_s_oacc_hll_vect_mv_impl diff --git a/openacc/impl/psb_z_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_z_oacc_csr_cp_from_coo.F90 index 0485c9ca..6c40d0d2 100644 --- a/openacc/impl/psb_z_oacc_csr_cp_from_coo.F90 +++ b/openacc/impl/psb_z_oacc_csr_cp_from_coo.F90 @@ -10,11 +10,12 @@ contains info = psb_success_ + call a%free_space() call a%psb_z_csr_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 - - call a%set_dev() - if (info /= 0) goto 9999 + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90 index f2c68816..3025fde2 100644 --- a/openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90 +++ b/openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_z_coo_sparse_mat) call a%cp_from_coo(b, info) class default + call a%free_space() call a%psb_z_csr_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irp) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_z_oacc_csr_cp_from_fmt diff --git a/openacc/impl/psb_z_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_z_oacc_csr_mv_from_coo.F90 index 44b01b68..6dae625a 100644 --- a/openacc/impl/psb_z_oacc_csr_mv_from_coo.F90 +++ b/openacc/impl/psb_z_oacc_csr_mv_from_coo.F90 @@ -10,10 +10,12 @@ contains info = psb_success_ + call a%free_space() call a%psb_z_csr_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - - !$acc update device(a%val, a%ja, a%irp) + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90 index bf777e85..1d7dd723 100644 --- a/openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90 +++ b/openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_z_coo_sparse_mat) call a%mv_from_coo(b, info) class default + call a%free_space() call a%psb_z_csr_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irp) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_z_oacc_csr_mv_from_fmt diff --git a/openacc/impl/psb_z_oacc_csr_vect_mv.F90 b/openacc/impl/psb_z_oacc_csr_vect_mv.F90 index b8da5c8f..cb34dce1 100644 --- a/openacc/impl/psb_z_oacc_csr_vect_mv.F90 +++ b/openacc/impl/psb_z_oacc_csr_vect_mv.F90 @@ -16,8 +16,8 @@ contains m = a%get_nrows() n = a%get_ncols() - if ((n /= size(x%v)) .or. (n /= size(y%v))) then - write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + if ((n /= size(x%v)) .or. (m /= size(y%v))) then + write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v) info = psb_err_invalid_mat_state_ return end if diff --git a/openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 index e4d3b731..6dd60bd7 100644 --- a/openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 +++ b/openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 @@ -8,65 +8,14 @@ contains class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, j, k, row, col, nz_per_row - complex(psb_dpk_) :: value - integer(psb_ipk_), allocatable :: row_counts(:) - integer(psb_ipk_) :: hacksize, nza info = psb_success_ - hacksize = 1 - - call a%set_nrows(b%get_nrows()) - call a%set_ncols(b%get_ncols()) - nz_per_row = a%nzt - - if (.not.allocated(a%val)) then - allocate(a%val(a%get_nrows(), nz_per_row)) - allocate(a%ja(a%get_nrows(), nz_per_row)) - allocate(a%irn(a%get_nrows())) - allocate(a%idiag(a%get_nrows())) - end if - a%val = zzero - a%ja = -1 - a%irn = 0 - a%idiag = 0 - - allocate(row_counts(a%get_nrows())) - row_counts = 0 - - nza = b%get_nzeros() - - !$acc parallel loop present(b, a, row_counts) - do k = 1, nza - row = b%ia(k) - col = b%ja(k) - value = b%val(k) - if (row_counts(row) < nz_per_row) then - a%val(row, row_counts(row) + 1) = value - a%ja(row, row_counts(row) + 1) = col - row_counts(row) = row_counts(row) + 1 - else - info = psb_err_invalid_mat_state_ - !goto 9999 - end if - end do - - a%irn = row_counts - - !$acc parallel loop present(a) - do i = 1, a%get_nrows() - do j = 1, nz_per_row - if (a%ja(i, j) == i) then - a%idiag(i) = j - exit - end if - end do - end do - - deallocate(row_counts) - - call a%set_dev() + call a%free_space() + call a%psb_z_ell_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 index 98404ae2..60d94bb2 100644 --- a/openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 +++ b/openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_z_coo_sparse_mat) call a%cp_from_coo(b, info) class default + call a%free_space() call a%psb_z_ell_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irn, a%idiag) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_z_oacc_ell_cp_from_fmt diff --git a/openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 index 26388e5e..db70b944 100644 --- a/openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 +++ b/openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 @@ -9,11 +9,13 @@ contains integer(psb_ipk_), intent(out) :: info info = psb_success_ - + + call a%free_space() call a%psb_z_ell_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - - !$acc update device(a%val, a%ja, a%irn, a%idiag) + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 index e0f75828..f99b3817 100644 --- a/openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 +++ b/openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_z_coo_sparse_mat) call a%mv_from_coo(b, info) class default + call a%free_space() call a%psb_z_ell_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irn, a%idiag) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_z_oacc_ell_mv_from_fmt diff --git a/openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 index 62be2252..e018e762 100644 --- a/openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 +++ b/openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 @@ -2,78 +2,20 @@ submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_cp_from_coo_impl use psb_base_mod contains module subroutine psb_z_oacc_hll_cp_from_coo(a, b, info) - implicit none + implicit none class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a - class(psb_z_coo_sparse_mat), intent(in) :: b + class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, j, k, row, col, nz_per_row - complex(psb_dpk_) :: value - integer(psb_ipk_), allocatable :: row_counts(:) - integer(psb_ipk_) :: hacksize, nza - info = psb_success_ - hacksize = 32 ! Assuming a default hack size of 32 - - call a%set_nrows(b%get_nrows()) - call a%set_ncols(b%get_ncols()) - nz_per_row = a%nzt - - if (.not.allocated(a%val)) then - allocate(a%val(nz_per_row * a%get_nrows())) - allocate(a%ja(nz_per_row * a%get_nrows())) - allocate(a%irn(a%get_nrows())) - allocate(a%idiag(a%get_nrows())) - allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize)) - end if - a%val = zzero - a%ja = -1 - a%irn = 0 - a%idiag = 0 - - allocate(row_counts(a%get_nrows())) - row_counts = 0 - - nza = b%get_nzeros() - - !$acc parallel loop present(b, a, row_counts) - do k = 1, nza - row = b%ia(k) - col = b%ja(k) - value = b%val(k) - if (row_counts(row) < nz_per_row) then - a%val(row_counts(row) + 1 + (row - 1) * nz_per_row) = value - a%ja(row_counts(row) + 1 + (row - 1) * nz_per_row) = col - row_counts(row) = row_counts(row) + 1 - else - info = psb_err_invalid_mat_state_ - !goto 9999 - end if - end do - - a%irn = row_counts - - !$acc parallel loop present(a) - do i = 1, a%get_nrows() - do j = 1, nz_per_row - if (a%ja(j + (i - 1) * nz_per_row) == i) then - a%idiag(i) = j - exit - end if - end do - end do - - ! Calculate hkoffs for HLL format - !$acc parallel loop present(a) - do i = 1, size(a%hkoffs) - a%hkoffs(i) = (i - 1) * hacksize - end do - - deallocate(row_counts) - call a%set_dev() + call a%free_space() + call a%psb_z_hll_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 index f267e1c6..7b18b255 100644 --- a/openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 +++ b/openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_z_coo_sparse_mat) call a%cp_from_coo(b, info) class default + call a%free_space() call a%psb_z_hll_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_z_oacc_hll_cp_from_fmt diff --git a/openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 index 2ff574d3..267f13f3 100644 --- a/openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 +++ b/openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 @@ -10,10 +10,12 @@ contains info = psb_success_ + call a%free_space() call a%psb_z_hll_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - - !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 index 5fa00e38..151dc6ce 100644 --- a/openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 +++ b/openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 @@ -14,11 +14,12 @@ contains type is (psb_z_coo_sparse_mat) call a%mv_from_coo(b, info) class default + call a%free_space() call a%psb_z_hll_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + call a%sync_space() + call a%set_host() + call a%sync() end select - end subroutine psb_z_oacc_hll_mv_from_fmt end submodule psb_z_oacc_hll_mv_from_fmt_impl diff --git a/openacc/impl/psb_z_oacc_hll_vect_mv.F90 b/openacc/impl/psb_z_oacc_hll_vect_mv.F90 index 89d970c0..dbadf034 100644 --- a/openacc/impl/psb_z_oacc_hll_vect_mv.F90 +++ b/openacc/impl/psb_z_oacc_hll_vect_mv.F90 @@ -40,28 +40,28 @@ contains complex(psb_dpk_) :: val(:), x(:), y(:) integer(psb_ipk_) :: ja(:), hkoffs(:) integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, j, idx, k + integer(psb_ipk_) :: i, j, idx, k, ipnt,ir,nr,nlc,isz,ii complex(psb_dpk_) :: tmp info = 0 - - !$acc parallel loop present(val, ja, hkoffs, x, y) + !$acc parallel loop private(nlc, isz,ir,nr) do i = 1, nhacks - do k = 0, hksz - 1 - idx = hkoffs(i) + k - if (idx <= hkoffs(i + 1) - 1) then - tmp = 0.0_psb_dpk_ - !$acc loop seq - do j = hkoffs(i) + k, hkoffs(i + 1) - 1, hksz - if (ja(j) > 0) then - tmp = tmp + val(j) * x(ja(j)) - end if - end do - y(k + 1 + (i - 1) * hksz) = alpha * tmp + beta * y(k + 1 + (i - 1) * hksz) - end if + isz = hkoffs(i + 1) - hkoffs(i) + nlc = isz/hksz + ir = (i-1)*hksz + nr = min(hksz,m-ir) + !$acc loop independent private(tmp,ii,ipnt) + do ii = 1, nr + ipnt = hkoffs(i) + ii + tmp = zzero + !$acc loop seq + do j = 1, nlc + tmp = tmp + val(ipnt) * x(ja(ipnt)) + ipnt = ipnt + hksz + end do + y(ii+ir) = alpha * tmp + beta * y(ii+ir) end do end do end subroutine inner_spmv - end subroutine psb_z_oacc_hll_vect_mv end submodule psb_z_oacc_hll_vect_mv_impl diff --git a/openacc/psb_c_oacc_csr_mat_mod.F90 b/openacc/psb_c_oacc_csr_mat_mod.F90 index 00e79570..07734762 100644 --- a/openacc/psb_c_oacc_csr_mat_mod.F90 +++ b/openacc/psb_c_oacc_csr_mat_mod.F90 @@ -1,6 +1,7 @@ module psb_c_oacc_csr_mat_mod use iso_c_binding + use openacc use psb_c_mat_mod use psb_c_oacc_vect_mod !use oaccsparse_mod @@ -35,6 +36,7 @@ module psb_c_oacc_csr_mat_mod procedure, pass(a) :: set_host => c_oacc_csr_set_host procedure, pass(a) :: set_sync => c_oacc_csr_set_sync procedure, pass(a) :: set_dev => c_oacc_csr_set_dev + procedure, pass(a) :: free_space => c_oacc_csr_free_space procedure, pass(a) :: sync_space => c_oacc_csr_sync_space procedure, pass(a) :: sync => c_oacc_csr_sync end type psb_c_oacc_csr_sparse_mat @@ -154,22 +156,26 @@ module psb_c_oacc_csr_mat_mod contains - subroutine c_oacc_csr_free(a) + subroutine c_oacc_csr_free_space(a) use psb_base_mod implicit none class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) then - !$acc exit data delete(a%val) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) - end if - if (allocated(a%irp)) then - !$acc exit data delete(a%irp) - end if + if (allocated(a%val)) call acc_delete_finalize(a%val) + if (allocated(a%ja)) call acc_delete_finalize(a%ja) + if (allocated(a%irp)) call acc_delete_finalize(a%irp) + + return + end subroutine c_oacc_csr_free_space + + subroutine c_oacc_csr_free(a) + use psb_base_mod + implicit none + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + call a%free_space() call a%psb_c_csr_sparse_mat%free() return @@ -193,7 +199,7 @@ contains function c_oacc_csr_get_fmt() result(res) implicit none character(len=5) :: res - res = 'CSR_oacc' + res = 'CSROA' end function c_oacc_csr_get_fmt subroutine c_oacc_csr_all(m, n, nz, a, info) @@ -202,19 +208,8 @@ contains class(psb_c_oacc_csr_sparse_mat), intent(out) :: a integer(psb_ipk_), intent(out) :: info - info = 0 - if (allocated(a%val)) then - !$acc exit data delete(a%val) finalize - deallocate(a%val, stat=info) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) finalize - deallocate(a%ja, stat=info) - end if - if (allocated(a%irp)) then - !$acc exit data delete(a%irp) finalize - deallocate(a%irp, stat=info) - end if + info = 0 + call a%free() call a%set_nrows(m) call a%set_ncols(n) @@ -274,26 +269,9 @@ contains subroutine c_oacc_csr_sync_space(a) implicit none class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a - if (allocated(a%val)) then - call c_oacc_create_dev(a%val) - end if - if (allocated(a%ja)) then - call i_oacc_create_dev(a%ja) - end if - if (allocated(a%irp)) then - call i_oacc_create_dev(a%irp) - end if - contains - subroutine c_oacc_create_dev(v) - implicit none - complex(psb_spk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine c_oacc_create_dev - subroutine i_oacc_create_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev + if (allocated(a%val)) call acc_create(a%val) + if (allocated(a%ja)) call acc_create(a%ja) + if (allocated(a%irp)) call acc_create(a%irp) end subroutine c_oacc_csr_sync_space subroutine c_oacc_csr_sync(a) @@ -304,40 +282,16 @@ contains tmpa => a if (a%is_dev()) then - call c_oacc_csr_to_host(a%val) - call i_oacc_csr_to_host(a%ja) - call i_oacc_csr_to_host(a%irp) + call acc_update_self(a%val) + call acc_update_self(a%ja) + call acc_update_self(a%irp) else if (a%is_host()) then - call c_oacc_csr_to_dev(a%val) - call i_oacc_csr_to_dev(a%ja) - call i_oacc_csr_to_dev(a%irp) + call acc_update_device(a%val) + call acc_update_device(a%ja) + call acc_update_device(a%irp) end if call tmpa%set_sync() end subroutine c_oacc_csr_sync - subroutine c_oacc_csr_to_dev(v) - implicit none - complex(psb_spk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine c_oacc_csr_to_dev - - subroutine c_oacc_csr_to_host(v) - implicit none - complex(psb_spk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine c_oacc_csr_to_host - - subroutine i_oacc_csr_to_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_csr_to_dev - - subroutine i_oacc_csr_to_host(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_csr_to_host - end module psb_c_oacc_csr_mat_mod diff --git a/openacc/psb_c_oacc_ell_mat_mod.F90 b/openacc/psb_c_oacc_ell_mat_mod.F90 index 5e5dc302..d23d4d0a 100644 --- a/openacc/psb_c_oacc_ell_mat_mod.F90 +++ b/openacc/psb_c_oacc_ell_mat_mod.F90 @@ -1,5 +1,6 @@ module psb_c_oacc_ell_mat_mod use iso_c_binding + use openacc use psb_c_mat_mod use psb_c_ell_mat_mod use psb_c_oacc_vect_mod @@ -21,6 +22,7 @@ module psb_c_oacc_ell_mat_mod procedure, pass(a) :: set_dev => c_oacc_ell_set_dev procedure, pass(a) :: sync_space => c_oacc_ell_sync_space procedure, pass(a) :: sync => c_oacc_ell_sync + procedure, pass(a) :: free_space => c_oacc_ell_free_space procedure, pass(a) :: free => c_oacc_ell_free procedure, pass(a) :: vect_mv => psb_c_oacc_ell_vect_mv procedure, pass(a) :: in_vect_sv => psb_c_oacc_ell_inner_vect_sv @@ -152,31 +154,32 @@ module psb_c_oacc_ell_mat_mod contains - subroutine c_oacc_ell_free(a) + subroutine c_oacc_ell_free_space(a) use psb_base_mod implicit none class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) then - !$acc exit data delete(a%val) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) - end if - if (allocated(a%irn)) then - !$acc exit data delete(a%irn) - end if - if (allocated(a%idiag)) then - !$acc exit data delete(a%idiag) - end if + if (allocated(a%val)) call acc_delete_finalize(a%val) + if (allocated(a%ja)) call acc_delete_finalize(a%ja) + if (allocated(a%irn)) call acc_delete_finalize(a%irn) + if (allocated(a%idiag)) call acc_delete_finalize(a%idiag) + + return + end subroutine c_oacc_ell_free_space + + subroutine c_oacc_ell_free(a) + use psb_base_mod + implicit none + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + call a%free_space() call a%psb_c_ell_sparse_mat%free() return end subroutine c_oacc_ell_free - function c_oacc_ell_sizeof(a) result(res) implicit none class(psb_c_oacc_ell_sparse_mat), intent(in) :: a @@ -196,41 +199,12 @@ contains implicit none class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a - if (allocated(a%val)) then - call c_oacc_create_dev(a%val) - end if - if (allocated(a%ja)) then - call i_oacc_create_dev(a%ja) - end if - if (allocated(a%irn)) then - call i_oacc_create_dev_scalar(a%irn) - end if - if (allocated(a%idiag)) then - call i_oacc_create_dev_scalar(a%idiag) - end if - - contains - subroutine c_oacc_create_dev(v) - implicit none - complex(psb_spk_), intent(in) :: v(:,:) - !$acc enter data copyin(v) - end subroutine c_oacc_create_dev - - subroutine i_oacc_create_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:,:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev - - subroutine i_oacc_create_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev_scalar + if (allocated(a%val)) call acc_create(a%val) + if (allocated(a%ja)) call acc_create(a%ja) + if (allocated(a%irn)) call acc_create(a%irn) + if (allocated(a%idiag)) call acc_create(a%idiag) end subroutine c_oacc_ell_sync_space - - function c_oacc_ell_is_host(a) result(res) implicit none class(psb_c_oacc_ell_sparse_mat), intent(in) :: a @@ -279,7 +253,7 @@ contains function c_oacc_ell_get_fmt() result(res) implicit none character(len=5) :: res - res = 'ELL_oacc' + res = 'ELLOA' end function c_oacc_ell_get_fmt subroutine c_oacc_ell_sync(a) @@ -290,64 +264,17 @@ contains tmpa => a if (a%is_dev()) then - call c_oacc_ell_to_host(a%val) - call i_oacc_ell_to_host(a%ja) - call i_oacc_ell_to_host_scalar(a%irn) - call i_oacc_ell_to_host_scalar(a%idiag) + call acc_update_self(a%val) + call acc_update_self(a%ja) + call acc_update_self(a%irn) + call acc_update_self(a%idiag) else if (a%is_host()) then - call c_oacc_ell_to_dev(a%val) - call i_oacc_ell_to_dev(a%ja) - call i_oacc_ell_to_dev_scalar(a%irn) - call i_oacc_ell_to_dev_scalar(a%idiag) + call acc_update_device(a%val) + call acc_update_device(a%ja) + call acc_update_device(a%irn) + call acc_update_device(a%idiag) end if call tmpa%set_sync() end subroutine c_oacc_ell_sync - subroutine c_oacc_ell_to_dev_scalar(v) - implicit none - complex(psb_spk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine c_oacc_ell_to_dev_scalar - - subroutine c_oacc_ell_to_dev(v) - implicit none - complex(psb_spk_), intent(in) :: v(:,:) - !$acc update device(v) - end subroutine c_oacc_ell_to_dev - - subroutine c_oacc_ell_to_host(v) - implicit none - complex(psb_spk_), intent(in) :: v(:,:) - !$acc update self(v) - end subroutine c_oacc_ell_to_host - - subroutine c_oacc_ell_to_host_scalar(v) - implicit none - complex(psb_spk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine c_oacc_ell_to_host_scalar - - subroutine i_oacc_ell_to_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:,:) - !$acc update device(v) - end subroutine i_oacc_ell_to_dev - - subroutine i_oacc_ell_to_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_ell_to_dev_scalar - - subroutine i_oacc_ell_to_host(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:,:) - !$acc update self(v) - end subroutine i_oacc_ell_to_host - - subroutine i_oacc_ell_to_host_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_ell_to_host_scalar end module psb_c_oacc_ell_mat_mod diff --git a/openacc/psb_c_oacc_hll_mat_mod.F90 b/openacc/psb_c_oacc_hll_mat_mod.F90 index faad0a1b..2d2095bf 100644 --- a/openacc/psb_c_oacc_hll_mat_mod.F90 +++ b/openacc/psb_c_oacc_hll_mat_mod.F90 @@ -1,5 +1,6 @@ module psb_c_oacc_hll_mat_mod use iso_c_binding + use openacc use psb_c_mat_mod use psb_c_hll_mat_mod use psb_c_oacc_vect_mod @@ -21,6 +22,7 @@ module psb_c_oacc_hll_mat_mod procedure, pass(a) :: set_dev => c_oacc_hll_set_dev procedure, pass(a) :: sync_space => c_oacc_hll_sync_space procedure, pass(a) :: sync => c_oacc_hll_sync + procedure, pass(a) :: free_space => c_oacc_hll_free_space procedure, pass(a) :: free => c_oacc_hll_free procedure, pass(a) :: vect_mv => psb_c_oacc_hll_vect_mv procedure, pass(a) :: in_vect_sv => psb_c_oacc_hll_inner_vect_sv @@ -152,28 +154,28 @@ module psb_c_oacc_hll_mat_mod contains - subroutine c_oacc_hll_free(a) + subroutine c_oacc_hll_free_space(a) use psb_base_mod implicit none class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) then - !$acc exit data delete(a%val) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) - end if - if (allocated(a%irn)) then - !$acc exit data delete(a%irn) - end if - if (allocated(a%idiag)) then - !$acc exit data delete(a%idiag) - end if - if (allocated(a%hkoffs)) then - !$acc exit data delete(a%hkoffs) - end if + if (allocated(a%val)) call acc_delete_finalize(a%val) + if (allocated(a%ja)) call acc_delete_finalize(a%ja) + if (allocated(a%irn)) call acc_delete_finalize(a%irn) + if (allocated(a%idiag)) call acc_delete_finalize(a%idiag) + if (allocated(a%hkoffs)) call acc_delete_finalize(a%hkoffs) + + return + end subroutine c_oacc_hll_free_space + + subroutine c_oacc_hll_free(a) + use psb_base_mod + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + call a%free_space() call a%psb_c_hll_sparse_mat%free() return @@ -244,48 +246,18 @@ contains function c_oacc_hll_get_fmt() result(res) implicit none character(len=5) :: res - res = 'HLL_oacc' + res = 'HLLOA' end function c_oacc_hll_get_fmt subroutine c_oacc_hll_sync_space(a) implicit none class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a - if (allocated(a%val)) then - call c_oacc_create_dev(a%val) - end if - if (allocated(a%ja)) then - call i_oacc_create_dev(a%ja) - end if - if (allocated(a%irn)) then - call i_oacc_create_dev_scalar(a%irn) - end if - if (allocated(a%idiag)) then - call i_oacc_create_dev_scalar(a%idiag) - end if - if (allocated(a%hkoffs)) then - call i_oacc_create_dev_scalar(a%hkoffs) - end if - - contains - subroutine c_oacc_create_dev(v) - implicit none - complex(psb_spk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine c_oacc_create_dev - - subroutine i_oacc_create_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev - - subroutine i_oacc_create_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev_scalar - + if (allocated(a%val)) call acc_create(a%val) + if (allocated(a%ja)) call acc_create(a%ja) + if (allocated(a%irn)) call acc_create(a%irn) + if (allocated(a%idiag)) call acc_create(a%idiag) + if (allocated(a%hkoffs)) call acc_create(a%hkoffs) end subroutine c_oacc_hll_sync_space @@ -297,56 +269,19 @@ contains tmpa => a if (a%is_dev()) then - call c_oacc_hll_to_host(a%val) - call i_oacc_hll_to_host(a%ja) - call i_oacc_hll_to_host_scalar(a%irn) - call i_oacc_hll_to_host_scalar(a%idiag) - call i_oacc_hll_to_host_scalar(a%hkoffs) + call acc_update_self(a%val) + call acc_update_self(a%ja) + call acc_update_self(a%irn) + call acc_update_self(a%idiag) + call acc_update_self(a%hkoffs) else if (a%is_host()) then - call c_oacc_hll_to_dev(a%val) - call i_oacc_hll_to_dev(a%ja) - call i_oacc_hll_to_dev_scalar(a%irn) - call i_oacc_hll_to_dev_scalar(a%idiag) - call i_oacc_hll_to_dev_scalar(a%hkoffs) + call acc_update_device(a%val) + call acc_update_device(a%ja) + call acc_update_device(a%irn) + call acc_update_device(a%idiag) + call acc_update_device(a%hkoffs) end if call tmpa%set_sync() end subroutine c_oacc_hll_sync - subroutine c_oacc_hll_to_host(v) - implicit none - complex(psb_spk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine c_oacc_hll_to_host - - subroutine c_oacc_hll_to_dev(v) - implicit none - complex(psb_spk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine c_oacc_hll_to_dev - - subroutine i_oacc_hll_to_host(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_hll_to_host - - subroutine i_oacc_hll_to_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_hll_to_dev - - subroutine i_oacc_hll_to_host_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_hll_to_host_scalar - - subroutine i_oacc_hll_to_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_hll_to_dev_scalar - - end module psb_c_oacc_hll_mat_mod diff --git a/openacc/psb_c_oacc_vect_mod.F90 b/openacc/psb_c_oacc_vect_mod.F90 index 7362ba0e..79ff0ca3 100644 --- a/openacc/psb_c_oacc_vect_mod.F90 +++ b/openacc/psb_c_oacc_vect_mod.F90 @@ -59,7 +59,7 @@ module psb_c_oacc_vect_mod procedure, pass(x) :: asum => c_oacc_asum procedure, pass(x) :: absval1 => c_oacc_absval1 procedure, pass(x) :: absval2 => c_oacc_absval2 - + final :: c_oacc_final_vect_free end type psb_c_vect_oacc interface @@ -164,21 +164,25 @@ contains implicit none class(psb_c_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(in) :: n - real(psb_spk_) :: res - real(psb_spk_) :: mx + real(psb_spk_) :: res integer(psb_ipk_) :: info if (x%is_host()) call x%sync() - mx = c_oacc_amax(n,x) - res = c_inner_oacc_nrm2(n, mx, x%v) +!!$ write(0,*)'oacc_nrm2' + res = c_inner_oacc_nrm2(n, x%v) contains - function c_inner_oacc_nrm2(n, mx,x) result(res) + function c_inner_oacc_nrm2(n, x) result(res) integer(psb_ipk_) :: n complex(psb_spk_) :: x(:) - real(psb_spk_) :: mx, res - real(psb_spk_) :: sum + real(psb_spk_) :: res + real(psb_spk_) :: sum, mx integer(psb_ipk_) :: i - sum = 0.0 + mx = szero + !$acc parallel loop reduction(max:mx) + do i = 1, n + if (abs(x(i)) > mx) mx = abs(x(i)) + end do + sum = szero !$acc parallel loop reduction(+:sum) do i = 1, n sum = sum + abs(x(i)/mx)**2 @@ -203,7 +207,7 @@ contains real(psb_spk_) :: res real(psb_spk_) :: max_val integer(psb_ipk_) :: i - max_val = -huge(0.0) + max_val = szero !$acc parallel loop reduction(max:max_val) do i = 1, n if (abs(x(i)) > max_val) max_val = abs(x(i)) @@ -228,7 +232,7 @@ contains complex(psb_spk_) :: x(:) real(psb_spk_) :: res integer(psb_ipk_) :: i - res = 0.0 + res = szero !$acc parallel loop reduction(+:res) do i = 1, n res = res + abs(x(i)) @@ -271,92 +275,6 @@ contains call z%set_host() end subroutine c_oacc_mlt_a_2 - -!!$ subroutine c_oacc_mlt_v(x, y, info) -!!$ implicit none -!!$ class(psb_c_base_vect_type), intent(inout) :: x -!!$ class(psb_c_vect_oacc), intent(inout) :: y -!!$ integer(psb_ipk_), intent(out) :: info -!!$ -!!$ integer(psb_ipk_) :: i, n -!!$ -!!$ info = 0 -!!$ n = min(x%get_nrows(), y%get_nrows()) -!!$ select type(xx => x) -!!$ type is (psb_c_base_vect_type) -!!$ if (y%is_dev()) call y%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ y%v(i) = y%v(i) * xx%v(i) -!!$ end do -!!$ call y%set_host() -!!$ class default -!!$ if (xx%is_dev()) call xx%sync() -!!$ if (y%is_dev()) call y%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ y%v(i) = y%v(i) * xx%v(i) -!!$ end do -!!$ call y%set_host() -!!$ end select -!!$ end subroutine c_oacc_mlt_v -!!$ -!!$ subroutine c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) -!!$ use psi_serial_mod -!!$ use psb_string_mod -!!$ implicit none -!!$ complex(psb_spk_), intent(in) :: alpha, beta -!!$ class(psb_c_base_vect_type), intent(inout) :: x -!!$ class(psb_c_base_vect_type), intent(inout) :: y -!!$ class(psb_c_vect_oacc), intent(inout) :: z -!!$ integer(psb_ipk_), intent(out) :: info -!!$ character(len=1), intent(in), optional :: conjgx, conjgy -!!$ integer(psb_ipk_) :: i, n -!!$ logical :: conjgx_, conjgy_ -!!$ -!!$ conjgx_ = .false. -!!$ conjgy_ = .false. -!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') -!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') -!!$ -!!$ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) -!!$ -!!$ info = 0 -!!$ select type(xx => x) -!!$ class is (psb_c_vect_oacc) -!!$ select type (yy => y) -!!$ class is (psb_c_vect_oacc) -!!$ if (xx%is_host()) call xx%sync() -!!$ if (yy%is_host()) call yy%sync() -!!$ if ((beta /= czero) .and. (z%is_host())) call z%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) -!!$ end do -!!$ call z%set_dev() -!!$ class default -!!$ if (xx%is_dev()) call xx%sync() -!!$ if (yy%is_dev()) call yy%sync() -!!$ if ((beta /= czero) .and. (z%is_dev())) call z%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) -!!$ end do -!!$ call z%set_host() -!!$ end select -!!$ class default -!!$ if (x%is_dev()) call x%sync() -!!$ if (y%is_dev()) call y%sync() -!!$ if ((beta /= czero) .and. (z%is_dev())) call z%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i) -!!$ end do -!!$ call z%set_host() -!!$ end select -!!$ end subroutine c_oacc_mlt_v_2 - - subroutine c_oacc_axpby_v(m, alpha, x, beta, y, info) !use psi_serial_mod implicit none @@ -414,7 +332,7 @@ contains integer(psb_ipk_) :: i if ((beta /= czero) .and. (y%is_dev())) call y%sync() - !$acc parallel loop + do i = 1, m y%v(i) = alpha * x(i) + beta * y%v(i) end do @@ -432,44 +350,44 @@ contains integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nx, ny, nz, i logical :: gpu_done - write(0,*)'upd_xyz' + info = psb_success_ gpu_done = .false. select type(xx => x) class is (psb_c_vect_oacc) - select type(yy => y) + select type(yy => y) + class is (psb_c_vect_oacc) + select type(zz => z) class is (psb_c_vect_oacc) - select type(zz => z) - class is (psb_c_vect_oacc) - if ((beta /= czero) .and. yy%is_host()) call yy%sync() - if ((delta /= czero) .and. zz%is_host()) call zz%sync() - if (xx%is_host()) call xx%sync() - nx = size(xx%v) - ny = size(yy%v) - nz = size(zz%v) - if ((nx < m) .or. (ny < m) .or. (nz < m)) then - info = psb_err_internal_error_ - else - !$acc parallel loop - do i = 1, m - yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) - zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) - end do - end if - call yy%set_dev() - call zz%set_dev() - gpu_done = .true. - end select + if ((beta /= czero) .and. yy%is_host()) call yy%sync() + if ((delta /= czero) .and. zz%is_host()) call zz%sync() + if (xx%is_host()) call xx%sync() + nx = size(xx%v) + ny = size(yy%v) + nz = size(zz%v) + if ((nx < m) .or. (ny < m) .or. (nz < m)) then + info = psb_err_internal_error_ + else + !$acc parallel loop + do i = 1, m + yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) + zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) + end do + end if + call yy%set_dev() + call zz%set_dev() + gpu_done = .true. end select + end select end select if (.not. gpu_done) then - if (x%is_host()) call x%sync() - if (y%is_host()) call y%sync() - if (z%is_host()) call z%sync() - call y%axpby(m, alpha, x, beta, info) - call z%axpby(m, gamma, y, delta, info) + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + if (z%is_host()) call z%sync() + call y%axpby(m, alpha, x, beta, info) + call z%axpby(m, gamma, y, delta, info) end if end subroutine c_oacc_upd_xyz @@ -676,7 +594,7 @@ contains if (x%is_dev()) call x%sync() call x%psb_c_base_vect_type%ins(n, irl, val, dupl, info) call x%set_host() - !$acc update device(x%v) + end subroutine c_oacc_ins_a @@ -687,16 +605,14 @@ contains class(psb_c_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info + call x%free(info) call x%all(n, info) if (info /= 0) then call psb_errpush(info, 'c_oacc_bld_mn', i_err=(/n, n, n, n, n/)) end if call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data copyin(x%v) - + call x%sync_space() + end subroutine c_oacc_bld_mn @@ -707,6 +623,7 @@ contains class(psb_c_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info + call x%free(info) call psb_realloc(size(this), x%v, info) if (info /= 0) then info = psb_err_alloc_request_ @@ -714,13 +631,9 @@ contains i_err=(/size(this), izero, izero, izero, izero/)) return end if - x%v(:) = this(:) call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data copyin(x%v) + call x%sync_space() end subroutine c_oacc_bld_x @@ -848,54 +761,21 @@ contains end function c_oacc_dot_a - ! subroutine c_oacc_set_vect(x,y) - ! implicit none - ! class(psb_c_vect_oacc), intent(inout) :: x - ! complex(psb_spk_), intent(in) :: y(:) - ! integer(psb_ipk_) :: info - - ! if (size(x%v) /= size(y)) then - ! call x%free(info) - ! call x%all(size(y),info) - ! end if - ! x%v(:) = y(:) - ! call x%set_host() - ! end subroutine c_oacc_set_vect - - subroutine c_oacc_to_dev(v) - implicit none - complex(psb_spk_) :: v(:) - !$acc update device(v) - end subroutine c_oacc_to_dev - - subroutine c_oacc_to_host(v) - implicit none - complex(psb_spk_) :: v(:) - !$acc update self(v) - end subroutine c_oacc_to_host subroutine c_oacc_sync_space(x) implicit none class(psb_c_vect_oacc), intent(inout) :: x - if (allocated(x%v)) then - if (.not.acc_is_present(x%v)) call c_oacc_create_dev(x%v) - end if - contains - subroutine c_oacc_create_dev(v) - implicit none - complex(psb_spk_) :: v(:) - !$acc enter data copyin(v) - end subroutine c_oacc_create_dev + if (allocated(x%v)) call acc_create(x%v) end subroutine c_oacc_sync_space subroutine c_oacc_sync(x) implicit none class(psb_c_vect_oacc), intent(inout) :: x if (x%is_dev()) then - call c_oacc_to_host(x%v) + call acc_update_self(x%v) end if if (x%is_host()) then - call c_oacc_to_dev(x%v) + call acc_update_device(x%v) end if call x%set_sync() end subroutine c_oacc_sync @@ -954,33 +834,36 @@ contains integer(psb_ipk_), intent(out) :: info call psb_realloc(n, x%v, info) - if (info == 0) then - call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data create(x%v) - call x%sync_space() - end if if (info /= 0) then info = psb_err_alloc_request_ call psb_errpush(info, 'c_oacc_all', & i_err=(/n, n, n, n, n/)) end if + call x%set_host() + call x%sync_space() end subroutine c_oacc_vect_all + subroutine c_oacc_final_vect_free(x) + implicit none + type(psb_c_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + info = 0 + if (allocated(x%v)) then + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) + deallocate(x%v, stat=info) + end if + + end subroutine c_oacc_final_vect_free + subroutine c_oacc_vect_free(x, info) implicit none class(psb_c_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) then - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) deallocate(x%v, stat=info) end if - end subroutine c_oacc_vect_free function c_oacc_get_size(x) result(res) diff --git a/openacc/psb_d_oacc_csr_mat_mod.F90 b/openacc/psb_d_oacc_csr_mat_mod.F90 index 8b7e111e..74031e89 100644 --- a/openacc/psb_d_oacc_csr_mat_mod.F90 +++ b/openacc/psb_d_oacc_csr_mat_mod.F90 @@ -1,6 +1,7 @@ module psb_d_oacc_csr_mat_mod use iso_c_binding + use openacc use psb_d_mat_mod use psb_d_oacc_vect_mod !use oaccsparse_mod @@ -35,6 +36,7 @@ module psb_d_oacc_csr_mat_mod procedure, pass(a) :: set_host => d_oacc_csr_set_host procedure, pass(a) :: set_sync => d_oacc_csr_set_sync procedure, pass(a) :: set_dev => d_oacc_csr_set_dev + procedure, pass(a) :: free_space => d_oacc_csr_free_space procedure, pass(a) :: sync_space => d_oacc_csr_sync_space procedure, pass(a) :: sync => d_oacc_csr_sync end type psb_d_oacc_csr_sparse_mat @@ -154,22 +156,26 @@ module psb_d_oacc_csr_mat_mod contains - subroutine d_oacc_csr_free(a) + subroutine d_oacc_csr_free_space(a) use psb_base_mod implicit none class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) then - !$acc exit data delete(a%val) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) - end if - if (allocated(a%irp)) then - !$acc exit data delete(a%irp) - end if + if (allocated(a%val)) call acc_delete_finalize(a%val) + if (allocated(a%ja)) call acc_delete_finalize(a%ja) + if (allocated(a%irp)) call acc_delete_finalize(a%irp) + + return + end subroutine d_oacc_csr_free_space + + subroutine d_oacc_csr_free(a) + use psb_base_mod + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + call a%free_space() call a%psb_d_csr_sparse_mat%free() return @@ -193,7 +199,7 @@ contains function d_oacc_csr_get_fmt() result(res) implicit none character(len=5) :: res - res = 'CSR_oacc' + res = 'CSROA' end function d_oacc_csr_get_fmt subroutine d_oacc_csr_all(m, n, nz, a, info) @@ -202,19 +208,8 @@ contains class(psb_d_oacc_csr_sparse_mat), intent(out) :: a integer(psb_ipk_), intent(out) :: info - info = 0 - if (allocated(a%val)) then - !$acc exit data delete(a%val) finalize - deallocate(a%val, stat=info) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) finalize - deallocate(a%ja, stat=info) - end if - if (allocated(a%irp)) then - !$acc exit data delete(a%irp) finalize - deallocate(a%irp, stat=info) - end if + info = 0 + call a%free() call a%set_nrows(m) call a%set_ncols(n) @@ -274,26 +269,9 @@ contains subroutine d_oacc_csr_sync_space(a) implicit none class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a - if (allocated(a%val)) then - call d_oacc_create_dev(a%val) - end if - if (allocated(a%ja)) then - call i_oacc_create_dev(a%ja) - end if - if (allocated(a%irp)) then - call i_oacc_create_dev(a%irp) - end if - contains - subroutine d_oacc_create_dev(v) - implicit none - real(psb_dpk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine d_oacc_create_dev - subroutine i_oacc_create_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev + if (allocated(a%val)) call acc_create(a%val) + if (allocated(a%ja)) call acc_create(a%ja) + if (allocated(a%irp)) call acc_create(a%irp) end subroutine d_oacc_csr_sync_space subroutine d_oacc_csr_sync(a) @@ -304,40 +282,16 @@ contains tmpa => a if (a%is_dev()) then - call d_oacc_csr_to_host(a%val) - call i_oacc_csr_to_host(a%ja) - call i_oacc_csr_to_host(a%irp) + call acc_update_self(a%val) + call acc_update_self(a%ja) + call acc_update_self(a%irp) else if (a%is_host()) then - call d_oacc_csr_to_dev(a%val) - call i_oacc_csr_to_dev(a%ja) - call i_oacc_csr_to_dev(a%irp) + call acc_update_device(a%val) + call acc_update_device(a%ja) + call acc_update_device(a%irp) end if call tmpa%set_sync() end subroutine d_oacc_csr_sync - subroutine d_oacc_csr_to_dev(v) - implicit none - real(psb_dpk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine d_oacc_csr_to_dev - - subroutine d_oacc_csr_to_host(v) - implicit none - real(psb_dpk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine d_oacc_csr_to_host - - subroutine i_oacc_csr_to_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_csr_to_dev - - subroutine i_oacc_csr_to_host(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_csr_to_host - end module psb_d_oacc_csr_mat_mod diff --git a/openacc/psb_d_oacc_ell_mat_mod.F90 b/openacc/psb_d_oacc_ell_mat_mod.F90 index 962ad2db..45ffc92d 100644 --- a/openacc/psb_d_oacc_ell_mat_mod.F90 +++ b/openacc/psb_d_oacc_ell_mat_mod.F90 @@ -1,5 +1,6 @@ module psb_d_oacc_ell_mat_mod use iso_c_binding + use openacc use psb_d_mat_mod use psb_d_ell_mat_mod use psb_d_oacc_vect_mod @@ -21,6 +22,7 @@ module psb_d_oacc_ell_mat_mod procedure, pass(a) :: set_dev => d_oacc_ell_set_dev procedure, pass(a) :: sync_space => d_oacc_ell_sync_space procedure, pass(a) :: sync => d_oacc_ell_sync + procedure, pass(a) :: free_space => d_oacc_ell_free_space procedure, pass(a) :: free => d_oacc_ell_free procedure, pass(a) :: vect_mv => psb_d_oacc_ell_vect_mv procedure, pass(a) :: in_vect_sv => psb_d_oacc_ell_inner_vect_sv @@ -152,31 +154,32 @@ module psb_d_oacc_ell_mat_mod contains - subroutine d_oacc_ell_free(a) + subroutine d_oacc_ell_free_space(a) use psb_base_mod implicit none class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) then - !$acc exit data delete(a%val) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) - end if - if (allocated(a%irn)) then - !$acc exit data delete(a%irn) - end if - if (allocated(a%idiag)) then - !$acc exit data delete(a%idiag) - end if + if (allocated(a%val)) call acc_delete_finalize(a%val) + if (allocated(a%ja)) call acc_delete_finalize(a%ja) + if (allocated(a%irn)) call acc_delete_finalize(a%irn) + if (allocated(a%idiag)) call acc_delete_finalize(a%idiag) + + return + end subroutine d_oacc_ell_free_space + + subroutine d_oacc_ell_free(a) + use psb_base_mod + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + call a%free_space() call a%psb_d_ell_sparse_mat%free() return end subroutine d_oacc_ell_free - function d_oacc_ell_sizeof(a) result(res) implicit none class(psb_d_oacc_ell_sparse_mat), intent(in) :: a @@ -196,41 +199,12 @@ contains implicit none class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a - if (allocated(a%val)) then - call d_oacc_create_dev(a%val) - end if - if (allocated(a%ja)) then - call i_oacc_create_dev(a%ja) - end if - if (allocated(a%irn)) then - call i_oacc_create_dev_scalar(a%irn) - end if - if (allocated(a%idiag)) then - call i_oacc_create_dev_scalar(a%idiag) - end if - - contains - subroutine d_oacc_create_dev(v) - implicit none - real(psb_dpk_), intent(in) :: v(:,:) - !$acc enter data copyin(v) - end subroutine d_oacc_create_dev - - subroutine i_oacc_create_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:,:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev - - subroutine i_oacc_create_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev_scalar + if (allocated(a%val)) call acc_create(a%val) + if (allocated(a%ja)) call acc_create(a%ja) + if (allocated(a%irn)) call acc_create(a%irn) + if (allocated(a%idiag)) call acc_create(a%idiag) end subroutine d_oacc_ell_sync_space - - function d_oacc_ell_is_host(a) result(res) implicit none class(psb_d_oacc_ell_sparse_mat), intent(in) :: a @@ -279,7 +253,7 @@ contains function d_oacc_ell_get_fmt() result(res) implicit none character(len=5) :: res - res = 'ELL_oacc' + res = 'ELLOA' end function d_oacc_ell_get_fmt subroutine d_oacc_ell_sync(a) @@ -290,64 +264,17 @@ contains tmpa => a if (a%is_dev()) then - call d_oacc_ell_to_host(a%val) - call i_oacc_ell_to_host(a%ja) - call i_oacc_ell_to_host_scalar(a%irn) - call i_oacc_ell_to_host_scalar(a%idiag) + call acc_update_self(a%val) + call acc_update_self(a%ja) + call acc_update_self(a%irn) + call acc_update_self(a%idiag) else if (a%is_host()) then - call d_oacc_ell_to_dev(a%val) - call i_oacc_ell_to_dev(a%ja) - call i_oacc_ell_to_dev_scalar(a%irn) - call i_oacc_ell_to_dev_scalar(a%idiag) + call acc_update_device(a%val) + call acc_update_device(a%ja) + call acc_update_device(a%irn) + call acc_update_device(a%idiag) end if call tmpa%set_sync() end subroutine d_oacc_ell_sync - subroutine d_oacc_ell_to_dev_scalar(v) - implicit none - real(psb_dpk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine d_oacc_ell_to_dev_scalar - - subroutine d_oacc_ell_to_dev(v) - implicit none - real(psb_dpk_), intent(in) :: v(:,:) - !$acc update device(v) - end subroutine d_oacc_ell_to_dev - - subroutine d_oacc_ell_to_host(v) - implicit none - real(psb_dpk_), intent(in) :: v(:,:) - !$acc update self(v) - end subroutine d_oacc_ell_to_host - - subroutine d_oacc_ell_to_host_scalar(v) - implicit none - real(psb_dpk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine d_oacc_ell_to_host_scalar - - subroutine i_oacc_ell_to_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:,:) - !$acc update device(v) - end subroutine i_oacc_ell_to_dev - - subroutine i_oacc_ell_to_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_ell_to_dev_scalar - - subroutine i_oacc_ell_to_host(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:,:) - !$acc update self(v) - end subroutine i_oacc_ell_to_host - - subroutine i_oacc_ell_to_host_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_ell_to_host_scalar end module psb_d_oacc_ell_mat_mod diff --git a/openacc/psb_d_oacc_hll_mat_mod.F90 b/openacc/psb_d_oacc_hll_mat_mod.F90 index b1c36a65..8009f085 100644 --- a/openacc/psb_d_oacc_hll_mat_mod.F90 +++ b/openacc/psb_d_oacc_hll_mat_mod.F90 @@ -1,5 +1,6 @@ module psb_d_oacc_hll_mat_mod use iso_c_binding + use openacc use psb_d_mat_mod use psb_d_hll_mat_mod use psb_d_oacc_vect_mod @@ -21,6 +22,7 @@ module psb_d_oacc_hll_mat_mod procedure, pass(a) :: set_dev => d_oacc_hll_set_dev procedure, pass(a) :: sync_space => d_oacc_hll_sync_space procedure, pass(a) :: sync => d_oacc_hll_sync + procedure, pass(a) :: free_space => d_oacc_hll_free_space procedure, pass(a) :: free => d_oacc_hll_free procedure, pass(a) :: vect_mv => psb_d_oacc_hll_vect_mv procedure, pass(a) :: in_vect_sv => psb_d_oacc_hll_inner_vect_sv @@ -152,28 +154,28 @@ module psb_d_oacc_hll_mat_mod contains - subroutine d_oacc_hll_free(a) + subroutine d_oacc_hll_free_space(a) use psb_base_mod implicit none class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) then - !$acc exit data delete(a%val) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) - end if - if (allocated(a%irn)) then - !$acc exit data delete(a%irn) - end if - if (allocated(a%idiag)) then - !$acc exit data delete(a%idiag) - end if - if (allocated(a%hkoffs)) then - !$acc exit data delete(a%hkoffs) - end if + if (allocated(a%val)) call acc_delete_finalize(a%val) + if (allocated(a%ja)) call acc_delete_finalize(a%ja) + if (allocated(a%irn)) call acc_delete_finalize(a%irn) + if (allocated(a%idiag)) call acc_delete_finalize(a%idiag) + if (allocated(a%hkoffs)) call acc_delete_finalize(a%hkoffs) + + return + end subroutine d_oacc_hll_free_space + + subroutine d_oacc_hll_free(a) + use psb_base_mod + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + call a%free_space() call a%psb_d_hll_sparse_mat%free() return @@ -244,48 +246,18 @@ contains function d_oacc_hll_get_fmt() result(res) implicit none character(len=5) :: res - res = 'HLL_oacc' + res = 'HLLOA' end function d_oacc_hll_get_fmt subroutine d_oacc_hll_sync_space(a) implicit none class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a - if (allocated(a%val)) then - call d_oacc_create_dev(a%val) - end if - if (allocated(a%ja)) then - call i_oacc_create_dev(a%ja) - end if - if (allocated(a%irn)) then - call i_oacc_create_dev_scalar(a%irn) - end if - if (allocated(a%idiag)) then - call i_oacc_create_dev_scalar(a%idiag) - end if - if (allocated(a%hkoffs)) then - call i_oacc_create_dev_scalar(a%hkoffs) - end if - - contains - subroutine d_oacc_create_dev(v) - implicit none - real(psb_dpk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine d_oacc_create_dev - - subroutine i_oacc_create_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev - - subroutine i_oacc_create_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev_scalar - + if (allocated(a%val)) call acc_create(a%val) + if (allocated(a%ja)) call acc_create(a%ja) + if (allocated(a%irn)) call acc_create(a%irn) + if (allocated(a%idiag)) call acc_create(a%idiag) + if (allocated(a%hkoffs)) call acc_create(a%hkoffs) end subroutine d_oacc_hll_sync_space @@ -297,56 +269,19 @@ contains tmpa => a if (a%is_dev()) then - call d_oacc_hll_to_host(a%val) - call i_oacc_hll_to_host(a%ja) - call i_oacc_hll_to_host_scalar(a%irn) - call i_oacc_hll_to_host_scalar(a%idiag) - call i_oacc_hll_to_host_scalar(a%hkoffs) + call acc_update_self(a%val) + call acc_update_self(a%ja) + call acc_update_self(a%irn) + call acc_update_self(a%idiag) + call acc_update_self(a%hkoffs) else if (a%is_host()) then - call d_oacc_hll_to_dev(a%val) - call i_oacc_hll_to_dev(a%ja) - call i_oacc_hll_to_dev_scalar(a%irn) - call i_oacc_hll_to_dev_scalar(a%idiag) - call i_oacc_hll_to_dev_scalar(a%hkoffs) + call acc_update_device(a%val) + call acc_update_device(a%ja) + call acc_update_device(a%irn) + call acc_update_device(a%idiag) + call acc_update_device(a%hkoffs) end if call tmpa%set_sync() end subroutine d_oacc_hll_sync - subroutine d_oacc_hll_to_host(v) - implicit none - real(psb_dpk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine d_oacc_hll_to_host - - subroutine d_oacc_hll_to_dev(v) - implicit none - real(psb_dpk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine d_oacc_hll_to_dev - - subroutine i_oacc_hll_to_host(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_hll_to_host - - subroutine i_oacc_hll_to_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_hll_to_dev - - subroutine i_oacc_hll_to_host_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_hll_to_host_scalar - - subroutine i_oacc_hll_to_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_hll_to_dev_scalar - - end module psb_d_oacc_hll_mat_mod diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90 index 9ecbccb4..c7804bc1 100644 --- a/openacc/psb_d_oacc_vect_mod.F90 +++ b/openacc/psb_d_oacc_vect_mod.F90 @@ -59,7 +59,7 @@ module psb_d_oacc_vect_mod procedure, pass(x) :: asum => d_oacc_asum procedure, pass(x) :: absval1 => d_oacc_absval1 procedure, pass(x) :: absval2 => d_oacc_absval2 - + final :: d_oacc_final_vect_free end type psb_d_vect_oacc interface @@ -164,21 +164,25 @@ contains implicit none class(psb_d_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(in) :: n - real(psb_dpk_) :: res - real(psb_dpk_) :: mx + real(psb_dpk_) :: res integer(psb_ipk_) :: info if (x%is_host()) call x%sync() - mx = d_oacc_amax(n,x) - res = d_inner_oacc_nrm2(n, mx, x%v) +!!$ write(0,*)'oacc_nrm2' + res = d_inner_oacc_nrm2(n, x%v) contains - function d_inner_oacc_nrm2(n, mx,x) result(res) + function d_inner_oacc_nrm2(n, x) result(res) integer(psb_ipk_) :: n real(psb_dpk_) :: x(:) - real(psb_dpk_) :: mx, res - real(psb_dpk_) :: sum + real(psb_dpk_) :: res + real(psb_dpk_) :: sum, mx integer(psb_ipk_) :: i - sum = 0.0 + mx = dzero + !$acc parallel loop reduction(max:mx) + do i = 1, n + if (abs(x(i)) > mx) mx = abs(x(i)) + end do + sum = dzero !$acc parallel loop reduction(+:sum) do i = 1, n sum = sum + abs(x(i)/mx)**2 @@ -203,7 +207,7 @@ contains real(psb_dpk_) :: res real(psb_dpk_) :: max_val integer(psb_ipk_) :: i - max_val = -huge(0.0) + max_val = dzero !$acc parallel loop reduction(max:max_val) do i = 1, n if (abs(x(i)) > max_val) max_val = abs(x(i)) @@ -228,7 +232,7 @@ contains real(psb_dpk_) :: x(:) real(psb_dpk_) :: res integer(psb_ipk_) :: i - res = 0.0 + res = dzero !$acc parallel loop reduction(+:res) do i = 1, n res = res + abs(x(i)) @@ -271,92 +275,6 @@ contains call z%set_host() end subroutine d_oacc_mlt_a_2 - -!!$ subroutine d_oacc_mlt_v(x, y, info) -!!$ implicit none -!!$ class(psb_d_base_vect_type), intent(inout) :: x -!!$ class(psb_d_vect_oacc), intent(inout) :: y -!!$ integer(psb_ipk_), intent(out) :: info -!!$ -!!$ integer(psb_ipk_) :: i, n -!!$ -!!$ info = 0 -!!$ n = min(x%get_nrows(), y%get_nrows()) -!!$ select type(xx => x) -!!$ type is (psb_d_base_vect_type) -!!$ if (y%is_dev()) call y%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ y%v(i) = y%v(i) * xx%v(i) -!!$ end do -!!$ call y%set_host() -!!$ class default -!!$ if (xx%is_dev()) call xx%sync() -!!$ if (y%is_dev()) call y%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ y%v(i) = y%v(i) * xx%v(i) -!!$ end do -!!$ call y%set_host() -!!$ end select -!!$ end subroutine d_oacc_mlt_v -!!$ -!!$ subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) -!!$ use psi_serial_mod -!!$ use psb_string_mod -!!$ implicit none -!!$ real(psb_dpk_), intent(in) :: alpha, beta -!!$ class(psb_d_base_vect_type), intent(inout) :: x -!!$ class(psb_d_base_vect_type), intent(inout) :: y -!!$ class(psb_d_vect_oacc), intent(inout) :: z -!!$ integer(psb_ipk_), intent(out) :: info -!!$ character(len=1), intent(in), optional :: conjgx, conjgy -!!$ integer(psb_ipk_) :: i, n -!!$ logical :: conjgx_, conjgy_ -!!$ -!!$ conjgx_ = .false. -!!$ conjgy_ = .false. -!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') -!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') -!!$ -!!$ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) -!!$ -!!$ info = 0 -!!$ select type(xx => x) -!!$ class is (psb_d_vect_oacc) -!!$ select type (yy => y) -!!$ class is (psb_d_vect_oacc) -!!$ if (xx%is_host()) call xx%sync() -!!$ if (yy%is_host()) call yy%sync() -!!$ if ((beta /= dzero) .and. (z%is_host())) call z%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) -!!$ end do -!!$ call z%set_dev() -!!$ class default -!!$ if (xx%is_dev()) call xx%sync() -!!$ if (yy%is_dev()) call yy%sync() -!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) -!!$ end do -!!$ call z%set_host() -!!$ end select -!!$ class default -!!$ if (x%is_dev()) call x%sync() -!!$ if (y%is_dev()) call y%sync() -!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i) -!!$ end do -!!$ call z%set_host() -!!$ end select -!!$ end subroutine d_oacc_mlt_v_2 - - subroutine d_oacc_axpby_v(m, alpha, x, beta, y, info) !use psi_serial_mod implicit none @@ -414,7 +332,7 @@ contains integer(psb_ipk_) :: i if ((beta /= dzero) .and. (y%is_dev())) call y%sync() - !$acc parallel loop + do i = 1, m y%v(i) = alpha * x(i) + beta * y%v(i) end do @@ -432,44 +350,44 @@ contains integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nx, ny, nz, i logical :: gpu_done - write(0,*)'upd_xyz' + info = psb_success_ gpu_done = .false. select type(xx => x) class is (psb_d_vect_oacc) - select type(yy => y) + select type(yy => y) + class is (psb_d_vect_oacc) + select type(zz => z) class is (psb_d_vect_oacc) - select type(zz => z) - class is (psb_d_vect_oacc) - if ((beta /= dzero) .and. yy%is_host()) call yy%sync() - if ((delta /= dzero) .and. zz%is_host()) call zz%sync() - if (xx%is_host()) call xx%sync() - nx = size(xx%v) - ny = size(yy%v) - nz = size(zz%v) - if ((nx < m) .or. (ny < m) .or. (nz < m)) then - info = psb_err_internal_error_ - else - !$acc parallel loop - do i = 1, m - yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) - zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) - end do - end if - call yy%set_dev() - call zz%set_dev() - gpu_done = .true. - end select + if ((beta /= dzero) .and. yy%is_host()) call yy%sync() + if ((delta /= dzero) .and. zz%is_host()) call zz%sync() + if (xx%is_host()) call xx%sync() + nx = size(xx%v) + ny = size(yy%v) + nz = size(zz%v) + if ((nx < m) .or. (ny < m) .or. (nz < m)) then + info = psb_err_internal_error_ + else + !$acc parallel loop + do i = 1, m + yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) + zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) + end do + end if + call yy%set_dev() + call zz%set_dev() + gpu_done = .true. end select + end select end select if (.not. gpu_done) then - if (x%is_host()) call x%sync() - if (y%is_host()) call y%sync() - if (z%is_host()) call z%sync() - call y%axpby(m, alpha, x, beta, info) - call z%axpby(m, gamma, y, delta, info) + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + if (z%is_host()) call z%sync() + call y%axpby(m, alpha, x, beta, info) + call z%axpby(m, gamma, y, delta, info) end if end subroutine d_oacc_upd_xyz @@ -676,7 +594,7 @@ contains if (x%is_dev()) call x%sync() call x%psb_d_base_vect_type%ins(n, irl, val, dupl, info) call x%set_host() - !$acc update device(x%v) + end subroutine d_oacc_ins_a @@ -687,16 +605,14 @@ contains class(psb_d_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info + call x%free(info) call x%all(n, info) if (info /= 0) then call psb_errpush(info, 'd_oacc_bld_mn', i_err=(/n, n, n, n, n/)) end if call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data copyin(x%v) - + call x%sync_space() + end subroutine d_oacc_bld_mn @@ -707,6 +623,7 @@ contains class(psb_d_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info + call x%free(info) call psb_realloc(size(this), x%v, info) if (info /= 0) then info = psb_err_alloc_request_ @@ -714,13 +631,9 @@ contains i_err=(/size(this), izero, izero, izero, izero/)) return end if - x%v(:) = this(:) call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data copyin(x%v) + call x%sync_space() end subroutine d_oacc_bld_x @@ -848,54 +761,21 @@ contains end function d_oacc_dot_a - ! subroutine d_oacc_set_vect(x,y) - ! implicit none - ! class(psb_d_vect_oacc), intent(inout) :: x - ! real(psb_dpk_), intent(in) :: y(:) - ! integer(psb_ipk_) :: info - - ! if (size(x%v) /= size(y)) then - ! call x%free(info) - ! call x%all(size(y),info) - ! end if - ! x%v(:) = y(:) - ! call x%set_host() - ! end subroutine d_oacc_set_vect - - subroutine d_oacc_to_dev(v) - implicit none - real(psb_dpk_) :: v(:) - !$acc update device(v) - end subroutine d_oacc_to_dev - - subroutine d_oacc_to_host(v) - implicit none - real(psb_dpk_) :: v(:) - !$acc update self(v) - end subroutine d_oacc_to_host subroutine d_oacc_sync_space(x) implicit none class(psb_d_vect_oacc), intent(inout) :: x - if (allocated(x%v)) then - if (.not.acc_is_present(x%v)) call d_oacc_create_dev(x%v) - end if - contains - subroutine d_oacc_create_dev(v) - implicit none - real(psb_dpk_) :: v(:) - !$acc enter data copyin(v) - end subroutine d_oacc_create_dev + if (allocated(x%v)) call acc_create(x%v) end subroutine d_oacc_sync_space subroutine d_oacc_sync(x) implicit none class(psb_d_vect_oacc), intent(inout) :: x if (x%is_dev()) then - call d_oacc_to_host(x%v) + call acc_update_self(x%v) end if if (x%is_host()) then - call d_oacc_to_dev(x%v) + call acc_update_device(x%v) end if call x%set_sync() end subroutine d_oacc_sync @@ -954,33 +834,36 @@ contains integer(psb_ipk_), intent(out) :: info call psb_realloc(n, x%v, info) - if (info == 0) then - call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data create(x%v) - call x%sync_space() - end if if (info /= 0) then info = psb_err_alloc_request_ call psb_errpush(info, 'd_oacc_all', & i_err=(/n, n, n, n, n/)) end if + call x%set_host() + call x%sync_space() end subroutine d_oacc_vect_all + subroutine d_oacc_final_vect_free(x) + implicit none + type(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + info = 0 + if (allocated(x%v)) then + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) + deallocate(x%v, stat=info) + end if + + end subroutine d_oacc_final_vect_free + subroutine d_oacc_vect_free(x, info) implicit none class(psb_d_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) then - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) deallocate(x%v, stat=info) end if - end subroutine d_oacc_vect_free function d_oacc_get_size(x) result(res) diff --git a/openacc/psb_i_oacc_vect_mod.F90 b/openacc/psb_i_oacc_vect_mod.F90 index 3dbc48f1..8c813134 100644 --- a/openacc/psb_i_oacc_vect_mod.F90 +++ b/openacc/psb_i_oacc_vect_mod.F90 @@ -42,7 +42,7 @@ module psb_i_oacc_vect_mod procedure, pass(x) :: get_size => i_oacc_get_size - + final :: i_oacc_final_vect_free end type psb_i_vect_oacc @@ -252,7 +252,7 @@ contains if (x%is_dev()) call x%sync() call x%psb_i_base_vect_type%ins(n, irl, val, dupl, info) call x%set_host() - !$acc update device(x%v) + end subroutine i_oacc_ins_a @@ -263,16 +263,14 @@ contains class(psb_i_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info + call x%free(info) call x%all(n, info) if (info /= 0) then call psb_errpush(info, 'i_oacc_bld_mn', i_err=(/n, n, n, n, n/)) end if call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data copyin(x%v) - + call x%sync_space() + end subroutine i_oacc_bld_mn @@ -283,6 +281,7 @@ contains class(psb_i_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info + call x%free(info) call psb_realloc(size(this), x%v, info) if (info /= 0) then info = psb_err_alloc_request_ @@ -290,13 +289,9 @@ contains i_err=(/size(this), izero, izero, izero, izero/)) return end if - x%v(:) = this(:) call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data copyin(x%v) + call x%sync_space() end subroutine i_oacc_bld_x @@ -370,54 +365,21 @@ contains end function i_oacc_get_fmt - ! subroutine i_oacc_set_vect(x,y) - ! implicit none - ! class(psb_i_vect_oacc), intent(inout) :: x - ! integer(psb_ipk_), intent(in) :: y(:) - ! integer(psb_ipk_) :: info - - ! if (size(x%v) /= size(y)) then - ! call x%free(info) - ! call x%all(size(y),info) - ! end if - ! x%v(:) = y(:) - ! call x%set_host() - ! end subroutine i_oacc_set_vect - - subroutine i_oacc_to_dev(v) - implicit none - integer(psb_ipk_) :: v(:) - !$acc update device(v) - end subroutine i_oacc_to_dev - - subroutine i_oacc_to_host(v) - implicit none - integer(psb_ipk_) :: v(:) - !$acc update self(v) - end subroutine i_oacc_to_host subroutine i_oacc_sync_space(x) implicit none class(psb_i_vect_oacc), intent(inout) :: x - if (allocated(x%v)) then - if (.not.acc_is_present(x%v)) call i_oacc_create_dev(x%v) - end if - contains - subroutine i_oacc_create_dev(v) - implicit none - integer(psb_ipk_) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev + if (allocated(x%v)) call acc_create(x%v) end subroutine i_oacc_sync_space subroutine i_oacc_sync(x) implicit none class(psb_i_vect_oacc), intent(inout) :: x if (x%is_dev()) then - call i_oacc_to_host(x%v) + call acc_update_self(x%v) end if if (x%is_host()) then - call i_oacc_to_dev(x%v) + call acc_update_device(x%v) end if call x%set_sync() end subroutine i_oacc_sync @@ -476,33 +438,36 @@ contains integer(psb_ipk_), intent(out) :: info call psb_realloc(n, x%v, info) - if (info == 0) then - call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data create(x%v) - call x%sync_space() - end if if (info /= 0) then info = psb_err_alloc_request_ call psb_errpush(info, 'i_oacc_all', & i_err=(/n, n, n, n, n/)) end if + call x%set_host() + call x%sync_space() end subroutine i_oacc_vect_all + subroutine i_oacc_final_vect_free(x) + implicit none + type(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + info = 0 + if (allocated(x%v)) then + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) + deallocate(x%v, stat=info) + end if + + end subroutine i_oacc_final_vect_free + subroutine i_oacc_vect_free(x, info) implicit none class(psb_i_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) then - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) deallocate(x%v, stat=info) end if - end subroutine i_oacc_vect_free function i_oacc_get_size(x) result(res) diff --git a/openacc/psb_l_oacc_vect_mod.F90 b/openacc/psb_l_oacc_vect_mod.F90 index cdf28366..9ff100bc 100644 --- a/openacc/psb_l_oacc_vect_mod.F90 +++ b/openacc/psb_l_oacc_vect_mod.F90 @@ -44,7 +44,7 @@ module psb_l_oacc_vect_mod procedure, pass(x) :: get_size => l_oacc_get_size - + final :: l_oacc_final_vect_free end type psb_l_vect_oacc @@ -254,7 +254,7 @@ contains if (x%is_dev()) call x%sync() call x%psb_l_base_vect_type%ins(n, irl, val, dupl, info) call x%set_host() - !$acc update device(x%v) + end subroutine l_oacc_ins_a @@ -265,16 +265,14 @@ contains class(psb_l_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info + call x%free(info) call x%all(n, info) if (info /= 0) then call psb_errpush(info, 'l_oacc_bld_mn', i_err=(/n, n, n, n, n/)) end if call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data copyin(x%v) - + call x%sync_space() + end subroutine l_oacc_bld_mn @@ -285,6 +283,7 @@ contains class(psb_l_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info + call x%free(info) call psb_realloc(size(this), x%v, info) if (info /= 0) then info = psb_err_alloc_request_ @@ -292,13 +291,9 @@ contains i_err=(/size(this), izero, izero, izero, izero/)) return end if - x%v(:) = this(:) call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data copyin(x%v) + call x%sync_space() end subroutine l_oacc_bld_x @@ -372,54 +367,21 @@ contains end function l_oacc_get_fmt - ! subroutine l_oacc_set_vect(x,y) - ! implicit none - ! class(psb_l_vect_oacc), intent(inout) :: x - ! integer(psb_lpk_), intent(in) :: y(:) - ! integer(psb_ipk_) :: info - - ! if (size(x%v) /= size(y)) then - ! call x%free(info) - ! call x%all(size(y),info) - ! end if - ! x%v(:) = y(:) - ! call x%set_host() - ! end subroutine l_oacc_set_vect - - subroutine l_oacc_to_dev(v) - implicit none - integer(psb_lpk_) :: v(:) - !$acc update device(v) - end subroutine l_oacc_to_dev - - subroutine l_oacc_to_host(v) - implicit none - integer(psb_lpk_) :: v(:) - !$acc update self(v) - end subroutine l_oacc_to_host subroutine l_oacc_sync_space(x) implicit none class(psb_l_vect_oacc), intent(inout) :: x - if (allocated(x%v)) then - if (.not.acc_is_present(x%v)) call l_oacc_create_dev(x%v) - end if - contains - subroutine l_oacc_create_dev(v) - implicit none - integer(psb_lpk_) :: v(:) - !$acc enter data copyin(v) - end subroutine l_oacc_create_dev + if (allocated(x%v)) call acc_create(x%v) end subroutine l_oacc_sync_space subroutine l_oacc_sync(x) implicit none class(psb_l_vect_oacc), intent(inout) :: x if (x%is_dev()) then - call l_oacc_to_host(x%v) + call acc_update_self(x%v) end if if (x%is_host()) then - call l_oacc_to_dev(x%v) + call acc_update_device(x%v) end if call x%set_sync() end subroutine l_oacc_sync @@ -478,33 +440,36 @@ contains integer(psb_ipk_), intent(out) :: info call psb_realloc(n, x%v, info) - if (info == 0) then - call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data create(x%v) - call x%sync_space() - end if if (info /= 0) then info = psb_err_alloc_request_ call psb_errpush(info, 'l_oacc_all', & i_err=(/n, n, n, n, n/)) end if + call x%set_host() + call x%sync_space() end subroutine l_oacc_vect_all + subroutine l_oacc_final_vect_free(x) + implicit none + type(psb_l_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + info = 0 + if (allocated(x%v)) then + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) + deallocate(x%v, stat=info) + end if + + end subroutine l_oacc_final_vect_free + subroutine l_oacc_vect_free(x, info) implicit none class(psb_l_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) then - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) deallocate(x%v, stat=info) end if - end subroutine l_oacc_vect_free function l_oacc_get_size(x) result(res) diff --git a/openacc/psb_s_oacc_csr_mat_mod.F90 b/openacc/psb_s_oacc_csr_mat_mod.F90 index 89b10d08..b577daac 100644 --- a/openacc/psb_s_oacc_csr_mat_mod.F90 +++ b/openacc/psb_s_oacc_csr_mat_mod.F90 @@ -1,6 +1,7 @@ module psb_s_oacc_csr_mat_mod use iso_c_binding + use openacc use psb_s_mat_mod use psb_s_oacc_vect_mod !use oaccsparse_mod @@ -35,6 +36,7 @@ module psb_s_oacc_csr_mat_mod procedure, pass(a) :: set_host => s_oacc_csr_set_host procedure, pass(a) :: set_sync => s_oacc_csr_set_sync procedure, pass(a) :: set_dev => s_oacc_csr_set_dev + procedure, pass(a) :: free_space => s_oacc_csr_free_space procedure, pass(a) :: sync_space => s_oacc_csr_sync_space procedure, pass(a) :: sync => s_oacc_csr_sync end type psb_s_oacc_csr_sparse_mat @@ -154,22 +156,26 @@ module psb_s_oacc_csr_mat_mod contains - subroutine s_oacc_csr_free(a) + subroutine s_oacc_csr_free_space(a) use psb_base_mod implicit none class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) then - !$acc exit data delete(a%val) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) - end if - if (allocated(a%irp)) then - !$acc exit data delete(a%irp) - end if + if (allocated(a%val)) call acc_delete_finalize(a%val) + if (allocated(a%ja)) call acc_delete_finalize(a%ja) + if (allocated(a%irp)) call acc_delete_finalize(a%irp) + + return + end subroutine s_oacc_csr_free_space + + subroutine s_oacc_csr_free(a) + use psb_base_mod + implicit none + class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + call a%free_space() call a%psb_s_csr_sparse_mat%free() return @@ -193,7 +199,7 @@ contains function s_oacc_csr_get_fmt() result(res) implicit none character(len=5) :: res - res = 'CSR_oacc' + res = 'CSROA' end function s_oacc_csr_get_fmt subroutine s_oacc_csr_all(m, n, nz, a, info) @@ -202,19 +208,8 @@ contains class(psb_s_oacc_csr_sparse_mat), intent(out) :: a integer(psb_ipk_), intent(out) :: info - info = 0 - if (allocated(a%val)) then - !$acc exit data delete(a%val) finalize - deallocate(a%val, stat=info) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) finalize - deallocate(a%ja, stat=info) - end if - if (allocated(a%irp)) then - !$acc exit data delete(a%irp) finalize - deallocate(a%irp, stat=info) - end if + info = 0 + call a%free() call a%set_nrows(m) call a%set_ncols(n) @@ -274,26 +269,9 @@ contains subroutine s_oacc_csr_sync_space(a) implicit none class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a - if (allocated(a%val)) then - call s_oacc_create_dev(a%val) - end if - if (allocated(a%ja)) then - call i_oacc_create_dev(a%ja) - end if - if (allocated(a%irp)) then - call i_oacc_create_dev(a%irp) - end if - contains - subroutine s_oacc_create_dev(v) - implicit none - real(psb_spk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine s_oacc_create_dev - subroutine i_oacc_create_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev + if (allocated(a%val)) call acc_create(a%val) + if (allocated(a%ja)) call acc_create(a%ja) + if (allocated(a%irp)) call acc_create(a%irp) end subroutine s_oacc_csr_sync_space subroutine s_oacc_csr_sync(a) @@ -304,40 +282,16 @@ contains tmpa => a if (a%is_dev()) then - call s_oacc_csr_to_host(a%val) - call i_oacc_csr_to_host(a%ja) - call i_oacc_csr_to_host(a%irp) + call acc_update_self(a%val) + call acc_update_self(a%ja) + call acc_update_self(a%irp) else if (a%is_host()) then - call s_oacc_csr_to_dev(a%val) - call i_oacc_csr_to_dev(a%ja) - call i_oacc_csr_to_dev(a%irp) + call acc_update_device(a%val) + call acc_update_device(a%ja) + call acc_update_device(a%irp) end if call tmpa%set_sync() end subroutine s_oacc_csr_sync - subroutine s_oacc_csr_to_dev(v) - implicit none - real(psb_spk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine s_oacc_csr_to_dev - - subroutine s_oacc_csr_to_host(v) - implicit none - real(psb_spk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine s_oacc_csr_to_host - - subroutine i_oacc_csr_to_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_csr_to_dev - - subroutine i_oacc_csr_to_host(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_csr_to_host - end module psb_s_oacc_csr_mat_mod diff --git a/openacc/psb_s_oacc_ell_mat_mod.F90 b/openacc/psb_s_oacc_ell_mat_mod.F90 index 9924ba77..793bf353 100644 --- a/openacc/psb_s_oacc_ell_mat_mod.F90 +++ b/openacc/psb_s_oacc_ell_mat_mod.F90 @@ -1,5 +1,6 @@ module psb_s_oacc_ell_mat_mod use iso_c_binding + use openacc use psb_s_mat_mod use psb_s_ell_mat_mod use psb_s_oacc_vect_mod @@ -21,6 +22,7 @@ module psb_s_oacc_ell_mat_mod procedure, pass(a) :: set_dev => s_oacc_ell_set_dev procedure, pass(a) :: sync_space => s_oacc_ell_sync_space procedure, pass(a) :: sync => s_oacc_ell_sync + procedure, pass(a) :: free_space => s_oacc_ell_free_space procedure, pass(a) :: free => s_oacc_ell_free procedure, pass(a) :: vect_mv => psb_s_oacc_ell_vect_mv procedure, pass(a) :: in_vect_sv => psb_s_oacc_ell_inner_vect_sv @@ -152,31 +154,32 @@ module psb_s_oacc_ell_mat_mod contains - subroutine s_oacc_ell_free(a) + subroutine s_oacc_ell_free_space(a) use psb_base_mod implicit none class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) then - !$acc exit data delete(a%val) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) - end if - if (allocated(a%irn)) then - !$acc exit data delete(a%irn) - end if - if (allocated(a%idiag)) then - !$acc exit data delete(a%idiag) - end if + if (allocated(a%val)) call acc_delete_finalize(a%val) + if (allocated(a%ja)) call acc_delete_finalize(a%ja) + if (allocated(a%irn)) call acc_delete_finalize(a%irn) + if (allocated(a%idiag)) call acc_delete_finalize(a%idiag) + + return + end subroutine s_oacc_ell_free_space + + subroutine s_oacc_ell_free(a) + use psb_base_mod + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + call a%free_space() call a%psb_s_ell_sparse_mat%free() return end subroutine s_oacc_ell_free - function s_oacc_ell_sizeof(a) result(res) implicit none class(psb_s_oacc_ell_sparse_mat), intent(in) :: a @@ -196,41 +199,12 @@ contains implicit none class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a - if (allocated(a%val)) then - call s_oacc_create_dev(a%val) - end if - if (allocated(a%ja)) then - call i_oacc_create_dev(a%ja) - end if - if (allocated(a%irn)) then - call i_oacc_create_dev_scalar(a%irn) - end if - if (allocated(a%idiag)) then - call i_oacc_create_dev_scalar(a%idiag) - end if - - contains - subroutine s_oacc_create_dev(v) - implicit none - real(psb_spk_), intent(in) :: v(:,:) - !$acc enter data copyin(v) - end subroutine s_oacc_create_dev - - subroutine i_oacc_create_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:,:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev - - subroutine i_oacc_create_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev_scalar + if (allocated(a%val)) call acc_create(a%val) + if (allocated(a%ja)) call acc_create(a%ja) + if (allocated(a%irn)) call acc_create(a%irn) + if (allocated(a%idiag)) call acc_create(a%idiag) end subroutine s_oacc_ell_sync_space - - function s_oacc_ell_is_host(a) result(res) implicit none class(psb_s_oacc_ell_sparse_mat), intent(in) :: a @@ -279,7 +253,7 @@ contains function s_oacc_ell_get_fmt() result(res) implicit none character(len=5) :: res - res = 'ELL_oacc' + res = 'ELLOA' end function s_oacc_ell_get_fmt subroutine s_oacc_ell_sync(a) @@ -290,64 +264,17 @@ contains tmpa => a if (a%is_dev()) then - call s_oacc_ell_to_host(a%val) - call i_oacc_ell_to_host(a%ja) - call i_oacc_ell_to_host_scalar(a%irn) - call i_oacc_ell_to_host_scalar(a%idiag) + call acc_update_self(a%val) + call acc_update_self(a%ja) + call acc_update_self(a%irn) + call acc_update_self(a%idiag) else if (a%is_host()) then - call s_oacc_ell_to_dev(a%val) - call i_oacc_ell_to_dev(a%ja) - call i_oacc_ell_to_dev_scalar(a%irn) - call i_oacc_ell_to_dev_scalar(a%idiag) + call acc_update_device(a%val) + call acc_update_device(a%ja) + call acc_update_device(a%irn) + call acc_update_device(a%idiag) end if call tmpa%set_sync() end subroutine s_oacc_ell_sync - subroutine s_oacc_ell_to_dev_scalar(v) - implicit none - real(psb_spk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine s_oacc_ell_to_dev_scalar - - subroutine s_oacc_ell_to_dev(v) - implicit none - real(psb_spk_), intent(in) :: v(:,:) - !$acc update device(v) - end subroutine s_oacc_ell_to_dev - - subroutine s_oacc_ell_to_host(v) - implicit none - real(psb_spk_), intent(in) :: v(:,:) - !$acc update self(v) - end subroutine s_oacc_ell_to_host - - subroutine s_oacc_ell_to_host_scalar(v) - implicit none - real(psb_spk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine s_oacc_ell_to_host_scalar - - subroutine i_oacc_ell_to_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:,:) - !$acc update device(v) - end subroutine i_oacc_ell_to_dev - - subroutine i_oacc_ell_to_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_ell_to_dev_scalar - - subroutine i_oacc_ell_to_host(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:,:) - !$acc update self(v) - end subroutine i_oacc_ell_to_host - - subroutine i_oacc_ell_to_host_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_ell_to_host_scalar end module psb_s_oacc_ell_mat_mod diff --git a/openacc/psb_s_oacc_hll_mat_mod.F90 b/openacc/psb_s_oacc_hll_mat_mod.F90 index bf8949a1..508591e4 100644 --- a/openacc/psb_s_oacc_hll_mat_mod.F90 +++ b/openacc/psb_s_oacc_hll_mat_mod.F90 @@ -1,5 +1,6 @@ module psb_s_oacc_hll_mat_mod use iso_c_binding + use openacc use psb_s_mat_mod use psb_s_hll_mat_mod use psb_s_oacc_vect_mod @@ -21,6 +22,7 @@ module psb_s_oacc_hll_mat_mod procedure, pass(a) :: set_dev => s_oacc_hll_set_dev procedure, pass(a) :: sync_space => s_oacc_hll_sync_space procedure, pass(a) :: sync => s_oacc_hll_sync + procedure, pass(a) :: free_space => s_oacc_hll_free_space procedure, pass(a) :: free => s_oacc_hll_free procedure, pass(a) :: vect_mv => psb_s_oacc_hll_vect_mv procedure, pass(a) :: in_vect_sv => psb_s_oacc_hll_inner_vect_sv @@ -152,28 +154,28 @@ module psb_s_oacc_hll_mat_mod contains - subroutine s_oacc_hll_free(a) + subroutine s_oacc_hll_free_space(a) use psb_base_mod implicit none class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) then - !$acc exit data delete(a%val) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) - end if - if (allocated(a%irn)) then - !$acc exit data delete(a%irn) - end if - if (allocated(a%idiag)) then - !$acc exit data delete(a%idiag) - end if - if (allocated(a%hkoffs)) then - !$acc exit data delete(a%hkoffs) - end if + if (allocated(a%val)) call acc_delete_finalize(a%val) + if (allocated(a%ja)) call acc_delete_finalize(a%ja) + if (allocated(a%irn)) call acc_delete_finalize(a%irn) + if (allocated(a%idiag)) call acc_delete_finalize(a%idiag) + if (allocated(a%hkoffs)) call acc_delete_finalize(a%hkoffs) + + return + end subroutine s_oacc_hll_free_space + + subroutine s_oacc_hll_free(a) + use psb_base_mod + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + call a%free_space() call a%psb_s_hll_sparse_mat%free() return @@ -244,48 +246,18 @@ contains function s_oacc_hll_get_fmt() result(res) implicit none character(len=5) :: res - res = 'HLL_oacc' + res = 'HLLOA' end function s_oacc_hll_get_fmt subroutine s_oacc_hll_sync_space(a) implicit none class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a - if (allocated(a%val)) then - call s_oacc_create_dev(a%val) - end if - if (allocated(a%ja)) then - call i_oacc_create_dev(a%ja) - end if - if (allocated(a%irn)) then - call i_oacc_create_dev_scalar(a%irn) - end if - if (allocated(a%idiag)) then - call i_oacc_create_dev_scalar(a%idiag) - end if - if (allocated(a%hkoffs)) then - call i_oacc_create_dev_scalar(a%hkoffs) - end if - - contains - subroutine s_oacc_create_dev(v) - implicit none - real(psb_spk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine s_oacc_create_dev - - subroutine i_oacc_create_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev - - subroutine i_oacc_create_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev_scalar - + if (allocated(a%val)) call acc_create(a%val) + if (allocated(a%ja)) call acc_create(a%ja) + if (allocated(a%irn)) call acc_create(a%irn) + if (allocated(a%idiag)) call acc_create(a%idiag) + if (allocated(a%hkoffs)) call acc_create(a%hkoffs) end subroutine s_oacc_hll_sync_space @@ -297,56 +269,19 @@ contains tmpa => a if (a%is_dev()) then - call s_oacc_hll_to_host(a%val) - call i_oacc_hll_to_host(a%ja) - call i_oacc_hll_to_host_scalar(a%irn) - call i_oacc_hll_to_host_scalar(a%idiag) - call i_oacc_hll_to_host_scalar(a%hkoffs) + call acc_update_self(a%val) + call acc_update_self(a%ja) + call acc_update_self(a%irn) + call acc_update_self(a%idiag) + call acc_update_self(a%hkoffs) else if (a%is_host()) then - call s_oacc_hll_to_dev(a%val) - call i_oacc_hll_to_dev(a%ja) - call i_oacc_hll_to_dev_scalar(a%irn) - call i_oacc_hll_to_dev_scalar(a%idiag) - call i_oacc_hll_to_dev_scalar(a%hkoffs) + call acc_update_device(a%val) + call acc_update_device(a%ja) + call acc_update_device(a%irn) + call acc_update_device(a%idiag) + call acc_update_device(a%hkoffs) end if call tmpa%set_sync() end subroutine s_oacc_hll_sync - subroutine s_oacc_hll_to_host(v) - implicit none - real(psb_spk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine s_oacc_hll_to_host - - subroutine s_oacc_hll_to_dev(v) - implicit none - real(psb_spk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine s_oacc_hll_to_dev - - subroutine i_oacc_hll_to_host(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_hll_to_host - - subroutine i_oacc_hll_to_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_hll_to_dev - - subroutine i_oacc_hll_to_host_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_hll_to_host_scalar - - subroutine i_oacc_hll_to_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_hll_to_dev_scalar - - end module psb_s_oacc_hll_mat_mod diff --git a/openacc/psb_s_oacc_vect_mod.F90 b/openacc/psb_s_oacc_vect_mod.F90 index c3b31af7..7ce4292f 100644 --- a/openacc/psb_s_oacc_vect_mod.F90 +++ b/openacc/psb_s_oacc_vect_mod.F90 @@ -59,7 +59,7 @@ module psb_s_oacc_vect_mod procedure, pass(x) :: asum => s_oacc_asum procedure, pass(x) :: absval1 => s_oacc_absval1 procedure, pass(x) :: absval2 => s_oacc_absval2 - + final :: s_oacc_final_vect_free end type psb_s_vect_oacc interface @@ -164,21 +164,25 @@ contains implicit none class(psb_s_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(in) :: n - real(psb_spk_) :: res - real(psb_spk_) :: mx + real(psb_spk_) :: res integer(psb_ipk_) :: info if (x%is_host()) call x%sync() - mx = s_oacc_amax(n,x) - res = s_inner_oacc_nrm2(n, mx, x%v) +!!$ write(0,*)'oacc_nrm2' + res = s_inner_oacc_nrm2(n, x%v) contains - function s_inner_oacc_nrm2(n, mx,x) result(res) + function s_inner_oacc_nrm2(n, x) result(res) integer(psb_ipk_) :: n real(psb_spk_) :: x(:) - real(psb_spk_) :: mx, res - real(psb_spk_) :: sum + real(psb_spk_) :: res + real(psb_spk_) :: sum, mx integer(psb_ipk_) :: i - sum = 0.0 + mx = szero + !$acc parallel loop reduction(max:mx) + do i = 1, n + if (abs(x(i)) > mx) mx = abs(x(i)) + end do + sum = szero !$acc parallel loop reduction(+:sum) do i = 1, n sum = sum + abs(x(i)/mx)**2 @@ -203,7 +207,7 @@ contains real(psb_spk_) :: res real(psb_spk_) :: max_val integer(psb_ipk_) :: i - max_val = -huge(0.0) + max_val = szero !$acc parallel loop reduction(max:max_val) do i = 1, n if (abs(x(i)) > max_val) max_val = abs(x(i)) @@ -228,7 +232,7 @@ contains real(psb_spk_) :: x(:) real(psb_spk_) :: res integer(psb_ipk_) :: i - res = 0.0 + res = szero !$acc parallel loop reduction(+:res) do i = 1, n res = res + abs(x(i)) @@ -271,92 +275,6 @@ contains call z%set_host() end subroutine s_oacc_mlt_a_2 - -!!$ subroutine s_oacc_mlt_v(x, y, info) -!!$ implicit none -!!$ class(psb_s_base_vect_type), intent(inout) :: x -!!$ class(psb_s_vect_oacc), intent(inout) :: y -!!$ integer(psb_ipk_), intent(out) :: info -!!$ -!!$ integer(psb_ipk_) :: i, n -!!$ -!!$ info = 0 -!!$ n = min(x%get_nrows(), y%get_nrows()) -!!$ select type(xx => x) -!!$ type is (psb_s_base_vect_type) -!!$ if (y%is_dev()) call y%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ y%v(i) = y%v(i) * xx%v(i) -!!$ end do -!!$ call y%set_host() -!!$ class default -!!$ if (xx%is_dev()) call xx%sync() -!!$ if (y%is_dev()) call y%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ y%v(i) = y%v(i) * xx%v(i) -!!$ end do -!!$ call y%set_host() -!!$ end select -!!$ end subroutine s_oacc_mlt_v -!!$ -!!$ subroutine s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) -!!$ use psi_serial_mod -!!$ use psb_string_mod -!!$ implicit none -!!$ real(psb_spk_), intent(in) :: alpha, beta -!!$ class(psb_s_base_vect_type), intent(inout) :: x -!!$ class(psb_s_base_vect_type), intent(inout) :: y -!!$ class(psb_s_vect_oacc), intent(inout) :: z -!!$ integer(psb_ipk_), intent(out) :: info -!!$ character(len=1), intent(in), optional :: conjgx, conjgy -!!$ integer(psb_ipk_) :: i, n -!!$ logical :: conjgx_, conjgy_ -!!$ -!!$ conjgx_ = .false. -!!$ conjgy_ = .false. -!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') -!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') -!!$ -!!$ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) -!!$ -!!$ info = 0 -!!$ select type(xx => x) -!!$ class is (psb_s_vect_oacc) -!!$ select type (yy => y) -!!$ class is (psb_s_vect_oacc) -!!$ if (xx%is_host()) call xx%sync() -!!$ if (yy%is_host()) call yy%sync() -!!$ if ((beta /= szero) .and. (z%is_host())) call z%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) -!!$ end do -!!$ call z%set_dev() -!!$ class default -!!$ if (xx%is_dev()) call xx%sync() -!!$ if (yy%is_dev()) call yy%sync() -!!$ if ((beta /= szero) .and. (z%is_dev())) call z%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) -!!$ end do -!!$ call z%set_host() -!!$ end select -!!$ class default -!!$ if (x%is_dev()) call x%sync() -!!$ if (y%is_dev()) call y%sync() -!!$ if ((beta /= szero) .and. (z%is_dev())) call z%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i) -!!$ end do -!!$ call z%set_host() -!!$ end select -!!$ end subroutine s_oacc_mlt_v_2 - - subroutine s_oacc_axpby_v(m, alpha, x, beta, y, info) !use psi_serial_mod implicit none @@ -414,7 +332,7 @@ contains integer(psb_ipk_) :: i if ((beta /= szero) .and. (y%is_dev())) call y%sync() - !$acc parallel loop + do i = 1, m y%v(i) = alpha * x(i) + beta * y%v(i) end do @@ -432,44 +350,44 @@ contains integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nx, ny, nz, i logical :: gpu_done - write(0,*)'upd_xyz' + info = psb_success_ gpu_done = .false. select type(xx => x) class is (psb_s_vect_oacc) - select type(yy => y) + select type(yy => y) + class is (psb_s_vect_oacc) + select type(zz => z) class is (psb_s_vect_oacc) - select type(zz => z) - class is (psb_s_vect_oacc) - if ((beta /= szero) .and. yy%is_host()) call yy%sync() - if ((delta /= szero) .and. zz%is_host()) call zz%sync() - if (xx%is_host()) call xx%sync() - nx = size(xx%v) - ny = size(yy%v) - nz = size(zz%v) - if ((nx < m) .or. (ny < m) .or. (nz < m)) then - info = psb_err_internal_error_ - else - !$acc parallel loop - do i = 1, m - yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) - zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) - end do - end if - call yy%set_dev() - call zz%set_dev() - gpu_done = .true. - end select + if ((beta /= szero) .and. yy%is_host()) call yy%sync() + if ((delta /= szero) .and. zz%is_host()) call zz%sync() + if (xx%is_host()) call xx%sync() + nx = size(xx%v) + ny = size(yy%v) + nz = size(zz%v) + if ((nx < m) .or. (ny < m) .or. (nz < m)) then + info = psb_err_internal_error_ + else + !$acc parallel loop + do i = 1, m + yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) + zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) + end do + end if + call yy%set_dev() + call zz%set_dev() + gpu_done = .true. end select + end select end select if (.not. gpu_done) then - if (x%is_host()) call x%sync() - if (y%is_host()) call y%sync() - if (z%is_host()) call z%sync() - call y%axpby(m, alpha, x, beta, info) - call z%axpby(m, gamma, y, delta, info) + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + if (z%is_host()) call z%sync() + call y%axpby(m, alpha, x, beta, info) + call z%axpby(m, gamma, y, delta, info) end if end subroutine s_oacc_upd_xyz @@ -676,7 +594,7 @@ contains if (x%is_dev()) call x%sync() call x%psb_s_base_vect_type%ins(n, irl, val, dupl, info) call x%set_host() - !$acc update device(x%v) + end subroutine s_oacc_ins_a @@ -687,16 +605,14 @@ contains class(psb_s_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info + call x%free(info) call x%all(n, info) if (info /= 0) then call psb_errpush(info, 's_oacc_bld_mn', i_err=(/n, n, n, n, n/)) end if call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data copyin(x%v) - + call x%sync_space() + end subroutine s_oacc_bld_mn @@ -707,6 +623,7 @@ contains class(psb_s_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info + call x%free(info) call psb_realloc(size(this), x%v, info) if (info /= 0) then info = psb_err_alloc_request_ @@ -714,13 +631,9 @@ contains i_err=(/size(this), izero, izero, izero, izero/)) return end if - x%v(:) = this(:) call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data copyin(x%v) + call x%sync_space() end subroutine s_oacc_bld_x @@ -848,54 +761,21 @@ contains end function s_oacc_dot_a - ! subroutine s_oacc_set_vect(x,y) - ! implicit none - ! class(psb_s_vect_oacc), intent(inout) :: x - ! real(psb_spk_), intent(in) :: y(:) - ! integer(psb_ipk_) :: info - - ! if (size(x%v) /= size(y)) then - ! call x%free(info) - ! call x%all(size(y),info) - ! end if - ! x%v(:) = y(:) - ! call x%set_host() - ! end subroutine s_oacc_set_vect - - subroutine s_oacc_to_dev(v) - implicit none - real(psb_spk_) :: v(:) - !$acc update device(v) - end subroutine s_oacc_to_dev - - subroutine s_oacc_to_host(v) - implicit none - real(psb_spk_) :: v(:) - !$acc update self(v) - end subroutine s_oacc_to_host subroutine s_oacc_sync_space(x) implicit none class(psb_s_vect_oacc), intent(inout) :: x - if (allocated(x%v)) then - if (.not.acc_is_present(x%v)) call s_oacc_create_dev(x%v) - end if - contains - subroutine s_oacc_create_dev(v) - implicit none - real(psb_spk_) :: v(:) - !$acc enter data copyin(v) - end subroutine s_oacc_create_dev + if (allocated(x%v)) call acc_create(x%v) end subroutine s_oacc_sync_space subroutine s_oacc_sync(x) implicit none class(psb_s_vect_oacc), intent(inout) :: x if (x%is_dev()) then - call s_oacc_to_host(x%v) + call acc_update_self(x%v) end if if (x%is_host()) then - call s_oacc_to_dev(x%v) + call acc_update_device(x%v) end if call x%set_sync() end subroutine s_oacc_sync @@ -954,33 +834,36 @@ contains integer(psb_ipk_), intent(out) :: info call psb_realloc(n, x%v, info) - if (info == 0) then - call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data create(x%v) - call x%sync_space() - end if if (info /= 0) then info = psb_err_alloc_request_ call psb_errpush(info, 's_oacc_all', & i_err=(/n, n, n, n, n/)) end if + call x%set_host() + call x%sync_space() end subroutine s_oacc_vect_all + subroutine s_oacc_final_vect_free(x) + implicit none + type(psb_s_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + info = 0 + if (allocated(x%v)) then + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) + deallocate(x%v, stat=info) + end if + + end subroutine s_oacc_final_vect_free + subroutine s_oacc_vect_free(x, info) implicit none class(psb_s_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) then - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) deallocate(x%v, stat=info) end if - end subroutine s_oacc_vect_free function s_oacc_get_size(x) result(res) diff --git a/openacc/psb_z_oacc_csr_mat_mod.F90 b/openacc/psb_z_oacc_csr_mat_mod.F90 index 7842d96c..dbafb391 100644 --- a/openacc/psb_z_oacc_csr_mat_mod.F90 +++ b/openacc/psb_z_oacc_csr_mat_mod.F90 @@ -1,6 +1,7 @@ module psb_z_oacc_csr_mat_mod use iso_c_binding + use openacc use psb_z_mat_mod use psb_z_oacc_vect_mod !use oaccsparse_mod @@ -35,6 +36,7 @@ module psb_z_oacc_csr_mat_mod procedure, pass(a) :: set_host => z_oacc_csr_set_host procedure, pass(a) :: set_sync => z_oacc_csr_set_sync procedure, pass(a) :: set_dev => z_oacc_csr_set_dev + procedure, pass(a) :: free_space => z_oacc_csr_free_space procedure, pass(a) :: sync_space => z_oacc_csr_sync_space procedure, pass(a) :: sync => z_oacc_csr_sync end type psb_z_oacc_csr_sparse_mat @@ -154,22 +156,26 @@ module psb_z_oacc_csr_mat_mod contains - subroutine z_oacc_csr_free(a) + subroutine z_oacc_csr_free_space(a) use psb_base_mod implicit none class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) then - !$acc exit data delete(a%val) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) - end if - if (allocated(a%irp)) then - !$acc exit data delete(a%irp) - end if + if (allocated(a%val)) call acc_delete_finalize(a%val) + if (allocated(a%ja)) call acc_delete_finalize(a%ja) + if (allocated(a%irp)) call acc_delete_finalize(a%irp) + + return + end subroutine z_oacc_csr_free_space + + subroutine z_oacc_csr_free(a) + use psb_base_mod + implicit none + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + call a%free_space() call a%psb_z_csr_sparse_mat%free() return @@ -193,7 +199,7 @@ contains function z_oacc_csr_get_fmt() result(res) implicit none character(len=5) :: res - res = 'CSR_oacc' + res = 'CSROA' end function z_oacc_csr_get_fmt subroutine z_oacc_csr_all(m, n, nz, a, info) @@ -202,19 +208,8 @@ contains class(psb_z_oacc_csr_sparse_mat), intent(out) :: a integer(psb_ipk_), intent(out) :: info - info = 0 - if (allocated(a%val)) then - !$acc exit data delete(a%val) finalize - deallocate(a%val, stat=info) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) finalize - deallocate(a%ja, stat=info) - end if - if (allocated(a%irp)) then - !$acc exit data delete(a%irp) finalize - deallocate(a%irp, stat=info) - end if + info = 0 + call a%free() call a%set_nrows(m) call a%set_ncols(n) @@ -274,26 +269,9 @@ contains subroutine z_oacc_csr_sync_space(a) implicit none class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a - if (allocated(a%val)) then - call z_oacc_create_dev(a%val) - end if - if (allocated(a%ja)) then - call i_oacc_create_dev(a%ja) - end if - if (allocated(a%irp)) then - call i_oacc_create_dev(a%irp) - end if - contains - subroutine z_oacc_create_dev(v) - implicit none - complex(psb_dpk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine z_oacc_create_dev - subroutine i_oacc_create_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev + if (allocated(a%val)) call acc_create(a%val) + if (allocated(a%ja)) call acc_create(a%ja) + if (allocated(a%irp)) call acc_create(a%irp) end subroutine z_oacc_csr_sync_space subroutine z_oacc_csr_sync(a) @@ -304,40 +282,16 @@ contains tmpa => a if (a%is_dev()) then - call z_oacc_csr_to_host(a%val) - call i_oacc_csr_to_host(a%ja) - call i_oacc_csr_to_host(a%irp) + call acc_update_self(a%val) + call acc_update_self(a%ja) + call acc_update_self(a%irp) else if (a%is_host()) then - call z_oacc_csr_to_dev(a%val) - call i_oacc_csr_to_dev(a%ja) - call i_oacc_csr_to_dev(a%irp) + call acc_update_device(a%val) + call acc_update_device(a%ja) + call acc_update_device(a%irp) end if call tmpa%set_sync() end subroutine z_oacc_csr_sync - subroutine z_oacc_csr_to_dev(v) - implicit none - complex(psb_dpk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine z_oacc_csr_to_dev - - subroutine z_oacc_csr_to_host(v) - implicit none - complex(psb_dpk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine z_oacc_csr_to_host - - subroutine i_oacc_csr_to_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_csr_to_dev - - subroutine i_oacc_csr_to_host(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_csr_to_host - end module psb_z_oacc_csr_mat_mod diff --git a/openacc/psb_z_oacc_ell_mat_mod.F90 b/openacc/psb_z_oacc_ell_mat_mod.F90 index eb8884d6..76b0182d 100644 --- a/openacc/psb_z_oacc_ell_mat_mod.F90 +++ b/openacc/psb_z_oacc_ell_mat_mod.F90 @@ -1,5 +1,6 @@ module psb_z_oacc_ell_mat_mod use iso_c_binding + use openacc use psb_z_mat_mod use psb_z_ell_mat_mod use psb_z_oacc_vect_mod @@ -21,6 +22,7 @@ module psb_z_oacc_ell_mat_mod procedure, pass(a) :: set_dev => z_oacc_ell_set_dev procedure, pass(a) :: sync_space => z_oacc_ell_sync_space procedure, pass(a) :: sync => z_oacc_ell_sync + procedure, pass(a) :: free_space => z_oacc_ell_free_space procedure, pass(a) :: free => z_oacc_ell_free procedure, pass(a) :: vect_mv => psb_z_oacc_ell_vect_mv procedure, pass(a) :: in_vect_sv => psb_z_oacc_ell_inner_vect_sv @@ -152,31 +154,32 @@ module psb_z_oacc_ell_mat_mod contains - subroutine z_oacc_ell_free(a) + subroutine z_oacc_ell_free_space(a) use psb_base_mod implicit none class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) then - !$acc exit data delete(a%val) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) - end if - if (allocated(a%irn)) then - !$acc exit data delete(a%irn) - end if - if (allocated(a%idiag)) then - !$acc exit data delete(a%idiag) - end if + if (allocated(a%val)) call acc_delete_finalize(a%val) + if (allocated(a%ja)) call acc_delete_finalize(a%ja) + if (allocated(a%irn)) call acc_delete_finalize(a%irn) + if (allocated(a%idiag)) call acc_delete_finalize(a%idiag) + + return + end subroutine z_oacc_ell_free_space + + subroutine z_oacc_ell_free(a) + use psb_base_mod + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + call a%free_space() call a%psb_z_ell_sparse_mat%free() return end subroutine z_oacc_ell_free - function z_oacc_ell_sizeof(a) result(res) implicit none class(psb_z_oacc_ell_sparse_mat), intent(in) :: a @@ -196,41 +199,12 @@ contains implicit none class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a - if (allocated(a%val)) then - call z_oacc_create_dev(a%val) - end if - if (allocated(a%ja)) then - call i_oacc_create_dev(a%ja) - end if - if (allocated(a%irn)) then - call i_oacc_create_dev_scalar(a%irn) - end if - if (allocated(a%idiag)) then - call i_oacc_create_dev_scalar(a%idiag) - end if - - contains - subroutine z_oacc_create_dev(v) - implicit none - complex(psb_dpk_), intent(in) :: v(:,:) - !$acc enter data copyin(v) - end subroutine z_oacc_create_dev - - subroutine i_oacc_create_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:,:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev - - subroutine i_oacc_create_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev_scalar + if (allocated(a%val)) call acc_create(a%val) + if (allocated(a%ja)) call acc_create(a%ja) + if (allocated(a%irn)) call acc_create(a%irn) + if (allocated(a%idiag)) call acc_create(a%idiag) end subroutine z_oacc_ell_sync_space - - function z_oacc_ell_is_host(a) result(res) implicit none class(psb_z_oacc_ell_sparse_mat), intent(in) :: a @@ -279,7 +253,7 @@ contains function z_oacc_ell_get_fmt() result(res) implicit none character(len=5) :: res - res = 'ELL_oacc' + res = 'ELLOA' end function z_oacc_ell_get_fmt subroutine z_oacc_ell_sync(a) @@ -290,64 +264,17 @@ contains tmpa => a if (a%is_dev()) then - call z_oacc_ell_to_host(a%val) - call i_oacc_ell_to_host(a%ja) - call i_oacc_ell_to_host_scalar(a%irn) - call i_oacc_ell_to_host_scalar(a%idiag) + call acc_update_self(a%val) + call acc_update_self(a%ja) + call acc_update_self(a%irn) + call acc_update_self(a%idiag) else if (a%is_host()) then - call z_oacc_ell_to_dev(a%val) - call i_oacc_ell_to_dev(a%ja) - call i_oacc_ell_to_dev_scalar(a%irn) - call i_oacc_ell_to_dev_scalar(a%idiag) + call acc_update_device(a%val) + call acc_update_device(a%ja) + call acc_update_device(a%irn) + call acc_update_device(a%idiag) end if call tmpa%set_sync() end subroutine z_oacc_ell_sync - subroutine z_oacc_ell_to_dev_scalar(v) - implicit none - complex(psb_dpk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine z_oacc_ell_to_dev_scalar - - subroutine z_oacc_ell_to_dev(v) - implicit none - complex(psb_dpk_), intent(in) :: v(:,:) - !$acc update device(v) - end subroutine z_oacc_ell_to_dev - - subroutine z_oacc_ell_to_host(v) - implicit none - complex(psb_dpk_), intent(in) :: v(:,:) - !$acc update self(v) - end subroutine z_oacc_ell_to_host - - subroutine z_oacc_ell_to_host_scalar(v) - implicit none - complex(psb_dpk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine z_oacc_ell_to_host_scalar - - subroutine i_oacc_ell_to_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:,:) - !$acc update device(v) - end subroutine i_oacc_ell_to_dev - - subroutine i_oacc_ell_to_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_ell_to_dev_scalar - - subroutine i_oacc_ell_to_host(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:,:) - !$acc update self(v) - end subroutine i_oacc_ell_to_host - - subroutine i_oacc_ell_to_host_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_ell_to_host_scalar end module psb_z_oacc_ell_mat_mod diff --git a/openacc/psb_z_oacc_hll_mat_mod.F90 b/openacc/psb_z_oacc_hll_mat_mod.F90 index e6a4929a..4a657fd1 100644 --- a/openacc/psb_z_oacc_hll_mat_mod.F90 +++ b/openacc/psb_z_oacc_hll_mat_mod.F90 @@ -1,5 +1,6 @@ module psb_z_oacc_hll_mat_mod use iso_c_binding + use openacc use psb_z_mat_mod use psb_z_hll_mat_mod use psb_z_oacc_vect_mod @@ -21,6 +22,7 @@ module psb_z_oacc_hll_mat_mod procedure, pass(a) :: set_dev => z_oacc_hll_set_dev procedure, pass(a) :: sync_space => z_oacc_hll_sync_space procedure, pass(a) :: sync => z_oacc_hll_sync + procedure, pass(a) :: free_space => z_oacc_hll_free_space procedure, pass(a) :: free => z_oacc_hll_free procedure, pass(a) :: vect_mv => psb_z_oacc_hll_vect_mv procedure, pass(a) :: in_vect_sv => psb_z_oacc_hll_inner_vect_sv @@ -152,28 +154,28 @@ module psb_z_oacc_hll_mat_mod contains - subroutine z_oacc_hll_free(a) + subroutine z_oacc_hll_free_space(a) use psb_base_mod implicit none class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) then - !$acc exit data delete(a%val) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) - end if - if (allocated(a%irn)) then - !$acc exit data delete(a%irn) - end if - if (allocated(a%idiag)) then - !$acc exit data delete(a%idiag) - end if - if (allocated(a%hkoffs)) then - !$acc exit data delete(a%hkoffs) - end if + if (allocated(a%val)) call acc_delete_finalize(a%val) + if (allocated(a%ja)) call acc_delete_finalize(a%ja) + if (allocated(a%irn)) call acc_delete_finalize(a%irn) + if (allocated(a%idiag)) call acc_delete_finalize(a%idiag) + if (allocated(a%hkoffs)) call acc_delete_finalize(a%hkoffs) + + return + end subroutine z_oacc_hll_free_space + + subroutine z_oacc_hll_free(a) + use psb_base_mod + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + call a%free_space() call a%psb_z_hll_sparse_mat%free() return @@ -244,48 +246,18 @@ contains function z_oacc_hll_get_fmt() result(res) implicit none character(len=5) :: res - res = 'HLL_oacc' + res = 'HLLOA' end function z_oacc_hll_get_fmt subroutine z_oacc_hll_sync_space(a) implicit none class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a - if (allocated(a%val)) then - call z_oacc_create_dev(a%val) - end if - if (allocated(a%ja)) then - call i_oacc_create_dev(a%ja) - end if - if (allocated(a%irn)) then - call i_oacc_create_dev_scalar(a%irn) - end if - if (allocated(a%idiag)) then - call i_oacc_create_dev_scalar(a%idiag) - end if - if (allocated(a%hkoffs)) then - call i_oacc_create_dev_scalar(a%hkoffs) - end if - - contains - subroutine z_oacc_create_dev(v) - implicit none - complex(psb_dpk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine z_oacc_create_dev - - subroutine i_oacc_create_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev - - subroutine i_oacc_create_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev_scalar - + if (allocated(a%val)) call acc_create(a%val) + if (allocated(a%ja)) call acc_create(a%ja) + if (allocated(a%irn)) call acc_create(a%irn) + if (allocated(a%idiag)) call acc_create(a%idiag) + if (allocated(a%hkoffs)) call acc_create(a%hkoffs) end subroutine z_oacc_hll_sync_space @@ -297,56 +269,19 @@ contains tmpa => a if (a%is_dev()) then - call z_oacc_hll_to_host(a%val) - call i_oacc_hll_to_host(a%ja) - call i_oacc_hll_to_host_scalar(a%irn) - call i_oacc_hll_to_host_scalar(a%idiag) - call i_oacc_hll_to_host_scalar(a%hkoffs) + call acc_update_self(a%val) + call acc_update_self(a%ja) + call acc_update_self(a%irn) + call acc_update_self(a%idiag) + call acc_update_self(a%hkoffs) else if (a%is_host()) then - call z_oacc_hll_to_dev(a%val) - call i_oacc_hll_to_dev(a%ja) - call i_oacc_hll_to_dev_scalar(a%irn) - call i_oacc_hll_to_dev_scalar(a%idiag) - call i_oacc_hll_to_dev_scalar(a%hkoffs) + call acc_update_device(a%val) + call acc_update_device(a%ja) + call acc_update_device(a%irn) + call acc_update_device(a%idiag) + call acc_update_device(a%hkoffs) end if call tmpa%set_sync() end subroutine z_oacc_hll_sync - subroutine z_oacc_hll_to_host(v) - implicit none - complex(psb_dpk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine z_oacc_hll_to_host - - subroutine z_oacc_hll_to_dev(v) - implicit none - complex(psb_dpk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine z_oacc_hll_to_dev - - subroutine i_oacc_hll_to_host(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_hll_to_host - - subroutine i_oacc_hll_to_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_hll_to_dev - - subroutine i_oacc_hll_to_host_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_hll_to_host_scalar - - subroutine i_oacc_hll_to_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_hll_to_dev_scalar - - end module psb_z_oacc_hll_mat_mod diff --git a/openacc/psb_z_oacc_vect_mod.F90 b/openacc/psb_z_oacc_vect_mod.F90 index bab1a0a0..5d6f07be 100644 --- a/openacc/psb_z_oacc_vect_mod.F90 +++ b/openacc/psb_z_oacc_vect_mod.F90 @@ -59,7 +59,7 @@ module psb_z_oacc_vect_mod procedure, pass(x) :: asum => z_oacc_asum procedure, pass(x) :: absval1 => z_oacc_absval1 procedure, pass(x) :: absval2 => z_oacc_absval2 - + final :: z_oacc_final_vect_free end type psb_z_vect_oacc interface @@ -164,21 +164,25 @@ contains implicit none class(psb_z_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(in) :: n - real(psb_dpk_) :: res - real(psb_dpk_) :: mx + real(psb_dpk_) :: res integer(psb_ipk_) :: info if (x%is_host()) call x%sync() - mx = z_oacc_amax(n,x) - res = z_inner_oacc_nrm2(n, mx, x%v) +!!$ write(0,*)'oacc_nrm2' + res = z_inner_oacc_nrm2(n, x%v) contains - function z_inner_oacc_nrm2(n, mx,x) result(res) + function z_inner_oacc_nrm2(n, x) result(res) integer(psb_ipk_) :: n complex(psb_dpk_) :: x(:) - real(psb_dpk_) :: mx, res - real(psb_dpk_) :: sum + real(psb_dpk_) :: res + real(psb_dpk_) :: sum, mx integer(psb_ipk_) :: i - sum = 0.0 + mx = dzero + !$acc parallel loop reduction(max:mx) + do i = 1, n + if (abs(x(i)) > mx) mx = abs(x(i)) + end do + sum = dzero !$acc parallel loop reduction(+:sum) do i = 1, n sum = sum + abs(x(i)/mx)**2 @@ -203,7 +207,7 @@ contains real(psb_dpk_) :: res real(psb_dpk_) :: max_val integer(psb_ipk_) :: i - max_val = -huge(0.0) + max_val = dzero !$acc parallel loop reduction(max:max_val) do i = 1, n if (abs(x(i)) > max_val) max_val = abs(x(i)) @@ -228,7 +232,7 @@ contains complex(psb_dpk_) :: x(:) real(psb_dpk_) :: res integer(psb_ipk_) :: i - res = 0.0 + res = dzero !$acc parallel loop reduction(+:res) do i = 1, n res = res + abs(x(i)) @@ -271,92 +275,6 @@ contains call z%set_host() end subroutine z_oacc_mlt_a_2 - -!!$ subroutine z_oacc_mlt_v(x, y, info) -!!$ implicit none -!!$ class(psb_z_base_vect_type), intent(inout) :: x -!!$ class(psb_z_vect_oacc), intent(inout) :: y -!!$ integer(psb_ipk_), intent(out) :: info -!!$ -!!$ integer(psb_ipk_) :: i, n -!!$ -!!$ info = 0 -!!$ n = min(x%get_nrows(), y%get_nrows()) -!!$ select type(xx => x) -!!$ type is (psb_z_base_vect_type) -!!$ if (y%is_dev()) call y%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ y%v(i) = y%v(i) * xx%v(i) -!!$ end do -!!$ call y%set_host() -!!$ class default -!!$ if (xx%is_dev()) call xx%sync() -!!$ if (y%is_dev()) call y%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ y%v(i) = y%v(i) * xx%v(i) -!!$ end do -!!$ call y%set_host() -!!$ end select -!!$ end subroutine z_oacc_mlt_v -!!$ -!!$ subroutine z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) -!!$ use psi_serial_mod -!!$ use psb_string_mod -!!$ implicit none -!!$ complex(psb_dpk_), intent(in) :: alpha, beta -!!$ class(psb_z_base_vect_type), intent(inout) :: x -!!$ class(psb_z_base_vect_type), intent(inout) :: y -!!$ class(psb_z_vect_oacc), intent(inout) :: z -!!$ integer(psb_ipk_), intent(out) :: info -!!$ character(len=1), intent(in), optional :: conjgx, conjgy -!!$ integer(psb_ipk_) :: i, n -!!$ logical :: conjgx_, conjgy_ -!!$ -!!$ conjgx_ = .false. -!!$ conjgy_ = .false. -!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') -!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') -!!$ -!!$ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) -!!$ -!!$ info = 0 -!!$ select type(xx => x) -!!$ class is (psb_z_vect_oacc) -!!$ select type (yy => y) -!!$ class is (psb_z_vect_oacc) -!!$ if (xx%is_host()) call xx%sync() -!!$ if (yy%is_host()) call yy%sync() -!!$ if ((beta /= zzero) .and. (z%is_host())) call z%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) -!!$ end do -!!$ call z%set_dev() -!!$ class default -!!$ if (xx%is_dev()) call xx%sync() -!!$ if (yy%is_dev()) call yy%sync() -!!$ if ((beta /= zzero) .and. (z%is_dev())) call z%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) -!!$ end do -!!$ call z%set_host() -!!$ end select -!!$ class default -!!$ if (x%is_dev()) call x%sync() -!!$ if (y%is_dev()) call y%sync() -!!$ if ((beta /= zzero) .and. (z%is_dev())) call z%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i) -!!$ end do -!!$ call z%set_host() -!!$ end select -!!$ end subroutine z_oacc_mlt_v_2 - - subroutine z_oacc_axpby_v(m, alpha, x, beta, y, info) !use psi_serial_mod implicit none @@ -414,7 +332,7 @@ contains integer(psb_ipk_) :: i if ((beta /= zzero) .and. (y%is_dev())) call y%sync() - !$acc parallel loop + do i = 1, m y%v(i) = alpha * x(i) + beta * y%v(i) end do @@ -432,44 +350,44 @@ contains integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nx, ny, nz, i logical :: gpu_done - write(0,*)'upd_xyz' + info = psb_success_ gpu_done = .false. select type(xx => x) class is (psb_z_vect_oacc) - select type(yy => y) + select type(yy => y) + class is (psb_z_vect_oacc) + select type(zz => z) class is (psb_z_vect_oacc) - select type(zz => z) - class is (psb_z_vect_oacc) - if ((beta /= zzero) .and. yy%is_host()) call yy%sync() - if ((delta /= zzero) .and. zz%is_host()) call zz%sync() - if (xx%is_host()) call xx%sync() - nx = size(xx%v) - ny = size(yy%v) - nz = size(zz%v) - if ((nx < m) .or. (ny < m) .or. (nz < m)) then - info = psb_err_internal_error_ - else - !$acc parallel loop - do i = 1, m - yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) - zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) - end do - end if - call yy%set_dev() - call zz%set_dev() - gpu_done = .true. - end select + if ((beta /= zzero) .and. yy%is_host()) call yy%sync() + if ((delta /= zzero) .and. zz%is_host()) call zz%sync() + if (xx%is_host()) call xx%sync() + nx = size(xx%v) + ny = size(yy%v) + nz = size(zz%v) + if ((nx < m) .or. (ny < m) .or. (nz < m)) then + info = psb_err_internal_error_ + else + !$acc parallel loop + do i = 1, m + yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) + zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) + end do + end if + call yy%set_dev() + call zz%set_dev() + gpu_done = .true. end select + end select end select if (.not. gpu_done) then - if (x%is_host()) call x%sync() - if (y%is_host()) call y%sync() - if (z%is_host()) call z%sync() - call y%axpby(m, alpha, x, beta, info) - call z%axpby(m, gamma, y, delta, info) + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + if (z%is_host()) call z%sync() + call y%axpby(m, alpha, x, beta, info) + call z%axpby(m, gamma, y, delta, info) end if end subroutine z_oacc_upd_xyz @@ -676,7 +594,7 @@ contains if (x%is_dev()) call x%sync() call x%psb_z_base_vect_type%ins(n, irl, val, dupl, info) call x%set_host() - !$acc update device(x%v) + end subroutine z_oacc_ins_a @@ -687,16 +605,14 @@ contains class(psb_z_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info + call x%free(info) call x%all(n, info) if (info /= 0) then call psb_errpush(info, 'z_oacc_bld_mn', i_err=(/n, n, n, n, n/)) end if call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data copyin(x%v) - + call x%sync_space() + end subroutine z_oacc_bld_mn @@ -707,6 +623,7 @@ contains class(psb_z_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info + call x%free(info) call psb_realloc(size(this), x%v, info) if (info /= 0) then info = psb_err_alloc_request_ @@ -714,13 +631,9 @@ contains i_err=(/size(this), izero, izero, izero, izero/)) return end if - x%v(:) = this(:) call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data copyin(x%v) + call x%sync_space() end subroutine z_oacc_bld_x @@ -848,54 +761,21 @@ contains end function z_oacc_dot_a - ! subroutine z_oacc_set_vect(x,y) - ! implicit none - ! class(psb_z_vect_oacc), intent(inout) :: x - ! complex(psb_dpk_), intent(in) :: y(:) - ! integer(psb_ipk_) :: info - - ! if (size(x%v) /= size(y)) then - ! call x%free(info) - ! call x%all(size(y),info) - ! end if - ! x%v(:) = y(:) - ! call x%set_host() - ! end subroutine z_oacc_set_vect - - subroutine z_oacc_to_dev(v) - implicit none - complex(psb_dpk_) :: v(:) - !$acc update device(v) - end subroutine z_oacc_to_dev - - subroutine z_oacc_to_host(v) - implicit none - complex(psb_dpk_) :: v(:) - !$acc update self(v) - end subroutine z_oacc_to_host subroutine z_oacc_sync_space(x) implicit none class(psb_z_vect_oacc), intent(inout) :: x - if (allocated(x%v)) then - if (.not.acc_is_present(x%v)) call z_oacc_create_dev(x%v) - end if - contains - subroutine z_oacc_create_dev(v) - implicit none - complex(psb_dpk_) :: v(:) - !$acc enter data copyin(v) - end subroutine z_oacc_create_dev + if (allocated(x%v)) call acc_create(x%v) end subroutine z_oacc_sync_space subroutine z_oacc_sync(x) implicit none class(psb_z_vect_oacc), intent(inout) :: x if (x%is_dev()) then - call z_oacc_to_host(x%v) + call acc_update_self(x%v) end if if (x%is_host()) then - call z_oacc_to_dev(x%v) + call acc_update_device(x%v) end if call x%set_sync() end subroutine z_oacc_sync @@ -954,33 +834,36 @@ contains integer(psb_ipk_), intent(out) :: info call psb_realloc(n, x%v, info) - if (info == 0) then - call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data create(x%v) - call x%sync_space() - end if if (info /= 0) then info = psb_err_alloc_request_ call psb_errpush(info, 'z_oacc_all', & i_err=(/n, n, n, n, n/)) end if + call x%set_host() + call x%sync_space() end subroutine z_oacc_vect_all + subroutine z_oacc_final_vect_free(x) + implicit none + type(psb_z_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + info = 0 + if (allocated(x%v)) then + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) + deallocate(x%v, stat=info) + end if + + end subroutine z_oacc_final_vect_free + subroutine z_oacc_vect_free(x, info) implicit none class(psb_z_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) then - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) deallocate(x%v, stat=info) end if - end subroutine z_oacc_vect_free function z_oacc_get_size(x) result(res)