From 1f0e5918270e22d1f3dcf0f4bea0e299192ee2c5 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 9 Jul 2024 13:16:36 +0200 Subject: [PATCH] Reworked oacc_vect_mod. oacc_mlt_v_X generates ICE, to be fixed --- openacc/impl/Makefile | 2 +- openacc/impl/psb_d_oacc_mlt_v.f90 | 31 + openacc/impl/psb_d_oacc_mlt_v_2.f90 | 55 + openacc/psb_d_oacc_vect_mod.F90 | 1487 ++++++++++++++------------- 4 files changed, 831 insertions(+), 744 deletions(-) create mode 100644 openacc/impl/psb_d_oacc_mlt_v.f90 create mode 100644 openacc/impl/psb_d_oacc_mlt_v_2.f90 diff --git a/openacc/impl/Makefile b/openacc/impl/Makefile index fff48657..7bab6654 100755 --- a/openacc/impl/Makefile +++ b/openacc/impl/Makefile @@ -9,7 +9,7 @@ MODDIR=../../modules FINCLUDES=$(FMFLAG).. $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FIFLAG).. LIBNAME=libpsb_openacc.a -OBJS= psb_d_oacc_csr_vect_mv.o +OBJS= psb_d_oacc_csr_vect_mv.o psb_d_oacc_mlt_v_2.o psb_d_oacc_mlt_v.o objs: $(OBJS) diff --git a/openacc/impl/psb_d_oacc_mlt_v.f90 b/openacc/impl/psb_d_oacc_mlt_v.f90 new file mode 100644 index 00000000..a4eb6660 --- /dev/null +++ b/openacc/impl/psb_d_oacc_mlt_v.f90 @@ -0,0 +1,31 @@ + +subroutine d_oacc_mlt_v(x, y, info) + use psb_d_oacc_vect_mod, psb_protect_name => d_oacc_mlt_v + + implicit none + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_vect_oacc), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, n + + info = 0 +!!$ n = min(x%get_nrows(), y%get_nrows()) +!!$ select type(xx => x) +!!$ type is (psb_d_base_vect_type) +!!$ if (y%is_dev()) call y%sync() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ class default +!!$ if (xx%is_dev()) call xx%sync() +!!$ if (y%is_dev()) call y%sync() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ end select +end subroutine d_oacc_mlt_v diff --git a/openacc/impl/psb_d_oacc_mlt_v_2.f90 b/openacc/impl/psb_d_oacc_mlt_v_2.f90 new file mode 100644 index 00000000..b59b4f56 --- /dev/null +++ b/openacc/impl/psb_d_oacc_mlt_v_2.f90 @@ -0,0 +1,55 @@ +subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) + use psb_d_oacc_vect_mod, psb_protect_name => d_oacc_mlt_v_2 + use psb_string_mod + implicit none + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + class(psb_d_vect_oacc), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + integer(psb_ipk_) :: i, n + logical :: conjgx_, conjgy_ + + conjgx_ = .false. + conjgy_ = .false. + if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') + if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') + + n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) + + info = 0 +!!$ select type(xx => x) +!!$ class is (psb_d_vect_oacc) +!!$ select type (yy => y) +!!$ class is (psb_d_vect_oacc) +!!$ if (xx%is_host()) call xx%sync_space() +!!$ if (yy%is_host()) call yy%sync_space() +!!$ if ((beta /= dzero) .and. (z%is_host())) call z%sync_space() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) +!!$ end do +!!$ call z%set_dev() +!!$ class default +!!$ if (xx%is_dev()) call xx%sync_space() +!!$ if (yy%is_dev()) call yy%sync() +!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) +!!$ end do +!!$ call z%set_host() +!!$ end select +!!$ class default +!!$ if (x%is_dev()) call x%sync() +!!$ if (y%is_dev()) call y%sync() +!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i) +!!$ end do +!!$ call z%set_host() +!!$ end select +end subroutine d_oacc_mlt_v_2 + diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90 index eda804ce..3139f9b8 100644 --- a/openacc/psb_d_oacc_vect_mod.F90 +++ b/openacc/psb_d_oacc_vect_mod.F90 @@ -5,63 +5,87 @@ module psb_d_oacc_vect_mod use psb_d_vect_mod use psb_i_vect_mod use psb_i_oacc_vect_mod - + integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - + type, extends(psb_d_base_vect_type) :: psb_d_vect_oacc integer :: state = is_host contains -!!$ procedure, pass(x) :: get_nrows => d_oacc_get_nrows -!!$ procedure, nopass :: get_fmt => d_oacc_get_fmt -!!$ -!!$ procedure, pass(x) :: all => d_oacc_vect_all -!!$ procedure, pass(x) :: zero => d_oacc_zero -!!$ procedure, pass(x) :: asb_m => d_oacc_asb_m + procedure, pass(x) :: get_nrows => d_oacc_get_nrows + procedure, nopass :: get_fmt => d_oacc_get_fmt + + procedure, pass(x) :: all => d_oacc_vect_all + procedure, pass(x) :: zero => d_oacc_zero + procedure, pass(x) :: asb_m => d_oacc_asb_m procedure, pass(x) :: sync => d_oacc_sync procedure, pass(x) :: sync_space => d_oacc_sync_space -!!$ procedure, pass(x) :: bld_x => d_oacc_bld_x -!!$ procedure, pass(x) :: bld_mn => d_oacc_bld_mn -!!$ procedure, pass(x) :: free => d_oacc_vect_free -!!$ procedure, pass(x) :: ins_a => d_oacc_ins_a -!!$ procedure, pass(x) :: ins_v => d_oacc_ins_v + procedure, pass(x) :: bld_x => d_oacc_bld_x + procedure, pass(x) :: bld_mn => d_oacc_bld_mn + procedure, pass(x) :: free => d_oacc_vect_free + procedure, pass(x) :: ins_a => d_oacc_ins_a + procedure, pass(x) :: ins_v => d_oacc_ins_v procedure, pass(x) :: is_host => d_oacc_is_host procedure, pass(x) :: is_dev => d_oacc_is_dev procedure, pass(x) :: is_sync => d_oacc_is_sync procedure, pass(x) :: set_host => d_oacc_set_host procedure, pass(x) :: set_dev => d_oacc_set_dev procedure, pass(x) :: set_sync => d_oacc_set_sync -!!$ procedure, pass(x) :: set_scal => d_oacc_set_scal -!!$ -!!$ procedure, pass(x) :: gthzv_x => d_oacc_gthzv_x -!!$ procedure, pass(x) :: gthzbuf_x => d_oacc_gthzbuf -!!$ procedure, pass(y) :: sctb => d_oacc_sctb -!!$ procedure, pass(y) :: sctb_x => d_oacc_sctb_x -!!$ procedure, pass(y) :: sctb_buf => d_oacc_sctb_buf -!!$ -!!$ procedure, pass(x) :: get_size => oacc_get_size -!!$ procedure, pass(x) :: dot_v => d_oacc_vect_dot -!!$ procedure, pass(x) :: dot_a => d_oacc_dot_a -!!$ procedure, pass(y) :: axpby_v => d_oacc_axpby_v -!!$ procedure, pass(y) :: axpby_a => d_oacc_axpby_a -!!$ procedure, pass(z) :: abgdxyz => d_oacc_abgdxyz -!!$ procedure, pass(y) :: mlt_v => d_oacc_mlt_v -!!$ procedure, pass(y) :: mlt_a => d_oacc_mlt_a -!!$ procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2 -!!$ procedure, pass(z) :: mlt_v_2 => d_oacc_mlt_v_2 -!!$ procedure, pass(x) :: scal => d_oacc_scal -!!$ procedure, pass(x) :: nrm2 => d_oacc_nrm2 -!!$ procedure, pass(x) :: amax => d_oacc_amax -!!$ procedure, pass(x) :: asum => d_oacc_asum + procedure, pass(x) :: set_scal => d_oacc_set_scal + + procedure, pass(x) :: gthzv_x => d_oacc_gthzv_x + procedure, pass(x) :: gthzbuf_x => d_oacc_gthzbuf + procedure, pass(y) :: sctb => d_oacc_sctb + procedure, pass(y) :: sctb_x => d_oacc_sctb_x + procedure, pass(y) :: sctb_buf => d_oacc_sctb_buf + + procedure, pass(x) :: get_size => oacc_get_size + procedure, pass(x) :: dot_v => d_oacc_vect_dot + procedure, pass(x) :: dot_a => d_oacc_dot_a + procedure, pass(y) :: axpby_v => d_oacc_axpby_v + procedure, pass(y) :: axpby_a => d_oacc_axpby_a + procedure, pass(z) :: abgdxyz => d_oacc_abgdxyz + procedure, pass(y) :: mlt_a => d_oacc_mlt_a + procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2 + procedure, pass(y) :: mlt_v => d_oacc_mlt_v + procedure, pass(z) :: mlt_v_2 => d_oacc_mlt_v_2 + procedure, pass(x) :: scal => d_oacc_scal + procedure, pass(x) :: nrm2 => d_oacc_nrm2 + procedure, pass(x) :: amax => d_oacc_amax + procedure, pass(x) :: asum => d_oacc_asum procedure, pass(x) :: absval1 => d_oacc_absval1 -!!$ procedure, pass(x) :: absval2 => d_oacc_absval2 + procedure, pass(x) :: absval2 => d_oacc_absval2 end type psb_d_vect_oacc real(psb_dpk_), allocatable :: v1(:),v2(:),p(:) + interface + subroutine d_oacc_mlt_v(x, y, info) + import + implicit none + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_vect_oacc), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + end subroutine d_oacc_mlt_v + end interface + + + interface + subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) + import + implicit none + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + class(psb_d_vect_oacc), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + end subroutine d_oacc_mlt_v_2 + end interface + contains subroutine d_oacc_absval1(x) @@ -73,106 +97,140 @@ contains n = size(x%v) !$acc parallel loop do i = 1, n - x%v(i) = abs(x%v(i)) + x%v(i) = abs(x%v(i)) end do call x%set_dev() end subroutine d_oacc_absval1 -!!$ subroutine d_oacc_absval2(x, y) -!!$ implicit none -!!$ class(psb_d_vect_oacc), intent(inout) :: x -!!$ class(psb_d_base_vect_type), intent(inout) :: y -!!$ integer(psb_ipk_) :: n -!!$ integer(psb_ipk_) :: i -!!$ -!!$ n = min(size(x%v), size(y%v)) -!!$ select type (yy => y) -!!$ class is (psb_d_vect_oacc) -!!$ if (x%is_host()) call x%sync() -!!$ if (yy%is_host()) call yy%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ yy%v(i) = abs(x%v(i)) -!!$ end do -!!$ class default -!!$ if (x%is_dev()) call x%sync() -!!$ if (y%is_dev()) call y%sync() -!!$ call x%psb_d_base_vect_type%absval(y) -!!$ end select -!!$ end subroutine d_oacc_absval2 -!!$ -!!$ subroutine d_oacc_scal(alpha, x) -!!$ implicit none -!!$ class(psb_d_vect_oacc), intent(inout) :: x -!!$ real(psb_dpk_), intent(in) :: alpha -!!$ integer(psb_ipk_) :: info -!!$ integer(psb_ipk_) :: i -!!$ -!!$ if (x%is_host()) call x%sync_space() -!!$ !$acc parallel loop -!!$ do i = 1, size(x%v) -!!$ x%v(i) = alpha * x%v(i) -!!$ end do -!!$ call x%set_dev() -!!$ end subroutine d_oacc_scal -!!$ -!!$ function d_oacc_nrm2(n, x) result(res) -!!$ implicit none -!!$ class(psb_d_vect_oacc), intent(inout) :: x -!!$ integer(psb_ipk_), intent(in) :: n -!!$ real(psb_dpk_) :: res -!!$ integer(psb_ipk_) :: info -!!$ real(psb_dpk_) :: sum -!!$ integer(psb_ipk_) :: i -!!$ -!!$ if (x%is_host()) call x%sync_space() -!!$ sum = 0.0 -!!$ !$acc parallel loop reduction(+:sum) -!!$ do i = 1, n -!!$ sum = sum + x%v(i) * x%v(i) -!!$ end do -!!$ res = sqrt(sum) -!!$ end function d_oacc_nrm2 -!!$ -!!$ function d_oacc_amax(n, x) result(res) -!!$ implicit none -!!$ class(psb_d_vect_oacc), intent(inout) :: x -!!$ integer(psb_ipk_), intent(in) :: n -!!$ real(psb_dpk_) :: res -!!$ integer(psb_ipk_) :: info -!!$ real(psb_dpk_) :: max_val -!!$ integer(psb_ipk_) :: i -!!$ -!!$ if (x%is_host()) call x%sync_space() -!!$ max_val = -huge(0.0) -!!$ !$acc parallel loop reduction(max:max_val) -!!$ do i = 1, n -!!$ if (x%v(i) > max_val) max_val = x%v(i) -!!$ end do -!!$ res = max_val -!!$ end function d_oacc_amax -!!$ -!!$ function d_oacc_asum(n, x) result(res) -!!$ implicit none -!!$ class(psb_d_vect_oacc), intent(inout) :: x -!!$ integer(psb_ipk_), intent(in) :: n -!!$ real(psb_dpk_) :: res -!!$ integer(psb_ipk_) :: info -!!$ real(psb_dpk_) :: sum -!!$ integer(psb_ipk_) :: i -!!$ -!!$ if (x%is_host()) call x%sync_space() -!!$ sum = 0.0 -!!$ !$acc parallel loop reduction(+:sum) -!!$ do i = 1, n -!!$ sum = sum + abs(x%v(i)) -!!$ end do -!!$ res = sum -!!$ end function d_oacc_asum -!!$ -!!$ + subroutine d_oacc_absval2(x, y) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_) :: n + integer(psb_ipk_) :: i + + n = min(size(x%v), size(y%v)) + select type (yy => y) + class is (psb_d_vect_oacc) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + !$acc parallel loop + do i = 1, n + yy%v(i) = abs(x%v(i)) + end do + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call x%psb_d_base_vect_type%absval(y) + end select + end subroutine d_oacc_absval2 + + subroutine d_oacc_scal(alpha, x) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + real(psb_dpk_), intent(in) :: alpha + integer(psb_ipk_) :: info + integer(psb_ipk_) :: i + + if (x%is_host()) call x%sync_space() + !$acc parallel loop + do i = 1, size(x%v) + x%v(i) = alpha * x%v(i) + end do + call x%set_dev() + end subroutine d_oacc_scal + + function d_oacc_nrm2(n, x) result(res) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + real(psb_dpk_) :: sum + integer(psb_ipk_) :: i + + if (x%is_host()) call x%sync_space() + sum = 0.0 + !$acc parallel loop reduction(+:sum) + do i = 1, n + sum = sum + x%v(i) * x%v(i) + end do + res = sqrt(sum) + end function d_oacc_nrm2 + + function d_oacc_amax(n, x) result(res) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + real(psb_dpk_) :: max_val + integer(psb_ipk_) :: i + + if (x%is_host()) call x%sync_space() + max_val = -huge(0.0) + !$acc parallel loop reduction(max:max_val) + do i = 1, n + if (x%v(i) > max_val) max_val = x%v(i) + end do + res = max_val + end function d_oacc_amax + + function d_oacc_asum(n, x) result(res) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + real(psb_dpk_) :: sum + integer(psb_ipk_) :: i + + if (x%is_host()) call x%sync_space() + sum = 0.0 + !$acc parallel loop reduction(+:sum) + do i = 1, n + sum = sum + abs(x%v(i)) + end do + res = sum + end function d_oacc_asum + + + subroutine d_oacc_mlt_a(x, y, info) + implicit none + real(psb_dpk_), intent(in) :: x(:) + class(psb_d_vect_oacc), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (y%is_dev()) call y%sync_space() + !$acc parallel loop + do i = 1, size(x) + y%v(i) = y%v(i) * x(i) + end do + call y%set_host() + end subroutine d_oacc_mlt_a + + subroutine d_oacc_mlt_a_2(alpha, x, y, beta, z, info) + implicit none + real(psb_dpk_), intent(in) :: alpha, beta + real(psb_dpk_), intent(in) :: x(:) + real(psb_dpk_), intent(in) :: y(:) + class(psb_d_vect_oacc), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (z%is_dev()) call z%sync_space() + !$acc parallel loop + do i = 1, size(x) + z%v(i) = alpha * x(i) * y(i) + beta * z%v(i) + end do + call z%set_host() + end subroutine d_oacc_mlt_a_2 + + !!$ subroutine d_oacc_mlt_v(x, y, info) -!!$ use psi_serial_mod !!$ implicit none !!$ class(psb_d_base_vect_type), intent(inout) :: x !!$ class(psb_d_vect_oacc), intent(inout) :: y @@ -201,42 +259,6 @@ contains !!$ end select !!$ end subroutine d_oacc_mlt_v !!$ -!!$ subroutine d_oacc_mlt_a(x, y, info) -!!$ use psi_serial_mod -!!$ implicit none -!!$ real(psb_dpk_), intent(in) :: x(:) -!!$ class(psb_d_vect_oacc), intent(inout) :: y -!!$ integer(psb_ipk_), intent(out) :: info -!!$ integer(psb_ipk_) :: i, n -!!$ -!!$ info = 0 -!!$ if (y%is_dev()) call y%sync_space() -!!$ !$acc parallel loop -!!$ do i = 1, size(x) -!!$ y%v(i) = y%v(i) * x(i) -!!$ end do -!!$ call y%set_host() -!!$ end subroutine d_oacc_mlt_a -!!$ -!!$ subroutine d_oacc_mlt_a_2(alpha, x, y, beta, z, info) -!!$ use psi_serial_mod -!!$ implicit none -!!$ real(psb_dpk_), intent(in) :: alpha, beta -!!$ real(psb_dpk_), intent(in) :: x(:) -!!$ real(psb_dpk_), intent(in) :: y(:) -!!$ class(psb_d_vect_oacc), intent(inout) :: z -!!$ integer(psb_ipk_), intent(out) :: info -!!$ integer(psb_ipk_) :: i, n -!!$ -!!$ info = 0 -!!$ if (z%is_dev()) call z%sync_space() -!!$ !$acc parallel loop -!!$ do i = 1, size(x) -!!$ z%v(i) = alpha * x(i) * y(i) + beta * z%v(i) -!!$ end do -!!$ call z%set_host() -!!$ end subroutine d_oacc_mlt_a_2 -!!$ !!$ subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) !!$ use psi_serial_mod !!$ use psb_string_mod @@ -291,514 +313,513 @@ contains !!$ call z%set_host() !!$ end select !!$ end subroutine d_oacc_mlt_v_2 -!!$ -!!$ -!!$ subroutine d_oacc_axpby_v(m, alpha, x, beta, y, info) -!!$ use psi_serial_mod -!!$ implicit none -!!$ integer(psb_ipk_), intent(in) :: m -!!$ class(psb_d_base_vect_type), intent(inout) :: x -!!$ class(psb_d_vect_oacc), intent(inout) :: y -!!$ real(psb_dpk_), intent(in) :: alpha, beta -!!$ integer(psb_ipk_), intent(out) :: info -!!$ integer(psb_ipk_) :: nx, ny, i -!!$ -!!$ info = psb_success_ -!!$ -!!$ select type(xx => x) -!!$ type is (psb_d_vect_oacc) -!!$ if ((beta /= dzero) .and. y%is_host()) call y%sync_space() -!!$ if (xx%is_host()) call xx%sync_space() -!!$ nx = size(xx%v) -!!$ ny = size(y%v) -!!$ if ((nx < m) .or. (ny < m)) then -!!$ info = psb_err_internal_error_ -!!$ else -!!$ !$acc parallel loop -!!$ do i = 1, m -!!$ y%v(i) = alpha * xx%v(i) + beta * y%v(i) -!!$ end do -!!$ end if -!!$ call y%set_dev() -!!$ class default -!!$ if ((alpha /= dzero) .and. (x%is_dev())) call x%sync() -!!$ call y%axpby(m, alpha, x%v, beta, info) -!!$ end select -!!$ end subroutine d_oacc_axpby_v -!!$ -!!$ subroutine d_oacc_axpby_a(m, alpha, x, beta, y, info) -!!$ use psi_serial_mod -!!$ implicit none -!!$ integer(psb_ipk_), intent(in) :: m -!!$ real(psb_dpk_), intent(in) :: x(:) -!!$ class(psb_d_vect_oacc), intent(inout) :: y -!!$ real(psb_dpk_), intent(in) :: alpha, beta -!!$ integer(psb_ipk_), intent(out) :: info -!!$ integer(psb_ipk_) :: i -!!$ -!!$ if ((beta /= dzero) .and. (y%is_dev())) call y%sync_space() -!!$ !$acc parallel loop -!!$ do i = 1, m -!!$ y%v(i) = alpha * x(i) + beta * y%v(i) -!!$ end do -!!$ call y%set_host() -!!$ end subroutine d_oacc_axpby_a -!!$ -!!$ subroutine d_oacc_abgdxyz(m, alpha, beta, gamma, delta, x, y, z, info) -!!$ use psi_serial_mod -!!$ implicit none -!!$ integer(psb_ipk_), intent(in) :: m -!!$ class(psb_d_base_vect_type), intent(inout) :: x -!!$ class(psb_d_base_vect_type), intent(inout) :: y -!!$ class(psb_d_vect_oacc), intent(inout) :: z -!!$ real(psb_dpk_), intent(in) :: alpha, beta, gamma, delta -!!$ integer(psb_ipk_), intent(out) :: info -!!$ integer(psb_ipk_) :: nx, ny, nz, i -!!$ logical :: gpu_done -!!$ -!!$ info = psb_success_ -!!$ gpu_done = .false. -!!$ -!!$ select type(xx => x) -!!$ class is (psb_d_vect_oacc) -!!$ select type(yy => y) -!!$ class is (psb_d_vect_oacc) -!!$ select type(zz => z) -!!$ class is (psb_d_vect_oacc) -!!$ if ((beta /= dzero) .and. yy%is_host()) call yy%sync_space() -!!$ if ((delta /= dzero) .and. zz%is_host()) call zz%sync_space() -!!$ if (xx%is_host()) call xx%sync_space() -!!$ nx = size(xx%v) -!!$ ny = size(yy%v) -!!$ nz = size(zz%v) -!!$ if ((nx < m) .or. (ny < m) .or. (nz < m)) then -!!$ info = psb_err_internal_error_ -!!$ else -!!$ !$acc parallel loop -!!$ do i = 1, m -!!$ yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) -!!$ zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) -!!$ end do -!!$ end if -!!$ call yy%set_dev() -!!$ call zz%set_dev() -!!$ gpu_done = .true. -!!$ end select -!!$ end select -!!$ end select -!!$ -!!$ if (.not. gpu_done) then -!!$ if (x%is_host()) call x%sync() -!!$ if (y%is_host()) call y%sync() -!!$ if (z%is_host()) call z%sync() -!!$ call y%axpby(m, alpha, x, beta, info) -!!$ call z%axpby(m, gamma, y, delta, info) -!!$ end if -!!$ end subroutine d_oacc_abgdxyz -!!$ -!!$ -!!$ subroutine d_oacc_sctb_buf(i, n, idx, beta, y) -!!$ use psb_base_mod -!!$ implicit none -!!$ integer(psb_ipk_) :: i, n -!!$ class(psb_i_base_vect_type) :: idx -!!$ real(psb_dpk_) :: beta -!!$ class(psb_d_vect_oacc) :: y -!!$ integer(psb_ipk_) :: info -!!$ -!!$ if (.not.allocated(y%combuf)) then -!!$ call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') -!!$ return -!!$ end if -!!$ -!!$ select type(ii => idx) -!!$ class is (psb_i_vect_oacc) -!!$ if (ii%is_host()) call ii%sync_space(info) -!!$ if (y%is_host()) call y%sync_space() -!!$ -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i) -!!$ end do -!!$ -!!$ class default -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i) -!!$ end do -!!$ end select -!!$ end subroutine d_oacc_sctb_buf -!!$ -!!$ subroutine d_oacc_sctb_x(i, n, idx, x, beta, y) -!!$ use psb_base_mod -!!$ implicit none -!!$ integer(psb_ipk_):: i, n -!!$ class(psb_i_base_vect_type) :: idx -!!$ real(psb_dpk_) :: beta, x(:) -!!$ class(psb_d_vect_oacc) :: y -!!$ integer(psb_ipk_) :: info, ni -!!$ -!!$ select type(ii => idx) -!!$ class is (psb_i_vect_oacc) -!!$ if (ii%is_host()) call ii%sync_space(info) -!!$ class default -!!$ call psb_errpush(info, 'd_oacc_sctb_x') -!!$ return -!!$ end select -!!$ -!!$ if (y%is_host()) call y%sync_space() -!!$ -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i) -!!$ end do -!!$ -!!$ call y%set_dev() -!!$ end subroutine d_oacc_sctb_x -!!$ -!!$ -!!$ -!!$ subroutine d_oacc_sctb(n, idx, x, beta, y) -!!$ use psb_base_mod -!!$ implicit none -!!$ integer(psb_ipk_) :: n -!!$ integer(psb_ipk_) :: idx(:) -!!$ real(psb_dpk_) :: beta, x(:) -!!$ class(psb_d_vect_oacc) :: y -!!$ integer(psb_ipk_) :: info -!!$ integer(psb_ipk_) :: i -!!$ -!!$ if (n == 0) return -!!$ if (y%is_dev()) call y%sync_space() -!!$ -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ y%v(idx(i)) = beta * y%v(idx(i)) + x(i) -!!$ end do -!!$ -!!$ call y%set_host() -!!$ end subroutine d_oacc_sctb -!!$ -!!$ -!!$ subroutine d_oacc_gthzbuf(i, n, idx, x) -!!$ use psb_base_mod -!!$ implicit none -!!$ integer(psb_ipk_) :: i, n -!!$ class(psb_i_base_vect_type) :: idx -!!$ class(psb_d_vect_oacc) :: x -!!$ integer(psb_ipk_) :: info -!!$ -!!$ info = 0 -!!$ if (.not.allocated(x%combuf)) then -!!$ call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') -!!$ return -!!$ end if -!!$ -!!$ select type(ii => idx) -!!$ class is (psb_i_vect_oacc) -!!$ if (ii%is_host()) call ii%sync_space(info) -!!$ class default -!!$ call psb_errpush(info, 'd_oacc_gthzbuf') -!!$ return -!!$ end select -!!$ -!!$ if (x%is_host()) call x%sync_space() -!!$ -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ x%combuf(i) = x%v(idx%v(i)) -!!$ end do -!!$ end subroutine d_oacc_gthzbuf -!!$ -!!$ subroutine d_oacc_gthzv_x(i, n, idx, x, y) -!!$ use psb_base_mod -!!$ implicit none -!!$ integer(psb_ipk_) :: i, n -!!$ class(psb_i_base_vect_type):: idx -!!$ real(psb_dpk_) :: y(:) -!!$ class(psb_d_vect_oacc):: x -!!$ integer(psb_ipk_) :: info -!!$ -!!$ info = 0 -!!$ -!!$ select type(ii => idx) -!!$ class is (psb_i_vect_oacc) -!!$ if (ii%is_host()) call ii%sync_space(info) -!!$ class default -!!$ call psb_errpush(info, 'd_oacc_gthzv_x') -!!$ return -!!$ end select -!!$ -!!$ if (x%is_host()) call x%sync_space() -!!$ -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ y(i) = x%v(idx%v(i)) -!!$ end do -!!$ end subroutine d_oacc_gthzv_x -!!$ -!!$ subroutine d_oacc_ins_v(n, irl, val, dupl, x, info) -!!$ use psi_serial_mod -!!$ implicit none -!!$ class(psb_d_vect_oacc), intent(inout) :: x -!!$ integer(psb_ipk_), intent(in) :: n, dupl -!!$ class(psb_i_base_vect_type), intent(inout) :: irl -!!$ class(psb_d_base_vect_type), intent(inout) :: val -!!$ integer(psb_ipk_), intent(out) :: info -!!$ -!!$ integer(psb_ipk_) :: i, isz -!!$ logical :: done_oacc -!!$ -!!$ info = 0 -!!$ if (psb_errstatus_fatal()) return -!!$ -!!$ done_oacc = .false. -!!$ select type(virl => irl) -!!$ type is (psb_i_vect_oacc) -!!$ select type(vval => val) -!!$ type is (psb_d_vect_oacc) -!!$ if (vval%is_host()) call vval%sync_space() -!!$ if (virl%is_host()) call virl%sync_space(info) -!!$ if (x%is_host()) call x%sync_space() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ x%v(virl%v(i)) = vval%v(i) -!!$ end do -!!$ call x%set_dev() -!!$ done_oacc = .true. -!!$ end select -!!$ end select -!!$ -!!$ if (.not.done_oacc) then -!!$ select type(virl => irl) -!!$ type is (psb_i_vect_oacc) -!!$ if (virl%is_dev()) call virl%sync_space(info) -!!$ end select -!!$ select type(vval => val) -!!$ type is (psb_d_vect_oacc) -!!$ if (vval%is_dev()) call vval%sync_space() -!!$ end select -!!$ call x%ins(n, irl%v, val%v, dupl, info) -!!$ end if -!!$ -!!$ if (info /= 0) then -!!$ call psb_errpush(info, 'oacc_vect_ins') -!!$ return -!!$ end if -!!$ -!!$ end subroutine d_oacc_ins_v -!!$ -!!$ -!!$ -!!$ subroutine d_oacc_ins_a(n, irl, val, dupl, x, info) -!!$ use psi_serial_mod -!!$ implicit none -!!$ class(psb_d_vect_oacc), intent(inout) :: x -!!$ integer(psb_ipk_), intent(in) :: n, dupl -!!$ integer(psb_ipk_), intent(in) :: irl(:) -!!$ real(psb_dpk_), intent(in) :: val(:) -!!$ integer(psb_ipk_), intent(out) :: info -!!$ -!!$ integer(psb_ipk_) :: i -!!$ -!!$ info = 0 -!!$ if (x%is_dev()) call x%sync_space() -!!$ call x%psb_d_base_vect_type%ins(n, irl, val, dupl, info) -!!$ call x%set_host() -!!$ !$acc update device(x%v) -!!$ -!!$ end subroutine d_oacc_ins_a -!!$ -!!$ -!!$ -!!$ subroutine d_oacc_bld_mn(x, n) -!!$ use psb_base_mod -!!$ implicit none -!!$ integer(psb_mpk_), intent(in) :: n -!!$ class(psb_d_vect_oacc), intent(inout) :: x -!!$ integer(psb_ipk_) :: info -!!$ -!!$ call x%all(n, info) -!!$ if (info /= 0) then -!!$ call psb_errpush(info, 'd_oacc_bld_mn', i_err=(/n, n, n, n, n/)) -!!$ end if -!!$ call x%set_host() -!!$ !$acc update device(x%v) -!!$ -!!$ end subroutine d_oacc_bld_mn -!!$ -!!$ -!!$ subroutine d_oacc_bld_x(x, this) -!!$ use psb_base_mod -!!$ implicit none -!!$ real(psb_dpk_), intent(in) :: this(:) -!!$ class(psb_d_vect_oacc), intent(inout) :: x -!!$ integer(psb_ipk_) :: info -!!$ -!!$ call psb_realloc(size(this), x%v, info) -!!$ if (info /= 0) then -!!$ info = psb_err_alloc_request_ -!!$ call psb_errpush(info, 'd_oacc_bld_x', & -!!$ i_err=(/size(this), izero, izero, izero, izero/)) -!!$ return -!!$ end if -!!$ -!!$ x%v(:) = this(:) -!!$ call x%set_host() -!!$ !$acc update device(x%v) -!!$ -!!$ end subroutine d_oacc_bld_x -!!$ -!!$ -!!$ subroutine d_oacc_asb_m(n, x, info) -!!$ use psb_base_mod -!!$ implicit none -!!$ integer(psb_mpk_), intent(in) :: n -!!$ class(psb_d_vect_oacc), intent(inout) :: x -!!$ integer(psb_ipk_), intent(out) :: info -!!$ integer(psb_mpk_) :: nd -!!$ -!!$ info = psb_success_ -!!$ -!!$ if (x%is_dev()) then -!!$ nd = size(x%v) -!!$ if (nd < n) then -!!$ call x%sync() -!!$ call x%psb_d_base_vect_type%asb(n, info) -!!$ if (info == psb_success_) call x%sync_space() -!!$ call x%set_host() -!!$ end if -!!$ else -!!$ if (size(x%v) < n) then -!!$ call x%psb_d_base_vect_type%asb(n, info) -!!$ if (info == psb_success_) call x%sync_space() -!!$ call x%set_host() -!!$ end if -!!$ end if -!!$ end subroutine d_oacc_asb_m -!!$ -!!$ -!!$ -!!$ subroutine d_oacc_set_scal(x, val, first, last) -!!$ class(psb_d_vect_oacc), intent(inout) :: x -!!$ real(psb_dpk_), intent(in) :: val -!!$ integer(psb_ipk_), optional :: first, last -!!$ -!!$ integer(psb_ipk_) :: first_, last_ -!!$ first_ = 1 -!!$ last_ = x%get_nrows() -!!$ if (present(first)) first_ = max(1, first) -!!$ if (present(last)) last_ = min(last, last_) -!!$ -!!$ !$acc parallel loop -!!$ do i = first_, last_ -!!$ x%v(i) = val -!!$ end do -!!$ !$acc end parallel loop -!!$ -!!$ call x%set_dev() -!!$ end subroutine d_oacc_set_scal -!!$ -!!$ -!!$ -!!$ subroutine d_oacc_zero(x) -!!$ use psi_serial_mod -!!$ implicit none -!!$ class(psb_d_vect_oacc), intent(inout) :: x -!!$ call x%set_dev() -!!$ call x%set_scal(dzero) -!!$ end subroutine d_oacc_zero -!!$ -!!$ function d_oacc_get_nrows(x) result(res) -!!$ implicit none -!!$ class(psb_d_vect_oacc), intent(in) :: x -!!$ integer(psb_ipk_) :: res -!!$ -!!$ if (allocated(x%v)) res = size(x%v) -!!$ end function d_oacc_get_nrows -!!$ -!!$ function d_oacc_get_fmt() result(res) -!!$ implicit none -!!$ character(len=5) :: res -!!$ res = "dOACC" -!!$ -!!$ end function d_oacc_get_fmt -!!$ -!!$ function d_oacc_vect_dot(n, x, y) result(res) -!!$ implicit none -!!$ class(psb_d_vect_oacc), intent(inout) :: x -!!$ class(psb_d_base_vect_type), intent(inout) :: y -!!$ integer(psb_ipk_), intent(in) :: n -!!$ real(psb_dpk_) :: res -!!$ real(psb_dpk_), external :: ddot -!!$ integer(psb_ipk_) :: info -!!$ integer(psb_ipk_) :: i -!!$ -!!$ res = dzero -!!$ -!!$ select type(yy => y) -!!$ type is (psb_d_base_vect_type) -!!$ if (x%is_dev()) call x%sync() -!!$ res = ddot(n, x%v, 1, yy%v, 1) -!!$ type is (psb_d_vect_oacc) -!!$ if (x%is_host()) call x%sync() -!!$ if (yy%is_host()) call yy%sync() -!!$ -!!$ !$acc parallel loop reduction(+:res) present(x%v, yy%v) -!!$ do i = 1, n -!!$ res = res + x%v(i) * yy%v(i) -!!$ end do -!!$ !$acc end parallel loop -!!$ -!!$ class default -!!$ call x%sync() -!!$ res = y%dot(n, x%v) -!!$ end select -!!$ -!!$ end function d_oacc_vect_dot -!!$ -!!$ -!!$ -!!$ -!!$ function d_oacc_dot_a(n, x, y) result(res) -!!$ implicit none -!!$ class(psb_d_vect_oacc), intent(inout) :: x -!!$ real(psb_dpk_), intent(in) :: y(:) -!!$ integer(psb_ipk_), intent(in) :: n -!!$ real(psb_dpk_) :: res -!!$ real(psb_dpk_), external :: ddot -!!$ -!!$ if (x%is_dev()) call x%sync() -!!$ res = ddot(n, y, 1, x%v, 1) -!!$ -!!$ end function d_oacc_dot_a -!!$ -!!$ ! subroutine d_oacc_set_vect(x,y) -!!$ ! implicit none -!!$ ! class(psb_d_vect_oacc), intent(inout) :: x -!!$ ! real(psb_dpk_), intent(in) :: y(:) -!!$ ! integer(psb_ipk_) :: info -!!$ -!!$ ! if (size(x%v) /= size(y)) then -!!$ ! call x%free(info) -!!$ ! call x%all(size(y),info) -!!$ ! end if -!!$ ! x%v(:) = y(:) -!!$ ! call x%set_host() -!!$ ! end subroutine d_oacc_set_vect -!!$ -!!$ subroutine d_oacc_to_dev(v) -!!$ implicit none -!!$ real(psb_dpk_) :: v(:) -!!$ !$acc update device(v) -!!$ end subroutine d_oacc_to_dev -!!$ -!!$ subroutine d_oacc_to_host(v) -!!$ implicit none -!!$ real(psb_dpk_) :: v(:) -!!$ !$acc update self(v) -!!$ end subroutine d_oacc_to_host -!!$ + + + subroutine d_oacc_axpby_v(m, alpha, x, beta, y, info) + !use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_vect_oacc), intent(inout) :: y + real(psb_dpk_), intent(in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny, i + + info = psb_success_ + + select type(xx => x) + type is (psb_d_vect_oacc) + if ((beta /= dzero) .and. y%is_host()) call y%sync_space() + if (xx%is_host()) call xx%sync_space() + nx = size(xx%v) + ny = size(y%v) + if ((nx < m) .or. (ny < m)) then + info = psb_err_internal_error_ + else + !$acc parallel loop + do i = 1, m + y%v(i) = alpha * xx%v(i) + beta * y%v(i) + end do + end if + call y%set_dev() + class default + if ((alpha /= dzero) .and. (x%is_dev())) call x%sync() + call y%axpby(m, alpha, x%v, beta, info) + end select + end subroutine d_oacc_axpby_v + + subroutine d_oacc_axpby_a(m, alpha, x, beta, y, info) + !use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_dpk_), intent(in) :: x(:) + class(psb_d_vect_oacc), intent(inout) :: y + real(psb_dpk_), intent(in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i + + if ((beta /= dzero) .and. (y%is_dev())) call y%sync_space() + !$acc parallel loop + do i = 1, m + y%v(i) = alpha * x(i) + beta * y%v(i) + end do + call y%set_host() + end subroutine d_oacc_axpby_a + + subroutine d_oacc_abgdxyz(m, alpha, beta, gamma, delta, x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + class(psb_d_vect_oacc), intent(inout) :: z + real(psb_dpk_), intent(in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny, nz, i + logical :: gpu_done + + info = psb_success_ + gpu_done = .false. + + select type(xx => x) + class is (psb_d_vect_oacc) + select type(yy => y) + class is (psb_d_vect_oacc) + select type(zz => z) + class is (psb_d_vect_oacc) + if ((beta /= dzero) .and. yy%is_host()) call yy%sync_space() + if ((delta /= dzero) .and. zz%is_host()) call zz%sync_space() + if (xx%is_host()) call xx%sync_space() + nx = size(xx%v) + ny = size(yy%v) + nz = size(zz%v) + if ((nx < m) .or. (ny < m) .or. (nz < m)) then + info = psb_err_internal_error_ + else + !$acc parallel loop + do i = 1, m + yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) + zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) + end do + end if + call yy%set_dev() + call zz%set_dev() + gpu_done = .true. + end select + end select + end select + + if (.not. gpu_done) then + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + if (z%is_host()) call z%sync() + call y%axpby(m, alpha, x, beta, info) + call z%axpby(m, gamma, y, delta, info) + end if + end subroutine d_oacc_abgdxyz + + subroutine d_oacc_sctb_buf(i, n, idx, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + real(psb_dpk_) :: beta + class(psb_d_vect_oacc) :: y + integer(psb_ipk_) :: info + + if (.not.allocated(y%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') + return + end if + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space(info) + if (y%is_host()) call y%sync_space() + + !$acc parallel loop + do i = 1, n + y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i) + end do + + class default + !$acc parallel loop + do i = 1, n + y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i) + end do + end select + end subroutine d_oacc_sctb_buf + + subroutine d_oacc_sctb_x(i, n, idx, x, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_):: i, n + class(psb_i_base_vect_type) :: idx + real(psb_dpk_) :: beta, x(:) + class(psb_d_vect_oacc) :: y + integer(psb_ipk_) :: info, ni + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space(info) + class default + call psb_errpush(info, 'd_oacc_sctb_x') + return + end select + + if (y%is_host()) call y%sync_space() + + !$acc parallel loop + do i = 1, n + y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i) + end do + + call y%set_dev() + end subroutine d_oacc_sctb_x + + + + subroutine d_oacc_sctb(n, idx, x, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: n + integer(psb_ipk_) :: idx(:) + real(psb_dpk_) :: beta, x(:) + class(psb_d_vect_oacc) :: y + integer(psb_ipk_) :: info + integer(psb_ipk_) :: i + + if (n == 0) return + if (y%is_dev()) call y%sync_space() + + !$acc parallel loop + do i = 1, n + y%v(idx(i)) = beta * y%v(idx(i)) + x(i) + end do + + call y%set_host() + end subroutine d_oacc_sctb + + + subroutine d_oacc_gthzbuf(i, n, idx, x) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + class(psb_d_vect_oacc) :: x + integer(psb_ipk_) :: info + + info = 0 + if (.not.allocated(x%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') + return + end if + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space(info) + class default + call psb_errpush(info, 'd_oacc_gthzbuf') + return + end select + + if (x%is_host()) call x%sync_space() + + !$acc parallel loop + do i = 1, n + x%combuf(i) = x%v(idx%v(i)) + end do + end subroutine d_oacc_gthzbuf + + subroutine d_oacc_gthzv_x(i, n, idx, x, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type):: idx + real(psb_dpk_) :: y(:) + class(psb_d_vect_oacc):: x + integer(psb_ipk_) :: info + + info = 0 + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space(info) + class default + call psb_errpush(info, 'd_oacc_gthzv_x') + return + end select + + if (x%is_host()) call x%sync_space() + + !$acc parallel loop + do i = 1, n + y(i) = x%v(idx%v(i)) + end do + end subroutine d_oacc_gthzv_x + + subroutine d_oacc_ins_v(n, irl, val, dupl, x, info) + use psi_serial_mod + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_d_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_oacc + + info = 0 + if (psb_errstatus_fatal()) return + + done_oacc = .false. + select type(virl => irl) + type is (psb_i_vect_oacc) + select type(vval => val) + type is (psb_d_vect_oacc) + if (vval%is_host()) call vval%sync_space() + if (virl%is_host()) call virl%sync_space(info) + if (x%is_host()) call x%sync_space() + !$acc parallel loop + do i = 1, n + x%v(virl%v(i)) = vval%v(i) + end do + call x%set_dev() + done_oacc = .true. + end select + end select + + if (.not.done_oacc) then + select type(virl => irl) + type is (psb_i_vect_oacc) + if (virl%is_dev()) call virl%sync_space(info) + end select + select type(vval => val) + type is (psb_d_vect_oacc) + if (vval%is_dev()) call vval%sync_space() + end select + call x%ins(n, irl%v, val%v, dupl, info) + end if + + if (info /= 0) then + call psb_errpush(info, 'oacc_vect_ins') + return + end if + + end subroutine d_oacc_ins_v + + + + subroutine d_oacc_ins_a(n, irl, val, dupl, x, info) + use psi_serial_mod + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync_space() + call x%psb_d_base_vect_type%ins(n, irl, val, dupl, info) + call x%set_host() + !$acc update device(x%v) + + end subroutine d_oacc_ins_a + + + + subroutine d_oacc_bld_mn(x, n) + use psb_base_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(n, info) + if (info /= 0) then + call psb_errpush(info, 'd_oacc_bld_mn', i_err=(/n, n, n, n, n/)) + end if + call x%set_host() + !$acc update device(x%v) + + end subroutine d_oacc_bld_mn + + + subroutine d_oacc_bld_x(x, this) + use psb_base_mod + implicit none + real(psb_dpk_), intent(in) :: this(:) + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + + call psb_realloc(size(this), x%v, info) + if (info /= 0) then + info = psb_err_alloc_request_ + call psb_errpush(info, 'd_oacc_bld_x', & + i_err=(/size(this), izero, izero, izero, izero/)) + return + end if + + x%v(:) = this(:) + call x%set_host() + !$acc update device(x%v) + + end subroutine d_oacc_bld_x + + + subroutine d_oacc_asb_m(n, x, info) + use psb_base_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_mpk_) :: nd + + info = psb_success_ + + if (x%is_dev()) then + nd = size(x%v) + if (nd < n) then + call x%sync() + call x%psb_d_base_vect_type%asb(n, info) + if (info == psb_success_) call x%sync_space() + call x%set_host() + end if + else + if (size(x%v) < n) then + call x%psb_d_base_vect_type%asb(n, info) + if (info == psb_success_) call x%sync_space() + call x%set_host() + end if + end if + end subroutine d_oacc_asb_m + + + + subroutine d_oacc_set_scal(x, val, first, last) + class(psb_d_vect_oacc), intent(inout) :: x + real(psb_dpk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: first_, last_ + first_ = 1 + last_ = x%get_nrows() + if (present(first)) first_ = max(1, first) + if (present(last)) last_ = min(last, last_) + + !$acc parallel loop + do i = first_, last_ + x%v(i) = val + end do + !$acc end parallel loop + + call x%set_dev() + end subroutine d_oacc_set_scal + + + + subroutine d_oacc_zero(x) + use psi_serial_mod + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + call x%set_dev() + call x%set_scal(dzero) + end subroutine d_oacc_zero + + function d_oacc_get_nrows(x) result(res) + implicit none + class(psb_d_vect_oacc), intent(in) :: x + integer(psb_ipk_) :: res + + if (allocated(x%v)) res = size(x%v) + end function d_oacc_get_nrows + + function d_oacc_get_fmt() result(res) + implicit none + character(len=5) :: res + res = "dOACC" + + end function d_oacc_get_fmt + + function d_oacc_vect_dot(n, x, y) result(res) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + real(psb_dpk_), external :: ddot + integer(psb_ipk_) :: info + integer(psb_ipk_) :: i + + res = dzero + + select type(yy => y) + type is (psb_d_base_vect_type) + if (x%is_dev()) call x%sync() + res = ddot(n, x%v, 1, yy%v, 1) + type is (psb_d_vect_oacc) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + + !$acc parallel loop reduction(+:res) present(x%v, yy%v) + do i = 1, n + res = res + x%v(i) * yy%v(i) + end do + !$acc end parallel loop + + class default + call x%sync() + res = y%dot(n, x%v) + end select + + end function d_oacc_vect_dot + + + + + function d_oacc_dot_a(n, x, y) result(res) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + real(psb_dpk_), intent(in) :: y(:) + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + real(psb_dpk_), external :: ddot + + if (x%is_dev()) call x%sync() + res = ddot(n, y, 1, x%v, 1) + + end function d_oacc_dot_a + + ! subroutine d_oacc_set_vect(x,y) + ! implicit none + ! class(psb_d_vect_oacc), intent(inout) :: x + ! real(psb_dpk_), intent(in) :: y(:) + ! integer(psb_ipk_) :: info + + ! if (size(x%v) /= size(y)) then + ! call x%free(info) + ! call x%all(size(y),info) + ! end if + ! x%v(:) = y(:) + ! call x%set_host() + ! end subroutine d_oacc_set_vect + + subroutine d_oacc_to_dev(v) + implicit none + real(psb_dpk_) :: v(:) + !$acc update device(v) + end subroutine d_oacc_to_dev + + subroutine d_oacc_to_host(v) + implicit none + real(psb_dpk_) :: v(:) + !$acc update self(v) + end subroutine d_oacc_to_host + subroutine d_oacc_sync_space(x) implicit none class(psb_d_vect_oacc), intent(inout) :: x @@ -828,21 +849,21 @@ contains subroutine d_oacc_set_host(x) implicit none class(psb_d_vect_oacc), intent(inout) :: x - + x%state = is_host end subroutine d_oacc_set_host subroutine d_oacc_set_dev(x) implicit none class(psb_d_vect_oacc), intent(inout) :: x - + x%state = is_dev end subroutine d_oacc_set_dev subroutine d_oacc_set_sync(x) implicit none class(psb_d_vect_oacc), intent(inout) :: x - + x%state = is_sync end subroutine d_oacc_set_sync @@ -869,50 +890,50 @@ contains res = (x%state == is_sync) end function d_oacc_is_sync -!!$ -!!$ subroutine d_oacc_vect_all(n, x, info) -!!$ use psi_serial_mod -!!$ use psb_realloc_mod -!!$ implicit none -!!$ integer(psb_ipk_), intent(in) :: n -!!$ class(psb_d_vect_oacc), intent(out) :: x -!!$ integer(psb_ipk_), intent(out) :: info -!!$ -!!$ call psb_realloc(n, x%v, info) -!!$ if (info == 0) then -!!$ call x%set_host() -!!$ !$acc enter data create(x%v) -!!$ call x%sync_space() -!!$ end if -!!$ if (info /= 0) then -!!$ info = psb_err_alloc_request_ -!!$ call psb_errpush(info, 'd_oacc_all', & -!!$ i_err=(/n, n, n, n, n/)) -!!$ end if -!!$ end subroutine d_oacc_vect_all -!!$ -!!$ -!!$ subroutine d_oacc_vect_free(x, info) -!!$ implicit none -!!$ class(psb_d_vect_oacc), intent(inout) :: x -!!$ integer(psb_ipk_), intent(out) :: info -!!$ info = 0 -!!$ if (allocated(x%v)) then -!!$ !$acc exit data delete(x%v) finalize -!!$ deallocate(x%v, stat=info) -!!$ end if -!!$ -!!$ end subroutine d_oacc_vect_free -!!$ -!!$ function oacc_get_size(x) result(res) -!!$ implicit none -!!$ class(psb_d_vect_oacc), intent(inout) :: x -!!$ integer(psb_ipk_) :: res -!!$ -!!$ if (x%is_dev()) call x%sync() -!!$ res = size(x%v) -!!$ end function oacc_get_size -!!$ + + subroutine d_oacc_vect_all(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_d_vect_oacc), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n, x%v, info) + if (info == 0) then + call x%set_host() + !$acc enter data create(x%v) + call x%sync_space() + end if + if (info /= 0) then + info = psb_err_alloc_request_ + call psb_errpush(info, 'd_oacc_all', & + i_err=(/n, n, n, n, n/)) + end if + end subroutine d_oacc_vect_all + + + subroutine d_oacc_vect_free(x, info) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + info = 0 + if (allocated(x%v)) then + !$acc exit data delete(x%v) finalize + deallocate(x%v, stat=info) + end if + + end subroutine d_oacc_vect_free + + function oacc_get_size(x) result(res) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: res + + if (x%is_dev()) call x%sync() + res = size(x%v) + end function oacc_get_size + !!$ !!$ subroutine initialize(N) !!$ integer(psb_ipk_) :: N @@ -939,26 +960,6 @@ contains !!$ subroutine to_host() !!$ !$acc update self(v1,v2) !!$ end subroutine to_host -!!$ function d_dot(N) result(res) -!!$ real(kind(1.d0)) :: res -!!$ integer(psb_ipk_) :: i,N -!!$ real(kind(1.d0)) :: t1,t2,t3 -!!$ res = 0.0d0 -!!$ !$acc parallel -!!$ !$acc loop reduction(+:res) -!!$ do i=1,N -!!$ res = res + v1(i) * v2(i) -!!$ end do -!!$ !$acc end parallel -!!$ -!!$ end function d_dot -!!$ function h_dot(N) result(res) -!!$ integer(psb_ipk_) :: i,N -!!$ real(kind(1.d0)) :: t1,t2,t3,res -!!$ res = 0.0d0 -!!$ do i=1,N -!!$ res = res + v1(i) * v2(i) -!!$ end do -!!$ end function h_dot !!$ + end module psb_d_oacc_vect_mod